| 1 | //===----------------------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "llvm/CAS/ObjectStore.h" |
| 10 | #include "llvm/ADT/DenseSet.h" |
| 11 | #include "llvm/Support/Debug.h" |
| 12 | #include "llvm/Support/Errc.h" |
| 13 | #include "llvm/Support/FileSystem.h" |
| 14 | #include "llvm/Support/MemoryBuffer.h" |
| 15 | #include <deque> |
| 16 | |
| 17 | using namespace llvm; |
| 18 | using namespace llvm::cas; |
| 19 | |
| 20 | void CASContext::anchor() {} |
| 21 | void ObjectStore::anchor() {} |
| 22 | |
| 23 | LLVM_DUMP_METHOD void CASID::dump() const { print(OS&: dbgs()); } |
| 24 | LLVM_DUMP_METHOD void ObjectStore::dump() const { print(dbgs()); } |
| 25 | LLVM_DUMP_METHOD void ObjectRef::dump() const { print(OS&: dbgs()); } |
| 26 | LLVM_DUMP_METHOD void ObjectHandle::dump() const { print(OS&: dbgs()); } |
| 27 | |
| 28 | std::string CASID::toString() const { |
| 29 | std::string S; |
| 30 | raw_string_ostream(S) << *this; |
| 31 | return S; |
| 32 | } |
| 33 | |
| 34 | static void printReferenceBase(raw_ostream &OS, StringRef Kind, |
| 35 | uint64_t InternalRef, std::optional<CASID> ID) { |
| 36 | OS << Kind << "=" << InternalRef; |
| 37 | if (ID) |
| 38 | OS << "[" << *ID << "]" ; |
| 39 | } |
| 40 | |
| 41 | void ReferenceBase::print(raw_ostream &OS, const ObjectHandle &This) const { |
| 42 | assert(this == &This); |
| 43 | printReferenceBase(OS, Kind: "object-handle" , InternalRef, ID: std::nullopt); |
| 44 | } |
| 45 | |
| 46 | void ReferenceBase::print(raw_ostream &OS, const ObjectRef &This) const { |
| 47 | assert(this == &This); |
| 48 | |
| 49 | std::optional<CASID> ID; |
| 50 | #if LLVM_ENABLE_ABI_BREAKING_CHECKS |
| 51 | if (CAS) |
| 52 | ID = CAS->getID(This); |
| 53 | #endif |
| 54 | printReferenceBase(OS, Kind: "object-ref" , InternalRef, ID); |
| 55 | } |
| 56 | |
| 57 | Expected<ObjectHandle> ObjectStore::load(ObjectRef Ref) { |
| 58 | std::optional<ObjectHandle> Handle; |
| 59 | if (Error E = loadIfExists(Ref).moveInto(Value&: Handle)) |
| 60 | return std::move(E); |
| 61 | if (!Handle) |
| 62 | return createStringError(EC: errc::invalid_argument, |
| 63 | S: "missing object '" + getID(Ref).toString() + "'" ); |
| 64 | return *Handle; |
| 65 | } |
| 66 | |
| 67 | std::unique_ptr<MemoryBuffer> |
| 68 | ObjectStore::getMemoryBuffer(ObjectHandle Node, StringRef Name, |
| 69 | bool RequiresNullTerminator) { |
| 70 | return MemoryBuffer::getMemBuffer( |
| 71 | InputData: toStringRef(Input: getData(Node, RequiresNullTerminator)), BufferName: Name, |
| 72 | RequiresNullTerminator); |
| 73 | } |
| 74 | |
| 75 | void ObjectStore::readRefs(ObjectHandle Node, |
| 76 | SmallVectorImpl<ObjectRef> &Refs) const { |
| 77 | consumeError(Err: forEachRef(Node, Callback: [&Refs](ObjectRef Ref) -> Error { |
| 78 | Refs.push_back(Elt: Ref); |
| 79 | return Error::success(); |
| 80 | })); |
| 81 | } |
| 82 | |
| 83 | Expected<ObjectProxy> ObjectStore::getProxy(const CASID &ID) { |
| 84 | std::optional<ObjectRef> Ref = getReference(ID); |
| 85 | if (!Ref) |
| 86 | return createUnknownObjectError(ID); |
| 87 | |
| 88 | return getProxy(Ref: *Ref); |
| 89 | } |
| 90 | |
| 91 | Expected<ObjectProxy> ObjectStore::getProxy(ObjectRef Ref) { |
| 92 | std::optional<ObjectHandle> H; |
| 93 | if (Error E = load(Ref).moveInto(Value&: H)) |
| 94 | return std::move(E); |
| 95 | |
| 96 | return ObjectProxy::load(CAS&: *this, Ref, Node: *H); |
| 97 | } |
| 98 | |
| 99 | Expected<std::optional<ObjectProxy>> |
| 100 | ObjectStore::getProxyIfExists(ObjectRef Ref) { |
| 101 | std::optional<ObjectHandle> H; |
| 102 | if (Error E = loadIfExists(Ref).moveInto(Value&: H)) |
| 103 | return std::move(E); |
| 104 | if (!H) |
| 105 | return std::nullopt; |
| 106 | return ObjectProxy::load(CAS&: *this, Ref, Node: *H); |
| 107 | } |
| 108 | |
| 109 | Error ObjectStore::createUnknownObjectError(const CASID &ID) { |
| 110 | return createStringError(EC: std::make_error_code(e: std::errc::invalid_argument), |
| 111 | S: "unknown object '" + ID.toString() + "'" ); |
| 112 | } |
| 113 | |
| 114 | Expected<ObjectProxy> ObjectStore::createProxy(ArrayRef<ObjectRef> Refs, |
| 115 | StringRef Data) { |
| 116 | Expected<ObjectRef> Ref = store(Refs, Data: arrayRefFromStringRef<char>(Input: Data)); |
| 117 | if (!Ref) |
| 118 | return Ref.takeError(); |
| 119 | return getProxy(Ref: *Ref); |
| 120 | } |
| 121 | |
| 122 | Expected<ObjectRef> |
| 123 | ObjectStore::storeFromOpenFileImpl(sys::fs::file_t FD, |
| 124 | std::optional<sys::fs::file_status> Status) { |
| 125 | // TODO: For the on-disk CAS implementation use cloning to store it as a |
| 126 | // standalone file if the file-system supports it and the file is large. |
| 127 | uint64_t Size = Status ? Status->getSize() : -1; |
| 128 | auto Buffer = MemoryBuffer::getOpenFile(FD, /*Filename=*/"" , FileSize: Size); |
| 129 | if (!Buffer) |
| 130 | return errorCodeToError(EC: Buffer.getError()); |
| 131 | |
| 132 | return store(Refs: {}, Data: arrayRefFromStringRef<char>(Input: (*Buffer)->getBuffer())); |
| 133 | } |
| 134 | |
| 135 | Error ObjectStore::validateTree(ObjectRef Root) { |
| 136 | SmallDenseSet<ObjectRef> ValidatedRefs; |
| 137 | SmallVector<ObjectRef, 16> RefsToValidate; |
| 138 | RefsToValidate.push_back(Elt: Root); |
| 139 | |
| 140 | while (!RefsToValidate.empty()) { |
| 141 | ObjectRef Ref = RefsToValidate.pop_back_val(); |
| 142 | auto [I, Inserted] = ValidatedRefs.insert(V: Ref); |
| 143 | if (!Inserted) |
| 144 | continue; // already validated. |
| 145 | if (Error E = validateObject(ID: getID(Ref))) |
| 146 | return E; |
| 147 | Expected<ObjectHandle> Obj = load(Ref); |
| 148 | if (!Obj) |
| 149 | return Obj.takeError(); |
| 150 | if (Error E = forEachRef(Node: *Obj, Callback: [&RefsToValidate](ObjectRef R) -> Error { |
| 151 | RefsToValidate.push_back(Elt: R); |
| 152 | return Error::success(); |
| 153 | })) |
| 154 | return E; |
| 155 | } |
| 156 | return Error::success(); |
| 157 | } |
| 158 | |
| 159 | Expected<ObjectRef> ObjectStore::importObject(ObjectStore &Upstream, |
| 160 | ObjectRef Other) { |
| 161 | // Copy the full CAS tree from upstream with depth-first ordering to ensure |
| 162 | // all the child nodes are available in downstream CAS before inserting |
| 163 | // current object. This uses a similar algorithm as |
| 164 | // `OnDiskGraphDB::importFullTree` but doesn't assume the upstream CAS schema |
| 165 | // so it can be used to import from any other ObjectStore reguardless of the |
| 166 | // CAS schema. |
| 167 | |
| 168 | // There is no work to do if importing from self. |
| 169 | if (this == &Upstream) |
| 170 | return Other; |
| 171 | |
| 172 | /// Keeps track of the state of visitation for current node and all of its |
| 173 | /// parents. Upstream Cursor holds information only from upstream CAS. |
| 174 | struct UpstreamCursor { |
| 175 | ObjectRef Ref; |
| 176 | ObjectHandle Node; |
| 177 | size_t RefsCount; |
| 178 | std::deque<ObjectRef> Refs; |
| 179 | }; |
| 180 | SmallVector<UpstreamCursor, 16> CursorStack; |
| 181 | /// PrimaryNodeStack holds the ObjectRef of the current CAS, with nodes either |
| 182 | /// just stored in the CAS or nodes already exists in the current CAS. |
| 183 | SmallVector<ObjectRef, 128> PrimaryRefStack; |
| 184 | /// A map from upstream ObjectRef to current ObjectRef. |
| 185 | llvm::DenseMap<ObjectRef, ObjectRef> CreatedObjects; |
| 186 | |
| 187 | auto enqueueNode = [&](ObjectRef Ref, ObjectHandle Node) { |
| 188 | unsigned NumRefs = Upstream.getNumRefs(Node); |
| 189 | std::deque<ObjectRef> Refs; |
| 190 | for (unsigned I = 0; I < NumRefs; ++I) |
| 191 | Refs.push_back(x: Upstream.readRef(Node, I)); |
| 192 | |
| 193 | CursorStack.push_back(Elt: {.Ref: Ref, .Node: Node, .RefsCount: NumRefs, .Refs: std::move(Refs)}); |
| 194 | }; |
| 195 | |
| 196 | auto UpstreamHandle = Upstream.load(Ref: Other); |
| 197 | if (!UpstreamHandle) |
| 198 | return UpstreamHandle.takeError(); |
| 199 | enqueueNode(Other, *UpstreamHandle); |
| 200 | |
| 201 | while (!CursorStack.empty()) { |
| 202 | UpstreamCursor &Cur = CursorStack.back(); |
| 203 | if (Cur.Refs.empty()) { |
| 204 | // Copy the node data into the primary store. |
| 205 | // The bottom of \p PrimaryRefStack contains the ObjectRef for the |
| 206 | // current node. |
| 207 | assert(PrimaryRefStack.size() >= Cur.RefsCount); |
| 208 | auto Refs = ArrayRef(PrimaryRefStack) |
| 209 | .slice(N: PrimaryRefStack.size() - Cur.RefsCount); |
| 210 | auto NewNode = store(Refs, Data: Upstream.getData(Node: Cur.Node)); |
| 211 | if (!NewNode) |
| 212 | return NewNode.takeError(); |
| 213 | |
| 214 | // Remove the current node and its IDs from the stack. |
| 215 | PrimaryRefStack.truncate(N: PrimaryRefStack.size() - Cur.RefsCount); |
| 216 | |
| 217 | // Push new node into created objects. |
| 218 | PrimaryRefStack.push_back(Elt: *NewNode); |
| 219 | CreatedObjects.try_emplace(Key: Cur.Ref, Args&: *NewNode); |
| 220 | |
| 221 | // Pop the cursor in the end after all uses. |
| 222 | CursorStack.pop_back(); |
| 223 | continue; |
| 224 | } |
| 225 | |
| 226 | // Check if the node exists already. |
| 227 | auto CurrentID = Cur.Refs.front(); |
| 228 | Cur.Refs.pop_front(); |
| 229 | auto Ref = CreatedObjects.find(Val: CurrentID); |
| 230 | if (Ref != CreatedObjects.end()) { |
| 231 | // If exists already, just need to enqueue the primary node. |
| 232 | PrimaryRefStack.push_back(Elt: Ref->second); |
| 233 | continue; |
| 234 | } |
| 235 | |
| 236 | // Load child. |
| 237 | auto PrimaryID = Upstream.load(Ref: CurrentID); |
| 238 | if (LLVM_UNLIKELY(!PrimaryID)) |
| 239 | return PrimaryID.takeError(); |
| 240 | |
| 241 | enqueueNode(CurrentID, *PrimaryID); |
| 242 | } |
| 243 | |
| 244 | assert(PrimaryRefStack.size() == 1); |
| 245 | return PrimaryRefStack.front(); |
| 246 | } |
| 247 | |
| 248 | std::unique_ptr<MemoryBuffer> |
| 249 | ObjectProxy::getMemoryBuffer(StringRef Name, |
| 250 | bool RequiresNullTerminator) const { |
| 251 | return CAS->getMemoryBuffer(Node: H, Name, RequiresNullTerminator); |
| 252 | } |
| 253 | |