| 1 | //===----------------------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "llvm/CAS/ObjectStore.h" |
| 10 | #include "llvm/ADT/DenseSet.h" |
| 11 | #include "llvm/ADT/ScopeExit.h" |
| 12 | #include "llvm/Support/Debug.h" |
| 13 | #include "llvm/Support/Errc.h" |
| 14 | #include "llvm/Support/FileSystem.h" |
| 15 | #include "llvm/Support/IOSandbox.h" |
| 16 | #include "llvm/Support/MemoryBuffer.h" |
| 17 | #include "llvm/Support/Path.h" |
| 18 | #include <deque> |
| 19 | |
| 20 | using namespace llvm; |
| 21 | using namespace llvm::cas; |
| 22 | |
| 23 | void CASContext::anchor() {} |
| 24 | void ObjectStore::anchor() {} |
| 25 | |
| 26 | LLVM_DUMP_METHOD void CASID::dump() const { print(OS&: dbgs()); } |
| 27 | LLVM_DUMP_METHOD void ObjectStore::dump() const { print(dbgs()); } |
| 28 | LLVM_DUMP_METHOD void ObjectRef::dump() const { print(OS&: dbgs()); } |
| 29 | LLVM_DUMP_METHOD void ObjectHandle::dump() const { print(OS&: dbgs()); } |
| 30 | |
| 31 | std::string CASID::toString() const { |
| 32 | std::string S; |
| 33 | raw_string_ostream(S) << *this; |
| 34 | return S; |
| 35 | } |
| 36 | |
| 37 | static void printReferenceBase(raw_ostream &OS, StringRef Kind, |
| 38 | uint64_t InternalRef, std::optional<CASID> ID) { |
| 39 | OS << Kind << "=" << InternalRef; |
| 40 | if (ID) |
| 41 | OS << "[" << *ID << "]" ; |
| 42 | } |
| 43 | |
| 44 | void ReferenceBase::print(raw_ostream &OS, const ObjectHandle &This) const { |
| 45 | assert(this == &This); |
| 46 | printReferenceBase(OS, Kind: "object-handle" , InternalRef, ID: std::nullopt); |
| 47 | } |
| 48 | |
| 49 | void ReferenceBase::print(raw_ostream &OS, const ObjectRef &This) const { |
| 50 | assert(this == &This); |
| 51 | |
| 52 | std::optional<CASID> ID; |
| 53 | #if LLVM_ENABLE_ABI_BREAKING_CHECKS |
| 54 | if (CAS) |
| 55 | ID = CAS->getID(This); |
| 56 | #endif |
| 57 | printReferenceBase(OS, Kind: "object-ref" , InternalRef, ID); |
| 58 | } |
| 59 | |
| 60 | Expected<ObjectHandle> ObjectStore::load(ObjectRef Ref) { |
| 61 | std::optional<ObjectHandle> Handle; |
| 62 | if (Error E = loadIfExists(Ref).moveInto(Value&: Handle)) |
| 63 | return std::move(E); |
| 64 | if (!Handle) |
| 65 | return createStringError(EC: errc::invalid_argument, |
| 66 | S: "missing object '" + getID(Ref).toString() + "'" ); |
| 67 | return *Handle; |
| 68 | } |
| 69 | |
| 70 | std::unique_ptr<MemoryBuffer> |
| 71 | ObjectStore::getMemoryBuffer(ObjectHandle Node, StringRef Name, |
| 72 | bool RequiresNullTerminator) { |
| 73 | return MemoryBuffer::getMemBuffer( |
| 74 | InputData: toStringRef(Input: getData(Node, RequiresNullTerminator)), BufferName: Name, |
| 75 | RequiresNullTerminator); |
| 76 | } |
| 77 | |
| 78 | void ObjectStore::readRefs(ObjectHandle Node, |
| 79 | SmallVectorImpl<ObjectRef> &Refs) const { |
| 80 | consumeError(Err: forEachRef(Node, Callback: [&Refs](ObjectRef Ref) -> Error { |
| 81 | Refs.push_back(Elt: Ref); |
| 82 | return Error::success(); |
| 83 | })); |
| 84 | } |
| 85 | |
| 86 | Expected<ObjectProxy> ObjectStore::getProxy(const CASID &ID) { |
| 87 | std::optional<ObjectRef> Ref = getReference(ID); |
| 88 | if (!Ref) |
| 89 | return createUnknownObjectError(ID); |
| 90 | |
| 91 | return getProxy(Ref: *Ref); |
| 92 | } |
| 93 | |
| 94 | Expected<ObjectProxy> ObjectStore::getProxy(ObjectRef Ref) { |
| 95 | std::optional<ObjectHandle> H; |
| 96 | if (Error E = load(Ref).moveInto(Value&: H)) |
| 97 | return std::move(E); |
| 98 | |
| 99 | return ObjectProxy::load(CAS&: *this, Ref, Node: *H); |
| 100 | } |
| 101 | |
| 102 | Expected<std::optional<ObjectProxy>> |
| 103 | ObjectStore::getProxyIfExists(ObjectRef Ref) { |
| 104 | std::optional<ObjectHandle> H; |
| 105 | if (Error E = loadIfExists(Ref).moveInto(Value&: H)) |
| 106 | return std::move(E); |
| 107 | if (!H) |
| 108 | return std::nullopt; |
| 109 | return ObjectProxy::load(CAS&: *this, Ref, Node: *H); |
| 110 | } |
| 111 | |
| 112 | Error ObjectStore::createUnknownObjectError(const CASID &ID) { |
| 113 | return createStringError(EC: std::make_error_code(e: std::errc::invalid_argument), |
| 114 | S: "unknown object '" + ID.toString() + "'" ); |
| 115 | } |
| 116 | |
| 117 | Expected<ObjectProxy> ObjectStore::createProxy(ArrayRef<ObjectRef> Refs, |
| 118 | StringRef Data) { |
| 119 | Expected<ObjectRef> Ref = store(Refs, Data: arrayRefFromStringRef<char>(Input: Data)); |
| 120 | if (!Ref) |
| 121 | return Ref.takeError(); |
| 122 | return getProxy(Ref: *Ref); |
| 123 | } |
| 124 | |
| 125 | Expected<ObjectRef> |
| 126 | ObjectStore::storeFromOpenFileImpl(sys::fs::file_t FD, |
| 127 | std::optional<sys::fs::file_status> Status) { |
| 128 | // TODO: For the on-disk CAS implementation use cloning to store it as a |
| 129 | // standalone file if the file-system supports it and the file is large. |
| 130 | uint64_t Size = Status ? Status->getSize() : -1; |
| 131 | auto Buffer = MemoryBuffer::getOpenFile(FD, /*Filename=*/"" , FileSize: Size); |
| 132 | if (!Buffer) |
| 133 | return errorCodeToError(EC: Buffer.getError()); |
| 134 | |
| 135 | return store(Refs: {}, Data: arrayRefFromStringRef<char>(Input: (*Buffer)->getBuffer())); |
| 136 | } |
| 137 | |
| 138 | Expected<ObjectRef> ObjectStore::storeFromFile(StringRef Path) { |
| 139 | auto BypassSandbox = sys::sandbox::scopedDisable(); |
| 140 | |
| 141 | sys::fs::file_t FD; |
| 142 | if (Error E = sys::fs::openNativeFileForRead(Name: Path).moveInto(Value&: FD)) |
| 143 | return E; |
| 144 | auto CloseFile = scope_exit([&FD] { sys::fs::closeFile(F&: FD); }); |
| 145 | return storeFromOpenFile(FD); |
| 146 | } |
| 147 | |
| 148 | Error ObjectStore::exportDataToFile(ObjectHandle Node, StringRef Path) const { |
| 149 | auto BypassSandbox = sys::sandbox::scopedDisable(); |
| 150 | |
| 151 | SmallString<256> TmpPath; |
| 152 | SmallString<256> Model; |
| 153 | Model += sys::path::parent_path(path: Path); |
| 154 | sys::path::append(path&: Model, a: "%%%%%%%.tmp" ); |
| 155 | if (std::error_code EC = sys::fs::createUniqueFile(Model, ResultPath&: TmpPath)) |
| 156 | return createFileError(F: Model, EC); |
| 157 | auto RemoveTmpFile = scope_exit([&] { |
| 158 | if (!TmpPath.empty()) |
| 159 | sys::fs::remove(path: TmpPath); |
| 160 | }); |
| 161 | |
| 162 | ArrayRef<char> Data = getData(Node); |
| 163 | std::error_code EC; |
| 164 | raw_fd_ostream FS(TmpPath, EC); |
| 165 | if (EC) |
| 166 | return createFileError(F: TmpPath, EC); |
| 167 | FS.write(Ptr: Data.begin(), Size: Data.size()); |
| 168 | FS.close(); |
| 169 | if (FS.has_error()) |
| 170 | return createFileError(F: TmpPath, EC: FS.error()); |
| 171 | |
| 172 | if (std::error_code EC = sys::fs::rename(from: TmpPath, to: Path)) |
| 173 | return createFileError(F: Path, EC); |
| 174 | TmpPath.clear(); |
| 175 | |
| 176 | return Error::success(); |
| 177 | } |
| 178 | |
| 179 | Error ObjectStore::validateTree(ObjectRef Root) { |
| 180 | SmallDenseSet<ObjectRef> ValidatedRefs; |
| 181 | SmallVector<ObjectRef, 16> RefsToValidate; |
| 182 | RefsToValidate.push_back(Elt: Root); |
| 183 | |
| 184 | while (!RefsToValidate.empty()) { |
| 185 | ObjectRef Ref = RefsToValidate.pop_back_val(); |
| 186 | auto [I, Inserted] = ValidatedRefs.insert(V: Ref); |
| 187 | if (!Inserted) |
| 188 | continue; // already validated. |
| 189 | if (Error E = validateObject(ID: getID(Ref))) |
| 190 | return E; |
| 191 | Expected<ObjectHandle> Obj = load(Ref); |
| 192 | if (!Obj) |
| 193 | return Obj.takeError(); |
| 194 | if (Error E = forEachRef(Node: *Obj, Callback: [&RefsToValidate](ObjectRef R) -> Error { |
| 195 | RefsToValidate.push_back(Elt: R); |
| 196 | return Error::success(); |
| 197 | })) |
| 198 | return E; |
| 199 | } |
| 200 | return Error::success(); |
| 201 | } |
| 202 | |
| 203 | Expected<ObjectRef> ObjectStore::importObject(ObjectStore &Upstream, |
| 204 | ObjectRef Other) { |
| 205 | // Copy the full CAS tree from upstream with depth-first ordering to ensure |
| 206 | // all the child nodes are available in downstream CAS before inserting |
| 207 | // current object. This uses a similar algorithm as |
| 208 | // `OnDiskGraphDB::importFullTree` but doesn't assume the upstream CAS schema |
| 209 | // so it can be used to import from any other ObjectStore reguardless of the |
| 210 | // CAS schema. |
| 211 | |
| 212 | // There is no work to do if importing from self. |
| 213 | if (this == &Upstream) |
| 214 | return Other; |
| 215 | |
| 216 | /// Keeps track of the state of visitation for current node and all of its |
| 217 | /// parents. Upstream Cursor holds information only from upstream CAS. |
| 218 | struct UpstreamCursor { |
| 219 | ObjectRef Ref; |
| 220 | ObjectHandle Node; |
| 221 | size_t RefsCount; |
| 222 | std::deque<ObjectRef> Refs; |
| 223 | }; |
| 224 | SmallVector<UpstreamCursor, 16> CursorStack; |
| 225 | /// PrimaryNodeStack holds the ObjectRef of the current CAS, with nodes either |
| 226 | /// just stored in the CAS or nodes already exists in the current CAS. |
| 227 | SmallVector<ObjectRef, 128> PrimaryRefStack; |
| 228 | /// A map from upstream ObjectRef to current ObjectRef. |
| 229 | llvm::DenseMap<ObjectRef, ObjectRef> CreatedObjects; |
| 230 | |
| 231 | auto enqueueNode = [&](ObjectRef Ref, ObjectHandle Node) { |
| 232 | unsigned NumRefs = Upstream.getNumRefs(Node); |
| 233 | std::deque<ObjectRef> Refs; |
| 234 | for (unsigned I = 0; I < NumRefs; ++I) |
| 235 | Refs.push_back(x: Upstream.readRef(Node, I)); |
| 236 | |
| 237 | CursorStack.push_back(Elt: {.Ref: Ref, .Node: Node, .RefsCount: NumRefs, .Refs: std::move(Refs)}); |
| 238 | }; |
| 239 | |
| 240 | auto UpstreamHandle = Upstream.load(Ref: Other); |
| 241 | if (!UpstreamHandle) |
| 242 | return UpstreamHandle.takeError(); |
| 243 | enqueueNode(Other, *UpstreamHandle); |
| 244 | |
| 245 | while (!CursorStack.empty()) { |
| 246 | UpstreamCursor &Cur = CursorStack.back(); |
| 247 | if (Cur.Refs.empty()) { |
| 248 | // Copy the node data into the primary store. |
| 249 | // The bottom of \p PrimaryRefStack contains the ObjectRef for the |
| 250 | // current node. |
| 251 | assert(PrimaryRefStack.size() >= Cur.RefsCount); |
| 252 | auto Refs = ArrayRef(PrimaryRefStack) |
| 253 | .slice(N: PrimaryRefStack.size() - Cur.RefsCount); |
| 254 | auto NewNode = store(Refs, Data: Upstream.getData(Node: Cur.Node)); |
| 255 | if (!NewNode) |
| 256 | return NewNode.takeError(); |
| 257 | |
| 258 | // Remove the current node and its IDs from the stack. |
| 259 | PrimaryRefStack.truncate(N: PrimaryRefStack.size() - Cur.RefsCount); |
| 260 | |
| 261 | // Push new node into created objects. |
| 262 | PrimaryRefStack.push_back(Elt: *NewNode); |
| 263 | CreatedObjects.try_emplace(Key: Cur.Ref, Args&: *NewNode); |
| 264 | |
| 265 | // Pop the cursor in the end after all uses. |
| 266 | CursorStack.pop_back(); |
| 267 | continue; |
| 268 | } |
| 269 | |
| 270 | // Check if the node exists already. |
| 271 | auto CurrentID = Cur.Refs.front(); |
| 272 | Cur.Refs.pop_front(); |
| 273 | auto Ref = CreatedObjects.find(Val: CurrentID); |
| 274 | if (Ref != CreatedObjects.end()) { |
| 275 | // If exists already, just need to enqueue the primary node. |
| 276 | PrimaryRefStack.push_back(Elt: Ref->second); |
| 277 | continue; |
| 278 | } |
| 279 | |
| 280 | // Load child. |
| 281 | auto PrimaryID = Upstream.load(Ref: CurrentID); |
| 282 | if (LLVM_UNLIKELY(!PrimaryID)) |
| 283 | return PrimaryID.takeError(); |
| 284 | |
| 285 | enqueueNode(CurrentID, *PrimaryID); |
| 286 | } |
| 287 | |
| 288 | assert(PrimaryRefStack.size() == 1); |
| 289 | return PrimaryRefStack.front(); |
| 290 | } |
| 291 | |
| 292 | std::unique_ptr<MemoryBuffer> |
| 293 | ObjectProxy::getMemoryBuffer(StringRef Name, |
| 294 | bool RequiresNullTerminator) const { |
| 295 | return CAS->getMemoryBuffer(Node: H, Name, RequiresNullTerminator); |
| 296 | } |
| 297 | |