1 | //===- DeltaTree.cpp - B-Tree for Rewrite Delta tracking ------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the DeltaTree and related classes. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/ADT/DeltaTree.h" |
14 | #include "llvm/Support/Casting.h" |
15 | #include <cassert> |
16 | #include <cstring> |
17 | |
18 | using namespace llvm; |
19 | |
20 | /// The DeltaTree class is a multiway search tree (BTree) structure with some |
21 | /// fancy features. B-Trees are generally more memory and cache efficient |
22 | /// than binary trees, because they store multiple keys/values in each node. |
23 | /// |
24 | /// DeltaTree implements a key/value mapping from FileIndex to Delta, allowing |
25 | /// fast lookup by FileIndex. However, an added (important) bonus is that it |
26 | /// can also efficiently tell us the full accumulated delta for a specific |
27 | /// file offset as well, without traversing the whole tree. |
28 | /// |
29 | /// The nodes of the tree are made up of instances of two classes: |
30 | /// DeltaTreeNode and DeltaTreeInteriorNode. The later subclasses the |
31 | /// former and adds children pointers. Each node knows the full delta of all |
32 | /// entries (recursively) contained inside of it, which allows us to get the |
33 | /// full delta implied by a whole subtree in constant time. |
34 | |
35 | namespace { |
36 | |
37 | /// SourceDelta - As code in the original input buffer is added and deleted, |
38 | /// SourceDelta records are used to keep track of how the input SourceLocation |
39 | /// object is mapped into the output buffer. |
40 | struct SourceDelta { |
41 | unsigned FileLoc; |
42 | int Delta; |
43 | |
44 | static SourceDelta get(unsigned Loc, int D) { |
45 | SourceDelta Delta; |
46 | Delta.FileLoc = Loc; |
47 | Delta.Delta = D; |
48 | return Delta; |
49 | } |
50 | }; |
51 | |
52 | /// DeltaTreeNode - The common part of all nodes. |
53 | /// |
54 | class DeltaTreeNode { |
55 | public: |
56 | struct InsertResult { |
57 | DeltaTreeNode *LHS, *RHS; |
58 | SourceDelta Split; |
59 | }; |
60 | |
61 | private: |
62 | friend class DeltaTreeInteriorNode; |
63 | |
64 | /// WidthFactor - This controls the number of K/V slots held in the BTree: |
65 | /// how wide it is. Each level of the BTree is guaranteed to have at least |
66 | /// WidthFactor-1 K/V pairs (except the root) and may have at most |
67 | /// 2*WidthFactor-1 K/V pairs. |
68 | enum { WidthFactor = 8 }; |
69 | |
70 | /// Values - This tracks the SourceDelta's currently in this node. |
71 | SourceDelta Values[2 * WidthFactor - 1]; |
72 | |
73 | /// NumValuesUsed - This tracks the number of values this node currently |
74 | /// holds. |
75 | unsigned char NumValuesUsed = 0; |
76 | |
77 | /// IsLeaf - This is true if this is a leaf of the btree. If false, this is |
78 | /// an interior node, and is actually an instance of DeltaTreeInteriorNode. |
79 | bool IsLeaf; |
80 | |
81 | /// FullDelta - This is the full delta of all the values in this node and |
82 | /// all children nodes. |
83 | int FullDelta = 0; |
84 | |
85 | public: |
86 | DeltaTreeNode(bool isLeaf = true) : IsLeaf(isLeaf) {} |
87 | |
88 | bool isLeaf() const { return IsLeaf; } |
89 | int getFullDelta() const { return FullDelta; } |
90 | bool isFull() const { return NumValuesUsed == 2 * WidthFactor - 1; } |
91 | |
92 | unsigned getNumValuesUsed() const { return NumValuesUsed; } |
93 | |
94 | const SourceDelta &getValue(unsigned i) const { |
95 | assert(i < NumValuesUsed && "Invalid value #" ); |
96 | return Values[i]; |
97 | } |
98 | |
99 | SourceDelta &getValue(unsigned i) { |
100 | assert(i < NumValuesUsed && "Invalid value #" ); |
101 | return Values[i]; |
102 | } |
103 | |
104 | /// DoInsertion - Do an insertion of the specified FileIndex/Delta pair into |
105 | /// this node. If insertion is easy, do it and return false. Otherwise, |
106 | /// split the node, populate InsertRes with info about the split, and return |
107 | /// true. |
108 | bool DoInsertion(unsigned FileIndex, int Delta, InsertResult *InsertRes); |
109 | |
110 | void DoSplit(InsertResult &InsertRes); |
111 | |
112 | /// RecomputeFullDeltaLocally - Recompute the FullDelta field by doing a |
113 | /// local walk over our contained deltas. |
114 | void RecomputeFullDeltaLocally(); |
115 | |
116 | void Destroy(); |
117 | }; |
118 | |
119 | /// DeltaTreeInteriorNode - When isLeaf = false, a node has child pointers. |
120 | /// This class tracks them. |
121 | class DeltaTreeInteriorNode : public DeltaTreeNode { |
122 | friend class DeltaTreeNode; |
123 | |
124 | DeltaTreeNode *Children[2 * WidthFactor]; |
125 | |
126 | ~DeltaTreeInteriorNode() { |
127 | for (unsigned i = 0, e = NumValuesUsed + 1; i != e; ++i) |
128 | Children[i]->Destroy(); |
129 | } |
130 | |
131 | public: |
132 | DeltaTreeInteriorNode() : DeltaTreeNode(false /*nonleaf*/) {} |
133 | |
134 | DeltaTreeInteriorNode(const InsertResult &IR) |
135 | : DeltaTreeNode(false /*nonleaf*/) { |
136 | Children[0] = IR.LHS; |
137 | Children[1] = IR.RHS; |
138 | Values[0] = IR.Split; |
139 | FullDelta = |
140 | IR.LHS->getFullDelta() + IR.RHS->getFullDelta() + IR.Split.Delta; |
141 | NumValuesUsed = 1; |
142 | } |
143 | |
144 | const DeltaTreeNode *getChild(unsigned i) const { |
145 | assert(i < getNumValuesUsed() + 1 && "Invalid child" ); |
146 | return Children[i]; |
147 | } |
148 | |
149 | DeltaTreeNode *getChild(unsigned i) { |
150 | assert(i < getNumValuesUsed() + 1 && "Invalid child" ); |
151 | return Children[i]; |
152 | } |
153 | |
154 | static bool classof(const DeltaTreeNode *N) { return !N->isLeaf(); } |
155 | }; |
156 | |
157 | } // namespace |
158 | |
159 | /// Destroy - A 'virtual' destructor. |
160 | void DeltaTreeNode::Destroy() { |
161 | if (isLeaf()) |
162 | delete this; |
163 | else |
164 | delete cast<DeltaTreeInteriorNode>(Val: this); |
165 | } |
166 | |
167 | /// RecomputeFullDeltaLocally - Recompute the FullDelta field by doing a |
168 | /// local walk over our contained deltas. |
169 | void DeltaTreeNode::RecomputeFullDeltaLocally() { |
170 | int NewFullDelta = 0; |
171 | for (unsigned i = 0, e = getNumValuesUsed(); i != e; ++i) |
172 | NewFullDelta += Values[i].Delta; |
173 | if (auto *IN = dyn_cast<DeltaTreeInteriorNode>(Val: this)) |
174 | for (unsigned i = 0, e = getNumValuesUsed() + 1; i != e; ++i) |
175 | NewFullDelta += IN->getChild(i)->getFullDelta(); |
176 | FullDelta = NewFullDelta; |
177 | } |
178 | |
179 | /// DoInsertion - Do an insertion of the specified FileIndex/Delta pair into |
180 | /// this node. If insertion is easy, do it and return false. Otherwise, |
181 | /// split the node, populate InsertRes with info about the split, and return |
182 | /// true. |
183 | bool DeltaTreeNode::DoInsertion(unsigned FileIndex, int Delta, |
184 | InsertResult *InsertRes) { |
185 | // Maintain full delta for this node. |
186 | FullDelta += Delta; |
187 | |
188 | // Find the insertion point, the first delta whose index is >= FileIndex. |
189 | unsigned i = 0, e = getNumValuesUsed(); |
190 | while (i != e && FileIndex > getValue(i).FileLoc) |
191 | ++i; |
192 | |
193 | // If we found an a record for exactly this file index, just merge this |
194 | // value into the pre-existing record and finish early. |
195 | if (i != e && getValue(i).FileLoc == FileIndex) { |
196 | // NOTE: Delta could drop to zero here. This means that the delta entry is |
197 | // useless and could be removed. Supporting erases is more complex than |
198 | // leaving an entry with Delta=0, so we just leave an entry with Delta=0 in |
199 | // the tree. |
200 | Values[i].Delta += Delta; |
201 | return false; |
202 | } |
203 | |
204 | // Otherwise, we found an insertion point, and we know that the value at the |
205 | // specified index is > FileIndex. Handle the leaf case first. |
206 | if (isLeaf()) { |
207 | if (!isFull()) { |
208 | // For an insertion into a non-full leaf node, just insert the value in |
209 | // its sorted position. This requires moving later values over. |
210 | if (i != e) |
211 | memmove(dest: &Values[i + 1], src: &Values[i], n: sizeof(Values[0]) * (e - i)); |
212 | Values[i] = SourceDelta::get(Loc: FileIndex, D: Delta); |
213 | ++NumValuesUsed; |
214 | return false; |
215 | } |
216 | |
217 | // Otherwise, if this is leaf is full, split the node at its median, insert |
218 | // the value into one of the children, and return the result. |
219 | assert(InsertRes && "No result location specified" ); |
220 | DoSplit(InsertRes&: *InsertRes); |
221 | |
222 | if (InsertRes->Split.FileLoc > FileIndex) |
223 | InsertRes->LHS->DoInsertion(FileIndex, Delta, InsertRes: nullptr /*can't fail*/); |
224 | else |
225 | InsertRes->RHS->DoInsertion(FileIndex, Delta, InsertRes: nullptr /*can't fail*/); |
226 | return true; |
227 | } |
228 | |
229 | // Otherwise, this is an interior node. Send the request down the tree. |
230 | auto *IN = cast<DeltaTreeInteriorNode>(Val: this); |
231 | if (!IN->Children[i]->DoInsertion(FileIndex, Delta, InsertRes)) |
232 | return false; // If there was space in the child, just return. |
233 | |
234 | // Okay, this split the subtree, producing a new value and two children to |
235 | // insert here. If this node is non-full, we can just insert it directly. |
236 | if (!isFull()) { |
237 | // Now that we have two nodes and a new element, insert the perclated value |
238 | // into ourself by moving all the later values/children down, then inserting |
239 | // the new one. |
240 | if (i != e) |
241 | memmove(dest: &IN->Children[i + 2], src: &IN->Children[i + 1], |
242 | n: (e - i) * sizeof(IN->Children[0])); |
243 | IN->Children[i] = InsertRes->LHS; |
244 | IN->Children[i + 1] = InsertRes->RHS; |
245 | |
246 | if (e != i) |
247 | memmove(dest: &Values[i + 1], src: &Values[i], n: (e - i) * sizeof(Values[0])); |
248 | Values[i] = InsertRes->Split; |
249 | ++NumValuesUsed; |
250 | return false; |
251 | } |
252 | |
253 | // Finally, if this interior node was full and a node is percolated up, split |
254 | // ourself and return that up the chain. Start by saving all our info to |
255 | // avoid having the split clobber it. |
256 | IN->Children[i] = InsertRes->LHS; |
257 | DeltaTreeNode *SubRHS = InsertRes->RHS; |
258 | SourceDelta SubSplit = InsertRes->Split; |
259 | |
260 | // Do the split. |
261 | DoSplit(InsertRes&: *InsertRes); |
262 | |
263 | // Figure out where to insert SubRHS/NewSplit. |
264 | DeltaTreeInteriorNode *InsertSide; |
265 | if (SubSplit.FileLoc < InsertRes->Split.FileLoc) |
266 | InsertSide = cast<DeltaTreeInteriorNode>(Val: InsertRes->LHS); |
267 | else |
268 | InsertSide = cast<DeltaTreeInteriorNode>(Val: InsertRes->RHS); |
269 | |
270 | // We now have a non-empty interior node 'InsertSide' to insert |
271 | // SubRHS/SubSplit into. Find out where to insert SubSplit. |
272 | |
273 | // Find the insertion point, the first delta whose index is >SubSplit.FileLoc. |
274 | i = 0; |
275 | e = InsertSide->getNumValuesUsed(); |
276 | while (i != e && SubSplit.FileLoc > InsertSide->getValue(i).FileLoc) |
277 | ++i; |
278 | |
279 | // Now we know that i is the place to insert the split value into. Insert it |
280 | // and the child right after it. |
281 | if (i != e) |
282 | memmove(dest: &InsertSide->Children[i + 2], src: &InsertSide->Children[i + 1], |
283 | n: (e - i) * sizeof(IN->Children[0])); |
284 | InsertSide->Children[i + 1] = SubRHS; |
285 | |
286 | if (e != i) |
287 | memmove(dest: &InsertSide->Values[i + 1], src: &InsertSide->Values[i], |
288 | n: (e - i) * sizeof(Values[0])); |
289 | InsertSide->Values[i] = SubSplit; |
290 | ++InsertSide->NumValuesUsed; |
291 | InsertSide->FullDelta += SubSplit.Delta + SubRHS->getFullDelta(); |
292 | return true; |
293 | } |
294 | |
295 | /// DoSplit - Split the currently full node (which has 2*WidthFactor-1 values) |
296 | /// into two subtrees each with "WidthFactor-1" values and a pivot value. |
297 | /// Return the pieces in InsertRes. |
298 | void DeltaTreeNode::DoSplit(InsertResult &InsertRes) { |
299 | assert(isFull() && "Why split a non-full node?" ); |
300 | |
301 | // Since this node is full, it contains 2*WidthFactor-1 values. We move |
302 | // the first 'WidthFactor-1' values to the LHS child (which we leave in this |
303 | // node), propagate one value up, and move the last 'WidthFactor-1' values |
304 | // into the RHS child. |
305 | |
306 | // Create the new child node. |
307 | DeltaTreeNode *NewNode; |
308 | if (auto *IN = dyn_cast<DeltaTreeInteriorNode>(Val: this)) { |
309 | // If this is an interior node, also move over 'WidthFactor' children |
310 | // into the new node. |
311 | DeltaTreeInteriorNode *New = new DeltaTreeInteriorNode(); |
312 | memcpy(dest: &New->Children[0], src: &IN->Children[WidthFactor], |
313 | n: WidthFactor * sizeof(IN->Children[0])); |
314 | NewNode = New; |
315 | } else { |
316 | // Just create the new leaf node. |
317 | NewNode = new DeltaTreeNode(); |
318 | } |
319 | |
320 | // Move over the last 'WidthFactor-1' values from here to NewNode. |
321 | memcpy(dest: &NewNode->Values[0], src: &Values[WidthFactor], |
322 | n: (WidthFactor - 1) * sizeof(Values[0])); |
323 | |
324 | // Decrease the number of values in the two nodes. |
325 | NewNode->NumValuesUsed = NumValuesUsed = WidthFactor - 1; |
326 | |
327 | // Recompute the two nodes' full delta. |
328 | NewNode->RecomputeFullDeltaLocally(); |
329 | RecomputeFullDeltaLocally(); |
330 | |
331 | InsertRes.LHS = this; |
332 | InsertRes.RHS = NewNode; |
333 | InsertRes.Split = Values[WidthFactor - 1]; |
334 | } |
335 | |
336 | //===----------------------------------------------------------------------===// |
337 | // DeltaTree Implementation |
338 | //===----------------------------------------------------------------------===// |
339 | |
340 | // #define VERIFY_TREE |
341 | |
342 | #ifdef VERIFY_TREE |
343 | /// VerifyTree - Walk the btree performing assertions on various properties to |
344 | /// verify consistency. This is useful for debugging new changes to the tree. |
345 | static void VerifyTree(const DeltaTreeNode *N) { |
346 | const auto *IN = dyn_cast<DeltaTreeInteriorNode>(N); |
347 | if (IN == 0) { |
348 | // Verify leaves, just ensure that FullDelta matches up and the elements |
349 | // are in proper order. |
350 | int FullDelta = 0; |
351 | for (unsigned i = 0, e = N->getNumValuesUsed(); i != e; ++i) { |
352 | if (i) |
353 | assert(N->getValue(i - 1).FileLoc < N->getValue(i).FileLoc); |
354 | FullDelta += N->getValue(i).Delta; |
355 | } |
356 | assert(FullDelta == N->getFullDelta()); |
357 | return; |
358 | } |
359 | |
360 | // Verify interior nodes: Ensure that FullDelta matches up and the |
361 | // elements are in proper order and the children are in proper order. |
362 | int FullDelta = 0; |
363 | for (unsigned i = 0, e = IN->getNumValuesUsed(); i != e; ++i) { |
364 | const SourceDelta &IVal = N->getValue(i); |
365 | const DeltaTreeNode *IChild = IN->getChild(i); |
366 | if (i) |
367 | assert(IN->getValue(i - 1).FileLoc < IVal.FileLoc); |
368 | FullDelta += IVal.Delta; |
369 | FullDelta += IChild->getFullDelta(); |
370 | |
371 | // The largest value in child #i should be smaller than FileLoc. |
372 | assert(IChild->getValue(IChild->getNumValuesUsed() - 1).FileLoc < |
373 | IVal.FileLoc); |
374 | |
375 | // The smallest value in child #i+1 should be larger than FileLoc. |
376 | assert(IN->getChild(i + 1)->getValue(0).FileLoc > IVal.FileLoc); |
377 | VerifyTree(IChild); |
378 | } |
379 | |
380 | FullDelta += IN->getChild(IN->getNumValuesUsed())->getFullDelta(); |
381 | |
382 | assert(FullDelta == N->getFullDelta()); |
383 | } |
384 | #endif // VERIFY_TREE |
385 | |
386 | static DeltaTreeNode *getRoot(void *Root) { return (DeltaTreeNode *)Root; } |
387 | |
388 | DeltaTree::DeltaTree() { Root = new DeltaTreeNode(); } |
389 | |
390 | DeltaTree::DeltaTree(const DeltaTree &RHS) { |
391 | // Currently we only support copying when the RHS is empty. |
392 | assert(getRoot(RHS.Root)->getNumValuesUsed() == 0 && |
393 | "Can only copy empty tree" ); |
394 | Root = new DeltaTreeNode(); |
395 | } |
396 | |
397 | DeltaTree::~DeltaTree() { getRoot(Root)->Destroy(); } |
398 | |
399 | /// getDeltaAt - Return the accumulated delta at the specified file offset. |
400 | /// This includes all insertions or delections that occurred *before* the |
401 | /// specified file index. |
402 | int DeltaTree::getDeltaAt(unsigned FileIndex) const { |
403 | const DeltaTreeNode *Node = getRoot(Root); |
404 | |
405 | int Result = 0; |
406 | |
407 | // Walk down the tree. |
408 | while (true) { |
409 | // For all nodes, include any local deltas before the specified file |
410 | // index by summing them up directly. Keep track of how many were |
411 | // included. |
412 | unsigned NumValsGreater = 0; |
413 | for (unsigned e = Node->getNumValuesUsed(); NumValsGreater != e; |
414 | ++NumValsGreater) { |
415 | const SourceDelta &Val = Node->getValue(i: NumValsGreater); |
416 | |
417 | if (Val.FileLoc >= FileIndex) |
418 | break; |
419 | Result += Val.Delta; |
420 | } |
421 | |
422 | // If we have an interior node, include information about children and |
423 | // recurse. Otherwise, if we have a leaf, we're done. |
424 | const auto *IN = dyn_cast<DeltaTreeInteriorNode>(Val: Node); |
425 | if (!IN) |
426 | return Result; |
427 | |
428 | // Include any children to the left of the values we skipped, all of |
429 | // their deltas should be included as well. |
430 | for (unsigned i = 0; i != NumValsGreater; ++i) |
431 | Result += IN->getChild(i)->getFullDelta(); |
432 | |
433 | // If we found exactly the value we were looking for, break off the |
434 | // search early. There is no need to search the RHS of the value for |
435 | // partial results. |
436 | if (NumValsGreater != Node->getNumValuesUsed() && |
437 | Node->getValue(i: NumValsGreater).FileLoc == FileIndex) |
438 | return Result + IN->getChild(i: NumValsGreater)->getFullDelta(); |
439 | |
440 | // Otherwise, traverse down the tree. The selected subtree may be |
441 | // partially included in the range. |
442 | Node = IN->getChild(i: NumValsGreater); |
443 | } |
444 | // NOT REACHED. |
445 | } |
446 | |
447 | /// AddDelta - When a change is made that shifts around the text buffer, |
448 | /// this method is used to record that info. It inserts a delta of 'Delta' |
449 | /// into the current DeltaTree at offset FileIndex. |
450 | void DeltaTree::AddDelta(unsigned FileIndex, int Delta) { |
451 | assert(Delta && "Adding a noop?" ); |
452 | DeltaTreeNode *MyRoot = getRoot(Root); |
453 | |
454 | DeltaTreeNode::InsertResult InsertRes; |
455 | if (MyRoot->DoInsertion(FileIndex, Delta, InsertRes: &InsertRes)) { |
456 | Root = new DeltaTreeInteriorNode(InsertRes); |
457 | #ifdef VERIFY_TREE |
458 | MyRoot = Root; |
459 | #endif |
460 | } |
461 | |
462 | #ifdef VERIFY_TREE |
463 | VerifyTree(MyRoot); |
464 | #endif |
465 | } |
466 | |