1 | //===- DeltaTree.cpp - B-Tree for Rewrite Delta tracking ------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the DeltaTree and related classes. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "clang/Rewrite/Core/DeltaTree.h" |
14 | #include "clang/Basic/LLVM.h" |
15 | #include "llvm/Support/Casting.h" |
16 | #include <cassert> |
17 | #include <cstring> |
18 | |
19 | using namespace clang; |
20 | |
21 | /// The DeltaTree class is a multiway search tree (BTree) structure with some |
22 | /// fancy features. B-Trees are generally more memory and cache efficient |
23 | /// than binary trees, because they store multiple keys/values in each node. |
24 | /// |
25 | /// DeltaTree implements a key/value mapping from FileIndex to Delta, allowing |
26 | /// fast lookup by FileIndex. However, an added (important) bonus is that it |
27 | /// can also efficiently tell us the full accumulated delta for a specific |
28 | /// file offset as well, without traversing the whole tree. |
29 | /// |
30 | /// The nodes of the tree are made up of instances of two classes: |
31 | /// DeltaTreeNode and DeltaTreeInteriorNode. The later subclasses the |
32 | /// former and adds children pointers. Each node knows the full delta of all |
33 | /// entries (recursively) contained inside of it, which allows us to get the |
34 | /// full delta implied by a whole subtree in constant time. |
35 | |
36 | namespace { |
37 | |
38 | /// SourceDelta - As code in the original input buffer is added and deleted, |
39 | /// SourceDelta records are used to keep track of how the input SourceLocation |
40 | /// object is mapped into the output buffer. |
41 | struct SourceDelta { |
42 | unsigned FileLoc; |
43 | int Delta; |
44 | |
45 | static SourceDelta get(unsigned Loc, int D) { |
46 | SourceDelta Delta; |
47 | Delta.FileLoc = Loc; |
48 | Delta.Delta = D; |
49 | return Delta; |
50 | } |
51 | }; |
52 | |
53 | /// DeltaTreeNode - The common part of all nodes. |
54 | /// |
55 | class DeltaTreeNode { |
56 | public: |
57 | struct InsertResult { |
58 | DeltaTreeNode *LHS, *RHS; |
59 | SourceDelta Split; |
60 | }; |
61 | |
62 | private: |
63 | friend class DeltaTreeInteriorNode; |
64 | |
65 | /// WidthFactor - This controls the number of K/V slots held in the BTree: |
66 | /// how wide it is. Each level of the BTree is guaranteed to have at least |
67 | /// WidthFactor-1 K/V pairs (except the root) and may have at most |
68 | /// 2*WidthFactor-1 K/V pairs. |
69 | enum { WidthFactor = 8 }; |
70 | |
71 | /// Values - This tracks the SourceDelta's currently in this node. |
72 | SourceDelta Values[2*WidthFactor-1]; |
73 | |
74 | /// NumValuesUsed - This tracks the number of values this node currently |
75 | /// holds. |
76 | unsigned char NumValuesUsed = 0; |
77 | |
78 | /// IsLeaf - This is true if this is a leaf of the btree. If false, this is |
79 | /// an interior node, and is actually an instance of DeltaTreeInteriorNode. |
80 | bool IsLeaf; |
81 | |
82 | /// FullDelta - This is the full delta of all the values in this node and |
83 | /// all children nodes. |
84 | int FullDelta = 0; |
85 | |
86 | public: |
87 | DeltaTreeNode(bool isLeaf = true) : IsLeaf(isLeaf) {} |
88 | |
89 | bool isLeaf() const { return IsLeaf; } |
90 | int getFullDelta() const { return FullDelta; } |
91 | bool isFull() const { return NumValuesUsed == 2*WidthFactor-1; } |
92 | |
93 | unsigned getNumValuesUsed() const { return NumValuesUsed; } |
94 | |
95 | const SourceDelta &getValue(unsigned i) const { |
96 | assert(i < NumValuesUsed && "Invalid value #" ); |
97 | return Values[i]; |
98 | } |
99 | |
100 | SourceDelta &getValue(unsigned i) { |
101 | assert(i < NumValuesUsed && "Invalid value #" ); |
102 | return Values[i]; |
103 | } |
104 | |
105 | /// DoInsertion - Do an insertion of the specified FileIndex/Delta pair into |
106 | /// this node. If insertion is easy, do it and return false. Otherwise, |
107 | /// split the node, populate InsertRes with info about the split, and return |
108 | /// true. |
109 | bool DoInsertion(unsigned FileIndex, int Delta, InsertResult *InsertRes); |
110 | |
111 | void DoSplit(InsertResult &InsertRes); |
112 | |
113 | |
114 | /// RecomputeFullDeltaLocally - Recompute the FullDelta field by doing a |
115 | /// local walk over our contained deltas. |
116 | void RecomputeFullDeltaLocally(); |
117 | |
118 | void Destroy(); |
119 | }; |
120 | |
121 | /// DeltaTreeInteriorNode - When isLeaf = false, a node has child pointers. |
122 | /// This class tracks them. |
123 | class DeltaTreeInteriorNode : public DeltaTreeNode { |
124 | friend class DeltaTreeNode; |
125 | |
126 | DeltaTreeNode *Children[2*WidthFactor]; |
127 | |
128 | ~DeltaTreeInteriorNode() { |
129 | for (unsigned i = 0, e = NumValuesUsed+1; i != e; ++i) |
130 | Children[i]->Destroy(); |
131 | } |
132 | |
133 | public: |
134 | DeltaTreeInteriorNode() : DeltaTreeNode(false /*nonleaf*/) {} |
135 | |
136 | DeltaTreeInteriorNode(const InsertResult &IR) |
137 | : DeltaTreeNode(false /*nonleaf*/) { |
138 | Children[0] = IR.LHS; |
139 | Children[1] = IR.RHS; |
140 | Values[0] = IR.Split; |
141 | FullDelta = IR.LHS->getFullDelta()+IR.RHS->getFullDelta()+IR.Split.Delta; |
142 | NumValuesUsed = 1; |
143 | } |
144 | |
145 | const DeltaTreeNode *getChild(unsigned i) const { |
146 | assert(i < getNumValuesUsed()+1 && "Invalid child" ); |
147 | return Children[i]; |
148 | } |
149 | |
150 | DeltaTreeNode *getChild(unsigned i) { |
151 | assert(i < getNumValuesUsed()+1 && "Invalid child" ); |
152 | return Children[i]; |
153 | } |
154 | |
155 | static bool classof(const DeltaTreeNode *N) { return !N->isLeaf(); } |
156 | }; |
157 | |
158 | } // namespace |
159 | |
160 | /// Destroy - A 'virtual' destructor. |
161 | void DeltaTreeNode::Destroy() { |
162 | if (isLeaf()) |
163 | delete this; |
164 | else |
165 | delete cast<DeltaTreeInteriorNode>(Val: this); |
166 | } |
167 | |
168 | /// RecomputeFullDeltaLocally - Recompute the FullDelta field by doing a |
169 | /// local walk over our contained deltas. |
170 | void DeltaTreeNode::RecomputeFullDeltaLocally() { |
171 | int NewFullDelta = 0; |
172 | for (unsigned i = 0, e = getNumValuesUsed(); i != e; ++i) |
173 | NewFullDelta += Values[i].Delta; |
174 | if (auto *IN = dyn_cast<DeltaTreeInteriorNode>(Val: this)) |
175 | for (unsigned i = 0, e = getNumValuesUsed()+1; i != e; ++i) |
176 | NewFullDelta += IN->getChild(i)->getFullDelta(); |
177 | FullDelta = NewFullDelta; |
178 | } |
179 | |
180 | /// DoInsertion - Do an insertion of the specified FileIndex/Delta pair into |
181 | /// this node. If insertion is easy, do it and return false. Otherwise, |
182 | /// split the node, populate InsertRes with info about the split, and return |
183 | /// true. |
184 | bool DeltaTreeNode::DoInsertion(unsigned FileIndex, int Delta, |
185 | InsertResult *InsertRes) { |
186 | // Maintain full delta for this node. |
187 | FullDelta += Delta; |
188 | |
189 | // Find the insertion point, the first delta whose index is >= FileIndex. |
190 | unsigned i = 0, e = getNumValuesUsed(); |
191 | while (i != e && FileIndex > getValue(i).FileLoc) |
192 | ++i; |
193 | |
194 | // If we found an a record for exactly this file index, just merge this |
195 | // value into the pre-existing record and finish early. |
196 | if (i != e && getValue(i).FileLoc == FileIndex) { |
197 | // NOTE: Delta could drop to zero here. This means that the delta entry is |
198 | // useless and could be removed. Supporting erases is more complex than |
199 | // leaving an entry with Delta=0, so we just leave an entry with Delta=0 in |
200 | // the tree. |
201 | Values[i].Delta += Delta; |
202 | return false; |
203 | } |
204 | |
205 | // Otherwise, we found an insertion point, and we know that the value at the |
206 | // specified index is > FileIndex. Handle the leaf case first. |
207 | if (isLeaf()) { |
208 | if (!isFull()) { |
209 | // For an insertion into a non-full leaf node, just insert the value in |
210 | // its sorted position. This requires moving later values over. |
211 | if (i != e) |
212 | memmove(dest: &Values[i+1], src: &Values[i], n: sizeof(Values[0])*(e-i)); |
213 | Values[i] = SourceDelta::get(Loc: FileIndex, D: Delta); |
214 | ++NumValuesUsed; |
215 | return false; |
216 | } |
217 | |
218 | // Otherwise, if this is leaf is full, split the node at its median, insert |
219 | // the value into one of the children, and return the result. |
220 | assert(InsertRes && "No result location specified" ); |
221 | DoSplit(InsertRes&: *InsertRes); |
222 | |
223 | if (InsertRes->Split.FileLoc > FileIndex) |
224 | InsertRes->LHS->DoInsertion(FileIndex, Delta, InsertRes: nullptr /*can't fail*/); |
225 | else |
226 | InsertRes->RHS->DoInsertion(FileIndex, Delta, InsertRes: nullptr /*can't fail*/); |
227 | return true; |
228 | } |
229 | |
230 | // Otherwise, this is an interior node. Send the request down the tree. |
231 | auto *IN = cast<DeltaTreeInteriorNode>(Val: this); |
232 | if (!IN->Children[i]->DoInsertion(FileIndex, Delta, InsertRes)) |
233 | return false; // If there was space in the child, just return. |
234 | |
235 | // Okay, this split the subtree, producing a new value and two children to |
236 | // insert here. If this node is non-full, we can just insert it directly. |
237 | if (!isFull()) { |
238 | // Now that we have two nodes and a new element, insert the perclated value |
239 | // into ourself by moving all the later values/children down, then inserting |
240 | // the new one. |
241 | if (i != e) |
242 | memmove(dest: &IN->Children[i+2], src: &IN->Children[i+1], |
243 | n: (e-i)*sizeof(IN->Children[0])); |
244 | IN->Children[i] = InsertRes->LHS; |
245 | IN->Children[i+1] = InsertRes->RHS; |
246 | |
247 | if (e != i) |
248 | memmove(dest: &Values[i+1], src: &Values[i], n: (e-i)*sizeof(Values[0])); |
249 | Values[i] = InsertRes->Split; |
250 | ++NumValuesUsed; |
251 | return false; |
252 | } |
253 | |
254 | // Finally, if this interior node was full and a node is percolated up, split |
255 | // ourself and return that up the chain. Start by saving all our info to |
256 | // avoid having the split clobber it. |
257 | IN->Children[i] = InsertRes->LHS; |
258 | DeltaTreeNode *SubRHS = InsertRes->RHS; |
259 | SourceDelta SubSplit = InsertRes->Split; |
260 | |
261 | // Do the split. |
262 | DoSplit(InsertRes&: *InsertRes); |
263 | |
264 | // Figure out where to insert SubRHS/NewSplit. |
265 | DeltaTreeInteriorNode *InsertSide; |
266 | if (SubSplit.FileLoc < InsertRes->Split.FileLoc) |
267 | InsertSide = cast<DeltaTreeInteriorNode>(Val: InsertRes->LHS); |
268 | else |
269 | InsertSide = cast<DeltaTreeInteriorNode>(Val: InsertRes->RHS); |
270 | |
271 | // We now have a non-empty interior node 'InsertSide' to insert |
272 | // SubRHS/SubSplit into. Find out where to insert SubSplit. |
273 | |
274 | // Find the insertion point, the first delta whose index is >SubSplit.FileLoc. |
275 | i = 0; e = InsertSide->getNumValuesUsed(); |
276 | while (i != e && SubSplit.FileLoc > InsertSide->getValue(i).FileLoc) |
277 | ++i; |
278 | |
279 | // Now we know that i is the place to insert the split value into. Insert it |
280 | // and the child right after it. |
281 | if (i != e) |
282 | memmove(dest: &InsertSide->Children[i+2], src: &InsertSide->Children[i+1], |
283 | n: (e-i)*sizeof(IN->Children[0])); |
284 | InsertSide->Children[i+1] = SubRHS; |
285 | |
286 | if (e != i) |
287 | memmove(dest: &InsertSide->Values[i+1], src: &InsertSide->Values[i], |
288 | n: (e-i)*sizeof(Values[0])); |
289 | InsertSide->Values[i] = SubSplit; |
290 | ++InsertSide->NumValuesUsed; |
291 | InsertSide->FullDelta += SubSplit.Delta + SubRHS->getFullDelta(); |
292 | return true; |
293 | } |
294 | |
295 | /// DoSplit - Split the currently full node (which has 2*WidthFactor-1 values) |
296 | /// into two subtrees each with "WidthFactor-1" values and a pivot value. |
297 | /// Return the pieces in InsertRes. |
298 | void DeltaTreeNode::DoSplit(InsertResult &InsertRes) { |
299 | assert(isFull() && "Why split a non-full node?" ); |
300 | |
301 | // Since this node is full, it contains 2*WidthFactor-1 values. We move |
302 | // the first 'WidthFactor-1' values to the LHS child (which we leave in this |
303 | // node), propagate one value up, and move the last 'WidthFactor-1' values |
304 | // into the RHS child. |
305 | |
306 | // Create the new child node. |
307 | DeltaTreeNode *NewNode; |
308 | if (auto *IN = dyn_cast<DeltaTreeInteriorNode>(Val: this)) { |
309 | // If this is an interior node, also move over 'WidthFactor' children |
310 | // into the new node. |
311 | DeltaTreeInteriorNode *New = new DeltaTreeInteriorNode(); |
312 | memcpy(dest: &New->Children[0], src: &IN->Children[WidthFactor], |
313 | n: WidthFactor*sizeof(IN->Children[0])); |
314 | NewNode = New; |
315 | } else { |
316 | // Just create the new leaf node. |
317 | NewNode = new DeltaTreeNode(); |
318 | } |
319 | |
320 | // Move over the last 'WidthFactor-1' values from here to NewNode. |
321 | memcpy(dest: &NewNode->Values[0], src: &Values[WidthFactor], |
322 | n: (WidthFactor-1)*sizeof(Values[0])); |
323 | |
324 | // Decrease the number of values in the two nodes. |
325 | NewNode->NumValuesUsed = NumValuesUsed = WidthFactor-1; |
326 | |
327 | // Recompute the two nodes' full delta. |
328 | NewNode->RecomputeFullDeltaLocally(); |
329 | RecomputeFullDeltaLocally(); |
330 | |
331 | InsertRes.LHS = this; |
332 | InsertRes.RHS = NewNode; |
333 | InsertRes.Split = Values[WidthFactor-1]; |
334 | } |
335 | |
336 | //===----------------------------------------------------------------------===// |
337 | // DeltaTree Implementation |
338 | //===----------------------------------------------------------------------===// |
339 | |
340 | //#define VERIFY_TREE |
341 | |
342 | #ifdef VERIFY_TREE |
343 | /// VerifyTree - Walk the btree performing assertions on various properties to |
344 | /// verify consistency. This is useful for debugging new changes to the tree. |
345 | static void VerifyTree(const DeltaTreeNode *N) { |
346 | const auto *IN = dyn_cast<DeltaTreeInteriorNode>(N); |
347 | if (IN == 0) { |
348 | // Verify leaves, just ensure that FullDelta matches up and the elements |
349 | // are in proper order. |
350 | int FullDelta = 0; |
351 | for (unsigned i = 0, e = N->getNumValuesUsed(); i != e; ++i) { |
352 | if (i) |
353 | assert(N->getValue(i-1).FileLoc < N->getValue(i).FileLoc); |
354 | FullDelta += N->getValue(i).Delta; |
355 | } |
356 | assert(FullDelta == N->getFullDelta()); |
357 | return; |
358 | } |
359 | |
360 | // Verify interior nodes: Ensure that FullDelta matches up and the |
361 | // elements are in proper order and the children are in proper order. |
362 | int FullDelta = 0; |
363 | for (unsigned i = 0, e = IN->getNumValuesUsed(); i != e; ++i) { |
364 | const SourceDelta &IVal = N->getValue(i); |
365 | const DeltaTreeNode *IChild = IN->getChild(i); |
366 | if (i) |
367 | assert(IN->getValue(i-1).FileLoc < IVal.FileLoc); |
368 | FullDelta += IVal.Delta; |
369 | FullDelta += IChild->getFullDelta(); |
370 | |
371 | // The largest value in child #i should be smaller than FileLoc. |
372 | assert(IChild->getValue(IChild->getNumValuesUsed()-1).FileLoc < |
373 | IVal.FileLoc); |
374 | |
375 | // The smallest value in child #i+1 should be larger than FileLoc. |
376 | assert(IN->getChild(i+1)->getValue(0).FileLoc > IVal.FileLoc); |
377 | VerifyTree(IChild); |
378 | } |
379 | |
380 | FullDelta += IN->getChild(IN->getNumValuesUsed())->getFullDelta(); |
381 | |
382 | assert(FullDelta == N->getFullDelta()); |
383 | } |
384 | #endif // VERIFY_TREE |
385 | |
386 | static DeltaTreeNode *getRoot(void *Root) { |
387 | return (DeltaTreeNode*)Root; |
388 | } |
389 | |
390 | DeltaTree::DeltaTree() { |
391 | Root = new DeltaTreeNode(); |
392 | } |
393 | |
394 | DeltaTree::DeltaTree(const DeltaTree &RHS) { |
395 | // Currently we only support copying when the RHS is empty. |
396 | assert(getRoot(RHS.Root)->getNumValuesUsed() == 0 && |
397 | "Can only copy empty tree" ); |
398 | Root = new DeltaTreeNode(); |
399 | } |
400 | |
401 | DeltaTree::~DeltaTree() { |
402 | getRoot(Root)->Destroy(); |
403 | } |
404 | |
405 | /// getDeltaAt - Return the accumulated delta at the specified file offset. |
406 | /// This includes all insertions or delections that occurred *before* the |
407 | /// specified file index. |
408 | int DeltaTree::getDeltaAt(unsigned FileIndex) const { |
409 | const DeltaTreeNode *Node = getRoot(Root); |
410 | |
411 | int Result = 0; |
412 | |
413 | // Walk down the tree. |
414 | while (true) { |
415 | // For all nodes, include any local deltas before the specified file |
416 | // index by summing them up directly. Keep track of how many were |
417 | // included. |
418 | unsigned NumValsGreater = 0; |
419 | for (unsigned e = Node->getNumValuesUsed(); NumValsGreater != e; |
420 | ++NumValsGreater) { |
421 | const SourceDelta &Val = Node->getValue(i: NumValsGreater); |
422 | |
423 | if (Val.FileLoc >= FileIndex) |
424 | break; |
425 | Result += Val.Delta; |
426 | } |
427 | |
428 | // If we have an interior node, include information about children and |
429 | // recurse. Otherwise, if we have a leaf, we're done. |
430 | const auto *IN = dyn_cast<DeltaTreeInteriorNode>(Val: Node); |
431 | if (!IN) return Result; |
432 | |
433 | // Include any children to the left of the values we skipped, all of |
434 | // their deltas should be included as well. |
435 | for (unsigned i = 0; i != NumValsGreater; ++i) |
436 | Result += IN->getChild(i)->getFullDelta(); |
437 | |
438 | // If we found exactly the value we were looking for, break off the |
439 | // search early. There is no need to search the RHS of the value for |
440 | // partial results. |
441 | if (NumValsGreater != Node->getNumValuesUsed() && |
442 | Node->getValue(i: NumValsGreater).FileLoc == FileIndex) |
443 | return Result+IN->getChild(i: NumValsGreater)->getFullDelta(); |
444 | |
445 | // Otherwise, traverse down the tree. The selected subtree may be |
446 | // partially included in the range. |
447 | Node = IN->getChild(i: NumValsGreater); |
448 | } |
449 | // NOT REACHED. |
450 | } |
451 | |
452 | /// AddDelta - When a change is made that shifts around the text buffer, |
453 | /// this method is used to record that info. It inserts a delta of 'Delta' |
454 | /// into the current DeltaTree at offset FileIndex. |
455 | void DeltaTree::AddDelta(unsigned FileIndex, int Delta) { |
456 | assert(Delta && "Adding a noop?" ); |
457 | DeltaTreeNode *MyRoot = getRoot(Root); |
458 | |
459 | DeltaTreeNode::InsertResult InsertRes; |
460 | if (MyRoot->DoInsertion(FileIndex, Delta, InsertRes: &InsertRes)) { |
461 | Root = new DeltaTreeInteriorNode(InsertRes); |
462 | #ifdef VERIFY_TREE |
463 | MyRoot = Root; |
464 | #endif |
465 | } |
466 | |
467 | #ifdef VERIFY_TREE |
468 | VerifyTree(MyRoot); |
469 | #endif |
470 | } |
471 | |