1//===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This file contains classes used to discover if for a particular value
9// its definition precedes and its uses follow a suspend block. This is
10// referred to as a suspend crossing value.
11//
12// Using the information discovered we form a Coroutine Frame structure to
13// contain those values. All uses of those values are replaced with appropriate
14// GEP + load from the coroutine frame. At the point of the definition we spill
15// the value into the coroutine frame.
16//===----------------------------------------------------------------------===//
17
18#include "CoroInternal.h"
19#include "llvm/ADT/ScopeExit.h"
20#include "llvm/ADT/SmallString.h"
21#include "llvm/Analysis/StackLifetime.h"
22#include "llvm/IR/DIBuilder.h"
23#include "llvm/IR/DebugInfo.h"
24#include "llvm/IR/Dominators.h"
25#include "llvm/IR/IRBuilder.h"
26#include "llvm/IR/InstIterator.h"
27#include "llvm/IR/IntrinsicInst.h"
28#include "llvm/IR/MDBuilder.h"
29#include "llvm/IR/Module.h"
30#include "llvm/Support/Compiler.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/OptimizedStructLayout.h"
33#include "llvm/Transforms/Coroutines/ABI.h"
34#include "llvm/Transforms/Coroutines/CoroInstr.h"
35#include "llvm/Transforms/Coroutines/MaterializationUtils.h"
36#include "llvm/Transforms/Coroutines/SpillUtils.h"
37#include "llvm/Transforms/Coroutines/SuspendCrossingInfo.h"
38#include "llvm/Transforms/Utils/BasicBlockUtils.h"
39#include "llvm/Transforms/Utils/Local.h"
40#include "llvm/Transforms/Utils/PromoteMemToReg.h"
41#include <algorithm>
42#include <optional>
43
44using namespace llvm;
45
46#define DEBUG_TYPE "coro-frame"
47
48namespace {
49class FrameTypeBuilder;
50// Mapping from the to-be-spilled value to all the users that need reload.
51struct FrameDataInfo {
52 // All the values (that are not allocas) that needs to be spilled to the
53 // frame.
54 coro::SpillInfo &Spills;
55 // Allocas contains all values defined as allocas that need to live in the
56 // frame.
57 SmallVectorImpl<coro::AllocaInfo> &Allocas;
58
59 FrameDataInfo(coro::SpillInfo &Spills,
60 SmallVectorImpl<coro::AllocaInfo> &Allocas)
61 : Spills(Spills), Allocas(Allocas) {}
62
63 SmallVector<Value *, 8> getAllDefs() const {
64 SmallVector<Value *, 8> Defs;
65 for (const auto &P : Spills)
66 Defs.push_back(Elt: P.first);
67 for (const auto &A : Allocas)
68 Defs.push_back(Elt: A.Alloca);
69 return Defs;
70 }
71
72 uint32_t getFieldIndex(Value *V) const {
73 auto Itr = FieldIndexMap.find(Val: V);
74 assert(Itr != FieldIndexMap.end() &&
75 "Value does not have a frame field index");
76 return Itr->second;
77 }
78
79 void setFieldIndex(Value *V, uint32_t Index) {
80 assert(FieldIndexMap.count(V) == 0 &&
81 "Cannot set the index for the same field twice.");
82 FieldIndexMap[V] = Index;
83 }
84
85 Align getAlign(Value *V) const {
86 auto Iter = FieldAlignMap.find(Val: V);
87 assert(Iter != FieldAlignMap.end());
88 return Iter->second;
89 }
90
91 void setAlign(Value *V, Align AL) {
92 assert(FieldAlignMap.count(V) == 0);
93 FieldAlignMap.insert(KV: {V, AL});
94 }
95
96 uint64_t getDynamicAlign(Value *V) const {
97 auto Iter = FieldDynamicAlignMap.find(Val: V);
98 assert(Iter != FieldDynamicAlignMap.end());
99 return Iter->second;
100 }
101
102 void setDynamicAlign(Value *V, uint64_t Align) {
103 assert(FieldDynamicAlignMap.count(V) == 0);
104 FieldDynamicAlignMap.insert(KV: {V, Align});
105 }
106
107 uint64_t getOffset(Value *V) const {
108 auto Iter = FieldOffsetMap.find(Val: V);
109 assert(Iter != FieldOffsetMap.end());
110 return Iter->second;
111 }
112
113 void setOffset(Value *V, uint64_t Offset) {
114 assert(FieldOffsetMap.count(V) == 0);
115 FieldOffsetMap.insert(KV: {V, Offset});
116 }
117
118 // Update field offset and alignment information from FrameTypeBuilder.
119 void updateLayoutInfo(FrameTypeBuilder &B);
120
121private:
122 // Map from values to their slot indexes on the frame (insertion order).
123 DenseMap<Value *, uint32_t> FieldIndexMap;
124 // Map from values to their alignment on the frame. They would be set after
125 // the frame is built.
126 DenseMap<Value *, Align> FieldAlignMap;
127 DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
128 // Map from values to their offset on the frame. They would be set after
129 // the frame is built.
130 DenseMap<Value *, uint64_t> FieldOffsetMap;
131};
132} // namespace
133
134#ifndef NDEBUG
135static void dumpSpills(StringRef Title, const coro::SpillInfo &Spills) {
136 dbgs() << "------------- " << Title << " --------------\n";
137 for (const auto &E : Spills) {
138 E.first->dump();
139 dbgs() << " user: ";
140 for (auto *I : E.second)
141 I->dump();
142 }
143}
144
145static void dumpAllocas(const SmallVectorImpl<coro::AllocaInfo> &Allocas) {
146 dbgs() << "------------- Allocas --------------\n";
147 for (const auto &A : Allocas) {
148 A.Alloca->dump();
149 }
150}
151#endif
152
153namespace {
154using FieldIDType = size_t;
155// We cannot rely solely on natural alignment of a type when building a
156// coroutine frame and if the alignment specified on the Alloca instruction
157// differs from the natural alignment of the alloca type we will need to insert
158// padding.
// Builds the coroutine frame type incrementally: callers add fields for
// spills and allocas, then finish() runs the optimized struct layout to
// assign offsets and compute overall size and alignment.
class FrameTypeBuilder {
private:
  // Layout record for a single frame field.
  struct Field {
    uint64_t Size;               // allocation size in bytes
    uint64_t Offset;             // byte offset; flexible until finish()
    Align Alignment;             // alignment used for layout
    uint64_t DynamicAlignBuffer; // extra bytes reserved so the field can be
                                 // re-aligned at runtime (0 if unused)
  };

  const DataLayout &DL;
  uint64_t StructSize = 0; // total frame size; final only after finish()
  Align StructAlign;       // overall frame alignment; set by finish()
  bool IsFinished = false; // guards the accessors until layout is done

  // If set, caps the alignment used when laying out spilled values.
  std::optional<Align> MaxFrameAlignment;

  SmallVector<Field, 8> Fields;
  // NOTE(review): appears unused within this file chunk — confirm before
  // relying on it.
  DenseMap<Value*, unsigned> FieldIndexByKey;

public:
  FrameTypeBuilder(const DataLayout &DL, std::optional<Align> MaxFrameAlignment)
      : DL(DL), MaxFrameAlignment(MaxFrameAlignment) {}

  /// Add a field to this structure for the storage of an `alloca`
  /// instruction. Aborts on non-fixed-size (VLA/scalable) allocas.
  [[nodiscard]] FieldIDType addFieldForAlloca(AllocaInst *AI,
                                              bool IsHeader = false) {
    auto Size = AI->getAllocationSize(DL: AI->getDataLayout());
    // Only fixed-size allocas can be given a slot in the frame.
    if (!Size || !Size->isFixed())
      report_fatal_error(
          reason: "Coroutines cannot handle non static or vscale allocas yet");
    return addField(FieldSize: Size->getFixedValue(), FieldAlignment: AI->getAlign(), IsHeader);
  }

  /// We want to put the allocas whose lifetime-ranges are not overlapped
  /// into one slot of coroutine frame.
  /// Consider the example at:https://bugs.llvm.org/show_bug.cgi?id=45566
  ///
  ///     cppcoro::task<void> alternative_paths(bool cond) {
  ///         if (cond) {
  ///             big_structure a;
  ///             process(a);
  ///             co_await something();
  ///         } else {
  ///             big_structure b;
  ///             process2(b);
  ///             co_await something();
  ///         }
  ///     }
  ///
  /// We want to put variable a and variable b in the same slot to
  /// reduce the size of coroutine frame.
  ///
  /// This function uses the StackLifetime algorithm to partition the
  /// AllocaInsts in Spills into non-overlapping sets in order to put each
  /// set of allocas in the same slot in the Coroutine Frame. Then it adds
  /// a field for each non-overlapping set, using the largest member's
  /// type as the field type.
  ///
  /// Side Effects: Because we sort the allocas, the order of allocas in
  /// the frame may differ from the order in the source code.
  void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData,
                          coro::Shape &Shape, bool OptimizeFrame);

  /// Add a field to this structure for a spill.
  [[nodiscard]] FieldIDType addField(Type *Ty, MaybeAlign MaybeFieldAlignment,
                                     bool IsHeader = false,
                                     bool IsSpillOfValue = false) {
    assert(Ty && "must provide a type for a field");
    // The field size is the alloc size of the type.
    uint64_t FieldSize = DL.getTypeAllocSize(Ty);
    // The field alignment is usually the type alignment.
    // But if we are spilling values we don't need to worry about ABI
    // alignment concerns, so cap it at MaxFrameAlignment when set.
    Align ABIAlign = DL.getABITypeAlign(Ty);
    Align TyAlignment = ABIAlign;
    if (IsSpillOfValue && MaxFrameAlignment && *MaxFrameAlignment < ABIAlign)
      TyAlignment = *MaxFrameAlignment;
    // An explicitly requested alignment (e.g. from a byval attribute) wins.
    Align FieldAlignment = MaybeFieldAlignment.value_or(u&: TyAlignment);
    return addField(FieldSize, FieldAlignment, IsHeader);
  }

  /// Add a field to this structure. Returns the field's id; header fields
  /// are laid out immediately, the rest get an offset in finish().
  [[nodiscard]] FieldIDType addField(uint64_t FieldSize, Align FieldAlignment,
                                     bool IsHeader = false) {
    assert(!IsFinished && "adding fields to a finished builder");

    // For an alloca with size=0, we don't need to add a field and they
    // can just point to any index in the frame. Use index 0.
    if (FieldSize == 0)
      return 0;

    // The field alignment could be bigger than the max frame alignment; in
    // that case we request additional storage to be able to dynamically
    // align the pointer at runtime.
    uint64_t DynamicAlignBuffer = 0;
    if (MaxFrameAlignment && (FieldAlignment > *MaxFrameAlignment)) {
      DynamicAlignBuffer =
          offsetToAlignment(Value: MaxFrameAlignment->value(), Alignment: FieldAlignment);
      FieldAlignment = *MaxFrameAlignment;
      FieldSize = FieldSize + DynamicAlignBuffer;
    }

    // Lay out header fields immediately.
    uint64_t Offset;
    if (IsHeader) {
      Offset = alignTo(Size: StructSize, A: FieldAlignment);
      StructSize = Offset + FieldSize;

      // Everything else has a flexible offset.
    } else {
      Offset = OptimizedStructLayoutField::FlexibleOffset;
    }

    Fields.push_back(Elt: {.Size: FieldSize, .Offset: Offset, .Alignment: FieldAlignment, .DynamicAlignBuffer: DynamicAlignBuffer});
    return Fields.size() - 1;
  }

  /// Finish the layout and compute final size and alignment.
  void finish();

  /// Total frame size in bytes; only valid after finish().
  uint64_t getStructSize() const {
    assert(IsFinished && "not yet finished!");
    return StructSize;
  }

  /// Overall frame alignment; only valid after finish().
  Align getStructAlign() const {
    assert(IsFinished && "not yet finished!");
    return StructAlign;
  }

  /// Layout data for field \p Id; only valid after finish().
  Field getLayoutField(FieldIDType Id) const {
    assert(IsFinished && "not yet finished!");
    return Fields[Id];
  }
};
295} // namespace
296
297void FrameDataInfo::updateLayoutInfo(FrameTypeBuilder &B) {
298 auto Updater = [&](Value *I) {
299 uint32_t FieldIndex = getFieldIndex(V: I);
300 auto Field = B.getLayoutField(Id: FieldIndex);
301 setAlign(V: I, AL: Field.Alignment);
302 uint64_t dynamicAlign =
303 Field.DynamicAlignBuffer
304 ? Field.DynamicAlignBuffer + Field.Alignment.value()
305 : 0;
306 setDynamicAlign(V: I, Align: dynamicAlign);
307 setOffset(V: I, Offset: Field.Offset);
308 };
309 for (auto &S : Spills)
310 Updater(S.first);
311 for (const auto &A : Allocas)
312 Updater(A.Alloca);
313}
314
void FrameTypeBuilder::addFieldForAllocas(const Function &F,
                                          FrameDataInfo &FrameData,
                                          coro::Shape &Shape,
                                          bool OptimizeFrame) {
  using AllocaSetType = SmallVector<AllocaInst *, 4>;
  SmallVector<AllocaSetType, 4> NonOverlapedAllocas;

  // We need to add field for allocas at the end of this function: each
  // non-overlapping set shares one field, sized for its largest (first)
  // member.
  llvm::scope_exit AddFieldForAllocasAtExit([&]() {
    for (auto AllocaList : NonOverlapedAllocas) {
      auto *LargestAI = *AllocaList.begin();
      FieldIDType Id = addFieldForAlloca(AI: LargestAI);
      for (auto *Alloca : AllocaList)
        FrameData.setFieldIndex(V: Alloca, Index: Id);
    }
  });

  // Without frame optimization every alloca gets its own singleton set,
  // hence its own field.
  if (!OptimizeFrame) {
    for (const auto &A : FrameData.Allocas) {
      AllocaInst *Alloca = A.Alloca;
      NonOverlapedAllocas.emplace_back(Args: AllocaSetType(1, Alloca));
    }
    return;
  }

  // Because there are paths from the lifetime.start to coro.end
  // for each alloca, the live ranges for every alloca overlap
  // in the blocks which contain coro.end and the successor blocks.
  // So we choose to skip these blocks when we calculate the live range
  // for each alloca. It should be reasonable since there shouldn't be uses
  // in these blocks and the coroutine frame shouldn't be used outside the
  // coroutine body.
  //
  // Note that the user of coro.suspend may not be SwitchInst. However, this
  // case seems too complex to handle. And it is harmless to skip these
  // patterns since it just prevents putting the allocas to live in the same
  // slot.
  DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
  for (auto *CoroSuspendInst : Shape.CoroSuspends) {
    for (auto *U : CoroSuspendInst->users()) {
      if (auto *ConstSWI = dyn_cast<SwitchInst>(Val: U)) {
        auto *SWI = const_cast<SwitchInst *>(ConstSWI);
        // Remember the original default destination, then temporarily
        // redirect it so the lifetime analysis below ignores the coro.end
        // paths. Restored after the analysis (see loop near the bottom).
        DefaultSuspendDest[SWI] = SWI->getDefaultDest();
        SWI->setDefaultDest(SWI->getSuccessor(idx: 1));
      }
    }
  }

  auto ExtractAllocas = [&]() {
    AllocaSetType Allocas;
    Allocas.reserve(N: FrameData.Allocas.size());
    for (const auto &A : FrameData.Allocas)
      Allocas.push_back(Elt: A.Alloca);
    return Allocas;
  };
  StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
                                      StackLifetime::LivenessType::May);
  StackLifetimeAnalyzer.run();
  // Two allocas interfere iff their computed live ranges overlap.
  auto DoAllocasInterfere = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
    return StackLifetimeAnalyzer.getLiveRange(AI: AI1).overlaps(
        Other: StackLifetimeAnalyzer.getLiveRange(AI: AI2));
  };
  auto GetAllocaSize = [&](const coro::AllocaInfo &A) {
    std::optional<TypeSize> RetSize = A.Alloca->getAllocationSize(DL);
    assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n");
    assert(!RetSize->isScalable() && "Scalable vectors are not yet supported");
    return RetSize->getFixedValue();
  };
  // Put larger allocas in the front. So the larger allocas have higher
  // priority to merge, which can save more space potentially. Also each
  // AllocaSet would be ordered. So we can get the largest Alloca in one
  // AllocaSet easily.
  sort(C&: FrameData.Allocas, Comp: [&](const auto &Iter1, const auto &Iter2) {
    return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
  });
  for (const auto &A : FrameData.Allocas) {
    AllocaInst *Alloca = A.Alloca;
    bool Merged = false;
    // Try to find if the Alloca does not interfere with any existing
    // NonOverlappedAllocaSet. If it is true, insert the alloca to that
    // NonOverlappedAllocaSet.
    for (auto &AllocaSet : NonOverlapedAllocas) {
      assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
      bool NoInterference = none_of(Range&: AllocaSet, P: [&](auto Iter) {
        return DoAllocasInterfere(Alloca, Iter);
      });
      // If the alignment of A is a multiple of the alignment of B, the
      // address of A satisfies the alignment requirement of B.
      //
      // There may be other more fine-grained strategies to handle the
      // alignment information during the merging process. But it seems hard
      // to handle these strategies and benefit little.
      bool Alignable = [&]() -> bool {
        auto *LargestAlloca = *AllocaSet.begin();
        return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
               0;
      }();
      bool CouldMerge = NoInterference && Alignable;
      if (!CouldMerge)
        continue;
      AllocaSet.push_back(Elt: Alloca);
      Merged = true;
      break;
    }
    if (!Merged) {
      // Interferes with every existing set: start a new singleton set.
      NonOverlapedAllocas.emplace_back(Args: AllocaSetType(1, Alloca));
    }
  }
  // Recover the default target destination for each Switch statement
  // saved above.
  for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
    SwitchInst *SWI = SwitchAndDefaultDest.first;
    BasicBlock *DestBB = SwitchAndDefaultDest.second;
    SWI->setDefaultDest(DestBB);
  }
  // This Debug Info could tell us which allocas are merged into one slot.
  LLVM_DEBUG(for (auto &AllocaSet
                  : NonOverlapedAllocas) {
    if (AllocaSet.size() > 1) {
      dbgs() << "In Function:" << F.getName() << "\n";
      dbgs() << "Find Union Set "
             << "\n";
      dbgs() << "\tAllocas are \n";
      for (auto Alloca : AllocaSet)
        dbgs() << "\t\t" << *Alloca << "\n";
    }
  });
}
443
444void FrameTypeBuilder::finish() {
445 assert(!IsFinished && "already finished!");
446
447 // Prepare the optimal-layout field array.
448 // The Id in the layout field is a pointer to our Field for it.
449 SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
450 LayoutFields.reserve(N: Fields.size());
451 for (auto &Field : Fields) {
452 LayoutFields.emplace_back(Args: &Field, Args&: Field.Size, Args&: Field.Alignment,
453 Args&: Field.Offset);
454 }
455
456 // Perform layout to compute size, alignment, and field offsets.
457 auto SizeAndAlign = performOptimizedStructLayout(Fields: LayoutFields);
458 StructSize = SizeAndAlign.first;
459 StructAlign = SizeAndAlign.second;
460
461 auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
462 return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
463 };
464
465 // Update field offsets from the computed layout.
466 for (auto &LayoutField : LayoutFields) {
467 auto &F = getField(LayoutField);
468 F.Offset = LayoutField.Offset;
469 }
470
471 IsFinished = true;
472}
473
474static void cacheDIVar(FrameDataInfo &FrameData,
475 DenseMap<Value *, DILocalVariable *> &DIVarCache) {
476 for (auto *V : FrameData.getAllDefs()) {
477 if (DIVarCache.contains(Val: V))
478 continue;
479
480 auto CacheIt = [&DIVarCache, V](const auto &Container) {
481 auto *I = llvm::find_if(Container, [](auto *DDI) {
482 return DDI->getExpression()->getNumElements() == 0;
483 });
484 if (I != Container.end())
485 DIVarCache.insert({V, (*I)->getVariable()});
486 };
487 CacheIt(findDVRDeclares(V));
488 CacheIt(findDVRDeclareValues(V));
489 }
490}
491
492/// Create name for Type. It uses MDString to store new created string to
493/// avoid memory leak.
494static StringRef solveTypeName(Type *Ty) {
495 if (Ty->isIntegerTy()) {
496 // The longest name in common may be '__int_128', which has 9 bits.
497 SmallString<16> Buffer;
498 raw_svector_ostream OS(Buffer);
499 OS << "__int_" << cast<IntegerType>(Val: Ty)->getBitWidth();
500 auto *MDName = MDString::get(Context&: Ty->getContext(), Str: OS.str());
501 return MDName->getString();
502 }
503
504 if (Ty->isFloatingPointTy()) {
505 if (Ty->isFloatTy())
506 return "__float_";
507 if (Ty->isDoubleTy())
508 return "__double_";
509 return "__floating_type_";
510 }
511
512 if (Ty->isPointerTy())
513 return "PointerType";
514
515 if (Ty->isStructTy()) {
516 if (!cast<StructType>(Val: Ty)->hasName())
517 return "__LiteralStructType_";
518
519 auto Name = Ty->getStructName();
520
521 SmallString<16> Buffer(Name);
522 for (auto &Iter : Buffer)
523 if (Iter == '.' || Iter == ':')
524 Iter = '_';
525 auto *MDName = MDString::get(Context&: Ty->getContext(), Str: Buffer.str());
526 return MDName->getString();
527 }
528
529 return "UnknownType";
530}
531
// Build an artificial DIType for an IR type so frame members that have no
// source-level debug variable still get usable debug info. Results are
// memoized in DITypeCache, which also keeps recursive struct types from
// looping forever.
static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
                           const DataLayout &Layout, DIScope *Scope,
                           unsigned LineNum,
                           DenseMap<Type *, DIType *> &DITypeCache) {
  if (DIType *DT = DITypeCache.lookup(Val: Ty))
    return DT;

  StringRef Name = solveTypeName(Ty);

  DIType *RetType = nullptr;

  if (Ty->isIntegerTy()) {
    // Integers are modeled as signed basic types of the exact bit width.
    auto BitWidth = cast<IntegerType>(Val: Ty)->getBitWidth();
    RetType = Builder.createBasicType(Name, SizeInBits: BitWidth, Encoding: dwarf::DW_ATE_signed,
                                      Flags: llvm::DINode::FlagArtificial);
  } else if (Ty->isFloatingPointTy()) {
    RetType = Builder.createBasicType(Name, SizeInBits: Layout.getTypeSizeInBits(Ty),
                                      Encoding: dwarf::DW_ATE_float,
                                      Flags: llvm::DINode::FlagArtificial);
  } else if (Ty->isPointerTy()) {
    // Construct PointerType points to null (aka void *) instead of exploring
    // pointee type to avoid infinite search problem. For example, we would be
    // in trouble if we traverse recursively:
    //
    //  struct Node {
    //      Node* ptr;
    //  };
    RetType =
        Builder.createPointerType(PointeeTy: nullptr, SizeInBits: Layout.getTypeSizeInBits(Ty),
                                  AlignInBits: Layout.getABITypeAlign(Ty).value() * CHAR_BIT,
                                  /*DWARFAddressSpace=*/std::nullopt, Name);
  } else if (Ty->isStructTy()) {
    // Create the struct shell first, then fill in members; this lets
    // recursive references hit the cache via the pointer branch above.
    auto *DIStruct = Builder.createStructType(
        Scope, Name, File: Scope->getFile(), LineNumber: LineNum, SizeInBits: Layout.getTypeSizeInBits(Ty),
        AlignInBits: Layout.getPrefTypeAlign(Ty).value() * CHAR_BIT,
        Flags: llvm::DINode::FlagArtificial, DerivedFrom: nullptr, Elements: llvm::DINodeArray());

    auto *StructTy = cast<StructType>(Val: Ty);
    SmallVector<Metadata *, 16> Elements;
    for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
      DIType *DITy = solveDIType(Builder, Ty: StructTy->getElementType(N: I), Layout,
                                 Scope: DIStruct, LineNum, DITypeCache);
      assert(DITy);
      Elements.push_back(Elt: Builder.createMemberType(
          Scope: DIStruct, Name: DITy->getName(), File: DIStruct->getFile(), LineNo: LineNum,
          SizeInBits: DITy->getSizeInBits(), AlignInBits: DITy->getAlignInBits(),
          OffsetInBits: Layout.getStructLayout(Ty: StructTy)->getElementOffsetInBits(Idx: I),
          Flags: llvm::DINode::FlagArtificial, Ty: DITy));
    }

    Builder.replaceArrays(T&: DIStruct, Elements: Builder.getOrCreateArray(Elements));

    RetType = DIStruct;
  } else {
    LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n");
    // Fallback: model the unknown type as an array of unsigned chars whose
    // total size is the type's bit size rounded up to whole bytes.
    TypeSize Size = Layout.getTypeSizeInBits(Ty);
    auto *CharSizeType = Builder.createBasicType(
        Name, SizeInBits: 8, Encoding: dwarf::DW_ATE_unsigned_char, Flags: llvm::DINode::FlagArtificial);

    if (Size <= 8)
      RetType = CharSizeType;
    else {
      if (Size % 8 != 0)
        Size = TypeSize::getFixed(ExactSize: Size + 8 - (Size % 8));

      RetType = Builder.createArrayType(
          Size, AlignInBits: Layout.getPrefTypeAlign(Ty).value(), Ty: CharSizeType,
          Subscripts: Builder.getOrCreateArray(Elements: Builder.getOrCreateSubrange(Lo: 0, Count: Size / 8)));
    }
  }

  DITypeCache.insert(KV: {Ty, RetType});
  return RetType;
}
606
/// Build artificial debug info for C++ coroutine frames to allow users to
/// inspect the contents of the frame directly
///
/// Create Debug information for coroutine frame with debug name "__coro_frame".
/// The debug information for the fields of coroutine frame is constructed in
/// the following way:
/// 1. For all the values in the Frame, we search the uses of dbg.declare to
///    find the corresponding debug variables for the value. If we can find
///    the debug variable, we can get full and accurate debug information.
/// 2. If we can't get debug information in step 1, we could only try to
///    build the DIType by Type. We did this in solveDIType. We only handle
///    integer, float, double, integer type and struct type for now.
static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
                                FrameDataInfo &FrameData) {
  DISubprogram *DIS = F.getSubprogram();
  // If there is no DISubprogram for F, it implies the function is compiled
  // without debug info. So we also don't generate debug info for the frame.

  if (!DIS || !DIS->getUnit())
    return;

  // Only C++ coroutines compiled with full debug info are handled; the
  // fixed fields added below assume switch lowering.
  if (!dwarf::isCPlusPlus(S: static_cast<llvm::dwarf::SourceLanguage>(
          DIS->getUnit()->getSourceLanguage().getUnversionedName())) ||
      DIS->getUnit()->getEmissionKind() !=
          DICompileUnit::DebugEmissionKind::FullDebug)
    return;

  assert(Shape.ABI == coro::ABI::Switch &&
         "We could only build debug infomation for C++ coroutine now.\n");

  DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);

  DIFile *DFile = DIS->getFile();
  unsigned LineNum = DIS->getLine();

  // The artificial struct type describing the whole frame.
  DICompositeType *FrameDITy = DBuilder.createStructType(
      Scope: DIS->getUnit(), Name: Twine(F.getName() + ".coro_frame_ty").str(), File: DFile,
      LineNumber: LineNum, SizeInBits: Shape.FrameSize * 8, AlignInBits: Shape.FrameAlign.value() * 8,
      Flags: llvm::DINode::FlagArtificial, DerivedFrom: nullptr, Elements: llvm::DINodeArray());
  SmallVector<Metadata *, 16> Elements;
  DataLayout Layout = F.getDataLayout();

  DenseMap<Value *, DILocalVariable *> DIVarCache;
  cacheDIVar(FrameData, DIVarCache);

  // This counter is used to avoid same type names. e.g., there would be
  // many i32 and i64 types in one coroutine. And we would use i32_0 and
  // i32_1 to avoid the same type. Since it makes no sense the name of the
  // fields conflicts with each other.
  unsigned UnknownTypeNum = 0;
  DenseMap<Type *, DIType *> DITypeCache;

  // Append one member to the frame struct's element list.
  auto addElement = [&](StringRef Name, uint64_t SizeInBits, uint64_t Alignment,
                        uint64_t Offset, DIType *DITy) {
    Elements.push_back(Elt: DBuilder.createMemberType(
        Scope: FrameDITy, Name, File: DFile, LineNo: LineNum, SizeInBits, AlignInBits: Alignment, OffsetInBits: Offset * 8,
        Flags: llvm::DINode::FlagArtificial, Ty: DITy));
  };

  // Add a member for one frame-resident value: prefer the name/type of its
  // cached source-level debug variable (step 1), otherwise synthesize a
  // type via solveDIType (step 2).
  auto addDIDef = [&](Value *V) {
    // Get the offset and alignment for this value.
    uint64_t Offset = FrameData.getOffset(V);
    Align Alignment = FrameData.getAlign(V);

    std::string Name;
    uint64_t SizeInBits;
    DIType *DITy = nullptr;

    auto It = DIVarCache.find(Val: V);
    if (It != DIVarCache.end()) {
      // Get the type from the debug variable.
      Name = It->second->getName().str();
      DITy = It->second->getType();
    } else {
      if (auto AI = dyn_cast<AllocaInst>(Val: V)) {
        // Frame alloca
        DITy = solveDIType(Builder&: DBuilder, Ty: AI->getAllocatedType(), Layout, Scope: FrameDITy,
                           LineNum, DITypeCache);
      } else {
        // Spill
        DITy = solveDIType(Builder&: DBuilder, Ty: V->getType(), Layout, Scope: FrameDITy, LineNum,
                           DITypeCache);
      }
      assert(DITy && "SolveDIType shouldn't return nullptr.\n");
      // Suffix a counter so repeated synthesized type names stay unique.
      Name = DITy->getName().str();
      Name += "_" + std::to_string(val: UnknownTypeNum);
      UnknownTypeNum++;
    }

    if (auto AI = dyn_cast<AllocaInst>(Val: V)) {
      // Lookup the total size of this alloca originally
      auto Size = AI->getAllocationSize(DL: Layout);
      assert(Size && Size->isFixed() &&
             "unreachable due to addFieldForAlloca checks");
      SizeInBits = Size->getFixedValue() * 8;
    } else {
      // Compute the size of the active data of this member for this spill
      SizeInBits = Layout.getTypeSizeInBits(Ty: V->getType());
    }

    addElement(Name, SizeInBits, Alignment.value() * 8, Offset, DITy);
  };

  // For Switch ABI, add debug info for the added fields (resume, destroy).
  if (Shape.ABI == coro::ABI::Switch) {
    auto *FnPtrTy = Shape.getSwitchResumePointerType();
    uint64_t PtrSize = Layout.getPointerSizeInBits(AS: FnPtrTy->getAddressSpace());
    uint64_t PtrAlign =
        Layout.getPointerABIAlignment(AS: FnPtrTy->getAddressSpace()).value() * 8;
    auto *DIPtr = DBuilder.createPointerType(PointeeTy: nullptr, SizeInBits: PtrSize,
                                             AlignInBits: FnPtrTy->getAddressSpace());
    addElement("__resume_fn", PtrSize, PtrAlign, 0, DIPtr);
    addElement("__destroy_fn", PtrSize, PtrAlign,
               Shape.SwitchLowering.DestroyOffset, DIPtr);
    uint64_t IndexSize =
        Layout.getTypeSizeInBits(Ty: Shape.getIndexType()).getFixedValue();
    addElement("__coro_index", IndexSize, Shape.SwitchLowering.IndexAlign * 8,
               Shape.SwitchLowering.IndexOffset,
               DBuilder.createBasicType(Name: "__coro_index",
                                        SizeInBits: (IndexSize < 8) ? 8 : IndexSize,
                                        Encoding: dwarf::DW_ATE_unsigned_char));
  }
  auto Defs = FrameData.getAllDefs();
  for (auto *V : Defs)
    addDIDef(V);

  DBuilder.replaceArrays(T&: FrameDITy, Elements: DBuilder.getOrCreateArray(Elements));

  auto *FrameDIVar =
      DBuilder.createAutoVariable(Scope: DIS, Name: "__coro_frame", File: DFile, LineNo: LineNum,
                                  Ty: FrameDITy, AlwaysPreserve: true, Flags: DINode::FlagArtificial);

  // Subprogram would have ContainedNodes field which records the debug
  // variables it contained. So we need to add __coro_frame to the
  // ContainedNodes of it.
  //
  // If we don't add __coro_frame to the RetainedNodes, user may get
  // `no symbol __coro_frame in context` rather than `__coro_frame`
  // is optimized out, which is more precise.
  auto RetainedNodes = DIS->getRetainedNodes();
  SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
                                               RetainedNodes.end());
  RetainedNodesVec.push_back(Elt: FrameDIVar);
  // Replace the subprogram's retained-nodes operand with the extended list.
  DIS->replaceOperandWith(I: 7, New: (MDTuple::get(Context&: F.getContext(), MDs: RetainedNodesVec)));

  // Construct the location for the frame debug variable. The column number
  // is fake but it should be fine.
  DILocation *DILoc =
      DILocation::get(Context&: DIS->getContext(), Line: LineNum, /*Column=*/1, Scope: DIS);
  assert(FrameDIVar->isValidLocationForIntrinsic(DILoc));

  // Emit a declare record for the frame pointer right after it is created.
  DbgVariableRecord *NewDVR =
      new DbgVariableRecord(ValueAsMetadata::get(V: Shape.FramePtr), FrameDIVar,
                            DBuilder.createExpression(), DILoc,
                            DbgVariableRecord::LocationType::Declare);
  BasicBlock::iterator It = Shape.getInsertPtAfterFramePtr();
  It->getParent()->insertDbgRecordBefore(DR: NewDVR, Here: It);
}
765
766// If there is memory accessing to promise alloca before CoroBegin
767static bool hasAccessingPromiseBeforeCB(const DominatorTree &DT,
768 coro::Shape &Shape) {
769 auto *PA = Shape.SwitchLowering.PromiseAlloca;
770 return llvm::any_of(Range: PA->uses(), P: [&](Use &U) {
771 auto *Inst = dyn_cast<Instruction>(Val: U.getUser());
772 if (!Inst || DT.dominates(Def: Shape.CoroBegin, User: Inst))
773 return false;
774
775 if (auto *CI = dyn_cast<CallInst>(Val: Inst)) {
776 // It is fine if the call wouldn't write to the Promise.
777 // This is possible for @llvm.coro.id intrinsics, which
778 // would take the promise as the second argument as a
779 // marker.
780 if (CI->onlyReadsMemory() || CI->onlyReadsMemory(OpNo: CI->getArgOperandNo(U: &U)))
781 return false;
782 return true;
783 }
784
785 return isa<StoreInst>(Val: Inst) ||
786 // It may take too much time to track the uses.
787 // Be conservative about the case the use may escape.
788 isa<GetElementPtrInst>(Val: Inst) ||
789 // There would always be a bitcast for the promise alloca
790 // before we enabled Opaque pointers. And now given
791 // opaque pointers are enabled by default. This should be
792 // fine.
793 isa<BitCastInst>(Val: Inst);
794 });
795}
// Build the coroutine frame type as a byte array.
// The frame layout includes:
// - Resume function pointer at offset 0 (Switch ABI only)
// - Destroy function pointer at offset ptrsize (Switch ABI only)
// - Promise alloca (Switch ABI only, only if present)
// - Suspend/Resume index
// - Spilled values and allocas
//
// Results are recorded on Shape (FrameAlign, FrameSize, plus the per-ABI
// lowering info) and on FrameData (field index per spilled value/alloca).
static void buildFrameLayout(Function &F, const DominatorTree &DT,
                             coro::Shape &Shape, FrameDataInfo &FrameData,
                             bool OptimizeFrame) {
  const DataLayout &DL = F.getDataLayout();

  // We will use this value to cap the alignment of spilled values. Only the
  // async ABI imposes such a cap (the frame must fit the async context).
  std::optional<Align> MaxFrameAlignment;
  if (Shape.ABI == coro::ABI::Async)
    MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
  FrameTypeBuilder B(DL, MaxFrameAlignment);

  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
  std::optional<FieldIDType> SwitchIndexFieldId;
  IntegerType *SwitchIndexType = nullptr;

  if (Shape.ABI == coro::ABI::Switch) {
    auto *FnPtrTy = Shape.getSwitchResumePointerType();

    // Add header fields for the resume and destroy functions.
    // We can rely on these being perfectly packed.
    (void)B.addField(Ty: FnPtrTy, MaybeFieldAlignment: MaybeAlign(), /*header*/ IsHeader: true);
    (void)B.addField(Ty: FnPtrTy, MaybeFieldAlignment: MaybeAlign(), /*header*/ IsHeader: true);

    // PromiseAlloca field needs to be explicitly added here because it's
    // a header field with a fixed offset based on its alignment. Hence it
    // needs special handling.
    if (PromiseAlloca)
      FrameData.setFieldIndex(
          V: PromiseAlloca, Index: B.addFieldForAlloca(AI: PromiseAlloca, /*header*/ IsHeader: true));

    // Add a field to store the suspend index. This doesn't need to
    // be in the header. The index needs just enough bits to number all
    // suspend points, but at least one bit.
    unsigned IndexBits = std::max(a: 1U, b: Log2_64_Ceil(Value: Shape.CoroSuspends.size()));
    SwitchIndexType = Type::getIntNTy(C&: F.getContext(), N: IndexBits);

    SwitchIndexFieldId = B.addField(Ty: SwitchIndexType, MaybeFieldAlignment: MaybeAlign());
  } else {
    assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
  }

  // Because multiple allocas may own the same field slot,
  // we add allocas to field here.
  B.addFieldForAllocas(F, FrameData, Shape, OptimizeFrame);
  // Add PromiseAlloca to Allocas list so that
  // 1. updateLayoutIndex could update its index after
  // `performOptimizedStructLayout`
  // 2. it is processed in insertSpills.
  if (Shape.ABI == coro::ABI::Switch && PromiseAlloca) {
    // We assume that no alias will be created before CoroBegin.
    FrameData.Allocas.emplace_back(
        Args&: PromiseAlloca, Args: DenseMap<Instruction *, std::optional<APInt>>{},
        Args: hasAccessingPromiseBeforeCB(DT, Shape));
  }
  // Create an entry for every spilled value.
  for (auto &S : FrameData.Spills) {
    Type *FieldType = S.first->getType();
    MaybeAlign MA;
    // For byval arguments, we need to store the pointed value in the frame,
    // instead of the pointer itself.
    if (const Argument *A = dyn_cast<Argument>(Val: S.first)) {
      if (A->hasByValAttr()) {
        FieldType = A->getParamByValType();
        MA = A->getParamAlign();
      }
    }
    FieldIDType Id =
        B.addField(Ty: FieldType, MaybeFieldAlignment: MA, IsHeader: false /*header*/, IsSpillOfValue: true /*IsSpillOfValue*/);
    FrameData.setFieldIndex(V: S.first, Index: Id);
  }

  // Run the layout (possibly optimized) and freeze the field offsets.
  B.finish();

  FrameData.updateLayoutInfo(B);
  Shape.FrameAlign = B.getStructAlign();
  Shape.FrameSize = B.getStructSize();

  switch (Shape.ABI) {
  case coro::ABI::Switch: {
    // In the switch ABI, remember the function pointer and index field info.
    // Resume and Destroy function pointers are in the frame header; destroy
    // immediately follows resume, so its offset is one pointer.
    const DataLayout &DL = F.getDataLayout();
    Shape.SwitchLowering.DestroyOffset = DL.getPointerSize();

    auto IndexField = B.getLayoutField(Id: *SwitchIndexFieldId);
    Shape.SwitchLowering.IndexType = SwitchIndexType;
    Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();
    Shape.SwitchLowering.IndexOffset = IndexField.Offset;

    // Also round the frame size up to a multiple of its alignment, as is
    // generally expected in C/C++.
    Shape.FrameSize = alignTo(Size: Shape.FrameSize, A: Shape.FrameAlign);
    break;
  }

  // In the retcon ABI, remember whether the frame is inline in the storage.
  case coro::ABI::Retcon:
  case coro::ABI::RetconOnce: {
    auto Id = Shape.getRetconCoroId();
    Shape.RetconLowering.IsFrameInlineInStorage
      = (B.getStructSize() <= Id->getStorageSize() &&
         B.getStructAlign() <= Id->getStorageAlignment());
    break;
  }
  case coro::ABI::Async: {
    // The frame lives inside the async context, after its header.
    Shape.AsyncLowering.FrameOffset =
        alignTo(Size: Shape.AsyncLowering.ContextHeaderSize, A: Shape.FrameAlign);
    // Also make the final context size a multiple of the context alignment to
    // make allocation easier for allocators.
    Shape.AsyncLowering.ContextSize =
        alignTo(Size: Shape.AsyncLowering.FrameOffset + Shape.FrameSize,
                A: Shape.AsyncLowering.getContextAlignment());
    if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) {
      report_fatal_error(
          reason: "The alignment requirment of frame variables cannot be higher than "
          "the alignment of the async function context");
    }
    break;
  }
  }
}
923
924/// If MaybeArgument is a byval Argument, return its byval type. Also removes
925/// the captures attribute, so that the argument *value* may be stored directly
926/// on the coroutine frame.
927static Type *extractByvalIfArgument(Value *MaybeArgument) {
928 if (auto *Arg = dyn_cast<Argument>(Val: MaybeArgument)) {
929 Arg->getParent()->removeParamAttr(ArgNo: Arg->getArgNo(), Kind: Attribute::Captures);
930
931 if (Arg->hasByValAttr())
932 return Arg->getParamByValType();
933 }
934 return nullptr;
935}
936
937/// Store Def into the coroutine frame.
938static void createStoreIntoFrame(IRBuilder<> &Builder, Value *Def,
939 Type *ByValTy, const coro::Shape &Shape,
940 const FrameDataInfo &FrameData) {
941 LLVMContext &Ctx = Shape.CoroBegin->getContext();
942 uint64_t Offset = FrameData.getOffset(V: Def);
943
944 Value *G = Shape.FramePtr;
945 if (Offset != 0) {
946 auto *OffsetVal = ConstantInt::get(Ty: Type::getInt64Ty(C&: Ctx), V: Offset);
947 G = Builder.CreateInBoundsPtrAdd(Ptr: G, Offset: OffsetVal,
948 Name: Def->getName() + Twine(".spill.addr"));
949 }
950 auto SpillAlignment = Align(FrameData.getAlign(V: Def));
951
952 // For byval arguments, copy the pointed-to value to the frame.
953 if (ByValTy) {
954 auto &DL = Builder.GetInsertBlock()->getDataLayout();
955 auto Size = DL.getTypeStoreSize(Ty: ByValTy);
956 // Def is a pointer to the byval argument
957 Builder.CreateMemCpy(Dst: G, DstAlign: SpillAlignment, Src: Def, SrcAlign: SpillAlignment, Size);
958 } else {
959 Builder.CreateAlignedStore(Val: Def, Ptr: G, Align: SpillAlignment);
960 }
961}
962
963/// Returns a pointer into the coroutine frame at the offset where Orig is
964/// located.
965static Value *createGEPToFramePointer(const FrameDataInfo &FrameData,
966 IRBuilder<> &Builder, coro::Shape &Shape,
967 Value *Orig) {
968 LLVMContext &Ctx = Shape.CoroBegin->getContext();
969 uint64_t Offset = FrameData.getOffset(V: Orig);
970 auto *OffsetVal = ConstantInt::get(Ty: Type::getInt64Ty(C&: Ctx), V: Offset);
971 Value *Ptr = Builder.CreateInBoundsPtrAdd(Ptr: Shape.FramePtr, Offset: OffsetVal);
972
973 if (auto *AI = dyn_cast<AllocaInst>(Val: Orig)) {
974 if (FrameData.getDynamicAlign(V: Orig) != 0) {
975 assert(FrameData.getDynamicAlign(Orig) == AI->getAlign().value());
976 auto *M = AI->getModule();
977 auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
978 auto *PtrValue = Builder.CreatePtrToInt(V: Ptr, DestTy: IntPtrTy);
979 auto *AlignMask = ConstantInt::get(Ty: IntPtrTy, V: AI->getAlign().value() - 1);
980 PtrValue = Builder.CreateAdd(LHS: PtrValue, RHS: AlignMask);
981 PtrValue = Builder.CreateAnd(LHS: PtrValue, RHS: Builder.CreateNot(V: AlignMask));
982 return Builder.CreateIntToPtr(V: PtrValue, DestTy: AI->getType());
983 }
984 // If the type of Ptr is not equal to the type of AllocaInst, it implies
985 // that the AllocaInst may be reused in the Frame slot of other AllocaInst.
986 // Note: If the strategy dealing with alignment changes, this cast must be
987 // refined
988 if (Ptr->getType() != Orig->getType())
989 Ptr = Builder.CreateAddrSpaceCast(V: Ptr, DestTy: Orig->getType(),
990 Name: Orig->getName() + Twine(".cast"));
991 }
992 return Ptr;
993}
994
995/// Find dbg.declare or dbg.declare_value records referencing `Def`. If none are
996/// found, walk up the load chain to find one.
997template <DbgVariableRecord::LocationType record_type>
998static TinyPtrVector<DbgVariableRecord *>
999findDbgRecordsThroughLoads(Function &F, Value *Def) {
1000 static_assert(record_type == DbgVariableRecord::LocationType::Declare ||
1001 record_type == DbgVariableRecord::LocationType::DeclareValue);
1002 constexpr auto FindFunc =
1003 record_type == DbgVariableRecord::LocationType::Declare
1004 ? findDVRDeclares
1005 : findDVRDeclareValues;
1006
1007 TinyPtrVector<DbgVariableRecord *> Records = FindFunc(Def);
1008
1009 if (!F.getSubprogram())
1010 return Records;
1011
1012 Value *CurDef = Def;
1013 while (Records.empty() && isa<LoadInst>(Val: CurDef)) {
1014 auto *LdInst = cast<LoadInst>(Val: CurDef);
1015 if (!LdInst->getType()->isPointerTy())
1016 break;
1017 CurDef = LdInst->getPointerOperand();
1018 if (!isa<AllocaInst, LoadInst>(Val: CurDef))
1019 break;
1020 Records = FindFunc(CurDef);
1021 }
1022
1023 return Records;
1024}
1025
1026// Helper function to handle allocas that may be accessed before CoroBegin.
1027// This creates a memcpy from the original alloca to the coroutine frame after
1028// CoroBegin, ensuring the frame has the correct initial values.
1029static void handleAccessBeforeCoroBegin(const FrameDataInfo &FrameData,
1030 coro::Shape &Shape,
1031 IRBuilder<> &Builder,
1032 AllocaInst *Alloca) {
1033 Value *Size = Builder.CreateAllocationSize(DestTy: Builder.getInt64Ty(), AI: Alloca);
1034 auto *G = createGEPToFramePointer(FrameData, Builder, Shape, Orig: Alloca);
1035 Builder.CreateMemCpy(Dst: G, DstAlign: FrameData.getAlign(V: Alloca), Src: Alloca,
1036 SrcAlign: Alloca->getAlign(), Size);
1037}
1038
// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
// the coroutine:
//
// %hdl = coro.begin(...)
// whatever
//
// becomes:
//
// %hdl = coro.begin(...)
// br label %AllocaSpillBB
//
// AllocaSpillBB:
// ; geps corresponding to allocas that were moved to coroutine frame
// br label PostSpill
//
// PostSpill:
// whatever
//
//
static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
  LLVMContext &C = Shape.CoroBegin->getContext();
  Function *F = Shape.CoroBegin->getFunction();
  IRBuilder<> Builder(C);
  DominatorTree DT(*F);
  SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;

  MDBuilder MDB(C);
  // Create a TBAA tag for accesses to certain coroutine frame slots, so that
  // subsequent alias analysis will understand they do not intersect with
  // user memory.
  // We do this only if a suitable TBAA root already exists in the module.
  MDNode *TBAATag = nullptr;
  if (auto *CppTBAAStr = MDString::getIfExists(Context&: C, Str: "Simple C++ TBAA")) {
    auto *TBAARoot = MDNode::getIfExists(Context&: C, MDs: CppTBAAStr);
    // Create a "fake" scalar type; all other types defined in the source
    // language will be assumed non-aliasing with this type.
    MDNode *Scalar = MDB.createTBAAScalarTypeNode(
        Name: (F->getName() + ".Frame Slot").str(), Parent: TBAARoot);
    TBAATag = MDB.createTBAAStructTagNode(BaseType: Scalar, AccessType: Scalar, Offset: 0);
  }
  // E.first is the spilled definition; E.second is the list of its
  // suspend-crossing uses.
  for (auto const &E : FrameData.Spills) {
    Value *Def = E.first;
    Type *ByValTy = extractByvalIfArgument(MaybeArgument: Def);

    // Spill the value into the frame right where it becomes available.
    Builder.SetInsertPoint(coro::getSpillInsertionPt(Shape, Def, DT));
    createStoreIntoFrame(Builder, Def, ByValTy, Shape, FrameData);

    BasicBlock *CurrentBlock = nullptr;
    Value *CurrentReload = nullptr;
    for (auto *U : E.second) {
      // If we have not seen the use block, create a load instruction to reload
      // the spilled value from the coroutine frame. Populates the Value pointer
      // reference provided with the frame GEP.
      if (CurrentBlock != U->getParent()) {
        CurrentBlock = U->getParent();
        Builder.SetInsertPoint(TheBB: CurrentBlock,
                               IP: CurrentBlock->getFirstInsertionPt());

        auto *GEP = createGEPToFramePointer(FrameData, Builder, Shape, Orig: E.first);
        GEP->setName(E.first->getName() + Twine(".reload.addr"));
        if (ByValTy) {
          // Byval arguments live in the frame by value, so the slot address
          // itself serves as the reload.
          CurrentReload = GEP;
        } else {
          auto SpillAlignment = Align(FrameData.getAlign(V: Def));
          auto *LI =
              Builder.CreateAlignedLoad(Ty: E.first->getType(), Ptr: GEP, Align: SpillAlignment,
                                        Name: E.first->getName() + Twine(".reload"));
          if (TBAATag)
            LI->setMetadata(KindID: LLVMContext::MD_tbaa, Node: TBAATag);
          CurrentReload = LI;
        }

        TinyPtrVector<DbgVariableRecord *> DVRs = findDbgRecordsThroughLoads<
            DbgVariableRecord::LocationType::Declare>(F&: *F, Def);

        auto SalvageOne = [&](DbgVariableRecord *DDI) {
          // This dbg.declare is preserved for all coro-split function
          // fragments. It will be unreachable in the main function, and
          // processed by coro::salvageDebugInfo() by the Cloner.
          DbgVariableRecord *NewDVR = new DbgVariableRecord(
              ValueAsMetadata::get(V: CurrentReload), DDI->getVariable(),
              DDI->getExpression(), DDI->getDebugLoc(),
              DbgVariableRecord::LocationType::Declare);
          Builder.GetInsertPoint()->getParent()->insertDbgRecordBefore(
              DR: NewDVR, Here: Builder.GetInsertPoint());
          // This dbg.declare is for the main function entry point. It
          // will be deleted in all coro-split functions.
          coro::salvageDebugInfo(ArgToAllocaMap, DVR&: *DDI, UseEntryValue: false /*UseEntryValue*/);
        };
        for_each(Range&: DVRs, F: SalvageOne);
      }

      TinyPtrVector<DbgVariableRecord *> DVRDeclareValues =
          findDbgRecordsThroughLoads<
              DbgVariableRecord::LocationType::DeclareValue>(F&: *F, Def);

      auto SalvageOneCoro = [&](auto *DDI) {
        // This dbg.declare_value is preserved for all coro-split function
        // fragments. It will be unreachable in the main function, and
        // processed by coro::salvageDebugInfo() by the Cloner. However, convert
        // it to a dbg.declare to make sure future passes don't have to deal
        // with a dbg.declare_value.
        auto *VAM = ValueAsMetadata::get(V: CurrentReload);
        Type *Ty = VAM->getValue()->getType();
        // If the metadata type is not a pointer, emit a dbg.value instead.
        DbgVariableRecord *NewDVR = new DbgVariableRecord(
            ValueAsMetadata::get(V: CurrentReload), DDI->getVariable(),
            DDI->getExpression(), DDI->getDebugLoc(),
            Ty->isPointerTy() ? DbgVariableRecord::LocationType::Declare
                              : DbgVariableRecord::LocationType::Value);
        Builder.GetInsertPoint()->getParent()->insertDbgRecordBefore(
            DR: NewDVR, Here: Builder.GetInsertPoint());
        // This dbg.declare_value is for the main function entry point. It
        // will be deleted in all coro-split functions.
        coro::salvageDebugInfo(ArgToAllocaMap, DVR&: *DDI, UseEntryValue: false /*UseEntryValue*/);
      };
      for_each(Range&: DVRDeclareValues, F: SalvageOneCoro);

      // If we have a single edge PHINode, remove it and replace it with a
      // reload from the coroutine frame. (We already took care of multi edge
      // PHINodes by normalizing them in the rewritePHIs function).
      if (auto *PN = dyn_cast<PHINode>(Val: U)) {
        assert(PN->getNumIncomingValues() == 1 &&
               "unexpected number of incoming "
               "values in the PHINode");
        PN->replaceAllUsesWith(V: CurrentReload);
        PN->eraseFromParent();
        continue;
      }

      // Replace all uses of CurrentValue in the current instruction with
      // reload.
      U->replaceUsesOfWith(From: Def, To: CurrentReload);
      // Instructions are added to Def's user list if the attached
      // debug records use Def. Update those now.
      for (DbgVariableRecord &DVR : filterDbgVars(R: U->getDbgRecordRange()))
        DVR.replaceVariableLocationOp(OldValue: Def, NewValue: CurrentReload, AllowEmpty: true);
    }
  }

  // Split the block after CoroBegin to create the AllocaSpillBB / PostSpill
  // structure described in the function comment above.
  BasicBlock *FramePtrBB = Shape.getInsertPtAfterFramePtr()->getParent();

  auto SpillBlock = FramePtrBB->splitBasicBlock(
      I: Shape.getInsertPtAfterFramePtr(), BBName: "AllocaSpillBB");
  SpillBlock->splitBasicBlock(I: &SpillBlock->front(), BBName: "PostSpill");
  Shape.AllocaSpillBlock = SpillBlock;

  // retcon and retcon.once lowering assumes all uses have been sunk.
  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
      Shape.ABI == coro::ABI::Async) {
    // If we found any allocas, replace all of their remaining uses with Geps.
    Builder.SetInsertPoint(TheBB: SpillBlock, IP: SpillBlock->begin());
    for (const auto &P : FrameData.Allocas) {
      AllocaInst *Alloca = P.Alloca;
      auto *G = createGEPToFramePointer(FrameData, Builder, Shape, Orig: Alloca);

      // Remove any lifetime intrinsics, now that these are no longer allocas.
      for (User *U : make_early_inc_range(Range: Alloca->users())) {
        auto *I = cast<Instruction>(Val: U);
        if (I->isLifetimeStartOrEnd())
          I->eraseFromParent();
      }

      // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
      // here, as we are changing location of the instruction.
      G->takeName(V: Alloca);
      Alloca->replaceAllUsesWith(V: G);
      Alloca->eraseFromParent();
    }
    return;
  }

  // If we found any alloca, replace all of their remaining uses with GEP
  // instructions. To retain debuggability, we replace the uses of allocas for
  // dbg.declares and dbg.values with the reload from the frame.
  // Note: We cannot replace the alloca with GEP instructions indiscriminately,
  // as some of the uses may not be dominated by CoroBegin.
  Builder.SetInsertPoint(TheBB: Shape.AllocaSpillBlock,
                         IP: Shape.AllocaSpillBlock->begin());
  SmallVector<Instruction *, 4> UsersToUpdate;
  for (const auto &A : FrameData.Allocas) {
    AllocaInst *Alloca = A.Alloca;
    UsersToUpdate.clear();
    for (User *U : make_early_inc_range(Range: Alloca->users())) {
      auto *I = cast<Instruction>(Val: U);
      // It is meaningless to retain the lifetime intrinsics refer for the
      // member of coroutine frames and the meaningless lifetime intrinsics
      // are possible to block further optimizations.
      if (I->isLifetimeStartOrEnd())
        I->eraseFromParent();
      else if (DT.dominates(Def: Shape.CoroBegin, User: I))
        UsersToUpdate.push_back(Elt: I);
    }

    if (UsersToUpdate.empty())
      continue;
    auto *G = createGEPToFramePointer(FrameData, Builder, Shape, Orig: Alloca);
    G->setName(Alloca->getName() + Twine(".reload.addr"));

    SmallVector<DbgVariableRecord *> DbgVariableRecords;
    findDbgUsers(V: Alloca, DbgVariableRecords);
    for (auto *DVR : DbgVariableRecords)
      DVR->replaceVariableLocationOp(OldValue: Alloca, NewValue: G);

    for (Instruction *I : UsersToUpdate)
      I->replaceUsesOfWith(From: Alloca, To: G);

    if (Alloca->user_empty())
      Alloca->eraseFromParent();
  }
  Builder.SetInsertPoint(&*Shape.getInsertPtAfterFramePtr());
  for (const auto &A : FrameData.Allocas) {
    AllocaInst *Alloca = A.Alloca;
    if (A.MayWriteBeforeCoroBegin) {
      // isEscaped really means potentially modified before CoroBegin.
      handleAccessBeforeCoroBegin(FrameData, Shape, Builder, Alloca);
    }
    // For each alias to Alloca created before CoroBegin but used after
    // CoroBegin, we recreate them after CoroBegin by applying the offset
    // to the pointer in the frame.
    for (const auto &Alias : A.Aliases) {
      auto *FramePtr =
          createGEPToFramePointer(FrameData, Builder, Shape, Orig: Alloca);
      auto &Value = *Alias.second;
      auto ITy = IntegerType::get(C, NumBits: Value.getBitWidth());
      auto *AliasPtr =
          Builder.CreateInBoundsPtrAdd(Ptr: FramePtr, Offset: ConstantInt::get(Ty: ITy, V: Value));
      Alias.first->replaceUsesWithIf(
          New: AliasPtr, ShouldReplace: [&](Use &U) { return DT.dominates(Def: Shape.CoroBegin, U); });
    }
  }
}
1273
1274// Moves the values in the PHIs in SuccBB that correspong to PredBB into a new
1275// PHI in InsertedBB.
1276static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
1277 BasicBlock *InsertedBB,
1278 BasicBlock *PredBB,
1279 PHINode *UntilPHI = nullptr) {
1280 auto *PN = cast<PHINode>(Val: &SuccBB->front());
1281 do {
1282 int Index = PN->getBasicBlockIndex(BB: InsertedBB);
1283 Value *V = PN->getIncomingValue(i: Index);
1284 PHINode *InputV = PHINode::Create(
1285 Ty: V->getType(), NumReservedValues: 1, NameStr: V->getName() + Twine(".") + SuccBB->getName());
1286 InputV->insertBefore(InsertPos: InsertedBB->begin());
1287 InputV->addIncoming(V, BB: PredBB);
1288 PN->setIncomingValue(i: Index, V: InputV);
1289 PN = dyn_cast<PHINode>(Val: PN->getNextNode());
1290 } while (PN != UntilPHI);
1291}
1292
// Rewrites the PHI Nodes in a cleanuppad.
static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
                                     CleanupPadInst *CleanupPad) {
  // For every incoming edge to a CleanupPad we will create a new block holding
  // all incoming values in single-value PHI nodes. We will then create another
  // block to act as a dispatcher (as all unwind edges for related EH blocks
  // must be the same).
  //
  // cleanuppad:
  //    %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
  //    %3 = cleanuppad within none []
  //
  // It will create:
  //
  // cleanuppad.corodispatch
  //    %2 = phi i8[0, %catchswitch], [1, %catch.1]
  //    %3 = cleanuppad within none []
  //    switch i8 %2, label %unreachable
  //            [i8 0, label %cleanuppad.from.catchswitch
  //             i8 1, label %cleanuppad.from.catch.1]
  // cleanuppad.from.catchswitch:
  //    %4 = phi i32 [%0, %catchswitch]
  //    br %label cleanuppad
  // cleanuppad.from.catch.1:
  //    %6 = phi i32 [%1, %catch.1]
  //    br %label cleanuppad
  // cleanuppad:
  //    %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
  //                 [%6, %cleanuppad.from.catch.1]

  // Unreachable BB, in case switching on an invalid value in the dispatcher.
  auto *UnreachBB = BasicBlock::Create(
      Context&: CleanupPadBB->getContext(), Name: "unreachable", Parent: CleanupPadBB->getParent());
  IRBuilder<> Builder(UnreachBB);
  Builder.CreateUnreachable();

  // Create a new cleanuppad which will be the dispatcher: it holds a PHI that
  // numbers the incoming edge, the (moved) cleanuppad instruction, and a
  // switch to the per-predecessor block.
  auto *NewCleanupPadBB =
      BasicBlock::Create(Context&: CleanupPadBB->getContext(),
                         Name: CleanupPadBB->getName() + Twine(".corodispatch"),
                         Parent: CleanupPadBB->getParent(), InsertBefore: CleanupPadBB);
  Builder.SetInsertPoint(NewCleanupPadBB);
  auto *SwitchType = Builder.getInt8Ty();
  auto *SetDispatchValuePN =
      Builder.CreatePHI(Ty: SwitchType, NumReservedValues: pred_size(BB: CleanupPadBB));
  CleanupPad->removeFromParent();
  CleanupPad->insertAfter(InsertPos: SetDispatchValuePN->getIterator());
  auto *SwitchOnDispatch = Builder.CreateSwitch(V: SetDispatchValuePN, Dest: UnreachBB,
                                                NumCases: pred_size(BB: CleanupPadBB));

  int SwitchIndex = 0;
  SmallVector<BasicBlock *, 8> Preds(predecessors(BB: CleanupPadBB));
  for (BasicBlock *Pred : Preds) {
    // Create a new cleanuppad and move the PHI values to there.
    auto *CaseBB = BasicBlock::Create(Context&: CleanupPadBB->getContext(),
                                      Name: CleanupPadBB->getName() +
                                          Twine(".from.") + Pred->getName(),
                                      Parent: CleanupPadBB->getParent(), InsertBefore: CleanupPadBB);
    updatePhiNodes(DestBB: CleanupPadBB, OldPred: Pred, NewPred: CaseBB);
    CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") +
                    Pred->getName());
    Builder.SetInsertPoint(CaseBB);
    Builder.CreateBr(Dest: CleanupPadBB);
    movePHIValuesToInsertedBlock(SuccBB: CleanupPadBB, InsertedBB: CaseBB, PredBB: NewCleanupPadBB);

    // Update this Pred to the new unwind point.
    setUnwindEdgeTo(TI: Pred->getTerminator(), Succ: NewCleanupPadBB);

    // Setup the switch in the dispatcher: edge number SwitchIndex routes to
    // this predecessor's dedicated block.
    auto *SwitchConstant = ConstantInt::get(Ty: SwitchType, V: SwitchIndex);
    SetDispatchValuePN->addIncoming(V: SwitchConstant, BB: Pred);
    SwitchOnDispatch->addCase(OnVal: SwitchConstant, Dest: CaseBB);
    SwitchIndex++;
  }
}
1368
1369static void cleanupSinglePredPHIs(Function &F) {
1370 SmallVector<PHINode *, 32> Worklist;
1371 for (auto &BB : F) {
1372 for (auto &Phi : BB.phis()) {
1373 if (Phi.getNumIncomingValues() == 1) {
1374 Worklist.push_back(Elt: &Phi);
1375 } else
1376 break;
1377 }
1378 }
1379 while (!Worklist.empty()) {
1380 auto *Phi = Worklist.pop_back_val();
1381 auto *OriginalValue = Phi->getIncomingValue(i: 0);
1382 Phi->replaceAllUsesWith(V: OriginalValue);
1383 }
1384}
1385
// Rewrite the PHI nodes of BB so that afterwards every PHI in the function
// has exactly one incoming value; later analysis relies on this invariant.
static void rewritePHIs(BasicBlock &BB) {
  // For every incoming edge we will create a block holding all
  // incoming values in a single PHI nodes.
  //
  // loop:
  //    %n.val = phi i32[%n, %entry], [%inc, %loop]
  //
  // It will create:
  //
  // loop.from.entry:
  //    %n.loop.pre = phi i32 [%n, %entry]
  //    br %label loop
  // loop.from.loop:
  //    %inc.loop.pre = phi i32 [%inc, %loop]
  //    br %label loop
  //
  // After this rewrite, further analysis will ignore any phi nodes with more
  // than one incoming edge.

  // TODO: Simplify PHINodes in the basic block to remove duplicate
  // predecessors.

  // Special case for CleanupPad: all EH blocks must have the same unwind edge
  // so we need to create an additional "dispatcher" block.
  if (!BB.empty()) {
    if (auto *CleanupPad =
            dyn_cast_or_null<CleanupPadInst>(Val: BB.getFirstNonPHIIt())) {
      SmallVector<BasicBlock *, 8> Preds(predecessors(BB: &BB));
      for (BasicBlock *Pred : Preds) {
        if (CatchSwitchInst *CS =
                dyn_cast<CatchSwitchInst>(Val: Pred->getTerminator())) {
          // CleanupPad with a CatchSwitch predecessor: therefore this is an
          // unwind destination that needs to be handled specially.
          assert(CS->getUnwindDest() == &BB);
          (void)CS;
          rewritePHIsForCleanupPad(CleanupPadBB: &BB, CleanupPad);
          return;
        }
      }
    }
  }

  LandingPadInst *LandingPad = nullptr;
  PHINode *ReplPHI = nullptr;
  if (!BB.empty()) {
    if ((LandingPad =
             dyn_cast_or_null<LandingPadInst>(Val: BB.getFirstNonPHIIt()))) {
      // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
      // We replace the original landing pad with a PHINode that will collect the
      // results from all of them.
      ReplPHI = PHINode::Create(Ty: LandingPad->getType(), NumReservedValues: 1, NameStr: "");
      ReplPHI->insertBefore(InsertPos: LandingPad->getIterator());
      ReplPHI->takeName(V: LandingPad);
      LandingPad->replaceAllUsesWith(V: ReplPHI);
      // We will erase the original landing pad at the end of this function after
      // ehAwareSplitEdge cloned it in the transition blocks.
    }
  }

  // Split each incoming edge, giving every predecessor its own block with a
  // single-entry PHI per original PHI value.
  SmallVector<BasicBlock *, 8> Preds(predecessors(BB: &BB));
  for (BasicBlock *Pred : Preds) {
    auto *IncomingBB = ehAwareSplitEdge(BB: Pred, Succ: &BB, OriginalPad: LandingPad, LandingPadReplacement: ReplPHI);
    IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());

    // Stop the moving of values at ReplPHI, as this is either null or the PHI
    // that replaced the landing pad.
    movePHIValuesToInsertedBlock(SuccBB: &BB, InsertedBB: IncomingBB, PredBB: Pred, UntilPHI: ReplPHI);
  }

  if (LandingPad) {
    // Calls to ehAwareSplitEdge function cloned the original landing pad.
    // No longer need it.
    LandingPad->eraseFromParent();
  }
}
1461
1462static void rewritePHIs(Function &F) {
1463 SmallVector<BasicBlock *, 8> WorkList;
1464
1465 for (BasicBlock &BB : F)
1466 if (auto *PN = dyn_cast<PHINode>(Val: &BB.front()))
1467 if (PN->getNumIncomingValues() > 1)
1468 WorkList.push_back(Elt: &BB);
1469
1470 for (BasicBlock *BB : WorkList)
1471 rewritePHIs(BB&: *BB);
1472}
1473
1474// Splits the block at a particular instruction unless it is the first
1475// instruction in the block with a single predecessor.
1476static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
1477 auto *BB = I->getParent();
1478 if (&BB->front() == I) {
1479 if (BB->getSinglePredecessor()) {
1480 BB->setName(Name);
1481 return BB;
1482 }
1483 }
1484 return BB->splitBasicBlock(I, BBName: Name);
1485}
1486
// Split above and below a particular instruction so that it
// will be all alone by itself in a block.
static void splitAround(Instruction *I, const Twine &Name) {
  // First cut makes I the head of a block named Name; second cut ends that
  // block right after I, naming the remainder "After" + Name.
  splitBlockIfNotFirst(I, Name);
  splitBlockIfNotFirst(I: I->getNextNode(), Name: "After" + Name);
}
1493
1494/// After we split the coroutine, will the given basic block be along
1495/// an obvious exit path for the resumption function?
1496static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
1497 unsigned depth = 3) {
1498 // If we've bottomed out our depth count, stop searching and assume
1499 // that the path might loop back.
1500 if (depth == 0) return false;
1501
1502 // If this is a suspend block, we're about to exit the resumption function.
1503 if (coro::isSuspendBlock(BB))
1504 return true;
1505
1506 // Recurse into the successors.
1507 for (auto *Succ : successors(BB)) {
1508 if (!willLeaveFunctionImmediatelyAfter(BB: Succ, depth: depth - 1))
1509 return false;
1510 }
1511
1512 // If none of the successors leads back in a loop, we're on an exit/abort.
1513 return true;
1514}
1515
1516static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
1517 // Look for a free that isn't sufficiently obviously followed by
1518 // either a suspend or a termination, i.e. something that will leave
1519 // the coro resumption frame.
1520 for (auto *U : AI->users()) {
1521 auto FI = dyn_cast<CoroAllocaFreeInst>(Val: U);
1522 if (!FI) continue;
1523
1524 if (!willLeaveFunctionImmediatelyAfter(BB: FI->getParent()))
1525 return true;
1526 }
1527
1528 // If we never found one, we don't need a stack save.
1529 return false;
1530}
1531
1532/// Turn each of the given local allocas into a normal (dynamic) alloca
1533/// instruction.
1534static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
1535 SmallVectorImpl<Instruction*> &DeadInsts) {
1536 for (auto *AI : LocalAllocas) {
1537 IRBuilder<> Builder(AI);
1538
1539 // Save the stack depth. Try to avoid doing this if the stackrestore
1540 // is going to immediately precede a return or something.
1541 Value *StackSave = nullptr;
1542 if (localAllocaNeedsStackSave(AI))
1543 StackSave = Builder.CreateStackSave();
1544
1545 // Allocate memory.
1546 auto Alloca = Builder.CreateAlloca(Ty: Builder.getInt8Ty(), ArraySize: AI->getSize());
1547 Alloca->setAlignment(AI->getAlignment());
1548
1549 for (auto *U : AI->users()) {
1550 // Replace gets with the allocation.
1551 if (isa<CoroAllocaGetInst>(Val: U)) {
1552 U->replaceAllUsesWith(V: Alloca);
1553
1554 // Replace frees with stackrestores. This is safe because
1555 // alloca.alloc is required to obey a stack discipline, although we
1556 // don't enforce that structurally.
1557 } else {
1558 auto FI = cast<CoroAllocaFreeInst>(Val: U);
1559 if (StackSave) {
1560 Builder.SetInsertPoint(FI);
1561 Builder.CreateStackRestore(Ptr: StackSave);
1562 }
1563 }
1564 DeadInsts.push_back(Elt: cast<Instruction>(Val: U));
1565 }
1566
1567 DeadInsts.push_back(Elt: AI);
1568 }
1569}
1570
1571/// Get the current swifterror value.
1572static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
1573 coro::Shape &Shape) {
1574 // Make a fake function pointer as a sort of intrinsic.
1575 auto FnTy = FunctionType::get(Result: ValueTy, Params: {}, isVarArg: false);
1576 auto Fn = ConstantPointerNull::get(T: Builder.getPtrTy());
1577
1578 auto Call = Builder.CreateCall(FTy: FnTy, Callee: Fn, Args: {});
1579 Shape.SwiftErrorOps.push_back(Elt: Call);
1580
1581 return Call;
1582}
1583
1584/// Set the given value as the current swifterror value.
1585///
1586/// Returns a slot that can be used as a swifterror slot.
1587static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
1588 coro::Shape &Shape) {
1589 // Make a fake function pointer as a sort of intrinsic.
1590 auto FnTy = FunctionType::get(Result: Builder.getPtrTy(),
1591 Params: {V->getType()}, isVarArg: false);
1592 auto Fn = ConstantPointerNull::get(T: Builder.getPtrTy());
1593
1594 auto Call = Builder.CreateCall(FTy: FnTy, Callee: Fn, Args: { V });
1595 Shape.SwiftErrorOps.push_back(Elt: Call);
1596
1597 return Call;
1598}
1599
1600/// Set the swifterror value from the given alloca before a call,
1601/// then put in back in the alloca afterwards.
1602///
1603/// Returns an address that will stand in for the swifterror slot
1604/// until splitting.
1605static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
1606 AllocaInst *Alloca,
1607 coro::Shape &Shape) {
1608 auto ValueTy = Alloca->getAllocatedType();
1609 IRBuilder<> Builder(Call);
1610
1611 // Load the current value from the alloca and set it as the
1612 // swifterror value.
1613 auto ValueBeforeCall = Builder.CreateLoad(Ty: ValueTy, Ptr: Alloca);
1614 auto Addr = emitSetSwiftErrorValue(Builder, V: ValueBeforeCall, Shape);
1615
1616 // Move to after the call. Since swifterror only has a guaranteed
1617 // value on normal exits, we can ignore implicit and explicit unwind
1618 // edges.
1619 if (isa<CallInst>(Val: Call)) {
1620 Builder.SetInsertPoint(Call->getNextNode());
1621 } else {
1622 auto Invoke = cast<InvokeInst>(Val: Call);
1623 Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
1624 }
1625
1626 // Get the current swifterror value and store it to the alloca.
1627 auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
1628 Builder.CreateStore(Val: ValueAfterCall, Ptr: Alloca);
1629
1630 return Addr;
1631}
1632
1633/// Eliminate a formerly-swifterror alloca by inserting the get/set
1634/// intrinsics and attempting to MemToReg the alloca away.
1635static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
1636 coro::Shape &Shape) {
1637 for (Use &Use : llvm::make_early_inc_range(Range: Alloca->uses())) {
1638 // swifterror values can only be used in very specific ways.
1639 // We take advantage of that here.
1640 auto User = Use.getUser();
1641 if (isa<LoadInst>(Val: User) || isa<StoreInst>(Val: User))
1642 continue;
1643
1644 assert(isa<CallInst>(User) || isa<InvokeInst>(User));
1645 auto Call = cast<Instruction>(Val: User);
1646
1647 auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
1648
1649 // Use the returned slot address as the call argument.
1650 Use.set(Addr);
1651 }
1652
1653 // All the uses should be loads and stores now.
1654 assert(isAllocaPromotable(Alloca));
1655}
1656
1657/// "Eliminate" a swifterror argument by reducing it to the alloca case
1658/// and then loading and storing in the prologue and epilog.
1659///
1660/// The argument keeps the swifterror flag.
1661static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
1662 coro::Shape &Shape,
1663 SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
1664 IRBuilder<> Builder(&F.getEntryBlock(),
1665 F.getEntryBlock().getFirstNonPHIOrDbg());
1666
1667 auto ArgTy = cast<PointerType>(Val: Arg.getType());
1668 auto ValueTy = PointerType::getUnqual(C&: F.getContext());
1669
1670 // Reduce to the alloca case:
1671
1672 // Create an alloca and replace all uses of the arg with it.
1673 auto Alloca = Builder.CreateAlloca(Ty: ValueTy, AddrSpace: ArgTy->getAddressSpace());
1674 Arg.replaceAllUsesWith(V: Alloca);
1675
1676 // Set an initial value in the alloca. swifterror is always null on entry.
1677 auto InitialValue = Constant::getNullValue(Ty: ValueTy);
1678 Builder.CreateStore(Val: InitialValue, Ptr: Alloca);
1679
1680 // Find all the suspends in the function and save and restore around them.
1681 for (auto *Suspend : Shape.CoroSuspends) {
1682 (void) emitSetAndGetSwiftErrorValueAround(Call: Suspend, Alloca, Shape);
1683 }
1684
1685 // Find all the coro.ends in the function and restore the error value.
1686 for (auto *End : Shape.CoroEnds) {
1687 Builder.SetInsertPoint(End);
1688 auto FinalValue = Builder.CreateLoad(Ty: ValueTy, Ptr: Alloca);
1689 (void) emitSetSwiftErrorValue(Builder, V: FinalValue, Shape);
1690 }
1691
1692 // Now we can use the alloca logic.
1693 AllocasToPromote.push_back(Elt: Alloca);
1694 eliminateSwiftErrorAlloca(F, Alloca, Shape);
1695}
1696
1697/// Eliminate all problematic uses of swifterror arguments and allocas
1698/// from the function. We'll fix them up later when splitting the function.
1699static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
1700 SmallVector<AllocaInst*, 4> AllocasToPromote;
1701
1702 // Look for a swifterror argument.
1703 for (auto &Arg : F.args()) {
1704 if (!Arg.hasSwiftErrorAttr()) continue;
1705
1706 eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
1707 break;
1708 }
1709
1710 // Look for swifterror allocas.
1711 for (auto &Inst : F.getEntryBlock()) {
1712 auto Alloca = dyn_cast<AllocaInst>(Val: &Inst);
1713 if (!Alloca || !Alloca->isSwiftError()) continue;
1714
1715 // Clear the swifterror flag.
1716 Alloca->setSwiftError(false);
1717
1718 AllocasToPromote.push_back(Elt: Alloca);
1719 eliminateSwiftErrorAlloca(F, Alloca, Shape);
1720 }
1721
1722 // If we have any allocas to promote, compute a dominator tree and
1723 // promote them en masse.
1724 if (!AllocasToPromote.empty()) {
1725 DominatorTree DT(F);
1726 PromoteMemToReg(Allocas: AllocasToPromote, DT);
1727 }
1728}
1729
1730/// For each local variable that all of its user are only used inside one of
1731/// suspended region, we sink their lifetime.start markers to the place where
1732/// after the suspend block. Doing so minimizes the lifetime of each variable,
1733/// hence minimizing the amount of data we end up putting on the frame.
1734static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
1735 SuspendCrossingInfo &Checker,
1736 const DominatorTree &DT) {
1737 if (F.hasOptNone())
1738 return;
1739
1740 // Collect all possible basic blocks which may dominate all uses of allocas.
1741 SmallPtrSet<BasicBlock *, 4> DomSet;
1742 DomSet.insert(Ptr: &F.getEntryBlock());
1743 for (auto *CSI : Shape.CoroSuspends) {
1744 BasicBlock *SuspendBlock = CSI->getParent();
1745 assert(coro::isSuspendBlock(SuspendBlock) &&
1746 SuspendBlock->getSingleSuccessor() &&
1747 "should have split coro.suspend into its own block");
1748 DomSet.insert(Ptr: SuspendBlock->getSingleSuccessor());
1749 }
1750
1751 for (Instruction &I : instructions(F)) {
1752 AllocaInst* AI = dyn_cast<AllocaInst>(Val: &I);
1753 if (!AI)
1754 continue;
1755
1756 for (BasicBlock *DomBB : DomSet) {
1757 bool Valid = true;
1758 SmallVector<Instruction *, 1> Lifetimes;
1759
1760 auto isLifetimeStart = [](Instruction* I) {
1761 if (auto* II = dyn_cast<IntrinsicInst>(Val: I))
1762 return II->getIntrinsicID() == Intrinsic::lifetime_start;
1763 return false;
1764 };
1765
1766 auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
1767 if (isLifetimeStart(U)) {
1768 Lifetimes.push_back(Elt: U);
1769 return true;
1770 }
1771 if (!U->hasOneUse() || U->stripPointerCasts() != AI)
1772 return false;
1773 if (isLifetimeStart(U->user_back())) {
1774 Lifetimes.push_back(Elt: U->user_back());
1775 return true;
1776 }
1777 return false;
1778 };
1779
1780 for (User *U : AI->users()) {
1781 Instruction *UI = cast<Instruction>(Val: U);
1782 // For all users except lifetime.start markers, if they are all
1783 // dominated by one of the basic blocks and do not cross
1784 // suspend points as well, then there is no need to spill the
1785 // instruction.
1786 if (!DT.dominates(A: DomBB, B: UI->getParent()) ||
1787 Checker.isDefinitionAcrossSuspend(DefBB: DomBB, U: UI)) {
1788 // Skip lifetime.start, GEP and bitcast used by lifetime.start
1789 // markers.
1790 if (collectLifetimeStart(UI, AI))
1791 continue;
1792 Valid = false;
1793 break;
1794 }
1795 }
1796 // Sink lifetime.start markers to dominate block when they are
1797 // only used outside the region.
1798 if (Valid && Lifetimes.size() != 0) {
1799 auto *NewLifetime = Lifetimes[0]->clone();
1800 NewLifetime->replaceUsesOfWith(From: NewLifetime->getOperand(i: 0), To: AI);
1801 NewLifetime->insertBefore(InsertPos: DomBB->getTerminator()->getIterator());
1802
1803 // All the outsided lifetime.start markers are no longer necessary.
1804 for (Instruction *S : Lifetimes)
1805 S->eraseFromParent();
1806
1807 break;
1808 }
1809 }
1810 }
1811}
1812
/// Walk backwards through loads, stores, and salvageable pointer arithmetic
/// from \p Storage, rewriting \p Expr along the way, to produce a salvaged
/// (storage value, DIExpression) pair for a variable location — or
/// std::nullopt if no storage could be recovered.
///
/// \param ArgToAllocaMap caches the one spill alloca created per Argument so
///        that all variables based on the same argument share a single slot.
/// \param UseEntryValue whether a DW_OP_entry_value may be emitted (only done
///        for swift-async arguments; see WillUseEntryValue below).
/// \param SkipOutermostLoad true for dbg.declare-style records, whose location
///        is implicitly a memory location (see the FIXME below).
static std::optional<std::pair<Value &, DIExpression &>>
salvageDebugInfoImpl(SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
                     bool UseEntryValue, Function *F, Value *Storage,
                     DIExpression *Expr, bool SkipOutermostLoad) {
  // Prepare an entry-block insertion point (past any leading intrinsics) in
  // case a spill alloca has to be materialized further down.
  IRBuilder<> Builder(F->getContext());
  auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
  while (isa<IntrinsicInst>(Val: InsertPt))
    ++InsertPt;
  Builder.SetInsertPoint(TheBB: &F->getEntryBlock(), IP: InsertPt);

  // Chase the storage back through instructions until we bottom out at a
  // non-instruction (argument/constant) or salvaging fails.
  while (auto *Inst = dyn_cast_or_null<Instruction>(Val: Storage)) {
    if (auto *LdInst = dyn_cast<LoadInst>(Val: Inst)) {
      Storage = LdInst->getPointerOperand();
      // FIXME: This is a heuristic that works around the fact that
      // LLVM IR debug intrinsics cannot yet distinguish between
      // memory and value locations: Because a dbg.declare(alloca) is
      // implicitly a memory location no DW_OP_deref operation for the
      // last direct load from an alloca is necessary. This condition
      // effectively drops the *last* DW_OP_deref in the expression.
      if (!SkipOutermostLoad)
        Expr = DIExpression::prepend(Expr, Flags: DIExpression::DerefBefore);
    } else if (auto *StInst = dyn_cast<StoreInst>(Val: Inst)) {
      // A store forwards straight to the value that was stored.
      Storage = StInst->getValueOperand();
    } else {
      // Anything else (GEPs, casts, arithmetic): let the generic salvaging
      // utility translate the instruction into DIExpression operations.
      SmallVector<uint64_t, 16> Ops;
      SmallVector<Value *, 0> AdditionalValues;
      Value *Op = llvm::salvageDebugInfoImpl(
          I&: *Inst, CurrentLocOps: Expr ? Expr->getNumLocationOperands() : 0, Ops,
          AdditionalValues);
      if (!Op || !AdditionalValues.empty()) {
        // If salvaging failed or salvaging produced more than one location
        // operand, give up.
        break;
      }
      Storage = Op;
      Expr = DIExpression::appendOpsToArg(Expr, Ops, ArgNo: 0, /*StackValue*/ false);
    }
    // Only the outermost load may be skipped (see the FIXME above).
    SkipOutermostLoad = false;
  }
  if (!Storage)
    return std::nullopt;

  auto *StorageAsArg = dyn_cast<Argument>(Val: Storage);

  const bool IsSingleLocationExpression = Expr->isSingleLocationExpression();
  // Use an EntryValue when requested (UseEntryValue) for swift async Arguments.
  // Entry values in variadic expressions are not supported.
  const bool WillUseEntryValue =
      UseEntryValue && StorageAsArg &&
      StorageAsArg->hasAttribute(Kind: Attribute::SwiftAsync) &&
      !Expr->isEntryValue() && IsSingleLocationExpression;

  if (WillUseEntryValue)
    Expr = DIExpression::prepend(Expr, Flags: DIExpression::EntryValue);

  // If the coroutine frame is an Argument, store it in an alloca to improve
  // its availability (e.g. registers may be clobbered).
  // Avoid this if the value is guaranteed to be available through other means
  // (e.g. swift ABI guarantees).
  // Avoid this if multiple location expressions are involved, as LLVM does not
  // know how to prepend a deref in this scenario.
  if (StorageAsArg && !WillUseEntryValue && IsSingleLocationExpression) {
    // One spill alloca per argument, shared by every variable based on it.
    auto &Cached = ArgToAllocaMap[StorageAsArg];
    if (!Cached) {
      Cached = Builder.CreateAlloca(Ty: Storage->getType(), AddrSpace: 0, ArraySize: nullptr,
                                    Name: Storage->getName() + ".debug");
      Builder.CreateStore(Val: Storage, Ptr: Cached);
    }
    Storage = Cached;
    // FIXME: LLVM lacks nuanced semantics to differentiate between
    // memory and direct locations at the IR level. The backend will
    // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
    // location. Thus, if there are deref and offset operations in the
    // expression, we need to add a DW_OP_deref at the *start* of the
    // expression to first load the contents of the alloca before
    // adjusting it with the expression.
    Expr = DIExpression::prepend(Expr, Flags: DIExpression::DerefBefore);
  }

  Expr = Expr->foldConstantMath();
  return {{*Storage, *Expr}};
}
1895
/// Salvage the variable location described by \p DVR so it survives coroutine
/// splitting: rewrite its storage and expression via salvageDebugInfoImpl,
/// and hoist dbg.declare-style records next to the definition of their
/// (possibly new) storage.
void coro::salvageDebugInfo(
    SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
    DbgVariableRecord &DVR, bool UseEntryValue) {

  Function *F = DVR.getFunction();
  // Follow the pointer arithmetic all the way to the incoming
  // function argument and convert into a DIExpression.
  bool SkipOutermostLoad = DVR.isDbgDeclare() || DVR.isDbgDeclareValue();
  Value *OriginalStorage = DVR.getVariableLocationOp(OpIdx: 0);

  auto SalvagedInfo =
      ::salvageDebugInfoImpl(ArgToAllocaMap, UseEntryValue, F, Storage: OriginalStorage,
                             Expr: DVR.getExpression(), SkipOutermostLoad);
  if (!SalvagedInfo)
    return;

  Value *Storage = &SalvagedInfo->first;
  DIExpression *Expr = &SalvagedInfo->second;

  // Point the record at the salvaged storage/expression pair.
  DVR.replaceVariableLocationOp(OldValue: OriginalStorage, NewValue: Storage);
  DVR.setExpression(Expr);
  // We only hoist dbg.declare and dbg.declare_value today since it doesn't make
  // sense to hoist dbg.value since it does not have the same function wide
  // guarantees that dbg.declare does.
  if (DVR.getType() == DbgVariableRecord::LocationType::Declare ||
      DVR.getType() == DbgVariableRecord::LocationType::DeclareValue) {
    // Reinsert just after the storage's definition, or at the top of the
    // entry block when the storage is a function argument.
    std::optional<BasicBlock::iterator> InsertPt;
    if (auto *I = dyn_cast<Instruction>(Val: Storage)) {
      InsertPt = I->getInsertionPointAfterDef();
      // Update DILocation only if variable was not inlined.
      DebugLoc ILoc = I->getDebugLoc();
      DebugLoc DVRLoc = DVR.getDebugLoc();
      if (ILoc && DVRLoc &&
          DVRLoc->getScope()->getSubprogram() ==
              ILoc->getScope()->getSubprogram())
        DVR.setDebugLoc(ILoc);
    } else if (isa<Argument>(Val: Storage))
      InsertPt = F->getEntryBlock().begin();
    if (InsertPt) {
      DVR.removeFromParent();
      // If there is a dbg.declare_value being reinserted, insert it as a
      // dbg.declare instead, so that subsequent passes don't have to deal with
      // a dbg.declare_value.
      if (DVR.getType() == DbgVariableRecord::LocationType::DeclareValue) {
        auto *MD = DVR.getRawLocation();
        if (auto *VAM = dyn_cast<ValueAsMetadata>(Val: MD)) {
          Type *Ty = VAM->getValue()->getType();
          // Pointer-typed locations can become a plain declare; anything
          // else falls back to a value record.
          if (Ty->isPointerTy())
            DVR.Type = DbgVariableRecord::LocationType::Declare;
          else
            DVR.Type = DbgVariableRecord::LocationType::Value;
        }
      }
      (*InsertPt)->getParent()->insertDbgRecordBefore(DR: &DVR, Here: *InsertPt);
    }
  }
}
1953
/// Normalize \p F before frame building: eliminate problematic swifterror
/// uses, clear the promise from a switch-ABI coro.id, split blocks so that
/// each coro.save/coro.suspend/coro.end sits in its own block, and rewrite
/// PHIs so no incoming value is separated from its PHI by a suspend point.
void coro::normalizeCoroutine(Function &F, coro::Shape &Shape,
                              TargetTransformInfo &TTI) {
  // Don't eliminate swifterror in async functions that won't be split.
  if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
    eliminateSwiftError(F, Shape);

  // Drop the promise alloca from the switch-lowering coro.id.
  if (Shape.ABI == coro::ABI::Switch &&
      Shape.SwitchLowering.PromiseAlloca) {
    Shape.getSwitchCoroId()->clearPromise();
  }

  // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
  // intrinsics are in their own blocks to simplify the logic of building up
  // SuspendCrossing data.
  for (auto *CSI : Shape.CoroSuspends) {
    if (auto *Save = CSI->getCoroSave())
      splitAround(I: Save, Name: "CoroSave");
    splitAround(I: CSI, Name: "CoroSuspend");
  }

  // Put CoroEnds into their own blocks.
  for (AnyCoroEndInst *CE : Shape.CoroEnds) {
    splitAround(I: CE, Name: "CoroEnd");

    // Emit the musttail call function in a new block before the CoroEnd.
    // We do this here so that the right suspend crossing info is computed for
    // the uses of the musttail call function call. (Arguments to the coro.end
    // instructions would be ignored)
    if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(Val: CE)) {
      auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction();
      if (!MustTailCallFn)
        continue;
      IRBuilder<> Builder(AsyncEnd);
      SmallVector<Value *, 8> Args(AsyncEnd->args());
      // The first three coro.async.end operands are not forwarded to the
      // musttail callee.
      auto Arguments = ArrayRef<Value *>(Args).drop_front(N: 3);
      auto *Call = coro::createMustTailCall(
          Loc: AsyncEnd->getDebugLoc(), MustTailCallFn, TTI, Arguments, Builder);
      splitAround(I: Call, Name: "MustTailCall.Before.CoroEnd");
    }
  }

  // Later code makes structural assumptions about single predecessors phis e.g
  // that they are not live across a suspend point.
  cleanupSinglePredPHIs(F);

  // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
  // never have its definition separated from the PHI by the suspend point.
  rewritePHIs(F);
}
2003
/// Build the coroutine frame: rematerialize cheap suspend-crossing values,
/// sink lifetime markers, collect the spills and allocas that must live in
/// the frame, lay the frame out, emit its debug info, and insert the
/// spill/reload code.
void coro::BaseABI::buildCoroutineFrame(bool OptimizeFrame) {
  SuspendCrossingInfo Checker(F, Shape.CoroSuspends, Shape.CoroEnds);
  // Re-compute materializable values at their uses instead of spilling them.
  doRematerializations(F, Checker, IsMaterializable);

  const DominatorTree DT(F);
  if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
      Shape.ABI != coro::ABI::RetconOnce)
    sinkLifetimeStartMarkers(F, Shape, Checker, DT);

  // All values (that are not allocas) that needs to be spilled to the frame.
  coro::SpillInfo Spills;
  // All values defined as allocas that need to live in the frame.
  SmallVector<coro::AllocaInfo, 8> Allocas;

  // Collect the spills for arguments and other not-materializable values.
  coro::collectSpillsFromArgs(Spills, F, Checker);
  SmallVector<Instruction *, 4> DeadInstructions;
  SmallVector<CoroAllocaAllocInst *, 4> LocalAllocas;
  coro::collectSpillsAndAllocasFromInsts(Spills, Allocas, DeadInstructions,
                                         LocalAllocas, F, Checker, DT, Shape);
  coro::collectSpillsFromDbgInfo(Spills, F, Checker);

  LLVM_DEBUG(dumpAllocas(Allocas));
  LLVM_DEBUG(dumpSpills("Spills", Spills));

  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
      Shape.ABI == coro::ABI::Async)
    sinkSpillUsesAfterCoroBegin(DT, CoroBegin: Shape.CoroBegin, Spills, Allocas);

  // Build frame layout
  FrameDataInfo FrameData(Spills, Allocas);
  buildFrameLayout(F, DT, Shape, FrameData, OptimizeFrame);
  Shape.FramePtr = Shape.CoroBegin;
  // For now, this works for C++ programs only.
  buildFrameDebugInfo(F, Shape, FrameData);
  // Insert spills and reloads
  insertSpills(FrameData, Shape);
  lowerLocalAllocas(LocalAllocas, DeadInsts&: DeadInstructions);

  // Erase instructions made dead by the rewrites above.
  for (auto *I : DeadInstructions)
    I->eraseFromParent();
}
2046