1//===-- InlineAsmPrepare - Prepare inline asm for code generation ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This pass lowers inline asm calls in LLVM IR in order to assist
10// SelectionDAG's codegen.
11//
12// CallBrInst:
13//
14// Assists in inserting register copies for the output values of a callbr
15// along the edges leading to the indirect target blocks. Though the output
16// SSA value is defined by the callbr instruction itself in the IR
17// representation, the value cannot be copied to the appropriate virtual
18// registers prior to jumping to an indirect label, since the jump occurs
19// within the user-provided assembly blob.
20//
21// Instead, those copies must occur separately at the beginning of each
22// indirect target. That requires that we create a separate SSA definition in
23// each of them (via llvm.callbr.landingpad), and may require splitting
24// critical edges so we have a location to place the intrinsic. Finally, we
25// remap users of the original callbr output SSA value to instead point to
26// the appropriate llvm.callbr.landingpad value.
27//
28// Ideally, this could be done inside SelectionDAG, or in the
29// MachineInstruction representation, without the use of an IR-level
// intrinsic. But, within the current framework, it's simpler to implement
// as an IR pass. (If support for callbr in GlobalISel is implemented, it's
32// worth considering whether this is still required.)
33//
34//===----------------------------------------------------------------------===//
35
#include "llvm/CodeGen/InlineAsmPrepare.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <optional>
54
55using namespace llvm;
56
57#define DEBUG_TYPE "inline-asm-prepare"
58
59namespace {
60
61class InlineAsmPrepare : public FunctionPass {
62public:
63 InlineAsmPrepare() : FunctionPass(ID) {}
64
65 void getAnalysisUsage(AnalysisUsage &AU) const override {
66 AU.addPreserved<DominatorTreeWrapperPass>();
67 }
68 bool runOnFunction(Function &F) override;
69
70 static char ID;
71};
72
73char InlineAsmPrepare::ID = 0;
74
75} // end anonymous namespace
76
// Register the legacy pass with the pass registry and record its dependency
// on DominatorTreeWrapperPass (used opportunistically; see runOnFunction).
INITIALIZE_PASS_BEGIN(InlineAsmPrepare, "inline-asm-prepare",
                      "Prepare inline asm insts", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(InlineAsmPrepare, "inline-asm-prepare",
                    "Prepare inline asm insts", false, false)
82
83FunctionPass *llvm::createInlineAsmPreparePass() {
84 return new InlineAsmPrepare();
85}
86
87#ifndef NDEBUG
88static void printDebugDomInfo(const DominatorTree &DT, const Use &U,
89 const BasicBlock *BB, bool IsDefaultDest) {
90 if (isa<Instruction>(U.getUser()))
91 LLVM_DEBUG(dbgs() << "Use: " << *U.getUser() << ", in block "
92 << cast<Instruction>(U.getUser())->getParent()->getName()
93 << ", is " << (DT.dominates(BB, U) ? "" : "NOT ")
94 << "dominated by " << BB->getName() << " ("
95 << (IsDefaultDest ? "in" : "") << "direct)\n");
96}
97#endif
98
99/// The Use is in the same BasicBlock as the intrinsic call.
100static bool isInSameBasicBlock(const Use &U, const BasicBlock *BB) {
101 const auto *I = dyn_cast<Instruction>(Val: U.getUser());
102 return I && I->getParent() == BB;
103}
104
105static void updateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
106 SSAUpdater &SSAUpdate) {
107 SmallPtrSet<Use *, 4> Visited;
108 BasicBlock *DefaultDest = CBR->getDefaultDest();
109 BasicBlock *LandingPad = Intrinsic->getParent();
110
111 SmallVector<Use *, 4> Uses(make_pointer_range(Range: CBR->uses()));
112 for (Use *U : Uses) {
113 if (!Visited.insert(Ptr: U).second)
114 continue;
115
116#ifndef NDEBUG
117 printDebugDomInfo(DT, *U, LandingPad, /*IsDefaultDest*/ false);
118 printDebugDomInfo(DT, *U, DefaultDest, /*IsDefaultDest*/ true);
119#endif
120
121 // Don't rewrite the use in the newly inserted intrinsic.
122 if (const auto *II = dyn_cast<IntrinsicInst>(Val: U->getUser()))
123 if (II->getIntrinsicID() == Intrinsic::callbr_landingpad)
124 continue;
125
126 // If the Use is in the same BasicBlock as the Intrinsic call, replace
127 // the Use with the value of the Intrinsic call.
128 if (isInSameBasicBlock(U: *U, BB: LandingPad)) {
129 U->set(Intrinsic);
130 continue;
131 }
132
133 // If the Use is dominated by the default dest, do not touch it.
134 if (DT.dominates(BB: DefaultDest, U: *U))
135 continue;
136
137 SSAUpdate.RewriteUse(U&: *U);
138 }
139}
140
141static bool splitCriticalEdges(CallBrInst *CBR, DominatorTree *DT) {
142 bool Changed = false;
143
144 CriticalEdgeSplittingOptions Options(DT);
145 Options.setMergeIdenticalEdges();
146
147 // The indirect destination might be duplicated between another parameter...
148 //
149 // %0 = callbr ... [label %x, label %x]
150 //
151 // ...hence MergeIdenticalEdges and AllowIndentical edges, but we don't need
152 // to split the default destination if it's duplicated between an indirect
153 // destination...
154 //
155 // %1 = callbr ... to label %x [label %x]
156 //
157 // ...hence starting at 1 and checking against successor 0 (aka the default
158 // destination).
159 for (unsigned I = 1, E = CBR->getNumSuccessors(); I != E; ++I)
160 if (CBR->getSuccessor(i: I) == CBR->getSuccessor(i: 0) ||
161 isCriticalEdge(TI: CBR, SuccNum: I, /*AllowIdenticalEdges*/ true))
162 if (SplitKnownCriticalEdge(TI: CBR, SuccNum: I, Options))
163 Changed = true;
164
165 return Changed;
166}
167
168/// Create a separate SSA definition in each indirect target (via
169/// llvm.callbr.landingpad). This may require splitting critical edges so we
170/// have a location to place the intrinsic. Then remap users of the original
171/// callbr output SSA value to instead point to the appropriate
172/// llvm.callbr.landingpad value.
173static bool insertIntrinsicCalls(CallBrInst *CBR, DominatorTree &DT) {
174 bool Changed = false;
175 SmallPtrSet<const BasicBlock *, 4> Visited;
176 IRBuilder<> Builder(CBR->getContext());
177
178 if (!CBR->getNumIndirectDests())
179 return false;
180
181 SSAUpdater SSAUpdate;
182 SSAUpdate.Initialize(Ty: CBR->getType(), Name: CBR->getName());
183 SSAUpdate.AddAvailableValue(BB: CBR->getParent(), V: CBR);
184 SSAUpdate.AddAvailableValue(BB: CBR->getDefaultDest(), V: CBR);
185
186 for (BasicBlock *IndDest : CBR->getIndirectDests()) {
187 if (!Visited.insert(Ptr: IndDest).second)
188 continue;
189
190 Builder.SetInsertPoint(&*IndDest->begin());
191 CallInst *Intrinsic = Builder.CreateIntrinsic(
192 RetTy: CBR->getType(), ID: Intrinsic::callbr_landingpad, Args: {CBR});
193 SSAUpdate.AddAvailableValue(BB: IndDest, V: Intrinsic);
194 updateSSA(DT, CBR, Intrinsic, SSAUpdate);
195 Changed = true;
196 }
197
198 return Changed;
199}
200
201static bool processCallBrInst(Function &F, CallBrInst *CBR, DominatorTree *DT) {
202 bool Changed = false;
203
204 Changed |= splitCriticalEdges(CBR, DT);
205 Changed |= insertIntrinsicCalls(CBR, DT&: *DT);
206
207 return Changed;
208}
209
210static SmallVector<CallBrInst *, 2> findCallBrs(Function &F) {
211 SmallVector<CallBrInst *, 2> CBRs;
212 for (BasicBlock &BB : F)
213 if (auto *CBR = dyn_cast<CallBrInst>(Val: BB.getTerminator()))
214 if (!CBR->getType()->isVoidTy() && !CBR->use_empty())
215 CBRs.push_back(Elt: CBR);
216 return CBRs;
217}
218
219static bool runImpl(Function &F, ArrayRef<CallBrInst *> CBRs,
220 DominatorTree *DT) {
221 bool Changed = false;
222
223 for (CallBrInst *CBR : CBRs)
224 Changed |= processCallBrInst(F, CBR, DT);
225
226 return Changed;
227}
228
229bool InlineAsmPrepare::runOnFunction(Function &F) {
230 SmallVector<CallBrInst *, 2> CBRs = findCallBrs(F);
231 if (CBRs.empty())
232 return false;
233
234 // It's highly likely that most programs do not contain CallBrInsts. Follow a
235 // similar pattern from SafeStackLegacyPass::runOnFunction to reuse previous
236 // domtree analysis if available, otherwise compute it lazily. This avoids
237 // forcing Dominator Tree Construction at -O0 for programs that likely do not
238 // contain CallBrInsts. It does pessimize programs with callbr at higher
239 // optimization levels, as the DominatorTree created here is not reused by
240 // subsequent passes.
241 DominatorTree *DT;
242 std::optional<DominatorTree> LazilyComputedDomTree;
243 if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
244 DT = &DTWP->getDomTree();
245 else {
246 LazilyComputedDomTree.emplace(args&: F);
247 DT = &*LazilyComputedDomTree;
248 }
249
250 return runImpl(F, CBRs, DT);
251}
252
253PreservedAnalyses InlineAsmPreparePass::run(Function &F,
254 FunctionAnalysisManager &FAM) {
255 SmallVector<CallBrInst *, 2> CBRs = findCallBrs(F);
256 if (CBRs.empty())
257 return PreservedAnalyses::all();
258
259 auto *DT = &FAM.getResult<DominatorTreeAnalysis>(IR&: F);
260
261 if (runImpl(F, CBRs, DT)) {
262 PreservedAnalyses PA;
263 PA.preserve<DominatorTreeAnalysis>();
264 return PA;
265 }
266
267 return PreservedAnalyses::all();
268}
269