//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements routines for translating from LLVM IR into SelectionDAG IR.
//
//===----------------------------------------------------------------------===//

#include "SelectionDAGBuilder.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
#include <deque>
#include <iterator>
#include <limits>
#include <optional>
#include <tuple>

using namespace llvm;
using namespace PatternMatch;
using namespace SwitchCG;

#define DEBUG_TYPE "isel"

/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;

static cl::opt<bool>
    InsertAssertAlign("insert-assert-align", cl::init(true),
                      cl::desc("Insert the experimental `assertalign` node."),
                      cl::ReallyHidden);

static cl::opt<unsigned, true>
    LimitFPPrecision("limit-float-precision",
                     cl::desc("Generate low-precision inline sequences "
                              "for some float libcalls"),
                     cl::location(LimitFloatPrecision), cl::Hidden,
                     cl::init(0));

static cl::opt<unsigned> SwitchPeelThreshold(
    "switch-peel-threshold", cl::Hidden, cl::init(66),
    cl::desc("Set the case probability threshold for peeling the case from a "
             "switch statement. A value greater than 100 will void this "
             "optimization"));

// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
//   %buffer = alloca [4096 x i8]
//   %data = load [4096 x i8]* %argPtr
//   store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;

static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      SDValue InChain,
                                      std::optional<CallingConv::ID> CC);

/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
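///
/// For example, assembling an i96 value from three i32 parts first combines
/// the two low parts into an i64 with BUILD_PAIR, then extends both pieces to
/// i96, shifts the remaining odd part left by 64 bits, and ORs it in.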
static SDValue
getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
                 unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V,
                 SDValue InChain,
                 std::optional<CallingConv::ID> CC = std::nullopt,
                 std::optional<ISD::NodeType> AssertOp = std::nullopt) {
  // Let the target assemble the parts if it wants to
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
                                                   PartVT, ValueVT, CC))
    return Val;

  if (ValueVT.isVector())
    return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
                                  InChain, CC);

  assert(NumParts > 0 && "No parts to assemble!");
  SDValue Val = Parts[0];

  if (NumParts > 1) {
    // Assemble the value from multiple parts.
    if (ValueVT.isInteger()) {
      unsigned PartBits = PartVT.getSizeInBits();
      unsigned ValueBits = ValueVT.getSizeInBits();

      // Assemble the power of 2 part.
      unsigned RoundParts = llvm::bit_floor(NumParts);
      unsigned RoundBits = PartBits * RoundParts;
      EVT RoundVT = RoundBits == ValueBits ?
        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
      SDValue Lo, Hi;

      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);

      if (RoundParts > 2) {
        Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, PartVT, HalfVT, V,
                              InChain);
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, RoundParts / 2,
                              PartVT, HalfVT, V, InChain);
      } else {
        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
      }

      if (DAG.getDataLayout().isBigEndian())
        std::swap(Lo, Hi);

      Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);

      if (RoundParts < NumParts) {
        // Assemble the trailing non-power-of-2 part.
        unsigned OddParts = NumParts - RoundParts;
        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
                              OddVT, V, InChain, CC);

        // Combine the round and odd parts.
        Lo = Val;
        if (DAG.getDataLayout().isBigEndian())
          std::swap(Lo, Hi);
        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
        Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
        Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
                         DAG.getConstant(Lo.getValueSizeInBits(), DL,
                                         TLI.getShiftAmountTy(
                                             TotalVT, DAG.getDataLayout())));
        Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
        Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
      }
    } else if (PartVT.isFloatingPoint()) {
      // FP split into multiple FP parts (for ppcf128)
      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
             "Unexpected split");
      SDValue Lo, Hi;
      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
      if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
        std::swap(Lo, Hi);
      Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
    } else {
      // FP split into integer parts (soft fp)
      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
             !PartVT.isVector() && "Unexpected split");
      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V,
                             InChain, CC);
    }
  }

  // There is now one part, held in Val. Correct it to match ValueVT.
  // PartEVT is the type of the register class that holds the value.
  // ValueVT is the type of the inline asm operation.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
      ValueVT.bitsLT(PartEVT)) {
    // For an FP value in an integer part, we need to truncate to the right
    // width first.
    PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
    Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
  }

  // Handle types that have the same size.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  // Handle types with different sizes.
  if (PartEVT.isInteger() && ValueVT.isInteger()) {
    if (ValueVT.bitsLT(PartEVT)) {
      // For a truncate, see if we have any information to
      // indicate whether the truncated bits will always be
      // zero or sign-extension.
      if (AssertOp)
        Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
                          DAG.getValueType(ValueVT));
      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    }
    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
  }

  if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
    // FP_ROUND's are always exact here.
    if (ValueVT.bitsLT(Val.getValueType())) {

      SDValue NoChange =
          DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));

      if (DAG.getMachineFunction().getFunction().getAttributes().hasFnAttr(
              llvm::Attribute::StrictFP)) {
        return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
                           DAG.getVTList(ValueVT, MVT::Other), InChain, Val,
                           NoChange);
      }

      return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, NoChange);
    }

    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
  }

  // Handle MMX to a narrower integer type by bitcasting MMX to integer and
  // then truncating.
  if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
      ValueVT.bitsLT(PartEVT)) {
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
    return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
  }

  report_fatal_error("Unknown mismatch in getCopyFromParts!");
}

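/// Emit a context error for \p ErrMsg, attaching it to the instruction behind
/// \p V when one is available and noting a possibly invalid constraint when
/// the culprit is an inline asm call.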
static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
                                              const Twine &ErrMsg) {
  const Instruction *I = dyn_cast_or_null<Instruction>(V);
  if (!V)
    return Ctx.emitError(ErrMsg);

  const char *AsmError = ", possible invalid constraint for vector type";
  if (const CallInst *CI = dyn_cast<CallInst>(I))
    if (CI->isInlineAsm())
      return Ctx.emitError(I, ErrMsg + AsmError);

  return Ctx.emitError(I, ErrMsg);
}

/// getCopyFromPartsVector - Create a value that contains the specified legal
/// parts combined into the value they represent. If the parts combine to a
/// type larger than ValueVT then AssertOp can be used to specify whether the
/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      SDValue InChain,
                                      std::optional<CallingConv::ID> CallConv) {
  assert(ValueVT.isVector() && "Not a vector value");
  assert(NumParts > 0 && "No parts to assemble!");
  const bool IsABIRegCopy = CallConv.has_value();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  // Handle a multi-element vector.
  if (NumParts > 1) {
    EVT IntermediateVT;
    MVT RegisterVT;
    unsigned NumIntermediates;
    unsigned NumRegs;

    if (IsABIRegCopy) {
      NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
          *DAG.getContext(), *CallConv, ValueVT, IntermediateVT,
          NumIntermediates, RegisterVT);
    } else {
      NumRegs =
          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                     NumIntermediates, RegisterVT);
    }

    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
    NumParts = NumRegs; // Silence a compiler warning.
    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
    assert(RegisterVT.getSizeInBits() ==
           Parts[0].getSimpleValueType().getSizeInBits() &&
           "Part type sizes don't match!");

    // Assemble the parts into intermediate operands.
    SmallVector<SDValue, 8> Ops(NumIntermediates);
    if (NumIntermediates == NumParts) {
      // If the register was not expanded, truncate or copy the value,
      // as appropriate.
      for (unsigned i = 0; i != NumParts; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, PartVT, IntermediateVT,
                                  V, InChain, CallConv);
    } else if (NumParts > 0) {
      // If the intermediate type was expanded, build the intermediate
      // operands from the parts.
      assert(NumParts % NumIntermediates == 0 &&
             "Must expand into a divisible number of parts!");
      unsigned Factor = NumParts / NumIntermediates;
      for (unsigned i = 0; i != NumIntermediates; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, PartVT,
                                  IntermediateVT, V, InChain, CallConv);
    }

    // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
    // intermediate operands.
    EVT BuiltVectorTy =
        IntermediateVT.isVector()
            ? EVT::getVectorVT(
                  *DAG.getContext(), IntermediateVT.getScalarType(),
                  IntermediateVT.getVectorElementCount() * NumParts)
            : EVT::getVectorVT(*DAG.getContext(),
                               IntermediateVT.getScalarType(),
                               NumIntermediates);
    Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
                                                : ISD::BUILD_VECTOR,
                      DL, BuiltVectorTy, Ops);
  }

  // There is now one part, held in Val. Correct it to match ValueVT.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isVector()) {
    // Vector/Vector bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

    // If the parts vector has more elements than the value vector, then we
    // have a vector widening case (e.g. <2 x float> -> <4 x float>).
    // Extract the elements we want.
    if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {
      assert((PartEVT.getVectorElementCount().getKnownMinValue() >
              ValueVT.getVectorElementCount().getKnownMinValue()) &&
             (PartEVT.getVectorElementCount().isScalable() ==
              ValueVT.getVectorElementCount().isScalable()) &&
             "Cannot narrow, it would be a lossy transformation");
      PartEVT =
          EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(),
                           ValueVT.getVectorElementCount());
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, PartEVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      if (PartEVT == ValueVT)
        return Val;
      if (PartEVT.isInteger() && ValueVT.isFloatingPoint())
        return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

      // Vector/Vector bitcast (e.g. <2 x bfloat> -> <2 x half>).
      if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
        return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    }

    // Promoted vector extract
    return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
  }

  // Trivial bitcast if the types are the same size and the destination
  // vector type is legal.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
      TLI.isTypeLegal(ValueVT))
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  if (ValueVT.getVectorNumElements() != 1) {
    // Certain ABIs require that vectors are passed as integers. For vectors
    // that are the same size, this is an obvious bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    } else if (ValueVT.bitsLT(PartEVT)) {
      const uint64_t ValueSize = ValueVT.getFixedSizeInBits();
      EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
      // Drop the extra bits.
      Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val);
      return DAG.getBitcast(ValueVT, Val);
    }

    diagnosePossiblyInvalidConstraint(
        *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
    return DAG.getUNDEF(ValueVT);
  }

  // Handle cases such as i8 -> <1 x i1>
  EVT ValueSVT = ValueVT.getVectorElementType();
  if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) {
    unsigned ValueSize = ValueSVT.getSizeInBits();
    if (ValueSize == PartEVT.getSizeInBits()) {
      Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val);
    } else if (ValueSVT.isFloatingPoint() && PartEVT.isInteger()) {
      // It's possible a scalar floating point type gets softened to integer
      // and then promoted to a larger integer. If PartEVT is the larger
      // integer we need to truncate it and then bitcast to the FP type.
      assert(ValueSVT.bitsLT(PartEVT) && "Unexpected types");
      EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
      Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val);
      Val = DAG.getBitcast(ValueSVT, Val);
    } else {
      Val = ValueVT.isFloatingPoint()
                ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
                : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
    }
  }

  return DAG.getBuildVector(ValueVT, DL, Val);
}

static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V,
                                 std::optional<CallingConv::ID> CallConv);

/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
static void
getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
               unsigned NumParts, MVT PartVT, const Value *V,
               std::optional<CallingConv::ID> CallConv = std::nullopt,
               ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
  // Let the target split the parts if it wants to
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
                                      CallConv))
    return;
  EVT ValueVT = Val.getValueType();

  // Handle the vector case separately.
  if (ValueVT.isVector())
    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
                                CallConv);

  unsigned OrigNumParts = NumParts;
  assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
         "Copying to an illegal type!");

  if (NumParts == 0)
    return;

  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
  EVT PartEVT = PartVT;
  if (PartEVT == ValueVT) {
    assert(NumParts == 1 && "No-op copy with multiple parts!");
    Parts[0] = Val;
    return;
  }

  unsigned PartBits = PartVT.getSizeInBits();
  if (NumParts * PartBits > ValueVT.getSizeInBits()) {
    // If the parts cover more bits than the value has, promote the value.
    if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
      assert(NumParts == 1 && "Do not know what to promote to!");
      Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
    } else {
      if (ValueVT.isFloatingPoint()) {
        // FP values need to be bitcast, then extended if they are being put
        // into a larger container.
        ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      }
      assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
             ValueVT.isInteger() &&
             "Unknown mismatch!");
      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
      Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
      if (PartVT == MVT::x86mmx)
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }
  } else if (PartBits == ValueVT.getSizeInBits()) {
    // Different types of the same size.
    assert(NumParts == 1 && PartEVT != ValueVT);
    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
    // If the parts cover fewer bits than the value has, truncate the value.
    assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
           ValueVT.isInteger() &&
           "Unknown mismatch!");
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    if (PartVT == MVT::x86mmx)
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  }

  // The value may have changed - recompute ValueVT.
  ValueVT = Val.getValueType();
  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
         "Failed to tile the value with PartVT!");

  if (NumParts == 1) {
    if (PartEVT != ValueVT) {
      diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
                                        "scalar-to-vector conversion failed");
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }

    Parts[0] = Val;
    return;
  }

  // Expand the value into multiple parts.
  if (NumParts & (NumParts - 1)) {
    // The number of parts is not a power of 2. Split off and copy the tail.
    assert(PartVT.isInteger() && ValueVT.isInteger() &&
           "Do not know what to expand to!");
    unsigned RoundParts = llvm::bit_floor(NumParts);
    unsigned RoundBits = RoundParts * PartBits;
    unsigned OddParts = NumParts - RoundParts;
    SDValue OddVal =
        DAG.getNode(ISD::SRL, DL, ValueVT, Val,
                    DAG.getShiftAmountConstant(RoundBits, ValueVT, DL));

    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
                   CallConv);

    if (DAG.getDataLayout().isBigEndian())
      // The odd parts were reversed by getCopyToParts - unreverse them.
      std::reverse(Parts + RoundParts, Parts + NumParts);

    NumParts = RoundParts;
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
  }

  // The number of parts is a power of 2. Repeatedly bisect the value using
  // EXTRACT_ELEMENT.
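  // For example, with NumParts == 4 the first pass splits Val into two halves
  // (Parts[0], Parts[2]) and the second pass splits each half into quarters.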
  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
                         EVT::getIntegerVT(*DAG.getContext(),
                                           ValueVT.getSizeInBits()),
                         Val);

  for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
    for (unsigned i = 0; i < NumParts; i += StepSize) {
      unsigned ThisBits = StepSize * PartBits / 2;
      EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
      SDValue &Part0 = Parts[i];
      SDValue &Part1 = Parts[i+StepSize/2];

      Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
      Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(0, DL));

      if (ThisBits == PartBits && ThisVT != PartVT) {
        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
      }
    }
  }

  if (DAG.getDataLayout().isBigEndian())
    std::reverse(Parts, Parts + OrigNumParts);
}

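/// Attempt to widen \p Val so that it matches the wider vector type \p PartVT,
/// padding the extra lanes with undef. Returns an empty SDValue when \p PartVT
/// is not a vector type the value can be widened to.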
static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
                                     const SDLoc &DL, EVT PartVT) {
  if (!PartVT.isVector())
    return SDValue();

  EVT ValueVT = Val.getValueType();
  EVT PartEVT = PartVT.getVectorElementType();
  EVT ValueEVT = ValueVT.getVectorElementType();
  ElementCount PartNumElts = PartVT.getVectorElementCount();
  ElementCount ValueNumElts = ValueVT.getVectorElementCount();

  // We only support widening vectors with equivalent element types and
  // fixed/scalable properties. If a target needs to widen a fixed-length type
  // to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
  if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
      PartNumElts.isScalable() != ValueNumElts.isScalable())
    return SDValue();

  // Have a try for bf16 because some targets share its ABI with fp16.
  if (ValueEVT == MVT::bf16 && PartEVT == MVT::f16) {
    assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
           "Cannot widen to illegal type");
    Val = DAG.getNode(ISD::BITCAST, DL,
                      ValueVT.changeVectorElementType(MVT::f16), Val);
  } else if (PartEVT != ValueEVT) {
    return SDValue();
  }

  // Widening a scalable vector to another scalable vector is done by inserting
  // the vector into a larger undef one.
  if (PartNumElts.isScalable())
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                       Val, DAG.getVectorIdxConstant(0, DL));

  // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
  // undef elements.
  SmallVector<SDValue, 16> Ops;
  DAG.ExtractVectorElements(Val, Ops);
  SDValue EltUndef = DAG.getUNDEF(PartEVT);
  Ops.append((PartNumElts - ValueNumElts).getFixedValue(), EltUndef);

  // FIXME: Use CONCAT for 2x -> 4x.
  return DAG.getBuildVector(PartVT, DL, Ops);
}

/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V,
                                 std::optional<CallingConv::ID> CallConv) {
  EVT ValueVT = Val.getValueType();
  assert(ValueVT.isVector() && "Not a vector");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const bool IsABIRegCopy = CallConv.has_value();

  if (NumParts == 1) {
    EVT PartEVT = PartVT;
    if (PartEVT == ValueVT) {
      // Nothing to do.
    } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
      // Bitconvert vector->vector case.
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
      Val = Widened;
    } else if (PartVT.isVector() &&
               PartEVT.getVectorElementType().bitsGE(
                   ValueVT.getVectorElementType()) &&
               PartEVT.getVectorElementCount() ==
                   ValueVT.getVectorElementCount()) {

      // Promoted vector extract
      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
    } else if (PartEVT.isVector() &&
               PartEVT.getVectorElementType() !=
                   ValueVT.getVectorElementType() &&
               TLI.getTypeAction(*DAG.getContext(), ValueVT) ==
                   TargetLowering::TypeWidenVector) {
      // Combination of widening and promotion.
      EVT WidenVT =
          EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(),
                           PartVT.getVectorElementCount());
      SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT);
      Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT);
    } else {
      // Don't extract an integer from a float vector. This can happen if the
      // FP type gets softened to integer and then promoted. The promotion
      // prevents it from being picked up by the earlier bitcast case.
      if (ValueVT.getVectorElementCount().isScalar() &&
          (!ValueVT.isFloatingPoint() || !PartVT.isInteger())) {
        // If we reach this condition and PartVT is FP, this means that
        // ValueVT is also FP and both have a different size, otherwise we
        // would have bitcasted them. Producing an EXTRACT_VECTOR_ELT here
        // would be invalid since that would mean the smaller FP type has to
        // be extended to the larger one.
        if (PartVT.isFloatingPoint()) {
          Val = DAG.getBitcast(ValueVT.getScalarType(), Val);
          Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
        } else
          Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
                            DAG.getVectorIdxConstant(0, DL));
      } else {
        uint64_t ValueSize = ValueVT.getFixedSizeInBits();
        assert(PartVT.getFixedSizeInBits() > ValueSize &&
               "lossy conversion of vector to scalar type");
        EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
        Val = DAG.getBitcast(IntermediateType, Val);
        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
      }
    }

    assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
    Parts[0] = Val;
    return;
  }

  // Handle a multi-element vector.
  EVT IntermediateVT;
  MVT RegisterVT;
  unsigned NumIntermediates;
  unsigned NumRegs;
  if (IsABIRegCopy) {
    NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
        *DAG.getContext(), *CallConv, ValueVT, IntermediateVT, NumIntermediates,
        RegisterVT);
  } else {
    NumRegs =
        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                   NumIntermediates, RegisterVT);
  }

  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
  NumParts = NumRegs; // Silence a compiler warning.
  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");

  assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
         "Mixing scalable and fixed vectors when copying in parts");

  std::optional<ElementCount> DestEltCnt;

  if (IntermediateVT.isVector())
    DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
  else
    DestEltCnt = ElementCount::getFixed(NumIntermediates);

  EVT BuiltVectorTy = EVT::getVectorVT(
      *DAG.getContext(), IntermediateVT.getScalarType(), *DestEltCnt);

  if (ValueVT == BuiltVectorTy) {
    // Nothing to do.
  } else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
    // Bitconvert vector->vector case.
    Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
  } else {
    if (BuiltVectorTy.getVectorElementType().bitsGT(
            ValueVT.getVectorElementType())) {
      // Integer promotion.
      ValueVT = EVT::getVectorVT(*DAG.getContext(),
                                 BuiltVectorTy.getVectorElementType(),
                                 ValueVT.getVectorElementCount());
      Val = DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
    }

    if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
      Val = Widened;
    }
  }

  assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");

  // Split the vector into intermediate operands.
  SmallVector<SDValue, 8> Ops(NumIntermediates);
  for (unsigned i = 0; i != NumIntermediates; ++i) {
    if (IntermediateVT.isVector()) {
      // This does something sensible for scalable vectors - see the
      // definition of EXTRACT_SUBVECTOR for further details.
      unsigned IntermediateNumElts = IntermediateVT.getVectorMinNumElements();
      Ops[i] =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
                      DAG.getVectorIdxConstant(i * IntermediateNumElts, DL));
    } else {
      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
                           DAG.getVectorIdxConstant(i, DL));
    }
  }

  // Split the intermediate operands into legal parts.
  if (NumParts == NumIntermediates) {
    // If the register was not expanded, promote or copy the value,
    // as appropriate.
    for (unsigned i = 0; i != NumParts; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);
  } else if (NumParts > 0) {
    // If the intermediate type was expanded, split each value into
    // legal parts.
    assert(NumIntermediates != 0 && "division by zero");
    assert(NumParts % NumIntermediates == 0 &&
           "Must expand into a divisible number of parts!");
    unsigned Factor = NumParts / NumIntermediates;
    for (unsigned i = 0; i != NumIntermediates; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V,
                     CallConv);
  }
}

RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
                           EVT valuevt, std::optional<CallingConv::ID> CC)
    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
      RegCount(1, regs.size()), CallConv(CC) {}

RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
                           const DataLayout &DL, unsigned Reg, Type *Ty,
                           std::optional<CallingConv::ID> CC) {
  ComputeValueVTs(TLI, DL, Ty, ValueVTs);

  CallConv = CC;

  for (EVT ValueVT : ValueVTs) {
    unsigned NumRegs =
        isABIMangled()
            ? TLI.getNumRegistersForCallingConv(Context, *CC, ValueVT)
            : TLI.getNumRegisters(Context, ValueVT);
    MVT RegisterVT =
        isABIMangled()
            ? TLI.getRegisterTypeForCallingConv(Context, *CC, ValueVT)
            : TLI.getRegisterType(Context, ValueVT);
    for (unsigned i = 0; i != NumRegs; ++i)
      Regs.push_back(Reg + i);
    RegVTs.push_back(RegisterVT);
    RegCount.push_back(NumRegs);
    Reg += NumRegs;
  }
}

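/// Emit a series of CopyFromReg nodes that copy values out of the registers
/// backing this RegsForValue and reassemble the register-sized parts into the
/// original value types, optionally threading a glue value through the copies.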
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
                                      FunctionLoweringInfo &FuncInfo,
                                      const SDLoc &dl, SDValue &Chain,
                                      SDValue *Glue, const Value *V) const {
  // A Value with type {} or [0 x %t] needs no registers.
  if (ValueVTs.empty())
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Assemble the legal parts into the final values.
  SmallVector<SDValue, 4> Values(ValueVTs.size());
  SmallVector<SDValue, 8> Parts;
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    // Copy the legal parts from the registers.
    EVT ValueVT = ValueVTs[Value];
    unsigned NumRegs = RegCount[Value];
    MVT RegisterVT = isABIMangled()
                         ? TLI.getRegisterTypeForCallingConv(
                               *DAG.getContext(), *CallConv, RegVTs[Value])
                         : RegVTs[Value];

    Parts.resize(NumRegs);
    for (unsigned i = 0; i != NumRegs; ++i) {
      SDValue P;
      if (!Glue) {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
      } else {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Glue);
        *Glue = P.getValue(2);
      }

      Chain = P.getValue(1);
      Parts[i] = P;

      // If the source register was virtual and if we know something about it,
      // add an assert node.
      if (!Register::isVirtualRegister(Regs[Part + i]) ||
          !RegisterVT.isInteger())
        continue;

      const FunctionLoweringInfo::LiveOutInfo *LOI =
          FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
      if (!LOI)
        continue;

      unsigned RegSize = RegisterVT.getScalarSizeInBits();
      unsigned NumSignBits = LOI->NumSignBits;
      unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();

      if (NumZeroBits == RegSize) {
        // The current value is a zero.
        // Explicitly express that as it would be easier for
        // optimizations to kick in.
        Parts[i] = DAG.getConstant(0, dl, RegisterVT);
        continue;
      }

      // FIXME: We capture more information than the dag can represent. For
      // now, just use the tightest assertzext/assertsext possible.
      bool isSExt;
      EVT FromVT(MVT::Other);
      if (NumZeroBits) {
        FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
        isSExt = false;
      } else if (NumSignBits > 1) {
        FromVT =
            EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
        isSExt = true;
      } else {
        continue;
      }
      // Add an assertion node.
      assert(FromVT != MVT::Other);
      Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
                             RegisterVT, P, DAG.getValueType(FromVT));
    }

    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
                                     RegisterVT, ValueVT, V, Chain, CallConv);
    Part += NumRegs;
    Parts.clear();
  }

  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}

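/// Emit a series of CopyToReg nodes that copy the specified value into the
/// registers backing this RegsForValue, first splitting it into legal
/// register-sized parts.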
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
                                 const SDLoc &dl, SDValue &Chain, SDValue *Glue,
                                 const Value *V,
                                 ISD::NodeType PreferredExtendType) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  ISD::NodeType ExtendKind = PreferredExtendType;

  // Get the list of the value's legal parts.
  unsigned NumRegs = Regs.size();
  SmallVector<SDValue, 8> Parts(NumRegs);
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    unsigned NumParts = RegCount[Value];

    MVT RegisterVT = isABIMangled()
                         ? TLI.getRegisterTypeForCallingConv(
                               *DAG.getContext(), *CallConv, RegVTs[Value])
                         : RegVTs[Value];

    if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
      ExtendKind = ISD::ZERO_EXTEND;

    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
                   NumParts, RegisterVT, V, CallConv, ExtendKind);
    Part += NumParts;
  }

  // Copy the parts into the registers.
  SmallVector<SDValue, 8> Chains(NumRegs);
  for (unsigned i = 0; i != NumRegs; ++i) {
    SDValue Part;
    if (!Glue) {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
    } else {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Glue);
      *Glue = Part.getValue(1);
    }

    Chains[i] = Part.getValue(0);
  }

  if (NumRegs == 1 || Glue)
    // If NumRegs > 1 && Glue is used then the use of the last CopyToReg is
    // flagged to it. That is the CopyToReg nodes and the user are considered
    // a single scheduling unit. If we create a TokenFactor and return it as
    // chain, then the TokenFactor is both a predecessor (operand) of the
    // user as well as a successor (the TF operands are flagged to the user).
    // c1, f1 = CopyToReg
    // c2, f2 = CopyToReg
    // c3 = TokenFactor c1, c2
    // ...
    //   = op c3, ..., f2
    Chain = Chains[NumRegs-1];
  else
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}

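/// Add this value to the specified inline asm operand list: first the flag
/// word describing the operand kind and register count, then one register
/// operand per register backing the value.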
void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching,
                                        unsigned MatchingIdx, const SDLoc &dl,
                                        SelectionDAG &DAG,
                                        std::vector<SDValue> &Ops) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  InlineAsm::Flag Flag(Code, Regs.size());
  if (HasMatching)
    Flag.setMatchingOp(MatchingIdx);
  else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) {
    // Put the register class of the virtual registers in the flag word. That
    // way, later passes can recompute register class constraints for inline
    // assembly as well as normal instructions.
    // Don't do this for tied operands that can use the regclass information
    // from the def.
    const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
    const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
    Flag.setRegClass(RC->getID());
  }

  SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
  Ops.push_back(Res);

  if (Code == InlineAsm::Kind::Clobber) {
    // Clobbers should always have a 1:1 mapping with registers, and may
    // reference registers that have illegal (e.g. vector) types. Hence, we
    // shouldn't try to apply any sort of splitting logic to them.
    assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
           "No 1:1 mapping from clobbers to regs?");
    Register SP = TLI.getStackPointerRegisterToSaveRestore();
    (void)SP;
    for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
      Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
      assert(
          (Regs[I] != SP ||
           DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
          "If we clobbered the stack pointer, MFI should know about it.");
    }
    return;
  }

  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
    MVT RegisterVT = RegVTs[Value];
    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value],
                                           RegisterVT);
    for (unsigned i = 0; i != NumRegs; ++i) {
      assert(Reg < Regs.size() && "Mismatch in # registers expected");
      unsigned TheReg = Regs[Reg++];
      Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
    }
  }
}

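/// Return a list of the registers held by this RegsForValue, each paired with
/// the size in bits of the value part that register carries.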
SmallVector<std::pair<unsigned, TypeSize>, 4>
RegsForValue::getRegsAndSizes() const {
  SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec;
  unsigned I = 0;
  for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
    unsigned RegCount = std::get<0>(CountAndVT);
    MVT RegisterVT = std::get<1>(CountAndVT);
    TypeSize RegisterSize = RegisterVT.getSizeInBits();
    for (unsigned E = I + RegCount; I != E; ++I)
      OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
  }
  return OutVec;
}

void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
                               AssumptionCache *ac,
                               const TargetLibraryInfo *li) {
  AA = aa;
  AC = ac;
  GFI = gfi;
  LibInfo = li;
  Context = DAG.getContext();
  LPadToCallSiteMap.clear();
  SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
  AssignmentTrackingEnabled = isAssignmentTrackingEnabled(
      *DAG.getMachineFunction().getFunction().getParent());
}

void SelectionDAGBuilder::clear() {
  NodeMap.clear();
  UnusedArgNodeMap.clear();
  PendingLoads.clear();
  PendingExports.clear();
  PendingConstrainedFP.clear();
  PendingConstrainedFPStrict.clear();
  CurInst = nullptr;
  HasTailCall = false;
  SDNodeOrder = LowestSDNodeOrder;
  StatepointLowering.clear();
}

void SelectionDAGBuilder::clearDanglingDebugInfo() {
  DanglingDebugInfoMap.clear();
}

// Update DAG root to include dependencies on Pending chains.
SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) {
  SDValue Root = DAG.getRoot();

  if (Pending.empty())
    return Root;

  // Add current root to PendingChains, unless we already indirectly
  // depend on it.
  if (Root.getOpcode() != ISD::EntryToken) {
    unsigned i = 0, e = Pending.size();
    for (; i != e; ++i) {
      assert(Pending[i].getNode()->getNumOperands() > 1);
      if (Pending[i].getNode()->getOperand(0) == Root)
        break; // Don't add the root if we already indirectly depend on it.
    }

    if (i == e)
      Pending.push_back(Root);
  }

  if (Pending.size() == 1)
    Root = Pending[0];
  else
    Root = DAG.getTokenFactor(getCurSDLoc(), Pending);

  DAG.setRoot(Root);
  Pending.clear();
  return Root;
}

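// Flush PendingLoads and return the updated root; memory nodes such as stores
// chain from the root returned here.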
SDValue SelectionDAGBuilder::getMemoryRoot() {
  return updateRoot(PendingLoads);
}

SDValue SelectionDAGBuilder::getRoot() {
  // Chain up all pending constrained intrinsics together with all
  // pending loads, by simply appending them to PendingLoads and
  // then calling getMemoryRoot().
  PendingLoads.reserve(PendingLoads.size() +
                       PendingConstrainedFP.size() +
                       PendingConstrainedFPStrict.size());
  PendingLoads.append(PendingConstrainedFP.begin(),
                      PendingConstrainedFP.end());
  PendingLoads.append(PendingConstrainedFPStrict.begin(),
                      PendingConstrainedFPStrict.end());
  PendingConstrainedFP.clear();
  PendingConstrainedFPStrict.clear();
  return getMemoryRoot();
}

SDValue SelectionDAGBuilder::getControlRoot() {
  // We need to emit pending fpexcept.strict constrained intrinsics,
  // so append them to the PendingExports list.
  PendingExports.append(PendingConstrainedFPStrict.begin(),
                        PendingConstrainedFPStrict.end());
  PendingConstrainedFPStrict.clear();
  return updateRoot(PendingExports);
}

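/// Lower a dbg_declare of \p Address into an SDDbgValue, preferring a frame
/// index or function-argument location when one is available.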
1178 | void SelectionDAGBuilder::handleDebugDeclare(Value *Address, |
1179 | DILocalVariable *Variable, |
1180 | DIExpression *Expression, |
1181 | DebugLoc DL) { |
1182 | assert(Variable && "Missing variable" ); |
1183 | |
1184 | // Check if address has undef value. |
1185 | if (!Address || isa<UndefValue>(Val: Address) || |
1186 | (Address->use_empty() && !isa<Argument>(Val: Address))) { |
1187 | LLVM_DEBUG( |
1188 | dbgs() |
1189 | << "dbg_declare: Dropping debug info (bad/undef/unused-arg address)\n" ); |
1190 | return; |
1191 | } |
1192 | |
1193 | bool IsParameter = Variable->isParameter() || isa<Argument>(Val: Address); |
1194 | |
1195 | SDValue &N = NodeMap[Address]; |
1196 | if (!N.getNode() && isa<Argument>(Val: Address)) |
1197 | // Check unused arguments map. |
1198 | N = UnusedArgNodeMap[Address]; |
1199 | SDDbgValue *SDV; |
1200 | if (N.getNode()) { |
1201 | if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Val: Address)) |
1202 | Address = BCI->getOperand(i_nocapture: 0); |
1203 | // Parameters are handled specially. |
1204 | auto *FINode = dyn_cast<FrameIndexSDNode>(Val: N.getNode()); |
1205 | if (IsParameter && FINode) { |
1206 | // Byval parameter. We have a frame index at this point. |
1207 | SDV = DAG.getFrameIndexDbgValue(Var: Variable, Expr: Expression, FI: FINode->getIndex(), |
1208 | /*IsIndirect*/ true, DL, O: SDNodeOrder); |
1209 | } else if (isa<Argument>(Val: Address)) { |
1210 | // Address is an argument, so try to emit its dbg value using |
1211 | // virtual register info from the FuncInfo.ValueMap. |
1212 | EmitFuncArgumentDbgValue(V: Address, Variable, Expr: Expression, DL, |
1213 | Kind: FuncArgumentDbgValueKind::Declare, N); |
1214 | return; |
1215 | } else { |
1216 | SDV = DAG.getDbgValue(Var: Variable, Expr: Expression, N: N.getNode(), R: N.getResNo(), |
1217 | IsIndirect: true, DL, O: SDNodeOrder); |
1218 | } |
1219 | DAG.AddDbgValue(DB: SDV, isParameter: IsParameter); |
1220 | } else { |
1221 | // If Address is an argument then try to emit its dbg value using |
1222 | // virtual register info from the FuncInfo.ValueMap. |
1223 | if (!EmitFuncArgumentDbgValue(V: Address, Variable, Expr: Expression, DL, |
1224 | Kind: FuncArgumentDbgValueKind::Declare, N)) { |
1225 | LLVM_DEBUG(dbgs() << "dbg_declare: Dropping debug info" |
1226 | << " (could not emit func-arg dbg_value)\n" ); |
1227 | } |
1228 | } |
1229 | return; |
1230 | } |
1231 | |
1232 | void SelectionDAGBuilder::visitDbgInfo(const Instruction &I) { |
1233 | // Add SDDbgValue nodes for any var locs here. Do so before updating |
1234 | // SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}. |
1235 | if (FunctionVarLocs const *FnVarLocs = DAG.getFunctionVarLocs()) { |
1236 | // Add SDDbgValue nodes for any var locs here. Do so before updating |
1237 | // SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}. |
1238 | for (auto It = FnVarLocs->locs_begin(Before: &I), End = FnVarLocs->locs_end(Before: &I); |
1239 | It != End; ++It) { |
1240 | auto *Var = FnVarLocs->getDILocalVariable(ID: It->VariableID); |
1241 | dropDanglingDebugInfo(Variable: Var, Expr: It->Expr); |
1242 | if (It->Values.isKillLocation(Expression: It->Expr)) { |
1243 | handleKillDebugValue(Var, Expr: It->Expr, DbgLoc: It->DL, Order: SDNodeOrder); |
1244 | continue; |
1245 | } |
1246 | SmallVector<Value *> Values(It->Values.location_ops()); |
1247 | if (!handleDebugValue(Values, Var, Expr: It->Expr, DbgLoc: It->DL, Order: SDNodeOrder, |
1248 | IsVariadic: It->Values.hasArgList())) { |
1249 | SmallVector<Value *, 4> Vals; |
1250 | for (Value *V : It->Values.location_ops()) |
1251 | Vals.push_back(Elt: V); |
1252 | addDanglingDebugInfo(Values&: Vals, |
1253 | Var: FnVarLocs->getDILocalVariable(ID: It->VariableID), |
1254 | Expr: It->Expr, IsVariadic: Vals.size() > 1, DL: It->DL, Order: SDNodeOrder); |
1255 | } |
1256 | } |
1257 | } |
1258 | |
  // We must skip DbgVariableRecords if they've already been processed above as
  // we have just emitted the debug values resulting from assignment tracking
  // analysis, making any existing DbgVariableRecords redundant (and probably
  // less correct). We still need to process DbgLabelRecords. This does sink
  // DbgLabelRecords to the bottom of the group of debug records. That
  // shouldn't be important as it does so deterministically and ordering
  // between DbgLabelRecords and DbgVariableRecords is immaterial (other than
  // for MIR/IR printing).
1267 | bool SkipDbgVariableRecords = DAG.getFunctionVarLocs(); |
  // If there is any debug-info attached to this instruction, it is in the form
  // of DbgRecord non-instruction debug-info records.
1270 | for (DbgRecord &DR : I.getDbgRecordRange()) { |
1271 | if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(Val: &DR)) { |
1272 | assert(DLR->getLabel() && "Missing label" ); |
1273 | SDDbgLabel *SDV = |
1274 | DAG.getDbgLabel(Label: DLR->getLabel(), DL: DLR->getDebugLoc(), O: SDNodeOrder); |
1275 | DAG.AddDbgLabel(DB: SDV); |
1276 | continue; |
1277 | } |
1278 | |
1279 | if (SkipDbgVariableRecords) |
1280 | continue; |
1281 | DbgVariableRecord &DVR = cast<DbgVariableRecord>(Val&: DR); |
1282 | DILocalVariable *Variable = DVR.getVariable(); |
1283 | DIExpression *Expression = DVR.getExpression(); |
1284 | dropDanglingDebugInfo(Variable, Expr: Expression); |
1285 | |
1286 | if (DVR.getType() == DbgVariableRecord::LocationType::Declare) { |
1287 | if (FuncInfo.PreprocessedDVRDeclares.contains(Ptr: &DVR)) |
1288 | continue; |
1289 | LLVM_DEBUG(dbgs() << "SelectionDAG visiting dbg_declare: " << DVR |
1290 | << "\n" ); |
1291 | handleDebugDeclare(Address: DVR.getVariableLocationOp(OpIdx: 0), Variable, Expression, |
1292 | DL: DVR.getDebugLoc()); |
1293 | continue; |
1294 | } |
1295 | |
1296 | // A DbgVariableRecord with no locations is a kill location. |
1297 | SmallVector<Value *, 4> Values(DVR.location_ops()); |
1298 | if (Values.empty()) { |
1299 | handleKillDebugValue(Var: Variable, Expr: Expression, DbgLoc: DVR.getDebugLoc(), |
1300 | Order: SDNodeOrder); |
1301 | continue; |
1302 | } |
1303 | |
1304 | // A DbgVariableRecord with an undef or absent location is also a kill |
1305 | // location. |
1306 | if (llvm::any_of(Range&: Values, |
1307 | P: [](Value *V) { return !V || isa<UndefValue>(Val: V); })) { |
1308 | handleKillDebugValue(Var: Variable, Expr: Expression, DbgLoc: DVR.getDebugLoc(), |
1309 | Order: SDNodeOrder); |
1310 | continue; |
1311 | } |
1312 | |
1313 | bool IsVariadic = DVR.hasArgList(); |
1314 | if (!handleDebugValue(Values, Var: Variable, Expr: Expression, DbgLoc: DVR.getDebugLoc(), |
1315 | Order: SDNodeOrder, IsVariadic)) { |
1316 | addDanglingDebugInfo(Values, Var: Variable, Expr: Expression, IsVariadic, |
1317 | DL: DVR.getDebugLoc(), Order: SDNodeOrder); |
1318 | } |
1319 | } |
1320 | } |
1321 | |
1322 | void SelectionDAGBuilder::visit(const Instruction &I) { |
1323 | visitDbgInfo(I); |
1324 | |
1325 | // Set up outgoing PHI node register values before emitting the terminator. |
1326 | if (I.isTerminator()) { |
1327 | HandlePHINodesInSuccessorBlocks(LLVMBB: I.getParent()); |
1328 | } |
1329 | |
1330 | // Increase the SDNodeOrder if dealing with a non-debug instruction. |
1331 | if (!isa<DbgInfoIntrinsic>(Val: I)) |
1332 | ++SDNodeOrder; |
1333 | |
1334 | CurInst = &I; |
1335 | |
1336 | // Set inserted listener only if required. |
1337 | bool NodeInserted = false; |
1338 | std::unique_ptr<SelectionDAG::DAGNodeInsertedListener> InsertedListener; |
1339 | MDNode *PCSectionsMD = I.getMetadata(KindID: LLVMContext::MD_pcsections); |
1340 | MDNode *MMRA = I.getMetadata(KindID: LLVMContext::MD_mmra); |
1341 | if (PCSectionsMD || MMRA) { |
1342 | InsertedListener = std::make_unique<SelectionDAG::DAGNodeInsertedListener>( |
1343 | args&: DAG, args: [&](SDNode *) { NodeInserted = true; }); |
1344 | } |
1345 | |
1346 | visit(Opcode: I.getOpcode(), I); |
1347 | |
1348 | if (!I.isTerminator() && !HasTailCall && |
1349 | !isa<GCStatepointInst>(Val: I)) // statepoints handle their exports internally |
1350 | CopyToExportRegsIfNeeded(V: &I); |
1351 | |
1352 | // Handle metadata. |
1353 | if (PCSectionsMD || MMRA) { |
1354 | auto It = NodeMap.find(Val: &I); |
1355 | if (It != NodeMap.end()) { |
1356 | if (PCSectionsMD) |
1357 | DAG.addPCSections(Node: It->second.getNode(), MD: PCSectionsMD); |
1358 | if (MMRA) |
1359 | DAG.addMMRAMetadata(Node: It->second.getNode(), MMRA); |
1360 | } else if (NodeInserted) { |
      // This should not happen; if it does, don't let it go unnoticed so we
      // can fix it. The relevant visit*() function is probably missing a
      // setValue() call.
      errs() << "warning: losing !pcsections and/or !mmra metadata ["
1364 | << I.getModule()->getName() << "]\n" ; |
1365 | LLVM_DEBUG(I.dump()); |
1366 | assert(false); |
1367 | } |
1368 | } |
1369 | |
1370 | CurInst = nullptr; |
1371 | } |
1372 | |
1373 | void SelectionDAGBuilder::visitPHI(const PHINode &) { |
1374 | llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!" ); |
1375 | } |
1376 | |
1377 | void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { |
1378 | // Note: this doesn't use InstVisitor, because it has to work with |
1379 | // ConstantExpr's in addition to instructions. |
1380 | switch (Opcode) { |
1381 | default: llvm_unreachable("Unknown instruction type encountered!" ); |
1382 | // Build the switch statement using the Instruction.def file. |
1383 | #define HANDLE_INST(NUM, OPCODE, CLASS) \ |
1384 | case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; |
1385 | #include "llvm/IR/Instruction.def" |
1386 | } |
1387 | } |
1388 | |
1389 | static bool handleDanglingVariadicDebugInfo(SelectionDAG &DAG, |
1390 | DILocalVariable *Variable, |
1391 | DebugLoc DL, unsigned Order, |
1392 | SmallVectorImpl<Value *> &Values, |
1393 | DIExpression *Expression) { |
1394 | // For variadic dbg_values we will now insert an undef. |
1395 | // FIXME: We can potentially recover these! |
1396 | SmallVector<SDDbgOperand, 2> Locs; |
1397 | for (const Value *V : Values) { |
1398 | auto *Undef = UndefValue::get(T: V->getType()); |
1399 | Locs.push_back(Elt: SDDbgOperand::fromConst(Const: Undef)); |
1400 | } |
1401 | SDDbgValue *SDV = DAG.getDbgValueList(Var: Variable, Expr: Expression, Locs, Dependencies: {}, |
1402 | /*IsIndirect=*/false, DL, O: Order, |
1403 | /*IsVariadic=*/true); |
1404 | DAG.AddDbgValue(DB: SDV, /*isParameter=*/false); |
1405 | return true; |
1406 | } |
1407 | |
1408 | void SelectionDAGBuilder::addDanglingDebugInfo(SmallVectorImpl<Value *> &Values, |
1409 | DILocalVariable *Var, |
1410 | DIExpression *Expr, |
1411 | bool IsVariadic, DebugLoc DL, |
1412 | unsigned Order) { |
1413 | if (IsVariadic) { |
1414 | handleDanglingVariadicDebugInfo(DAG, Variable: Var, DL, Order, Values, Expression: Expr); |
1415 | return; |
1416 | } |
1417 | // TODO: Dangling debug info will eventually either be resolved or produce |
1418 | // an Undef DBG_VALUE. However in the resolution case, a gap may appear |
1419 | // between the original dbg.value location and its resolved DBG_VALUE, |
1420 | // which we should ideally fill with an extra Undef DBG_VALUE. |
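  // For example (illustrative), if the dbg_value for %v dangles at SDNodeOrder
  // N and %v's defining node is only created at order N+3, the resolved
  // DBG_VALUE is emitted at order N+3, leaving orders N..N+2 with no location
  // for the variable.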
1421 | assert(Values.size() == 1); |
1422 | DanglingDebugInfoMap[Values[0]].emplace_back(args&: Var, args&: Expr, args&: DL, args&: Order); |
1423 | } |
1424 | |
1425 | void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable, |
1426 | const DIExpression *Expr) { |
1427 | auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) { |
1428 | DIVariable *DanglingVariable = DDI.getVariable(); |
1429 | DIExpression *DanglingExpr = DDI.getExpression(); |
1430 | if (DanglingVariable == Variable && Expr->fragmentsOverlap(Other: DanglingExpr)) { |
1431 | LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " |
1432 | << printDDI(nullptr, DDI) << "\n" ); |
1433 | return true; |
1434 | } |
1435 | return false; |
1436 | }; |
1437 | |
1438 | for (auto &DDIMI : DanglingDebugInfoMap) { |
1439 | DanglingDebugInfoVector &DDIV = DDIMI.second; |
1440 | |
1441 | // If debug info is to be dropped, run it through final checks to see |
1442 | // whether it can be salvaged. |
1443 | for (auto &DDI : DDIV) |
1444 | if (isMatchingDbgValue(DDI)) |
1445 | salvageUnresolvedDbgValue(V: DDIMI.first, DDI); |
1446 | |
1447 | erase_if(C&: DDIV, P: isMatchingDbgValue); |
1448 | } |
1449 | } |
1450 | |
// resolveDanglingDebugInfo - If we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
1453 | void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, |
1454 | SDValue Val) { |
1455 | auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(Key: V); |
1456 | if (DanglingDbgInfoIt == DanglingDebugInfoMap.end()) |
1457 | return; |
1458 | |
1459 | DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second; |
1460 | for (auto &DDI : DDIV) { |
1461 | DebugLoc DL = DDI.getDebugLoc(); |
    unsigned ValSDNodeOrder = Val.getNode() ? Val.getNode()->getIROrder() : 0;
1463 | unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); |
1464 | DILocalVariable *Variable = DDI.getVariable(); |
1465 | DIExpression *Expr = DDI.getExpression(); |
1466 | assert(Variable->isValidLocationForIntrinsic(DL) && |
1467 | "Expected inlined-at fields to agree" ); |
1468 | SDDbgValue *SDV; |
1469 | if (Val.getNode()) { |
1470 | // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a |
1471 | // FuncArgumentDbgValue (it would be hoisted to the function entry, and if |
1472 | // we couldn't resolve it directly when examining the DbgValue intrinsic |
      // in the first place we should not be more successful here). Unless we
      // have some test case that proves this to be correct we should avoid
      // calling EmitFuncArgumentDbgValue here.
1476 | if (!EmitFuncArgumentDbgValue(V, Variable, Expr, DL, |
1477 | Kind: FuncArgumentDbgValueKind::Value, N: Val)) { |
1478 | LLVM_DEBUG(dbgs() << "Resolve dangling debug info for " |
1479 | << printDDI(V, DDI) << "\n" ); |
1480 | LLVM_DEBUG(dbgs() << " By mapping to:\n " ; Val.dump()); |
1481 | // Increase the SDNodeOrder for the DbgValue here to make sure it is |
1482 | // inserted after the definition of Val when emitting the instructions |
1483 | // after ISel. An alternative could be to teach |
1484 | // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly. |
1485 | LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs() |
1486 | << "changing SDNodeOrder from " << DbgSDNodeOrder << " to " |
1487 | << ValSDNodeOrder << "\n" ); |
1488 | SDV = getDbgValue(N: Val, Variable, Expr, dl: DL, |
1489 | DbgSDNodeOrder: std::max(a: DbgSDNodeOrder, b: ValSDNodeOrder)); |
1490 | DAG.AddDbgValue(DB: SDV, isParameter: false); |
1491 | } else |
1492 | LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " |
1493 | << printDDI(V, DDI) |
1494 | << " in EmitFuncArgumentDbgValue\n" ); |
1495 | } else { |
1496 | LLVM_DEBUG(dbgs() << "Dropping debug info for " << printDDI(V, DDI) |
1497 | << "\n" ); |
1498 | auto Undef = UndefValue::get(T: V->getType()); |
1499 | auto SDV = |
1500 | DAG.getConstantDbgValue(Var: Variable, Expr, C: Undef, DL, O: DbgSDNodeOrder); |
1501 | DAG.AddDbgValue(DB: SDV, isParameter: false); |
1502 | } |
1503 | } |
1504 | DDIV.clear(); |
1505 | } |
1506 | |
1507 | void SelectionDAGBuilder::salvageUnresolvedDbgValue(const Value *V, |
1508 | DanglingDebugInfo &DDI) { |
  // TODO: For the variadic implementation, instead of only checking the fail
  // state of `handleDebugValue`, we need to know specifically which values
  // were invalid, so that we attempt to salvage only those values when
  // processing a DIArgList.
1513 | const Value *OrigV = V; |
1514 | DILocalVariable *Var = DDI.getVariable(); |
1515 | DIExpression *Expr = DDI.getExpression(); |
1516 | DebugLoc DL = DDI.getDebugLoc(); |
1517 | unsigned SDOrder = DDI.getSDNodeOrder(); |
1518 | |
1519 | // Currently we consider only dbg.value intrinsics -- we tell the salvager |
1520 | // that DW_OP_stack_value is desired. |
1521 | bool StackValue = true; |
1522 | |
  // Can this Value be encoded without any further work?
1524 | if (handleDebugValue(Values: V, Var, Expr, DbgLoc: DL, Order: SDOrder, /*IsVariadic=*/false)) |
1525 | return; |
1526 | |
1527 | // Attempt to salvage back through as many instructions as possible. Bail if |
1528 | // a non-instruction is seen, such as a constant expression or global |
1529 | // variable. FIXME: Further work could recover those too. |
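  // For example (illustrative), if the location operand is
  //   %gep = getelementptr i8, ptr %base, i64 4
  // and %gep itself cannot be encoded, salvaging can rewrite the location in
  // terms of %base by appending operations to the expression, e.g.
  //   !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value)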
1530 | while (isa<Instruction>(Val: V)) { |
1531 | const Instruction &VAsInst = *cast<const Instruction>(Val: V); |
1532 | // Temporary "0", awaiting real implementation. |
1533 | SmallVector<uint64_t, 16> Ops; |
1534 | SmallVector<Value *, 4> AdditionalValues; |
1535 | V = salvageDebugInfoImpl(I&: const_cast<Instruction &>(VAsInst), |
1536 | CurrentLocOps: Expr->getNumLocationOperands(), Ops, |
1537 | AdditionalValues); |
1538 | // If we cannot salvage any further, and haven't yet found a suitable debug |
1539 | // expression, bail out. |
1540 | if (!V) |
1541 | break; |
1542 | |
1543 | // TODO: If AdditionalValues isn't empty, then the salvage can only be |
1544 | // represented with a DBG_VALUE_LIST, so we give up. When we have support |
1545 | // here for variadic dbg_values, remove that condition. |
1546 | if (!AdditionalValues.empty()) |
1547 | break; |
1548 | |
1549 | // New value and expr now represent this debuginfo. |
1550 | Expr = DIExpression::appendOpsToArg(Expr, Ops, ArgNo: 0, StackValue); |
1551 | |
1552 | // Some kind of simplification occurred: check whether the operand of the |
1553 | // salvaged debug expression can be encoded in this DAG. |
1554 | if (handleDebugValue(Values: V, Var, Expr, DbgLoc: DL, Order: SDOrder, /*IsVariadic=*/false)) { |
1555 | LLVM_DEBUG( |
1556 | dbgs() << "Salvaged debug location info for:\n " << *Var << "\n" |
1557 | << *OrigV << "\nBy stripping back to:\n " << *V << "\n" ); |
1558 | return; |
1559 | } |
1560 | } |
1561 | |
1562 | // This was the final opportunity to salvage this debug information, and it |
1563 | // couldn't be done. Place an undef DBG_VALUE at this location to terminate |
1564 | // any earlier variable location. |
1565 | assert(OrigV && "V shouldn't be null" ); |
1566 | auto *Undef = UndefValue::get(T: OrigV->getType()); |
1567 | auto *SDV = DAG.getConstantDbgValue(Var, Expr, C: Undef, DL, O: SDNodeOrder); |
1568 | DAG.AddDbgValue(DB: SDV, isParameter: false); |
1569 | LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " |
1570 | << printDDI(OrigV, DDI) << "\n" ); |
1571 | } |
1572 | |
1573 | void SelectionDAGBuilder::handleKillDebugValue(DILocalVariable *Var, |
1574 | DIExpression *Expr, |
1575 | DebugLoc DbgLoc, |
1576 | unsigned Order) { |
1577 | Value *Poison = PoisonValue::get(T: Type::getInt1Ty(C&: *Context)); |
1578 | DIExpression *NewExpr = |
1579 | const_cast<DIExpression *>(DIExpression::convertToUndefExpression(Expr)); |
1580 | handleDebugValue(Values: Poison, Var, Expr: NewExpr, DbgLoc, Order, |
1581 | /*IsVariadic*/ false); |
1582 | } |
1583 | |
1584 | bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, |
1585 | DILocalVariable *Var, |
1586 | DIExpression *Expr, DebugLoc DbgLoc, |
1587 | unsigned Order, bool IsVariadic) { |
1588 | if (Values.empty()) |
1589 | return true; |
1590 | |
1591 | // Filter EntryValue locations out early. |
1592 | if (visitEntryValueDbgValue(Values, Variable: Var, Expr, DbgLoc)) |
1593 | return true; |
1594 | |
1595 | SmallVector<SDDbgOperand> LocationOps; |
1596 | SmallVector<SDNode *> Dependencies; |
1597 | for (const Value *V : Values) { |
1598 | // Constant value. |
1599 | if (isa<ConstantInt>(Val: V) || isa<ConstantFP>(Val: V) || isa<UndefValue>(Val: V) || |
1600 | isa<ConstantPointerNull>(Val: V)) { |
1601 | LocationOps.emplace_back(Args: SDDbgOperand::fromConst(Const: V)); |
1602 | continue; |
1603 | } |
1604 | |
1605 | // Look through IntToPtr constants. |
1606 | if (auto *CE = dyn_cast<ConstantExpr>(Val: V)) |
1607 | if (CE->getOpcode() == Instruction::IntToPtr) { |
1608 | LocationOps.emplace_back(Args: SDDbgOperand::fromConst(Const: CE->getOperand(i_nocapture: 0))); |
1609 | continue; |
1610 | } |
1611 | |
1612 | // If the Value is a frame index, we can create a FrameIndex debug value |
1613 | // without relying on the DAG at all. |
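    // For example (illustrative), a variable backed by a static alloca can be
    // described directly by its frame index (e.g. fi#0), with no dependency
    // on any SDNode.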
1614 | if (const AllocaInst *AI = dyn_cast<AllocaInst>(Val: V)) { |
1615 | auto SI = FuncInfo.StaticAllocaMap.find(Val: AI); |
1616 | if (SI != FuncInfo.StaticAllocaMap.end()) { |
1617 | LocationOps.emplace_back(Args: SDDbgOperand::fromFrameIdx(FrameIdx: SI->second)); |
1618 | continue; |
1619 | } |
1620 | } |
1621 | |
1622 | // Do not use getValue() in here; we don't want to generate code at |
1623 | // this point if it hasn't been done yet. |
1624 | SDValue N = NodeMap[V]; |
1625 | if (!N.getNode() && isa<Argument>(Val: V)) // Check unused arguments map. |
1626 | N = UnusedArgNodeMap[V]; |
1627 | if (N.getNode()) { |
1628 | // Only emit func arg dbg value for non-variadic dbg.values for now. |
1629 | if (!IsVariadic && |
1630 | EmitFuncArgumentDbgValue(V, Variable: Var, Expr, DL: DbgLoc, |
1631 | Kind: FuncArgumentDbgValueKind::Value, N)) |
1632 | return true; |
1633 | if (auto *FISDN = dyn_cast<FrameIndexSDNode>(Val: N.getNode())) { |
1634 | // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can |
1635 | // describe stack slot locations. |
1636 | // |
1637 | // Consider "int x = 0; int *px = &x;". There are two kinds of |
1638 | // interesting debug values here after optimization: |
1639 | // |
1640 | // dbg.value(i32* %px, !"int *px", !DIExpression()), and |
1641 | // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref)) |
1642 | // |
1643 | // Both describe the direct values of their associated variables. |
1644 | Dependencies.push_back(Elt: N.getNode()); |
1645 | LocationOps.emplace_back(Args: SDDbgOperand::fromFrameIdx(FrameIdx: FISDN->getIndex())); |
1646 | continue; |
1647 | } |
1648 | LocationOps.emplace_back( |
1649 | Args: SDDbgOperand::fromNode(Node: N.getNode(), ResNo: N.getResNo())); |
1650 | continue; |
1651 | } |
1652 | |
1653 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
    // Special rules apply for the first dbg.values of parameter variables in a
    // function. Identify them by the fact that they reference Argument values,
    // that the variable is a parameter, and that the location is not inlined
    // (so it is a parameter of the current function). We need to let them
    // dangle until they get an SDNode.
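    // For example (illustrative), in
    //   define void @f(i32 %p) { ... }
    // the first dbg_value of %p may be seen before argument lowering has
    // produced a node for %p, so it must dangle rather than be dropped.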
1658 | bool IsParamOfFunc = |
1659 | isa<Argument>(Val: V) && Var->isParameter() && !DbgLoc.getInlinedAt(); |
1660 | if (IsParamOfFunc) |
1661 | return false; |
1662 | |
1663 | // The value is not used in this block yet (or it would have an SDNode). |
1664 | // We still want the value to appear for the user if possible -- if it has |
1665 | // an associated VReg, we can refer to that instead. |
1666 | auto VMI = FuncInfo.ValueMap.find(Val: V); |
1667 | if (VMI != FuncInfo.ValueMap.end()) { |
1668 | unsigned Reg = VMI->second; |
1669 | // If this is a PHI node, it may be split up into several MI PHI nodes |
1670 | // (in FunctionLoweringInfo::set). |
1671 | RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, |
1672 | V->getType(), std::nullopt); |
1673 | if (RFV.occupiesMultipleRegs()) { |
1674 | // FIXME: We could potentially support variadic dbg_values here. |
1675 | if (IsVariadic) |
1676 | return false; |
1677 | unsigned Offset = 0; |
1678 | unsigned BitsToDescribe = 0; |
1679 | if (auto VarSize = Var->getSizeInBits()) |
1680 | BitsToDescribe = *VarSize; |
1681 | if (auto Fragment = Expr->getFragmentInfo()) |
1682 | BitsToDescribe = Fragment->SizeInBits; |
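        // For example (illustrative), an i128 variable split across two
        // 64-bit registers is described by two fragment dbg_values, one per
        // register: DW_OP_LLVM_fragment(0, 64) and
        // DW_OP_LLVM_fragment(64, 64).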
1683 | for (const auto &RegAndSize : RFV.getRegsAndSizes()) { |
1684 | // Bail out if all bits are described already. |
1685 | if (Offset >= BitsToDescribe) |
1686 | break; |
1687 | // TODO: handle scalable vectors. |
1688 | unsigned RegisterSize = RegAndSize.second; |
1689 | unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe) |
1690 | ? BitsToDescribe - Offset |
1691 | : RegisterSize; |
1692 | auto FragmentExpr = DIExpression::createFragmentExpression( |
1693 | Expr, OffsetInBits: Offset, SizeInBits: FragmentSize); |
1694 | if (!FragmentExpr) |
1695 | continue; |
1696 | SDDbgValue *SDV = DAG.getVRegDbgValue( |
1697 | Var, Expr: *FragmentExpr, VReg: RegAndSize.first, IsIndirect: false, DL: DbgLoc, O: Order); |
1698 | DAG.AddDbgValue(DB: SDV, isParameter: false); |
1699 | Offset += RegisterSize; |
1700 | } |
1701 | return true; |
1702 | } |
1703 | // We can use simple vreg locations for variadic dbg_values as well. |
1704 | LocationOps.emplace_back(Args: SDDbgOperand::fromVReg(VReg: Reg)); |
1705 | continue; |
1706 | } |
    // We failed to create an SDDbgOperand for V.
1708 | return false; |
1709 | } |
1710 | |
  // We have created an SDDbgOperand for each Value in Values.
1712 | assert(!LocationOps.empty()); |
1713 | SDDbgValue *SDV = |
1714 | DAG.getDbgValueList(Var, Expr, Locs: LocationOps, Dependencies, |
1715 | /*IsIndirect=*/false, DL: DbgLoc, O: Order, IsVariadic); |
1716 | DAG.AddDbgValue(DB: SDV, /*isParameter=*/false); |
1717 | return true; |
1718 | } |
1719 | |
1720 | void SelectionDAGBuilder::resolveOrClearDbgInfo() { |
1721 | // Try to fixup any remaining dangling debug info -- and drop it if we can't. |
1722 | for (auto &Pair : DanglingDebugInfoMap) |
1723 | for (auto &DDI : Pair.second) |
1724 | salvageUnresolvedDbgValue(V: const_cast<Value *>(Pair.first), DDI); |
1725 | clearDanglingDebugInfo(); |
1726 | } |
1727 | |
/// getCopyFromRegs - If there was a virtual register allocated for the value V,
/// emit a CopyFromReg of the specified type Ty. Otherwise return an empty
/// SDValue().
1730 | SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { |
1731 | DenseMap<const Value *, Register>::iterator It = FuncInfo.ValueMap.find(Val: V); |
1732 | SDValue Result; |
1733 | |
1734 | if (It != FuncInfo.ValueMap.end()) { |
1735 | Register InReg = It->second; |
1736 | |
1737 | RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), |
1738 | DAG.getDataLayout(), InReg, Ty, |
1739 | std::nullopt); // This is not an ABI copy. |
1740 | SDValue Chain = DAG.getEntryNode(); |
1741 | Result = RFV.getCopyFromRegs(DAG, FuncInfo, dl: getCurSDLoc(), Chain, Glue: nullptr, |
1742 | V); |
1743 | resolveDanglingDebugInfo(V, Val: Result); |
1744 | } |
1745 | |
1746 | return Result; |
1747 | } |
1748 | |
1749 | /// getValue - Return an SDValue for the given Value. |
1750 | SDValue SelectionDAGBuilder::getValue(const Value *V) { |
1751 | // If we already have an SDValue for this value, use it. It's important |
1752 | // to do this first, so that we don't create a CopyFromReg if we already |
1753 | // have a regular SDValue. |
1754 | SDValue &N = NodeMap[V]; |
1755 | if (N.getNode()) return N; |
1756 | |
1757 | // If there's a virtual register allocated and initialized for this |
1758 | // value, use it. |
1759 | if (SDValue copyFromReg = getCopyFromRegs(V, Ty: V->getType())) |
1760 | return copyFromReg; |
1761 | |
1762 | // Otherwise create a new SDValue and remember it. |
1763 | SDValue Val = getValueImpl(V); |
1764 | NodeMap[V] = Val; |
1765 | resolveDanglingDebugInfo(V, Val); |
1766 | return Val; |
1767 | } |
1768 | |
1769 | /// getNonRegisterValue - Return an SDValue for the given Value, but |
1770 | /// don't look in FuncInfo.ValueMap for a virtual register. |
1771 | SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { |
1772 | // If we already have an SDValue for this value, use it. |
1773 | SDValue &N = NodeMap[V]; |
1774 | if (N.getNode()) { |
1775 | if (isIntOrFPConstant(V: N)) { |
1776 | // Remove the debug location from the node as the node is about to be used |
1777 | // in a location which may differ from the original debug location. This |
1778 | // is relevant to Constant and ConstantFP nodes because they can appear |
1779 | // as constant expressions inside PHI nodes. |
1780 | N->setDebugLoc(DebugLoc()); |
1781 | } |
1782 | return N; |
1783 | } |
1784 | |
1785 | // Otherwise create a new SDValue and remember it. |
1786 | SDValue Val = getValueImpl(V); |
1787 | NodeMap[V] = Val; |
1788 | resolveDanglingDebugInfo(V, Val); |
1789 | return Val; |
1790 | } |
1791 | |
1792 | /// getValueImpl - Helper function for getValue and getNonRegisterValue. |
1793 | /// Create an SDValue for the given value. |
1794 | SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { |
1795 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
1796 | |
1797 | if (const Constant *C = dyn_cast<Constant>(Val: V)) { |
1798 | EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: V->getType(), AllowUnknown: true); |
1799 | |
1800 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: C)) |
1801 | return DAG.getConstant(Val: *CI, DL: getCurSDLoc(), VT); |
1802 | |
1803 | if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C)) |
1804 | return DAG.getGlobalAddress(GV, DL: getCurSDLoc(), VT); |
1805 | |
1806 | if (const ConstantPtrAuth *CPA = dyn_cast<ConstantPtrAuth>(Val: C)) { |
1807 | return DAG.getNode(Opcode: ISD::PtrAuthGlobalAddress, DL: getCurSDLoc(), VT, |
1808 | N1: getValue(V: CPA->getPointer()), N2: getValue(V: CPA->getKey()), |
1809 | N3: getValue(V: CPA->getAddrDiscriminator()), |
1810 | N4: getValue(V: CPA->getDiscriminator())); |
1811 | } |
1812 | |
1813 | if (isa<ConstantPointerNull>(Val: C)) { |
1814 | unsigned AS = V->getType()->getPointerAddressSpace(); |
1815 | return DAG.getConstant(Val: 0, DL: getCurSDLoc(), |
1816 | VT: TLI.getPointerTy(DL: DAG.getDataLayout(), AS)); |
1817 | } |
1818 | |
1819 | if (match(V: C, P: m_VScale())) |
1820 | return DAG.getVScale(DL: getCurSDLoc(), VT, MulImm: APInt(VT.getSizeInBits(), 1)); |
1821 | |
1822 | if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C)) |
1823 | return DAG.getConstantFP(V: *CFP, DL: getCurSDLoc(), VT); |
1824 | |
1825 | if (isa<UndefValue>(Val: C) && !V->getType()->isAggregateType()) |
1826 | return DAG.getUNDEF(VT); |
1827 | |
1828 | if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: C)) { |
1829 | visit(Opcode: CE->getOpcode(), I: *CE); |
1830 | SDValue N1 = NodeMap[V]; |
1831 | assert(N1.getNode() && "visit didn't populate the NodeMap!" ); |
1832 | return N1; |
1833 | } |
1834 | |
1835 | if (isa<ConstantStruct>(Val: C) || isa<ConstantArray>(Val: C)) { |
1836 | SmallVector<SDValue, 4> Constants; |
1837 | for (const Use &U : C->operands()) { |
1838 | SDNode *Val = getValue(V: U).getNode(); |
1839 | // If the operand is an empty aggregate, there are no values. |
1840 | if (!Val) continue; |
1841 | // Add each leaf value from the operand to the Constants list |
1842 | // to form a flattened list of all the values. |
1843 | for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) |
1844 | Constants.push_back(Elt: SDValue(Val, i)); |
1845 | } |
1846 | |
1847 | return DAG.getMergeValues(Ops: Constants, dl: getCurSDLoc()); |
1848 | } |
1849 | |
1850 | if (const ConstantDataSequential *CDS = |
1851 | dyn_cast<ConstantDataSequential>(Val: C)) { |
1852 | SmallVector<SDValue, 4> Ops; |
1853 | for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { |
1854 | SDNode *Val = getValue(V: CDS->getElementAsConstant(i)).getNode(); |
      // Add each leaf value from the element to the Ops list to form a
      // flattened list of all the values.
1857 | for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) |
1858 | Ops.push_back(Elt: SDValue(Val, i)); |
1859 | } |
1860 | |
1861 | if (isa<ArrayType>(Val: CDS->getType())) |
1862 | return DAG.getMergeValues(Ops, dl: getCurSDLoc()); |
1863 | return NodeMap[V] = DAG.getBuildVector(VT, DL: getCurSDLoc(), Ops); |
1864 | } |
1865 | |
1866 | if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { |
1867 | assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && |
1868 | "Unknown struct or array constant!" ); |
1869 | |
1870 | SmallVector<EVT, 4> ValueVTs; |
1871 | ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: C->getType(), ValueVTs); |
1872 | unsigned NumElts = ValueVTs.size(); |
1873 | if (NumElts == 0) |
1874 | return SDValue(); // empty struct |
1875 | SmallVector<SDValue, 4> Constants(NumElts); |
1876 | for (unsigned i = 0; i != NumElts; ++i) { |
1877 | EVT EltVT = ValueVTs[i]; |
1878 | if (isa<UndefValue>(Val: C)) |
1879 | Constants[i] = DAG.getUNDEF(VT: EltVT); |
1880 | else if (EltVT.isFloatingPoint()) |
1881 | Constants[i] = DAG.getConstantFP(Val: 0, DL: getCurSDLoc(), VT: EltVT); |
1882 | else |
1883 | Constants[i] = DAG.getConstant(Val: 0, DL: getCurSDLoc(), VT: EltVT); |
1884 | } |
1885 | |
1886 | return DAG.getMergeValues(Ops: Constants, dl: getCurSDLoc()); |
1887 | } |
1888 | |
1889 | if (const BlockAddress *BA = dyn_cast<BlockAddress>(Val: C)) |
1890 | return DAG.getBlockAddress(BA, VT); |
1891 | |
1892 | if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(Val: C)) |
1893 | return getValue(V: Equiv->getGlobalValue()); |
1894 | |
1895 | if (const auto *NC = dyn_cast<NoCFIValue>(Val: C)) |
1896 | return getValue(V: NC->getGlobalValue()); |
1897 | |
1898 | if (VT == MVT::aarch64svcount) { |
1899 | assert(C->isNullValue() && "Can only zero this target type!" ); |
1900 | return DAG.getNode(Opcode: ISD::BITCAST, DL: getCurSDLoc(), VT, |
1901 | Operand: DAG.getConstant(Val: 0, DL: getCurSDLoc(), VT: MVT::nxv16i1)); |
1902 | } |
1903 | |
1904 | VectorType *VecTy = cast<VectorType>(Val: V->getType()); |
1905 | |
1906 | // Now that we know the number and type of the elements, get that number of |
1907 | // elements into the Ops array based on what kind of constant it is. |
1908 | if (const ConstantVector *CV = dyn_cast<ConstantVector>(Val: C)) { |
1909 | SmallVector<SDValue, 16> Ops; |
1910 | unsigned NumElements = cast<FixedVectorType>(Val: VecTy)->getNumElements(); |
1911 | for (unsigned i = 0; i != NumElements; ++i) |
1912 | Ops.push_back(Elt: getValue(V: CV->getOperand(i_nocapture: i))); |
1913 | |
1914 | return NodeMap[V] = DAG.getBuildVector(VT, DL: getCurSDLoc(), Ops); |
1915 | } |
1916 | |
1917 | if (isa<ConstantAggregateZero>(Val: C)) { |
1918 | EVT EltVT = |
1919 | TLI.getValueType(DL: DAG.getDataLayout(), Ty: VecTy->getElementType()); |
1920 | |
1921 | SDValue Op; |
1922 | if (EltVT.isFloatingPoint()) |
1923 | Op = DAG.getConstantFP(Val: 0, DL: getCurSDLoc(), VT: EltVT); |
1924 | else |
1925 | Op = DAG.getConstant(Val: 0, DL: getCurSDLoc(), VT: EltVT); |
1926 | |
1927 | return NodeMap[V] = DAG.getSplat(VT, DL: getCurSDLoc(), Op); |
1928 | } |
1929 | |
1930 | llvm_unreachable("Unknown vector constant" ); |
1931 | } |
1932 | |
  // If this is a static alloca, generate it as the frame index instead of a
  // computation.
1935 | if (const AllocaInst *AI = dyn_cast<AllocaInst>(Val: V)) { |
1936 | DenseMap<const AllocaInst*, int>::iterator SI = |
1937 | FuncInfo.StaticAllocaMap.find(Val: AI); |
1938 | if (SI != FuncInfo.StaticAllocaMap.end()) |
1939 | return DAG.getFrameIndex( |
1940 | FI: SI->second, VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: AI->getType())); |
1941 | } |
1942 | |
1943 | // If this is an instruction which fast-isel has deferred, select it now. |
1944 | if (const Instruction *Inst = dyn_cast<Instruction>(Val: V)) { |
1945 | Register InReg = FuncInfo.InitializeRegForValue(V: Inst); |
1946 | |
1947 | RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, |
1948 | Inst->getType(), std::nullopt); |
1949 | SDValue Chain = DAG.getEntryNode(); |
1950 | return RFV.getCopyFromRegs(DAG, FuncInfo, dl: getCurSDLoc(), Chain, Glue: nullptr, V); |
1951 | } |
1952 | |
1953 | if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(Val: V)) |
1954 | return DAG.getMDNode(MD: cast<MDNode>(Val: MD->getMetadata())); |
1955 | |
1956 | if (const auto *BB = dyn_cast<BasicBlock>(Val: V)) |
1957 | return DAG.getBasicBlock(MBB: FuncInfo.MBBMap[BB]); |
1958 | |
1959 | llvm_unreachable("Can't get register for value!" ); |
1960 | } |
1961 | |
1962 | void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { |
1963 | auto Pers = classifyEHPersonality(Pers: FuncInfo.Fn->getPersonalityFn()); |
1964 | bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; |
1965 | bool IsCoreCLR = Pers == EHPersonality::CoreCLR; |
1966 | bool IsSEH = isAsynchronousEHPersonality(Pers); |
1967 | MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; |
1968 | if (!IsSEH) |
1969 | CatchPadMBB->setIsEHScopeEntry(); |
1970 | // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. |
1971 | if (IsMSVCCXX || IsCoreCLR) |
1972 | CatchPadMBB->setIsEHFuncletEntry(); |
1973 | } |
1974 | |
1975 | void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { |
1976 | // Update machine-CFG edge. |
1977 | MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()]; |
1978 | FuncInfo.MBB->addSuccessor(Succ: TargetMBB); |
1979 | TargetMBB->setIsEHCatchretTarget(true); |
1980 | DAG.getMachineFunction().setHasEHCatchret(true); |
1981 | |
1982 | auto Pers = classifyEHPersonality(Pers: FuncInfo.Fn->getPersonalityFn()); |
1983 | bool IsSEH = isAsynchronousEHPersonality(Pers); |
1984 | if (IsSEH) { |
1985 | // If this is not a fall-through branch or optimizations are switched off, |
1986 | // emit the branch. |
1987 | if (TargetMBB != NextBlock(MBB: FuncInfo.MBB) || |
1988 | TM.getOptLevel() == CodeGenOptLevel::None) |
1989 | DAG.setRoot(DAG.getNode(Opcode: ISD::BR, DL: getCurSDLoc(), VT: MVT::Other, |
1990 | N1: getControlRoot(), N2: DAG.getBasicBlock(MBB: TargetMBB))); |
1991 | return; |
1992 | } |
1993 | |
1994 | // Figure out the funclet membership for the catchret's successor. |
1995 | // This will be used by the FuncletLayout pass to determine how to order the |
1996 | // BB's. |
1997 | // A 'catchret' returns to the outer scope's color. |
1998 | Value *ParentPad = I.getCatchSwitchParentPad(); |
1999 | const BasicBlock *SuccessorColor; |
2000 | if (isa<ConstantTokenNone>(Val: ParentPad)) |
2001 | SuccessorColor = &FuncInfo.Fn->getEntryBlock(); |
2002 | else |
2003 | SuccessorColor = cast<Instruction>(Val: ParentPad)->getParent(); |
2004 | assert(SuccessorColor && "No parent funclet for catchret!" ); |
2005 | MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor]; |
2006 | assert(SuccessorColorMBB && "No MBB for SuccessorColor!" ); |
2007 | |
2008 | // Create the terminator node. |
2009 | SDValue Ret = DAG.getNode(Opcode: ISD::CATCHRET, DL: getCurSDLoc(), VT: MVT::Other, |
2010 | N1: getControlRoot(), N2: DAG.getBasicBlock(MBB: TargetMBB), |
2011 | N3: DAG.getBasicBlock(MBB: SuccessorColorMBB)); |
2012 | DAG.setRoot(Ret); |
2013 | } |
2014 | |
2015 | void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { |
2016 | // Don't emit any special code for the cleanuppad instruction. It just marks |
2017 | // the start of an EH scope/funclet. |
2018 | FuncInfo.MBB->setIsEHScopeEntry(); |
2019 | auto Pers = classifyEHPersonality(Pers: FuncInfo.Fn->getPersonalityFn()); |
2020 | if (Pers != EHPersonality::Wasm_CXX) { |
2021 | FuncInfo.MBB->setIsEHFuncletEntry(); |
2022 | FuncInfo.MBB->setIsCleanupFuncletEntry(); |
2023 | } |
2024 | } |
2025 | |
2026 | // In wasm EH, even though a catchpad may not catch an exception if a tag does |
2027 | // not match, it is OK to add only the first unwind destination catchpad to the |
2028 | // successors, because there will be at least one invoke instruction within the |
2029 | // catch scope that points to the next unwind destination, if one exists, so |
// CFGSort cannot mess up the BB sorting order.
2031 | // (All catchpads with 'catch (type)' clauses have a 'llvm.rethrow' intrinsic |
2032 | // call within them, and catchpads only consisting of 'catch (...)' have a |
2033 | // '__cxa_end_catch' call within them, both of which generate invokes in case |
2034 | // the next unwind destination exists, i.e., the next unwind destination is not |
2035 | // the caller.) |
2036 | // |
2037 | // Having at most one EH pad successor is also simpler and helps later |
2038 | // transformations. |
2039 | // |
2040 | // For example, |
2041 | // current: |
2042 | // invoke void @foo to ... unwind label %catch.dispatch |
2043 | // catch.dispatch: |
2044 | // %0 = catchswitch within ... [label %catch.start] unwind label %next |
2045 | // catch.start: |
2046 | // ... |
2047 | // ... in this BB or some other child BB dominated by this BB there will be an |
2048 | // invoke that points to 'next' BB as an unwind destination |
2049 | // |
2050 | // next: ; We don't need to add this to 'current' BB's successor |
2051 | // ... |
2052 | static void findWasmUnwindDestinations( |
2053 | FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, |
2054 | BranchProbability Prob, |
2055 | SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>> |
2056 | &UnwindDests) { |
2057 | while (EHPadBB) { |
2058 | const Instruction *Pad = EHPadBB->getFirstNonPHI(); |
2059 | if (isa<CleanupPadInst>(Val: Pad)) { |
2060 | // Stop on cleanup pads. |
2061 | UnwindDests.emplace_back(Args&: FuncInfo.MBBMap[EHPadBB], Args&: Prob); |
2062 | UnwindDests.back().first->setIsEHScopeEntry(); |
2063 | break; |
2064 | } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: Pad)) { |
2065 | // Add the catchpad handlers to the possible destinations. We don't |
2066 | // continue to the unwind destination of the catchswitch for wasm. |
2067 | for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { |
2068 | UnwindDests.emplace_back(Args&: FuncInfo.MBBMap[CatchPadBB], Args&: Prob); |
2069 | UnwindDests.back().first->setIsEHScopeEntry(); |
2070 | } |
2071 | break; |
2072 | } else { |
2073 | continue; |
2074 | } |
2075 | } |
2076 | } |
2077 | |
2078 | /// When an invoke or a cleanupret unwinds to the next EH pad, there are |
2079 | /// many places it could ultimately go. In the IR, we have a single unwind |
2080 | /// destination, but in the machine CFG, we enumerate all the possible blocks. |
2081 | /// This function skips over imaginary basic blocks that hold catchswitch |
2082 | /// instructions, and finds all the "real" machine |
2083 | /// basic block destinations. As those destinations may not be successors of |
2084 | /// EHPadBB, here we also calculate the edge probability to those destinations. |
2085 | /// The passed-in Prob is the edge probability to EHPadBB. |
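/// For example (illustrative), given
///   %cs = catchswitch within none [label %catch.start] unwind label %cleanup
/// the catchswitch block itself is not a real destination: the handler block
/// %catch.start is added directly, and the walk continues through %cleanup's
/// pad chain to collect any further destinations.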
2086 | static void findUnwindDestinations( |
2087 | FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, |
2088 | BranchProbability Prob, |
2089 | SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>> |
2090 | &UnwindDests) { |
2091 | EHPersonality Personality = |
2092 | classifyEHPersonality(Pers: FuncInfo.Fn->getPersonalityFn()); |
2093 | bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; |
2094 | bool IsCoreCLR = Personality == EHPersonality::CoreCLR; |
2095 | bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX; |
2096 | bool IsSEH = isAsynchronousEHPersonality(Pers: Personality); |
2097 | |
2098 | if (IsWasmCXX) { |
2099 | findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests); |
2100 | assert(UnwindDests.size() <= 1 && |
2101 | "There should be at most one unwind destination for wasm" ); |
2102 | return; |
2103 | } |
2104 | |
2105 | while (EHPadBB) { |
2106 | const Instruction *Pad = EHPadBB->getFirstNonPHI(); |
2107 | BasicBlock *NewEHPadBB = nullptr; |
2108 | if (isa<LandingPadInst>(Val: Pad)) { |
2109 | // Stop on landingpads. They are not funclets. |
2110 | UnwindDests.emplace_back(Args&: FuncInfo.MBBMap[EHPadBB], Args&: Prob); |
2111 | break; |
2112 | } else if (isa<CleanupPadInst>(Val: Pad)) { |
2113 | // Stop on cleanup pads. Cleanups are always funclet entries for all known |
2114 | // personalities. |
2115 | UnwindDests.emplace_back(Args&: FuncInfo.MBBMap[EHPadBB], Args&: Prob); |
2116 | UnwindDests.back().first->setIsEHScopeEntry(); |
2117 | UnwindDests.back().first->setIsEHFuncletEntry(); |
2118 | break; |
2119 | } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: Pad)) { |
2120 | // Add the catchpad handlers to the possible destinations. |
2121 | for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { |
2122 | UnwindDests.emplace_back(Args&: FuncInfo.MBBMap[CatchPadBB], Args&: Prob); |
2123 | // For MSVC++ and the CLR, catchblocks are funclets and need prologues. |
2124 | if (IsMSVCCXX || IsCoreCLR) |
2125 | UnwindDests.back().first->setIsEHFuncletEntry(); |
2126 | if (!IsSEH) |
2127 | UnwindDests.back().first->setIsEHScopeEntry(); |
2128 | } |
2129 | NewEHPadBB = CatchSwitch->getUnwindDest(); |
2130 | } else { |
2131 | continue; |
2132 | } |
2133 | |
2134 | BranchProbabilityInfo *BPI = FuncInfo.BPI; |
2135 | if (BPI && NewEHPadBB) |
2136 | Prob *= BPI->getEdgeProbability(Src: EHPadBB, Dst: NewEHPadBB); |
2137 | EHPadBB = NewEHPadBB; |
2138 | } |
2139 | } |
2140 | |
2141 | void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) { |
2142 | // Update successor info. |
2143 | SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; |
2144 | auto UnwindDest = I.getUnwindDest(); |
2145 | BranchProbabilityInfo *BPI = FuncInfo.BPI; |
2146 | BranchProbability UnwindDestProb = |
2147 | (BPI && UnwindDest) |
2148 | ? BPI->getEdgeProbability(Src: FuncInfo.MBB->getBasicBlock(), Dst: UnwindDest) |
2149 | : BranchProbability::getZero(); |
2150 | findUnwindDestinations(FuncInfo, EHPadBB: UnwindDest, Prob: UnwindDestProb, UnwindDests); |
2151 | for (auto &UnwindDest : UnwindDests) { |
2152 | UnwindDest.first->setIsEHPad(); |
2153 | addSuccessorWithProb(Src: FuncInfo.MBB, Dst: UnwindDest.first, Prob: UnwindDest.second); |
2154 | } |
2155 | FuncInfo.MBB->normalizeSuccProbs(); |
2156 | |
2157 | // Create the terminator node. |
2158 | SDValue Ret = |
2159 | DAG.getNode(Opcode: ISD::CLEANUPRET, DL: getCurSDLoc(), VT: MVT::Other, Operand: getControlRoot()); |
2160 | DAG.setRoot(Ret); |
2161 | } |
2162 | |
2163 | void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) { |
2164 | report_fatal_error(reason: "visitCatchSwitch not yet implemented!" ); |
2165 | } |
2166 | |
2167 | void SelectionDAGBuilder::visitRet(const ReturnInst &I) { |
2168 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
2169 | auto &DL = DAG.getDataLayout(); |
2170 | SDValue Chain = getControlRoot(); |
2171 | SmallVector<ISD::OutputArg, 8> Outs; |
2172 | SmallVector<SDValue, 8> OutVals; |
2173 | |
2174 | // Calls to @llvm.experimental.deoptimize don't generate a return value, so |
2175 | // lower |
2176 | // |
2177 | // %val = call <ty> @llvm.experimental.deoptimize() |
2178 | // ret <ty> %val |
2179 | // |
2180 | // differently. |
2181 | if (I.getParent()->getTerminatingDeoptimizeCall()) { |
2182 | LowerDeoptimizingReturn(); |
2183 | return; |
2184 | } |
2185 | |
2186 | if (!FuncInfo.CanLowerReturn) { |
2187 | unsigned DemoteReg = FuncInfo.DemoteRegister; |
2188 | const Function *F = I.getParent()->getParent(); |
2189 | |
2190 | // Emit a store of the return value through the virtual register. |
2191 | // Leave Outs empty so that LowerReturn won't try to load return |
2192 | // registers the usual way. |
2193 | SmallVector<EVT, 1> PtrValueVTs; |
2194 | ComputeValueVTs(TLI, DL, |
2195 | Ty: PointerType::get(C&: F->getContext(), |
2196 | AddressSpace: DAG.getDataLayout().getAllocaAddrSpace()), |
2197 | ValueVTs&: PtrValueVTs); |
2198 | |
2199 | SDValue RetPtr = |
2200 | DAG.getCopyFromReg(Chain, dl: getCurSDLoc(), Reg: DemoteReg, VT: PtrValueVTs[0]); |
2201 | SDValue RetOp = getValue(V: I.getOperand(i_nocapture: 0)); |
2202 | |
2203 | SmallVector<EVT, 4> ValueVTs, MemVTs; |
2204 | SmallVector<uint64_t, 4> Offsets; |
2205 | ComputeValueVTs(TLI, DL, Ty: I.getOperand(i_nocapture: 0)->getType(), ValueVTs, MemVTs: &MemVTs, |
2206 | FixedOffsets: &Offsets, StartingOffset: 0); |
2207 | unsigned NumValues = ValueVTs.size(); |
2208 | |
2209 | SmallVector<SDValue, 4> Chains(NumValues); |
2210 | Align BaseAlign = DL.getPrefTypeAlign(Ty: I.getOperand(i_nocapture: 0)->getType()); |
2211 | for (unsigned i = 0; i != NumValues; ++i) { |
2212 | // An aggregate return value cannot wrap around the address space, so |
2213 | // offsets to its parts don't wrap either. |
2214 | SDValue Ptr = DAG.getObjectPtrOffset(SL: getCurSDLoc(), Ptr: RetPtr, |
2215 | Offset: TypeSize::getFixed(ExactSize: Offsets[i])); |
2216 | |
2217 | SDValue Val = RetOp.getValue(R: RetOp.getResNo() + i); |
2218 | if (MemVTs[i] != ValueVTs[i]) |
2219 | Val = DAG.getPtrExtOrTrunc(Op: Val, DL: getCurSDLoc(), VT: MemVTs[i]); |
2220 | Chains[i] = DAG.getStore( |
2221 | Chain, dl: getCurSDLoc(), Val, |
2222 | // FIXME: better loc info would be nice. |
2223 | Ptr, PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()), |
2224 | Alignment: commonAlignment(A: BaseAlign, Offset: Offsets[i])); |
2225 | } |
2226 | |
2227 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: getCurSDLoc(), |
2228 | VT: MVT::Other, Ops: Chains); |
2229 | } else if (I.getNumOperands() != 0) { |
2230 | SmallVector<EVT, 4> ValueVTs; |
2231 | ComputeValueVTs(TLI, DL, Ty: I.getOperand(i_nocapture: 0)->getType(), ValueVTs); |
2232 | unsigned NumValues = ValueVTs.size(); |
2233 | if (NumValues) { |
2234 | SDValue RetOp = getValue(V: I.getOperand(i_nocapture: 0)); |
2235 | |
2236 | const Function *F = I.getParent()->getParent(); |
2237 | |
2238 | bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( |
2239 | Ty: I.getOperand(i_nocapture: 0)->getType(), CallConv: F->getCallingConv(), |
2240 | /*IsVarArg*/ isVarArg: false, DL); |
2241 | |
2242 | ISD::NodeType ExtendKind = ISD::ANY_EXTEND; |
2243 | if (F->getAttributes().hasRetAttr(Kind: Attribute::SExt)) |
2244 | ExtendKind = ISD::SIGN_EXTEND; |
2245 | else if (F->getAttributes().hasRetAttr(Kind: Attribute::ZExt)) |
2246 | ExtendKind = ISD::ZERO_EXTEND; |
2247 | |
2248 | LLVMContext &Context = F->getContext(); |
2249 | bool RetInReg = F->getAttributes().hasRetAttr(Kind: Attribute::InReg); |
2250 | |
2251 | for (unsigned j = 0; j != NumValues; ++j) { |
2252 | EVT VT = ValueVTs[j]; |
2253 | |
2254 | if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) |
2255 | VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind); |
2256 | |
2257 | CallingConv::ID CC = F->getCallingConv(); |
2258 | |
2259 | unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT); |
2260 | MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT); |
2261 | SmallVector<SDValue, 4> Parts(NumParts); |
2262 | getCopyToParts(DAG, DL: getCurSDLoc(), |
2263 | Val: SDValue(RetOp.getNode(), RetOp.getResNo() + j), |
2264 | Parts: &Parts[0], NumParts, PartVT, V: &I, CallConv: CC, ExtendKind); |
2265 | |
2266 | // 'inreg' on function refers to return value |
2267 | ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); |
2268 | if (RetInReg) |
2269 | Flags.setInReg(); |
2270 | |
2271 | if (I.getOperand(i_nocapture: 0)->getType()->isPointerTy()) { |
2272 | Flags.setPointer(); |
2273 | Flags.setPointerAddrSpace( |
2274 | cast<PointerType>(Val: I.getOperand(i_nocapture: 0)->getType())->getAddressSpace()); |
2275 | } |
2276 | |
2277 | if (NeedsRegBlock) { |
2278 | Flags.setInConsecutiveRegs(); |
2279 | if (j == NumValues - 1) |
2280 | Flags.setInConsecutiveRegsLast(); |
2281 | } |
2282 | |
2283 | // Propagate extension type if any |
2284 | if (ExtendKind == ISD::SIGN_EXTEND) |
2285 | Flags.setSExt(); |
2286 | else if (ExtendKind == ISD::ZERO_EXTEND) |
2287 | Flags.setZExt(); |
2288 | |
2289 | for (unsigned i = 0; i < NumParts; ++i) { |
2290 | Outs.push_back(Elt: ISD::OutputArg(Flags, |
2291 | Parts[i].getValueType().getSimpleVT(), |
2292 | VT, /*isfixed=*/true, 0, 0)); |
2293 | OutVals.push_back(Elt: Parts[i]); |
2294 | } |
2295 | } |
2296 | } |
2297 | } |
2298 | |
  // Push the swifterror virtual register in as the last element of Outs. This
  // makes sure the swifterror virtual register will be returned in the
  // swifterror physical register.
2302 | const Function *F = I.getParent()->getParent(); |
2303 | if (TLI.supportSwiftError() && |
2304 | F->getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError)) { |
2305 | assert(SwiftError.getFunctionArg() && "Need a swift error argument" ); |
2306 | ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); |
2307 | Flags.setSwiftError(); |
2308 | Outs.push_back(Elt: ISD::OutputArg( |
2309 | Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)), |
2310 | /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0)); |
2311 | // Create SDNode for the swifterror virtual register. |
2312 | OutVals.push_back( |
2313 | Elt: DAG.getRegister(Reg: SwiftError.getOrCreateVRegUseAt( |
2314 | &I, FuncInfo.MBB, SwiftError.getFunctionArg()), |
2315 | VT: EVT(TLI.getPointerTy(DL)))); |
2316 | } |
2317 | |
2318 | bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg(); |
2319 | CallingConv::ID CallConv = |
2320 | DAG.getMachineFunction().getFunction().getCallingConv(); |
2321 | Chain = DAG.getTargetLoweringInfo().LowerReturn( |
2322 | Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); |
2323 | |
2324 | // Verify that the target's LowerReturn behaved as expected. |
2325 | assert(Chain.getNode() && Chain.getValueType() == MVT::Other && |
2326 | "LowerReturn didn't return a valid chain!" ); |
2327 | |
2328 | // Update the DAG with the new chain value resulting from return lowering. |
2329 | DAG.setRoot(Chain); |
2330 | } |
2331 | |
2332 | /// CopyToExportRegsIfNeeded - If the given value has virtual registers |
2333 | /// created for it, emit nodes to copy the value into the virtual |
2334 | /// registers. |
2335 | void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { |
2336 | // Skip empty types |
2337 | if (V->getType()->isEmptyTy()) |
2338 | return; |
2339 | |
2340 | DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(Val: V); |
2341 | if (VMI != FuncInfo.ValueMap.end()) { |
2342 | assert((!V->use_empty() || isa<CallBrInst>(V)) && |
2343 | "Unused value assigned virtual registers!" ); |
2344 | CopyValueToVirtualRegister(V, Reg: VMI->second); |
2345 | } |
2346 | } |
2347 | |
2348 | /// ExportFromCurrentBlock - If this condition isn't known to be exported from |
2349 | /// the current basic block, add it to ValueMap now so that we'll get a |
2350 | /// CopyTo/FromReg. |
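/// For example (illustrative), a compare computed in this block but consumed
/// by a branch lowered in another block must be exported: it is assigned a
/// virtual register here so the other block can read it via CopyFromReg.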
2351 | void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { |
2352 | // No need to export constants. |
2353 | if (!isa<Instruction>(Val: V) && !isa<Argument>(Val: V)) return; |
2354 | |
2355 | // Already exported? |
2356 | if (FuncInfo.isExportedInst(V)) return; |
2357 | |
2358 | Register Reg = FuncInfo.InitializeRegForValue(V); |
2359 | CopyValueToVirtualRegister(V, Reg); |
2360 | } |
2361 | |
2362 | bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, |
2363 | const BasicBlock *FromBB) { |
2364 | // The operands of the setcc have to be in this block. We don't know |
2365 | // how to export them from some other block. |
2366 | if (const Instruction *VI = dyn_cast<Instruction>(Val: V)) { |
2367 | // Can export from current BB. |
2368 | if (VI->getParent() == FromBB) |
2369 | return true; |
2370 | |
    // It is already exported; no-op.
2372 | return FuncInfo.isExportedInst(V); |
2373 | } |
2374 | |
2375 | // If this is an argument, we can export it if the BB is the entry block or |
2376 | // if it is already exported. |
2377 | if (isa<Argument>(Val: V)) { |
2378 | if (FromBB->isEntryBlock()) |
2379 | return true; |
2380 | |
2381 | // Otherwise, can only export this if it is already exported. |
2382 | return FuncInfo.isExportedInst(V); |
2383 | } |
2384 | |
2385 | // Otherwise, constants can always be exported. |
2386 | return true; |
2387 | } |
2388 | |
2389 | /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. |
2390 | BranchProbability |
2391 | SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src, |
2392 | const MachineBasicBlock *Dst) const { |
2393 | BranchProbabilityInfo *BPI = FuncInfo.BPI; |
2394 | const BasicBlock *SrcBB = Src->getBasicBlock(); |
2395 | const BasicBlock *DstBB = Dst->getBasicBlock(); |
2396 | if (!BPI) { |
2397 | // If BPI is not available, set the default probability as 1 / N, where N is |
2398 | // the number of successors. |
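    // For example (illustrative), a source block with four successors gets a
    // probability of 1/4 on each outgoing edge.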
2399 | auto SuccSize = std::max<uint32_t>(a: succ_size(BB: SrcBB), b: 1); |
2400 | return BranchProbability(1, SuccSize); |
2401 | } |
2402 | return BPI->getEdgeProbability(Src: SrcBB, Dst: DstBB); |
2403 | } |
2404 | |
2405 | void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src, |
2406 | MachineBasicBlock *Dst, |
2407 | BranchProbability Prob) { |
2408 | if (!FuncInfo.BPI) |
2409 | Src->addSuccessorWithoutProb(Succ: Dst); |
2410 | else { |
2411 | if (Prob.isUnknown()) |
2412 | Prob = getEdgeProbability(Src, Dst); |
2413 | Src->addSuccessor(Succ: Dst, Prob); |
2414 | } |
2415 | } |
2416 | |
2417 | static bool InBlock(const Value *V, const BasicBlock *BB) { |
2418 | if (const Instruction *I = dyn_cast<Instruction>(Val: V)) |
2419 | return I->getParent() == BB; |
2420 | return true; |
2421 | } |
2422 | |
2423 | /// EmitBranchForMergedCondition - Helper method for FindMergedConditions. |
2424 | /// This function emits a branch and is used at the leaves of an OR or an |
2425 | /// AND operator tree. |
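/// For example (illustrative), a branch such as
///   %c1 = icmp slt i32 %a, %b
///   %c2 = icmp slt i32 %c, %d
///   %cond = and i1 %c1, %c2
///   br i1 %cond, label %then, label %else
/// is decomposed so that each icmp leaf reaches this function, which records
/// a CaseBlock for it; the branch is then emitted as two conditional branches
/// rather than materializing the 'and' result.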
2426 | void |
2427 | SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, |
2428 | MachineBasicBlock *TBB, |
2429 | MachineBasicBlock *FBB, |
2430 | MachineBasicBlock *CurBB, |
2431 | MachineBasicBlock *SwitchBB, |
2432 | BranchProbability TProb, |
2433 | BranchProbability FProb, |
2434 | bool InvertCond) { |
2435 | const BasicBlock *BB = CurBB->getBasicBlock(); |
2436 | |
2437 | // If the leaf of the tree is a comparison, merge the condition into |
2438 | // the caseblock. |
2439 | if (const CmpInst *BOp = dyn_cast<CmpInst>(Val: Cond)) { |
2440 | // The operands of the cmp have to be in this block. We don't know |
2441 | // how to export them from some other block. If this is the first block |
2442 | // of the sequence, no exporting is needed. |
2443 | if (CurBB == SwitchBB || |
2444 | (isExportableFromCurrentBlock(V: BOp->getOperand(i_nocapture: 0), FromBB: BB) && |
2445 | isExportableFromCurrentBlock(V: BOp->getOperand(i_nocapture: 1), FromBB: BB))) { |
2446 | ISD::CondCode Condition; |
2447 | if (const ICmpInst *IC = dyn_cast<ICmpInst>(Val: Cond)) { |
2448 | ICmpInst::Predicate Pred = |
2449 | InvertCond ? IC->getInversePredicate() : IC->getPredicate(); |
2450 | Condition = getICmpCondCode(Pred); |
2451 | } else { |
2452 | const FCmpInst *FC = cast<FCmpInst>(Val: Cond); |
2453 | FCmpInst::Predicate Pred = |
2454 | InvertCond ? FC->getInversePredicate() : FC->getPredicate(); |
2455 | Condition = getFCmpCondCode(Pred); |
2456 | if (TM.Options.NoNaNsFPMath) |
2457 | Condition = getFCmpCodeWithoutNaN(CC: Condition); |
2458 | } |
2459 | |
2460 | CaseBlock CB(Condition, BOp->getOperand(i_nocapture: 0), BOp->getOperand(i_nocapture: 1), nullptr, |
2461 | TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb); |
2462 | SL->SwitchCases.push_back(x: CB); |
2463 | return; |
2464 | } |
2465 | } |
2466 | |
2467 | // Create a CaseBlock record representing this branch. |
2468 | ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ; |
2469 | CaseBlock CB(Opc, Cond, ConstantInt::getTrue(Context&: *DAG.getContext()), |
2470 | nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb); |
2471 | SL->SwitchCases.push_back(x: CB); |
2472 | } |
2473 | |
2474 | // Collect dependencies on V recursively. This is used for the cost analysis in |
2475 | // `shouldKeepJumpConditionsTogether`. |
2476 | static bool collectInstructionDeps( |
2477 | SmallMapVector<const Instruction *, bool, 8> *Deps, const Value *V, |
2478 | SmallMapVector<const Instruction *, bool, 8> *Necessary = nullptr, |
2479 | unsigned Depth = 0) { |
2480 | // Return false if we have an incomplete count. |
2481 | if (Depth >= SelectionDAG::MaxRecursionDepth) |
2482 | return false; |
2483 | |
2484 | auto *I = dyn_cast<Instruction>(Val: V); |
2485 | if (I == nullptr) |
2486 | return true; |
2487 | |
2488 | if (Necessary != nullptr) { |
2489 | // This instruction is necessary for the other side of the condition so |
2490 | // don't count it. |
2491 | if (Necessary->contains(Key: I)) |
2492 | return true; |
2493 | } |
2494 | |
2495 | // Already added this dep. |
2496 | if (!Deps->try_emplace(Key: I, Args: false).second) |
2497 | return true; |
2498 | |
2499 | for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx) |
2500 | if (!collectInstructionDeps(Deps, V: I->getOperand(i: OpIdx), Necessary, |
2501 | Depth: Depth + 1)) |
2502 | return false; |
2503 | return true; |
2504 | } |
2505 | |
2506 | bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether( |
2507 | const FunctionLoweringInfo &FuncInfo, const BranchInst &I, |
2508 | Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs, |
2509 | TargetLoweringBase::CondMergingParams Params) const { |
2510 | if (I.getNumSuccessors() != 2) |
2511 | return false; |
2512 | |
2513 | if (!I.isConditional()) |
2514 | return false; |
2515 | |
2516 | if (Params.BaseCost < 0) |
2517 | return false; |
2518 | |
2519 | // Baseline cost. |
2520 | InstructionCost CostThresh = Params.BaseCost; |
2521 | |
2522 | BranchProbabilityInfo *BPI = nullptr; |
2523 | if (Params.LikelyBias || Params.UnlikelyBias) |
2524 | BPI = FuncInfo.BPI; |
2525 | if (BPI != nullptr) { |
2526 | // See if we are either likely to get an early out or compute both lhs/rhs |
2527 | // of the condition. |
2528 | BasicBlock *IfFalse = I.getSuccessor(i: 0); |
2529 | BasicBlock *IfTrue = I.getSuccessor(i: 1); |
2530 | |
2531 | std::optional<bool> Likely; |
2532 | if (BPI->isEdgeHot(Src: I.getParent(), Dst: IfTrue)) |
2533 | Likely = true; |
2534 | else if (BPI->isEdgeHot(Src: I.getParent(), Dst: IfFalse)) |
2535 | Likely = false; |
2536 | |
2537 | if (Likely) { |
2538 | if (Opc == (*Likely ? Instruction::And : Instruction::Or)) |
2539 | // It's likely we will have to compute both lhs and rhs of the condition.
2540 | CostThresh += Params.LikelyBias; |
2541 | else { |
2542 | if (Params.UnlikelyBias < 0) |
2543 | return false; |
2544 | // It's likely we will get an early out.
2545 | CostThresh -= Params.UnlikelyBias; |
2546 | } |
2547 | } |
2548 | } |
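// Worked example (illustrative only): for `br (and %a, %b)` where the true
// edge is hot, both %a and %b will likely be evaluated anyway, so the
// threshold grows by LikelyBias; if the false edge is hot instead, an early
// out is probable and the threshold shrinks by UnlikelyBias.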
2549 | |
2550 | if (CostThresh <= 0) |
2551 | return false; |
2552 | |
2553 | // Collect "all" instructions that the lhs condition depends on.
2554 | // Use a map for stable iteration (to avoid the non-determinism of iterating
2555 | // a SmallPtrSet). The `bool` value is just a dummy.
2556 | SmallMapVector<const Instruction *, bool, 8> LhsDeps, RhsDeps; |
2557 | collectInstructionDeps(Deps: &LhsDeps, V: Lhs); |
2558 | // Collect "all" instructions that the rhs condition depends on and that are
2559 | // not already dependencies of the lhs. This gives us an estimate of which
2560 | // instructions we stand to save by splitting the condition.
2561 | if (!collectInstructionDeps(Deps: &RhsDeps, V: Rhs, Necessary: &LhsDeps)) |
2562 | return false; |
2563 | // Add the compare instruction itself unless it's a dependency of the LHS.
2564 | if (const auto *RhsI = dyn_cast<Instruction>(Val: Rhs)) |
2565 | if (!LhsDeps.contains(Key: RhsI)) |
2566 | RhsDeps.try_emplace(Key: RhsI, Args: false); |
2567 | |
2568 | const auto &TLI = DAG.getTargetLoweringInfo(); |
2569 | const auto &TTI = |
2570 | TLI.getTargetMachine().getTargetTransformInfo(F: *I.getFunction()); |
2571 | |
2572 | InstructionCost CostOfIncluding = 0; |
2573 | // See if this instruction will need to be computed independently of whether
2574 | // the RHS is.
2575 | Value *BrCond = I.getCondition(); |
2576 | auto ShouldCountInsn = [&RhsDeps, &BrCond](const Instruction *Ins) { |
2577 | for (const auto *U : Ins->users()) { |
2578 | // If user is independent of RHS calculation we don't need to count it. |
2579 | if (auto *UIns = dyn_cast<Instruction>(Val: U)) |
2580 | if (UIns != BrCond && !RhsDeps.contains(Key: UIns)) |
2581 | return false; |
2582 | } |
2583 | return true; |
2584 | }; |
2585 | |
2586 | // Prune instructions from RhsDeps that are dependencies of unrelated
2587 | // instructions. The bound (SelectionDAG::MaxRecursionDepth) is fairly
2588 | // arbitrary and just meant to cap how much time we spend in the pruning
2589 | // loop. It's highly unlikely to come into effect.
2590 | const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth; |
2591 | // Stop after a certain point. No incorrectness from including too many |
2592 | // instructions. |
2593 | for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) { |
2594 | const Instruction *ToDrop = nullptr; |
2595 | for (const auto &InsPair : RhsDeps) { |
2596 | if (!ShouldCountInsn(InsPair.first)) { |
2597 | ToDrop = InsPair.first; |
2598 | break; |
2599 | } |
2600 | } |
2601 | if (ToDrop == nullptr) |
2602 | break; |
2603 | RhsDeps.erase(Key: ToDrop); |
2604 | } |
2605 | |
2606 | for (const auto &InsPair : RhsDeps) { |
2607 | // Finally accumulate latency that we can only attribute to computing the |
2608 | // RHS condition. Use latency because we are essentially trying to calculate |
2609 | // the cost of the dependency chain. |
2610 | // Possible TODO: We could try to estimate ILP and make this more precise. |
2611 | CostOfIncluding += |
2612 | TTI.getInstructionCost(U: InsPair.first, CostKind: TargetTransformInfo::TCK_Latency); |
2613 | |
2614 | if (CostOfIncluding > CostThresh) |
2615 | return false; |
2616 | } |
2617 | return true; |
2618 | } |
2619 | |
2620 | void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, |
2621 | MachineBasicBlock *TBB, |
2622 | MachineBasicBlock *FBB, |
2623 | MachineBasicBlock *CurBB, |
2624 | MachineBasicBlock *SwitchBB, |
2625 | Instruction::BinaryOps Opc, |
2626 | BranchProbability TProb, |
2627 | BranchProbability FProb, |
2628 | bool InvertCond) { |
2629 | // Skip over a NOT that is not part of the tree and remember to invert the op
2630 | // and operands at the next level.
2631 | Value *NotCond; |
2632 | if (match(V: Cond, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: NotCond)))) && |
2633 | InBlock(V: NotCond, BB: CurBB->getBasicBlock())) { |
2634 | FindMergedConditions(Cond: NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb, |
2635 | InvertCond: !InvertCond); |
2636 | return; |
2637 | } |
2638 | |
2639 | const Instruction *BOp = dyn_cast<Instruction>(Val: Cond); |
2640 | const Value *BOpOp0, *BOpOp1; |
2641 | // Compute the effective opcode for Cond, taking into account whether it needs |
2642 | // to be inverted, e.g. |
2643 | // and (not (or A, B)), C |
2644 | // gets lowered as |
2645 | // and (and (not A, not B), C) |
2646 | Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0; |
2647 | if (BOp) { |
2648 | BOpc = match(V: BOp, P: m_LogicalAnd(L: m_Value(V&: BOpOp0), R: m_Value(V&: BOpOp1))) |
2649 | ? Instruction::And |
2650 | : (match(V: BOp, P: m_LogicalOr(L: m_Value(V&: BOpOp0), R: m_Value(V&: BOpOp1))) |
2651 | ? Instruction::Or |
2652 | : (Instruction::BinaryOps)0); |
2653 | if (InvertCond) { |
2654 | if (BOpc == Instruction::And) |
2655 | BOpc = Instruction::Or; |
2656 | else if (BOpc == Instruction::Or) |
2657 | BOpc = Instruction::And; |
2658 | } |
2659 | } |
2660 | |
2661 | // If this node is not part of the or/and tree, emit it as a branch. |
2662 | // Note that all nodes in the tree should have the same opcode.
2663 | bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse(); |
2664 | if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() || |
2665 | !InBlock(V: BOpOp0, BB: CurBB->getBasicBlock()) || |
2666 | !InBlock(V: BOpOp1, BB: CurBB->getBasicBlock())) { |
2667 | EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, |
2668 | TProb, FProb, InvertCond); |
2669 | return; |
2670 | } |
2671 | |
2672 | // Create TmpBB after CurBB. |
2673 | MachineFunction::iterator BBI(CurBB); |
2674 | MachineFunction &MF = DAG.getMachineFunction(); |
2675 | MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(BB: CurBB->getBasicBlock()); |
2676 | CurBB->getParent()->insert(MBBI: ++BBI, MBB: TmpBB); |
2677 | |
2678 | if (Opc == Instruction::Or) { |
2679 | // Codegen X | Y as: |
2680 | // BB1: |
2681 | // jmp_if_X TBB |
2682 | // jmp TmpBB |
2683 | // TmpBB: |
2684 | // jmp_if_Y TBB |
2685 | // jmp FBB |
2686 | // |
2687 | |
2688 | // We have flexibility in setting Prob for BB1 and Prob for TmpBB. |
2689 | // The requirement is that |
2690 | // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) |
2691 | // = TrueProb for original BB. |
2692 | // Assuming the original probabilities are A and B, one choice is to set |
2693 | // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to |
2694 | // A/(1+B) and 2B/(1+B). This choice assumes that |
2695 | // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. |
2696 | // Another choice is to assume TrueProb for BB1 equals to TrueProb for |
2697 | // TmpBB, but the math is more complicated. |
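// Worked numbers (illustrative only): with A = TProb = 3/4 and
// B = FProb = 1/4, BB1 gets {3/8, 5/8} and TmpBB gets {3/5, 2/5};
// indeed 3/8 + 5/8 * 3/5 = 3/4 reproduces the original TrueProb.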
2698 | |
2699 | auto NewTrueProb = TProb / 2; |
2700 | auto NewFalseProb = TProb / 2 + FProb; |
2701 | // Emit the LHS condition. |
2702 | FindMergedConditions(Cond: BOpOp0, TBB, FBB: TmpBB, CurBB, SwitchBB, Opc, TProb: NewTrueProb, |
2703 | FProb: NewFalseProb, InvertCond); |
2704 | |
2705 | // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). |
2706 | SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb}; |
2707 | BranchProbability::normalizeProbabilities(Begin: Probs.begin(), End: Probs.end()); |
2708 | // Emit the RHS condition into TmpBB. |
2709 | FindMergedConditions(Cond: BOpOp1, TBB, FBB, CurBB: TmpBB, SwitchBB, Opc, TProb: Probs[0], |
2710 | FProb: Probs[1], InvertCond); |
2711 | } else { |
2712 | assert(Opc == Instruction::And && "Unknown merge op!");
2713 | // Codegen X & Y as: |
2714 | // BB1: |
2715 | // jmp_if_X TmpBB |
2716 | // jmp FBB |
2717 | // TmpBB: |
2718 | // jmp_if_Y TBB |
2719 | // jmp FBB |
2720 | // |
2721 | // This requires creation of TmpBB after CurBB. |
2722 | |
2723 | // We have flexibility in setting Prob for BB1 and Prob for TmpBB. |
2724 | // The requirement is that |
2725 | // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) |
2726 | // = FalseProb for original BB. |
2727 | // Assuming the original probabilities are A and B, one choice is to set |
2728 | // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to |
2729 | // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 == |
2730 | // TrueProb for BB1 * FalseProb for TmpBB. |
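// Worked numbers (illustrative only): with A = TProb = 3/4 and
// B = FProb = 1/4, BB1 gets {7/8, 1/8} and TmpBB gets {6/7, 1/7};
// indeed 1/8 + 7/8 * 1/7 = 1/4 reproduces the original FalseProb.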
2731 | |
2732 | auto NewTrueProb = TProb + FProb / 2; |
2733 | auto NewFalseProb = FProb / 2; |
2734 | // Emit the LHS condition. |
2735 | FindMergedConditions(Cond: BOpOp0, TBB: TmpBB, FBB, CurBB, SwitchBB, Opc, TProb: NewTrueProb, |
2736 | FProb: NewFalseProb, InvertCond); |
2737 | |
2738 | // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). |
2739 | SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2}; |
2740 | BranchProbability::normalizeProbabilities(Begin: Probs.begin(), End: Probs.end()); |
2741 | // Emit the RHS condition into TmpBB. |
2742 | FindMergedConditions(Cond: BOpOp1, TBB, FBB, CurBB: TmpBB, SwitchBB, Opc, TProb: Probs[0], |
2743 | FProb: Probs[1], InvertCond); |
2744 | } |
2745 | } |
2746 | |
2747 | /// If the set of cases should be emitted as a series of branches, return true. |
2748 | /// If we should emit this as a bunch of and/or'd together conditions, return |
2749 | /// false. |
2750 | bool |
2751 | SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) { |
2752 | if (Cases.size() != 2) return true; |
2753 | |
2754 | // If this is two comparisons of the same values or'd or and'd together, they |
2755 | // will get folded into a single comparison, so don't emit two blocks. |
2756 | if ((Cases[0].CmpLHS == Cases[1].CmpLHS && |
2757 | Cases[0].CmpRHS == Cases[1].CmpRHS) || |
2758 | (Cases[0].CmpRHS == Cases[1].CmpLHS && |
2759 | Cases[0].CmpLHS == Cases[1].CmpRHS)) { |
2760 | return false; |
2761 | } |
2762 | |
2763 | // Handle: (X != null) | (Y != null) --> (X|Y) != 0 |
2764 | // Handle: (X == null) & (Y == null) --> (X|Y) == 0 |
2765 | if (Cases[0].CmpRHS == Cases[1].CmpRHS && |
2766 | Cases[0].CC == Cases[1].CC && |
2767 | isa<Constant>(Val: Cases[0].CmpRHS) && |
2768 | cast<Constant>(Val: Cases[0].CmpRHS)->isNullValue()) { |
2769 | if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) |
2770 | return false; |
2771 | if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) |
2772 | return false; |
2773 | } |
2774 | |
2775 | return true; |
2776 | } |
2777 | |
2778 | void SelectionDAGBuilder::visitBr(const BranchInst &I) { |
2779 | MachineBasicBlock *BrMBB = FuncInfo.MBB; |
2780 | |
2781 | // Update machine-CFG edges. |
2782 | MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(i: 0)]; |
2783 | |
2784 | if (I.isUnconditional()) { |
2785 | // Update machine-CFG edges. |
2786 | BrMBB->addSuccessor(Succ: Succ0MBB); |
2787 | |
2788 | // If this is not a fall-through branch or optimizations are switched off, |
2789 | // emit the branch. |
2790 | if (Succ0MBB != NextBlock(MBB: BrMBB) || |
2791 | TM.getOptLevel() == CodeGenOptLevel::None) { |
2792 | auto Br = DAG.getNode(Opcode: ISD::BR, DL: getCurSDLoc(), VT: MVT::Other, |
2793 | N1: getControlRoot(), N2: DAG.getBasicBlock(MBB: Succ0MBB)); |
2794 | setValue(V: &I, NewN: Br); |
2795 | DAG.setRoot(Br); |
2796 | } |
2797 | |
2798 | return; |
2799 | } |
2800 | |
2801 | // If this condition is one of the special cases we handle, do special stuff |
2802 | // now. |
2803 | const Value *CondVal = I.getCondition(); |
2804 | MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(i: 1)]; |
2805 | |
2806 | // If this is a series of conditions that are or'd or and'd together, emit |
2807 | // this as a sequence of branches instead of setcc's with and/or operations. |
2808 | // As long as jumps are not expensive (exceptions for multi-use logic ops, |
2809 | // unpredictable branches, and vector extracts because those jumps are likely |
2810 | // expensive for any target), this should improve performance. |
2811 | // For example, instead of something like: |
2812 | // cmp A, B |
2813 | // C = seteq |
2814 | // cmp D, E |
2815 | // F = setle |
2816 | // or C, F |
2817 | // jnz foo |
2818 | // Emit: |
2819 | // cmp A, B |
2820 | // je foo |
2821 | // cmp D, E |
2822 | // jle foo |
2823 | const Instruction *BOp = dyn_cast<Instruction>(Val: CondVal); |
2824 | if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp && |
2825 | BOp->hasOneUse() && !I.hasMetadata(KindID: LLVMContext::MD_unpredictable)) { |
2826 | Value *Vec; |
2827 | const Value *BOp0, *BOp1; |
2828 | Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0; |
2829 | if (match(V: BOp, P: m_LogicalAnd(L: m_Value(V&: BOp0), R: m_Value(V&: BOp1)))) |
2830 | Opcode = Instruction::And; |
2831 | else if (match(V: BOp, P: m_LogicalOr(L: m_Value(V&: BOp0), R: m_Value(V&: BOp1)))) |
2832 | Opcode = Instruction::Or; |
2833 | |
2834 | if (Opcode && |
2835 | !(match(V: BOp0, P: m_ExtractElt(Val: m_Value(V&: Vec), Idx: m_Value())) && |
2836 | match(V: BOp1, P: m_ExtractElt(Val: m_Specific(V: Vec), Idx: m_Value()))) && |
2837 | !shouldKeepJumpConditionsTogether( |
2838 | FuncInfo, I, Opc: Opcode, Lhs: BOp0, Rhs: BOp1, |
2839 | Params: DAG.getTargetLoweringInfo().getJumpConditionMergingParams( |
2840 | Opcode, BOp0, BOp1))) { |
2841 | FindMergedConditions(Cond: BOp, TBB: Succ0MBB, FBB: Succ1MBB, CurBB: BrMBB, SwitchBB: BrMBB, Opc: Opcode, |
2842 | TProb: getEdgeProbability(Src: BrMBB, Dst: Succ0MBB), |
2843 | FProb: getEdgeProbability(Src: BrMBB, Dst: Succ1MBB), |
2844 | /*InvertCond=*/false); |
2845 | // If the compares in later blocks need to use values not currently |
2846 | // exported from this block, export them now. This block should always |
2847 | // be the first entry. |
2848 | assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
2849 | |
2850 | // Allow some cases to be rejected. |
2851 | if (ShouldEmitAsBranches(Cases: SL->SwitchCases)) { |
2852 | for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) { |
2853 | ExportFromCurrentBlock(V: SL->SwitchCases[i].CmpLHS); |
2854 | ExportFromCurrentBlock(V: SL->SwitchCases[i].CmpRHS); |
2855 | } |
2856 | |
2857 | // Emit the branch for this block. |
2858 | visitSwitchCase(CB&: SL->SwitchCases[0], SwitchBB: BrMBB); |
2859 | SL->SwitchCases.erase(position: SL->SwitchCases.begin()); |
2860 | return; |
2861 | } |
2862 | |
2863 | // Okay, we decided not to do this, remove any inserted MBB's and clear |
2864 | // SwitchCases. |
2865 | for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) |
2866 | FuncInfo.MF->erase(MBBI: SL->SwitchCases[i].ThisBB); |
2867 | |
2868 | SL->SwitchCases.clear(); |
2869 | } |
2870 | } |
2871 | |
2872 | // Create a CaseBlock record representing this branch. |
2873 | CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(Context&: *DAG.getContext()), |
2874 | nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc()); |
2875 | |
2876 | // Use visitSwitchCase to actually insert the fast branch sequence for this |
2877 | // cond branch. |
2878 | visitSwitchCase(CB, SwitchBB: BrMBB); |
2879 | } |
2880 | |
2881 | /// visitSwitchCase - Emits the necessary code to represent a single node in |
2882 | /// the binary search tree resulting from lowering a switch instruction. |
2883 | void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, |
2884 | MachineBasicBlock *SwitchBB) { |
2885 | SDValue Cond; |
2886 | SDValue CondLHS = getValue(V: CB.CmpLHS); |
2887 | SDLoc dl = CB.DL; |
2888 | |
2889 | if (CB.CC == ISD::SETTRUE) { |
2890 | // Branch or fall through to TrueBB. |
2891 | addSuccessorWithProb(Src: SwitchBB, Dst: CB.TrueBB, Prob: CB.TrueProb); |
2892 | SwitchBB->normalizeSuccProbs(); |
2893 | if (CB.TrueBB != NextBlock(MBB: SwitchBB)) { |
2894 | DAG.setRoot(DAG.getNode(Opcode: ISD::BR, DL: dl, VT: MVT::Other, N1: getControlRoot(), |
2895 | N2: DAG.getBasicBlock(MBB: CB.TrueBB))); |
2896 | } |
2897 | return; |
2898 | } |
2899 | |
2900 | auto &TLI = DAG.getTargetLoweringInfo(); |
2901 | EVT MemVT = TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: CB.CmpLHS->getType()); |
2902 | |
2903 | // Build the setcc now. |
2904 | if (!CB.CmpMHS) { |
2905 | // Fold "(X == true)" to X and "(X == false)" to !X to |
2906 | // handle common cases produced by branch lowering. |
2907 | if (CB.CmpRHS == ConstantInt::getTrue(Context&: *DAG.getContext()) && |
2908 | CB.CC == ISD::SETEQ) |
2909 | Cond = CondLHS; |
2910 | else if (CB.CmpRHS == ConstantInt::getFalse(Context&: *DAG.getContext()) && |
2911 | CB.CC == ISD::SETEQ) { |
2912 | SDValue True = DAG.getConstant(Val: 1, DL: dl, VT: CondLHS.getValueType()); |
2913 | Cond = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: CondLHS.getValueType(), N1: CondLHS, N2: True); |
2914 | } else { |
2915 | SDValue CondRHS = getValue(V: CB.CmpRHS); |
2916 | |
2917 | // If a pointer's DAG type is larger than its memory type then the DAG |
2918 | // values are zero-extended. This breaks signed comparisons so truncate |
2919 | // back to the underlying type before doing the compare. |
2920 | if (CondLHS.getValueType() != MemVT) { |
2921 | CondLHS = DAG.getPtrExtOrTrunc(Op: CondLHS, DL: getCurSDLoc(), VT: MemVT); |
2922 | CondRHS = DAG.getPtrExtOrTrunc(Op: CondRHS, DL: getCurSDLoc(), VT: MemVT); |
2923 | } |
2924 | Cond = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: CondLHS, RHS: CondRHS, Cond: CB.CC); |
2925 | } |
2926 | } else { |
2927 | assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
2928 | |
2929 | const APInt& Low = cast<ConstantInt>(Val: CB.CmpLHS)->getValue(); |
2930 | const APInt& High = cast<ConstantInt>(Val: CB.CmpRHS)->getValue(); |
2931 | |
2932 | SDValue CmpOp = getValue(V: CB.CmpMHS); |
2933 | EVT VT = CmpOp.getValueType(); |
2934 | |
2935 | if (cast<ConstantInt>(Val: CB.CmpLHS)->isMinValue(IsSigned: true)) { |
2936 | Cond = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: CmpOp, RHS: DAG.getConstant(Val: High, DL: dl, VT), |
2937 | Cond: ISD::SETLE); |
2938 | } else { |
2939 | SDValue SUB = DAG.getNode(Opcode: ISD::SUB, DL: dl, |
2940 | VT, N1: CmpOp, N2: DAG.getConstant(Val: Low, DL: dl, VT)); |
2941 | Cond = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: SUB, |
2942 | RHS: DAG.getConstant(Val: High-Low, DL: dl, VT), Cond: ISD::SETULE); |
2943 | } |
2944 | } |
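// Illustrative example (not from the original source): the range path above
// lowers a check for 10 <= X <= 13 to (X - 10) u<= 3, i.e. one SUB plus a
// single unsigned comparison instead of two signed ones.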
2945 | |
2946 | // Update successor info |
2947 | addSuccessorWithProb(Src: SwitchBB, Dst: CB.TrueBB, Prob: CB.TrueProb); |
2948 | // TrueBB and FalseBB are always different unless the incoming IR is |
2949 | // degenerate. This only happens when running llc on weird IR. |
2950 | if (CB.TrueBB != CB.FalseBB) |
2951 | addSuccessorWithProb(Src: SwitchBB, Dst: CB.FalseBB, Prob: CB.FalseProb); |
2952 | SwitchBB->normalizeSuccProbs(); |
2953 | |
2954 | // If the lhs block is the next block, invert the condition so that we can |
2955 | // fall through to the lhs instead of the rhs block. |
2956 | if (CB.TrueBB == NextBlock(MBB: SwitchBB)) { |
2957 | std::swap(a&: CB.TrueBB, b&: CB.FalseBB); |
2958 | SDValue True = DAG.getConstant(Val: 1, DL: dl, VT: Cond.getValueType()); |
2959 | Cond = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: Cond.getValueType(), N1: Cond, N2: True); |
2960 | } |
2961 | |
2962 | SDValue BrCond = DAG.getNode(Opcode: ISD::BRCOND, DL: dl, |
2963 | VT: MVT::Other, N1: getControlRoot(), N2: Cond, |
2964 | N3: DAG.getBasicBlock(MBB: CB.TrueBB)); |
2965 | |
2966 | setValue(V: CurInst, NewN: BrCond); |
2967 | |
2968 | // Insert the false branch. Do this even if it's a fall through branch, |
2969 | // this makes it easier to do DAG optimizations which require inverting |
2970 | // the branch condition. |
2971 | BrCond = DAG.getNode(Opcode: ISD::BR, DL: dl, VT: MVT::Other, N1: BrCond, |
2972 | N2: DAG.getBasicBlock(MBB: CB.FalseBB)); |
2973 | |
2974 | DAG.setRoot(BrCond); |
2975 | } |
2976 | |
2977 | /// visitJumpTable - Emit the JumpTable node in the current MBB.
2978 | void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) { |
2979 | // Emit the code for the jump table |
2980 | assert(JT.SL && "Should set SDLoc for SelectionDAG!");
2981 | assert(JT.Reg != -1U && "Should lower JT Header first!");
2982 | EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout()); |
2983 | SDValue Index = DAG.getCopyFromReg(Chain: getControlRoot(), dl: *JT.SL, Reg: JT.Reg, VT: PTy); |
2984 | SDValue Table = DAG.getJumpTable(JTI: JT.JTI, VT: PTy); |
2985 | SDValue BrJumpTable = DAG.getNode(Opcode: ISD::BR_JT, DL: *JT.SL, VT: MVT::Other, |
2986 | N1: Index.getValue(R: 1), N2: Table, N3: Index); |
2987 | DAG.setRoot(BrJumpTable); |
2988 | } |
2989 | |
2990 | /// visitJumpTableHeader - This function emits the necessary code to produce an
2991 | /// index into the JumpTable from the switch case value.
2992 | void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
2993 | JumpTableHeader &JTH, |
2994 | MachineBasicBlock *SwitchBB) { |
2995 | assert(JT.SL && "Should set SDLoc for SelectionDAG!");
2996 | const SDLoc &dl = *JT.SL; |
2997 | |
2998 | // Subtract the lowest switch case value from the value being switched on. |
2999 | SDValue SwitchOp = getValue(V: JTH.SValue); |
3000 | EVT VT = SwitchOp.getValueType(); |
3001 | SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: SwitchOp, |
3002 | N2: DAG.getConstant(Val: JTH.First, DL: dl, VT)); |
3003 | |
3004 | // The SDNode we just created, which holds the value being switched on minus |
3005 | // the smallest case value, needs to be copied to a virtual register so it |
3006 | // can be used as an index into the jump table in a subsequent basic block. |
3007 | // This value may be smaller or larger than the target's pointer type, and |
3008 | // may therefore require extension or truncation.
3009 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3010 | SwitchOp = DAG.getZExtOrTrunc(Op: Sub, DL: dl, VT: TLI.getPointerTy(DL: DAG.getDataLayout())); |
3011 | |
3012 | unsigned JumpTableReg = |
3013 | FuncInfo.CreateReg(VT: TLI.getPointerTy(DL: DAG.getDataLayout())); |
3014 | SDValue CopyTo = DAG.getCopyToReg(Chain: getControlRoot(), dl, |
3015 | Reg: JumpTableReg, N: SwitchOp); |
3016 | JT.Reg = JumpTableReg; |
3017 | |
3018 | if (!JTH.FallthroughUnreachable) { |
3019 | // Emit the range check for the jump table, and branch to the default block |
3020 | // for the switch statement if the value being switched on exceeds the |
3021 | // largest case in the switch. |
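// Illustrative example (not from the original source): for a switch over
// cases 10..20, Sub is (X - 10) and the guard branches to the default
// block when Sub u> 10.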
3022 | SDValue CMP = DAG.getSetCC( |
3023 | DL: dl, VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), |
3024 | VT: Sub.getValueType()), |
3025 | LHS: Sub, RHS: DAG.getConstant(Val: JTH.Last - JTH.First, DL: dl, VT), Cond: ISD::SETUGT); |
3026 | |
3027 | SDValue BrCond = DAG.getNode(Opcode: ISD::BRCOND, DL: dl, |
3028 | VT: MVT::Other, N1: CopyTo, N2: CMP, |
3029 | N3: DAG.getBasicBlock(MBB: JT.Default)); |
3030 | |
3031 | // Avoid emitting unnecessary branches to the next block. |
3032 | if (JT.MBB != NextBlock(MBB: SwitchBB)) |
3033 | BrCond = DAG.getNode(Opcode: ISD::BR, DL: dl, VT: MVT::Other, N1: BrCond, |
3034 | N2: DAG.getBasicBlock(MBB: JT.MBB)); |
3035 | |
3036 | DAG.setRoot(BrCond); |
3037 | } else { |
3038 | // Avoid emitting unnecessary branches to the next block. |
3039 | if (JT.MBB != NextBlock(MBB: SwitchBB)) |
3040 | DAG.setRoot(DAG.getNode(Opcode: ISD::BR, DL: dl, VT: MVT::Other, N1: CopyTo, |
3041 | N2: DAG.getBasicBlock(MBB: JT.MBB))); |
3042 | else |
3043 | DAG.setRoot(CopyTo); |
3044 | } |
3045 | } |
3046 | |
3047 | /// Create a LOAD_STACK_GUARD node, and let it carry the target specific global |
3048 | /// variable if there exists one. |
3049 | static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, |
3050 | SDValue &Chain) { |
3051 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3052 | EVT PtrTy = TLI.getPointerTy(DL: DAG.getDataLayout()); |
3053 | EVT PtrMemTy = TLI.getPointerMemTy(DL: DAG.getDataLayout()); |
3054 | MachineFunction &MF = DAG.getMachineFunction(); |
3055 | Value *Global = TLI.getSDagStackGuard(M: *MF.getFunction().getParent()); |
3056 | MachineSDNode *Node = |
3057 | DAG.getMachineNode(Opcode: TargetOpcode::LOAD_STACK_GUARD, dl: DL, VT: PtrTy, Op1: Chain); |
3058 | if (Global) { |
3059 | MachinePointerInfo MPInfo(Global); |
3060 | auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | |
3061 | MachineMemOperand::MODereferenceable; |
3062 | MachineMemOperand *MemRef = MF.getMachineMemOperand( |
3063 | PtrInfo: MPInfo, F: Flags, Size: LocationSize::precise(Value: PtrTy.getSizeInBits() / 8), |
3064 | BaseAlignment: DAG.getEVTAlign(MemoryVT: PtrTy)); |
3065 | DAG.setNodeMemRefs(N: Node, NewMemRefs: {MemRef}); |
3066 | } |
3067 | if (PtrTy != PtrMemTy) |
3068 | return DAG.getPtrExtOrTrunc(Op: SDValue(Node, 0), DL, VT: PtrMemTy); |
3069 | return SDValue(Node, 0); |
3070 | } |
3071 | |
3072 | /// Codegen a new tail for a stack protector check ParentMBB which has had its |
3073 | /// tail spliced into a stack protector check success bb. |
3074 | /// |
3075 | /// For a high level explanation of how this fits into the stack protector |
3076 | /// generation see the comment on the declaration of class |
3077 | /// StackProtectorDescriptor. |
3078 | void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, |
3079 | MachineBasicBlock *ParentBB) { |
3080 | |
3081 | // First create the loads to the guard/stack slot for the comparison. |
3082 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3083 | EVT PtrTy = TLI.getPointerTy(DL: DAG.getDataLayout()); |
3084 | EVT PtrMemTy = TLI.getPointerMemTy(DL: DAG.getDataLayout()); |
3085 | |
3086 | MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo(); |
3087 | int FI = MFI.getStackProtectorIndex(); |
3088 | |
3089 | SDValue Guard; |
3090 | SDLoc dl = getCurSDLoc(); |
3091 | SDValue StackSlotPtr = DAG.getFrameIndex(FI, VT: PtrTy); |
3092 | const Module &M = *ParentBB->getParent()->getFunction().getParent(); |
3093 | Align Align = |
3094 | DAG.getDataLayout().getPrefTypeAlign(Ty: PointerType::get(C&: M.getContext(), AddressSpace: 0)); |
3095 | |
3096 | // Generate code to load the content of the guard slot. |
3097 | SDValue GuardVal = DAG.getLoad( |
3098 | VT: PtrMemTy, dl, Chain: DAG.getEntryNode(), Ptr: StackSlotPtr, |
3099 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), Alignment: Align, |
3100 | MMOFlags: MachineMemOperand::MOVolatile); |
3101 | |
3102 | if (TLI.useStackGuardXorFP()) |
3103 | GuardVal = TLI.emitStackGuardXorFP(DAG, Val: GuardVal, DL: dl); |
3104 | |
3105 | // Retrieve guard check function, nullptr if instrumentation is inlined. |
3106 | if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) { |
3107 | // The target provides a guard check function to validate the guard value. |
3108 | // Generate a call to that function with the content of the guard slot as |
3109 | // argument. |
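// For example (target-specific detail, stated here as an aside): MSVC
// environments typically use __security_check_cookie, whose single
// argument is passed in a register, which is why InReg is honored below.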
3110 | FunctionType *FnTy = GuardCheckFn->getFunctionType(); |
3111 | assert(FnTy->getNumParams() == 1 && "Invalid function signature");
3112 | |
3113 | TargetLowering::ArgListTy Args; |
3114 | TargetLowering::ArgListEntry Entry; |
3115 | Entry.Node = GuardVal; |
3116 | Entry.Ty = FnTy->getParamType(i: 0); |
3117 | if (GuardCheckFn->hasParamAttribute(ArgNo: 0, Kind: Attribute::AttrKind::InReg)) |
3118 | Entry.IsInReg = true; |
3119 | Args.push_back(x: Entry); |
3120 | |
3121 | TargetLowering::CallLoweringInfo CLI(DAG); |
3122 | CLI.setDebugLoc(getCurSDLoc()) |
3123 | .setChain(DAG.getEntryNode()) |
3124 | .setCallee(CC: GuardCheckFn->getCallingConv(), ResultType: FnTy->getReturnType(), |
3125 | Target: getValue(V: GuardCheckFn), ArgsList: std::move(Args)); |
3126 | |
3127 | std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); |
3128 | DAG.setRoot(Result.second); |
3129 | return; |
3130 | } |
3131 | |
3132 | // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD. |
3133 | // Otherwise, emit a volatile load to retrieve the stack guard value. |
3134 | SDValue Chain = DAG.getEntryNode(); |
3135 | if (TLI.useLoadStackGuardNode()) { |
3136 | Guard = getLoadStackGuard(DAG, DL: dl, Chain); |
3137 | } else { |
3138 | const Value *IRGuard = TLI.getSDagStackGuard(M); |
3139 | SDValue GuardPtr = getValue(V: IRGuard); |
3140 | |
3141 | Guard = DAG.getLoad(VT: PtrMemTy, dl, Chain, Ptr: GuardPtr, |
3142 | PtrInfo: MachinePointerInfo(IRGuard, 0), Alignment: Align, |
3143 | MMOFlags: MachineMemOperand::MOVolatile); |
3144 | } |
3145 | |
3146 | // Perform the comparison via a getsetcc. |
3147 | SDValue Cmp = DAG.getSetCC(DL: dl, VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), |
3148 | Context&: *DAG.getContext(), |
3149 | VT: Guard.getValueType()), |
3150 | LHS: Guard, RHS: GuardVal, Cond: ISD::SETNE); |
3151 | |
3152 | // If the guard/stackslot do not equal, branch to failure MBB. |
3153 | SDValue BrCond = DAG.getNode(Opcode: ISD::BRCOND, DL: dl, |
3154 | VT: MVT::Other, N1: GuardVal.getOperand(i: 0), |
3155 | N2: Cmp, N3: DAG.getBasicBlock(MBB: SPD.getFailureMBB())); |
3156 | // Otherwise branch to success MBB. |
3157 | SDValue Br = DAG.getNode(Opcode: ISD::BR, DL: dl, |
3158 | VT: MVT::Other, N1: BrCond, |
3159 | N2: DAG.getBasicBlock(MBB: SPD.getSuccessMBB())); |
3160 | |
3161 | DAG.setRoot(Br); |
3162 | } |
3163 | |
3164 | /// Codegen the failure basic block for a stack protector check. |
3165 | /// |
3166 | /// A failure stack protector machine basic block consists simply of a call to |
3167 | /// __stack_chk_fail(). |
3168 | /// |
3169 | /// For a high level explanation of how this fits into the stack protector |
3170 | /// generation see the comment on the declaration of class |
3171 | /// StackProtectorDescriptor. |
3172 | void |
3173 | SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { |
3174 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3175 | TargetLowering::MakeLibCallOptions CallOptions; |
3176 | CallOptions.setDiscardResult(true); |
3177 | SDValue Chain = |
3178 | TLI.makeLibCall(DAG, LC: RTLIB::STACKPROTECTOR_CHECK_FAIL, RetVT: MVT::isVoid, |
3179 | Ops: std::nullopt, CallOptions, dl: getCurSDLoc()) |
3180 | .second; |
3181 | // On PS4/PS5, the "return address" must still be within the calling |
3182 | // function, even if it's at the very end, so emit an explicit TRAP here. |
3183 | // Passing 'true' for doesNotReturn above won't generate the trap for us. |
3184 | if (TM.getTargetTriple().isPS()) |
3185 | Chain = DAG.getNode(Opcode: ISD::TRAP, DL: getCurSDLoc(), VT: MVT::Other, Operand: Chain); |
3186 | // WebAssembly needs an unreachable instruction after a non-returning call, |
3187 | // because the function return type can be different from __stack_chk_fail's |
3188 | // return type (void). |
3189 | if (TM.getTargetTriple().isWasm()) |
3190 | Chain = DAG.getNode(Opcode: ISD::TRAP, DL: getCurSDLoc(), VT: MVT::Other, Operand: Chain); |
3191 | |
3192 | DAG.setRoot(Chain); |
3193 | } |
3194 | |
3195 | /// visitBitTestHeader - This function emits the necessary code to produce a
3196 | /// value suitable for "bit tests".
3197 | void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
3198 | MachineBasicBlock *SwitchBB) { |
3199 | SDLoc dl = getCurSDLoc(); |
3200 | |
3201 | // Subtract the minimum value. |
3202 | SDValue SwitchOp = getValue(V: B.SValue); |
3203 | EVT VT = SwitchOp.getValueType(); |
3204 | SDValue RangeSub = |
3205 | DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: SwitchOp, N2: DAG.getConstant(Val: B.First, DL: dl, VT)); |
3206 | |
3207 | // Determine the type of the test operands. |
3208 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3209 | bool UsePtrType = false; |
3210 | if (!TLI.isTypeLegal(VT)) { |
3211 | UsePtrType = true; |
3212 | } else { |
3213 | for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) |
3214 | if (!isUIntN(N: VT.getSizeInBits(), x: B.Cases[i].Mask)) { |
3215 | // Switch table case ranges are encoded into a series of masks.
3216 | // Just use pointer type, it's guaranteed to fit. |
3217 | UsePtrType = true; |
3218 | break; |
3219 | } |
3220 | } |
3221 | SDValue Sub = RangeSub; |
3222 | if (UsePtrType) { |
3223 | VT = TLI.getPointerTy(DL: DAG.getDataLayout()); |
3224 | Sub = DAG.getZExtOrTrunc(Op: Sub, DL: dl, VT); |
3225 | } |
3226 | |
3227 | B.RegVT = VT.getSimpleVT(); |
3228 | B.Reg = FuncInfo.CreateReg(VT: B.RegVT); |
3229 | SDValue CopyTo = DAG.getCopyToReg(Chain: getControlRoot(), dl, Reg: B.Reg, N: Sub); |
3230 | |
3231 | MachineBasicBlock* MBB = B.Cases[0].ThisBB; |
3232 | |
3233 | if (!B.FallthroughUnreachable) |
3234 | addSuccessorWithProb(Src: SwitchBB, Dst: B.Default, Prob: B.DefaultProb); |
3235 | addSuccessorWithProb(Src: SwitchBB, Dst: MBB, Prob: B.Prob); |
3236 | SwitchBB->normalizeSuccProbs(); |
3237 | |
3238 | SDValue Root = CopyTo; |
3239 | if (!B.FallthroughUnreachable) { |
3240 | // Conditional branch to the default block. |
3241 | SDValue RangeCmp = DAG.getSetCC(DL: dl, |
3242 | VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), |
3243 | VT: RangeSub.getValueType()), |
3244 | LHS: RangeSub, RHS: DAG.getConstant(Val: B.Range, DL: dl, VT: RangeSub.getValueType()), |
3245 | Cond: ISD::SETUGT); |
3246 | |
3247 | Root = DAG.getNode(Opcode: ISD::BRCOND, DL: dl, VT: MVT::Other, N1: Root, N2: RangeCmp, |
3248 | N3: DAG.getBasicBlock(MBB: B.Default)); |
3249 | } |
3250 | |
3251 | // Avoid emitting unnecessary branches to the next block. |
3252 | if (MBB != NextBlock(MBB: SwitchBB)) |
3253 | Root = DAG.getNode(Opcode: ISD::BR, DL: dl, VT: MVT::Other, N1: Root, N2: DAG.getBasicBlock(MBB)); |
3254 | |
3255 | DAG.setRoot(Root); |
3256 | } |
3257 | |
3258 | /// visitBitTestCase - this function produces one "bit test" |
3259 | void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, |
3260 | MachineBasicBlock* NextMBB, |
3261 | BranchProbability BranchProbToNext, |
3262 | unsigned Reg, |
3263 | BitTestCase &B, |
3264 | MachineBasicBlock *SwitchBB) { |
3265 | SDLoc dl = getCurSDLoc(); |
3266 | MVT VT = BB.RegVT; |
3267 | SDValue ShiftOp = DAG.getCopyFromReg(Chain: getControlRoot(), dl, Reg, VT); |
3268 | SDValue Cmp; |
3269 | unsigned PopCount = llvm::popcount(Value: B.Mask); |
3270 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3271 | if (PopCount == 1) { |
3272 | // Testing for a single bit; just compare the shift count with what it |
3273 | // would need to be to shift a 1 bit in that position. |
3274 | Cmp = DAG.getSetCC( |
3275 | DL: dl, VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT), |
3276 | LHS: ShiftOp, RHS: DAG.getConstant(Val: llvm::countr_zero(Val: B.Mask), DL: dl, VT), |
3277 | Cond: ISD::SETEQ); |
3278 | } else if (PopCount == BB.Range) { |
3279 | // There is only one zero bit in the range, test for it directly. |
3280 | Cmp = DAG.getSetCC( |
3281 | DL: dl, VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT), |
3282 | LHS: ShiftOp, RHS: DAG.getConstant(Val: llvm::countr_one(Value: B.Mask), DL: dl, VT), Cond: ISD::SETNE); |
3283 | } else { |
3284 | // Make desired shift |
3285 | SDValue SwitchVal = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, |
3286 | N1: DAG.getConstant(Val: 1, DL: dl, VT), N2: ShiftOp); |
3287 | |
3288 | // Emit bit tests and jumps |
3289 | SDValue AndOp = DAG.getNode(Opcode: ISD::AND, DL: dl, |
3290 | VT, N1: SwitchVal, N2: DAG.getConstant(Val: B.Mask, DL: dl, VT)); |
3291 | Cmp = DAG.getSetCC( |
3292 | DL: dl, VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT), |
3293 | LHS: AndOp, RHS: DAG.getConstant(Val: 0, DL: dl, VT), Cond: ISD::SETNE); |
3294 | } |
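// Illustrative example (not from the original source): for Mask = 0x10 the
// single-bit path above compares ShiftOp == 4 (countr_zero(0x10)), while
// the general path materializes (1 << ShiftOp) & Mask and tests it != 0.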
3295 | |
3296 | // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb. |
3297 | addSuccessorWithProb(Src: SwitchBB, Dst: B.TargetBB, Prob: B.ExtraProb); |
3298 | // The branch probability from SwitchBB to NextMBB is BranchProbToNext. |
3299 | addSuccessorWithProb(Src: SwitchBB, Dst: NextMBB, Prob: BranchProbToNext); |
3300 | // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is |
3301 | // one as they are relative probabilities (and thus work more like weights), |
3302 | // and hence we need to normalize them to let the sum of them become one. |
3303 | SwitchBB->normalizeSuccProbs(); |
3304 | |
3305 | SDValue BrAnd = DAG.getNode(Opcode: ISD::BRCOND, DL: dl, |
3306 | VT: MVT::Other, N1: getControlRoot(), |
3307 | N2: Cmp, N3: DAG.getBasicBlock(MBB: B.TargetBB)); |
3308 | |
3309 | // Avoid emitting unnecessary branches to the next block. |
3310 | if (NextMBB != NextBlock(MBB: SwitchBB)) |
3311 | BrAnd = DAG.getNode(Opcode: ISD::BR, DL: dl, VT: MVT::Other, N1: BrAnd, |
3312 | N2: DAG.getBasicBlock(MBB: NextMBB)); |
3313 | |
3314 | DAG.setRoot(BrAnd); |
3315 | } |
3316 | |
3317 | void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { |
3318 | MachineBasicBlock *InvokeMBB = FuncInfo.MBB; |
3319 | |
3320 | // Retrieve successors. Look through artificial IR level blocks like |
3321 | // catchswitch for successors. |
3322 | MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(i: 0)]; |
3323 | const BasicBlock *EHPadBB = I.getSuccessor(i: 1); |
3324 | MachineBasicBlock *EHPadMBB = FuncInfo.MBBMap[EHPadBB]; |
3325 | |
3326 | // Deopt and ptrauth bundles are lowered in helper functions, and we don't |
3327 | // have to do anything here to lower funclet bundles. |
3328 | assert(!I.hasOperandBundlesOtherThan( |
3329 | {LLVMContext::OB_deopt, LLVMContext::OB_gc_transition, |
3330 | LLVMContext::OB_gc_live, LLVMContext::OB_funclet, |
3331 | LLVMContext::OB_cfguardtarget, LLVMContext::OB_ptrauth, |
3332 | LLVMContext::OB_clang_arc_attachedcall}) && |
3333 | "Cannot lower invokes with arbitrary operand bundles yet!" ); |
3334 | |
3335 | const Value *Callee(I.getCalledOperand()); |
3336 | const Function *Fn = dyn_cast<Function>(Val: Callee); |
3337 | if (isa<InlineAsm>(Val: Callee)) |
3338 | visitInlineAsm(Call: I, EHPadBB); |
3339 | else if (Fn && Fn->isIntrinsic()) { |
3340 | switch (Fn->getIntrinsicID()) { |
3341 | default: |
3342 | llvm_unreachable("Cannot invoke this intrinsic" ); |
3343 | case Intrinsic::donothing: |
3344 | // Ignore invokes to @llvm.donothing: jump directly to the next BB. |
3345 | case Intrinsic::seh_try_begin: |
3346 | case Intrinsic::seh_scope_begin: |
3347 | case Intrinsic::seh_try_end: |
3348 | case Intrinsic::seh_scope_end: |
3349 | if (EHPadMBB) |
3350 | // This block is referenced by the EH table, so the
3351 | // dtor funclet must not be removed by optimizations.
3352 | EHPadMBB->setMachineBlockAddressTaken(); |
3353 | break; |
3354 | case Intrinsic::experimental_patchpoint_void: |
3355 | case Intrinsic::experimental_patchpoint: |
3356 | visitPatchpoint(CB: I, EHPadBB); |
3357 | break; |
3358 | case Intrinsic::experimental_gc_statepoint: |
3359 | LowerStatepoint(I: cast<GCStatepointInst>(Val: I), EHPadBB); |
3360 | break; |
3361 | case Intrinsic::wasm_rethrow: { |
3362 | // This is usually done in visitTargetIntrinsic, but this intrinsic is |
3363 | // special because it can be invoked, so we manually lower it to a DAG |
3364 | // node here. |
3365 | SmallVector<SDValue, 8> Ops; |
3366 | Ops.push_back(Elt: getControlRoot()); // inchain for the terminator node |
3367 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3368 | Ops.push_back( |
3369 | Elt: DAG.getTargetConstant(Val: Intrinsic::wasm_rethrow, DL: getCurSDLoc(), |
3370 | VT: TLI.getPointerTy(DL: DAG.getDataLayout()))); |
3371 | SDVTList VTs = DAG.getVTList(VTs: ArrayRef<EVT>({MVT::Other})); // outchain |
3372 | DAG.setRoot(DAG.getNode(Opcode: ISD::INTRINSIC_VOID, DL: getCurSDLoc(), VTList: VTs, Ops)); |
3373 | break; |
3374 | } |
3375 | } |
3376 | } else if (I.hasDeoptState()) { |
3377 | // Currently we do not lower any intrinsic calls with deopt operand bundles. |
3378 | // Eventually we will support lowering the @llvm.experimental.deoptimize |
3379 | // intrinsic, and right now there are no plans to support other intrinsics |
3380 | // with deopt state. |
3381 | LowerCallSiteWithDeoptBundle(Call: &I, Callee: getValue(V: Callee), EHPadBB); |
3382 | } else if (I.countOperandBundlesOfType(ID: LLVMContext::OB_ptrauth)) { |
3383 | LowerCallSiteWithPtrAuthBundle(CB: cast<CallBase>(Val: I), EHPadBB); |
3384 | } else { |
3385 | LowerCallTo(CB: I, Callee: getValue(V: Callee), IsTailCall: false, IsMustTailCall: false, EHPadBB); |
3386 | } |
3387 | |
3388 | // If the value of the invoke is used outside of its defining block, make it |
3389 | // available as a virtual register. |
3390 | // We already took care of the exported value for the statepoint instruction |
3391 | // during call to the LowerStatepoint. |
3392 | if (!isa<GCStatepointInst>(Val: I)) { |
3393 | CopyToExportRegsIfNeeded(V: &I); |
3394 | } |
3395 | |
3396 | SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; |
3397 | BranchProbabilityInfo *BPI = FuncInfo.BPI; |
3398 | BranchProbability EHPadBBProb = |
3399 | BPI ? BPI->getEdgeProbability(Src: InvokeMBB->getBasicBlock(), Dst: EHPadBB) |
3400 | : BranchProbability::getZero(); |
3401 | findUnwindDestinations(FuncInfo, EHPadBB, Prob: EHPadBBProb, UnwindDests); |
3402 | |
3403 | // Update successor info. |
3404 | addSuccessorWithProb(Src: InvokeMBB, Dst: Return); |
3405 | for (auto &UnwindDest : UnwindDests) { |
3406 | UnwindDest.first->setIsEHPad(); |
3407 | addSuccessorWithProb(Src: InvokeMBB, Dst: UnwindDest.first, Prob: UnwindDest.second); |
3408 | } |
3409 | InvokeMBB->normalizeSuccProbs(); |
3410 | |
3411 | // Drop into normal successor. |
3412 | DAG.setRoot(DAG.getNode(Opcode: ISD::BR, DL: getCurSDLoc(), VT: MVT::Other, N1: getControlRoot(), |
3413 | N2: DAG.getBasicBlock(MBB: Return))); |
3414 | } |
3415 | |
3416 | void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { |
3417 | MachineBasicBlock *CallBrMBB = FuncInfo.MBB; |
3418 | |
3419 | // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't |
3420 | // have to do anything here to lower funclet bundles. |
3421 | assert(!I.hasOperandBundlesOtherThan( |
3422 | {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && |
3423 | "Cannot lower callbrs with arbitrary operand bundles yet!" ); |
3424 | |
3425 | assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
3426 | visitInlineAsm(Call: I); |
3427 | CopyToExportRegsIfNeeded(V: &I); |
3428 | |
3429 | // Retrieve successors. |
3430 | SmallPtrSet<BasicBlock *, 8> Dests; |
3431 | Dests.insert(Ptr: I.getDefaultDest()); |
3432 | MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()]; |
3433 | |
3434 | // Update successor info. |
3435 | addSuccessorWithProb(Src: CallBrMBB, Dst: Return, Prob: BranchProbability::getOne()); |
3436 | for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) { |
3437 | BasicBlock *Dest = I.getIndirectDest(i); |
3438 | MachineBasicBlock *Target = FuncInfo.MBBMap[Dest]; |
3439 | Target->setIsInlineAsmBrIndirectTarget(); |
3440 | Target->setMachineBlockAddressTaken(); |
3441 | Target->setLabelMustBeEmitted(); |
3442 | // Don't add duplicate machine successors. |
3443 | if (Dests.insert(Ptr: Dest).second) |
3444 | addSuccessorWithProb(Src: CallBrMBB, Dst: Target, Prob: BranchProbability::getZero()); |
3445 | } |
3446 | CallBrMBB->normalizeSuccProbs(); |
3447 | |
3448 | // Drop into default successor. |
3449 | DAG.setRoot(DAG.getNode(Opcode: ISD::BR, DL: getCurSDLoc(), |
3450 | VT: MVT::Other, N1: getControlRoot(), |
3451 | N2: DAG.getBasicBlock(MBB: Return))); |
3452 | } |
3453 | |
3454 | void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { |
3455 | llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!" ); |
3456 | } |
3457 | |
3458 | void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { |
3459 | assert(FuncInfo.MBB->isEHPad() && |
3460 | "Call to landingpad not in landing pad!" ); |
3461 | |
3462 | // If there aren't registers to copy the values into (e.g., during SjLj |
3463 | // exceptions), then don't bother to create these DAG nodes. |
3464 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3465 | const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn(); |
3466 | if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 && |
3467 | TLI.getExceptionSelectorRegister(PersonalityFn) == 0) |
3468 | return; |
3469 | |
3470 | // If landingpad's return type is token type, we don't create DAG nodes |
3471 | // for its exception pointer and selector value. The extraction of exception |
3472 | // pointer or selector value from token type landingpads is not currently |
3473 | // supported. |
3474 | if (LP.getType()->isTokenTy()) |
3475 | return; |
3476 | |
3477 | SmallVector<EVT, 2> ValueVTs; |
3478 | SDLoc dl = getCurSDLoc(); |
3479 | ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: LP.getType(), ValueVTs); |
3480 | assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
3481 | |
3482 | // Get the two live-in registers as SDValues. The physregs have already been |
3483 | // copied into virtual registers. |
3484 | SDValue Ops[2]; |
3485 | if (FuncInfo.ExceptionPointerVirtReg) { |
3486 | Ops[0] = DAG.getZExtOrTrunc( |
3487 | Op: DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl, |
3488 | Reg: FuncInfo.ExceptionPointerVirtReg, |
3489 | VT: TLI.getPointerTy(DL: DAG.getDataLayout())), |
3490 | DL: dl, VT: ValueVTs[0]); |
3491 | } else { |
3492 | Ops[0] = DAG.getConstant(Val: 0, DL: dl, VT: TLI.getPointerTy(DL: DAG.getDataLayout())); |
3493 | } |
3494 | Ops[1] = DAG.getZExtOrTrunc( |
3495 | Op: DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl, |
3496 | Reg: FuncInfo.ExceptionSelectorVirtReg, |
3497 | VT: TLI.getPointerTy(DL: DAG.getDataLayout())), |
3498 | DL: dl, VT: ValueVTs[1]); |
3499 | |
3500 | // Merge into one. |
3501 | SDValue Res = DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl, |
3502 | VTList: DAG.getVTList(VTs: ValueVTs), Ops); |
3503 | setValue(V: &LP, NewN: Res); |
3504 | } |
3505 | |
3506 | void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, |
3507 | MachineBasicBlock *Last) { |
3508 | // Update JTCases. |
3509 | for (JumpTableBlock &JTB : SL->JTCases) |
3510 | if (JTB.first.HeaderBB == First) |
3511 | JTB.first.HeaderBB = Last; |
3512 | |
3513 | // Update BitTestCases. |
3514 | for (BitTestBlock &BTB : SL->BitTestCases) |
3515 | if (BTB.Parent == First) |
3516 | BTB.Parent = Last; |
3517 | } |
3518 | |
3519 | void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { |
3520 | MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; |
3521 | |
3522 | // Update machine-CFG edges with unique successors. |
3523 | SmallSet<BasicBlock*, 32> Done; |
3524 | for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { |
3525 | BasicBlock *BB = I.getSuccessor(i); |
3526 | bool Inserted = Done.insert(Ptr: BB).second; |
3527 | if (!Inserted) |
3528 | continue; |
3529 | |
3530 | MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; |
3531 | addSuccessorWithProb(Src: IndirectBrMBB, Dst: Succ); |
3532 | } |
3533 | IndirectBrMBB->normalizeSuccProbs(); |
3534 | |
3535 | DAG.setRoot(DAG.getNode(Opcode: ISD::BRIND, DL: getCurSDLoc(), |
3536 | VT: MVT::Other, N1: getControlRoot(), |
3537 | N2: getValue(V: I.getAddress()))); |
3538 | } |
3539 | |
3540 | void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { |
3541 | if (!DAG.getTarget().Options.TrapUnreachable) |
3542 | return; |
3543 | |
3544 | // We may be able to ignore unreachable behind a noreturn call. |
3545 | if (const CallInst *Call = dyn_cast_or_null<CallInst>(Val: I.getPrevNode()); |
3546 | Call && Call->doesNotReturn()) { |
3547 | if (DAG.getTarget().Options.NoTrapAfterNoreturn) |
3548 | return; |
3549 | // Do not emit an additional trap instruction. |
3550 | if (Call->isNonContinuableTrap()) |
3551 | return; |
3552 | } |
3553 | |
3554 | DAG.setRoot(DAG.getNode(Opcode: ISD::TRAP, DL: getCurSDLoc(), VT: MVT::Other, Operand: DAG.getRoot())); |
3555 | } |
3556 | |
3557 | void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) { |
3558 | SDNodeFlags Flags; |
3559 | if (auto *FPOp = dyn_cast<FPMathOperator>(Val: &I)) |
3560 | Flags.copyFMF(FPMO: *FPOp); |
3561 | |
3562 | SDValue Op = getValue(V: I.getOperand(i: 0)); |
3563 | SDValue UnNodeValue = DAG.getNode(Opcode, DL: getCurSDLoc(), VT: Op.getValueType(), |
3564 | Operand: Op, Flags); |
3565 | setValue(V: &I, NewN: UnNodeValue); |
3566 | } |
3567 | |
3568 | void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { |
3569 | SDNodeFlags Flags; |
3570 | if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(Val: &I)) { |
3571 | Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap()); |
3572 | Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap()); |
3573 | } |
3574 | if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(Val: &I)) |
3575 | Flags.setExact(ExactOp->isExact()); |
3576 | if (auto *DisjointOp = dyn_cast<PossiblyDisjointInst>(Val: &I)) |
3577 | Flags.setDisjoint(DisjointOp->isDisjoint()); |
3578 | if (auto *FPOp = dyn_cast<FPMathOperator>(Val: &I)) |
3579 | Flags.copyFMF(FPMO: *FPOp); |
3580 | |
3581 | SDValue Op1 = getValue(V: I.getOperand(i: 0)); |
3582 | SDValue Op2 = getValue(V: I.getOperand(i: 1)); |
3583 | SDValue BinNodeValue = DAG.getNode(Opcode, DL: getCurSDLoc(), VT: Op1.getValueType(), |
3584 | N1: Op1, N2: Op2, Flags); |
3585 | setValue(V: &I, NewN: BinNodeValue); |
3586 | } |
3587 | |
3588 | void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { |
3589 | SDValue Op1 = getValue(V: I.getOperand(i: 0)); |
3590 | SDValue Op2 = getValue(V: I.getOperand(i: 1)); |
3591 | |
3592 | EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy( |
3593 | LHSTy: Op1.getValueType(), DL: DAG.getDataLayout()); |
3594 | |
3595 | // Coerce the shift amount to the right type if we can. This exposes the |
3596 | // truncate or zext to optimization early. |
3597 | if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { |
3598 | assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(Op1.getValueSizeInBits()) && |
3599 | "Unexpected shift type" ); |
3600 | Op2 = DAG.getZExtOrTrunc(Op: Op2, DL: getCurSDLoc(), VT: ShiftTy); |
3601 | } |
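// Illustrative example (not from the original source): an i64 shift amount
// is truncated here when the target's shift-amount type is i32; this is
// safe because the assert above guarantees ShiftTy can represent every
// valid amount for Op1's width.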
3602 | |
3603 | bool nuw = false; |
3604 | bool nsw = false; |
3605 | bool exact = false; |
3606 | |
3607 | if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) { |
3608 | |
3609 | if (const OverflowingBinaryOperator *OFBinOp = |
3610 | dyn_cast<const OverflowingBinaryOperator>(Val: &I)) { |
3611 | nuw = OFBinOp->hasNoUnsignedWrap(); |
3612 | nsw = OFBinOp->hasNoSignedWrap(); |
3613 | } |
3614 | if (const PossiblyExactOperator *ExactOp = |
3615 | dyn_cast<const PossiblyExactOperator>(Val: &I)) |
3616 | exact = ExactOp->isExact(); |
3617 | } |
3618 | SDNodeFlags Flags; |
3619 | Flags.setExact(exact); |
3620 | Flags.setNoSignedWrap(nsw); |
3621 | Flags.setNoUnsignedWrap(nuw); |
3622 | SDValue Res = DAG.getNode(Opcode, DL: getCurSDLoc(), VT: Op1.getValueType(), N1: Op1, N2: Op2, |
3623 | Flags); |
3624 | setValue(V: &I, NewN: Res); |
3625 | } |
3626 | |
3627 | void SelectionDAGBuilder::visitSDiv(const User &I) { |
3628 | SDValue Op1 = getValue(V: I.getOperand(i: 0)); |
3629 | SDValue Op2 = getValue(V: I.getOperand(i: 1)); |
3630 | |
3631 | SDNodeFlags Flags; |
3632 | Flags.setExact(isa<PossiblyExactOperator>(Val: &I) && |
3633 | cast<PossiblyExactOperator>(Val: &I)->isExact()); |
3634 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SDIV, DL: getCurSDLoc(), VT: Op1.getValueType(), N1: Op1, |
3635 | N2: Op2, Flags)); |
3636 | } |
3637 | |
3638 | void SelectionDAGBuilder::visitICmp(const ICmpInst &I) { |
3639 | ICmpInst::Predicate predicate = I.getPredicate(); |
3640 | SDValue Op1 = getValue(V: I.getOperand(i_nocapture: 0)); |
3641 | SDValue Op2 = getValue(V: I.getOperand(i_nocapture: 1)); |
3642 | ISD::CondCode Opcode = getICmpCondCode(Pred: predicate); |
3643 | |
3644 | auto &TLI = DAG.getTargetLoweringInfo(); |
3645 | EVT MemVT = |
3646 | TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getOperand(i_nocapture: 0)->getType()); |
3647 | |
3648 | // If a pointer's DAG type is larger than its memory type then the DAG values |
3649 | // are zero-extended. This breaks signed comparisons so truncate back to the |
3650 | // underlying type before doing the compare. |
3651 | if (Op1.getValueType() != MemVT) { |
3652 | Op1 = DAG.getPtrExtOrTrunc(Op: Op1, DL: getCurSDLoc(), VT: MemVT); |
3653 | Op2 = DAG.getPtrExtOrTrunc(Op: Op2, DL: getCurSDLoc(), VT: MemVT); |
3654 | } |
3655 | |
3656 | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(), |
3657 | Ty: I.getType()); |
3658 | setValue(V: &I, NewN: DAG.getSetCC(DL: getCurSDLoc(), VT: DestVT, LHS: Op1, RHS: Op2, Cond: Opcode)); |
3659 | } |
3660 | |
3661 | void SelectionDAGBuilder::visitFCmp(const FCmpInst &I) { |
3662 | FCmpInst::Predicate predicate = I.getPredicate(); |
3663 | SDValue Op1 = getValue(V: I.getOperand(i_nocapture: 0)); |
3664 | SDValue Op2 = getValue(V: I.getOperand(i_nocapture: 1)); |
3665 | |
3666 | ISD::CondCode Condition = getFCmpCondCode(Pred: predicate); |
3667 | auto *FPMO = cast<FPMathOperator>(Val: &I); |
3668 | if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath) |
3669 | Condition = getFCmpCodeWithoutNaN(CC: Condition); |
3670 | |
3671 | SDNodeFlags Flags; |
3672 | Flags.copyFMF(FPMO: *FPMO); |
3673 | SelectionDAG::FlagInserter FlagsInserter(DAG, Flags); |
3674 | |
3675 | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(), |
3676 | Ty: I.getType()); |
3677 | setValue(V: &I, NewN: DAG.getSetCC(DL: getCurSDLoc(), VT: DestVT, LHS: Op1, RHS: Op2, Cond: Condition)); |
3678 | } |
3679 | |
3680 | // Check that every user of the select's condition is itself a select; if so,
3681 | // folding the condition into a min/max leaves no other consumers behind.
3682 | static bool hasOnlySelectUsers(const Value *Cond) { |
3683 | return llvm::all_of(Range: Cond->users(), P: [](const Value *V) { |
3684 | return isa<SelectInst>(Val: V); |
3685 | }); |
3686 | } |
3687 | |
3688 | void SelectionDAGBuilder::visitSelect(const User &I) { |
3689 | SmallVector<EVT, 4> ValueVTs; |
3690 | ComputeValueVTs(TLI: DAG.getTargetLoweringInfo(), DL: DAG.getDataLayout(), Ty: I.getType(), |
3691 | ValueVTs); |
3692 | unsigned NumValues = ValueVTs.size(); |
3693 | if (NumValues == 0) return; |
3694 | |
3695 | SmallVector<SDValue, 4> Values(NumValues); |
3696 | SDValue Cond = getValue(V: I.getOperand(i: 0)); |
3697 | SDValue LHSVal = getValue(V: I.getOperand(i: 1)); |
3698 | SDValue RHSVal = getValue(V: I.getOperand(i: 2)); |
3699 | SmallVector<SDValue, 1> BaseOps(1, Cond); |
3700 | ISD::NodeType OpCode = |
3701 | Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; |
3702 | |
3703 | bool IsUnaryAbs = false; |
3704 | bool Negate = false; |
3705 | |
3706 | SDNodeFlags Flags; |
3707 | if (auto *FPOp = dyn_cast<FPMathOperator>(Val: &I)) |
3708 | Flags.copyFMF(FPMO: *FPOp); |
3709 | |
3710 | Flags.setUnpredictable( |
3711 | cast<SelectInst>(Val: I).getMetadata(KindID: LLVMContext::MD_unpredictable)); |
3712 | |
3713 | // Min/max matching is only viable if all output VTs are the same. |
3714 | if (all_equal(Range&: ValueVTs)) { |
3715 | EVT VT = ValueVTs[0]; |
3716 | LLVMContext &Ctx = *DAG.getContext(); |
3717 | auto &TLI = DAG.getTargetLoweringInfo(); |
3718 | |
3719 | // We care about the legality of the operation after it has been type |
3720 | // legalized. |
3721 | while (TLI.getTypeAction(Context&: Ctx, VT) != TargetLoweringBase::TypeLegal) |
3722 | VT = TLI.getTypeToTransformTo(Context&: Ctx, VT); |
3723 | |
3724 | // If the vselect is legal, assume we want to leave this as a vector setcc + |
3725 | // vselect. Otherwise, if this is going to be scalarized, we want to see if |
3726 | // min/max is legal on the scalar type. |
3727 | bool UseScalarMinMax = VT.isVector() && |
3728 | !TLI.isOperationLegalOrCustom(Op: ISD::VSELECT, VT); |
3729 | |
3730 | // ValueTracking's select pattern matching does not account for -0.0, |
3731 | // so we can't lower to FMINIMUM/FMAXIMUM because those nodes specify that |
3732 | // -0.0 is less than +0.0. |
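    // For example, "(X < Y) ? X : Y" with X = -0.0 and Y = +0.0 selects
    // +0.0 because the two operands compare equal, whereas
    // FMINIMUM(-0.0, +0.0) is defined to return -0.0.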
3733 | Value *LHS, *RHS; |
3734 | auto SPR = matchSelectPattern(V: const_cast<User*>(&I), LHS, RHS); |
3735 | ISD::NodeType Opc = ISD::DELETED_NODE; |
3736 | switch (SPR.Flavor) { |
3737 | case SPF_UMAX: Opc = ISD::UMAX; break; |
3738 | case SPF_UMIN: Opc = ISD::UMIN; break; |
3739 | case SPF_SMAX: Opc = ISD::SMAX; break; |
3740 | case SPF_SMIN: Opc = ISD::SMIN; break; |
3741 | case SPF_FMINNUM: |
3742 | switch (SPR.NaNBehavior) { |
3743 | case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?" ); |
3744 | case SPNB_RETURNS_NAN: break; |
3745 | case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; |
3746 | case SPNB_RETURNS_ANY: |
3747 | if (TLI.isOperationLegalOrCustom(Op: ISD::FMINNUM, VT) || |
3748 | (UseScalarMinMax && |
3749 | TLI.isOperationLegalOrCustom(Op: ISD::FMINNUM, VT: VT.getScalarType()))) |
3750 | Opc = ISD::FMINNUM; |
3751 | break; |
3752 | } |
3753 | break; |
3754 | case SPF_FMAXNUM: |
3755 | switch (SPR.NaNBehavior) { |
3756 | case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?" ); |
3757 | case SPNB_RETURNS_NAN: break; |
3758 | case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; |
3759 | case SPNB_RETURNS_ANY: |
3760 | if (TLI.isOperationLegalOrCustom(Op: ISD::FMAXNUM, VT) || |
3761 | (UseScalarMinMax && |
3762 | TLI.isOperationLegalOrCustom(Op: ISD::FMAXNUM, VT: VT.getScalarType()))) |
3763 | Opc = ISD::FMAXNUM; |
3764 | break; |
3765 | } |
3766 | break; |
3767 | case SPF_NABS: |
3768 | Negate = true; |
3769 | [[fallthrough]]; |
3770 | case SPF_ABS: |
3771 | IsUnaryAbs = true; |
3772 | Opc = ISD::ABS; |
3773 | break; |
3774 | default: break; |
3775 | } |
3776 | |
3777 | if (!IsUnaryAbs && Opc != ISD::DELETED_NODE && |
3778 | (TLI.isOperationLegalOrCustomOrPromote(Op: Opc, VT) || |
3779 | (UseScalarMinMax && |
3780 | TLI.isOperationLegalOrCustom(Op: Opc, VT: VT.getScalarType()))) && |
3781 | // If the underlying comparison instruction is used by any other |
3782 | // instruction, the consumed instructions won't be destroyed, so it is |
3783 | // not profitable to convert to a min/max. |
3784 | hasOnlySelectUsers(Cond: cast<SelectInst>(Val: I).getCondition())) { |
3785 | OpCode = Opc; |
3786 | LHSVal = getValue(V: LHS); |
3787 | RHSVal = getValue(V: RHS); |
3788 | BaseOps.clear(); |
3789 | } |
3790 | |
3791 | if (IsUnaryAbs) { |
3792 | OpCode = Opc; |
3793 | LHSVal = getValue(V: LHS); |
3794 | BaseOps.clear(); |
3795 | } |
3796 | } |
3797 | |
3798 | if (IsUnaryAbs) { |
3799 | for (unsigned i = 0; i != NumValues; ++i) { |
3800 | SDLoc dl = getCurSDLoc(); |
3801 | EVT VT = LHSVal.getNode()->getValueType(ResNo: LHSVal.getResNo() + i); |
3802 | Values[i] = |
3803 | DAG.getNode(Opcode: OpCode, DL: dl, VT, Operand: LHSVal.getValue(R: LHSVal.getResNo() + i)); |
3804 | if (Negate) |
3805 | Values[i] = DAG.getNegative(Val: Values[i], DL: dl, VT); |
3806 | } |
3807 | } else { |
3808 | for (unsigned i = 0; i != NumValues; ++i) { |
3809 | SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end()); |
3810 | Ops.push_back(Elt: SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); |
3811 | Ops.push_back(Elt: SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); |
3812 | Values[i] = DAG.getNode( |
3813 | Opcode: OpCode, DL: getCurSDLoc(), |
3814 | VT: LHSVal.getNode()->getValueType(ResNo: LHSVal.getResNo() + i), Ops, Flags); |
3815 | } |
3816 | } |
3817 | |
3818 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(), |
3819 | VTList: DAG.getVTList(VTs: ValueVTs), Ops: Values)); |
3820 | } |
3821 | |
3822 | void SelectionDAGBuilder::visitTrunc(const User &I) { |
3823 | // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). |
3824 | SDValue N = getValue(V: I.getOperand(i: 0)); |
3825 | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(), |
3826 | Ty: I.getType()); |
3827 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::TRUNCATE, DL: getCurSDLoc(), VT: DestVT, Operand: N)); |
3828 | } |
3829 | |
3830 | void SelectionDAGBuilder::visitZExt(const User &I) { |
3831 | // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). |
  // ZExt also can't be a cast to bool for the same reason, so there is
  // little to do.
3833 | SDValue N = getValue(V: I.getOperand(i: 0)); |
3834 | auto &TLI = DAG.getTargetLoweringInfo(); |
3835 | EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
3836 | |
3837 | SDNodeFlags Flags; |
3838 | if (auto *PNI = dyn_cast<PossiblyNonNegInst>(Val: &I)) |
3839 | Flags.setNonNeg(PNI->hasNonNeg()); |
3840 | |
3841 | // Eagerly use nonneg information to canonicalize towards sign_extend if |
3842 | // that is the target's preference. |
3843 | // TODO: Let the target do this later. |
3844 | if (Flags.hasNonNeg() && |
3845 | TLI.isSExtCheaperThanZExt(FromTy: N.getValueType(), ToTy: DestVT)) { |
3846 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: getCurSDLoc(), VT: DestVT, Operand: N)); |
3847 | return; |
3848 | } |
3849 | |
3850 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: getCurSDLoc(), VT: DestVT, Operand: N, Flags)); |
3851 | } |
3852 | |
3853 | void SelectionDAGBuilder::visitSExt(const User &I) { |
3854 | // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). |
  // SExt also can't be a cast to bool for the same reason, so there is
  // little to do.
3856 | SDValue N = getValue(V: I.getOperand(i: 0)); |
3857 | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(), |
3858 | Ty: I.getType()); |
3859 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: getCurSDLoc(), VT: DestVT, Operand: N)); |
3860 | } |
3861 | |
3862 | void SelectionDAGBuilder::visitFPTrunc(const User &I) { |
3863 | // FPTrunc is never a no-op cast, no need to check |
3864 | SDValue N = getValue(V: I.getOperand(i: 0)); |
3865 | SDLoc dl = getCurSDLoc(); |
3866 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3867 | EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
3868 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: DestVT, N1: N, |
3869 | N2: DAG.getTargetConstant( |
3870 | Val: 0, DL: dl, VT: TLI.getPointerTy(DL: DAG.getDataLayout())))); |
3871 | } |
3872 | |
3873 | void SelectionDAGBuilder::visitFPExt(const User &I) { |
3874 | // FPExt is never a no-op cast, no need to check |
3875 | SDValue N = getValue(V: I.getOperand(i: 0)); |
3876 | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(), |
3877 | Ty: I.getType()); |
3878 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_EXTEND, DL: getCurSDLoc(), VT: DestVT, Operand: N)); |
3879 | } |
3880 | |
3881 | void SelectionDAGBuilder::visitFPToUI(const User &I) { |
3882 | // FPToUI is never a no-op cast, no need to check |
3883 | SDValue N = getValue(V: I.getOperand(i: 0)); |
3884 | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(), |
3885 | Ty: I.getType()); |
3886 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_TO_UINT, DL: getCurSDLoc(), VT: DestVT, Operand: N)); |
3887 | } |
3888 | |
3889 | void SelectionDAGBuilder::visitFPToSI(const User &I) { |
3890 | // FPToSI is never a no-op cast, no need to check |
3891 | SDValue N = getValue(V: I.getOperand(i: 0)); |
3892 | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(), |
3893 | Ty: I.getType()); |
3894 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: getCurSDLoc(), VT: DestVT, Operand: N)); |
3895 | } |
3896 | |
3897 | void SelectionDAGBuilder::visitUIToFP(const User &I) { |
3898 | // UIToFP is never a no-op cast, no need to check |
3899 | SDValue N = getValue(V: I.getOperand(i: 0)); |
3900 | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(), |
3901 | Ty: I.getType()); |
3902 | SDNodeFlags Flags; |
3903 | if (auto *PNI = dyn_cast<PossiblyNonNegInst>(Val: &I)) |
3904 | Flags.setNonNeg(PNI->hasNonNeg()); |
3905 | |
3906 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::UINT_TO_FP, DL: getCurSDLoc(), VT: DestVT, Operand: N, Flags)); |
3907 | } |
3908 | |
3909 | void SelectionDAGBuilder::visitSIToFP(const User &I) { |
3910 | // SIToFP is never a no-op cast, no need to check |
3911 | SDValue N = getValue(V: I.getOperand(i: 0)); |
3912 | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(), |
3913 | Ty: I.getType()); |
3914 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SINT_TO_FP, DL: getCurSDLoc(), VT: DestVT, Operand: N)); |
3915 | } |
3916 | |
3917 | void SelectionDAGBuilder::visitPtrToInt(const User &I) { |
3918 | // What to do depends on the size of the integer and the size of the pointer. |
3919 | // We can either truncate, zero extend, or no-op, accordingly. |
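  // For example, "ptrtoint ptr %p to i128" on a 64-bit target zero-extends
  // the 64-bit pointer value to 128 bits.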
3920 | SDValue N = getValue(V: I.getOperand(i: 0)); |
3921 | auto &TLI = DAG.getTargetLoweringInfo(); |
3922 | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(), |
3923 | Ty: I.getType()); |
3924 | EVT PtrMemVT = |
3925 | TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getOperand(i: 0)->getType()); |
3926 | N = DAG.getPtrExtOrTrunc(Op: N, DL: getCurSDLoc(), VT: PtrMemVT); |
3927 | N = DAG.getZExtOrTrunc(Op: N, DL: getCurSDLoc(), VT: DestVT); |
3928 | setValue(V: &I, NewN: N); |
3929 | } |
3930 | |
3931 | void SelectionDAGBuilder::visitIntToPtr(const User &I) { |
3932 | // What to do depends on the size of the integer and the size of the pointer. |
3933 | // We can either truncate, zero extend, or no-op, accordingly. |
3934 | SDValue N = getValue(V: I.getOperand(i: 0)); |
3935 | auto &TLI = DAG.getTargetLoweringInfo(); |
3936 | EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
3937 | EVT PtrMemVT = TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
3938 | N = DAG.getZExtOrTrunc(Op: N, DL: getCurSDLoc(), VT: PtrMemVT); |
3939 | N = DAG.getPtrExtOrTrunc(Op: N, DL: getCurSDLoc(), VT: DestVT); |
3940 | setValue(V: &I, NewN: N); |
3941 | } |
3942 | |
3943 | void SelectionDAGBuilder::visitBitCast(const User &I) { |
3944 | SDValue N = getValue(V: I.getOperand(i: 0)); |
3945 | SDLoc dl = getCurSDLoc(); |
3946 | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(), |
3947 | Ty: I.getType()); |
3948 | |
3949 | // BitCast assures us that source and destination are the same size so this is |
3950 | // either a BITCAST or a no-op. |
3951 | if (DestVT != N.getValueType()) |
3952 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, |
3953 | VT: DestVT, Operand: N)); // convert types. |
3954 | // Check if the original LLVM IR Operand was a ConstantInt, because getValue() |
3955 | // might fold any kind of constant expression to an integer constant and that |
3956 | // is not what we are looking for. Only recognize a bitcast of a genuine |
3957 | // constant integer as an opaque constant. |
3958 | else if(ConstantInt *C = dyn_cast<ConstantInt>(Val: I.getOperand(i: 0))) |
3959 | setValue(V: &I, NewN: DAG.getConstant(Val: C->getValue(), DL: dl, VT: DestVT, /*isTarget=*/false, |
3960 | /*isOpaque*/true)); |
3961 | else |
3962 | setValue(V: &I, NewN: N); // noop cast. |
3963 | } |
3964 | |
3965 | void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { |
3966 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3967 | const Value *SV = I.getOperand(i: 0); |
3968 | SDValue N = getValue(V: SV); |
3969 | EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
3970 | |
3971 | unsigned SrcAS = SV->getType()->getPointerAddressSpace(); |
3972 | unsigned DestAS = I.getType()->getPointerAddressSpace(); |
3973 | |
3974 | if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS)) |
3975 | N = DAG.getAddrSpaceCast(dl: getCurSDLoc(), VT: DestVT, Ptr: N, SrcAS, DestAS); |
3976 | |
3977 | setValue(V: &I, NewN: N); |
3978 | } |
3979 | |
3980 | void SelectionDAGBuilder::visitInsertElement(const User &I) { |
3981 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3982 | SDValue InVec = getValue(V: I.getOperand(i: 0)); |
3983 | SDValue InVal = getValue(V: I.getOperand(i: 1)); |
3984 | SDValue InIdx = DAG.getZExtOrTrunc(Op: getValue(V: I.getOperand(i: 2)), DL: getCurSDLoc(), |
3985 | VT: TLI.getVectorIdxTy(DL: DAG.getDataLayout())); |
3986 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: getCurSDLoc(), |
3987 | VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()), |
3988 | N1: InVec, N2: InVal, N3: InIdx)); |
3989 | } |
3990 | |
void SelectionDAGBuilder::visitExtractElement(const User &I) {
3992 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3993 | SDValue InVec = getValue(V: I.getOperand(i: 0)); |
3994 | SDValue InIdx = DAG.getZExtOrTrunc(Op: getValue(V: I.getOperand(i: 1)), DL: getCurSDLoc(), |
3995 | VT: TLI.getVectorIdxTy(DL: DAG.getDataLayout())); |
3996 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: getCurSDLoc(), |
3997 | VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()), |
3998 | N1: InVec, N2: InIdx)); |
3999 | } |
4000 | |
4001 | void SelectionDAGBuilder::visitShuffleVector(const User &I) { |
4002 | SDValue Src1 = getValue(V: I.getOperand(i: 0)); |
4003 | SDValue Src2 = getValue(V: I.getOperand(i: 1)); |
4004 | ArrayRef<int> Mask; |
4005 | if (auto *SVI = dyn_cast<ShuffleVectorInst>(Val: &I)) |
4006 | Mask = SVI->getShuffleMask(); |
4007 | else |
4008 | Mask = cast<ConstantExpr>(Val: I).getShuffleMask(); |
4009 | SDLoc DL = getCurSDLoc(); |
4010 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
4011 | EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
4012 | EVT SrcVT = Src1.getValueType(); |
4013 | |
4014 | if (all_of(Range&: Mask, P: [](int Elem) { return Elem == 0; }) && |
4015 | VT.isScalableVector()) { |
4016 | // Canonical splat form of first element of first input vector. |
4017 | SDValue FirstElt = |
4018 | DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: SrcVT.getScalarType(), N1: Src1, |
4019 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
4020 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL, VT, Operand: FirstElt)); |
4021 | return; |
4022 | } |
4023 | |
4024 | // For now, we only handle splats for scalable vectors. |
4025 | // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation |
4026 | // for targets that support a SPLAT_VECTOR for non-scalable vector types. |
4027 | assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle" ); |
4028 | |
4029 | unsigned SrcNumElts = SrcVT.getVectorNumElements(); |
4030 | unsigned MaskNumElts = Mask.size(); |
4031 | |
4032 | if (SrcNumElts == MaskNumElts) { |
4033 | setValue(V: &I, NewN: DAG.getVectorShuffle(VT, dl: DL, N1: Src1, N2: Src2, Mask)); |
4034 | return; |
4035 | } |
4036 | |
4037 | // Normalize the shuffle vector since mask and vector length don't match. |
4038 | if (SrcNumElts < MaskNumElts) { |
    // The mask is longer than the source vectors. Concatenate vectors to make
    // the mask and vector lengths match.
4041 | |
4042 | if (MaskNumElts % SrcNumElts == 0) { |
4043 | // Mask length is a multiple of the source vector length. |
4044 | // Check if the shuffle is some kind of concatenation of the input |
4045 | // vectors. |
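      // For example, a <4 x i32> shuffle of two <2 x i32> sources with mask
      // <0,1,2,3> is just the concatenation of the two sources.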
4046 | unsigned NumConcat = MaskNumElts / SrcNumElts; |
4047 | bool IsConcat = true; |
4048 | SmallVector<int, 8> ConcatSrcs(NumConcat, -1); |
4049 | for (unsigned i = 0; i != MaskNumElts; ++i) { |
4050 | int Idx = Mask[i]; |
4051 | if (Idx < 0) |
4052 | continue; |
4053 | // Ensure the indices in each SrcVT sized piece are sequential and that |
4054 | // the same source is used for the whole piece. |
4055 | if ((Idx % SrcNumElts != (i % SrcNumElts)) || |
4056 | (ConcatSrcs[i / SrcNumElts] >= 0 && |
4057 | ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) { |
4058 | IsConcat = false; |
4059 | break; |
4060 | } |
4061 | // Remember which source this index came from. |
4062 | ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; |
4063 | } |
4064 | |
4065 | // The shuffle is concatenating multiple vectors together. Just emit |
4066 | // a CONCAT_VECTORS operation. |
4067 | if (IsConcat) { |
4068 | SmallVector<SDValue, 8> ConcatOps; |
4069 | for (auto Src : ConcatSrcs) { |
4070 | if (Src < 0) |
4071 | ConcatOps.push_back(Elt: DAG.getUNDEF(VT: SrcVT)); |
4072 | else if (Src == 0) |
4073 | ConcatOps.push_back(Elt: Src1); |
4074 | else |
4075 | ConcatOps.push_back(Elt: Src2); |
4076 | } |
4077 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, Ops: ConcatOps)); |
4078 | return; |
4079 | } |
4080 | } |
4081 | |
4082 | unsigned PaddedMaskNumElts = alignTo(Value: MaskNumElts, Align: SrcNumElts); |
4083 | unsigned NumConcat = PaddedMaskNumElts / SrcNumElts; |
4084 | EVT PaddedVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: VT.getScalarType(), |
4085 | NumElements: PaddedMaskNumElts); |
4086 | |
4087 | // Pad both vectors with undefs to make them the same length as the mask. |
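    // For example, with <2 x i32> sources and a 3-element mask, both sources
    // are padded to <4 x i32>, the shuffle is performed at width 4, and the
    // extra element is removed below with EXTRACT_SUBVECTOR.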
4088 | SDValue UndefVal = DAG.getUNDEF(VT: SrcVT); |
4089 | |
4090 | SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal); |
4091 | SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal); |
4092 | MOps1[0] = Src1; |
4093 | MOps2[0] = Src2; |
4094 | |
4095 | Src1 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: PaddedVT, Ops: MOps1); |
4096 | Src2 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: PaddedVT, Ops: MOps2); |
4097 | |
4098 | // Readjust mask for new input vector length. |
4099 | SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1); |
4100 | for (unsigned i = 0; i != MaskNumElts; ++i) { |
4101 | int Idx = Mask[i]; |
4102 | if (Idx >= (int)SrcNumElts) |
4103 | Idx -= SrcNumElts - PaddedMaskNumElts; |
4104 | MappedOps[i] = Idx; |
4105 | } |
4106 | |
4107 | SDValue Result = DAG.getVectorShuffle(VT: PaddedVT, dl: DL, N1: Src1, N2: Src2, Mask: MappedOps); |
4108 | |
4109 | // If the concatenated vector was padded, extract a subvector with the |
4110 | // correct number of elements. |
4111 | if (MaskNumElts != PaddedMaskNumElts) |
4112 | Result = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: Result, |
4113 | N2: DAG.getVectorIdxConstant(Val: 0, DL)); |
4114 | |
4115 | setValue(V: &I, NewN: Result); |
4116 | return; |
4117 | } |
4118 | |
4119 | if (SrcNumElts > MaskNumElts) { |
4120 | // Analyze the access pattern of the vector to see if we can extract |
4121 | // two subvectors and do the shuffle. |
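    // For example, a <4 x i32> mask <4,5,6,7> applied to <8 x i32> sources
    // becomes an EXTRACT_SUBVECTOR at index 4 followed by an identity
    // shuffle.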
4122 | int StartIdx[2] = { -1, -1 }; // StartIdx to extract from |
    bool CanExtract = true;
4124 | for (int Idx : Mask) { |
4125 | unsigned Input = 0; |
4126 | if (Idx < 0) |
4127 | continue; |
4128 | |
4129 | if (Idx >= (int)SrcNumElts) { |
4130 | Input = 1; |
4131 | Idx -= SrcNumElts; |
4132 | } |
4133 | |
4134 | // If all the indices come from the same MaskNumElts sized portion of |
4135 | // the sources we can use extract. Also make sure the extract wouldn't |
4136 | // extract past the end of the source. |
4137 | int NewStartIdx = alignDown(Value: Idx, Align: MaskNumElts); |
4138 | if (NewStartIdx + MaskNumElts > SrcNumElts || |
4139 | (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx)) |
4140 | CanExtract = false; |
4141 | // Make sure we always update StartIdx as we use it to track if all |
4142 | // elements are undef. |
4143 | StartIdx[Input] = NewStartIdx; |
4144 | } |
4145 | |
4146 | if (StartIdx[0] < 0 && StartIdx[1] < 0) { |
4147 | setValue(V: &I, NewN: DAG.getUNDEF(VT)); // Vectors are not used. |
4148 | return; |
4149 | } |
4150 | if (CanExtract) { |
4151 | // Extract appropriate subvector and generate a vector shuffle |
4152 | for (unsigned Input = 0; Input < 2; ++Input) { |
4153 | SDValue &Src = Input == 0 ? Src1 : Src2; |
4154 | if (StartIdx[Input] < 0) |
4155 | Src = DAG.getUNDEF(VT); |
4156 | else { |
4157 | Src = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT, N1: Src, |
4158 | N2: DAG.getVectorIdxConstant(Val: StartIdx[Input], DL)); |
4159 | } |
4160 | } |
4161 | |
4162 | // Calculate new mask. |
4163 | SmallVector<int, 8> MappedOps(Mask); |
4164 | for (int &Idx : MappedOps) { |
4165 | if (Idx >= (int)SrcNumElts) |
4166 | Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; |
4167 | else if (Idx >= 0) |
4168 | Idx -= StartIdx[0]; |
4169 | } |
4170 | |
4171 | setValue(V: &I, NewN: DAG.getVectorShuffle(VT, dl: DL, N1: Src1, N2: Src2, Mask: MappedOps)); |
4172 | return; |
4173 | } |
4174 | } |
4175 | |
  // We can't use either concat vectors or extract subvectors, so fall back to
  // replacing the shuffle with per-element extracts and a build vector.
4179 | EVT EltVT = VT.getVectorElementType(); |
4180 | SmallVector<SDValue,8> Ops; |
4181 | for (int Idx : Mask) { |
4182 | SDValue Res; |
4183 | |
4184 | if (Idx < 0) { |
4185 | Res = DAG.getUNDEF(VT: EltVT); |
4186 | } else { |
4187 | SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; |
4188 | if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; |
4189 | |
4190 | Res = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Src, |
4191 | N2: DAG.getVectorIdxConstant(Val: Idx, DL)); |
4192 | } |
4193 | |
4194 | Ops.push_back(Elt: Res); |
4195 | } |
4196 | |
4197 | setValue(V: &I, NewN: DAG.getBuildVector(VT, DL, Ops)); |
4198 | } |
4199 | |
4200 | void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { |
4201 | ArrayRef<unsigned> Indices = I.getIndices(); |
4202 | const Value *Op0 = I.getOperand(i_nocapture: 0); |
4203 | const Value *Op1 = I.getOperand(i_nocapture: 1); |
4204 | Type *AggTy = I.getType(); |
4205 | Type *ValTy = Op1->getType(); |
4206 | bool IntoUndef = isa<UndefValue>(Val: Op0); |
4207 | bool FromUndef = isa<UndefValue>(Val: Op1); |
4208 | |
4209 | unsigned LinearIndex = ComputeLinearIndex(Ty: AggTy, Indices); |
4210 | |
4211 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
4212 | SmallVector<EVT, 4> AggValueVTs; |
4213 | ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: AggTy, ValueVTs&: AggValueVTs); |
4214 | SmallVector<EVT, 4> ValValueVTs; |
4215 | ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: ValTy, ValueVTs&: ValValueVTs); |
4216 | |
4217 | unsigned NumAggValues = AggValueVTs.size(); |
4218 | unsigned NumValValues = ValValueVTs.size(); |
4219 | SmallVector<SDValue, 4> Values(NumAggValues); |
4220 | |
4221 | // Ignore an insertvalue that produces an empty object |
4222 | if (!NumAggValues) { |
4223 | setValue(V: &I, NewN: DAG.getUNDEF(VT: MVT(MVT::Other))); |
4224 | return; |
4225 | } |
4226 | |
4227 | SDValue Agg = getValue(V: Op0); |
4228 | unsigned i = 0; |
4229 | // Copy the beginning value(s) from the original aggregate. |
4230 | for (; i != LinearIndex; ++i) |
4231 | Values[i] = IntoUndef ? DAG.getUNDEF(VT: AggValueVTs[i]) : |
4232 | SDValue(Agg.getNode(), Agg.getResNo() + i); |
4233 | // Copy values from the inserted value(s). |
4234 | if (NumValValues) { |
4235 | SDValue Val = getValue(V: Op1); |
4236 | for (; i != LinearIndex + NumValValues; ++i) |
4237 | Values[i] = FromUndef ? DAG.getUNDEF(VT: AggValueVTs[i]) : |
4238 | SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); |
4239 | } |
4240 | // Copy remaining value(s) from the original aggregate. |
4241 | for (; i != NumAggValues; ++i) |
4242 | Values[i] = IntoUndef ? DAG.getUNDEF(VT: AggValueVTs[i]) : |
4243 | SDValue(Agg.getNode(), Agg.getResNo() + i); |
4244 | |
4245 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(), |
4246 | VTList: DAG.getVTList(VTs: AggValueVTs), Ops: Values)); |
4247 | } |
4248 | |
void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
4250 | ArrayRef<unsigned> Indices = I.getIndices(); |
4251 | const Value *Op0 = I.getOperand(i_nocapture: 0); |
4252 | Type *AggTy = Op0->getType(); |
4253 | Type *ValTy = I.getType(); |
4254 | bool OutOfUndef = isa<UndefValue>(Val: Op0); |
4255 | |
4256 | unsigned LinearIndex = ComputeLinearIndex(Ty: AggTy, Indices); |
4257 | |
4258 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
4259 | SmallVector<EVT, 4> ValValueVTs; |
4260 | ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: ValTy, ValueVTs&: ValValueVTs); |
4261 | |
4262 | unsigned NumValValues = ValValueVTs.size(); |
4263 | |
  // Ignore an extractvalue that produces an empty object
4265 | if (!NumValValues) { |
4266 | setValue(V: &I, NewN: DAG.getUNDEF(VT: MVT(MVT::Other))); |
4267 | return; |
4268 | } |
4269 | |
4270 | SmallVector<SDValue, 4> Values(NumValValues); |
4271 | |
4272 | SDValue Agg = getValue(V: Op0); |
4273 | // Copy out the selected value(s). |
4274 | for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i) |
4275 | Values[i - LinearIndex] = |
4276 | OutOfUndef ? |
4277 | DAG.getUNDEF(VT: Agg.getNode()->getValueType(ResNo: Agg.getResNo() + i)) : |
4278 | SDValue(Agg.getNode(), Agg.getResNo() + i); |
4279 | |
4280 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(), |
4281 | VTList: DAG.getVTList(VTs: ValValueVTs), Ops: Values)); |
4282 | } |
4283 | |
4284 | void SelectionDAGBuilder::visitGetElementPtr(const User &I) { |
4285 | Value *Op0 = I.getOperand(i: 0); |
  // Note that the pointer operand may be a vector of pointers. Use its scalar
  // element type, which holds a pointer, to get the address space.
4288 | unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace(); |
4289 | SDValue N = getValue(V: Op0); |
4290 | SDLoc dl = getCurSDLoc(); |
4291 | auto &TLI = DAG.getTargetLoweringInfo(); |
4292 | |
  // Normalize a vector GEP: all scalar operands must be splatted to match the
  // result vector width.
4295 | bool IsVectorGEP = I.getType()->isVectorTy(); |
4296 | ElementCount VectorElementCount = |
4297 | IsVectorGEP ? cast<VectorType>(Val: I.getType())->getElementCount() |
4298 | : ElementCount::getFixed(MinVal: 0); |
4299 | |
4300 | if (IsVectorGEP && !N.getValueType().isVector()) { |
4301 | LLVMContext &Context = *DAG.getContext(); |
4302 | EVT VT = EVT::getVectorVT(Context, VT: N.getValueType(), EC: VectorElementCount); |
4303 | N = DAG.getSplat(VT, DL: dl, Op: N); |
4304 | } |
4305 | |
4306 | for (gep_type_iterator GTI = gep_type_begin(GEP: &I), E = gep_type_end(GEP: &I); |
4307 | GTI != E; ++GTI) { |
4308 | const Value *Idx = GTI.getOperand(); |
4309 | if (StructType *StTy = GTI.getStructTypeOrNull()) { |
4310 | unsigned Field = cast<Constant>(Val: Idx)->getUniqueInteger().getZExtValue(); |
4311 | if (Field) { |
4312 | // N = N + Offset |
4313 | uint64_t Offset = |
4314 | DAG.getDataLayout().getStructLayout(Ty: StTy)->getElementOffset(Idx: Field); |
4315 | |
4316 | // In an inbounds GEP with an offset that is nonnegative even when |
4317 | // interpreted as signed, assume there is no unsigned overflow. |
4318 | SDNodeFlags Flags; |
4319 | if (int64_t(Offset) >= 0 && cast<GEPOperator>(Val: I).isInBounds()) |
4320 | Flags.setNoUnsignedWrap(true); |
4321 | |
4322 | N = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: N.getValueType(), N1: N, |
4323 | N2: DAG.getConstant(Val: Offset, DL: dl, VT: N.getValueType()), Flags); |
4324 | } |
4325 | } else { |
4326 | // IdxSize is the width of the arithmetic according to IR semantics. |
4327 | // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth |
4328 | // (and fix up the result later). |
4329 | unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS); |
4330 | MVT IdxTy = MVT::getIntegerVT(BitWidth: IdxSize); |
4331 | TypeSize ElementSize = |
4332 | GTI.getSequentialElementStride(DL: DAG.getDataLayout()); |
4333 | // We intentionally mask away the high bits here; ElementSize may not |
4334 | // fit in IdxTy. |
4335 | APInt ElementMul(IdxSize, ElementSize.getKnownMinValue()); |
4336 | bool ElementScalable = ElementSize.isScalable(); |
4337 | |
4338 | // If this is a scalar constant or a splat vector of constants, |
4339 | // handle it quickly. |
4340 | const auto *C = dyn_cast<Constant>(Val: Idx); |
4341 | if (C && isa<VectorType>(Val: C->getType())) |
4342 | C = C->getSplatValue(); |
4343 | |
4344 | const auto *CI = dyn_cast_or_null<ConstantInt>(Val: C); |
4345 | if (CI && CI->isZero()) |
4346 | continue; |
4347 | if (CI && !ElementScalable) { |
4348 | APInt Offs = ElementMul * CI->getValue().sextOrTrunc(width: IdxSize); |
4349 | LLVMContext &Context = *DAG.getContext(); |
4350 | SDValue OffsVal; |
4351 | if (IsVectorGEP) |
4352 | OffsVal = DAG.getConstant( |
4353 | Val: Offs, DL: dl, VT: EVT::getVectorVT(Context, VT: IdxTy, EC: VectorElementCount)); |
4354 | else |
4355 | OffsVal = DAG.getConstant(Val: Offs, DL: dl, VT: IdxTy); |
4356 | |
4357 | // In an inbounds GEP with an offset that is nonnegative even when |
4358 | // interpreted as signed, assume there is no unsigned overflow. |
4359 | SDNodeFlags Flags; |
4360 | if (Offs.isNonNegative() && cast<GEPOperator>(Val: I).isInBounds()) |
4361 | Flags.setNoUnsignedWrap(true); |
4362 | |
4363 | OffsVal = DAG.getSExtOrTrunc(Op: OffsVal, DL: dl, VT: N.getValueType()); |
4364 | |
4365 | N = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: N.getValueType(), N1: N, N2: OffsVal, Flags); |
4366 | continue; |
4367 | } |
4368 | |
4369 | // N = N + Idx * ElementMul; |
4370 | SDValue IdxN = getValue(V: Idx); |
4371 | |
4372 | if (!IdxN.getValueType().isVector() && IsVectorGEP) { |
4373 | EVT VT = EVT::getVectorVT(Context&: *Context, VT: IdxN.getValueType(), |
4374 | EC: VectorElementCount); |
4375 | IdxN = DAG.getSplat(VT, DL: dl, Op: IdxN); |
4376 | } |
4377 | |
4378 | // If the index is smaller or larger than intptr_t, truncate or extend |
4379 | // it. |
4380 | IdxN = DAG.getSExtOrTrunc(Op: IdxN, DL: dl, VT: N.getValueType()); |
4381 | |
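      // For a scalable element type the byte stride is a runtime multiple of
      // vscale, so materialize (vscale * known-min-size) and multiply the
      // index by it.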
4382 | if (ElementScalable) { |
4383 | EVT VScaleTy = N.getValueType().getScalarType(); |
4384 | SDValue VScale = DAG.getNode( |
4385 | Opcode: ISD::VSCALE, DL: dl, VT: VScaleTy, |
4386 | Operand: DAG.getConstant(Val: ElementMul.getZExtValue(), DL: dl, VT: VScaleTy)); |
4387 | if (IsVectorGEP) |
4388 | VScale = DAG.getSplatVector(VT: N.getValueType(), DL: dl, Op: VScale); |
4389 | IdxN = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: N.getValueType(), N1: IdxN, N2: VScale); |
4390 | } else { |
4391 | // If this is a multiply by a power of two, turn it into a shl |
4392 | // immediately. This is a very common case. |
4393 | if (ElementMul != 1) { |
4394 | if (ElementMul.isPowerOf2()) { |
4395 | unsigned Amt = ElementMul.logBase2(); |
4396 | IdxN = DAG.getNode(Opcode: ISD::SHL, DL: dl, |
4397 | VT: N.getValueType(), N1: IdxN, |
4398 | N2: DAG.getConstant(Val: Amt, DL: dl, VT: IdxN.getValueType())); |
4399 | } else { |
4400 | SDValue Scale = DAG.getConstant(Val: ElementMul.getZExtValue(), DL: dl, |
4401 | VT: IdxN.getValueType()); |
4402 | IdxN = DAG.getNode(Opcode: ISD::MUL, DL: dl, |
4403 | VT: N.getValueType(), N1: IdxN, N2: Scale); |
4404 | } |
4405 | } |
4406 | } |
4407 | |
4408 | N = DAG.getNode(Opcode: ISD::ADD, DL: dl, |
4409 | VT: N.getValueType(), N1: N, N2: IdxN); |
4410 | } |
4411 | } |
4412 | |
4413 | MVT PtrTy = TLI.getPointerTy(DL: DAG.getDataLayout(), AS); |
4414 | MVT PtrMemTy = TLI.getPointerMemTy(DL: DAG.getDataLayout(), AS); |
4415 | if (IsVectorGEP) { |
4416 | PtrTy = MVT::getVectorVT(VT: PtrTy, EC: VectorElementCount); |
4417 | PtrMemTy = MVT::getVectorVT(VT: PtrMemTy, EC: VectorElementCount); |
4418 | } |
4419 | |
4420 | if (PtrMemTy != PtrTy && !cast<GEPOperator>(Val: I).isInBounds()) |
4421 | N = DAG.getPtrExtendInReg(Op: N, DL: dl, VT: PtrMemTy); |
4422 | |
4423 | setValue(V: &I, NewN: N); |
4424 | } |
4425 | |
4426 | void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { |
4427 | // If this is a fixed sized alloca in the entry block of the function, |
4428 | // allocate it statically on the stack. |
4429 | if (FuncInfo.StaticAllocaMap.count(Val: &I)) |
4430 | return; // getValue will auto-populate this. |
4431 | |
4432 | SDLoc dl = getCurSDLoc(); |
4433 | Type *Ty = I.getAllocatedType(); |
4434 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
4435 | auto &DL = DAG.getDataLayout(); |
4436 | TypeSize TySize = DL.getTypeAllocSize(Ty); |
4437 | MaybeAlign Alignment = std::max(a: DL.getPrefTypeAlign(Ty), b: I.getAlign()); |
4438 | |
4439 | SDValue AllocSize = getValue(V: I.getArraySize()); |
4440 | |
4441 | EVT IntPtr = TLI.getPointerTy(DL, AS: I.getAddressSpace()); |
4442 | if (AllocSize.getValueType() != IntPtr) |
4443 | AllocSize = DAG.getZExtOrTrunc(Op: AllocSize, DL: dl, VT: IntPtr); |
4444 | |
4445 | if (TySize.isScalable()) |
4446 | AllocSize = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IntPtr, N1: AllocSize, |
4447 | N2: DAG.getVScale(DL: dl, VT: IntPtr, |
4448 | MulImm: APInt(IntPtr.getScalarSizeInBits(), |
4449 | TySize.getKnownMinValue()))); |
4450 | else { |
4451 | SDValue TySizeValue = |
4452 | DAG.getConstant(Val: TySize.getFixedValue(), DL: dl, VT: MVT::getIntegerVT(BitWidth: 64)); |
4453 | AllocSize = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IntPtr, N1: AllocSize, |
4454 | N2: DAG.getZExtOrTrunc(Op: TySizeValue, DL: dl, VT: IntPtr)); |
4455 | } |
4456 | |
  // Handle alignment. If the requested alignment is less than or equal to
  // the stack alignment, ignore it. If it is greater than the stack
  // alignment, we record it in the DYNAMIC_STACKALLOC node.
4460 | Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign(); |
4461 | if (*Alignment <= StackAlign) |
4462 | Alignment = std::nullopt; |
4463 | |
4464 | const uint64_t StackAlignMask = StackAlign.value() - 1U; |
  // Round the size of the allocation up to the stack alignment size
  // by adding SA-1 to the size. This doesn't overflow because we're computing
  // an address inside an alloca.
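  // (For example, with a 16-byte stack alignment a 20-byte request becomes
  // (20 + 15) & ~15 = 32 bytes.)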
4468 | SDNodeFlags Flags; |
4469 | Flags.setNoUnsignedWrap(true); |
4470 | AllocSize = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AllocSize.getValueType(), N1: AllocSize, |
4471 | N2: DAG.getConstant(Val: StackAlignMask, DL: dl, VT: IntPtr), Flags); |
4472 | |
4473 | // Mask out the low bits for alignment purposes. |
4474 | AllocSize = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: AllocSize.getValueType(), N1: AllocSize, |
4475 | N2: DAG.getConstant(Val: ~StackAlignMask, DL: dl, VT: IntPtr)); |
4476 | |
4477 | SDValue Ops[] = { |
4478 | getRoot(), AllocSize, |
4479 | DAG.getConstant(Val: Alignment ? Alignment->value() : 0, DL: dl, VT: IntPtr)}; |
4480 | SDVTList VTs = DAG.getVTList(VT1: AllocSize.getValueType(), VT2: MVT::Other); |
4481 | SDValue DSA = DAG.getNode(Opcode: ISD::DYNAMIC_STACKALLOC, DL: dl, VTList: VTs, Ops); |
4482 | setValue(V: &I, NewN: DSA); |
4483 | DAG.setRoot(DSA.getValue(R: 1)); |
4484 | |
4485 | assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects()); |
4486 | } |
4487 | |
4488 | static const MDNode *getRangeMetadata(const Instruction &I) { |
4489 | // If !noundef is not present, then !range violation results in a poison |
4490 | // value rather than immediate undefined behavior. In theory, transferring |
4491 | // these annotations to SDAG is fine, but in practice there are key SDAG |
4492 | // transforms that are known not to be poison-safe, such as folding logical |
4493 | // and/or to bitwise and/or. For now, only transfer !range if !noundef is |
4494 | // also present. |
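  // (For example, folding the logical-and "select i1 %c, i1 %a, i1 false"
  // to "and i1 %c, %a" is not poison-safe: if %c is false and %a is poison,
  // the select is a well-defined false but the 'and' is poison.)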
4495 | if (!I.hasMetadata(KindID: LLVMContext::MD_noundef)) |
4496 | return nullptr; |
4497 | return I.getMetadata(KindID: LLVMContext::MD_range); |
4498 | } |
4499 | |
4500 | static std::optional<ConstantRange> getRange(const Instruction &I) { |
4501 | if (const auto *CB = dyn_cast<CallBase>(Val: &I)) { |
4502 | // see comment in getRangeMetadata about this check |
4503 | if (CB->hasRetAttr(Kind: Attribute::NoUndef)) |
4504 | return CB->getRange(); |
4505 | } |
4506 | if (const MDNode *Range = getRangeMetadata(I)) |
4507 | return getConstantRangeFromMetadata(RangeMD: *Range); |
4508 | return std::nullopt; |
4509 | } |
4510 | |
4511 | void SelectionDAGBuilder::visitLoad(const LoadInst &I) { |
4512 | if (I.isAtomic()) |
4513 | return visitAtomicLoad(I); |
4514 | |
4515 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
4516 | const Value *SV = I.getOperand(i_nocapture: 0); |
4517 | if (TLI.supportSwiftError()) { |
4518 | // Swifterror values can come from either a function parameter with |
4519 | // swifterror attribute or an alloca with swifterror attribute. |
4520 | if (const Argument *Arg = dyn_cast<Argument>(Val: SV)) { |
4521 | if (Arg->hasSwiftErrorAttr()) |
4522 | return visitLoadFromSwiftError(I); |
4523 | } |
4524 | |
4525 | if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: SV)) { |
4526 | if (Alloca->isSwiftError()) |
4527 | return visitLoadFromSwiftError(I); |
4528 | } |
4529 | } |
4530 | |
4531 | SDValue Ptr = getValue(V: SV); |
4532 | |
4533 | Type *Ty = I.getType(); |
4534 | SmallVector<EVT, 4> ValueVTs, MemVTs; |
4535 | SmallVector<TypeSize, 4> Offsets; |
4536 | ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty, ValueVTs, MemVTs: &MemVTs, Offsets: &Offsets); |
4537 | unsigned NumValues = ValueVTs.size(); |
4538 | if (NumValues == 0) |
4539 | return; |
4540 | |
4541 | Align Alignment = I.getAlign(); |
4542 | AAMDNodes AAInfo = I.getAAMetadata(); |
4543 | const MDNode *Ranges = getRangeMetadata(I); |
4544 | bool isVolatile = I.isVolatile(); |
4545 | MachineMemOperand::Flags MMOFlags = |
4546 | TLI.getLoadMemOperandFlags(LI: I, DL: DAG.getDataLayout(), AC, LibInfo); |
4547 | |
4548 | SDValue Root; |
4549 | bool ConstantMemory = false; |
4550 | if (isVolatile) |
4551 | // Serialize volatile loads with other side effects. |
4552 | Root = getRoot(); |
4553 | else if (NumValues > MaxParallelChains) |
4554 | Root = getMemoryRoot(); |
4555 | else if (AA && |
4556 | AA->pointsToConstantMemory(Loc: MemoryLocation( |
4557 | SV, |
4558 | LocationSize::precise(Value: DAG.getDataLayout().getTypeStoreSize(Ty)), |
4559 | AAInfo))) { |
4560 | // Do not serialize (non-volatile) loads of constant memory with anything. |
4561 | Root = DAG.getEntryNode(); |
4562 | ConstantMemory = true; |
4563 | MMOFlags |= MachineMemOperand::MOInvariant; |
4564 | } else { |
4565 | // Do not serialize non-volatile loads against each other. |
4566 | Root = DAG.getRoot(); |
4567 | } |
4568 | |
4569 | SDLoc dl = getCurSDLoc(); |
4570 | |
4571 | if (isVolatile) |
4572 | Root = TLI.prepareVolatileOrAtomicLoad(Chain: Root, DL: dl, DAG); |
4573 | |
4574 | SmallVector<SDValue, 4> Values(NumValues); |
4575 | SmallVector<SDValue, 4> Chains(std::min(a: MaxParallelChains, b: NumValues)); |
4576 | |
4577 | unsigned ChainI = 0; |
4578 | for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { |
4579 | // Serializing loads here may result in excessive register pressure, and |
4580 | // TokenFactor places arbitrary choke points on the scheduler. SD scheduling |
4581 | // could recover a bit by hoisting nodes upward in the chain by recognizing |
4582 | // they are side-effect free or do not alias. The optimizer should really |
4583 | // avoid this case by converting large object/array copies to llvm.memcpy |
    // (MaxParallelChains should always remain as a failsafe).
4585 | if (ChainI == MaxParallelChains) { |
4586 | assert(PendingLoads.empty() && "PendingLoads must be serialized first" ); |
4587 | SDValue Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, |
4588 | Ops: ArrayRef(Chains.data(), ChainI)); |
4589 | Root = Chain; |
4590 | ChainI = 0; |
4591 | } |
4592 | |
4593 | // TODO: MachinePointerInfo only supports a fixed length offset. |
4594 | MachinePointerInfo PtrInfo = |
4595 | !Offsets[i].isScalable() || Offsets[i].isZero() |
4596 | ? MachinePointerInfo(SV, Offsets[i].getKnownMinValue()) |
4597 | : MachinePointerInfo(); |
4598 | |
4599 | SDValue A = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: Offsets[i]); |
4600 | SDValue L = DAG.getLoad(VT: MemVTs[i], dl, Chain: Root, Ptr: A, PtrInfo, Alignment, |
4601 | MMOFlags, AAInfo, Ranges); |
4602 | Chains[ChainI] = L.getValue(R: 1); |
4603 | |
4604 | if (MemVTs[i] != ValueVTs[i]) |
4605 | L = DAG.getPtrExtOrTrunc(Op: L, DL: dl, VT: ValueVTs[i]); |
4606 | |
4607 | Values[i] = L; |
4608 | } |
4609 | |
4610 | if (!ConstantMemory) { |
4611 | SDValue Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, |
4612 | Ops: ArrayRef(Chains.data(), ChainI)); |
4613 | if (isVolatile) |
4614 | DAG.setRoot(Chain); |
4615 | else |
4616 | PendingLoads.push_back(Elt: Chain); |
4617 | } |
4618 | |
4619 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl, |
4620 | VTList: DAG.getVTList(VTs: ValueVTs), Ops: Values)); |
4621 | } |
4622 | |
4623 | void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { |
4624 | assert(DAG.getTargetLoweringInfo().supportSwiftError() && |
4625 | "call visitStoreToSwiftError when backend supports swifterror" ); |
4626 | |
4627 | SmallVector<EVT, 4> ValueVTs; |
4628 | SmallVector<uint64_t, 4> Offsets; |
4629 | const Value *SrcV = I.getOperand(i_nocapture: 0); |
4630 | ComputeValueVTs(TLI: DAG.getTargetLoweringInfo(), DL: DAG.getDataLayout(), |
4631 | Ty: SrcV->getType(), ValueVTs, FixedOffsets: &Offsets, StartingOffset: 0); |
4632 | assert(ValueVTs.size() == 1 && Offsets[0] == 0 && |
4633 | "expect a single EVT for swifterror" ); |
4634 | |
4635 | SDValue Src = getValue(V: SrcV); |
4636 | // Create a virtual register, then update the virtual register. |
4637 | Register VReg = |
4638 | SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand()); |
4639 | // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue |
4640 | // Chain can be getRoot or getControlRoot. |
4641 | SDValue CopyNode = DAG.getCopyToReg(Chain: getRoot(), dl: getCurSDLoc(), Reg: VReg, |
4642 | N: SDValue(Src.getNode(), Src.getResNo())); |
4643 | DAG.setRoot(CopyNode); |
4644 | } |
4645 | |
4646 | void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { |
4647 | assert(DAG.getTargetLoweringInfo().supportSwiftError() && |
4648 | "call visitLoadFromSwiftError when backend supports swifterror" ); |
4649 | |
4650 | assert(!I.isVolatile() && |
4651 | !I.hasMetadata(LLVMContext::MD_nontemporal) && |
4652 | !I.hasMetadata(LLVMContext::MD_invariant_load) && |
4653 | "Support volatile, non temporal, invariant for load_from_swift_error" ); |
4654 | |
4655 | const Value *SV = I.getOperand(i_nocapture: 0); |
4656 | Type *Ty = I.getType(); |
4657 | assert( |
4658 | (!AA || |
4659 | !AA->pointsToConstantMemory(MemoryLocation( |
4660 | SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)), |
4661 | I.getAAMetadata()))) && |
4662 | "load_from_swift_error should not be constant memory" ); |
4663 | |
4664 | SmallVector<EVT, 4> ValueVTs; |
4665 | SmallVector<uint64_t, 4> Offsets; |
4666 | ComputeValueVTs(TLI: DAG.getTargetLoweringInfo(), DL: DAG.getDataLayout(), Ty, |
4667 | ValueVTs, FixedOffsets: &Offsets, StartingOffset: 0); |
4668 | assert(ValueVTs.size() == 1 && Offsets[0] == 0 && |
4669 | "expect a single EVT for swifterror" ); |
4670 | |
4671 | // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT |
4672 | SDValue L = DAG.getCopyFromReg( |
4673 | Chain: getRoot(), dl: getCurSDLoc(), |
4674 | Reg: SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), VT: ValueVTs[0]); |
4675 | |
4676 | setValue(V: &I, NewN: L); |
4677 | } |
4678 | |
4679 | void SelectionDAGBuilder::visitStore(const StoreInst &I) { |
4680 | if (I.isAtomic()) |
4681 | return visitAtomicStore(I); |
4682 | |
4683 | const Value *SrcV = I.getOperand(i_nocapture: 0); |
4684 | const Value *PtrV = I.getOperand(i_nocapture: 1); |
4685 | |
4686 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
4687 | if (TLI.supportSwiftError()) { |
4688 | // Swifterror values can come from either a function parameter with |
4689 | // swifterror attribute or an alloca with swifterror attribute. |
4690 | if (const Argument *Arg = dyn_cast<Argument>(Val: PtrV)) { |
4691 | if (Arg->hasSwiftErrorAttr()) |
4692 | return visitStoreToSwiftError(I); |
4693 | } |
4694 | |
4695 | if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: PtrV)) { |
4696 | if (Alloca->isSwiftError()) |
4697 | return visitStoreToSwiftError(I); |
4698 | } |
4699 | } |
4700 | |
4701 | SmallVector<EVT, 4> ValueVTs, MemVTs; |
4702 | SmallVector<TypeSize, 4> Offsets; |
4703 | ComputeValueVTs(TLI: DAG.getTargetLoweringInfo(), DL: DAG.getDataLayout(), |
4704 | Ty: SrcV->getType(), ValueVTs, MemVTs: &MemVTs, Offsets: &Offsets); |
4705 | unsigned NumValues = ValueVTs.size(); |
4706 | if (NumValues == 0) |
4707 | return; |
4708 | |
  // Get the lowered operands. Note that we do this after
  // checking if NumValues is zero, because with zero values
  // the operands won't have entries in the map.
4712 | SDValue Src = getValue(V: SrcV); |
4713 | SDValue Ptr = getValue(V: PtrV); |
4714 | |
4715 | SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot(); |
4716 | SmallVector<SDValue, 4> Chains(std::min(a: MaxParallelChains, b: NumValues)); |
4717 | SDLoc dl = getCurSDLoc(); |
4718 | Align Alignment = I.getAlign(); |
4719 | AAMDNodes AAInfo = I.getAAMetadata(); |
4720 | |
4721 | auto MMOFlags = TLI.getStoreMemOperandFlags(SI: I, DL: DAG.getDataLayout()); |
4722 | |
4723 | unsigned ChainI = 0; |
4724 | for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { |
4725 | // See visitLoad comments. |
4726 | if (ChainI == MaxParallelChains) { |
4727 | SDValue Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, |
4728 | Ops: ArrayRef(Chains.data(), ChainI)); |
4729 | Root = Chain; |
4730 | ChainI = 0; |
4731 | } |
4732 | |
4733 | // TODO: MachinePointerInfo only supports a fixed length offset. |
4734 | MachinePointerInfo PtrInfo = |
4735 | !Offsets[i].isScalable() || Offsets[i].isZero() |
4736 | ? MachinePointerInfo(PtrV, Offsets[i].getKnownMinValue()) |
4737 | : MachinePointerInfo(); |
4738 | |
4739 | SDValue Add = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: Offsets[i]); |
4740 | SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i); |
4741 | if (MemVTs[i] != ValueVTs[i]) |
4742 | Val = DAG.getPtrExtOrTrunc(Op: Val, DL: dl, VT: MemVTs[i]); |
4743 | SDValue St = |
4744 | DAG.getStore(Chain: Root, dl, Val, Ptr: Add, PtrInfo, Alignment, MMOFlags, AAInfo); |
4745 | Chains[ChainI] = St; |
4746 | } |
4747 | |
4748 | SDValue StoreNode = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, |
4749 | Ops: ArrayRef(Chains.data(), ChainI)); |
4750 | setValue(V: &I, NewN: StoreNode); |
4751 | DAG.setRoot(StoreNode); |
4752 | } |
4753 | |
4754 | void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, |
4755 | bool IsCompressing) { |
4756 | SDLoc sdl = getCurSDLoc(); |
4757 | |
4758 | auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, |
4759 | Align &Alignment) { |
4760 | // llvm.masked.store.*(Src0, Ptr, alignment, Mask) |
4761 | Src0 = I.getArgOperand(i: 0); |
4762 | Ptr = I.getArgOperand(i: 1); |
4763 | Alignment = cast<ConstantInt>(Val: I.getArgOperand(i: 2))->getAlignValue(); |
4764 | Mask = I.getArgOperand(i: 3); |
4765 | }; |
4766 | auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, |
4767 | Align &Alignment) { |
4768 | // llvm.masked.compressstore.*(Src0, Ptr, Mask) |
4769 | Src0 = I.getArgOperand(i: 0); |
4770 | Ptr = I.getArgOperand(i: 1); |
4771 | Mask = I.getArgOperand(i: 2); |
4772 | Alignment = I.getParamAlign(ArgNo: 1).valueOrOne(); |
4773 | }; |
4774 | |
4775 | Value *PtrOperand, *MaskOperand, *Src0Operand; |
4776 | Align Alignment; |
4777 | if (IsCompressing) |
4778 | getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); |
4779 | else |
4780 | getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); |
4781 | |
4782 | SDValue Ptr = getValue(V: PtrOperand); |
4783 | SDValue Src0 = getValue(V: Src0Operand); |
4784 | SDValue Mask = getValue(V: MaskOperand); |
4785 | SDValue Offset = DAG.getUNDEF(VT: Ptr.getValueType()); |
4786 | |
4787 | EVT VT = Src0.getValueType(); |
4788 | |
4789 | auto MMOFlags = MachineMemOperand::MOStore; |
4790 | if (I.hasMetadata(KindID: LLVMContext::MD_nontemporal)) |
4791 | MMOFlags |= MachineMemOperand::MONonTemporal; |
4792 | |
4793 | MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( |
4794 | PtrInfo: MachinePointerInfo(PtrOperand), F: MMOFlags, |
4795 | Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: Alignment, AAInfo: I.getAAMetadata()); |
4796 | |
4797 | const auto &TLI = DAG.getTargetLoweringInfo(); |
4798 | const auto &TTI = |
4799 | TLI.getTargetMachine().getTargetTransformInfo(F: *I.getFunction()); |
4800 | SDValue StoreNode = |
4801 | !IsCompressing && |
4802 | TTI.hasConditionalLoadStoreForType(Ty: I.getArgOperand(i: 0)->getType()) |
4803 | ? TLI.visitMaskedStore(DAG, DL: sdl, Chain: getMemoryRoot(), MMO, Ptr, Val: Src0, |
4804 | Mask) |
4805 | : DAG.getMaskedStore(Chain: getMemoryRoot(), dl: sdl, Val: Src0, Base: Ptr, Offset, Mask, |
4806 | MemVT: VT, MMO, AM: ISD::UNINDEXED, /*Truncating=*/IsTruncating: false, |
4807 | IsCompressing); |
4808 | DAG.setRoot(StoreNode); |
4809 | setValue(V: &I, NewN: StoreNode); |
4810 | } |
4811 | |
4812 | // Get a uniform base for the Gather/Scatter intrinsic. |
4813 | // The first argument of the Gather/Scatter intrinsic is a vector of pointers. |
4814 | // We try to represent it as a base pointer + vector of indices. |
4815 | // Usually, the vector of pointers comes from a 'getelementptr' instruction. |
4816 | // The first operand of the GEP may be a single pointer or a vector of pointers |
4817 | // Example: |
4818 | // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind |
4819 | // or |
4820 | // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind |
4821 | // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, .. |
4822 | // |
// When the first GEP operand is a single pointer, it is the uniform base we
// are looking for. If the first operand of the GEP is a splat vector, we
// extract the splat value and use it as the uniform base.
// In all other cases the function returns 'false'.
4827 | static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, |
4828 | ISD::MemIndexType &IndexType, SDValue &Scale, |
4829 | SelectionDAGBuilder *SDB, const BasicBlock *CurBB, |
4830 | uint64_t ElemSize) { |
4831 | SelectionDAG& DAG = SDB->DAG; |
4832 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
4833 | const DataLayout &DL = DAG.getDataLayout(); |
4834 | |
4835 | assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type" ); |
4836 | |
4837 | // Handle splat constant pointer. |
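  // For example, a gather through a splat of "ptr %p" uses %p as the uniform
  // base together with an all-zero index vector.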
4838 | if (auto *C = dyn_cast<Constant>(Val: Ptr)) { |
4839 | C = C->getSplatValue(); |
4840 | if (!C) |
4841 | return false; |
4842 | |
4843 | Base = SDB->getValue(V: C); |
4844 | |
4845 | ElementCount NumElts = cast<VectorType>(Val: Ptr->getType())->getElementCount(); |
4846 | EVT VT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: TLI.getPointerTy(DL), EC: NumElts); |
4847 | Index = DAG.getConstant(Val: 0, DL: SDB->getCurSDLoc(), VT); |
4848 | IndexType = ISD::SIGNED_SCALED; |
4849 | Scale = DAG.getTargetConstant(Val: 1, DL: SDB->getCurSDLoc(), VT: TLI.getPointerTy(DL)); |
4850 | return true; |
4851 | } |
4852 | |
4853 | const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: Ptr); |
4854 | if (!GEP || GEP->getParent() != CurBB) |
4855 | return false; |
4856 | |
4857 | if (GEP->getNumOperands() != 2) |
4858 | return false; |
4859 | |
4860 | const Value *BasePtr = GEP->getPointerOperand(); |
4861 | const Value *IndexVal = GEP->getOperand(i_nocapture: GEP->getNumOperands() - 1); |
4862 | |
4863 | // Make sure the base is scalar and the index is a vector. |
4864 | if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy()) |
4865 | return false; |
4866 | |
4867 | TypeSize ScaleVal = DL.getTypeAllocSize(Ty: GEP->getResultElementType()); |
4868 | if (ScaleVal.isScalable()) |
4869 | return false; |
4870 | |
4871 | // Target may not support the required addressing mode. |
4872 | if (ScaleVal != 1 && |
4873 | !TLI.isLegalScaleForGatherScatter(Scale: ScaleVal.getFixedValue(), ElemSize)) |
4874 | return false; |
4875 | |
4876 | Base = SDB->getValue(V: BasePtr); |
4877 | Index = SDB->getValue(V: IndexVal); |
4878 | IndexType = ISD::SIGNED_SCALED; |
4879 | |
4880 | Scale = |
4881 | DAG.getTargetConstant(Val: ScaleVal, DL: SDB->getCurSDLoc(), VT: TLI.getPointerTy(DL)); |
4882 | return true; |
4883 | } |
4884 | |
4885 | void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { |
4886 | SDLoc sdl = getCurSDLoc(); |
4887 | |
4888 | // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask) |
4889 | const Value *Ptr = I.getArgOperand(i: 1); |
4890 | SDValue Src0 = getValue(V: I.getArgOperand(i: 0)); |
4891 | SDValue Mask = getValue(V: I.getArgOperand(i: 3)); |
4892 | EVT VT = Src0.getValueType(); |
4893 | Align Alignment = cast<ConstantInt>(Val: I.getArgOperand(i: 2)) |
4894 | ->getMaybeAlignValue() |
4895 | .value_or(u: DAG.getEVTAlign(MemoryVT: VT.getScalarType())); |
4896 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
4897 | |
4898 | SDValue Base; |
4899 | SDValue Index; |
4900 | ISD::MemIndexType IndexType; |
4901 | SDValue Scale; |
4902 | bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, SDB: this, |
4903 | CurBB: I.getParent(), ElemSize: VT.getScalarStoreSize()); |
4904 | |
4905 | unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); |
4906 | MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( |
4907 | PtrInfo: MachinePointerInfo(AS), F: MachineMemOperand::MOStore, |
4908 | Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: Alignment, AAInfo: I.getAAMetadata()); |
4909 | if (!UniformBase) { |
4910 | Base = DAG.getConstant(Val: 0, DL: sdl, VT: TLI.getPointerTy(DL: DAG.getDataLayout())); |
4911 | Index = getValue(V: Ptr); |
4912 | IndexType = ISD::SIGNED_SCALED; |
4913 | Scale = DAG.getTargetConstant(Val: 1, DL: sdl, VT: TLI.getPointerTy(DL: DAG.getDataLayout())); |
4914 | } |
4915 | |
4916 | EVT IdxVT = Index.getValueType(); |
4917 | EVT EltTy = IdxVT.getVectorElementType(); |
4918 | if (TLI.shouldExtendGSIndex(VT: IdxVT, EltTy)) { |
4919 | EVT NewIdxVT = IdxVT.changeVectorElementType(EltVT: EltTy); |
4920 | Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: sdl, VT: NewIdxVT, Operand: Index); |
4921 | } |
4922 | |
4923 | SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale }; |
4924 | SDValue Scatter = DAG.getMaskedScatter(VTs: DAG.getVTList(VT: MVT::Other), MemVT: VT, dl: sdl, |
4925 | Ops, MMO, IndexType, IsTruncating: false); |
4926 | DAG.setRoot(Scatter); |
4927 | setValue(V: &I, NewN: Scatter); |
4928 | } |
4929 | |
4930 | void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { |
4931 | SDLoc sdl = getCurSDLoc(); |
4932 | |
4933 | auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, |
4934 | Align &Alignment) { |
4935 | // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) |
4936 | Ptr = I.getArgOperand(i: 0); |
4937 | Alignment = cast<ConstantInt>(Val: I.getArgOperand(i: 1))->getAlignValue(); |
4938 | Mask = I.getArgOperand(i: 2); |
4939 | Src0 = I.getArgOperand(i: 3); |
4940 | }; |
4941 | auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, |
4942 | Align &Alignment) { |
4943 | // @llvm.masked.expandload.*(Ptr, Mask, Src0) |
4944 | Ptr = I.getArgOperand(i: 0); |
4945 | Alignment = I.getParamAlign(ArgNo: 0).valueOrOne(); |
4946 | Mask = I.getArgOperand(i: 1); |
4947 | Src0 = I.getArgOperand(i: 2); |
4948 | }; |
4949 | |
4950 | Value *PtrOperand, *MaskOperand, *Src0Operand; |
4951 | Align Alignment; |
4952 | if (IsExpanding) |
4953 | getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); |
4954 | else |
4955 | getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); |
4956 | |
4957 | SDValue Ptr = getValue(V: PtrOperand); |
4958 | SDValue Src0 = getValue(V: Src0Operand); |
4959 | SDValue Mask = getValue(V: MaskOperand); |
4960 | SDValue Offset = DAG.getUNDEF(VT: Ptr.getValueType()); |
4961 | |
4962 | EVT VT = Src0.getValueType(); |
4963 | AAMDNodes AAInfo = I.getAAMetadata(); |
4964 | const MDNode *Ranges = getRangeMetadata(I); |
4965 | |
4966 | // Do not serialize masked loads of constant memory with anything. |
4967 | MemoryLocation ML = MemoryLocation::getAfter(Ptr: PtrOperand, AATags: AAInfo); |
4968 | bool AddToChain = !AA || !AA->pointsToConstantMemory(Loc: ML); |
4969 | |
4970 | SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); |

  auto MMOFlags = MachineMemOperand::MOLoad;
  if (I.hasMetadata(LLVMContext::MD_nontemporal))
    MMOFlags |= MachineMemOperand::MONonTemporal;

  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      MachinePointerInfo(PtrOperand), MMOFlags,
      LocationSize::beforeOrAfterPointer(), Alignment, AAInfo, Ranges);

  const auto &TLI = DAG.getTargetLoweringInfo();
  const auto &TTI =
      TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());
  // Load and Res may end up as different nodes; both are outputs of the
  // lowering below.
  SDValue Load;
  SDValue Res;
  if (!IsExpanding &&
      TTI.hasConditionalLoadStoreForType(Src0Operand->getType()))
    Res = TLI.visitMaskedLoad(DAG, sdl, InChain, MMO, Load, Ptr, Src0, Mask);
  else
    Res = Load =
        DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
                          ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding);
  if (AddToChain)
    PendingLoads.push_back(Load.getValue(1));
  setValue(&I, Res);
}

void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
  SDLoc sdl = getCurSDLoc();

  // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
  const Value *Ptr = I.getArgOperand(0);
  SDValue Src0 = getValue(I.getArgOperand(3));
  SDValue Mask = getValue(I.getArgOperand(2));

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
  Align Alignment = cast<ConstantInt>(I.getArgOperand(1))
                        ->getMaybeAlignValue()
                        .value_or(DAG.getEVTAlign(VT.getScalarType()));

  const MDNode *Ranges = getRangeMetadata(I);

  SDValue Root = DAG.getRoot();
  SDValue Base;
  SDValue Index;
  ISD::MemIndexType IndexType;
  SDValue Scale;
  bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
                                    I.getParent(), VT.getScalarStoreSize());
  unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      MachinePointerInfo(AS), MachineMemOperand::MOLoad,
      LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata(),
      Ranges);

  if (!UniformBase) {
    Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
    Index = getValue(Ptr);
    IndexType = ISD::SIGNED_SCALED;
    Scale =
        DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
  }

  EVT IdxVT = Index.getValueType();
  EVT EltTy = IdxVT.getVectorElementType();
  if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
    EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
    Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
  }

  SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
  SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
                                       Ops, MMO, IndexType, ISD::NON_EXTLOAD);

  PendingLoads.push_back(Gather.getValue(1));
  setValue(&I, Gather);
}

void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
  SDLoc dl = getCurSDLoc();
  AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
  AtomicOrdering FailureOrdering = I.getFailureOrdering();
  SyncScope::ID SSID = I.getSyncScopeID();

  SDValue InChain = getRoot();

  MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
  SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());

  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo(I.getPointerOperand()), Flags,
      LocationSize::precise(MemVT.getStoreSize()), DAG.getEVTAlign(MemVT),
      AAMDNodes(), nullptr, SSID, SuccessOrdering, FailureOrdering);

  SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
                                   dl, MemVT, VTs, InChain,
                                   getValue(I.getPointerOperand()),
                                   getValue(I.getCompareOperand()),
                                   getValue(I.getNewValOperand()), MMO);

  SDValue OutChain = L.getValue(2);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}

void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
  SDLoc dl = getCurSDLoc();
  ISD::NodeType NT;
  switch (I.getOperation()) {
  default: llvm_unreachable("Unknown atomicrmw operation");
  case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
  case AtomicRMWInst::Add:  NT = ISD::ATOMIC_LOAD_ADD; break;
  case AtomicRMWInst::Sub:  NT = ISD::ATOMIC_LOAD_SUB; break;
  case AtomicRMWInst::And:  NT = ISD::ATOMIC_LOAD_AND; break;
  case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
  case AtomicRMWInst::Or:   NT = ISD::ATOMIC_LOAD_OR; break;
  case AtomicRMWInst::Xor:  NT = ISD::ATOMIC_LOAD_XOR; break;
  case AtomicRMWInst::Max:  NT = ISD::ATOMIC_LOAD_MAX; break;
  case AtomicRMWInst::Min:  NT = ISD::ATOMIC_LOAD_MIN; break;
  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
  case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
  case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
  case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break;
  case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break;
  case AtomicRMWInst::UIncWrap:
    NT = ISD::ATOMIC_LOAD_UINC_WRAP;
    break;
  case AtomicRMWInst::UDecWrap:
    NT = ISD::ATOMIC_LOAD_UDEC_WRAP;
    break;
  }
  AtomicOrdering Ordering = I.getOrdering();
  SyncScope::ID SSID = I.getSyncScopeID();

  SDValue InChain = getRoot();

  auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());

  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo(I.getPointerOperand()), Flags,
      LocationSize::precise(MemVT.getStoreSize()), DAG.getEVTAlign(MemVT),
      AAMDNodes(), nullptr, SSID, Ordering);

  SDValue L =
      DAG.getAtomic(NT, dl, MemVT, InChain,
                    getValue(I.getPointerOperand()),
                    getValue(I.getValOperand()), MMO);

  SDValue OutChain = L.getValue(1);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}

void SelectionDAGBuilder::visitFence(const FenceInst &I) {
  SDLoc dl = getCurSDLoc();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Ops[3];
  Ops[0] = getRoot();
  Ops[1] = DAG.getTargetConstant((unsigned)I.getOrdering(), dl,
                                 TLI.getFenceOperandTy(DAG.getDataLayout()));
  Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl,
                                 TLI.getFenceOperandTy(DAG.getDataLayout()));
  SDValue N = DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops);
  setValue(&I, N);
  DAG.setRoot(N);
}
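
// Illustrative example: "fence seq_cst" lowers to an ATOMIC_FENCE node whose
// second operand is the numeric AtomicOrdering value (SequentiallyConsistent
// here) and whose third is the SyncScope::ID (SyncScope::System unless the
// fence names a narrower "syncscope"), both as target constants of the
// target's fence operand type.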

void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
  SDLoc dl = getCurSDLoc();
  AtomicOrdering Order = I.getOrdering();
  SyncScope::ID SSID = I.getSyncScopeID();

  SDValue InChain = getRoot();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
  EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());

  if (!TLI.supportsUnalignedAtomics() &&
      I.getAlign().value() < MemVT.getSizeInBits() / 8)
    report_fatal_error("Cannot generate unaligned atomic load");

  auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);

  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      MachinePointerInfo(I.getPointerOperand()), Flags,
      LocationSize::precise(MemVT.getStoreSize()), I.getAlign(), AAMDNodes(),
      nullptr, SSID, Order);

  InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);

  SDValue Ptr = getValue(I.getPointerOperand());
  SDValue L =
      DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, Ptr, MMO);

  SDValue OutChain = L.getValue(1);
  if (MemVT != VT)
    L = DAG.getPtrExtOrTrunc(L, dl, VT);

  setValue(&I, L);
  DAG.setRoot(OutChain);
}

void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
  SDLoc dl = getCurSDLoc();

  AtomicOrdering Ordering = I.getOrdering();
  SyncScope::ID SSID = I.getSyncScopeID();

  SDValue InChain = getRoot();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT MemVT =
      TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());

  if (!TLI.supportsUnalignedAtomics() &&
      I.getAlign().value() < MemVT.getSizeInBits() / 8)
    report_fatal_error("Cannot generate unaligned atomic store");

  auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());

  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo(I.getPointerOperand()), Flags,
      LocationSize::precise(MemVT.getStoreSize()), I.getAlign(), AAMDNodes(),
      nullptr, SSID, Ordering);

  SDValue Val = getValue(I.getValueOperand());
  if (Val.getValueType() != MemVT)
    Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
  SDValue Ptr = getValue(I.getPointerOperand());

  // For ISD::ATOMIC_STORE the value operand precedes the pointer operand.
  SDValue OutChain =
      DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, Val, Ptr, MMO);

  setValue(&I, OutChain);
  DAG.setRoot(OutChain);
}

/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
/// node.
void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
                                               unsigned Intrinsic) {
  // Ignore the callsite's attributes. A specific call site may be marked with
  // readnone, but the lowering code will expect the chain based on the
  // definition.
  const Function *F = I.getCalledFunction();
  bool HasChain = !F->doesNotAccessMemory();
  bool OnlyLoad = HasChain && F->onlyReadsMemory();

  // Build the operand list.
  SmallVector<SDValue, 8> Ops;
  if (HasChain) { // If this intrinsic has side-effects, chainify it.
    if (OnlyLoad) {
      // We don't need to serialize loads against other loads.
      Ops.push_back(DAG.getRoot());
    } else {
      Ops.push_back(getRoot());
    }
  }

  // Info is set by getTgtMemIntrinsic.
  TargetLowering::IntrinsicInfo Info;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
                                               DAG.getMachineFunction(),
                                               Intrinsic);

  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
      Info.opc == ISD::INTRINSIC_W_CHAIN)
    Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
                                        TLI.getPointerTy(DAG.getDataLayout())));

  // Add all operands of the call to the operand list.
  for (unsigned i = 0, e = I.arg_size(); i != e; ++i) {
    const Value *Arg = I.getArgOperand(i);
    if (!I.paramHasAttr(i, Attribute::ImmArg)) {
      Ops.push_back(getValue(Arg));
      continue;
    }

    // Use TargetConstant instead of a regular constant for immarg.
    EVT VT = TLI.getValueType(DAG.getDataLayout(), Arg->getType(),
                              /*AllowUnknown=*/true);
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
      assert(CI->getBitWidth() <= 64 &&
             "large intrinsic immediates not handled");
      Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
    } else {
      Ops.push_back(
          DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
    }
  }

  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);

  if (HasChain)
    ValueVTs.push_back(MVT::Other);

  SDVTList VTs = DAG.getVTList(ValueVTs);

  // Propagate fast-math-flags from IR to node(s).
  SDNodeFlags Flags;
  if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
    Flags.copyFMF(*FPMO);
  SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);

  // Create the node.
  SDValue Result;

  if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
    auto *Token = Bundle->Inputs[0].get();
    SDValue ConvControlToken = getValue(Token);
    assert(Ops.back().getValueType() != MVT::Glue &&
           "Did not expect another glue node here.");
    ConvControlToken =
        DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
    Ops.push_back(ConvControlToken);
  }

  // In some cases, custom collection of operands from CallInst I may be
  // needed.
  TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
  if (IsTgtIntrinsic) {
    // This is a target intrinsic that touches memory.
    //
    // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic
    // didn't yield anything useful.
    MachinePointerInfo MPI;
    if (Info.ptrVal)
      MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
    else if (Info.fallbackAddressSpace)
      MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
    Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
                                     Info.memVT, MPI, Info.align, Info.flags,
                                     Info.size, I.getAAMetadata());
  } else if (!HasChain) {
    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
  } else if (!I.getType()->isVoidTy()) {
    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
  } else {
    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
  }

  if (HasChain) {
    SDValue Chain = Result.getValue(Result.getNode()->getNumValues() - 1);
    if (OnlyLoad)
      PendingLoads.push_back(Chain);
    else
      DAG.setRoot(Chain);
  }

  if (!I.getType()->isVoidTy()) {
    if (!isa<VectorType>(I.getType()))
      Result = lowerRangeToAssertZExt(DAG, I, Result);

    MaybeAlign Alignment = I.getRetAlign();

    // Insert `assertalign` node if there's an alignment.
    if (InsertAssertAlign && Alignment) {
      Result =
          DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
    }
  }

  setValue(&I, Result);
}
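
// Illustrative operand layout built above: for a chained intrinsic the node's
// operands are { Chain, [intrinsic ID as a TargetConstant, unless a target
// memory intrinsic supplies its own opcode], arg0, arg1, ...,
// [convergence-control glue] }. A read-only intrinsic chains off
// DAG.getRoot() and parks its output chain on PendingLoads instead of
// becoming the new root.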

/// GetSignificand - Get the significand and build it into a floating-point
/// number with exponent of 1:
///
///   Op = (Op & 0x007fffff) | 0x3f800000;
///
/// where Op is the i32 bit pattern of the floating-point value.
static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                           DAG.getConstant(0x007fffff, dl, MVT::i32));
  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
                           DAG.getConstant(0x3f800000, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
}
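
// Worked example (illustrative): Op = 0x40490fdb (3.14159274f). Masking keeps
// the mantissa bits 0x00490fdb; OR-ing in 0x3f800000 forces the unbiased
// exponent to 0, giving 0x3fc90fdb == 1.57079637f, i.e. the significand
// scaled into [1.0, 2.0).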

/// GetExponent - Get the exponent:
///
///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
///
/// where Op is the i32 bit pattern of the floating-point value.
static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
                           const TargetLowering &TLI, const SDLoc &dl) {
  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
                           DAG.getConstant(0x7f800000, dl, MVT::i32));
  SDValue t1 = DAG.getNode(
      ISD::SRL, dl, MVT::i32, t0,
      DAG.getConstant(23, dl,
                      TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
                           DAG.getConstant(127, dl, MVT::i32));
  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
}
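
// Worked example (illustrative): Op = 0x40490fdb (3.14159274f). The exponent
// field is (0x40490fdb & 0x7f800000) >> 23 == 128; subtracting the bias 127
// leaves 1, so the result is 1.0f (consistent with 2^1 <= pi < 2^2).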

/// getF32Constant - Get 32-bit floating point constant.
static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
                              const SDLoc &dl) {
  return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
                           MVT::f32);
}

static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
                                       SelectionDAG &DAG) {
  // TODO: What fast-math-flags should be set on the floating-point nodes?

  //   IntegerPartOfX = (int32_t)t0;
  SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);

  //   FractionalPartOfX = t0 - (float)IntegerPartOfX;
  SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
  SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);

  //   IntegerPartOfX <<= 23;
  IntegerPartOfX =
      DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
                  DAG.getConstant(23, dl,
                                  DAG.getTargetLoweringInfo().getShiftAmountTy(
                                      MVT::i32, DAG.getDataLayout())));

  SDValue TwoToFractionalPartOfX;
  if (LimitFloatPrecision <= 6) {
    // For floating-point precision of 6:
    //
    //   TwoToFractionalPartOfX =
    //     0.997535578f +
    //       (0.735607626f + 0.252464424f * x) * x;
    //
    // error 0.0144103317, which is 6 bits
    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                             getF32Constant(DAG, 0x3e814304, dl));
    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                             getF32Constant(DAG, 0x3f3c50c8, dl));
    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                                         getF32Constant(DAG, 0x3f7f5e7e, dl));
  } else if (LimitFloatPrecision <= 12) {
    // For floating-point precision of 12:
    //
    //   TwoToFractionalPartOfX =
    //     0.999892986f +
    //       (0.696457318f +
    //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
    //
    // error 0.000107046256, which is 13 to 14 bits
    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                             getF32Constant(DAG, 0x3da235e3, dl));
    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                             getF32Constant(DAG, 0x3e65b8f3, dl));
    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
    SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                             getF32Constant(DAG, 0x3f324b07, dl));
    SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                                         getF32Constant(DAG, 0x3f7ff8fd, dl));
  } else { // LimitFloatPrecision <= 18
    // For floating-point precision of 18:
    //
    //   TwoToFractionalPartOfX =
    //     0.999999982f +
    //       (0.693148872f +
    //         (0.240227044f +
    //           (0.554906021e-1f +
    //             (0.961591928e-2f +
    //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
    //
    // error 2.47208000*10^(-7), which is better than 18 bits
    SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                             getF32Constant(DAG, 0x3924b03e, dl));
    SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                             getF32Constant(DAG, 0x3ab24b87, dl));
    SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
    SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                             getF32Constant(DAG, 0x3c1d8c17, dl));
    SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
    SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                             getF32Constant(DAG, 0x3d634a1d, dl));
    SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
    SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
                             getF32Constant(DAG, 0x3e75fe14, dl));
    SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
    SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
                              getF32Constant(DAG, 0x3f317234, dl));
    SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
    TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
                                         getF32Constant(DAG, 0x3f800000, dl));
  }

  // Add the exponent into the result in integer domain.
  SDValue t13 =
      DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
                     DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
}
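
// Worked example (illustrative): t0 = 2.5 gives IntegerPartOfX = 2 and
// X = 0.5. The polynomial approximates 2^0.5 ~= 1.41421 (bits 0x3fb504f3);
// adding 2 << 23 to those bits bumps the biased exponent by 2, producing
// 0x40b504f3 ~= 5.65685 == 2^2.5. Adding in the integer domain is an exact
// scale by a power of two while the value stays in the normal range.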

/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                         const TargetLowering &TLI, SDNodeFlags Flags) {
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {

    // Put the exponent in the right bit position for later addition to the
    // final result:
    //
    //   t0 = Op * log2(e)

    // TODO: What fast-math-flags should be set here?
    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
                             DAG.getConstantFP(numbers::log2ef, dl, MVT::f32));
    return getLimitedPrecisionExp2(t0, dl, DAG);
  }

  // No special expansion.
  return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags);
}

/// expandLog - Lower a log intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                         const TargetLowering &TLI, SDNodeFlags Flags) {
  // TODO: What fast-math-flags should be set on the floating-point nodes?

  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);

    // Scale the exponent by log(2).
    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
    SDValue LogOfExponent =
        DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
                    DAG.getConstantFP(numbers::ln2f, dl, MVT::f32));

    // Get the significand and build it into a floating-point number with
    // exponent of 1.
    SDValue X = GetSignificand(DAG, Op1, dl);

    SDValue LogOfMantissa;
    if (LimitFloatPrecision <= 6) {
      // For floating-point precision of 6:
      //
      //   LogOfMantissa =
      //     -1.1609546f +
      //       (1.4034025f - 0.23903021f * x) * x;
      //
      // error 0.0034276066, which is better than 8 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbe74c456, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3fb3a2b1, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                                  getF32Constant(DAG, 0x3f949a29, dl));
    } else if (LimitFloatPrecision <= 12) {
      // For floating-point precision of 12:
      //
      //   LogOfMantissa =
      //     -1.7417939f +
      //       (2.8212026f +
      //         (-1.4699568f +
      //           (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
      //
      // error 0.000061011436, which is 14 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbd67b6d6, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3ee4f4b8, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3fbc278b, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x40348e95, dl));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
                                  getF32Constant(DAG, 0x3fdef31a, dl));
    } else { // LimitFloatPrecision <= 18
      // For floating-point precision of 18:
      //
      //   LogOfMantissa =
      //     -2.1072184f +
      //       (4.2372794f +
      //         (-3.7029485f +
      //           (2.2781945f +
      //             (-0.87823314f +
      //               (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
      //
      // error 0.0000023660568, which is better than 18 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbc91e5ac, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3e4350aa, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3f60d3e3, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x4011cdf0, dl));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
                               getF32Constant(DAG, 0x406cfd1c, dl));
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
                               getF32Constant(DAG, 0x408797cb, dl));
      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
      LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
                                  getF32Constant(DAG, 0x4006dcab, dl));
    }

    return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
  }

  // No special expansion.
  return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op, Flags);
}

/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI, SDNodeFlags Flags) {
  // TODO: What fast-math-flags should be set on the floating-point nodes?

  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);

    // Get the exponent.
    SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);

    // Get the significand and build it into a floating-point number with
    // exponent of 1.
    SDValue X = GetSignificand(DAG, Op1, dl);

    // Different possible minimax approximations of significand in
    // floating-point for various degrees of accuracy over [1,2].
    SDValue Log2ofMantissa;
    if (LimitFloatPrecision <= 6) {
      // For floating-point precision of 6:
      //
      //   Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
      //
      // error 0.0049451742, which is more than 7 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbeb08fe0, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x40019463, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                                   getF32Constant(DAG, 0x3fd6633d, dl));
    } else if (LimitFloatPrecision <= 12) {
      // For floating-point precision of 12:
      //
      //   Log2ofMantissa =
      //     -2.51285454f +
      //       (4.07009056f +
      //         (-2.12067489f +
      //           (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
      //
      // error 0.0000876136000, which is better than 13 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbda7262e, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3f25280b, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x4007b923, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x40823e2f, dl));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
                                   getF32Constant(DAG, 0x4020d29c, dl));
    } else { // LimitFloatPrecision <= 18
      // For floating-point precision of 18:
      //
      //   Log2ofMantissa =
      //     -3.0400495f +
      //       (6.1129976f +
      //         (-5.3420409f +
      //           (3.2865683f +
      //             (-1.2669343f +
      //               (0.27515199f -
      //                 0.25691327e-1f * x) * x) * x) * x) * x) * x;
      //
      // error 0.0000018516, which is better than 18 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbcd2769e, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3e8ce0b9, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3fa22ae7, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x40525723, dl));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
                               getF32Constant(DAG, 0x40aaf200, dl));
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
                               getF32Constant(DAG, 0x40c39dad, dl));
      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
      Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
                                   getF32Constant(DAG, 0x4042902c, dl));
    }

    return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
  }

  // No special expansion.
  return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op, Flags);
}

/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                           const TargetLowering &TLI, SDNodeFlags Flags) {
  // TODO: What fast-math-flags should be set on the floating-point nodes?

  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);

    // Scale the exponent by log10(2) [0.30102999f].
    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
                                        getF32Constant(DAG, 0x3e9a209a, dl));

    // Get the significand and build it into a floating-point number with
    // exponent of 1.
    SDValue X = GetSignificand(DAG, Op1, dl);

    SDValue Log10ofMantissa;
    if (LimitFloatPrecision <= 6) {
      // For floating-point precision of 6:
      //
      //   Log10ofMantissa =
      //     -0.50419619f +
      //       (0.60948995f - 0.10380950f * x) * x;
      //
      // error 0.0014886165, which is 6 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0xbdd49a13, dl));
      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3f1c0789, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
                                    getF32Constant(DAG, 0x3f011300, dl));
    } else if (LimitFloatPrecision <= 12) {
      // For floating-point precision of 12:
      //
      //   Log10ofMantissa =
      //     -0.64831180f +
      //       (0.91751397f +
      //         (-0.31664806f + 0.47637168e-1f * x) * x) * x;
      //
      // error 0.00019228036, which is better than 12 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0x3d431f31, dl));
      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3ea21fb2, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3f6ae232, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
                                    getF32Constant(DAG, 0x3f25f7c3, dl));
    } else { // LimitFloatPrecision <= 18
      // For floating-point precision of 18:
      //
      //   Log10ofMantissa =
      //     -0.84299375f +
      //       (1.5327582f +
      //         (-1.0688956f +
      //           (0.49102474f +
      //             (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
      //
      // error 0.0000037995730, which is better than 18 bits
      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
                               getF32Constant(DAG, 0x3c5d51ce, dl));
      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
                               getF32Constant(DAG, 0x3e00685a, dl));
      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
                               getF32Constant(DAG, 0x3efb6798, dl));
      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
      SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
                               getF32Constant(DAG, 0x3f88d192, dl));
      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
                               getF32Constant(DAG, 0x3fc4316c, dl));
      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
      Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
                                    getF32Constant(DAG, 0x3f57ce70, dl));
    }

    return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
  }

  // No special expansion.
  return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op, Flags);
}

/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI, SDNodeFlags Flags) {
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
    return getLimitedPrecisionExp2(Op, dl, DAG);

  // No special expansion.
  return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags);
}

/// expandPow - Lower a pow intrinsic. Handles the special sequences for
/// limited-precision mode with x == 10.0f.
static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
                         SelectionDAG &DAG, const TargetLowering &TLI,
                         SDNodeFlags Flags) {
  bool IsExp10 = false;
  if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
      APFloat Ten(10.0f);
      IsExp10 = LHSC->isExactlyValue(Ten);
    }
  }

  // TODO: What fast-math-flags should be set on the FMUL node?
  if (IsExp10) {
    // Put the exponent in the right bit position for later addition to the
    // final result:
    //
    //   #define LOG2OF10 3.3219281f
    //   t0 = Op * LOG2OF10;
    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
                             getF32Constant(DAG, 0x40549a78, dl));
    return getLimitedPrecisionExp2(t0, dl, DAG);
  }

  // No special expansion.
  return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags);
}

/// ExpandPowI - Expand a llvm.powi intrinsic.
static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
                          SelectionDAG &DAG) {
  // If RHS is a constant, we can expand this out to a multiplication tree if
  // it's beneficial on the target, otherwise we end up lowering to a call to
  // __powidf2 (for example).
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    unsigned Val = RHSC->getSExtValue();

    // powi(x, 0) -> 1.0
    if (Val == 0)
      return DAG.getConstantFP(1.0, DL, LHS.getValueType());

    if (DAG.getTargetLoweringInfo().isBeneficialToExpandPowI(
            Val, DAG.shouldOptForSize())) {
      // Get the exponent as a positive value.
      if ((int)Val < 0)
        Val = -Val;
      // We use the simple binary decomposition method to generate the multiply
      // sequence. There are more optimal ways to do this (for example,
      // powi(x,15) generates one more multiply than it should), but this has
      // the benefit of being both really simple and much better than a
      // libcall.
      SDValue Res; // Logically starts equal to 1.0
      SDValue CurSquare = LHS;
      // TODO: Intrinsics should have fast-math-flags that propagate to these
      // nodes.
      while (Val) {
        if (Val & 1) {
          if (Res.getNode())
            Res =
                DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare);
          else
            Res = CurSquare; // 1.0*CurSquare.
        }

        CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
                                CurSquare, CurSquare);
        Val >>= 1;
      }

      // If the original exponent was negative, invert the result, producing
      // 1/(x*x*x).
      if (RHSC->getSExtValue() < 0)
        Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
                          DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
      return Res;
    }
  }

  // Otherwise, expand to a libcall.
  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
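
// Worked example (illustrative): powi(x, 13). 13 is 0b1101, so scanning bits
// from the LSB, Res accumulates CurSquare at bits 0, 2 and 3, i.e.
// x * x^4 * x^8 = x^13, while CurSquare is squared on every iteration
// (x, x^2, x^4, x^8); the last squaring (x^16) ends up unused and is dropped
// as a dead node.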

static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
                            SDValue LHS, SDValue RHS, SDValue Scale,
                            SelectionDAG &DAG, const TargetLowering &TLI) {
  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  LLVMContext &Ctx = *DAG.getContext();

  // If the type is legal but the operation isn't, this node might survive all
  // the way to operation legalization. If we end up there and we do not have
  // the ability to widen the type (if VT*2 is not legal), we cannot expand the
  // node.

  // Coax the legalizer into expanding the node during type legalization
  // instead by bumping the size by one bit. This will force it to Promote,
  // enabling the early expansion and avoiding the need to expand later.

  // We don't have to do this if Scale is 0; that can always be expanded,
  // unless it's a saturating signed operation. Those can experience true
  // integer division overflow, a case which we must avoid.

  // FIXME: We wouldn't have to do this (or any of the early
  // expansion/promotion) if it was possible to expand a libcall of an
  // illegal type during operation legalization. But it's not, so things
  // get a bit hacky.
  unsigned ScaleInt = Scale->getAsZExtVal();
  if ((ScaleInt > 0 || (Saturating && Signed)) &&
      (TLI.isTypeLegal(VT) ||
       (VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) {
    TargetLowering::LegalizeAction Action =
        TLI.getFixedPointOperationAction(Opcode, VT, ScaleInt);
    if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) {
      EVT PromVT;
      if (VT.isScalarInteger())
        PromVT = EVT::getIntegerVT(Ctx, VT.getSizeInBits() + 1);
      else if (VT.isVector()) {
        PromVT = VT.getVectorElementType();
        PromVT = EVT::getIntegerVT(Ctx, PromVT.getSizeInBits() + 1);
        PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount());
      } else
        llvm_unreachable("Wrong VT for DIVFIX?");
      LHS = DAG.getExtOrTrunc(Signed, LHS, DL, PromVT);
      RHS = DAG.getExtOrTrunc(Signed, RHS, DL, PromVT);
      EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout());
      // For saturating operations, we need to shift up the LHS to get the
      // proper saturation width, and then shift down again afterwards.
      if (Saturating)
        LHS = DAG.getNode(ISD::SHL, DL, PromVT, LHS,
                          DAG.getConstant(1, DL, ShiftTy));
      SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale);
      if (Saturating)
        Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, PromVT, Res,
                          DAG.getConstant(1, DL, ShiftTy));
      return DAG.getZExtOrTrunc(Res, DL, VT);
    }
  }

  return DAG.getNode(Opcode, DL, VT, LHS, RHS, Scale);
}
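
// Illustrative example (assumed target behavior): for
// @llvm.sdiv.fix.sat.i32(%a, %b, 8) on a target where i32 is legal but the
// fixed-point operation is not, VT is widened to i33 so type legalization
// promotes and expands the node early; the LHS is pre-shifted left by one so
// saturation happens at the original 32-bit width, and the result is shifted
// back down before truncating to i32.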

// getUnderlyingArgRegs - Find underlying registers used for a truncated,
// bitcasted, or split argument. Returns a list of <Register, size in bits>
// pairs.
static void
getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
                     const SDValue &N) {
  switch (N.getOpcode()) {
  case ISD::CopyFromReg: {
    SDValue Op = N.getOperand(1);
    Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(),
                      Op.getValueType().getSizeInBits());
    return;
  }
  case ISD::BITCAST:
  case ISD::AssertZext:
  case ISD::AssertSext:
  case ISD::TRUNCATE:
    getUnderlyingArgRegs(Regs, N.getOperand(0));
    return;
  case ISD::BUILD_PAIR:
  case ISD::BUILD_VECTOR:
  case ISD::CONCAT_VECTORS:
    for (SDValue Op : N->op_values())
      getUnderlyingArgRegs(Regs, Op);
    return;
  default:
    return;
  }
}
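
// Illustrative example: on a 32-bit target an i64 argument typically arrives
// as BUILD_PAIR(CopyFromReg %r0, CopyFromReg %r1), for which this collects
// {(%r0, 32), (%r1, 32)}; truncations, bitcasts and assert nodes are looked
// through to the underlying CopyFromReg.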

/// If the DbgValueInst is a dbg_value of a function argument, create the
/// corresponding DBG_VALUE machine instruction for it now. At the end of
/// instruction selection, they will be inserted to the entry BB.
/// We don't currently support this for variadic dbg_values, as they shouldn't
/// appear for function arguments or in the prologue.
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
    const Value *V, DILocalVariable *Variable, DIExpression *Expr,
    DILocation *DL, FuncArgumentDbgValueKind Kind, const SDValue &N) {
  const Argument *Arg = dyn_cast<Argument>(V);
  if (!Arg)
    return false;

  MachineFunction &MF = DAG.getMachineFunction();
  const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();

  // Helper to create DBG_INSTR_REFs or DBG_VALUEs, depending on what kind
  // we've been asked to pursue.
  auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,
                              bool Indirect) {
    if (Reg.isVirtual() && MF.useDebugInstrRef()) {
      // For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
      // pointing at the VReg, which will be patched up later.
      auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);
      SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg(
          /* Reg */ Reg, /* isDef */ false, /* isImp */ false,
          /* isKill */ false, /* isDead */ false,
          /* isUndef */ false, /* isEarlyClobber */ false,
          /* SubReg */ 0, /* isDebug */ true)});

      auto *NewDIExpr = FragExpr;
      // We don't have an "Indirect" field in DBG_INSTR_REF, fold that into
      // the DIExpression.
      if (Indirect)
        NewDIExpr = DIExpression::prepend(FragExpr, DIExpression::DerefBefore);
      SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0});
      NewDIExpr = DIExpression::prependOpcodes(NewDIExpr, Ops);
      return BuildMI(MF, DL, Inst, /*IsIndirect=*/false, MOs, Variable,
                     NewDIExpr);
    } else {
      // Create a completely standard DBG_VALUE.
      auto &Inst = TII->get(TargetOpcode::DBG_VALUE);
      return BuildMI(MF, DL, Inst, Indirect, Reg, Variable, FragExpr);
    }
  };

  if (Kind == FuncArgumentDbgValueKind::Value) {
    // ArgDbgValues are hoisted to the beginning of the entry block. So we
    // should only emit as ArgDbgValue if the dbg.value intrinsic is found in
    // the entry block.
    bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
    if (!IsInEntryBlock)
      return false;

    // ArgDbgValues are hoisted to the beginning of the entry block. So we
    // should only emit as ArgDbgValue if the dbg.value intrinsic describes a
    // variable that also is a param.
    //
    // Although, if we are at the top of the entry block already, we can still
    // emit using ArgDbgValue. This might catch some situations when the
    // dbg.value refers to an argument that isn't used in the entry block, so
    // any CopyToReg node would be optimized out and the only way to express
    // this DBG_VALUE is by using the physical reg (or FI) as done in this
    // method. ArgDbgValues are hoisted to the beginning of the entry block. So
    // we should only emit as ArgDbgValue if the Variable is an argument to the
    // current function, and the dbg.value intrinsic is found in the entry
    // block.
    bool VariableIsFunctionInputArg = Variable->isParameter() &&
                                      !DL->getInlinedAt();
    bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
    if (!IsInPrologue && !VariableIsFunctionInputArg)
      return false;

    // Here we assume that a function argument on IR level only can be used to
    // describe one input parameter on source level. If we for example have
    // source code like this
    //
    //    struct A { long x, y; };
    //    void foo(struct A a, long b) {
    //      ...
    //      b = a.x;
    //      ...
    //    }
    //
    // and IR like this
    //
    //  define void @foo(i32 %a1, i32 %a2, i32 %b)  {
    //  entry:
    //    call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
    //    call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
    //    call void @llvm.dbg.value(metadata i32 %b, "b",
    //    ...
    //    call void @llvm.dbg.value(metadata i32 %a1, "b"
    //    ...
    //
    // then the last dbg.value is describing a parameter "b" using a value
    // that is an argument. But since we have already used %a1 to describe a
    // parameter, we should not handle that last dbg.value here (that would
    // result in an incorrect hoisting of the DBG_VALUE to the function
    // entry).
    // Notice that we allow one dbg.value per IR level argument, to accommodate
    // for the situation with fragments above.
    // If there is no node for the value being handled, we return true to skip
    // the normal generation of debug info, as it would kill existing debug
    // info for the parameter in case of duplicates.
    if (VariableIsFunctionInputArg) {
      unsigned ArgNo = Arg->getArgNo();
      if (ArgNo >= FuncInfo.DescribedArgs.size())
        FuncInfo.DescribedArgs.resize(ArgNo + 1, false);
      else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo))
        return !NodeMap[V].getNode();
      FuncInfo.DescribedArgs.set(ArgNo);
    }
  }
6082 | |
6083 | bool IsIndirect = false; |
6084 | std::optional<MachineOperand> Op; |
6085 | // Some arguments' frame index is recorded during argument lowering. |
6086 | int FI = FuncInfo.getArgumentFrameIndex(A: Arg); |
6087 | if (FI != std::numeric_limits<int>::max()) |
6088 | Op = MachineOperand::CreateFI(Idx: FI); |
6089 | |
6090 | SmallVector<std::pair<unsigned, TypeSize>, 8> ArgRegsAndSizes; |
6091 | if (!Op && N.getNode()) { |
6092 | getUnderlyingArgRegs(Regs&: ArgRegsAndSizes, N); |
6093 | Register Reg; |
6094 | if (ArgRegsAndSizes.size() == 1) |
6095 | Reg = ArgRegsAndSizes.front().first; |
6096 | |
6097 | if (Reg && Reg.isVirtual()) { |
6098 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
6099 | Register PR = RegInfo.getLiveInPhysReg(VReg: Reg); |
6100 | if (PR) |
6101 | Reg = PR; |
6102 | } |
6103 | if (Reg) { |
6104 | Op = MachineOperand::CreateReg(Reg, isDef: false); |
6105 | IsIndirect = Kind != FuncArgumentDbgValueKind::Value; |
6106 | } |
6107 | } |
6108 | |
6109 | if (!Op && N.getNode()) { |
6110 | // Check if frame index is available. |
6111 | SDValue LCandidate = peekThroughBitcasts(V: N); |
6112 | if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(Val: LCandidate.getNode())) |
6113 | if (FrameIndexSDNode *FINode = |
6114 | dyn_cast<FrameIndexSDNode>(Val: LNode->getBasePtr().getNode())) |
6115 | Op = MachineOperand::CreateFI(Idx: FINode->getIndex()); |
6116 | } |
6117 | |
6118 | if (!Op) { |
6119 | // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg |
6120 | auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>> |
6121 | SplitRegs) { |
6122 | unsigned Offset = 0; |
6123 | for (const auto &RegAndSize : SplitRegs) { |
6124 | // If the expression is already a fragment, the current register |
6125 | // offset+size might extend beyond the fragment. In this case, only |
6126 | // the register bits that are inside the fragment are relevant. |
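        // For example, with a 96-bit fragment and a value split over 64-bit
        // registers, the first register covers fragment bits [0, 64), the
        // second is clipped to [64, 96), and any further registers are
        // skipped entirely.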
6127 | int RegFragmentSizeInBits = RegAndSize.second; |
6128 | if (auto ExprFragmentInfo = Expr->getFragmentInfo()) { |
6129 | uint64_t ExprFragmentSizeInBits = ExprFragmentInfo->SizeInBits; |
          // The register is entirely outside the expression fragment,
          // so it is irrelevant for debug info.
6132 | if (Offset >= ExprFragmentSizeInBits) |
6133 | break; |
6134 | // The register is partially outside the expression fragment, only |
6135 | // the low bits within the fragment are relevant for debug info. |
6136 | if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits) { |
6137 | RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset; |
6138 | } |
6139 | } |
6140 | |
6141 | auto FragmentExpr = DIExpression::createFragmentExpression( |
6142 | Expr, OffsetInBits: Offset, SizeInBits: RegFragmentSizeInBits); |
6143 | Offset += RegAndSize.second; |
        // If a valid fragment expression cannot be created, the variable's
        // correct value cannot be determined, so it is set to undef.
6146 | if (!FragmentExpr) { |
6147 | SDDbgValue *SDV = DAG.getConstantDbgValue( |
6148 | Var: Variable, Expr, C: UndefValue::get(T: V->getType()), DL, O: SDNodeOrder); |
6149 | DAG.AddDbgValue(DB: SDV, isParameter: false); |
6150 | continue; |
6151 | } |
6152 | MachineInstr *NewMI = |
6153 | MakeVRegDbgValue(RegAndSize.first, *FragmentExpr, |
6154 | Kind != FuncArgumentDbgValueKind::Value); |
6155 | FuncInfo.ArgDbgValues.push_back(Elt: NewMI); |
6156 | } |
6157 | }; |
6158 | |
    // Check if the ValueMap has a register number for this value.
6160 | DenseMap<const Value *, Register>::const_iterator |
6161 | VMI = FuncInfo.ValueMap.find(Val: V); |
6162 | if (VMI != FuncInfo.ValueMap.end()) { |
6163 | const auto &TLI = DAG.getTargetLoweringInfo(); |
6164 | RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, |
6165 | V->getType(), std::nullopt); |
6166 | if (RFV.occupiesMultipleRegs()) { |
6167 | splitMultiRegDbgValue(RFV.getRegsAndSizes()); |
6168 | return true; |
6169 | } |
6170 | |
6171 | Op = MachineOperand::CreateReg(Reg: VMI->second, isDef: false); |
6172 | IsIndirect = Kind != FuncArgumentDbgValueKind::Value; |
6173 | } else if (ArgRegsAndSizes.size() > 1) { |
6174 | // This was split due to the calling convention, and no virtual register |
6175 | // mapping exists for the value. |
6176 | splitMultiRegDbgValue(ArgRegsAndSizes); |
6177 | return true; |
6178 | } |
6179 | } |
6180 | |
6181 | if (!Op) |
6182 | return false; |
6183 | |
  assert(Variable->isValidLocationForIntrinsic(DL) &&
         "Expected inlined-at fields to agree");
6186 | MachineInstr *NewMI = nullptr; |
6187 | |
6188 | if (Op->isReg()) |
6189 | NewMI = MakeVRegDbgValue(Op->getReg(), Expr, IsIndirect); |
6190 | else |
6191 | NewMI = BuildMI(MF, DL, MCID: TII->get(Opcode: TargetOpcode::DBG_VALUE), IsIndirect: true, MOs: *Op, |
6192 | Variable, Expr); |
6193 | |
  // Record the new DBG_VALUE in ArgDbgValues so that it is emitted alongside
  // the lowered arguments in the entry block.
6195 | FuncInfo.ArgDbgValues.push_back(Elt: NewMI); |
6196 | return true; |
6197 | } |
6198 | |
6199 | /// Return the appropriate SDDbgValue based on N. |
6200 | SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, |
6201 | DILocalVariable *Variable, |
6202 | DIExpression *Expr, |
6203 | const DebugLoc &dl, |
6204 | unsigned DbgSDNodeOrder) { |
6205 | if (auto *FISDN = dyn_cast<FrameIndexSDNode>(Val: N.getNode())) { |
6206 | // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe |
6207 | // stack slot locations. |
6208 | // |
6209 | // Consider "int x = 0; int *px = &x;". There are two kinds of interesting |
6210 | // debug values here after optimization: |
6211 | // |
6212 | // dbg.value(i32* %px, !"int *px", !DIExpression()), and |
6213 | // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref)) |
6214 | // |
6215 | // Both describe the direct values of their associated variables. |
6216 | return DAG.getFrameIndexDbgValue(Var: Variable, Expr, FI: FISDN->getIndex(), |
6217 | /*IsIndirect*/ false, DL: dl, O: DbgSDNodeOrder); |
6218 | } |
6219 | return DAG.getDbgValue(Var: Variable, Expr, N: N.getNode(), R: N.getResNo(), |
6220 | /*IsIndirect*/ false, DL: dl, O: DbgSDNodeOrder); |
6221 | } |
6222 | |
6223 | static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) { |
6224 | switch (Intrinsic) { |
6225 | case Intrinsic::smul_fix: |
6226 | return ISD::SMULFIX; |
6227 | case Intrinsic::umul_fix: |
6228 | return ISD::UMULFIX; |
6229 | case Intrinsic::smul_fix_sat: |
6230 | return ISD::SMULFIXSAT; |
6231 | case Intrinsic::umul_fix_sat: |
6232 | return ISD::UMULFIXSAT; |
6233 | case Intrinsic::sdiv_fix: |
6234 | return ISD::SDIVFIX; |
6235 | case Intrinsic::udiv_fix: |
6236 | return ISD::UDIVFIX; |
6237 | case Intrinsic::sdiv_fix_sat: |
6238 | return ISD::SDIVFIXSAT; |
6239 | case Intrinsic::udiv_fix_sat: |
6240 | return ISD::UDIVFIXSAT; |
6241 | default: |
    llvm_unreachable("Unhandled fixed point intrinsic");
6243 | } |
6244 | } |
6245 | |
6246 | void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I, |
6247 | const char *FunctionName) { |
  assert(FunctionName && "FunctionName must not be nullptr");
6249 | SDValue Callee = DAG.getExternalSymbol( |
6250 | Sym: FunctionName, |
6251 | VT: DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout())); |
6252 | LowerCallTo(CB: I, Callee, IsTailCall: I.isTailCall(), IsMustTailCall: I.isMustTailCall()); |
6253 | } |
6254 | |
6255 | /// Given a @llvm.call.preallocated.setup, return the corresponding |
6256 | /// preallocated call. |
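/// For example (illustrative IR; @foo is a placeholder), given
///
///   %t = call token @llvm.call.preallocated.setup(i32 1)
///   %a = call ptr @llvm.call.preallocated.arg(token %t, i32 0)
///   call void @foo(ptr preallocated(i32) %a) ["preallocated"(token %t)]
///
/// the corresponding preallocated call for %t is the call to @foo.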
6257 | static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) { |
6258 | assert(cast<CallBase>(PreallocatedSetup) |
6259 | ->getCalledFunction() |
6260 | ->getIntrinsicID() == Intrinsic::call_preallocated_setup && |
         "expected call_preallocated_setup Value");
6262 | for (const auto *U : PreallocatedSetup->users()) { |
6263 | auto *UseCall = cast<CallBase>(Val: U); |
6264 | const Function *Fn = UseCall->getCalledFunction(); |
6265 | if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) { |
6266 | return UseCall; |
6267 | } |
6268 | } |
  llvm_unreachable("expected corresponding call to preallocated setup/arg");
6270 | } |
6271 | |
6272 | /// If DI is a debug value with an EntryValue expression, lower it using the |
6273 | /// corresponding physical register of the associated Argument value |
6274 | /// (guaranteed to exist by the verifier). |
6275 | bool SelectionDAGBuilder::visitEntryValueDbgValue( |
6276 | ArrayRef<const Value *> Values, DILocalVariable *Variable, |
6277 | DIExpression *Expr, DebugLoc DbgLoc) { |
6278 | if (!Expr->isEntryValue() || !hasSingleElement(C&: Values)) |
6279 | return false; |
6280 | |
6281 | // These properties are guaranteed by the verifier. |
6282 | const Argument *Arg = cast<Argument>(Val: Values[0]); |
6283 | assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync)); |
6284 | |
6285 | auto ArgIt = FuncInfo.ValueMap.find(Val: Arg); |
6286 | if (ArgIt == FuncInfo.ValueMap.end()) { |
6287 | LLVM_DEBUG( |
6288 | dbgs() << "Dropping dbg.value: expression is entry_value but " |
                  "couldn't find an associated register for the Argument\n");
6290 | return true; |
6291 | } |
6292 | Register ArgVReg = ArgIt->getSecond(); |
6293 | |
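  // Find the physical register the argument's virtual register was copied
  // from at function entry; the entry-value expression has to be emitted
  // against that physical register.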
6294 | for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins()) |
6295 | if (ArgVReg == VirtReg || ArgVReg == PhysReg) { |
      SDDbgValue *SDV = DAG.getVRegDbgValue(
          Var: Variable, Expr, VReg: PhysReg, IsIndirect: false /*IsIndirect*/, DL: DbgLoc, O: SDNodeOrder);
6298 | DAG.AddDbgValue(DB: SDV, isParameter: false /*treat as dbg.declare byval parameter*/); |
6299 | return true; |
6300 | } |
6301 | LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but " |
                       "couldn't find a physical register\n");
6303 | return true; |
6304 | } |
6305 | |
/// Lower a call to one of the convergence control intrinsics.
6307 | void SelectionDAGBuilder::visitConvergenceControl(const CallInst &I, |
6308 | unsigned Intrinsic) { |
6309 | SDLoc sdl = getCurSDLoc(); |
6310 | switch (Intrinsic) { |
6311 | case Intrinsic::experimental_convergence_anchor: |
6312 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::CONVERGENCECTRL_ANCHOR, DL: sdl, VT: MVT::Untyped)); |
6313 | break; |
6314 | case Intrinsic::experimental_convergence_entry: |
6315 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::CONVERGENCECTRL_ENTRY, DL: sdl, VT: MVT::Untyped)); |
6316 | break; |
6317 | case Intrinsic::experimental_convergence_loop: { |
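    // The loop intrinsic carries its convergence token in the mandatory
    // "convergencectrl" operand bundle of the call.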
6318 | auto Bundle = I.getOperandBundle(ID: LLVMContext::OB_convergencectrl); |
6319 | auto *Token = Bundle->Inputs[0].get(); |
6320 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::CONVERGENCECTRL_LOOP, DL: sdl, VT: MVT::Untyped, |
6321 | Operand: getValue(V: Token))); |
6322 | break; |
6323 | } |
6324 | } |
6325 | } |
6326 | |
6327 | void SelectionDAGBuilder::visitVectorHistogram(const CallInst &I, |
6328 | unsigned IntrinsicID) { |
6329 | // For now, we're only lowering an 'add' histogram. |
6330 | // We can add others later, e.g. saturating adds, min/max. |
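  // Semantically, for each lane where the mask is true, the value in memory
  // at the corresponding pointer is incremented by Inc; lanes holding equal
  // pointers accumulate.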
6331 | assert(IntrinsicID == Intrinsic::experimental_vector_histogram_add && |
         "Tried to lower unsupported histogram type");
6333 | SDLoc sdl = getCurSDLoc(); |
6334 | Value *Ptr = I.getOperand(i_nocapture: 0); |
6335 | SDValue Inc = getValue(V: I.getOperand(i_nocapture: 1)); |
6336 | SDValue Mask = getValue(V: I.getOperand(i_nocapture: 2)); |
6337 | |
6338 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
6339 | DataLayout TargetDL = DAG.getDataLayout(); |
6340 | EVT VT = Inc.getValueType(); |
6341 | Align Alignment = DAG.getEVTAlign(MemoryVT: VT); |
6342 | |
6343 | const MDNode *Ranges = getRangeMetadata(I); |
6344 | |
6345 | SDValue Root = DAG.getRoot(); |
6346 | SDValue Base; |
6347 | SDValue Index; |
6348 | ISD::MemIndexType IndexType; |
6349 | SDValue Scale; |
6350 | bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, SDB: this, |
6351 | CurBB: I.getParent(), ElemSize: VT.getScalarStoreSize()); |
6352 | |
6353 | unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); |
6354 | |
6355 | MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( |
6356 | PtrInfo: MachinePointerInfo(AS), |
6357 | F: MachineMemOperand::MOLoad | MachineMemOperand::MOStore, |
6358 | Size: MemoryLocation::UnknownSize, BaseAlignment: Alignment, AAInfo: I.getAAMetadata(), Ranges); |
6359 | |
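  // If no uniform base pointer could be extracted, fall back to using the
  // whole pointer vector as the index, with a zero base and a unit scale.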
6360 | if (!UniformBase) { |
6361 | Base = DAG.getConstant(Val: 0, DL: sdl, VT: TLI.getPointerTy(DL: DAG.getDataLayout())); |
6362 | Index = getValue(V: Ptr); |
6363 | IndexType = ISD::SIGNED_SCALED; |
6364 | Scale = |
6365 | DAG.getTargetConstant(Val: 1, DL: sdl, VT: TLI.getPointerTy(DL: DAG.getDataLayout())); |
6366 | } |
6367 | |
6368 | EVT IdxVT = Index.getValueType(); |
6369 | EVT EltTy = IdxVT.getVectorElementType(); |
6370 | if (TLI.shouldExtendGSIndex(VT: IdxVT, EltTy)) { |
6371 | EVT NewIdxVT = IdxVT.changeVectorElementType(EltVT: EltTy); |
6372 | Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: sdl, VT: NewIdxVT, Operand: Index); |
6373 | } |
6374 | |
6375 | SDValue ID = DAG.getTargetConstant(Val: IntrinsicID, DL: sdl, VT: MVT::i32); |
6376 | |
6377 | SDValue Ops[] = {Root, Inc, Mask, Base, Index, Scale, ID}; |
6378 | SDValue Histogram = DAG.getMaskedHistogram(VTs: DAG.getVTList(VT: MVT::Other), MemVT: VT, dl: sdl, |
6379 | Ops, MMO, IndexType); |
6380 | |
6381 | setValue(V: &I, NewN: Histogram); |
6382 | DAG.setRoot(Histogram); |
6383 | } |
6384 | |
6385 | /// Lower the call to the specified intrinsic function. |
6386 | void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, |
6387 | unsigned Intrinsic) { |
6388 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
6389 | SDLoc sdl = getCurSDLoc(); |
6390 | DebugLoc dl = getCurDebugLoc(); |
6391 | SDValue Res; |
6392 | |
6393 | SDNodeFlags Flags; |
6394 | if (auto *FPOp = dyn_cast<FPMathOperator>(Val: &I)) |
6395 | Flags.copyFMF(FPMO: *FPOp); |
6396 | |
6397 | switch (Intrinsic) { |
6398 | default: |
6399 | // By default, turn this into a target intrinsic node. |
6400 | visitTargetIntrinsic(I, Intrinsic); |
6401 | return; |
6402 | case Intrinsic::vscale: { |
6403 | EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
6404 | setValue(V: &I, NewN: DAG.getVScale(DL: sdl, VT, MulImm: APInt(VT.getSizeInBits(), 1))); |
6405 | return; |
6406 | } |
6407 | case Intrinsic::vastart: visitVAStart(I); return; |
6408 | case Intrinsic::vaend: visitVAEnd(I); return; |
6409 | case Intrinsic::vacopy: visitVACopy(I); return; |
6410 | case Intrinsic::returnaddress: |
6411 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::RETURNADDR, DL: sdl, |
6412 | VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()), |
6413 | Operand: getValue(V: I.getArgOperand(i: 0)))); |
6414 | return; |
6415 | case Intrinsic::addressofreturnaddress: |
6416 | setValue(V: &I, |
6417 | NewN: DAG.getNode(Opcode: ISD::ADDROFRETURNADDR, DL: sdl, |
6418 | VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()))); |
6419 | return; |
6420 | case Intrinsic::sponentry: |
6421 | setValue(V: &I, |
6422 | NewN: DAG.getNode(Opcode: ISD::SPONENTRY, DL: sdl, |
6423 | VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()))); |
6424 | return; |
6425 | case Intrinsic::frameaddress: |
6426 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FRAMEADDR, DL: sdl, |
6427 | VT: TLI.getFrameIndexTy(DL: DAG.getDataLayout()), |
6428 | Operand: getValue(V: I.getArgOperand(i: 0)))); |
6429 | return; |
6430 | case Intrinsic::read_volatile_register: |
6431 | case Intrinsic::read_register: { |
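    // The register to read is named through metadata rather than an SSA
    // value, e.g. (illustrative IR, register name for example only):
    //   %v = call i64 @llvm.read_register.i64(metadata !0) ; !0 = !{!"sp"}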
6432 | Value *Reg = I.getArgOperand(i: 0); |
6433 | SDValue Chain = getRoot(); |
6434 | SDValue RegName = |
6435 | DAG.getMDNode(MD: cast<MDNode>(Val: cast<MetadataAsValue>(Val: Reg)->getMetadata())); |
6436 | EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
6437 | Res = DAG.getNode(Opcode: ISD::READ_REGISTER, DL: sdl, |
6438 | VTList: DAG.getVTList(VT1: VT, VT2: MVT::Other), N1: Chain, N2: RegName); |
6439 | setValue(V: &I, NewN: Res); |
6440 | DAG.setRoot(Res.getValue(R: 1)); |
6441 | return; |
6442 | } |
6443 | case Intrinsic::write_register: { |
6444 | Value *Reg = I.getArgOperand(i: 0); |
6445 | Value *RegValue = I.getArgOperand(i: 1); |
6446 | SDValue Chain = getRoot(); |
6447 | SDValue RegName = |
6448 | DAG.getMDNode(MD: cast<MDNode>(Val: cast<MetadataAsValue>(Val: Reg)->getMetadata())); |
6449 | DAG.setRoot(DAG.getNode(Opcode: ISD::WRITE_REGISTER, DL: sdl, VT: MVT::Other, N1: Chain, |
6450 | N2: RegName, N3: getValue(V: RegValue))); |
6451 | return; |
6452 | } |
6453 | case Intrinsic::memcpy: { |
6454 | const auto &MCI = cast<MemCpyInst>(Val: I); |
6455 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
6456 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
6457 | SDValue Op3 = getValue(V: I.getArgOperand(i: 2)); |
6458 | // @llvm.memcpy defines 0 and 1 to both mean no alignment. |
6459 | Align DstAlign = MCI.getDestAlign().valueOrOne(); |
6460 | Align SrcAlign = MCI.getSourceAlign().valueOrOne(); |
6461 | Align Alignment = std::min(a: DstAlign, b: SrcAlign); |
6462 | bool isVol = MCI.isVolatile(); |
6463 | // FIXME: Support passing different dest/src alignments to the memcpy DAG |
6464 | // node. |
6465 | SDValue Root = isVol ? getRoot() : getMemoryRoot(); |
6466 | SDValue MC = DAG.getMemcpy(Chain: Root, dl: sdl, Dst: Op1, Src: Op2, Size: Op3, Alignment, isVol, |
6467 | /* AlwaysInline */ false, CI: &I, OverrideTailCall: std::nullopt, |
6468 | DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)), |
6469 | SrcPtrInfo: MachinePointerInfo(I.getArgOperand(i: 1)), |
6470 | AAInfo: I.getAAMetadata(), AA); |
6471 | updateDAGForMaybeTailCall(MaybeTC: MC); |
6472 | return; |
6473 | } |
6474 | case Intrinsic::memcpy_inline: { |
6475 | const auto &MCI = cast<MemCpyInlineInst>(Val: I); |
6476 | SDValue Dst = getValue(V: I.getArgOperand(i: 0)); |
6477 | SDValue Src = getValue(V: I.getArgOperand(i: 1)); |
6478 | SDValue Size = getValue(V: I.getArgOperand(i: 2)); |
    assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
6480 | // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment. |
6481 | Align DstAlign = MCI.getDestAlign().valueOrOne(); |
6482 | Align SrcAlign = MCI.getSourceAlign().valueOrOne(); |
6483 | Align Alignment = std::min(a: DstAlign, b: SrcAlign); |
6484 | bool isVol = MCI.isVolatile(); |
6485 | // FIXME: Support passing different dest/src alignments to the memcpy DAG |
6486 | // node. |
6487 | SDValue MC = DAG.getMemcpy(Chain: getRoot(), dl: sdl, Dst, Src, Size, Alignment, isVol, |
6488 | /* AlwaysInline */ true, CI: &I, OverrideTailCall: std::nullopt, |
6489 | DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)), |
6490 | SrcPtrInfo: MachinePointerInfo(I.getArgOperand(i: 1)), |
6491 | AAInfo: I.getAAMetadata(), AA); |
6492 | updateDAGForMaybeTailCall(MaybeTC: MC); |
6493 | return; |
6494 | } |
6495 | case Intrinsic::memset: { |
6496 | const auto &MSI = cast<MemSetInst>(Val: I); |
6497 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
6498 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
6499 | SDValue Op3 = getValue(V: I.getArgOperand(i: 2)); |
6500 | // @llvm.memset defines 0 and 1 to both mean no alignment. |
6501 | Align Alignment = MSI.getDestAlign().valueOrOne(); |
6502 | bool isVol = MSI.isVolatile(); |
6503 | SDValue Root = isVol ? getRoot() : getMemoryRoot(); |
6504 | SDValue MS = DAG.getMemset( |
6505 | Chain: Root, dl: sdl, Dst: Op1, Src: Op2, Size: Op3, Alignment, isVol, /* AlwaysInline */ false, |
6506 | CI: &I, DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)), AAInfo: I.getAAMetadata()); |
6507 | updateDAGForMaybeTailCall(MaybeTC: MS); |
6508 | return; |
6509 | } |
6510 | case Intrinsic::memset_inline: { |
6511 | const auto &MSII = cast<MemSetInlineInst>(Val: I); |
6512 | SDValue Dst = getValue(V: I.getArgOperand(i: 0)); |
6513 | SDValue Value = getValue(V: I.getArgOperand(i: 1)); |
6514 | SDValue Size = getValue(V: I.getArgOperand(i: 2)); |
    assert(isa<ConstantSDNode>(Size) && "memset_inline needs constant size");
    // @llvm.memset.inline defines 0 and 1 to both mean no alignment.
6517 | Align DstAlign = MSII.getDestAlign().valueOrOne(); |
6518 | bool isVol = MSII.isVolatile(); |
6519 | SDValue Root = isVol ? getRoot() : getMemoryRoot(); |
6520 | SDValue MC = DAG.getMemset(Chain: Root, dl: sdl, Dst, Src: Value, Size, Alignment: DstAlign, isVol, |
6521 | /* AlwaysInline */ true, CI: &I, |
6522 | DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)), |
6523 | AAInfo: I.getAAMetadata()); |
6524 | updateDAGForMaybeTailCall(MaybeTC: MC); |
6525 | return; |
6526 | } |
6527 | case Intrinsic::memmove: { |
6528 | const auto &MMI = cast<MemMoveInst>(Val: I); |
6529 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
6530 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
6531 | SDValue Op3 = getValue(V: I.getArgOperand(i: 2)); |
6532 | // @llvm.memmove defines 0 and 1 to both mean no alignment. |
6533 | Align DstAlign = MMI.getDestAlign().valueOrOne(); |
6534 | Align SrcAlign = MMI.getSourceAlign().valueOrOne(); |
6535 | Align Alignment = std::min(a: DstAlign, b: SrcAlign); |
6536 | bool isVol = MMI.isVolatile(); |
6537 | // FIXME: Support passing different dest/src alignments to the memmove DAG |
6538 | // node. |
6539 | SDValue Root = isVol ? getRoot() : getMemoryRoot(); |
6540 | SDValue MM = DAG.getMemmove(Chain: Root, dl: sdl, Dst: Op1, Src: Op2, Size: Op3, Alignment, isVol, CI: &I, |
6541 | /* OverrideTailCall */ std::nullopt, |
6542 | DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)), |
6543 | SrcPtrInfo: MachinePointerInfo(I.getArgOperand(i: 1)), |
6544 | AAInfo: I.getAAMetadata(), AA); |
6545 | updateDAGForMaybeTailCall(MaybeTC: MM); |
6546 | return; |
6547 | } |
6548 | case Intrinsic::memcpy_element_unordered_atomic: { |
6549 | const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(Val: I); |
6550 | SDValue Dst = getValue(V: MI.getRawDest()); |
6551 | SDValue Src = getValue(V: MI.getRawSource()); |
6552 | SDValue Length = getValue(V: MI.getLength()); |
6553 | |
6554 | Type *LengthTy = MI.getLength()->getType(); |
6555 | unsigned ElemSz = MI.getElementSizeInBytes(); |
6556 | bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget()); |
6557 | SDValue MC = |
6558 | DAG.getAtomicMemcpy(Chain: getRoot(), dl: sdl, Dst, Src, Size: Length, SizeTy: LengthTy, ElemSz, |
6559 | isTailCall: isTC, DstPtrInfo: MachinePointerInfo(MI.getRawDest()), |
6560 | SrcPtrInfo: MachinePointerInfo(MI.getRawSource())); |
6561 | updateDAGForMaybeTailCall(MaybeTC: MC); |
6562 | return; |
6563 | } |
6564 | case Intrinsic::memmove_element_unordered_atomic: { |
6565 | auto &MI = cast<AtomicMemMoveInst>(Val: I); |
6566 | SDValue Dst = getValue(V: MI.getRawDest()); |
6567 | SDValue Src = getValue(V: MI.getRawSource()); |
6568 | SDValue Length = getValue(V: MI.getLength()); |
6569 | |
6570 | Type *LengthTy = MI.getLength()->getType(); |
6571 | unsigned ElemSz = MI.getElementSizeInBytes(); |
6572 | bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget()); |
6573 | SDValue MC = |
6574 | DAG.getAtomicMemmove(Chain: getRoot(), dl: sdl, Dst, Src, Size: Length, SizeTy: LengthTy, ElemSz, |
6575 | isTailCall: isTC, DstPtrInfo: MachinePointerInfo(MI.getRawDest()), |
6576 | SrcPtrInfo: MachinePointerInfo(MI.getRawSource())); |
6577 | updateDAGForMaybeTailCall(MaybeTC: MC); |
6578 | return; |
6579 | } |
6580 | case Intrinsic::memset_element_unordered_atomic: { |
6581 | auto &MI = cast<AtomicMemSetInst>(Val: I); |
6582 | SDValue Dst = getValue(V: MI.getRawDest()); |
6583 | SDValue Val = getValue(V: MI.getValue()); |
6584 | SDValue Length = getValue(V: MI.getLength()); |
6585 | |
6586 | Type *LengthTy = MI.getLength()->getType(); |
6587 | unsigned ElemSz = MI.getElementSizeInBytes(); |
6588 | bool isTC = I.isTailCall() && isInTailCallPosition(Call: I, TM: DAG.getTarget()); |
6589 | SDValue MC = |
6590 | DAG.getAtomicMemset(Chain: getRoot(), dl: sdl, Dst, Value: Val, Size: Length, SizeTy: LengthTy, ElemSz, |
6591 | isTailCall: isTC, DstPtrInfo: MachinePointerInfo(MI.getRawDest())); |
6592 | updateDAGForMaybeTailCall(MaybeTC: MC); |
6593 | return; |
6594 | } |
6595 | case Intrinsic::call_preallocated_setup: { |
6596 | const CallBase *PreallocatedCall = FindPreallocatedCall(PreallocatedSetup: &I); |
6597 | SDValue SrcValue = DAG.getSrcValue(v: PreallocatedCall); |
6598 | SDValue Res = DAG.getNode(Opcode: ISD::PREALLOCATED_SETUP, DL: sdl, VT: MVT::Other, |
6599 | N1: getRoot(), N2: SrcValue); |
6600 | setValue(V: &I, NewN: Res); |
6601 | DAG.setRoot(Res); |
6602 | return; |
6603 | } |
6604 | case Intrinsic::call_preallocated_arg: { |
6605 | const CallBase *PreallocatedCall = FindPreallocatedCall(PreallocatedSetup: I.getOperand(i_nocapture: 0)); |
6606 | SDValue SrcValue = DAG.getSrcValue(v: PreallocatedCall); |
6607 | SDValue Ops[3]; |
6608 | Ops[0] = getRoot(); |
6609 | Ops[1] = SrcValue; |
6610 | Ops[2] = DAG.getTargetConstant(Val: *cast<ConstantInt>(Val: I.getArgOperand(i: 1)), DL: sdl, |
6611 | VT: MVT::i32); // arg index |
6612 | SDValue Res = DAG.getNode( |
6613 | Opcode: ISD::PREALLOCATED_ARG, DL: sdl, |
6614 | VTList: DAG.getVTList(VT1: TLI.getPointerTy(DL: DAG.getDataLayout()), VT2: MVT::Other), Ops); |
6615 | setValue(V: &I, NewN: Res); |
6616 | DAG.setRoot(Res.getValue(R: 1)); |
6617 | return; |
6618 | } |
6619 | case Intrinsic::dbg_declare: { |
6620 | const auto &DI = cast<DbgDeclareInst>(Val: I); |
6621 | // Debug intrinsics are handled separately in assignment tracking mode. |
6622 | // Some intrinsics are handled right after Argument lowering. |
6623 | if (AssignmentTrackingEnabled || |
6624 | FuncInfo.PreprocessedDbgDeclares.count(Ptr: &DI)) |
6625 | return; |
    LLVM_DEBUG(dbgs() << "SelectionDAG visiting dbg_declare: " << DI << "\n");
6627 | DILocalVariable *Variable = DI.getVariable(); |
6628 | DIExpression *Expression = DI.getExpression(); |
6629 | dropDanglingDebugInfo(Variable, Expr: Expression); |
    // Assume dbg.declare cannot currently use DIArgList, i.e.
    // it is non-variadic.
    assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList");
6633 | handleDebugDeclare(Address: DI.getVariableLocationOp(OpIdx: 0), Variable, Expression, |
6634 | DL: DI.getDebugLoc()); |
6635 | return; |
6636 | } |
6637 | case Intrinsic::dbg_label: { |
6638 | const DbgLabelInst &DI = cast<DbgLabelInst>(Val: I); |
6639 | DILabel *Label = DI.getLabel(); |
    assert(Label && "Missing label");
6641 | |
6642 | SDDbgLabel *SDV; |
6643 | SDV = DAG.getDbgLabel(Label, DL: dl, O: SDNodeOrder); |
6644 | DAG.AddDbgLabel(DB: SDV); |
6645 | return; |
6646 | } |
6647 | case Intrinsic::dbg_assign: { |
6648 | // Debug intrinsics are handled separately in assignment tracking mode. |
6649 | if (AssignmentTrackingEnabled) |
6650 | return; |
6651 | // If assignment tracking hasn't been enabled then fall through and treat |
6652 | // the dbg.assign as a dbg.value. |
6653 | [[fallthrough]]; |
6654 | } |
6655 | case Intrinsic::dbg_value: { |
6656 | // Debug intrinsics are handled separately in assignment tracking mode. |
6657 | if (AssignmentTrackingEnabled) |
6658 | return; |
6659 | const DbgValueInst &DI = cast<DbgValueInst>(Val: I); |
    assert(DI.getVariable() && "Missing variable");
6661 | |
6662 | DILocalVariable *Variable = DI.getVariable(); |
6663 | DIExpression *Expression = DI.getExpression(); |
6664 | dropDanglingDebugInfo(Variable, Expr: Expression); |
6665 | |
6666 | if (DI.isKillLocation()) { |
6667 | handleKillDebugValue(Var: Variable, Expr: Expression, DbgLoc: DI.getDebugLoc(), Order: SDNodeOrder); |
6668 | return; |
6669 | } |
6670 | |
6671 | SmallVector<Value *, 4> Values(DI.getValues()); |
6672 | if (Values.empty()) |
6673 | return; |
6674 | |
6675 | bool IsVariadic = DI.hasArgList(); |
6676 | if (!handleDebugValue(Values, Var: Variable, Expr: Expression, DbgLoc: DI.getDebugLoc(), |
6677 | Order: SDNodeOrder, IsVariadic)) |
6678 | addDanglingDebugInfo(Values, Var: Variable, Expr: Expression, IsVariadic, |
6679 | DL: DI.getDebugLoc(), Order: SDNodeOrder); |
6680 | return; |
6681 | } |
6682 | |
6683 | case Intrinsic::eh_typeid_for: { |
6684 | // Find the type id for the given typeinfo. |
6685 | GlobalValue *GV = ExtractTypeInfo(V: I.getArgOperand(i: 0)); |
6686 | unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(TI: GV); |
6687 | Res = DAG.getConstant(Val: TypeID, DL: sdl, VT: MVT::i32); |
6688 | setValue(V: &I, NewN: Res); |
6689 | return; |
6690 | } |
6691 | |
6692 | case Intrinsic::eh_return_i32: |
6693 | case Intrinsic::eh_return_i64: |
6694 | DAG.getMachineFunction().setCallsEHReturn(true); |
6695 | DAG.setRoot(DAG.getNode(Opcode: ISD::EH_RETURN, DL: sdl, |
6696 | VT: MVT::Other, |
6697 | N1: getControlRoot(), |
6698 | N2: getValue(V: I.getArgOperand(i: 0)), |
6699 | N3: getValue(V: I.getArgOperand(i: 1)))); |
6700 | return; |
6701 | case Intrinsic::eh_unwind_init: |
6702 | DAG.getMachineFunction().setCallsUnwindInit(true); |
6703 | return; |
6704 | case Intrinsic::eh_dwarf_cfa: |
6705 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::EH_DWARF_CFA, DL: sdl, |
6706 | VT: TLI.getPointerTy(DL: DAG.getDataLayout()), |
6707 | Operand: getValue(V: I.getArgOperand(i: 0)))); |
6708 | return; |
6709 | case Intrinsic::eh_sjlj_callsite: { |
6710 | MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); |
6711 | ConstantInt *CI = cast<ConstantInt>(Val: I.getArgOperand(i: 0)); |
    assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
6713 | |
6714 | MMI.setCurrentCallSite(CI->getZExtValue()); |
6715 | return; |
6716 | } |
6717 | case Intrinsic::eh_sjlj_functioncontext: { |
6718 | // Get and store the index of the function context. |
6719 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
6720 | AllocaInst *FnCtx = |
6721 | cast<AllocaInst>(Val: I.getArgOperand(i: 0)->stripPointerCasts()); |
6722 | int FI = FuncInfo.StaticAllocaMap[FnCtx]; |
6723 | MFI.setFunctionContextIndex(FI); |
6724 | return; |
6725 | } |
6726 | case Intrinsic::eh_sjlj_setjmp: { |
6727 | SDValue Ops[2]; |
6728 | Ops[0] = getRoot(); |
6729 | Ops[1] = getValue(V: I.getArgOperand(i: 0)); |
6730 | SDValue Op = DAG.getNode(Opcode: ISD::EH_SJLJ_SETJMP, DL: sdl, |
6731 | VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other), Ops); |
6732 | setValue(V: &I, NewN: Op.getValue(R: 0)); |
6733 | DAG.setRoot(Op.getValue(R: 1)); |
6734 | return; |
6735 | } |
6736 | case Intrinsic::eh_sjlj_longjmp: |
6737 | DAG.setRoot(DAG.getNode(Opcode: ISD::EH_SJLJ_LONGJMP, DL: sdl, VT: MVT::Other, |
6738 | N1: getRoot(), N2: getValue(V: I.getArgOperand(i: 0)))); |
6739 | return; |
6740 | case Intrinsic::eh_sjlj_setup_dispatch: |
6741 | DAG.setRoot(DAG.getNode(Opcode: ISD::EH_SJLJ_SETUP_DISPATCH, DL: sdl, VT: MVT::Other, |
6742 | Operand: getRoot())); |
6743 | return; |
6744 | case Intrinsic::masked_gather: |
6745 | visitMaskedGather(I); |
6746 | return; |
6747 | case Intrinsic::masked_load: |
6748 | visitMaskedLoad(I); |
6749 | return; |
6750 | case Intrinsic::masked_scatter: |
6751 | visitMaskedScatter(I); |
6752 | return; |
6753 | case Intrinsic::masked_store: |
6754 | visitMaskedStore(I); |
6755 | return; |
6756 | case Intrinsic::masked_expandload: |
6757 | visitMaskedLoad(I, IsExpanding: true /* IsExpanding */); |
6758 | return; |
6759 | case Intrinsic::masked_compressstore: |
6760 | visitMaskedStore(I, IsCompressing: true /* IsCompressing */); |
6761 | return; |
6762 | case Intrinsic::powi: |
6763 | setValue(V: &I, NewN: ExpandPowI(DL: sdl, LHS: getValue(V: I.getArgOperand(i: 0)), |
6764 | RHS: getValue(V: I.getArgOperand(i: 1)), DAG)); |
6765 | return; |
6766 | case Intrinsic::log: |
6767 | setValue(V: &I, NewN: expandLog(dl: sdl, Op: getValue(V: I.getArgOperand(i: 0)), DAG, TLI, Flags)); |
6768 | return; |
6769 | case Intrinsic::log2: |
6770 | setValue(V: &I, |
6771 | NewN: expandLog2(dl: sdl, Op: getValue(V: I.getArgOperand(i: 0)), DAG, TLI, Flags)); |
6772 | return; |
6773 | case Intrinsic::log10: |
6774 | setValue(V: &I, |
6775 | NewN: expandLog10(dl: sdl, Op: getValue(V: I.getArgOperand(i: 0)), DAG, TLI, Flags)); |
6776 | return; |
6777 | case Intrinsic::exp: |
6778 | setValue(V: &I, NewN: expandExp(dl: sdl, Op: getValue(V: I.getArgOperand(i: 0)), DAG, TLI, Flags)); |
6779 | return; |
6780 | case Intrinsic::exp2: |
6781 | setValue(V: &I, |
6782 | NewN: expandExp2(dl: sdl, Op: getValue(V: I.getArgOperand(i: 0)), DAG, TLI, Flags)); |
6783 | return; |
6784 | case Intrinsic::pow: |
6785 | setValue(V: &I, NewN: expandPow(dl: sdl, LHS: getValue(V: I.getArgOperand(i: 0)), |
6786 | RHS: getValue(V: I.getArgOperand(i: 1)), DAG, TLI, Flags)); |
6787 | return; |
6788 | case Intrinsic::sqrt: |
6789 | case Intrinsic::fabs: |
6790 | case Intrinsic::sin: |
6791 | case Intrinsic::cos: |
6792 | case Intrinsic::tan: |
6793 | case Intrinsic::asin: |
6794 | case Intrinsic::acos: |
6795 | case Intrinsic::atan: |
6796 | case Intrinsic::sinh: |
6797 | case Intrinsic::cosh: |
6798 | case Intrinsic::tanh: |
6799 | case Intrinsic::exp10: |
6800 | case Intrinsic::floor: |
6801 | case Intrinsic::ceil: |
6802 | case Intrinsic::trunc: |
6803 | case Intrinsic::rint: |
6804 | case Intrinsic::nearbyint: |
6805 | case Intrinsic::round: |
6806 | case Intrinsic::roundeven: |
6807 | case Intrinsic::canonicalize: { |
6808 | unsigned Opcode; |
6809 | // clang-format off |
6810 | switch (Intrinsic) { |
    default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
6812 | case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; |
6813 | case Intrinsic::fabs: Opcode = ISD::FABS; break; |
6814 | case Intrinsic::sin: Opcode = ISD::FSIN; break; |
6815 | case Intrinsic::cos: Opcode = ISD::FCOS; break; |
6816 | case Intrinsic::tan: Opcode = ISD::FTAN; break; |
6817 | case Intrinsic::asin: Opcode = ISD::FASIN; break; |
6818 | case Intrinsic::acos: Opcode = ISD::FACOS; break; |
6819 | case Intrinsic::atan: Opcode = ISD::FATAN; break; |
6820 | case Intrinsic::sinh: Opcode = ISD::FSINH; break; |
6821 | case Intrinsic::cosh: Opcode = ISD::FCOSH; break; |
6822 | case Intrinsic::tanh: Opcode = ISD::FTANH; break; |
6823 | case Intrinsic::exp10: Opcode = ISD::FEXP10; break; |
6824 | case Intrinsic::floor: Opcode = ISD::FFLOOR; break; |
6825 | case Intrinsic::ceil: Opcode = ISD::FCEIL; break; |
6826 | case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; |
6827 | case Intrinsic::rint: Opcode = ISD::FRINT; break; |
6828 | case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; |
6829 | case Intrinsic::round: Opcode = ISD::FROUND; break; |
6830 | case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break; |
6831 | case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break; |
6832 | } |
6833 | // clang-format on |
6834 | |
6835 | setValue(V: &I, NewN: DAG.getNode(Opcode, DL: sdl, |
6836 | VT: getValue(V: I.getArgOperand(i: 0)).getValueType(), |
6837 | Operand: getValue(V: I.getArgOperand(i: 0)), Flags)); |
6838 | return; |
6839 | } |
6840 | case Intrinsic::lround: |
6841 | case Intrinsic::llround: |
6842 | case Intrinsic::lrint: |
6843 | case Intrinsic::llrint: { |
6844 | unsigned Opcode; |
6845 | // clang-format off |
6846 | switch (Intrinsic) { |
    default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
6848 | case Intrinsic::lround: Opcode = ISD::LROUND; break; |
6849 | case Intrinsic::llround: Opcode = ISD::LLROUND; break; |
6850 | case Intrinsic::lrint: Opcode = ISD::LRINT; break; |
6851 | case Intrinsic::llrint: Opcode = ISD::LLRINT; break; |
6852 | } |
6853 | // clang-format on |
6854 | |
6855 | EVT RetVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
6856 | setValue(V: &I, NewN: DAG.getNode(Opcode, DL: sdl, VT: RetVT, |
6857 | Operand: getValue(V: I.getArgOperand(i: 0)))); |
6858 | return; |
6859 | } |
6860 | case Intrinsic::minnum: |
6861 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FMINNUM, DL: sdl, |
6862 | VT: getValue(V: I.getArgOperand(i: 0)).getValueType(), |
6863 | N1: getValue(V: I.getArgOperand(i: 0)), |
6864 | N2: getValue(V: I.getArgOperand(i: 1)), Flags)); |
6865 | return; |
6866 | case Intrinsic::maxnum: |
6867 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FMAXNUM, DL: sdl, |
6868 | VT: getValue(V: I.getArgOperand(i: 0)).getValueType(), |
6869 | N1: getValue(V: I.getArgOperand(i: 0)), |
6870 | N2: getValue(V: I.getArgOperand(i: 1)), Flags)); |
6871 | return; |
6872 | case Intrinsic::minimum: |
6873 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FMINIMUM, DL: sdl, |
6874 | VT: getValue(V: I.getArgOperand(i: 0)).getValueType(), |
6875 | N1: getValue(V: I.getArgOperand(i: 0)), |
6876 | N2: getValue(V: I.getArgOperand(i: 1)), Flags)); |
6877 | return; |
6878 | case Intrinsic::maximum: |
6879 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FMAXIMUM, DL: sdl, |
6880 | VT: getValue(V: I.getArgOperand(i: 0)).getValueType(), |
6881 | N1: getValue(V: I.getArgOperand(i: 0)), |
6882 | N2: getValue(V: I.getArgOperand(i: 1)), Flags)); |
6883 | return; |
6884 | case Intrinsic::copysign: |
6885 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FCOPYSIGN, DL: sdl, |
6886 | VT: getValue(V: I.getArgOperand(i: 0)).getValueType(), |
6887 | N1: getValue(V: I.getArgOperand(i: 0)), |
6888 | N2: getValue(V: I.getArgOperand(i: 1)), Flags)); |
6889 | return; |
6890 | case Intrinsic::ldexp: |
6891 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FLDEXP, DL: sdl, |
6892 | VT: getValue(V: I.getArgOperand(i: 0)).getValueType(), |
6893 | N1: getValue(V: I.getArgOperand(i: 0)), |
6894 | N2: getValue(V: I.getArgOperand(i: 1)), Flags)); |
6895 | return; |
6896 | case Intrinsic::frexp: { |
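    // llvm.frexp returns an aggregate { fraction, exponent }; the two members
    // map onto the two results of the ISD::FFREXP node.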
6897 | SmallVector<EVT, 2> ValueVTs; |
6898 | ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: I.getType(), ValueVTs); |
6899 | SDVTList VTs = DAG.getVTList(VTs: ValueVTs); |
6900 | setValue(V: &I, |
6901 | NewN: DAG.getNode(Opcode: ISD::FFREXP, DL: sdl, VTList: VTs, N: getValue(V: I.getArgOperand(i: 0)))); |
6902 | return; |
6903 | } |
6904 | case Intrinsic::arithmetic_fence: { |
6905 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::ARITH_FENCE, DL: sdl, |
6906 | VT: getValue(V: I.getArgOperand(i: 0)).getValueType(), |
6907 | Operand: getValue(V: I.getArgOperand(i: 0)), Flags)); |
6908 | return; |
6909 | } |
6910 | case Intrinsic::fma: |
6911 | setValue(V: &I, NewN: DAG.getNode( |
6912 | Opcode: ISD::FMA, DL: sdl, VT: getValue(V: I.getArgOperand(i: 0)).getValueType(), |
6913 | N1: getValue(V: I.getArgOperand(i: 0)), N2: getValue(V: I.getArgOperand(i: 1)), |
6914 | N3: getValue(V: I.getArgOperand(i: 2)), Flags)); |
6915 | return; |
6916 | #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ |
6917 | case Intrinsic::INTRINSIC: |
6918 | #include "llvm/IR/ConstrainedOps.def" |
6919 | visitConstrainedFPIntrinsic(FPI: cast<ConstrainedFPIntrinsic>(Val: I)); |
6920 | return; |
6921 | #define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: |
6922 | #include "llvm/IR/VPIntrinsics.def" |
6923 | visitVectorPredicationIntrinsic(VPIntrin: cast<VPIntrinsic>(Val: I)); |
6924 | return; |
6925 | case Intrinsic::fptrunc_round: { |
    // Get the last argument, the rounding-mode metadata, and convert it to
    // an integer constant for the node.
6928 | Metadata *MD = cast<MetadataAsValue>(Val: I.getArgOperand(i: 1))->getMetadata(); |
6929 | std::optional<RoundingMode> RoundMode = |
6930 | convertStrToRoundingMode(cast<MDString>(Val: MD)->getString()); |
6931 | |
6932 | EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
6933 | |
6934 | // Propagate fast-math-flags from IR to node(s). |
6935 | SDNodeFlags Flags; |
6936 | Flags.copyFMF(FPMO: *cast<FPMathOperator>(Val: &I)); |
6937 | SelectionDAG::FlagInserter FlagsInserter(DAG, Flags); |
6938 | |
6939 | SDValue Result; |
6940 | Result = DAG.getNode( |
6941 | Opcode: ISD::FPTRUNC_ROUND, DL: sdl, VT, N1: getValue(V: I.getArgOperand(i: 0)), |
6942 | N2: DAG.getTargetConstant(Val: (int)*RoundMode, DL: sdl, |
6943 | VT: TLI.getPointerTy(DL: DAG.getDataLayout()))); |
6944 | setValue(V: &I, NewN: Result); |
6945 | |
6946 | return; |
6947 | } |
6948 | case Intrinsic::fmuladd: { |
6949 | EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
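    // Emit a fused multiply-add only if FP-op fusion is permitted and the
    // target reports that a single fused operation is faster than separate
    // FMUL and FADD nodes; otherwise emit the unfused pair.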
6950 | if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && |
6951 | TLI.isFMAFasterThanFMulAndFAdd(MF: DAG.getMachineFunction(), VT)) { |
6952 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FMA, DL: sdl, |
6953 | VT: getValue(V: I.getArgOperand(i: 0)).getValueType(), |
6954 | N1: getValue(V: I.getArgOperand(i: 0)), |
6955 | N2: getValue(V: I.getArgOperand(i: 1)), |
6956 | N3: getValue(V: I.getArgOperand(i: 2)), Flags)); |
6957 | } else { |
6958 | // TODO: Intrinsic calls should have fast-math-flags. |
6959 | SDValue Mul = DAG.getNode( |
6960 | Opcode: ISD::FMUL, DL: sdl, VT: getValue(V: I.getArgOperand(i: 0)).getValueType(), |
6961 | N1: getValue(V: I.getArgOperand(i: 0)), N2: getValue(V: I.getArgOperand(i: 1)), Flags); |
6962 | SDValue Add = DAG.getNode(Opcode: ISD::FADD, DL: sdl, |
6963 | VT: getValue(V: I.getArgOperand(i: 0)).getValueType(), |
6964 | N1: Mul, N2: getValue(V: I.getArgOperand(i: 2)), Flags); |
6965 | setValue(V: &I, NewN: Add); |
6966 | } |
6967 | return; |
6968 | } |
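  // llvm.convert.to.fp16 returns the raw half-precision bits in an i16, so
  // it lowers to an FP_ROUND to f16 followed by a bitcast to i16;
  // llvm.convert.from.fp16 below is the inverse.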
6969 | case Intrinsic::convert_to_fp16: |
6970 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::BITCAST, DL: sdl, VT: MVT::i16, |
6971 | Operand: DAG.getNode(Opcode: ISD::FP_ROUND, DL: sdl, VT: MVT::f16, |
6972 | N1: getValue(V: I.getArgOperand(i: 0)), |
6973 | N2: DAG.getTargetConstant(Val: 0, DL: sdl, |
6974 | VT: MVT::i32)))); |
6975 | return; |
6976 | case Intrinsic::convert_from_fp16: |
6977 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_EXTEND, DL: sdl, |
6978 | VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()), |
6979 | Operand: DAG.getNode(Opcode: ISD::BITCAST, DL: sdl, VT: MVT::f16, |
6980 | Operand: getValue(V: I.getArgOperand(i: 0))))); |
6981 | return; |
6982 | case Intrinsic::fptosi_sat: { |
6983 | EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
6984 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_TO_SINT_SAT, DL: sdl, VT, |
6985 | N1: getValue(V: I.getArgOperand(i: 0)), |
6986 | N2: DAG.getValueType(VT.getScalarType()))); |
6987 | return; |
6988 | } |
6989 | case Intrinsic::fptoui_sat: { |
6990 | EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
6991 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FP_TO_UINT_SAT, DL: sdl, VT, |
6992 | N1: getValue(V: I.getArgOperand(i: 0)), |
6993 | N2: DAG.getValueType(VT.getScalarType()))); |
6994 | return; |
6995 | } |
6996 | case Intrinsic::set_rounding: |
6997 | Res = DAG.getNode(Opcode: ISD::SET_ROUNDING, DL: sdl, VT: MVT::Other, |
6998 | Ops: {getRoot(), getValue(V: I.getArgOperand(i: 0))}); |
6999 | setValue(V: &I, NewN: Res); |
7000 | DAG.setRoot(Res.getValue(R: 0)); |
7001 | return; |
7002 | case Intrinsic::is_fpclass: { |
7003 | const DataLayout DLayout = DAG.getDataLayout(); |
7004 | EVT DestVT = TLI.getValueType(DL: DLayout, Ty: I.getType()); |
7005 | EVT ArgVT = TLI.getValueType(DL: DLayout, Ty: I.getArgOperand(i: 0)->getType()); |
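    // The immediate operand is a bitmask of FPClassTest bits (fcNan, fcInf,
    // fcNormal, ...) selecting which floating-point classes to test the
    // input against.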
7006 | FPClassTest Test = static_cast<FPClassTest>( |
7007 | cast<ConstantInt>(Val: I.getArgOperand(i: 1))->getZExtValue()); |
7008 | MachineFunction &MF = DAG.getMachineFunction(); |
7009 | const Function &F = MF.getFunction(); |
7010 | SDValue Op = getValue(V: I.getArgOperand(i: 0)); |
7011 | SDNodeFlags Flags; |
7012 | Flags.setNoFPExcept( |
7013 | !F.getAttributes().hasFnAttr(Kind: llvm::Attribute::StrictFP)); |
    // If ISD::IS_FPCLASS should be expanded, do it right now, because the
    // expansion can use illegal types. Expanding early allows these types to
    // be legalized prior to selection.
7017 | if (!TLI.isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT: ArgVT)) { |
7018 | SDValue Result = TLI.expandIS_FPCLASS(ResultVT: DestVT, Op, Test, Flags, DL: sdl, DAG); |
7019 | setValue(V: &I, NewN: Result); |
7020 | return; |
7021 | } |
7022 | |
7023 | SDValue Check = DAG.getTargetConstant(Val: Test, DL: sdl, VT: MVT::i32); |
7024 | SDValue V = DAG.getNode(Opcode: ISD::IS_FPCLASS, DL: sdl, VT: DestVT, Ops: {Op, Check}, Flags); |
7025 | setValue(V: &I, NewN: V); |
7026 | return; |
7027 | } |
7028 | case Intrinsic::get_fpenv: { |
7029 | const DataLayout DLayout = DAG.getDataLayout(); |
7030 | EVT EnvVT = TLI.getValueType(DL: DLayout, Ty: I.getType()); |
7031 | Align TempAlign = DAG.getEVTAlign(MemoryVT: EnvVT); |
7032 | SDValue Chain = getRoot(); |
    // Use GET_FPENV if it is legal or custom. Otherwise use a memory-based
    // node with temporary storage on the stack.
7035 | if (TLI.isOperationLegalOrCustom(Op: ISD::GET_FPENV, VT: EnvVT)) { |
7036 | Res = DAG.getNode( |
7037 | Opcode: ISD::GET_FPENV, DL: sdl, |
7038 | VTList: DAG.getVTList(VT1: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()), |
7039 | VT2: MVT::Other), |
7040 | N: Chain); |
7041 | } else { |
7042 | SDValue Temp = DAG.CreateStackTemporary(VT: EnvVT, minAlign: TempAlign.value()); |
7043 | int SPFI = cast<FrameIndexSDNode>(Val: Temp.getNode())->getIndex(); |
7044 | auto MPI = |
7045 | MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: SPFI); |
7046 | MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( |
7047 | PtrInfo: MPI, F: MachineMemOperand::MOStore, Size: LocationSize::beforeOrAfterPointer(), |
7048 | BaseAlignment: TempAlign); |
7049 | Chain = DAG.getGetFPEnv(Chain, dl: sdl, Ptr: Temp, MemVT: EnvVT, MMO); |
7050 | Res = DAG.getLoad(VT: EnvVT, dl: sdl, Chain, Ptr: Temp, PtrInfo: MPI); |
7051 | } |
7052 | setValue(V: &I, NewN: Res); |
7053 | DAG.setRoot(Res.getValue(R: 1)); |
7054 | return; |
7055 | } |
7056 | case Intrinsic::set_fpenv: { |
7057 | const DataLayout DLayout = DAG.getDataLayout(); |
7058 | SDValue Env = getValue(V: I.getArgOperand(i: 0)); |
7059 | EVT EnvVT = Env.getValueType(); |
7060 | Align TempAlign = DAG.getEVTAlign(MemoryVT: EnvVT); |
7061 | SDValue Chain = getRoot(); |
    // If SET_FPENV is custom or legal, use it. Otherwise load the
    // environment from memory.
7064 | if (TLI.isOperationLegalOrCustom(Op: ISD::SET_FPENV, VT: EnvVT)) { |
7065 | Chain = DAG.getNode(Opcode: ISD::SET_FPENV, DL: sdl, VT: MVT::Other, N1: Chain, N2: Env); |
7066 | } else { |
      // Allocate stack space, copy the environment bits into it and use that
      // memory in SET_FPENV_MEM.
7069 | SDValue Temp = DAG.CreateStackTemporary(VT: EnvVT, minAlign: TempAlign.value()); |
7070 | int SPFI = cast<FrameIndexSDNode>(Val: Temp.getNode())->getIndex(); |
7071 | auto MPI = |
7072 | MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: SPFI); |
7073 | Chain = DAG.getStore(Chain, dl: sdl, Val: Env, Ptr: Temp, PtrInfo: MPI, Alignment: TempAlign, |
7074 | MMOFlags: MachineMemOperand::MOStore); |
7075 | MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( |
7076 | PtrInfo: MPI, F: MachineMemOperand::MOLoad, Size: LocationSize::beforeOrAfterPointer(), |
7077 | BaseAlignment: TempAlign); |
7078 | Chain = DAG.getSetFPEnv(Chain, dl: sdl, Ptr: Temp, MemVT: EnvVT, MMO); |
7079 | } |
7080 | DAG.setRoot(Chain); |
7081 | return; |
7082 | } |
7083 | case Intrinsic::reset_fpenv: |
7084 | DAG.setRoot(DAG.getNode(Opcode: ISD::RESET_FPENV, DL: sdl, VT: MVT::Other, Operand: getRoot())); |
7085 | return; |
7086 | case Intrinsic::get_fpmode: |
7087 | Res = DAG.getNode( |
7088 | Opcode: ISD::GET_FPMODE, DL: sdl, |
7089 | VTList: DAG.getVTList(VT1: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()), |
7090 | VT2: MVT::Other), |
7091 | N: DAG.getRoot()); |
7092 | setValue(V: &I, NewN: Res); |
7093 | DAG.setRoot(Res.getValue(R: 1)); |
7094 | return; |
7095 | case Intrinsic::set_fpmode: |
7096 | Res = DAG.getNode(Opcode: ISD::SET_FPMODE, DL: sdl, VT: MVT::Other, N1: {DAG.getRoot()}, |
7097 | N2: getValue(V: I.getArgOperand(i: 0))); |
7098 | DAG.setRoot(Res); |
7099 | return; |
7100 | case Intrinsic::reset_fpmode: { |
7101 | Res = DAG.getNode(Opcode: ISD::RESET_FPMODE, DL: sdl, VT: MVT::Other, Operand: getRoot()); |
7102 | DAG.setRoot(Res); |
7103 | return; |
7104 | } |
7105 | case Intrinsic::pcmarker: { |
7106 | SDValue Tmp = getValue(V: I.getArgOperand(i: 0)); |
7107 | DAG.setRoot(DAG.getNode(Opcode: ISD::PCMARKER, DL: sdl, VT: MVT::Other, N1: getRoot(), N2: Tmp)); |
7108 | return; |
7109 | } |
7110 | case Intrinsic::readcyclecounter: { |
7111 | SDValue Op = getRoot(); |
7112 | Res = DAG.getNode(Opcode: ISD::READCYCLECOUNTER, DL: sdl, |
7113 | VTList: DAG.getVTList(VT1: MVT::i64, VT2: MVT::Other), N: Op); |
7114 | setValue(V: &I, NewN: Res); |
7115 | DAG.setRoot(Res.getValue(R: 1)); |
7116 | return; |
7117 | } |
7118 | case Intrinsic::readsteadycounter: { |
7119 | SDValue Op = getRoot(); |
7120 | Res = DAG.getNode(Opcode: ISD::READSTEADYCOUNTER, DL: sdl, |
7121 | VTList: DAG.getVTList(VT1: MVT::i64, VT2: MVT::Other), N: Op); |
7122 | setValue(V: &I, NewN: Res); |
7123 | DAG.setRoot(Res.getValue(R: 1)); |
7124 | return; |
7125 | } |
7126 | case Intrinsic::bitreverse: |
7127 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::BITREVERSE, DL: sdl, |
7128 | VT: getValue(V: I.getArgOperand(i: 0)).getValueType(), |
7129 | Operand: getValue(V: I.getArgOperand(i: 0)))); |
7130 | return; |
7131 | case Intrinsic::bswap: |
7132 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::BSWAP, DL: sdl, |
7133 | VT: getValue(V: I.getArgOperand(i: 0)).getValueType(), |
7134 | Operand: getValue(V: I.getArgOperand(i: 0)))); |
7135 | return; |
7136 | case Intrinsic::cttz: { |
7137 | SDValue Arg = getValue(V: I.getArgOperand(i: 0)); |
7138 | ConstantInt *CI = cast<ConstantInt>(Val: I.getArgOperand(i: 1)); |
7139 | EVT Ty = Arg.getValueType(); |
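    // The second operand is the "is zero poison" flag: when it is set, a
    // zero input is poison, so the CTTZ_ZERO_UNDEF form (whose result is
    // undefined for a zero input) can be used.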
7140 | setValue(V: &I, NewN: DAG.getNode(Opcode: CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, |
7141 | DL: sdl, VT: Ty, Operand: Arg)); |
7142 | return; |
7143 | } |
7144 | case Intrinsic::ctlz: { |
7145 | SDValue Arg = getValue(V: I.getArgOperand(i: 0)); |
7146 | ConstantInt *CI = cast<ConstantInt>(Val: I.getArgOperand(i: 1)); |
7147 | EVT Ty = Arg.getValueType(); |
7148 | setValue(V: &I, NewN: DAG.getNode(Opcode: CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, |
7149 | DL: sdl, VT: Ty, Operand: Arg)); |
7150 | return; |
7151 | } |
7152 | case Intrinsic::ctpop: { |
7153 | SDValue Arg = getValue(V: I.getArgOperand(i: 0)); |
7154 | EVT Ty = Arg.getValueType(); |
7155 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::CTPOP, DL: sdl, VT: Ty, Operand: Arg)); |
7156 | return; |
7157 | } |
7158 | case Intrinsic::fshl: |
7159 | case Intrinsic::fshr: { |
7160 | bool IsFSHL = Intrinsic == Intrinsic::fshl; |
7161 | SDValue X = getValue(V: I.getArgOperand(i: 0)); |
7162 | SDValue Y = getValue(V: I.getArgOperand(i: 1)); |
7163 | SDValue Z = getValue(V: I.getArgOperand(i: 2)); |
7164 | EVT VT = X.getValueType(); |
7165 | |
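    // A funnel shift with both inputs equal degenerates into a rotate, e.g.
    //   fshl(x, x, z) --> rotl(x, z)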
7166 | if (X == Y) { |
7167 | auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR; |
7168 | setValue(V: &I, NewN: DAG.getNode(Opcode: RotateOpcode, DL: sdl, VT, N1: X, N2: Z)); |
7169 | } else { |
7170 | auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR; |
7171 | setValue(V: &I, NewN: DAG.getNode(Opcode: FunnelOpcode, DL: sdl, VT, N1: X, N2: Y, N3: Z)); |
7172 | } |
7173 | return; |
7174 | } |
7175 | case Intrinsic::sadd_sat: { |
7176 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7177 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
7178 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SADDSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2)); |
7179 | return; |
7180 | } |
7181 | case Intrinsic::uadd_sat: { |
7182 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7183 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
7184 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::UADDSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2)); |
7185 | return; |
7186 | } |
7187 | case Intrinsic::ssub_sat: { |
7188 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7189 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
7190 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SSUBSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2)); |
7191 | return; |
7192 | } |
7193 | case Intrinsic::usub_sat: { |
7194 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7195 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
7196 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::USUBSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2)); |
7197 | return; |
7198 | } |
7199 | case Intrinsic::sshl_sat: { |
7200 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7201 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
7202 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SSHLSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2)); |
7203 | return; |
7204 | } |
7205 | case Intrinsic::ushl_sat: { |
7206 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7207 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
7208 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::USHLSAT, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2)); |
7209 | return; |
7210 | } |
7211 | case Intrinsic::smul_fix: |
7212 | case Intrinsic::umul_fix: |
7213 | case Intrinsic::smul_fix_sat: |
7214 | case Intrinsic::umul_fix_sat: { |
7215 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7216 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
7217 | SDValue Op3 = getValue(V: I.getArgOperand(i: 2)); |
7218 | setValue(V: &I, NewN: DAG.getNode(Opcode: FixedPointIntrinsicToOpcode(Intrinsic), DL: sdl, |
7219 | VT: Op1.getValueType(), N1: Op1, N2: Op2, N3: Op3)); |
7220 | return; |
7221 | } |
7222 | case Intrinsic::sdiv_fix: |
7223 | case Intrinsic::udiv_fix: |
7224 | case Intrinsic::sdiv_fix_sat: |
7225 | case Intrinsic::udiv_fix_sat: { |
7226 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7227 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
7228 | SDValue Op3 = getValue(V: I.getArgOperand(i: 2)); |
7229 | setValue(V: &I, NewN: expandDivFix(Opcode: FixedPointIntrinsicToOpcode(Intrinsic), DL: sdl, |
7230 | LHS: Op1, RHS: Op2, Scale: Op3, DAG, TLI)); |
7231 | return; |
7232 | } |
7233 | case Intrinsic::smax: { |
7234 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7235 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
7236 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SMAX, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2)); |
7237 | return; |
7238 | } |
7239 | case Intrinsic::smin: { |
7240 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7241 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
7242 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SMIN, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2)); |
7243 | return; |
7244 | } |
7245 | case Intrinsic::umax: { |
7246 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7247 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
7248 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::UMAX, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2)); |
7249 | return; |
7250 | } |
7251 | case Intrinsic::umin: { |
7252 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7253 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
7254 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::UMIN, DL: sdl, VT: Op1.getValueType(), N1: Op1, N2: Op2)); |
7255 | return; |
7256 | } |
7257 | case Intrinsic::abs: { |
7258 | // TODO: Preserve "int min is poison" arg in SDAG? |
7259 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7260 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::ABS, DL: sdl, VT: Op1.getValueType(), Operand: Op1)); |
7261 | return; |
7262 | } |
7263 | case Intrinsic::scmp: { |
7264 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7265 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
7266 | EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
7267 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::SCMP, DL: sdl, VT: DestVT, N1: Op1, N2: Op2)); |
7268 | break; |
7269 | } |
7270 | case Intrinsic::ucmp: { |
7271 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7272 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
7273 | EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
7274 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::UCMP, DL: sdl, VT: DestVT, N1: Op1, N2: Op2)); |
7275 | break; |
7276 | } |
7277 | case Intrinsic::stacksave: { |
7278 | SDValue Op = getRoot(); |
7279 | EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
7280 | Res = DAG.getNode(Opcode: ISD::STACKSAVE, DL: sdl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Other), N: Op); |
7281 | setValue(V: &I, NewN: Res); |
7282 | DAG.setRoot(Res.getValue(R: 1)); |
7283 | return; |
7284 | } |
7285 | case Intrinsic::stackrestore: |
7286 | Res = getValue(V: I.getArgOperand(i: 0)); |
7287 | DAG.setRoot(DAG.getNode(Opcode: ISD::STACKRESTORE, DL: sdl, VT: MVT::Other, N1: getRoot(), N2: Res)); |
7288 | return; |
7289 | case Intrinsic::get_dynamic_area_offset: { |
7290 | SDValue Op = getRoot(); |
7291 | EVT PtrTy = TLI.getFrameIndexTy(DL: DAG.getDataLayout()); |
7292 | EVT ResTy = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
    // The result type of @llvm.get.dynamic.area.offset must not be wider
    // than the target's pointer type.
    if (PtrTy.getFixedSizeInBits() < ResTy.getFixedSizeInBits())
      report_fatal_error(reason: "Wrong result type for @llvm.get.dynamic.area.offset"
                         " intrinsic!");
7298 | Res = DAG.getNode(Opcode: ISD::GET_DYNAMIC_AREA_OFFSET, DL: sdl, VTList: DAG.getVTList(VT: ResTy), |
7299 | N: Op); |
7300 | DAG.setRoot(Op); |
7301 | setValue(V: &I, NewN: Res); |
7302 | return; |
7303 | } |
7304 | case Intrinsic::stackguard: { |
7305 | MachineFunction &MF = DAG.getMachineFunction(); |
7306 | const Module &M = *MF.getFunction().getParent(); |
7307 | EVT PtrTy = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
7308 | SDValue Chain = getRoot(); |
7309 | if (TLI.useLoadStackGuardNode()) { |
7310 | Res = getLoadStackGuard(DAG, DL: sdl, Chain); |
7311 | Res = DAG.getPtrExtOrTrunc(Op: Res, DL: sdl, VT: PtrTy); |
7312 | } else { |
7313 | const Value *Global = TLI.getSDagStackGuard(M); |
7314 | Align Align = DAG.getDataLayout().getPrefTypeAlign(Ty: Global->getType()); |
7315 | Res = DAG.getLoad(VT: PtrTy, dl: sdl, Chain, Ptr: getValue(V: Global), |
7316 | PtrInfo: MachinePointerInfo(Global, 0), Alignment: Align, |
7317 | MMOFlags: MachineMemOperand::MOVolatile); |
7318 | } |
7319 | if (TLI.useStackGuardXorFP()) |
7320 | Res = TLI.emitStackGuardXorFP(DAG, Val: Res, DL: sdl); |
7321 | DAG.setRoot(Chain); |
7322 | setValue(V: &I, NewN: Res); |
7323 | return; |
7324 | } |
7325 | case Intrinsic::stackprotector: { |
7326 | // Emit code into the DAG to store the stack guard onto the stack. |
7327 | MachineFunction &MF = DAG.getMachineFunction(); |
7328 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
7329 | SDValue Src, Chain = getRoot(); |
7330 | |
7331 | if (TLI.useLoadStackGuardNode()) |
7332 | Src = getLoadStackGuard(DAG, DL: sdl, Chain); |
7333 | else |
7334 | Src = getValue(V: I.getArgOperand(i: 0)); // The guard's value. |
7335 | |
7336 | AllocaInst *Slot = cast<AllocaInst>(Val: I.getArgOperand(i: 1)); |
7337 | |
7338 | int FI = FuncInfo.StaticAllocaMap[Slot]; |
7339 | MFI.setStackProtectorIndex(FI); |
7340 | EVT PtrTy = TLI.getFrameIndexTy(DL: DAG.getDataLayout()); |
7341 | |
7342 | SDValue FIN = DAG.getFrameIndex(FI, VT: PtrTy); |
7343 | |
7344 | // Store the stack protector onto the stack. |
7345 | Res = DAG.getStore( |
7346 | Chain, dl: sdl, Val: Src, Ptr: FIN, |
7347 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), |
7348 | Alignment: MaybeAlign(), MMOFlags: MachineMemOperand::MOVolatile); |
7349 | setValue(V: &I, NewN: Res); |
7350 | DAG.setRoot(Res); |
7351 | return; |
7352 | } |
7353 | case Intrinsic::objectsize: |
    llvm_unreachable("llvm.objectsize.* should have been lowered already");

  case Intrinsic::is_constant:
    llvm_unreachable("llvm.is.constant.* should have been lowered already");
7358 | |
7359 | case Intrinsic::annotation: |
7360 | case Intrinsic::ptr_annotation: |
7361 | case Intrinsic::launder_invariant_group: |
7362 | case Intrinsic::strip_invariant_group: |
7363 | // Drop the intrinsic, but forward the value |
7364 | setValue(V: &I, NewN: getValue(V: I.getOperand(i_nocapture: 0))); |
7365 | return; |
7366 | |
7367 | case Intrinsic::assume: |
7368 | case Intrinsic::experimental_noalias_scope_decl: |
7369 | case Intrinsic::var_annotation: |
7370 | case Intrinsic::sideeffect: |
7371 | // Discard annotate attributes, noalias scope declarations, assumptions, and |
7372 | // artificial side-effects. |
7373 | return; |
7374 | |
7375 | case Intrinsic::codeview_annotation: { |
7376 | // Emit a label associated with this metadata. |
7377 | MachineFunction &MF = DAG.getMachineFunction(); |
    MCSymbol *Label = MF.getContext().createTempSymbol(Name: "annotation", AlwaysAddSuffix: true);
7379 | Metadata *MD = cast<MetadataAsValue>(Val: I.getArgOperand(i: 0))->getMetadata(); |
7380 | MF.addCodeViewAnnotation(Label, MD: cast<MDNode>(Val: MD)); |
7381 | Res = DAG.getLabelNode(Opcode: ISD::ANNOTATION_LABEL, dl: sdl, Root: getRoot(), Label); |
7382 | DAG.setRoot(Res); |
7383 | return; |
7384 | } |
7385 | |
7386 | case Intrinsic::init_trampoline: { |
7387 | const Function *F = cast<Function>(Val: I.getArgOperand(i: 1)->stripPointerCasts()); |
7388 | |
7389 | SDValue Ops[6]; |
7390 | Ops[0] = getRoot(); |
7391 | Ops[1] = getValue(V: I.getArgOperand(i: 0)); |
7392 | Ops[2] = getValue(V: I.getArgOperand(i: 1)); |
7393 | Ops[3] = getValue(V: I.getArgOperand(i: 2)); |
7394 | Ops[4] = DAG.getSrcValue(v: I.getArgOperand(i: 0)); |
7395 | Ops[5] = DAG.getSrcValue(v: F); |
7396 | |
7397 | Res = DAG.getNode(Opcode: ISD::INIT_TRAMPOLINE, DL: sdl, VT: MVT::Other, Ops); |
7398 | |
7399 | DAG.setRoot(Res); |
7400 | return; |
7401 | } |
7402 | case Intrinsic::adjust_trampoline: |
7403 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::ADJUST_TRAMPOLINE, DL: sdl, |
7404 | VT: TLI.getPointerTy(DL: DAG.getDataLayout()), |
7405 | Operand: getValue(V: I.getArgOperand(i: 0)))); |
7406 | return; |
7407 | case Intrinsic::gcroot: { |
7408 | assert(DAG.getMachineFunction().getFunction().hasGC() && |
           "only valid in functions with gc specified, enforced by Verifier");
    assert(GFI && "implied by previous");
7411 | const Value *Alloca = I.getArgOperand(i: 0)->stripPointerCasts(); |
7412 | const Constant *TypeMap = cast<Constant>(Val: I.getArgOperand(i: 1)); |
7413 | |
7414 | FrameIndexSDNode *FI = cast<FrameIndexSDNode>(Val: getValue(V: Alloca).getNode()); |
7415 | GFI->addStackRoot(Num: FI->getIndex(), Metadata: TypeMap); |
7416 | return; |
7417 | } |
7418 | case Intrinsic::gcread: |
7419 | case Intrinsic::gcwrite: |
    llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
7421 | case Intrinsic::get_rounding: |
7422 | Res = DAG.getNode(Opcode: ISD::GET_ROUNDING, DL: sdl, ResultTys: {MVT::i32, MVT::Other}, Ops: getRoot()); |
7423 | setValue(V: &I, NewN: Res); |
7424 | DAG.setRoot(Res.getValue(R: 1)); |
7425 | return; |
7426 | |
7427 | case Intrinsic::expect: |
7428 | // Just replace __builtin_expect(exp, c) with EXP. |
7429 | setValue(V: &I, NewN: getValue(V: I.getArgOperand(i: 0))); |
7430 | return; |
7431 | |
7432 | case Intrinsic::ubsantrap: |
7433 | case Intrinsic::debugtrap: |
7434 | case Intrinsic::trap: { |
7435 | StringRef TrapFuncName = |
        I.getAttributes().getFnAttr(Kind: "trap-func-name").getValueAsString();
7437 | if (TrapFuncName.empty()) { |
7438 | switch (Intrinsic) { |
7439 | case Intrinsic::trap: |
7440 | DAG.setRoot(DAG.getNode(Opcode: ISD::TRAP, DL: sdl, VT: MVT::Other, Operand: getRoot())); |
7441 | break; |
7442 | case Intrinsic::debugtrap: |
7443 | DAG.setRoot(DAG.getNode(Opcode: ISD::DEBUGTRAP, DL: sdl, VT: MVT::Other, Operand: getRoot())); |
7444 | break; |
7445 | case Intrinsic::ubsantrap: |
7446 | DAG.setRoot(DAG.getNode( |
7447 | Opcode: ISD::UBSANTRAP, DL: sdl, VT: MVT::Other, N1: getRoot(), |
7448 | N2: DAG.getTargetConstant( |
7449 | Val: cast<ConstantInt>(Val: I.getArgOperand(i: 0))->getZExtValue(), DL: sdl, |
7450 | VT: MVT::i32))); |
7451 | break; |
      default: llvm_unreachable("unknown trap intrinsic");
7453 | } |
7454 | return; |
7455 | } |
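    // A non-empty "trap-func-name" attribute requests a call to that function
    // instead of a trap instruction; for ubsantrap the check-kind immediate is
    // passed along as the sole call argument.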
7456 | TargetLowering::ArgListTy Args; |
7457 | if (Intrinsic == Intrinsic::ubsantrap) { |
7458 | Args.push_back(x: TargetLoweringBase::ArgListEntry()); |
7459 | Args[0].Val = I.getArgOperand(i: 0); |
7460 | Args[0].Node = getValue(V: Args[0].Val); |
7461 | Args[0].Ty = Args[0].Val->getType(); |
7462 | } |
7463 | |
7464 | TargetLowering::CallLoweringInfo CLI(DAG); |
7465 | CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( |
7466 | CC: CallingConv::C, ResultType: I.getType(), |
7467 | Target: DAG.getExternalSymbol(Sym: TrapFuncName.data(), |
7468 | VT: TLI.getPointerTy(DL: DAG.getDataLayout())), |
7469 | ArgsList: std::move(Args)); |
7470 | |
7471 | std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); |
7472 | DAG.setRoot(Result.second); |
7473 | return; |
7474 | } |
7475 | |
7476 | case Intrinsic::allow_runtime_check: |
7477 | case Intrinsic::allow_ubsan_check: |
7478 | setValue(V: &I, NewN: getValue(V: ConstantInt::getTrue(Ty: I.getType()))); |
7479 | return; |
7480 | |
7481 | case Intrinsic::uadd_with_overflow: |
7482 | case Intrinsic::sadd_with_overflow: |
7483 | case Intrinsic::usub_with_overflow: |
7484 | case Intrinsic::ssub_with_overflow: |
7485 | case Intrinsic::umul_with_overflow: |
7486 | case Intrinsic::smul_with_overflow: { |
7487 | ISD::NodeType Op; |
7488 | switch (Intrinsic) { |
    default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
7490 | case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; |
7491 | case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; |
7492 | case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; |
7493 | case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; |
7494 | case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; |
7495 | case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; |
7496 | } |
7497 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
7498 | SDValue Op2 = getValue(V: I.getArgOperand(i: 1)); |
7499 | |
7500 | EVT ResultVT = Op1.getValueType(); |
7501 | EVT OverflowVT = MVT::i1; |
7502 | if (ResultVT.isVector()) |
7503 | OverflowVT = EVT::getVectorVT( |
7504 | Context&: *Context, VT: OverflowVT, EC: ResultVT.getVectorElementCount()); |
7505 | |
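    // Each *.with.overflow intrinsic lowers to a single two-result node, e.g.
    // (illustrative IR) {i32, i1} @llvm.uadd.with.overflow.i32 becomes one
    // UADDO node with the value-type list {i32, i1} built here.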
7506 | SDVTList VTs = DAG.getVTList(VT1: ResultVT, VT2: OverflowVT); |
7507 | setValue(V: &I, NewN: DAG.getNode(Opcode: Op, DL: sdl, VTList: VTs, N1: Op1, N2: Op2)); |
7508 | return; |
7509 | } |
7510 | case Intrinsic::prefetch: { |
7511 | SDValue Ops[5]; |
7512 | unsigned rw = cast<ConstantInt>(Val: I.getArgOperand(i: 1))->getZExtValue(); |
    auto Flags =
        rw == 0 ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
7514 | Ops[0] = DAG.getRoot(); |
7515 | Ops[1] = getValue(V: I.getArgOperand(i: 0)); |
7516 | Ops[2] = DAG.getTargetConstant(Val: *cast<ConstantInt>(Val: I.getArgOperand(i: 1)), DL: sdl, |
7517 | VT: MVT::i32); |
7518 | Ops[3] = DAG.getTargetConstant(Val: *cast<ConstantInt>(Val: I.getArgOperand(i: 2)), DL: sdl, |
7519 | VT: MVT::i32); |
7520 | Ops[4] = DAG.getTargetConstant(Val: *cast<ConstantInt>(Val: I.getArgOperand(i: 3)), DL: sdl, |
7521 | VT: MVT::i32); |
7522 | SDValue Result = DAG.getMemIntrinsicNode( |
7523 | Opcode: ISD::PREFETCH, dl: sdl, VTList: DAG.getVTList(VT: MVT::Other), Ops, |
7524 | MemVT: EVT::getIntegerVT(Context&: *Context, BitWidth: 8), PtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)), |
7525 | /* align */ Alignment: std::nullopt, Flags); |
7526 | |
7527 | // Chain the prefetch in parallel with any pending loads, to stay out of |
7528 | // the way of later optimizations. |
7529 | PendingLoads.push_back(Elt: Result); |
7530 | Result = getRoot(); |
7531 | DAG.setRoot(Result); |
7532 | return; |
7533 | } |
7534 | case Intrinsic::lifetime_start: |
7535 | case Intrinsic::lifetime_end: { |
7536 | bool IsStart = (Intrinsic == Intrinsic::lifetime_start); |
    // Stack coloring is not enabled at -O0, so discard the region information.
7538 | if (TM.getOptLevel() == CodeGenOptLevel::None) |
7539 | return; |
7540 | |
7541 | const int64_t ObjectSize = |
7542 | cast<ConstantInt>(Val: I.getArgOperand(i: 0))->getSExtValue(); |
7543 | Value *const ObjectPtr = I.getArgOperand(i: 1); |
7544 | SmallVector<const Value *, 4> Allocas; |
7545 | getUnderlyingObjects(V: ObjectPtr, Objects&: Allocas); |
7546 | |
7547 | for (const Value *Alloca : Allocas) { |
7548 | const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(Val: Alloca); |
7549 | |
7550 | // Could not find an Alloca. |
7551 | if (!LifetimeObject) |
7552 | continue; |
7553 | |
7554 | // First check that the Alloca is static, otherwise it won't have a |
7555 | // valid frame index. |
7556 | auto SI = FuncInfo.StaticAllocaMap.find(Val: LifetimeObject); |
7557 | if (SI == FuncInfo.StaticAllocaMap.end()) |
7558 | return; |
7559 | |
7560 | const int FrameIndex = SI->second; |
7561 | int64_t Offset; |
7562 | if (GetPointerBaseWithConstantOffset( |
7563 | Ptr: ObjectPtr, Offset, DL: DAG.getDataLayout()) != LifetimeObject) |
7564 | Offset = -1; // Cannot determine offset from alloca to lifetime object. |
7565 | Res = DAG.getLifetimeNode(IsStart, dl: sdl, Chain: getRoot(), FrameIndex, Size: ObjectSize, |
7566 | Offset); |
7567 | DAG.setRoot(Res); |
7568 | } |
7569 | return; |
7570 | } |
7571 | case Intrinsic::pseudoprobe: { |
7572 | auto Guid = cast<ConstantInt>(Val: I.getArgOperand(i: 0))->getZExtValue(); |
7573 | auto Index = cast<ConstantInt>(Val: I.getArgOperand(i: 1))->getZExtValue(); |
7574 | auto Attr = cast<ConstantInt>(Val: I.getArgOperand(i: 2))->getZExtValue(); |
7575 | Res = DAG.getPseudoProbeNode(Dl: sdl, Chain: getRoot(), Guid, Index, Attr); |
7576 | DAG.setRoot(Res); |
7577 | return; |
7578 | } |
7579 | case Intrinsic::invariant_start: |
7580 | // Discard region information. |
7581 | setValue(V: &I, |
7582 | NewN: DAG.getUNDEF(VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()))); |
7583 | return; |
7584 | case Intrinsic::invariant_end: |
7585 | // Discard region information. |
7586 | return; |
7587 | case Intrinsic::clear_cache: { |
7588 | SDValue InputChain = DAG.getRoot(); |
7589 | SDValue StartVal = getValue(V: I.getArgOperand(i: 0)); |
7590 | SDValue EndVal = getValue(V: I.getArgOperand(i: 1)); |
7591 | Res = DAG.getNode(Opcode: ISD::CLEAR_CACHE, DL: sdl, VTList: DAG.getVTList(VT: MVT::Other), |
7592 | Ops: {InputChain, StartVal, EndVal}); |
7593 | setValue(V: &I, NewN: Res); |
7594 | DAG.setRoot(Res); |
7595 | return; |
7596 | } |
7597 | case Intrinsic::donothing: |
7598 | case Intrinsic::seh_try_begin: |
7599 | case Intrinsic::seh_scope_begin: |
7600 | case Intrinsic::seh_try_end: |
7601 | case Intrinsic::seh_scope_end: |
7602 | // ignore |
7603 | return; |
7604 | case Intrinsic::experimental_stackmap: |
7605 | visitStackmap(I); |
7606 | return; |
7607 | case Intrinsic::experimental_patchpoint_void: |
7608 | case Intrinsic::experimental_patchpoint: |
7609 | visitPatchpoint(CB: I); |
7610 | return; |
7611 | case Intrinsic::experimental_gc_statepoint: |
7612 | LowerStatepoint(I: cast<GCStatepointInst>(Val: I)); |
7613 | return; |
7614 | case Intrinsic::experimental_gc_result: |
7615 | visitGCResult(I: cast<GCResultInst>(Val: I)); |
7616 | return; |
7617 | case Intrinsic::experimental_gc_relocate: |
7618 | visitGCRelocate(Relocate: cast<GCRelocateInst>(Val: I)); |
7619 | return; |
7620 | case Intrinsic::instrprof_cover: |
    llvm_unreachable("instrprof failed to lower a cover");
  case Intrinsic::instrprof_increment:
    llvm_unreachable("instrprof failed to lower an increment");
  case Intrinsic::instrprof_timestamp:
    llvm_unreachable("instrprof failed to lower a timestamp");
  case Intrinsic::instrprof_value_profile:
    llvm_unreachable("instrprof failed to lower a value profiling call");
  case Intrinsic::instrprof_mcdc_parameters:
    llvm_unreachable("instrprof failed to lower mcdc parameters");
  case Intrinsic::instrprof_mcdc_tvbitmap_update:
    llvm_unreachable("instrprof failed to lower an mcdc tvbitmap update");
7632 | case Intrinsic::localescape: { |
7633 | MachineFunction &MF = DAG.getMachineFunction(); |
7634 | const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); |
7635 | |
7636 | // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission |
7637 | // is the same on all targets. |
7638 | for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) { |
7639 | Value *Arg = I.getArgOperand(i: Idx)->stripPointerCasts(); |
7640 | if (isa<ConstantPointerNull>(Val: Arg)) |
7641 | continue; // Skip null pointers. They represent a hole in index space. |
7642 | AllocaInst *Slot = cast<AllocaInst>(Val: Arg); |
7643 | assert(FuncInfo.StaticAllocaMap.count(Slot) && |
             "can only escape static allocas");
7645 | int FI = FuncInfo.StaticAllocaMap[Slot]; |
7646 | MCSymbol *FrameAllocSym = MF.getContext().getOrCreateFrameAllocSymbol( |
7647 | FuncName: GlobalValue::dropLLVMManglingEscape(Name: MF.getName()), Idx); |
7648 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD: dl, |
7649 | MCID: TII->get(Opcode: TargetOpcode::LOCAL_ESCAPE)) |
7650 | .addSym(Sym: FrameAllocSym) |
7651 | .addFrameIndex(Idx: FI); |
7652 | } |
7653 | |
7654 | return; |
7655 | } |
7656 | |
7657 | case Intrinsic::localrecover: { |
7658 | // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx) |
7659 | MachineFunction &MF = DAG.getMachineFunction(); |
7660 | |
7661 | // Get the symbol that defines the frame offset. |
7662 | auto *Fn = cast<Function>(Val: I.getArgOperand(i: 0)->stripPointerCasts()); |
7663 | auto *Idx = cast<ConstantInt>(Val: I.getArgOperand(i: 2)); |
7664 | unsigned IdxVal = |
7665 | unsigned(Idx->getLimitedValue(Limit: std::numeric_limits<int>::max())); |
7666 | MCSymbol *FrameAllocSym = MF.getContext().getOrCreateFrameAllocSymbol( |
7667 | FuncName: GlobalValue::dropLLVMManglingEscape(Name: Fn->getName()), Idx: IdxVal); |
7668 | |
7669 | Value *FP = I.getArgOperand(i: 1); |
7670 | SDValue FPVal = getValue(V: FP); |
7671 | EVT PtrVT = FPVal.getValueType(); |
7672 | |
7673 | // Create a MCSymbol for the label to avoid any target lowering |
7674 | // that would make this PC relative. |
7675 | SDValue OffsetSym = DAG.getMCSymbol(Sym: FrameAllocSym, VT: PtrVT); |
7676 | SDValue OffsetVal = |
7677 | DAG.getNode(Opcode: ISD::LOCAL_RECOVER, DL: sdl, VT: PtrVT, Operand: OffsetSym); |
7678 | |
7679 | // Add the offset to the FP. |
7680 | SDValue Add = DAG.getMemBasePlusOffset(Base: FPVal, Offset: OffsetVal, DL: sdl); |
7681 | setValue(V: &I, NewN: Add); |
7682 | |
7683 | return; |
7684 | } |
7685 | |
7686 | case Intrinsic::eh_exceptionpointer: |
7687 | case Intrinsic::eh_exceptioncode: { |
7688 | // Get the exception pointer vreg, copy from it, and resize it to fit. |
7689 | const auto *CPI = cast<CatchPadInst>(Val: I.getArgOperand(i: 0)); |
7690 | MVT PtrVT = TLI.getPointerTy(DL: DAG.getDataLayout()); |
7691 | const TargetRegisterClass *PtrRC = TLI.getRegClassFor(VT: PtrVT); |
7692 | unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, RC: PtrRC); |
7693 | SDValue N = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: sdl, Reg: VReg, VT: PtrVT); |
7694 | if (Intrinsic == Intrinsic::eh_exceptioncode) |
7695 | N = DAG.getZExtOrTrunc(Op: N, DL: sdl, VT: MVT::i32); |
7696 | setValue(V: &I, NewN: N); |
7697 | return; |
7698 | } |
7699 | case Intrinsic::xray_customevent: { |
7700 | // Here we want to make sure that the intrinsic behaves as if it has a |
7701 | // specific calling convention. |
7702 | const auto &Triple = DAG.getTarget().getTargetTriple(); |
7703 | if (!Triple.isAArch64(PointerWidth: 64) && Triple.getArch() != Triple::x86_64) |
7704 | return; |
7705 | |
7706 | SmallVector<SDValue, 8> Ops; |
7707 | |
7708 | // We want to say that we always want the arguments in registers. |
7709 | SDValue LogEntryVal = getValue(V: I.getArgOperand(i: 0)); |
7710 | SDValue StrSizeVal = getValue(V: I.getArgOperand(i: 1)); |
7711 | SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue); |
7712 | SDValue Chain = getRoot(); |
7713 | Ops.push_back(Elt: LogEntryVal); |
7714 | Ops.push_back(Elt: StrSizeVal); |
7715 | Ops.push_back(Elt: Chain); |
7716 | |
    // We need to enforce the calling convention for the callsite so that
    // argument ordering is enforced correctly, and so that register allocation
    // can see that some registers may be assumed clobbered and must be
    // preserved across calls to the intrinsic.
7721 | MachineSDNode *MN = DAG.getMachineNode(Opcode: TargetOpcode::PATCHABLE_EVENT_CALL, |
7722 | dl: sdl, VTs: NodeTys, Ops); |
7723 | SDValue patchableNode = SDValue(MN, 0); |
7724 | DAG.setRoot(patchableNode); |
7725 | setValue(V: &I, NewN: patchableNode); |
7726 | return; |
7727 | } |
7728 | case Intrinsic::xray_typedevent: { |
7729 | // Here we want to make sure that the intrinsic behaves as if it has a |
7730 | // specific calling convention. |
7731 | const auto &Triple = DAG.getTarget().getTargetTriple(); |
7732 | if (!Triple.isAArch64(PointerWidth: 64) && Triple.getArch() != Triple::x86_64) |
7733 | return; |
7734 | |
7735 | SmallVector<SDValue, 8> Ops; |
7736 | |
7737 | // We want to say that we always want the arguments in registers. |
7738 | // It's unclear to me how manipulating the selection DAG here forces callers |
7739 | // to provide arguments in registers instead of on the stack. |
7740 | SDValue LogTypeId = getValue(V: I.getArgOperand(i: 0)); |
7741 | SDValue LogEntryVal = getValue(V: I.getArgOperand(i: 1)); |
7742 | SDValue StrSizeVal = getValue(V: I.getArgOperand(i: 2)); |
7743 | SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue); |
7744 | SDValue Chain = getRoot(); |
7745 | Ops.push_back(Elt: LogTypeId); |
7746 | Ops.push_back(Elt: LogEntryVal); |
7747 | Ops.push_back(Elt: StrSizeVal); |
7748 | Ops.push_back(Elt: Chain); |
7749 | |
    // We need to enforce the calling convention for the callsite so that
    // argument ordering is enforced correctly, and so that register allocation
    // can see that some registers may be assumed clobbered and must be
    // preserved across calls to the intrinsic.
7754 | MachineSDNode *MN = DAG.getMachineNode( |
7755 | Opcode: TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, dl: sdl, VTs: NodeTys, Ops); |
7756 | SDValue patchableNode = SDValue(MN, 0); |
7757 | DAG.setRoot(patchableNode); |
7758 | setValue(V: &I, NewN: patchableNode); |
7759 | return; |
7760 | } |
7761 | case Intrinsic::experimental_deoptimize: |
7762 | LowerDeoptimizeCall(CI: &I); |
7763 | return; |
7764 | case Intrinsic::experimental_stepvector: |
7765 | visitStepVector(I); |
7766 | return; |
7767 | case Intrinsic::vector_reduce_fadd: |
7768 | case Intrinsic::vector_reduce_fmul: |
7769 | case Intrinsic::vector_reduce_add: |
7770 | case Intrinsic::vector_reduce_mul: |
7771 | case Intrinsic::vector_reduce_and: |
7772 | case Intrinsic::vector_reduce_or: |
7773 | case Intrinsic::vector_reduce_xor: |
7774 | case Intrinsic::vector_reduce_smax: |
7775 | case Intrinsic::vector_reduce_smin: |
7776 | case Intrinsic::vector_reduce_umax: |
7777 | case Intrinsic::vector_reduce_umin: |
7778 | case Intrinsic::vector_reduce_fmax: |
7779 | case Intrinsic::vector_reduce_fmin: |
7780 | case Intrinsic::vector_reduce_fmaximum: |
7781 | case Intrinsic::vector_reduce_fminimum: |
7782 | visitVectorReduce(I, Intrinsic); |
7783 | return; |
7784 | |
7785 | case Intrinsic::icall_branch_funnel: { |
7786 | SmallVector<SDValue, 16> Ops; |
7787 | Ops.push_back(Elt: getValue(V: I.getArgOperand(i: 0))); |
7788 | |
7789 | int64_t Offset; |
7790 | auto *Base = dyn_cast<GlobalObject>(Val: GetPointerBaseWithConstantOffset( |
7791 | Ptr: I.getArgOperand(i: 1), Offset, DL: DAG.getDataLayout())); |
7792 | if (!Base) |
      report_fatal_error(
          reason: "llvm.icall.branch.funnel operand must be a GlobalValue");
7795 | Ops.push_back(Elt: DAG.getTargetGlobalAddress(GV: Base, DL: sdl, VT: MVT::i64, offset: 0)); |
7796 | |
7797 | struct BranchFunnelTarget { |
7798 | int64_t Offset; |
7799 | SDValue Target; |
7800 | }; |
7801 | SmallVector<BranchFunnelTarget, 8> Targets; |
7802 | |
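    // The operands after the callee come in (jump-table-slot, target-function)
    // pairs; every slot pointer must be a constant offset into the same global
    // as operand 1, and the pairs are sorted by that offset below.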
7803 | for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) { |
7804 | auto *ElemBase = dyn_cast<GlobalObject>(Val: GetPointerBaseWithConstantOffset( |
7805 | Ptr: I.getArgOperand(i: Op), Offset, DL: DAG.getDataLayout())); |
7806 | if (ElemBase != Base) |
        report_fatal_error(reason: "all llvm.icall.branch.funnel operands must refer "
                           "to the same GlobalValue");
7809 | |
7810 | SDValue Val = getValue(V: I.getArgOperand(i: Op + 1)); |
7811 | auto *GA = dyn_cast<GlobalAddressSDNode>(Val); |
7812 | if (!GA) |
        report_fatal_error(
            reason: "llvm.icall.branch.funnel operand must be a GlobalValue");
7815 | Targets.push_back(Elt: {.Offset: Offset, .Target: DAG.getTargetGlobalAddress( |
7816 | GV: GA->getGlobal(), DL: sdl, VT: Val.getValueType(), |
7817 | offset: GA->getOffset())}); |
7818 | } |
7819 | llvm::sort(C&: Targets, |
7820 | Comp: [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) { |
7821 | return T1.Offset < T2.Offset; |
7822 | }); |
7823 | |
7824 | for (auto &T : Targets) { |
7825 | Ops.push_back(Elt: DAG.getTargetConstant(Val: T.Offset, DL: sdl, VT: MVT::i32)); |
7826 | Ops.push_back(Elt: T.Target); |
7827 | } |
7828 | |
7829 | Ops.push_back(Elt: DAG.getRoot()); // Chain |
7830 | SDValue N(DAG.getMachineNode(Opcode: TargetOpcode::ICALL_BRANCH_FUNNEL, dl: sdl, |
7831 | VT: MVT::Other, Ops), |
7832 | 0); |
7833 | DAG.setRoot(N); |
7834 | setValue(V: &I, NewN: N); |
7835 | HasTailCall = true; |
7836 | return; |
7837 | } |
7838 | |
7839 | case Intrinsic::wasm_landingpad_index: |
7840 | // Information this intrinsic contained has been transferred to |
7841 | // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely |
7842 | // delete it now. |
7843 | return; |
7844 | |
7845 | case Intrinsic::aarch64_settag: |
7846 | case Intrinsic::aarch64_settag_zero: { |
7847 | const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); |
7848 | bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero; |
7849 | SDValue Val = TSI.EmitTargetCodeForSetTag( |
7850 | DAG, dl: sdl, Chain: getRoot(), Addr: getValue(V: I.getArgOperand(i: 0)), |
7851 | Size: getValue(V: I.getArgOperand(i: 1)), DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)), |
7852 | ZeroData: ZeroMemory); |
7853 | DAG.setRoot(Val); |
7854 | setValue(V: &I, NewN: Val); |
7855 | return; |
7856 | } |
7857 | case Intrinsic::amdgcn_cs_chain: { |
    assert(I.arg_size() == 5 && "Additional args not supported yet");
    assert(cast<ConstantInt>(I.getOperand(4))->isZero() &&
           "Non-zero flags not supported yet");
7861 | |
7862 | // At this point we don't care if it's amdgpu_cs_chain or |
7863 | // amdgpu_cs_chain_preserve. |
7864 | CallingConv::ID CC = CallingConv::AMDGPU_CS_Chain; |
7865 | |
7866 | Type *RetTy = I.getType(); |
    assert(RetTy->isVoidTy() && "Should not return");
7868 | |
7869 | SDValue Callee = getValue(V: I.getOperand(i_nocapture: 0)); |
7870 | |
    // We only have 2 actual args: one for the SGPRs and one for the VGPRs.
    // We'll also tack on the value of the EXEC mask at the end.
7873 | TargetLowering::ArgListTy Args; |
7874 | Args.reserve(n: 3); |
7875 | |
7876 | for (unsigned Idx : {2, 3, 1}) { |
7877 | TargetLowering::ArgListEntry Arg; |
7878 | Arg.Node = getValue(V: I.getOperand(i_nocapture: Idx)); |
7879 | Arg.Ty = I.getOperand(i_nocapture: Idx)->getType(); |
7880 | Arg.setAttributes(Call: &I, ArgIdx: Idx); |
7881 | Args.push_back(x: Arg); |
7882 | } |
7883 | |
    assert(Args[0].IsInReg && "SGPR args should be marked inreg");
    assert(!Args[1].IsInReg && "VGPR args should not be marked inreg");
7886 | Args[2].IsInReg = true; // EXEC should be inreg |
7887 | |
7888 | TargetLowering::CallLoweringInfo CLI(DAG); |
7889 | CLI.setDebugLoc(getCurSDLoc()) |
7890 | .setChain(getRoot()) |
7891 | .setCallee(CC, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args)) |
7892 | .setNoReturn(true) |
7893 | .setTailCall(true) |
7894 | .setConvergent(I.isConvergent()); |
7895 | CLI.CB = &I; |
7896 | std::pair<SDValue, SDValue> Result = |
7897 | lowerInvokable(CLI, /*EHPadBB*/ nullptr); |
7898 | (void)Result; |
7899 | assert(!Result.first.getNode() && !Result.second.getNode() && |
           "Should've lowered as tail call");
7901 | |
7902 | HasTailCall = true; |
7903 | return; |
7904 | } |
7905 | case Intrinsic::ptrmask: { |
7906 | SDValue Ptr = getValue(V: I.getOperand(i_nocapture: 0)); |
7907 | SDValue Mask = getValue(V: I.getOperand(i_nocapture: 1)); |
7908 | |
7909 | // On arm64_32, pointers are 32 bits when stored in memory, but |
7910 | // zero-extended to 64 bits when in registers. Thus the mask is 32 bits to |
    // match the index type, but the pointer is 64 bits, so the mask must be
7912 | // zero-extended up to 64 bits to match the pointer. |
7913 | EVT PtrVT = |
7914 | TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getOperand(i_nocapture: 0)->getType()); |
7915 | EVT MemVT = |
7916 | TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getOperand(i_nocapture: 0)->getType()); |
7917 | assert(PtrVT == Ptr.getValueType()); |
7918 | assert(MemVT == Mask.getValueType()); |
7919 | if (MemVT != PtrVT) |
7920 | Mask = DAG.getPtrExtOrTrunc(Op: Mask, DL: sdl, VT: PtrVT); |
7921 | |
7922 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::AND, DL: sdl, VT: PtrVT, N1: Ptr, N2: Mask)); |
7923 | return; |
7924 | } |
7925 | case Intrinsic::threadlocal_address: { |
7926 | setValue(V: &I, NewN: getValue(V: I.getOperand(i_nocapture: 0))); |
7927 | return; |
7928 | } |
7929 | case Intrinsic::get_active_lane_mask: { |
7930 | EVT CCVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
7931 | SDValue Index = getValue(V: I.getOperand(i_nocapture: 0)); |
7932 | EVT ElementVT = Index.getValueType(); |
7933 | |
7934 | if (!TLI.shouldExpandGetActiveLaneMask(VT: CCVT, OpVT: ElementVT)) { |
7935 | visitTargetIntrinsic(I, Intrinsic); |
7936 | return; |
7937 | } |
7938 | |
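    // A sketch of the generic expansion built below; the saturating add keeps
    // lanes from wrapping past the trip count:
    //   get.active.lane.mask(%base, %n)
    //     ==> setcc ult (splat(%base) uaddsat step_vector), splat(%n)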
7939 | SDValue TripCount = getValue(V: I.getOperand(i_nocapture: 1)); |
7940 | EVT VecTy = EVT::getVectorVT(Context&: *DAG.getContext(), VT: ElementVT, |
7941 | EC: CCVT.getVectorElementCount()); |
7942 | |
7943 | SDValue VectorIndex = DAG.getSplat(VT: VecTy, DL: sdl, Op: Index); |
7944 | SDValue VectorTripCount = DAG.getSplat(VT: VecTy, DL: sdl, Op: TripCount); |
7945 | SDValue VectorStep = DAG.getStepVector(DL: sdl, ResVT: VecTy); |
7946 | SDValue VectorInduction = DAG.getNode( |
7947 | Opcode: ISD::UADDSAT, DL: sdl, VT: VecTy, N1: VectorIndex, N2: VectorStep); |
7948 | SDValue SetCC = DAG.getSetCC(DL: sdl, VT: CCVT, LHS: VectorInduction, |
7949 | RHS: VectorTripCount, Cond: ISD::CondCode::SETULT); |
7950 | setValue(V: &I, NewN: SetCC); |
7951 | return; |
7952 | } |
7953 | case Intrinsic::experimental_get_vector_length: { |
7954 | assert(cast<ConstantInt>(I.getOperand(1))->getSExtValue() > 0 && |
           "Expected positive VF");
7956 | unsigned VF = cast<ConstantInt>(Val: I.getOperand(i_nocapture: 1))->getZExtValue(); |
7957 | bool IsScalable = cast<ConstantInt>(Val: I.getOperand(i_nocapture: 2))->isOne(); |
7958 | |
7959 | SDValue Count = getValue(V: I.getOperand(i_nocapture: 0)); |
7960 | EVT CountVT = Count.getValueType(); |
7961 | |
7962 | if (!TLI.shouldExpandGetVectorLength(CountVT, VF, IsScalable)) { |
7963 | visitTargetIntrinsic(I, Intrinsic); |
7964 | return; |
7965 | } |
7966 | |
7967 | // Expand to a umin between the trip count and the maximum elements the type |
7968 | // can hold. |
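    // For example (illustrative only), with VF = 4 and IsScalable = true:
    //   get.vector.length(%cnt, 4, true) ==> umin(zext(%cnt), vscale * 4)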
7969 | EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
7970 | |
7971 | // Extend the trip count to at least the result VT. |
7972 | if (CountVT.bitsLT(VT)) { |
7973 | Count = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: sdl, VT, Operand: Count); |
7974 | CountVT = VT; |
7975 | } |
7976 | |
7977 | SDValue MaxEVL = DAG.getElementCount(DL: sdl, VT: CountVT, |
7978 | EC: ElementCount::get(MinVal: VF, Scalable: IsScalable)); |
7979 | |
7980 | SDValue UMin = DAG.getNode(Opcode: ISD::UMIN, DL: sdl, VT: CountVT, N1: Count, N2: MaxEVL); |
7981 | // Clip to the result type if needed. |
7982 | SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: sdl, VT, Operand: UMin); |
7983 | |
7984 | setValue(V: &I, NewN: Trunc); |
7985 | return; |
7986 | } |
7987 | case Intrinsic::experimental_vector_partial_reduce_add: { |
7988 | SDValue OpNode = getValue(V: I.getOperand(i_nocapture: 1)); |
7989 | EVT ReducedTy = EVT::getEVT(Ty: I.getType()); |
7990 | EVT FullTy = OpNode.getValueType(); |
7991 | |
7992 | unsigned Stride = ReducedTy.getVectorMinNumElements(); |
7993 | unsigned ScaleFactor = FullTy.getVectorMinNumElements() / Stride; |
7994 | |
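    // E.g. (illustrative) for a <2 x i32> accumulator and an <8 x i32> input:
    // Stride = 2, ScaleFactor = 4, and the result is the accumulator plus the
    // four <2 x i32> subvectors of the input, added pairwise below.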
7995 | // Collect all of the subvectors |
7996 | std::deque<SDValue> Subvectors; |
7997 | Subvectors.push_back(x: getValue(V: I.getOperand(i_nocapture: 0))); |
7998 | for (unsigned i = 0; i < ScaleFactor; i++) { |
7999 | auto SourceIndex = DAG.getVectorIdxConstant(Val: i * Stride, DL: sdl); |
8000 | Subvectors.push_back(x: DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: sdl, VT: ReducedTy, |
8001 | Ops: {OpNode, SourceIndex})); |
8002 | } |
8003 | |
8004 | // Flatten the subvector tree |
8005 | while (Subvectors.size() > 1) { |
8006 | Subvectors.push_back(x: DAG.getNode(Opcode: ISD::ADD, DL: sdl, VT: ReducedTy, |
8007 | Ops: {Subvectors[0], Subvectors[1]})); |
8008 | Subvectors.pop_front(); |
8009 | Subvectors.pop_front(); |
8010 | } |
8011 | |
8012 | assert(Subvectors.size() == 1 && |
           "There should only be one subvector after tree flattening");
8014 | |
8015 | setValue(V: &I, NewN: Subvectors[0]); |
8016 | return; |
8017 | } |
8018 | case Intrinsic::experimental_cttz_elts: { |
8019 | auto DL = getCurSDLoc(); |
8020 | SDValue Op = getValue(V: I.getOperand(i_nocapture: 0)); |
8021 | EVT OpVT = Op.getValueType(); |
8022 | |
8023 | if (!TLI.shouldExpandCttzElements(VT: OpVT)) { |
8024 | visitTargetIntrinsic(I, Intrinsic); |
8025 | return; |
8026 | } |
8027 | |
8028 | if (OpVT.getScalarType() != MVT::i1) { |
      // If the input is not already an i1 mask, compare its elements to zero
      // and count the trailing zeros of that i1 mask instead.
8030 | SDValue AllZero = DAG.getConstant(Val: 0, DL, VT: OpVT); |
8031 | OpVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1, |
8032 | EC: OpVT.getVectorElementCount()); |
8033 | Op = DAG.getSetCC(DL, VT: OpVT, LHS: Op, RHS: AllZero, Cond: ISD::SETNE); |
8034 | } |
8035 | |
8036 | // If the zero-is-poison flag is set, we can assume the upper limit |
8037 | // of the result is VF-1. |
8038 | bool ZeroIsPoison = |
8039 | !cast<ConstantSDNode>(Val: getValue(V: I.getOperand(i_nocapture: 1)))->isZero(); |
8040 | ConstantRange VScaleRange(1, true); // Dummy value. |
8041 | if (isa<ScalableVectorType>(Val: I.getOperand(i_nocapture: 0)->getType())) |
8042 | VScaleRange = getVScaleRange(F: I.getCaller(), BitWidth: 64); |
8043 | unsigned EltWidth = TLI.getBitWidthForCttzElements( |
8044 | RetTy: I.getType(), EC: OpVT.getVectorElementCount(), ZeroIsPoison, VScaleRange: &VScaleRange); |
8045 | |
8046 | MVT NewEltTy = MVT::getIntegerVT(BitWidth: EltWidth); |
8047 | |
8048 | // Create the new vector type & get the vector length |
8049 | EVT NewVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: NewEltTy, |
8050 | EC: OpVT.getVectorElementCount()); |
8051 | |
8052 | SDValue VL = |
8053 | DAG.getElementCount(DL, VT: NewEltTy, EC: OpVT.getVectorElementCount()); |
8054 | |
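    // Worked example (illustrative), for a fixed-length mask <1,0,1,0> with
    // VL = 4: StepVL = VL - step = <4,3,2,1>, And = StepVL & sext(Op) =
    // <4,0,2,0>, Max = 4, and the result is VL - Max = 0, the index of the
    // first set element. An all-zero mask yields VL.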
8055 | SDValue StepVec = DAG.getStepVector(DL, ResVT: NewVT); |
8056 | SDValue SplatVL = DAG.getSplat(VT: NewVT, DL, Op: VL); |
8057 | SDValue StepVL = DAG.getNode(Opcode: ISD::SUB, DL, VT: NewVT, N1: SplatVL, N2: StepVec); |
8058 | SDValue Ext = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: NewVT, Operand: Op); |
8059 | SDValue And = DAG.getNode(Opcode: ISD::AND, DL, VT: NewVT, N1: StepVL, N2: Ext); |
8060 | SDValue Max = DAG.getNode(Opcode: ISD::VECREDUCE_UMAX, DL, VT: NewEltTy, Operand: And); |
8061 | SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: NewEltTy, N1: VL, N2: Max); |
8062 | |
8063 | EVT RetTy = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
8064 | SDValue Ret = DAG.getZExtOrTrunc(Op: Sub, DL, VT: RetTy); |
8065 | |
8066 | setValue(V: &I, NewN: Ret); |
8067 | return; |
8068 | } |
8069 | case Intrinsic::vector_insert: { |
8070 | SDValue Vec = getValue(V: I.getOperand(i_nocapture: 0)); |
8071 | SDValue SubVec = getValue(V: I.getOperand(i_nocapture: 1)); |
8072 | SDValue Index = getValue(V: I.getOperand(i_nocapture: 2)); |
8073 | |
8074 | // The intrinsic's index type is i64, but the SDNode requires an index type |
8075 | // suitable for the target. Convert the index as required. |
8076 | MVT VectorIdxTy = TLI.getVectorIdxTy(DL: DAG.getDataLayout()); |
8077 | if (Index.getValueType() != VectorIdxTy) |
8078 | Index = DAG.getVectorIdxConstant(Val: Index->getAsZExtVal(), DL: sdl); |
8079 | |
8080 | EVT ResultVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
8081 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: sdl, VT: ResultVT, N1: Vec, N2: SubVec, |
8082 | N3: Index)); |
8083 | return; |
8084 | } |
8085 | case Intrinsic::vector_extract: { |
8086 | SDValue Vec = getValue(V: I.getOperand(i_nocapture: 0)); |
8087 | SDValue Index = getValue(V: I.getOperand(i_nocapture: 1)); |
8088 | EVT ResultVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
8089 | |
8090 | // The intrinsic's index type is i64, but the SDNode requires an index type |
8091 | // suitable for the target. Convert the index as required. |
8092 | MVT VectorIdxTy = TLI.getVectorIdxTy(DL: DAG.getDataLayout()); |
8093 | if (Index.getValueType() != VectorIdxTy) |
8094 | Index = DAG.getVectorIdxConstant(Val: Index->getAsZExtVal(), DL: sdl); |
8095 | |
8096 | setValue(V: &I, |
8097 | NewN: DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: sdl, VT: ResultVT, N1: Vec, N2: Index)); |
8098 | return; |
8099 | } |
8100 | case Intrinsic::vector_reverse: |
8101 | visitVectorReverse(I); |
8102 | return; |
8103 | case Intrinsic::vector_splice: |
8104 | visitVectorSplice(I); |
8105 | return; |
8106 | case Intrinsic::callbr_landingpad: |
8107 | visitCallBrLandingPad(I); |
8108 | return; |
8109 | case Intrinsic::vector_interleave2: |
8110 | visitVectorInterleave(I); |
8111 | return; |
8112 | case Intrinsic::vector_deinterleave2: |
8113 | visitVectorDeinterleave(I); |
8114 | return; |
8115 | case Intrinsic::experimental_vector_compress: |
8116 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::VECTOR_COMPRESS, DL: sdl, |
8117 | VT: getValue(V: I.getArgOperand(i: 0)).getValueType(), |
8118 | N1: getValue(V: I.getArgOperand(i: 0)), |
8119 | N2: getValue(V: I.getArgOperand(i: 1)), |
8120 | N3: getValue(V: I.getArgOperand(i: 2)), Flags)); |
8121 | return; |
8122 | case Intrinsic::experimental_convergence_anchor: |
8123 | case Intrinsic::experimental_convergence_entry: |
8124 | case Intrinsic::experimental_convergence_loop: |
8125 | visitConvergenceControl(I, Intrinsic); |
8126 | return; |
8127 | case Intrinsic::experimental_vector_histogram_add: { |
8128 | visitVectorHistogram(I, IntrinsicID: Intrinsic); |
8129 | return; |
8130 | } |
8131 | } |
8132 | } |
8133 | |
8134 | void SelectionDAGBuilder::visitConstrainedFPIntrinsic( |
8135 | const ConstrainedFPIntrinsic &FPI) { |
8136 | SDLoc sdl = getCurSDLoc(); |
8137 | |
8138 | // We do not need to serialize constrained FP intrinsics against |
8139 | // each other or against (nonvolatile) loads, so they can be |
8140 | // chained like loads. |
8141 | SDValue Chain = DAG.getRoot(); |
8142 | SmallVector<SDValue, 4> Opers; |
8143 | Opers.push_back(Elt: Chain); |
8144 | for (unsigned I = 0, E = FPI.getNonMetadataArgCount(); I != E; ++I) |
8145 | Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: I))); |
8146 | |
8147 | auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) { |
8148 | assert(Result.getNode()->getNumValues() == 2); |
8149 | |
8150 | // Push node to the appropriate list so that future instructions can be |
8151 | // chained up correctly. |
8152 | SDValue OutChain = Result.getValue(R: 1); |
8153 | switch (EB) { |
8154 | case fp::ExceptionBehavior::ebIgnore: |
8155 | // The only reason why ebIgnore nodes still need to be chained is that |
8156 | // they might depend on the current rounding mode, and therefore must |
8157 | // not be moved across instruction that may change that mode. |
8158 | [[fallthrough]]; |
8159 | case fp::ExceptionBehavior::ebMayTrap: |
8160 | // These must not be moved across calls or instructions that may change |
8161 | // floating-point exception masks. |
8162 | PendingConstrainedFP.push_back(Elt: OutChain); |
8163 | break; |
8164 | case fp::ExceptionBehavior::ebStrict: |
8165 | // These must not be moved across calls or instructions that may change |
8166 | // floating-point exception masks or read floating-point exception flags. |
8167 | // In addition, they cannot be optimized out even if unused. |
8168 | PendingConstrainedFPStrict.push_back(Elt: OutChain); |
8169 | break; |
8170 | } |
8171 | }; |
8172 | |
8173 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8174 | EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: FPI.getType()); |
8175 | SDVTList VTs = DAG.getVTList(VT1: VT, VT2: MVT::Other); |
8176 | fp::ExceptionBehavior EB = *FPI.getExceptionBehavior(); |
8177 | |
8178 | SDNodeFlags Flags; |
8179 | if (EB == fp::ExceptionBehavior::ebIgnore) |
8180 | Flags.setNoFPExcept(true); |
8181 | |
8182 | if (auto *FPOp = dyn_cast<FPMathOperator>(Val: &FPI)) |
8183 | Flags.copyFMF(FPMO: *FPOp); |
8184 | |
8185 | unsigned Opcode; |
8186 | switch (FPI.getIntrinsicID()) { |
  default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
8188 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
8189 | case Intrinsic::INTRINSIC: \ |
8190 | Opcode = ISD::STRICT_##DAGN; \ |
8191 | break; |
8192 | #include "llvm/IR/ConstrainedOps.def" |
8193 | case Intrinsic::experimental_constrained_fmuladd: { |
8194 | Opcode = ISD::STRICT_FMA; |
8195 | // Break fmuladd into fmul and fadd. |
8196 | if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict || |
8197 | !TLI.isFMAFasterThanFMulAndFAdd(MF: DAG.getMachineFunction(), VT)) { |
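      // Drop the addend, emit the strict fmul on its own, then rebuild the
      // operand list as {fmul out-chain, fmul result, addend} so that the
      // strict fadd is chained after the multiply and observes its exceptions.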
8198 | Opers.pop_back(); |
8199 | SDValue Mul = DAG.getNode(Opcode: ISD::STRICT_FMUL, DL: sdl, VTList: VTs, Ops: Opers, Flags); |
8200 | pushOutChain(Mul, EB); |
8201 | Opcode = ISD::STRICT_FADD; |
8202 | Opers.clear(); |
8203 | Opers.push_back(Elt: Mul.getValue(R: 1)); |
8204 | Opers.push_back(Elt: Mul.getValue(R: 0)); |
8205 | Opers.push_back(Elt: getValue(V: FPI.getArgOperand(i: 2))); |
8206 | } |
8207 | break; |
8208 | } |
8209 | } |
8210 | |
8211 | // A few strict DAG nodes carry additional operands that are not |
8212 | // set up by the default code above. |
8213 | switch (Opcode) { |
8214 | default: break; |
8215 | case ISD::STRICT_FP_ROUND: |
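    // STRICT_FP_ROUND carries FP_ROUND's extra 'trunc' flag operand; 0 is the
    // conservative setting, i.e. the rounded value may differ from the input
    // (see the FP_ROUND description in ISDOpcodes.h).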
8216 | Opers.push_back( |
8217 | Elt: DAG.getTargetConstant(Val: 0, DL: sdl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()))); |
8218 | break; |
8219 | case ISD::STRICT_FSETCC: |
8220 | case ISD::STRICT_FSETCCS: { |
8221 | auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(Val: &FPI); |
8222 | ISD::CondCode Condition = getFCmpCondCode(Pred: FPCmp->getPredicate()); |
8223 | if (TM.Options.NoNaNsFPMath) |
8224 | Condition = getFCmpCodeWithoutNaN(CC: Condition); |
8225 | Opers.push_back(Elt: DAG.getCondCode(Cond: Condition)); |
8226 | break; |
8227 | } |
8228 | } |
8229 | |
8230 | SDValue Result = DAG.getNode(Opcode, DL: sdl, VTList: VTs, Ops: Opers, Flags); |
8231 | pushOutChain(Result, EB); |
8232 | |
8233 | SDValue FPResult = Result.getValue(R: 0); |
8234 | setValue(V: &FPI, NewN: FPResult); |
8235 | } |
8236 | |
8237 | static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { |
8238 | std::optional<unsigned> ResOPC; |
8239 | switch (VPIntrin.getIntrinsicID()) { |
8240 | case Intrinsic::vp_ctlz: { |
8241 | bool IsZeroUndef = cast<ConstantInt>(Val: VPIntrin.getArgOperand(i: 1))->isOne(); |
8242 | ResOPC = IsZeroUndef ? ISD::VP_CTLZ_ZERO_UNDEF : ISD::VP_CTLZ; |
8243 | break; |
8244 | } |
8245 | case Intrinsic::vp_cttz: { |
8246 | bool IsZeroUndef = cast<ConstantInt>(Val: VPIntrin.getArgOperand(i: 1))->isOne(); |
8247 | ResOPC = IsZeroUndef ? ISD::VP_CTTZ_ZERO_UNDEF : ISD::VP_CTTZ; |
8248 | break; |
8249 | } |
8250 | case Intrinsic::vp_cttz_elts: { |
8251 | bool IsZeroPoison = cast<ConstantInt>(Val: VPIntrin.getArgOperand(i: 1))->isOne(); |
8252 | ResOPC = IsZeroPoison ? ISD::VP_CTTZ_ELTS_ZERO_UNDEF : ISD::VP_CTTZ_ELTS; |
8253 | break; |
8254 | } |
8255 | #define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \ |
8256 | case Intrinsic::VPID: \ |
8257 | ResOPC = ISD::VPSD; \ |
8258 | break; |
8259 | #include "llvm/IR/VPIntrinsics.def" |
8260 | } |
8261 | |
8262 | if (!ResOPC) |
8263 | llvm_unreachable( |
        "Inconsistency: no SDNode available for this VPIntrinsic!");
8265 | |
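  // When reassociation is allowed, a sequential (ordered) FP reduction may be
  // relaxed to the cheaper unordered form.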
8266 | if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD || |
8267 | *ResOPC == ISD::VP_REDUCE_SEQ_FMUL) { |
8268 | if (VPIntrin.getFastMathFlags().allowReassoc()) |
8269 | return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD |
8270 | : ISD::VP_REDUCE_FMUL; |
8271 | } |
8272 | |
8273 | return *ResOPC; |
8274 | } |
8275 | |
8276 | void SelectionDAGBuilder::visitVPLoad( |
8277 | const VPIntrinsic &VPIntrin, EVT VT, |
8278 | const SmallVectorImpl<SDValue> &OpValues) { |
8279 | SDLoc DL = getCurSDLoc(); |
8280 | Value *PtrOperand = VPIntrin.getArgOperand(i: 0); |
8281 | MaybeAlign Alignment = VPIntrin.getPointerAlignment(); |
8282 | AAMDNodes AAInfo = VPIntrin.getAAMetadata(); |
8283 | const MDNode *Ranges = getRangeMetadata(I: VPIntrin); |
8284 | SDValue LD; |
  if (!Alignment)
    Alignment = DAG.getEVTAlign(MemoryVT: VT);
  // Do not serialize variable-length loads of constant memory with
  // anything.
  MemoryLocation ML = MemoryLocation::getAfter(Ptr: PtrOperand, AATags: AAInfo);
  bool AddToChain = !AA || !AA->pointsToConstantMemory(Loc: ML);
8291 | SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); |
8292 | MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( |
8293 | PtrInfo: MachinePointerInfo(PtrOperand), F: MachineMemOperand::MOLoad, |
8294 | Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: *Alignment, AAInfo, Ranges); |
8295 | LD = DAG.getLoadVP(VT, dl: DL, Chain: InChain, Ptr: OpValues[0], Mask: OpValues[1], EVL: OpValues[2], |
                     MMO, IsExpanding: false /*IsExpanding*/);
8297 | if (AddToChain) |
8298 | PendingLoads.push_back(Elt: LD.getValue(R: 1)); |
8299 | setValue(V: &VPIntrin, NewN: LD); |
8300 | } |
8301 | |
8302 | void SelectionDAGBuilder::visitVPGather( |
8303 | const VPIntrinsic &VPIntrin, EVT VT, |
8304 | const SmallVectorImpl<SDValue> &OpValues) { |
8305 | SDLoc DL = getCurSDLoc(); |
8306 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8307 | Value *PtrOperand = VPIntrin.getArgOperand(i: 0); |
8308 | MaybeAlign Alignment = VPIntrin.getPointerAlignment(); |
8309 | AAMDNodes AAInfo = VPIntrin.getAAMetadata(); |
8310 | const MDNode *Ranges = getRangeMetadata(I: VPIntrin); |
8311 | SDValue LD; |
8312 | if (!Alignment) |
8313 | Alignment = DAG.getEVTAlign(MemoryVT: VT.getScalarType()); |
8314 | unsigned AS = |
8315 | PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); |
8316 | MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( |
8317 | PtrInfo: MachinePointerInfo(AS), F: MachineMemOperand::MOLoad, |
8318 | Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: *Alignment, AAInfo, Ranges); |
8319 | SDValue Base, Index, Scale; |
8320 | ISD::MemIndexType IndexType; |
8321 | bool UniformBase = getUniformBase(Ptr: PtrOperand, Base, Index, IndexType, Scale, |
8322 | SDB: this, CurBB: VPIntrin.getParent(), |
8323 | ElemSize: VT.getScalarStoreSize()); |
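  // If no common scalar base can be split out of the pointer vector, gather
  // from a null base using the pointers themselves as signed indices with a
  // scale of 1.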
8324 | if (!UniformBase) { |
8325 | Base = DAG.getConstant(Val: 0, DL, VT: TLI.getPointerTy(DL: DAG.getDataLayout())); |
8326 | Index = getValue(V: PtrOperand); |
8327 | IndexType = ISD::SIGNED_SCALED; |
8328 | Scale = DAG.getTargetConstant(Val: 1, DL, VT: TLI.getPointerTy(DL: DAG.getDataLayout())); |
8329 | } |
8330 | EVT IdxVT = Index.getValueType(); |
8331 | EVT EltTy = IdxVT.getVectorElementType(); |
8332 | if (TLI.shouldExtendGSIndex(VT: IdxVT, EltTy)) { |
8333 | EVT NewIdxVT = IdxVT.changeVectorElementType(EltVT: EltTy); |
8334 | Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: NewIdxVT, Operand: Index); |
8335 | } |
8336 | LD = DAG.getGatherVP( |
8337 | VTs: DAG.getVTList(VT1: VT, VT2: MVT::Other), VT, dl: DL, |
8338 | Ops: {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO, |
8339 | IndexType); |
8340 | PendingLoads.push_back(Elt: LD.getValue(R: 1)); |
8341 | setValue(V: &VPIntrin, NewN: LD); |
8342 | } |
8343 | |
8344 | void SelectionDAGBuilder::visitVPStore( |
8345 | const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) { |
8346 | SDLoc DL = getCurSDLoc(); |
8347 | Value *PtrOperand = VPIntrin.getArgOperand(i: 1); |
8348 | EVT VT = OpValues[0].getValueType(); |
8349 | MaybeAlign Alignment = VPIntrin.getPointerAlignment(); |
8350 | AAMDNodes AAInfo = VPIntrin.getAAMetadata(); |
8351 | SDValue ST; |
8352 | if (!Alignment) |
8353 | Alignment = DAG.getEVTAlign(MemoryVT: VT); |
8354 | SDValue Ptr = OpValues[1]; |
8355 | SDValue Offset = DAG.getUNDEF(VT: Ptr.getValueType()); |
8356 | MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( |
8357 | PtrInfo: MachinePointerInfo(PtrOperand), F: MachineMemOperand::MOStore, |
8358 | Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: *Alignment, AAInfo); |
8359 | ST = DAG.getStoreVP(Chain: getMemoryRoot(), dl: DL, Val: OpValues[0], Ptr, Offset, |
8360 | Mask: OpValues[2], EVL: OpValues[3], MemVT: VT, MMO, AM: ISD::UNINDEXED, |
                      /*IsTruncating*/ false, /*IsCompressing*/ false);
8362 | DAG.setRoot(ST); |
8363 | setValue(V: &VPIntrin, NewN: ST); |
8364 | } |
8365 | |
8366 | void SelectionDAGBuilder::visitVPScatter( |
8367 | const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) { |
8368 | SDLoc DL = getCurSDLoc(); |
8369 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8370 | Value *PtrOperand = VPIntrin.getArgOperand(i: 1); |
8371 | EVT VT = OpValues[0].getValueType(); |
8372 | MaybeAlign Alignment = VPIntrin.getPointerAlignment(); |
8373 | AAMDNodes AAInfo = VPIntrin.getAAMetadata(); |
8374 | SDValue ST; |
8375 | if (!Alignment) |
8376 | Alignment = DAG.getEVTAlign(MemoryVT: VT.getScalarType()); |
8377 | unsigned AS = |
8378 | PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); |
8379 | MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( |
8380 | PtrInfo: MachinePointerInfo(AS), F: MachineMemOperand::MOStore, |
8381 | Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: *Alignment, AAInfo); |
8382 | SDValue Base, Index, Scale; |
8383 | ISD::MemIndexType IndexType; |
8384 | bool UniformBase = getUniformBase(Ptr: PtrOperand, Base, Index, IndexType, Scale, |
8385 | SDB: this, CurBB: VPIntrin.getParent(), |
8386 | ElemSize: VT.getScalarStoreSize()); |
8387 | if (!UniformBase) { |
8388 | Base = DAG.getConstant(Val: 0, DL, VT: TLI.getPointerTy(DL: DAG.getDataLayout())); |
8389 | Index = getValue(V: PtrOperand); |
8390 | IndexType = ISD::SIGNED_SCALED; |
8391 | Scale = |
8392 | DAG.getTargetConstant(Val: 1, DL, VT: TLI.getPointerTy(DL: DAG.getDataLayout())); |
8393 | } |
8394 | EVT IdxVT = Index.getValueType(); |
8395 | EVT EltTy = IdxVT.getVectorElementType(); |
8396 | if (TLI.shouldExtendGSIndex(VT: IdxVT, EltTy)) { |
8397 | EVT NewIdxVT = IdxVT.changeVectorElementType(EltVT: EltTy); |
8398 | Index = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: NewIdxVT, Operand: Index); |
8399 | } |
8400 | ST = DAG.getScatterVP(VTs: DAG.getVTList(VT: MVT::Other), VT, dl: DL, |
8401 | Ops: {getMemoryRoot(), OpValues[0], Base, Index, Scale, |
8402 | OpValues[2], OpValues[3]}, |
8403 | MMO, IndexType); |
8404 | DAG.setRoot(ST); |
8405 | setValue(V: &VPIntrin, NewN: ST); |
8406 | } |
8407 | |
8408 | void SelectionDAGBuilder::visitVPStridedLoad( |
8409 | const VPIntrinsic &VPIntrin, EVT VT, |
8410 | const SmallVectorImpl<SDValue> &OpValues) { |
8411 | SDLoc DL = getCurSDLoc(); |
8412 | Value *PtrOperand = VPIntrin.getArgOperand(i: 0); |
8413 | MaybeAlign Alignment = VPIntrin.getPointerAlignment(); |
8414 | if (!Alignment) |
8415 | Alignment = DAG.getEVTAlign(MemoryVT: VT.getScalarType()); |
8416 | AAMDNodes AAInfo = VPIntrin.getAAMetadata(); |
8417 | const MDNode *Ranges = getRangeMetadata(I: VPIntrin); |
8418 | MemoryLocation ML = MemoryLocation::getAfter(Ptr: PtrOperand, AATags: AAInfo); |
8419 | bool AddToChain = !AA || !AA->pointsToConstantMemory(Loc: ML); |
8420 | SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); |
8421 | unsigned AS = PtrOperand->getType()->getPointerAddressSpace(); |
8422 | MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( |
8423 | PtrInfo: MachinePointerInfo(AS), F: MachineMemOperand::MOLoad, |
8424 | Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: *Alignment, AAInfo, Ranges); |
8425 | |
8426 | SDValue LD = DAG.getStridedLoadVP(VT, DL, Chain: InChain, Ptr: OpValues[0], Stride: OpValues[1], |
8427 | Mask: OpValues[2], EVL: OpValues[3], MMO, |
8428 | IsExpanding: false /*IsExpanding*/); |
8429 | |
8430 | if (AddToChain) |
8431 | PendingLoads.push_back(Elt: LD.getValue(R: 1)); |
8432 | setValue(V: &VPIntrin, NewN: LD); |
8433 | } |
8434 | |
8435 | void SelectionDAGBuilder::visitVPStridedStore( |
8436 | const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) { |
8437 | SDLoc DL = getCurSDLoc(); |
8438 | Value *PtrOperand = VPIntrin.getArgOperand(i: 1); |
8439 | EVT VT = OpValues[0].getValueType(); |
8440 | MaybeAlign Alignment = VPIntrin.getPointerAlignment(); |
8441 | if (!Alignment) |
8442 | Alignment = DAG.getEVTAlign(MemoryVT: VT.getScalarType()); |
8443 | AAMDNodes AAInfo = VPIntrin.getAAMetadata(); |
8444 | unsigned AS = PtrOperand->getType()->getPointerAddressSpace(); |
8445 | MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( |
8446 | PtrInfo: MachinePointerInfo(AS), F: MachineMemOperand::MOStore, |
8447 | Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: *Alignment, AAInfo); |
8448 | |
8449 | SDValue ST = DAG.getStridedStoreVP( |
8450 | Chain: getMemoryRoot(), DL, Val: OpValues[0], Ptr: OpValues[1], |
8451 | Offset: DAG.getUNDEF(VT: OpValues[1].getValueType()), Stride: OpValues[2], Mask: OpValues[3], |
8452 | EVL: OpValues[4], MemVT: VT, MMO, AM: ISD::UNINDEXED, /*IsTruncating*/ false, |
8453 | /*IsCompressing*/ false); |
8454 | |
8455 | DAG.setRoot(ST); |
8456 | setValue(V: &VPIntrin, NewN: ST); |
8457 | } |
8458 | |
8459 | void SelectionDAGBuilder::visitVPCmp(const VPCmpIntrinsic &VPIntrin) { |
8460 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8461 | SDLoc DL = getCurSDLoc(); |
8462 | |
8463 | ISD::CondCode Condition; |
8464 | CmpInst::Predicate CondCode = VPIntrin.getPredicate(); |
8465 | bool IsFP = VPIntrin.getOperand(i_nocapture: 0)->getType()->isFPOrFPVectorTy(); |
8466 | if (IsFP) { |
8467 | // FIXME: Regular fcmps are FPMathOperators which may have fast-math (nnan) |
8468 | // flags, but calls that don't return floating-point types can't be |
8469 | // FPMathOperators, like vp.fcmp. This affects constrained fcmp too. |
8470 | Condition = getFCmpCondCode(Pred: CondCode); |
8471 | if (TM.Options.NoNaNsFPMath) |
8472 | Condition = getFCmpCodeWithoutNaN(CC: Condition); |
8473 | } else { |
8474 | Condition = getICmpCondCode(Pred: CondCode); |
8475 | } |
8476 | |
8477 | SDValue Op1 = getValue(V: VPIntrin.getOperand(i_nocapture: 0)); |
8478 | SDValue Op2 = getValue(V: VPIntrin.getOperand(i_nocapture: 1)); |
8479 | // #2 is the condition code |
8480 | SDValue MaskOp = getValue(V: VPIntrin.getOperand(i_nocapture: 3)); |
8481 | SDValue EVL = getValue(V: VPIntrin.getOperand(i_nocapture: 4)); |
8482 | MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy(); |
8483 | assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) && |
         "Unexpected target EVL type");
8485 | EVL = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: EVLParamVT, Operand: EVL); |
8486 | |
8487 | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(), |
8488 | Ty: VPIntrin.getType()); |
8489 | setValue(V: &VPIntrin, |
8490 | NewN: DAG.getSetCCVP(DL, VT: DestVT, LHS: Op1, RHS: Op2, Cond: Condition, Mask: MaskOp, EVL)); |
8491 | } |
8492 | |
8493 | void SelectionDAGBuilder::visitVectorPredicationIntrinsic( |
8494 | const VPIntrinsic &VPIntrin) { |
8495 | SDLoc DL = getCurSDLoc(); |
8496 | unsigned Opcode = getISDForVPIntrinsic(VPIntrin); |
8497 | |
8498 | auto IID = VPIntrin.getIntrinsicID(); |
8499 | |
8500 | if (const auto *CmpI = dyn_cast<VPCmpIntrinsic>(Val: &VPIntrin)) |
8501 | return visitVPCmp(VPIntrin: *CmpI); |
8502 | |
8503 | SmallVector<EVT, 4> ValueVTs; |
8504 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8505 | ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: VPIntrin.getType(), ValueVTs); |
8506 | SDVTList VTs = DAG.getVTList(VTs: ValueVTs); |
8507 | |
8508 | auto EVLParamPos = VPIntrinsic::getVectorLengthParamPos(IntrinsicID: IID); |
8509 | |
8510 | MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy(); |
8511 | assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) && |
         "Unexpected target EVL type");
8513 | |
8514 | // Request operands. |
8515 | SmallVector<SDValue, 7> OpValues; |
8516 | for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) { |
8517 | auto Op = getValue(V: VPIntrin.getArgOperand(i: I)); |
8518 | if (I == EVLParamPos) |
8519 | Op = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: EVLParamVT, Operand: Op); |
8520 | OpValues.push_back(Elt: Op); |
8521 | } |
8522 | |
8523 | switch (Opcode) { |
8524 | default: { |
8525 | SDNodeFlags SDFlags; |
8526 | if (auto *FPMO = dyn_cast<FPMathOperator>(Val: &VPIntrin)) |
8527 | SDFlags.copyFMF(FPMO: *FPMO); |
8528 | SDValue Result = DAG.getNode(Opcode, DL, VTList: VTs, Ops: OpValues, Flags: SDFlags); |
8529 | setValue(V: &VPIntrin, NewN: Result); |
8530 | break; |
8531 | } |
8532 | case ISD::VP_LOAD: |
8533 | visitVPLoad(VPIntrin, VT: ValueVTs[0], OpValues); |
8534 | break; |
8535 | case ISD::VP_GATHER: |
8536 | visitVPGather(VPIntrin, VT: ValueVTs[0], OpValues); |
8537 | break; |
8538 | case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: |
8539 | visitVPStridedLoad(VPIntrin, VT: ValueVTs[0], OpValues); |
8540 | break; |
8541 | case ISD::VP_STORE: |
8542 | visitVPStore(VPIntrin, OpValues); |
8543 | break; |
8544 | case ISD::VP_SCATTER: |
8545 | visitVPScatter(VPIntrin, OpValues); |
8546 | break; |
8547 | case ISD::EXPERIMENTAL_VP_STRIDED_STORE: |
8548 | visitVPStridedStore(VPIntrin, OpValues); |
8549 | break; |
8550 | case ISD::VP_FMULADD: { |
    assert(OpValues.size() == 5 && "Unexpected number of operands");
8552 | SDNodeFlags SDFlags; |
8553 | if (auto *FPMO = dyn_cast<FPMathOperator>(Val: &VPIntrin)) |
8554 | SDFlags.copyFMF(FPMO: *FPMO); |
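    // Prefer a fused vp.fma when FP operation fusion is allowed and
    // profitable; otherwise expand to vp.fmul + vp.fadd, reusing the original
    // mask (OpValues[3]) and EVL (OpValues[4]) for both halves.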
8555 | if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && |
8556 | TLI.isFMAFasterThanFMulAndFAdd(MF: DAG.getMachineFunction(), ValueVTs[0])) { |
8557 | setValue(V: &VPIntrin, NewN: DAG.getNode(Opcode: ISD::VP_FMA, DL, VTList: VTs, Ops: OpValues, Flags: SDFlags)); |
8558 | } else { |
8559 | SDValue Mul = DAG.getNode( |
8560 | Opcode: ISD::VP_FMUL, DL, VTList: VTs, |
8561 | Ops: {OpValues[0], OpValues[1], OpValues[3], OpValues[4]}, Flags: SDFlags); |
8562 | SDValue Add = |
8563 | DAG.getNode(Opcode: ISD::VP_FADD, DL, VTList: VTs, |
8564 | Ops: {Mul, OpValues[2], OpValues[3], OpValues[4]}, Flags: SDFlags); |
8565 | setValue(V: &VPIntrin, NewN: Add); |
8566 | } |
8567 | break; |
8568 | } |
8569 | case ISD::VP_IS_FPCLASS: { |
8570 | const DataLayout DLayout = DAG.getDataLayout(); |
8571 | EVT DestVT = TLI.getValueType(DL: DLayout, Ty: VPIntrin.getType()); |
8572 | auto Constant = OpValues[1]->getAsZExtVal(); |
8573 | SDValue Check = DAG.getTargetConstant(Val: Constant, DL, VT: MVT::i32); |
8574 | SDValue V = DAG.getNode(Opcode: ISD::VP_IS_FPCLASS, DL, VT: DestVT, |
8575 | Ops: {OpValues[0], Check, OpValues[2], OpValues[3]}); |
8576 | setValue(V: &VPIntrin, NewN: V); |
8577 | return; |
8578 | } |
8579 | case ISD::VP_INTTOPTR: { |
8580 | SDValue N = OpValues[0]; |
8581 | EVT DestVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: VPIntrin.getType()); |
8582 | EVT PtrMemVT = TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: VPIntrin.getType()); |
8583 | N = DAG.getVPPtrExtOrTrunc(DL: getCurSDLoc(), VT: DestVT, Op: N, Mask: OpValues[1], |
8584 | EVL: OpValues[2]); |
8585 | N = DAG.getVPZExtOrTrunc(DL: getCurSDLoc(), VT: PtrMemVT, Op: N, Mask: OpValues[1], |
8586 | EVL: OpValues[2]); |
8587 | setValue(V: &VPIntrin, NewN: N); |
8588 | break; |
8589 | } |
8590 | case ISD::VP_PTRTOINT: { |
8591 | SDValue N = OpValues[0]; |
8592 | EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(), |
8593 | Ty: VPIntrin.getType()); |
8594 | EVT PtrMemVT = TLI.getMemValueType(DL: DAG.getDataLayout(), |
8595 | Ty: VPIntrin.getOperand(i_nocapture: 0)->getType()); |
8596 | N = DAG.getVPPtrExtOrTrunc(DL: getCurSDLoc(), VT: PtrMemVT, Op: N, Mask: OpValues[1], |
8597 | EVL: OpValues[2]); |
8598 | N = DAG.getVPZExtOrTrunc(DL: getCurSDLoc(), VT: DestVT, Op: N, Mask: OpValues[1], |
8599 | EVL: OpValues[2]); |
8600 | setValue(V: &VPIntrin, NewN: N); |
8601 | break; |
8602 | } |
8603 | case ISD::VP_ABS: |
8604 | case ISD::VP_CTLZ: |
8605 | case ISD::VP_CTLZ_ZERO_UNDEF: |
8606 | case ISD::VP_CTTZ: |
8607 | case ISD::VP_CTTZ_ZERO_UNDEF: |
8608 | case ISD::VP_CTTZ_ELTS_ZERO_UNDEF: |
8609 | case ISD::VP_CTTZ_ELTS: { |
8610 | SDValue Result = |
8611 | DAG.getNode(Opcode, DL, VTList: VTs, Ops: {OpValues[0], OpValues[2], OpValues[3]}); |
8612 | setValue(V: &VPIntrin, NewN: Result); |
8613 | break; |
8614 | } |
8615 | } |
8616 | } |
8617 | |
8618 | SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain, |
8619 | const BasicBlock *EHPadBB, |
8620 | MCSymbol *&BeginLabel) { |
8621 | MachineFunction &MF = DAG.getMachineFunction(); |
8622 | MachineModuleInfo &MMI = MF.getMMI(); |
8623 | |
8624 | // Insert a label before the invoke call to mark the try range. This can be |
8625 | // used to detect deletion of the invoke via the MachineModuleInfo. |
8626 | BeginLabel = MF.getContext().createTempSymbol(); |
8627 | |
8628 | // For SjLj, keep track of which landing pads go with which invokes |
8629 | // so as to maintain the ordering of pads in the LSDA. |
8630 | unsigned CallSiteIndex = MMI.getCurrentCallSite(); |
8631 | if (CallSiteIndex) { |
8632 | MF.setCallSiteBeginLabel(BeginLabel, Site: CallSiteIndex); |
8633 | LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(Elt: CallSiteIndex); |
8634 | |
8635 | // Now that the call site is handled, stop tracking it. |
8636 | MMI.setCurrentCallSite(0); |
8637 | } |
8638 | |
8639 | return DAG.getEHLabel(dl: getCurSDLoc(), Root: Chain, Label: BeginLabel); |
8640 | } |
8641 | |
8642 | SDValue SelectionDAGBuilder::lowerEndEH(SDValue Chain, const InvokeInst *II, |
8643 | const BasicBlock *EHPadBB, |
8644 | MCSymbol *BeginLabel) { |
8645 | assert(BeginLabel && "BeginLabel should've been set" ); |
8646 | |
8647 | MachineFunction &MF = DAG.getMachineFunction(); |
8648 | |
8649 | // Insert a label at the end of the invoke call to mark the try range. This |
8650 | // can be used to detect deletion of the invoke via the MachineModuleInfo. |
8651 | MCSymbol *EndLabel = MF.getContext().createTempSymbol(); |
8652 | Chain = DAG.getEHLabel(dl: getCurSDLoc(), Root: Chain, Label: EndLabel); |
8653 | |
8654 | // Inform MachineModuleInfo of range. |
8655 | auto Pers = classifyEHPersonality(Pers: FuncInfo.Fn->getPersonalityFn()); |
// Some platforms (e.g. wasm) use funclet-style IR but do not actually use
// outlined funclets or their LSDA info style.
8658 | if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) { |
8659 | assert(II && "II should've been set" ); |
8660 | WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo(); |
8661 | EHInfo->addIPToStateRange(II, InvokeBegin: BeginLabel, InvokeEnd: EndLabel); |
8662 | } else if (!isScopedEHPersonality(Pers)) { |
8663 | assert(EHPadBB); |
8664 | MF.addInvoke(LandingPad: FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); |
8665 | } |
8666 | |
8667 | return Chain; |
8668 | } |
8669 | |
8670 | std::pair<SDValue, SDValue> |
8671 | SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, |
8672 | const BasicBlock *EHPadBB) { |
8673 | MCSymbol *BeginLabel = nullptr; |
8674 | |
8675 | if (EHPadBB) { |
8676 | // Both PendingLoads and PendingExports must be flushed here; |
8677 | // this call might not return. |
8678 | (void)getRoot(); |
8679 | DAG.setRoot(lowerStartEH(Chain: getControlRoot(), EHPadBB, BeginLabel)); |
8680 | CLI.setChain(getRoot()); |
8681 | } |
8682 | |
8683 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8684 | std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); |
8685 | |
8686 | assert((CLI.IsTailCall || Result.second.getNode()) && |
8687 | "Non-null chain expected with non-tail call!" ); |
8688 | assert((Result.second.getNode() || !Result.first.getNode()) && |
8689 | "Null value expected with tail call!" ); |
8690 | |
8691 | if (!Result.second.getNode()) { |
8692 | // As a special case, a null chain means that a tail call has been emitted |
8693 | // and the DAG root is already updated. |
8694 | HasTailCall = true; |
8695 | |
8696 | // Since there's no actual continuation from this block, nothing can be |
8697 | // relying on us setting vregs for them. |
8698 | PendingExports.clear(); |
8699 | } else { |
8700 | DAG.setRoot(Result.second); |
8701 | } |
8702 | |
8703 | if (EHPadBB) { |
8704 | DAG.setRoot(lowerEndEH(Chain: getRoot(), II: cast_or_null<InvokeInst>(Val: CLI.CB), EHPadBB, |
8705 | BeginLabel)); |
8706 | Result.second = getRoot(); |
8707 | } |
8708 | |
8709 | return Result; |
8710 | } |
8711 | |
8712 | void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, |
8713 | bool isTailCall, bool isMustTailCall, |
8714 | const BasicBlock *EHPadBB, |
8715 | const TargetLowering::PtrAuthInfo *PAI) { |
8716 | auto &DL = DAG.getDataLayout(); |
8717 | FunctionType *FTy = CB.getFunctionType(); |
8718 | Type *RetTy = CB.getType(); |
8719 | |
8720 | TargetLowering::ArgListTy Args; |
8721 | Args.reserve(n: CB.arg_size()); |
8722 | |
8723 | const Value *SwiftErrorVal = nullptr; |
8724 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8725 | |
8726 | if (isTailCall) { |
8727 | // Avoid emitting tail calls in functions with the disable-tail-calls |
8728 | // attribute. |
8729 | auto *Caller = CB.getParent()->getParent(); |
8730 | if (Caller->getFnAttribute(Kind: "disable-tail-calls" ).getValueAsString() == |
8731 | "true" && !isMustTailCall) |
8732 | isTailCall = false; |
8733 | |
8734 | // We can't tail call inside a function with a swifterror argument. Lowering |
8735 | // does not support this yet. It would have to move into the swifterror |
8736 | // register before the call. |
8737 | if (TLI.supportSwiftError() && |
8738 | Caller->getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError)) |
8739 | isTailCall = false; |
8740 | } |
8741 | |
8742 | for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) { |
8743 | TargetLowering::ArgListEntry Entry; |
8744 | const Value *V = *I; |
8745 | |
8746 | // Skip empty types |
8747 | if (V->getType()->isEmptyTy()) |
8748 | continue; |
8749 | |
8750 | SDValue ArgNode = getValue(V); |
8751 | Entry.Node = ArgNode; Entry.Ty = V->getType(); |
8752 | |
8753 | Entry.setAttributes(Call: &CB, ArgIdx: I - CB.arg_begin()); |
8754 | |
8755 | // Use swifterror virtual register as input to the call. |
8756 | if (Entry.IsSwiftError && TLI.supportSwiftError()) { |
8757 | SwiftErrorVal = V; |
8758 | // We find the virtual register for the actual swifterror argument. |
8759 | // Instead of using the Value, we use the virtual register instead. |
8760 | Entry.Node = |
8761 | DAG.getRegister(Reg: SwiftError.getOrCreateVRegUseAt(&CB, FuncInfo.MBB, V), |
8762 | VT: EVT(TLI.getPointerTy(DL))); |
8763 | } |
8764 | |
8765 | Args.push_back(x: Entry); |
8766 | |
// If we have an explicit sret argument that is an Instruction (i.e., it
// might point to function-local memory), we can't meaningfully tail-call.
8769 | if (Entry.IsSRet && isa<Instruction>(Val: V)) |
8770 | isTailCall = false; |
8771 | } |
8772 | |
8773 | // If call site has a cfguardtarget operand bundle, create and add an |
8774 | // additional ArgListEntry. |
8775 | if (auto Bundle = CB.getOperandBundle(ID: LLVMContext::OB_cfguardtarget)) { |
8776 | TargetLowering::ArgListEntry Entry; |
8777 | Value *V = Bundle->Inputs[0]; |
8778 | SDValue ArgNode = getValue(V); |
8779 | Entry.Node = ArgNode; |
8780 | Entry.Ty = V->getType(); |
8781 | Entry.IsCFGuardTarget = true; |
8782 | Args.push_back(x: Entry); |
8783 | } |
8784 | |
8785 | // Check if target-independent constraints permit a tail call here. |
8786 | // Target-dependent constraints are checked within TLI->LowerCallTo. |
8787 | if (isTailCall && !isInTailCallPosition(Call: CB, TM: DAG.getTarget())) |
8788 | isTailCall = false; |
8789 | |
// Disable tail calls if there is a swifterror argument. Targets have not
// been updated to support tail calls with swifterror yet.
8792 | if (TLI.supportSwiftError() && SwiftErrorVal) |
8793 | isTailCall = false; |
8794 | |
8795 | ConstantInt *CFIType = nullptr; |
8796 | if (CB.isIndirectCall()) { |
8797 | if (auto Bundle = CB.getOperandBundle(ID: LLVMContext::OB_kcfi)) { |
8798 | if (!TLI.supportKCFIBundles()) |
8799 | report_fatal_error( |
8800 | reason: "Target doesn't support calls with kcfi operand bundles." ); |
8801 | CFIType = cast<ConstantInt>(Val: Bundle->Inputs[0]); |
8802 | assert(CFIType->getType()->isIntegerTy(32) && "Invalid CFI type" ); |
8803 | } |
8804 | } |
8805 | |
8806 | SDValue ConvControlToken; |
8807 | if (auto Bundle = CB.getOperandBundle(ID: LLVMContext::OB_convergencectrl)) { |
8808 | auto *Token = Bundle->Inputs[0].get(); |
8809 | ConvControlToken = getValue(V: Token); |
8810 | } |
8811 | |
8812 | TargetLowering::CallLoweringInfo CLI(DAG); |
8813 | CLI.setDebugLoc(getCurSDLoc()) |
8814 | .setChain(getRoot()) |
8815 | .setCallee(ResultType: RetTy, FTy, Target: Callee, ArgsList: std::move(Args), Call: CB) |
8816 | .setTailCall(isTailCall) |
8817 | .setConvergent(CB.isConvergent()) |
8818 | .setIsPreallocated( |
8819 | CB.countOperandBundlesOfType(ID: LLVMContext::OB_preallocated) != 0) |
8820 | .setCFIType(CFIType) |
8821 | .setConvergenceControlToken(ConvControlToken); |
8822 | |
8823 | // Set the pointer authentication info if we have it. |
8824 | if (PAI) { |
8825 | if (!TLI.supportPtrAuthBundles()) |
8826 | report_fatal_error( |
8827 | reason: "This target doesn't support calls with ptrauth operand bundles." ); |
8828 | CLI.setPtrAuth(*PAI); |
8829 | } |
8830 | |
8831 | std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); |
8832 | |
8833 | if (Result.first.getNode()) { |
8834 | Result.first = lowerRangeToAssertZExt(DAG, I: CB, Op: Result.first); |
8835 | setValue(V: &CB, NewN: Result.first); |
8836 | } |
8837 | |
8838 | // The last element of CLI.InVals has the SDValue for swifterror return. |
8839 | // Here we copy it to a virtual register and update SwiftErrorMap for |
8840 | // book-keeping. |
8841 | if (SwiftErrorVal && TLI.supportSwiftError()) { |
8842 | // Get the last element of InVals. |
8843 | SDValue Src = CLI.InVals.back(); |
8844 | Register VReg = |
8845 | SwiftError.getOrCreateVRegDefAt(&CB, FuncInfo.MBB, SwiftErrorVal); |
8846 | SDValue CopyNode = CLI.DAG.getCopyToReg(Chain: Result.second, dl: CLI.DL, Reg: VReg, N: Src); |
8847 | DAG.setRoot(CopyNode); |
8848 | } |
8849 | } |
8850 | |
8851 | static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, |
8852 | SelectionDAGBuilder &Builder) { |
8853 | // Check to see if this load can be trivially constant folded, e.g. if the |
8854 | // input is from a string literal. |
8855 | if (const Constant *LoadInput = dyn_cast<Constant>(Val: PtrVal)) { |
8856 | // Cast pointer to the type we really want to load. |
8857 | Type *LoadTy = |
8858 | Type::getIntNTy(C&: PtrVal->getContext(), N: LoadVT.getScalarSizeInBits()); |
8859 | if (LoadVT.isVector()) |
8860 | LoadTy = FixedVectorType::get(ElementType: LoadTy, NumElts: LoadVT.getVectorNumElements()); |
8861 | |
8862 | LoadInput = ConstantExpr::getBitCast(C: const_cast<Constant *>(LoadInput), |
8863 | Ty: PointerType::getUnqual(ElementType: LoadTy)); |
8864 | |
8865 | if (const Constant *LoadCst = |
8866 | ConstantFoldLoadFromConstPtr(C: const_cast<Constant *>(LoadInput), |
8867 | Ty: LoadTy, DL: Builder.DAG.getDataLayout())) |
8868 | return Builder.getValue(V: LoadCst); |
8869 | } |
8870 | |
8871 | // Otherwise, we have to emit the load. If the pointer is to unfoldable but |
8872 | // still constant memory, the input chain can be the entry node. |
8873 | SDValue Root; |
8874 | bool ConstantMemory = false; |
8875 | |
8876 | // Do not serialize (non-volatile) loads of constant memory with anything. |
8877 | if (Builder.AA && Builder.AA->pointsToConstantMemory(P: PtrVal)) { |
8878 | Root = Builder.DAG.getEntryNode(); |
8879 | ConstantMemory = true; |
8880 | } else { |
8881 | // Do not serialize non-volatile loads against each other. |
8882 | Root = Builder.DAG.getRoot(); |
8883 | } |
8884 | |
8885 | SDValue Ptr = Builder.getValue(V: PtrVal); |
8886 | SDValue LoadVal = |
8887 | Builder.DAG.getLoad(VT: LoadVT, dl: Builder.getCurSDLoc(), Chain: Root, Ptr, |
8888 | PtrInfo: MachinePointerInfo(PtrVal), Alignment: Align(1)); |
8889 | |
8890 | if (!ConstantMemory) |
8891 | Builder.PendingLoads.push_back(Elt: LoadVal.getValue(R: 1)); |
8892 | return LoadVal; |
8893 | } |
8894 | |
8895 | /// Record the value for an instruction that produces an integer result, |
8896 | /// converting the type where necessary. |
8897 | void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, |
8898 | SDValue Value, |
8899 | bool IsSigned) { |
8900 | EVT VT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(), |
8901 | Ty: I.getType(), AllowUnknown: true); |
8902 | Value = DAG.getExtOrTrunc(IsSigned, Op: Value, DL: getCurSDLoc(), VT); |
8903 | setValue(V: &I, NewN: Value); |
8904 | } |
8905 | |
8906 | /// See if we can lower a memcmp/bcmp call into an optimized form. If so, return |
8907 | /// true and lower it. Otherwise return false, and it will be lowered like a |
8908 | /// normal call. |
8909 | /// The caller already checked that \p I calls the appropriate LibFunc with a |
8910 | /// correct prototype. |
8911 | bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) { |
8912 | const Value *LHS = I.getArgOperand(i: 0), *RHS = I.getArgOperand(i: 1); |
8913 | const Value *Size = I.getArgOperand(i: 2); |
8914 | const ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Val: getValue(V: Size)); |
8915 | if (CSize && CSize->getZExtValue() == 0) { |
8916 | EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DL: DAG.getDataLayout(), |
8917 | Ty: I.getType(), AllowUnknown: true); |
8918 | setValue(V: &I, NewN: DAG.getConstant(Val: 0, DL: getCurSDLoc(), VT: CallVT)); |
8919 | return true; |
8920 | } |
8921 | |
8922 | const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); |
8923 | std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp( |
8924 | DAG, dl: getCurSDLoc(), Chain: DAG.getRoot(), Op1: getValue(V: LHS), Op2: getValue(V: RHS), |
8925 | Op3: getValue(V: Size), Op1PtrInfo: MachinePointerInfo(LHS), Op2PtrInfo: MachinePointerInfo(RHS)); |
8926 | if (Res.first.getNode()) { |
8927 | processIntegerCallValue(I, Value: Res.first, IsSigned: true); |
8928 | PendingLoads.push_back(Elt: Res.second); |
8929 | return true; |
8930 | } |
8931 | |
8932 | // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 |
8933 | // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 |
8934 | if (!CSize || !isOnlyUsedInZeroEqualityComparison(CxtI: &I)) |
8935 | return false; |
8936 | |
8937 | // If the target has a fast compare for the given size, it will return a |
8938 | // preferred load type for that size. Require that the load VT is legal and |
8939 | // that the target supports unaligned loads of that type. Otherwise, return |
8940 | // INVALID. |
8941 | auto hasFastLoadsAndCompare = [&](unsigned NumBits) { |
8942 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8943 | MVT LVT = TLI.hasFastEqualityCompare(NumBits); |
8944 | if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) { |
8945 | // TODO: Handle 5 byte compare as 4-byte + 1 byte. |
8946 | // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. |
8947 | // TODO: Check alignment of src and dest ptrs. |
8948 | unsigned DstAS = LHS->getType()->getPointerAddressSpace(); |
8949 | unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); |
8950 | if (!TLI.isTypeLegal(VT: LVT) || |
8951 | !TLI.allowsMisalignedMemoryAccesses(LVT, AddrSpace: SrcAS) || |
8952 | !TLI.allowsMisalignedMemoryAccesses(LVT, AddrSpace: DstAS)) |
8953 | LVT = MVT::INVALID_SIMPLE_VALUE_TYPE; |
8954 | } |
8955 | |
8956 | return LVT; |
8957 | }; |
8958 | |
8959 | // This turns into unaligned loads. We only do this if the target natively |
8960 | // supports the MVT we'll be loading or if it is small enough (<= 4) that |
8961 | // we'll only produce a small number of byte loads. |
8962 | MVT LoadVT; |
8963 | unsigned NumBitsToCompare = CSize->getZExtValue() * 8; |
8964 | switch (NumBitsToCompare) { |
8965 | default: |
8966 | return false; |
8967 | case 16: |
8968 | LoadVT = MVT::i16; |
8969 | break; |
8970 | case 32: |
8971 | LoadVT = MVT::i32; |
8972 | break; |
8973 | case 64: |
8974 | case 128: |
8975 | case 256: |
8976 | LoadVT = hasFastLoadsAndCompare(NumBitsToCompare); |
8977 | break; |
8978 | } |
8979 | |
8980 | if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE) |
8981 | return false; |
8982 | |
8983 | SDValue LoadL = getMemCmpLoad(PtrVal: LHS, LoadVT, Builder&: *this); |
8984 | SDValue LoadR = getMemCmpLoad(PtrVal: RHS, LoadVT, Builder&: *this); |
8985 | |
8986 | // Bitcast to a wide integer type if the loads are vectors. |
8987 | if (LoadVT.isVector()) { |
8988 | EVT CmpVT = EVT::getIntegerVT(Context&: LHS->getContext(), BitWidth: LoadVT.getSizeInBits()); |
8989 | LoadL = DAG.getBitcast(VT: CmpVT, V: LoadL); |
8990 | LoadR = DAG.getBitcast(VT: CmpVT, V: LoadR); |
8991 | } |
8992 | |
8993 | SDValue Cmp = DAG.getSetCC(DL: getCurSDLoc(), VT: MVT::i1, LHS: LoadL, RHS: LoadR, Cond: ISD::SETNE); |
8994 | processIntegerCallValue(I, Value: Cmp, IsSigned: false); |
8995 | return true; |
8996 | } |
8997 | |
8998 | /// See if we can lower a memchr call into an optimized form. If so, return |
8999 | /// true and lower it. Otherwise return false, and it will be lowered like a |
9000 | /// normal call. |
9001 | /// The caller already checked that \p I calls the appropriate LibFunc with a |
9002 | /// correct prototype. |
9003 | bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { |
9004 | const Value *Src = I.getArgOperand(i: 0); |
9005 | const Value *Char = I.getArgOperand(i: 1); |
9006 | const Value *Length = I.getArgOperand(i: 2); |
9007 | |
9008 | const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); |
9009 | std::pair<SDValue, SDValue> Res = |
9010 | TSI.EmitTargetCodeForMemchr(DAG, dl: getCurSDLoc(), Chain: DAG.getRoot(), |
9011 | Src: getValue(V: Src), Char: getValue(V: Char), Length: getValue(V: Length), |
9012 | SrcPtrInfo: MachinePointerInfo(Src)); |
9013 | if (Res.first.getNode()) { |
9014 | setValue(V: &I, NewN: Res.first); |
9015 | PendingLoads.push_back(Elt: Res.second); |
9016 | return true; |
9017 | } |
9018 | |
9019 | return false; |
9020 | } |
9021 | |
9022 | /// See if we can lower a mempcpy call into an optimized form. If so, return |
9023 | /// true and lower it. Otherwise return false, and it will be lowered like a |
9024 | /// normal call. |
9025 | /// The caller already checked that \p I calls the appropriate LibFunc with a |
9026 | /// correct prototype. |
9027 | bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { |
9028 | SDValue Dst = getValue(V: I.getArgOperand(i: 0)); |
9029 | SDValue Src = getValue(V: I.getArgOperand(i: 1)); |
9030 | SDValue Size = getValue(V: I.getArgOperand(i: 2)); |
9031 | |
9032 | Align DstAlign = DAG.InferPtrAlign(Ptr: Dst).valueOrOne(); |
9033 | Align SrcAlign = DAG.InferPtrAlign(Ptr: Src).valueOrOne(); |
9034 | // DAG::getMemcpy needs Alignment to be defined. |
9035 | Align Alignment = std::min(a: DstAlign, b: SrcAlign); |
9036 | |
9037 | SDLoc sdl = getCurSDLoc(); |
9038 | |
// In the mempcpy context we must not lower the memcpy as a tail call,
// because the return pointer needs to be adjusted by the size of
// the copied memory.
9042 | SDValue Root = getMemoryRoot(); |
9043 | SDValue MC = DAG.getMemcpy( |
9044 | Chain: Root, dl: sdl, Dst, Src, Size, Alignment, isVol: false, AlwaysInline: false, /*CI=*/nullptr, |
9045 | OverrideTailCall: std::nullopt, DstPtrInfo: MachinePointerInfo(I.getArgOperand(i: 0)), |
9046 | SrcPtrInfo: MachinePointerInfo(I.getArgOperand(i: 1)), AAInfo: I.getAAMetadata()); |
9047 | assert(MC.getNode() != nullptr && |
9048 | "** memcpy should not be lowered as TailCall in mempcpy context **" ); |
9049 | DAG.setRoot(MC); |
9050 | |
9051 | // Check if Size needs to be truncated or extended. |
9052 | Size = DAG.getSExtOrTrunc(Op: Size, DL: sdl, VT: Dst.getValueType()); |
9053 | |
9054 | // Adjust return pointer to point just past the last dst byte. |
9055 | SDValue DstPlusSize = DAG.getNode(Opcode: ISD::ADD, DL: sdl, VT: Dst.getValueType(), |
9056 | N1: Dst, N2: Size); |
9057 | setValue(V: &I, NewN: DstPlusSize); |
9058 | return true; |
9059 | } |
9060 | |
9061 | /// See if we can lower a strcpy call into an optimized form. If so, return |
9062 | /// true and lower it, otherwise return false and it will be lowered like a |
9063 | /// normal call. |
9064 | /// The caller already checked that \p I calls the appropriate LibFunc with a |
9065 | /// correct prototype. |
9066 | bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { |
9067 | const Value *Arg0 = I.getArgOperand(i: 0), *Arg1 = I.getArgOperand(i: 1); |
9068 | |
9069 | const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); |
9070 | std::pair<SDValue, SDValue> Res = |
9071 | TSI.EmitTargetCodeForStrcpy(DAG, DL: getCurSDLoc(), Chain: getRoot(), |
9072 | Dest: getValue(V: Arg0), Src: getValue(V: Arg1), |
9073 | DestPtrInfo: MachinePointerInfo(Arg0), |
9074 | SrcPtrInfo: MachinePointerInfo(Arg1), isStpcpy); |
9075 | if (Res.first.getNode()) { |
9076 | setValue(V: &I, NewN: Res.first); |
9077 | DAG.setRoot(Res.second); |
9078 | return true; |
9079 | } |
9080 | |
9081 | return false; |
9082 | } |
9083 | |
9084 | /// See if we can lower a strcmp call into an optimized form. If so, return |
9085 | /// true and lower it, otherwise return false and it will be lowered like a |
9086 | /// normal call. |
9087 | /// The caller already checked that \p I calls the appropriate LibFunc with a |
9088 | /// correct prototype. |
9089 | bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { |
9090 | const Value *Arg0 = I.getArgOperand(i: 0), *Arg1 = I.getArgOperand(i: 1); |
9091 | |
9092 | const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); |
9093 | std::pair<SDValue, SDValue> Res = |
9094 | TSI.EmitTargetCodeForStrcmp(DAG, dl: getCurSDLoc(), Chain: DAG.getRoot(), |
9095 | Op1: getValue(V: Arg0), Op2: getValue(V: Arg1), |
9096 | Op1PtrInfo: MachinePointerInfo(Arg0), |
9097 | Op2PtrInfo: MachinePointerInfo(Arg1)); |
9098 | if (Res.first.getNode()) { |
9099 | processIntegerCallValue(I, Value: Res.first, IsSigned: true); |
9100 | PendingLoads.push_back(Elt: Res.second); |
9101 | return true; |
9102 | } |
9103 | |
9104 | return false; |
9105 | } |
9106 | |
9107 | /// See if we can lower a strlen call into an optimized form. If so, return |
9108 | /// true and lower it, otherwise return false and it will be lowered like a |
9109 | /// normal call. |
9110 | /// The caller already checked that \p I calls the appropriate LibFunc with a |
9111 | /// correct prototype. |
9112 | bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { |
9113 | const Value *Arg0 = I.getArgOperand(i: 0); |
9114 | |
9115 | const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); |
9116 | std::pair<SDValue, SDValue> Res = |
9117 | TSI.EmitTargetCodeForStrlen(DAG, DL: getCurSDLoc(), Chain: DAG.getRoot(), |
9118 | Src: getValue(V: Arg0), SrcPtrInfo: MachinePointerInfo(Arg0)); |
9119 | if (Res.first.getNode()) { |
9120 | processIntegerCallValue(I, Value: Res.first, IsSigned: false); |
9121 | PendingLoads.push_back(Elt: Res.second); |
9122 | return true; |
9123 | } |
9124 | |
9125 | return false; |
9126 | } |
9127 | |
9128 | /// See if we can lower a strnlen call into an optimized form. If so, return |
9129 | /// true and lower it, otherwise return false and it will be lowered like a |
9130 | /// normal call. |
9131 | /// The caller already checked that \p I calls the appropriate LibFunc with a |
9132 | /// correct prototype. |
9133 | bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { |
9134 | const Value *Arg0 = I.getArgOperand(i: 0), *Arg1 = I.getArgOperand(i: 1); |
9135 | |
9136 | const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); |
9137 | std::pair<SDValue, SDValue> Res = |
9138 | TSI.EmitTargetCodeForStrnlen(DAG, DL: getCurSDLoc(), Chain: DAG.getRoot(), |
9139 | Src: getValue(V: Arg0), MaxLength: getValue(V: Arg1), |
9140 | SrcPtrInfo: MachinePointerInfo(Arg0)); |
9141 | if (Res.first.getNode()) { |
9142 | processIntegerCallValue(I, Value: Res.first, IsSigned: false); |
9143 | PendingLoads.push_back(Elt: Res.second); |
9144 | return true; |
9145 | } |
9146 | |
9147 | return false; |
9148 | } |
9149 | |
9150 | /// See if we can lower a unary floating-point operation into an SDNode with |
9151 | /// the specified Opcode. If so, return true and lower it, otherwise return |
9152 | /// false and it will be lowered like a normal call. |
9153 | /// The caller already checked that \p I calls the appropriate LibFunc with a |
9154 | /// correct prototype. |
9155 | bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, |
9156 | unsigned Opcode) { |
9157 | // We already checked this call's prototype; verify it doesn't modify errno. |
9158 | if (!I.onlyReadsMemory()) |
9159 | return false; |
9160 | |
9161 | SDNodeFlags Flags; |
9162 | Flags.copyFMF(FPMO: cast<FPMathOperator>(Val: I)); |
9163 | |
9164 | SDValue Tmp = getValue(V: I.getArgOperand(i: 0)); |
9165 | setValue(V: &I, |
9166 | NewN: DAG.getNode(Opcode, DL: getCurSDLoc(), VT: Tmp.getValueType(), Operand: Tmp, Flags)); |
9167 | return true; |
9168 | } |
9169 | |
9170 | /// See if we can lower a binary floating-point operation into an SDNode with |
9171 | /// the specified Opcode. If so, return true and lower it. Otherwise return |
9172 | /// false, and it will be lowered like a normal call. |
9173 | /// The caller already checked that \p I calls the appropriate LibFunc with a |
9174 | /// correct prototype. |
9175 | bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, |
9176 | unsigned Opcode) { |
9177 | // We already checked this call's prototype; verify it doesn't modify errno. |
9178 | if (!I.onlyReadsMemory()) |
9179 | return false; |
9180 | |
9181 | SDNodeFlags Flags; |
9182 | Flags.copyFMF(FPMO: cast<FPMathOperator>(Val: I)); |
9183 | |
9184 | SDValue Tmp0 = getValue(V: I.getArgOperand(i: 0)); |
9185 | SDValue Tmp1 = getValue(V: I.getArgOperand(i: 1)); |
9186 | EVT VT = Tmp0.getValueType(); |
9187 | setValue(V: &I, NewN: DAG.getNode(Opcode, DL: getCurSDLoc(), VT, N1: Tmp0, N2: Tmp1, Flags)); |
9188 | return true; |
9189 | } |
9190 | |
9191 | void SelectionDAGBuilder::visitCall(const CallInst &I) { |
9192 | // Handle inline assembly differently. |
9193 | if (I.isInlineAsm()) { |
9194 | visitInlineAsm(Call: I); |
9195 | return; |
9196 | } |
9197 | |
9198 | diagnoseDontCall(CI: I); |
9199 | |
9200 | if (Function *F = I.getCalledFunction()) { |
9201 | if (F->isDeclaration()) { |
9202 | // Is this an LLVM intrinsic or a target-specific intrinsic? |
9203 | unsigned IID = F->getIntrinsicID(); |
9204 | if (!IID) |
9205 | if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) |
9206 | IID = II->getIntrinsicID(F); |
9207 | |
9208 | if (IID) { |
9209 | visitIntrinsicCall(I, Intrinsic: IID); |
9210 | return; |
9211 | } |
9212 | } |
9213 | |
9214 | // Check for well-known libc/libm calls. If the function is internal, it |
9215 | // can't be a library call. Don't do the check if marked as nobuiltin for |
9216 | // some reason or the call site requires strict floating point semantics. |
9217 | LibFunc Func; |
9218 | if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() && |
9219 | F->hasName() && LibInfo->getLibFunc(FDecl: *F, F&: Func) && |
9220 | LibInfo->hasOptimizedCodeGen(F: Func)) { |
9221 | switch (Func) { |
9222 | default: break; |
9223 | case LibFunc_bcmp: |
9224 | if (visitMemCmpBCmpCall(I)) |
9225 | return; |
9226 | break; |
9227 | case LibFunc_copysign: |
9228 | case LibFunc_copysignf: |
9229 | case LibFunc_copysignl: |
9230 | // We already checked this call's prototype; verify it doesn't modify |
9231 | // errno. |
9232 | if (I.onlyReadsMemory()) { |
9233 | SDValue LHS = getValue(V: I.getArgOperand(i: 0)); |
9234 | SDValue RHS = getValue(V: I.getArgOperand(i: 1)); |
9235 | setValue(V: &I, NewN: DAG.getNode(Opcode: ISD::FCOPYSIGN, DL: getCurSDLoc(), |
9236 | VT: LHS.getValueType(), N1: LHS, N2: RHS)); |
9237 | return; |
9238 | } |
9239 | break; |
9240 | case LibFunc_fabs: |
9241 | case LibFunc_fabsf: |
9242 | case LibFunc_fabsl: |
9243 | if (visitUnaryFloatCall(I, Opcode: ISD::FABS)) |
9244 | return; |
9245 | break; |
9246 | case LibFunc_fmin: |
9247 | case LibFunc_fminf: |
9248 | case LibFunc_fminl: |
9249 | if (visitBinaryFloatCall(I, Opcode: ISD::FMINNUM)) |
9250 | return; |
9251 | break; |
9252 | case LibFunc_fmax: |
9253 | case LibFunc_fmaxf: |
9254 | case LibFunc_fmaxl: |
9255 | if (visitBinaryFloatCall(I, Opcode: ISD::FMAXNUM)) |
9256 | return; |
9257 | break; |
9258 | case LibFunc_sin: |
9259 | case LibFunc_sinf: |
9260 | case LibFunc_sinl: |
9261 | if (visitUnaryFloatCall(I, Opcode: ISD::FSIN)) |
9262 | return; |
9263 | break; |
9264 | case LibFunc_cos: |
9265 | case LibFunc_cosf: |
9266 | case LibFunc_cosl: |
9267 | if (visitUnaryFloatCall(I, Opcode: ISD::FCOS)) |
9268 | return; |
9269 | break; |
9270 | case LibFunc_tan: |
9271 | case LibFunc_tanf: |
9272 | case LibFunc_tanl: |
9273 | if (visitUnaryFloatCall(I, Opcode: ISD::FTAN)) |
9274 | return; |
9275 | break; |
9276 | case LibFunc_asin: |
9277 | case LibFunc_asinf: |
9278 | case LibFunc_asinl: |
9279 | if (visitUnaryFloatCall(I, Opcode: ISD::FASIN)) |
9280 | return; |
9281 | break; |
9282 | case LibFunc_acos: |
9283 | case LibFunc_acosf: |
9284 | case LibFunc_acosl: |
9285 | if (visitUnaryFloatCall(I, Opcode: ISD::FACOS)) |
9286 | return; |
9287 | break; |
9288 | case LibFunc_atan: |
9289 | case LibFunc_atanf: |
9290 | case LibFunc_atanl: |
9291 | if (visitUnaryFloatCall(I, Opcode: ISD::FATAN)) |
9292 | return; |
9293 | break; |
9294 | case LibFunc_sinh: |
9295 | case LibFunc_sinhf: |
9296 | case LibFunc_sinhl: |
9297 | if (visitUnaryFloatCall(I, Opcode: ISD::FSINH)) |
9298 | return; |
9299 | break; |
9300 | case LibFunc_cosh: |
9301 | case LibFunc_coshf: |
9302 | case LibFunc_coshl: |
9303 | if (visitUnaryFloatCall(I, Opcode: ISD::FCOSH)) |
9304 | return; |
9305 | break; |
9306 | case LibFunc_tanh: |
9307 | case LibFunc_tanhf: |
9308 | case LibFunc_tanhl: |
9309 | if (visitUnaryFloatCall(I, Opcode: ISD::FTANH)) |
9310 | return; |
9311 | break; |
9312 | case LibFunc_sqrt: |
9313 | case LibFunc_sqrtf: |
9314 | case LibFunc_sqrtl: |
9315 | case LibFunc_sqrt_finite: |
9316 | case LibFunc_sqrtf_finite: |
9317 | case LibFunc_sqrtl_finite: |
9318 | if (visitUnaryFloatCall(I, Opcode: ISD::FSQRT)) |
9319 | return; |
9320 | break; |
9321 | case LibFunc_floor: |
9322 | case LibFunc_floorf: |
9323 | case LibFunc_floorl: |
9324 | if (visitUnaryFloatCall(I, Opcode: ISD::FFLOOR)) |
9325 | return; |
9326 | break; |
9327 | case LibFunc_nearbyint: |
9328 | case LibFunc_nearbyintf: |
9329 | case LibFunc_nearbyintl: |
9330 | if (visitUnaryFloatCall(I, Opcode: ISD::FNEARBYINT)) |
9331 | return; |
9332 | break; |
9333 | case LibFunc_ceil: |
9334 | case LibFunc_ceilf: |
9335 | case LibFunc_ceill: |
9336 | if (visitUnaryFloatCall(I, Opcode: ISD::FCEIL)) |
9337 | return; |
9338 | break; |
9339 | case LibFunc_rint: |
9340 | case LibFunc_rintf: |
9341 | case LibFunc_rintl: |
9342 | if (visitUnaryFloatCall(I, Opcode: ISD::FRINT)) |
9343 | return; |
9344 | break; |
9345 | case LibFunc_round: |
9346 | case LibFunc_roundf: |
9347 | case LibFunc_roundl: |
9348 | if (visitUnaryFloatCall(I, Opcode: ISD::FROUND)) |
9349 | return; |
9350 | break; |
9351 | case LibFunc_trunc: |
9352 | case LibFunc_truncf: |
9353 | case LibFunc_truncl: |
9354 | if (visitUnaryFloatCall(I, Opcode: ISD::FTRUNC)) |
9355 | return; |
9356 | break; |
9357 | case LibFunc_log2: |
9358 | case LibFunc_log2f: |
9359 | case LibFunc_log2l: |
9360 | if (visitUnaryFloatCall(I, Opcode: ISD::FLOG2)) |
9361 | return; |
9362 | break; |
9363 | case LibFunc_exp2: |
9364 | case LibFunc_exp2f: |
9365 | case LibFunc_exp2l: |
9366 | if (visitUnaryFloatCall(I, Opcode: ISD::FEXP2)) |
9367 | return; |
9368 | break; |
9369 | case LibFunc_exp10: |
9370 | case LibFunc_exp10f: |
9371 | case LibFunc_exp10l: |
9372 | if (visitUnaryFloatCall(I, Opcode: ISD::FEXP10)) |
9373 | return; |
9374 | break; |
9375 | case LibFunc_ldexp: |
9376 | case LibFunc_ldexpf: |
9377 | case LibFunc_ldexpl: |
9378 | if (visitBinaryFloatCall(I, Opcode: ISD::FLDEXP)) |
9379 | return; |
9380 | break; |
9381 | case LibFunc_memcmp: |
9382 | if (visitMemCmpBCmpCall(I)) |
9383 | return; |
9384 | break; |
9385 | case LibFunc_mempcpy: |
9386 | if (visitMemPCpyCall(I)) |
9387 | return; |
9388 | break; |
9389 | case LibFunc_memchr: |
9390 | if (visitMemChrCall(I)) |
9391 | return; |
9392 | break; |
9393 | case LibFunc_strcpy: |
9394 | if (visitStrCpyCall(I, isStpcpy: false)) |
9395 | return; |
9396 | break; |
9397 | case LibFunc_stpcpy: |
9398 | if (visitStrCpyCall(I, isStpcpy: true)) |
9399 | return; |
9400 | break; |
9401 | case LibFunc_strcmp: |
9402 | if (visitStrCmpCall(I)) |
9403 | return; |
9404 | break; |
9405 | case LibFunc_strlen: |
9406 | if (visitStrLenCall(I)) |
9407 | return; |
9408 | break; |
9409 | case LibFunc_strnlen: |
9410 | if (visitStrNLenCall(I)) |
9411 | return; |
9412 | break; |
9413 | } |
9414 | } |
9415 | } |
9416 | |
9417 | if (I.countOperandBundlesOfType(ID: LLVMContext::OB_ptrauth)) { |
9418 | LowerCallSiteWithPtrAuthBundle(CB: cast<CallBase>(Val: I), /*EHPadBB=*/nullptr); |
9419 | return; |
9420 | } |
9421 | |
9422 | // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't |
9423 | // have to do anything here to lower funclet bundles. |
9424 | // CFGuardTarget bundles are lowered in LowerCallTo. |
9425 | assert(!I.hasOperandBundlesOtherThan( |
9426 | {LLVMContext::OB_deopt, LLVMContext::OB_funclet, |
9427 | LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated, |
9428 | LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi, |
9429 | LLVMContext::OB_convergencectrl}) && |
9430 | "Cannot lower calls with arbitrary operand bundles!" ); |
9431 | |
9432 | SDValue Callee = getValue(V: I.getCalledOperand()); |
9433 | |
9434 | if (I.hasDeoptState()) |
9435 | LowerCallSiteWithDeoptBundle(Call: &I, Callee, EHPadBB: nullptr); |
9436 | else |
// Check if we can potentially perform a tail call. More detailed checking
// is done within LowerCallTo, after more information about the call is
// known.
9440 | LowerCallTo(CB: I, Callee, isTailCall: I.isTailCall(), isMustTailCall: I.isMustTailCall()); |
9441 | } |
9442 | |
9443 | void SelectionDAGBuilder::LowerCallSiteWithPtrAuthBundle( |
9444 | const CallBase &CB, const BasicBlock *EHPadBB) { |
9445 | auto PAB = CB.getOperandBundle(Name: "ptrauth" ); |
9446 | const Value *CalleeV = CB.getCalledOperand(); |
9447 | |
9448 | // Gather the call ptrauth data from the operand bundle: |
9449 | // [ i32 <key>, i64 <discriminator> ] |
9450 | const auto *Key = cast<ConstantInt>(Val: PAB->Inputs[0]); |
9451 | const Value *Discriminator = PAB->Inputs[1]; |
9452 | |
9453 | assert(Key->getType()->isIntegerTy(32) && "Invalid ptrauth key" ); |
9454 | assert(Discriminator->getType()->isIntegerTy(64) && |
9455 | "Invalid ptrauth discriminator" ); |
9456 | |
9457 | // Look through ptrauth constants to find the raw callee. |
9458 | // Do a direct unauthenticated call if we found it and everything matches. |
9459 | if (const auto *CalleeCPA = dyn_cast<ConstantPtrAuth>(Val: CalleeV)) |
9460 | if (CalleeCPA->isKnownCompatibleWith(Key, Discriminator, |
9461 | DL: DAG.getDataLayout())) |
9462 | return LowerCallTo(CB, Callee: getValue(V: CalleeCPA->getPointer()), isTailCall: CB.isTailCall(), |
9463 | isMustTailCall: CB.isMustTailCall(), EHPadBB); |
9464 | |
9465 | // Functions should never be ptrauth-called directly. |
9466 | assert(!isa<Function>(CalleeV) && "invalid direct ptrauth call" ); |
9467 | |
9468 | // Otherwise, do an authenticated indirect call. |
9469 | TargetLowering::PtrAuthInfo PAI = {.Key: Key->getZExtValue(), |
9470 | .Discriminator: getValue(V: Discriminator)}; |
9471 | |
9472 | LowerCallTo(CB, Callee: getValue(V: CalleeV), isTailCall: CB.isTailCall(), isMustTailCall: CB.isMustTailCall(), |
9473 | EHPadBB, PAI: &PAI); |
9474 | } |
9475 | |
9476 | namespace { |
9477 | |
9478 | /// AsmOperandInfo - This contains information for each constraint that we are |
9479 | /// lowering. |
9480 | class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { |
9481 | public: |
9482 | /// CallOperand - If this is the result output operand or a clobber |
9483 | /// this is null, otherwise it is the incoming operand to the CallInst. |
9484 | /// This gets modified as the asm is processed. |
9485 | SDValue CallOperand; |
9486 | |
9487 | /// AssignedRegs - If this is a register or register class operand, this |
/// contains the set of registers corresponding to the operand.
9489 | RegsForValue AssignedRegs; |
9490 | |
9491 | explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) |
9492 | : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) { |
9493 | } |
9494 | |
9495 | /// Whether or not this operand accesses memory |
9496 | bool hasMemory(const TargetLowering &TLI) const { |
9497 | // Indirect operand accesses access memory. |
9498 | if (isIndirect) |
9499 | return true; |
9500 | |
9501 | for (const auto &Code : Codes) |
9502 | if (TLI.getConstraintType(Constraint: Code) == TargetLowering::C_Memory) |
9503 | return true; |
9504 | |
9505 | return false; |
9506 | } |
9507 | }; |
9508 | |
9509 | |
9510 | } // end anonymous namespace |
9511 | |
9512 | /// Make sure that the output operand \p OpInfo and its corresponding input |
9513 | /// operand \p MatchingOpInfo have compatible constraint types (otherwise error |
9514 | /// out). |
9515 | static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo, |
9516 | SDISelAsmOperandInfo &MatchingOpInfo, |
9517 | SelectionDAG &DAG) { |
9518 | if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT) |
9519 | return; |
9520 | |
9521 | const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); |
9522 | const auto &TLI = DAG.getTargetLoweringInfo(); |
9523 | |
9524 | std::pair<unsigned, const TargetRegisterClass *> MatchRC = |
9525 | TLI.getRegForInlineAsmConstraint(TRI, Constraint: OpInfo.ConstraintCode, |
9526 | VT: OpInfo.ConstraintVT); |
9527 | std::pair<unsigned, const TargetRegisterClass *> InputRC = |
9528 | TLI.getRegForInlineAsmConstraint(TRI, Constraint: MatchingOpInfo.ConstraintCode, |
9529 | VT: MatchingOpInfo.ConstraintVT); |
9530 | if ((OpInfo.ConstraintVT.isInteger() != |
9531 | MatchingOpInfo.ConstraintVT.isInteger()) || |
9532 | (MatchRC.second != InputRC.second)) { |
9533 | // FIXME: error out in a more elegant fashion |
9534 | report_fatal_error(reason: "Unsupported asm: input constraint" |
9535 | " with a matching output constraint of" |
9536 | " incompatible type!" ); |
9537 | } |
9538 | MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT; |
9539 | } |
9540 | |
9541 | /// Get a direct memory input to behave well as an indirect operand. |
9542 | /// This may introduce stores, hence the need for a \p Chain. |
9543 | /// \return The (possibly updated) chain. |
9544 | static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, |
9545 | SDISelAsmOperandInfo &OpInfo, |
9546 | SelectionDAG &DAG) { |
9547 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
9548 | |
9549 | // If we don't have an indirect input, put it in the constpool if we can, |
9550 | // otherwise spill it to a stack slot. |
9551 | // TODO: This isn't quite right. We need to handle these according to |
9552 | // the addressing mode that the constraint wants. Also, this may take |
9553 | // an additional register for the computation and we don't want that |
9554 | // either. |
9555 | |
9556 | // If the operand is a float, integer, or vector constant, spill to a |
9557 | // constant pool entry to get its address. |
9558 | const Value *OpVal = OpInfo.CallOperandVal; |
9559 | if (isa<ConstantFP>(Val: OpVal) || isa<ConstantInt>(Val: OpVal) || |
9560 | isa<ConstantVector>(Val: OpVal) || isa<ConstantDataVector>(Val: OpVal)) { |
9561 | OpInfo.CallOperand = DAG.getConstantPool( |
9562 | C: cast<Constant>(Val: OpVal), VT: TLI.getPointerTy(DL: DAG.getDataLayout())); |
9563 | return Chain; |
9564 | } |
9565 | |
9566 | // Otherwise, create a stack slot and emit a store to it before the asm. |
9567 | Type *Ty = OpVal->getType(); |
9568 | auto &DL = DAG.getDataLayout(); |
9569 | TypeSize TySize = DL.getTypeAllocSize(Ty); |
9570 | MachineFunction &MF = DAG.getMachineFunction(); |
9571 | const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); |
9572 | int StackID = 0; |
9573 | if (TySize.isScalable()) |
9574 | StackID = TFI->getStackIDForScalableVectors(); |
9575 | int SSFI = MF.getFrameInfo().CreateStackObject(Size: TySize.getKnownMinValue(), |
9576 | Alignment: DL.getPrefTypeAlign(Ty), isSpillSlot: false, |
9577 | Alloca: nullptr, ID: StackID); |
9578 | SDValue StackSlot = DAG.getFrameIndex(FI: SSFI, VT: TLI.getFrameIndexTy(DL)); |
9579 | Chain = DAG.getTruncStore(Chain, dl: Location, Val: OpInfo.CallOperand, Ptr: StackSlot, |
9580 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: SSFI), |
9581 | SVT: TLI.getMemValueType(DL, Ty)); |
9582 | OpInfo.CallOperand = StackSlot; |
9583 | |
9584 | return Chain; |
9585 | } |
9586 | |
9587 | /// GetRegistersForValue - Assign registers (virtual or physical) for the |
9588 | /// specified operand. We prefer to assign virtual registers, to allow the |
9589 | /// register allocator to handle the assignment process. However, if the asm |
9590 | /// uses features that we can't model on machineinstrs, we have SDISel do the |
9591 | /// allocation. This produces generally horrible, but correct, code. |
9592 | /// |
9593 | /// OpInfo describes the operand |
9594 | /// RefOpInfo describes the matching operand if any, the operand otherwise |
9595 | static std::optional<unsigned> |
9596 | getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, |
9597 | SDISelAsmOperandInfo &OpInfo, |
9598 | SDISelAsmOperandInfo &RefOpInfo) { |
9599 | LLVMContext &Context = *DAG.getContext(); |
9600 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
9601 | |
9602 | MachineFunction &MF = DAG.getMachineFunction(); |
9603 | SmallVector<unsigned, 4> Regs; |
9604 | const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); |
9605 | |
9606 | // No work to do for memory/address operands. |
9607 | if (OpInfo.ConstraintType == TargetLowering::C_Memory || |
9608 | OpInfo.ConstraintType == TargetLowering::C_Address) |
9609 | return std::nullopt; |
9610 | |
9611 | // If this is a constraint for a single physreg, or a constraint for a |
9612 | // register class, find it. |
9613 | unsigned AssignedReg; |
9614 | const TargetRegisterClass *RC; |
9615 | std::tie(args&: AssignedReg, args&: RC) = TLI.getRegForInlineAsmConstraint( |
9616 | TRI: &TRI, Constraint: RefOpInfo.ConstraintCode, VT: RefOpInfo.ConstraintVT); |
9617 | // RC is unset only on failure. Return immediately. |
9618 | if (!RC) |
9619 | return std::nullopt; |
9620 | |
9621 | // Get the actual register value type. This is important, because the user |
9622 | // may have asked for (e.g.) the AX register in i32 type. We need to |
9623 | // remember that AX is actually i16 to get the right extension. |
9624 | const MVT RegVT = *TRI.legalclasstypes_begin(RC: *RC); |
9625 | |
9626 | if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) { |
// If this is an FP operand in an integer register (or vice versa), or more
9628 | // generally if the operand value disagrees with the register class we plan |
9629 | // to stick it in, fix the operand type. |
9630 | // |
9631 | // If this is an input value, the bitcast to the new type is done now. |
9632 | // Bitcast for output value is done at the end of visitInlineAsm(). |
9633 | if ((OpInfo.Type == InlineAsm::isOutput || |
9634 | OpInfo.Type == InlineAsm::isInput) && |
9635 | !TRI.isTypeLegalForClass(RC: *RC, T: OpInfo.ConstraintVT)) { |
9636 | // Try to convert to the first EVT that the reg class contains. If the |
9637 | // types are identical size, use a bitcast to convert (e.g. two differing |
9638 | // vector types). Note: output bitcast is done at the end of |
9639 | // visitInlineAsm(). |
9640 | if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { |
9641 | // Exclude indirect inputs while they are unsupported because the code |
9642 | // to perform the load is missing and thus OpInfo.CallOperand still |
9643 | // refers to the input address rather than the pointed-to value. |
9644 | if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect) |
9645 | OpInfo.CallOperand = |
9646 | DAG.getNode(Opcode: ISD::BITCAST, DL, VT: RegVT, Operand: OpInfo.CallOperand); |
9647 | OpInfo.ConstraintVT = RegVT; |
9648 | // If the operand is an FP value and we want it in integer registers, |
9649 | // use the corresponding integer type. This turns an f64 value into |
9650 | // i64, which can be passed with two i32 values on a 32-bit machine. |
9651 | } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { |
9652 | MVT VT = MVT::getIntegerVT(BitWidth: OpInfo.ConstraintVT.getSizeInBits()); |
9653 | if (OpInfo.Type == InlineAsm::isInput) |
9654 | OpInfo.CallOperand = |
9655 | DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: OpInfo.CallOperand); |
9656 | OpInfo.ConstraintVT = VT; |
9657 | } |
9658 | } |
9659 | } |
9660 | |
9661 | // No need to allocate a matching input constraint since the constraint it's |
9662 | // matching to has already been allocated. |
9663 | if (OpInfo.isMatchingInputConstraint()) |
9664 | return std::nullopt; |
9665 | |
9666 | EVT ValueVT = OpInfo.ConstraintVT; |
9667 | if (OpInfo.ConstraintVT == MVT::Other) |
9668 | ValueVT = RegVT; |
9669 | |
9670 | // Initialize NumRegs. |
9671 | unsigned NumRegs = 1; |
9672 | if (OpInfo.ConstraintVT != MVT::Other) |
9673 | NumRegs = TLI.getNumRegisters(Context, VT: OpInfo.ConstraintVT, RegisterVT: RegVT); |
9674 | |
9675 | // If this is a constraint for a specific physical register, like {r17}, |
9676 | // assign it now. |
9677 | |
// If this is associated with a specific register, initialize the iterator
// to the correct place. If virtual, make sure we have enough registers.
9680 | |
9681 | // Initialize iterator if necessary |
9682 | TargetRegisterClass::iterator I = RC->begin(); |
9683 | MachineRegisterInfo &RegInfo = MF.getRegInfo(); |
9684 | |
9685 | // Do not check for single registers. |
9686 | if (AssignedReg) { |
9687 | I = std::find(first: I, last: RC->end(), val: AssignedReg); |
9688 | if (I == RC->end()) { |
9689 | // RC does not contain the selected register, which indicates a |
9690 | // mismatch between the register and the required type/bitwidth. |
9691 | return {AssignedReg}; |
9692 | } |
9693 | } |
9694 | |
9695 | for (; NumRegs; --NumRegs, ++I) { |
9696 | assert(I != RC->end() && "Ran out of registers to allocate!" ); |
9697 | Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RegClass: RC); |
9698 | Regs.push_back(Elt: R); |
9699 | } |
9700 | |
9701 | OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); |
9702 | return std::nullopt; |
9703 | } |
9704 | |
9705 | static unsigned |
9706 | findMatchingInlineAsmOperand(unsigned OperandNo, |
9707 | const std::vector<SDValue> &AsmNodeOperands) { |
9708 | // Scan until we find the definition we already emitted of this operand. |
9709 | unsigned CurOp = InlineAsm::Op_FirstOperand; |
9710 | for (; OperandNo; --OperandNo) { |
9711 | // Advance to the next operand. |
9712 | unsigned OpFlag = AsmNodeOperands[CurOp]->getAsZExtVal(); |
9713 | const InlineAsm::Flag F(OpFlag); |
9714 | assert( |
9715 | (F.isRegDefKind() || F.isRegDefEarlyClobberKind() || F.isMemKind()) && |
9716 | "Skipped past definitions?" ); |
9717 | CurOp += F.getNumOperandRegisters() + 1; |
9718 | } |
9719 | return CurOp; |
9720 | } |
9721 | |
9722 | namespace { |
9723 | |
class ExtraFlags {
unsigned Flags = 0;

public:
explicit ExtraFlags(const CallBase &Call) {
9729 | const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand()); |
9730 | if (IA->hasSideEffects()) |
9731 | Flags |= InlineAsm::Extra_HasSideEffects; |
9732 | if (IA->isAlignStack()) |
9733 | Flags |= InlineAsm::Extra_IsAlignStack; |
9734 | if (Call.isConvergent()) |
9735 | Flags |= InlineAsm::Extra_IsConvergent; |
9736 | Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect; |
9737 | } |
9738 | |
9739 | void update(const TargetLowering::AsmOperandInfo &OpInfo) { |
9740 | // Ideally, we would only check against memory constraints. However, the |
9741 | // meaning of an Other constraint can be target-specific and we can't easily |
9742 | // reason about it. Therefore, be conservative and set MayLoad/MayStore |
9743 | // for Other constraints as well. |
9744 | if (OpInfo.ConstraintType == TargetLowering::C_Memory || |
9745 | OpInfo.ConstraintType == TargetLowering::C_Other) { |
9746 | if (OpInfo.Type == InlineAsm::isInput) |
9747 | Flags |= InlineAsm::Extra_MayLoad; |
9748 | else if (OpInfo.Type == InlineAsm::isOutput) |
9749 | Flags |= InlineAsm::Extra_MayStore; |
9750 | else if (OpInfo.Type == InlineAsm::isClobber) |
9751 | Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); |
9752 | } |
9753 | } |
9754 | |
unsigned get() const { return Flags; }
9756 | }; |
9757 | |
9758 | } // end anonymous namespace |
9759 | |
9760 | static bool isFunction(SDValue Op) { |
9761 | if (Op && Op.getOpcode() == ISD::GlobalAddress) { |
9762 | if (auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) { |
9763 | auto Fn = dyn_cast_or_null<Function>(Val: GA->getGlobal()); |
9764 | |
// In a normal "call dllimport func" instruction (non-inlineasm), the call
// opcode forces indirect access, and the asm is usually printed with an
// indirect symbol marker (e.g. "*") based on that opcode. Inline asm
// cannot do this today (in fact, this is similar to the "Data Access"
// case), so we ignore dllimport functions here.
9770 | if (Fn && !Fn->hasDLLImportStorageClass()) |
9771 | return true; |
9772 | } |
9773 | } |
9774 | return false; |
9775 | } |
9776 | |
9777 | /// visitInlineAsm - Handle a call to an InlineAsm object. |
9778 | void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, |
9779 | const BasicBlock *EHPadBB) { |
9780 | const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand()); |
9781 | |
9782 | /// ConstraintOperands - Information about all of the constraints. |
9783 | SmallVector<SDISelAsmOperandInfo, 16> ConstraintOperands; |
9784 | |
9785 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
9786 | TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints( |
9787 | DL: DAG.getDataLayout(), TRI: DAG.getSubtarget().getRegisterInfo(), Call); |
9788 | |
9789 | // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack, |
9790 | // AsmDialect, MayLoad, MayStore). |
9791 | bool HasSideEffect = IA->hasSideEffects(); |
ExtraFlags ExtraInfo(Call);
9793 | |
9794 | for (auto &T : TargetConstraints) { |
9795 | ConstraintOperands.push_back(Elt: SDISelAsmOperandInfo(T)); |
9796 | SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); |
9797 | |
9798 | if (OpInfo.CallOperandVal) |
9799 | OpInfo.CallOperand = getValue(V: OpInfo.CallOperandVal); |
9800 | |
9801 | if (!HasSideEffect) |
9802 | HasSideEffect = OpInfo.hasMemory(TLI); |
9803 | |
9804 | // Determine if this InlineAsm MayLoad or MayStore based on the constraints. |
9805 | // FIXME: Could we compute this on OpInfo rather than T? |
9806 | |
9807 | // Compute the constraint code and ConstraintType to use. |
9808 | TLI.ComputeConstraintToUse(OpInfo&: T, Op: SDValue()); |
9809 | |
9810 | if (T.ConstraintType == TargetLowering::C_Immediate && |
9811 | OpInfo.CallOperand && !isa<ConstantSDNode>(Val: OpInfo.CallOperand)) |
// We've delayed emitting a diagnostic (as we do for the "n" constraint)
// because inlining could have caused an integer constant to show up.
9814 | return emitInlineAsmError(Call, Message: "constraint '" + Twine(T.ConstraintCode) + |
9815 | "' expects an integer constant " |
9816 | "expression" ); |
9817 | |
9818 | ExtraInfo.update(OpInfo: T); |
9819 | } |
9820 | |
9821 | // We won't need to flush pending loads if this asm doesn't touch |
9822 | // memory and is nonvolatile. |
9823 | SDValue Glue, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot(); |
9824 | |
9825 | bool EmitEHLabels = isa<InvokeInst>(Val: Call); |
9826 | if (EmitEHLabels) { |
9827 | assert(EHPadBB && "InvokeInst must have an EHPadBB" ); |
9828 | } |
9829 | bool IsCallBr = isa<CallBrInst>(Val: Call); |
9830 | |
9831 | if (IsCallBr || EmitEHLabels) { |
9832 | // If this is a callbr or invoke we need to flush pending exports since |
9833 | // inlineasm_br and invoke are terminators. |
9834 | // We need to do this before nodes are glued to the inlineasm_br node. |
9835 | Chain = getControlRoot(); |
9836 | } |
9837 | |
9838 | MCSymbol *BeginLabel = nullptr; |
9839 | if (EmitEHLabels) { |
9840 | Chain = lowerStartEH(Chain, EHPadBB, BeginLabel); |
9841 | } |
9842 | |
9843 | int OpNo = -1; |
9844 | SmallVector<StringRef> AsmStrs; |
9845 | IA->collectAsmStrs(AsmStrs); |
9846 | |
9847 | // Second pass over the constraints: compute which constraint option to use. |
9848 | for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { |
9849 | if (OpInfo.hasArg() || OpInfo.Type == InlineAsm::isOutput) |
9850 | OpNo++; |
9851 | |
9852 | // If this is an output operand with a matching input operand, look up the |
9853 | // matching input. If their types mismatch, e.g. one is an integer, the |
9854 | // other is floating point, or their sizes are different, flag it as an |
9855 | // error. |
9856 | if (OpInfo.hasMatchingInput()) { |
9857 | SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; |
9858 | patchMatchingInput(OpInfo, MatchingOpInfo&: Input, DAG); |
9859 | } |
9860 | |
9861 | // Compute the constraint code and ConstraintType to use. |
9862 | TLI.ComputeConstraintToUse(OpInfo, Op: OpInfo.CallOperand, DAG: &DAG); |
9863 | |
9864 | if ((OpInfo.ConstraintType == TargetLowering::C_Memory && |
9865 | OpInfo.Type == InlineAsm::isClobber) || |
9866 | OpInfo.ConstraintType == TargetLowering::C_Address) |
9867 | continue; |
9868 | |
9869 | // In Linux PIC model, there are 4 cases about value/label addressing: |
9870 | // |
9871 | // 1: Function call or Label jmp inside the module. |
9872 | // 2: Data access (such as global variable, static variable) inside module. |
9873 | // 3: Function call or Label jmp outside the module. |
9874 | // 4: Data access (such as global variable) outside the module. |
9875 | // |
9876 |     // Because the current LLVM inline asm architecture is designed not to
9877 |     // "recognize" the asm code, it is hard to treat memory addressing
9878 |     // differently for the same value/address used in different instructions.
9879 |     // For example, in the PIC model a call may go via the PLT or be directly
9880 |     // PC-relative, while a lea/mov of a function address may use the GOT.
9881 |     //
9882 |     // Here we try to "recognize" function calls for cases 1 and 3 in inline
9883 |     // asm and adjust their constraints accordingly.
9884 |     //
9885 |     // TODO: Inline asm currently discourages jumping to outside labels, so we
9886 |     // don't handle jumps to function labels yet; enhance this (especially in
9887 |     // the PIC model) if meaningful requirements arise.
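    //
    // Illustrative sketch (hypothetical IR; the exact asm text and constraint
    // spelling are target-specific, x86-flavoured here):
    //
    //   call void asm sideeffect "call ${0:P}", "*m"(ptr elementtype(ptr) @f)
    //
    // The operand is a function used as a branch target, so the check below
    // drops the indirection and retypes the constraint as C_Address, letting
    // the callee be reached PC-relative (or via the PLT) rather than through
    // a GOT-based memory operand.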
9888 | if (OpInfo.isIndirect && isFunction(Op: OpInfo.CallOperand) && |
9889 | TLI.isInlineAsmTargetBranch(AsmStrs, OpNo) && |
9890 | TM.getCodeModel() != CodeModel::Large) { |
9891 | OpInfo.isIndirect = false; |
9892 | OpInfo.ConstraintType = TargetLowering::C_Address; |
9893 | } |
9894 | |
9895 |     // If this is a memory input and the operand is not indirect, arrange to
9896 |     // provide an address for the memory input.
9897 | if (OpInfo.ConstraintType == TargetLowering::C_Memory && |
9898 | !OpInfo.isIndirect) { |
9899 | assert((OpInfo.isMultipleAlternative || |
9900 | (OpInfo.Type == InlineAsm::isInput)) && |
9901 | "Can only indirectify direct input operands!" ); |
9902 | |
9903 | // Memory operands really want the address of the value. |
9904 | Chain = getAddressForMemoryInput(Chain, Location: getCurSDLoc(), OpInfo, DAG); |
9905 | |
9906 | // There is no longer a Value* corresponding to this operand. |
9907 | OpInfo.CallOperandVal = nullptr; |
9908 | |
9909 | // It is now an indirect operand. |
9910 | OpInfo.isIndirect = true; |
9911 | } |
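    // For example (a sketch): an "m" input fed a plain SSA value, rather than
    // something already living in memory, is spilled to a fresh stack slot by
    // getAddressForMemoryInput above, and the slot's address becomes the
    // operand the asm string sees.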
9913 |   }
9914 | |
9915 | // AsmNodeOperands - The operands for the ISD::INLINEASM node. |
9916 | std::vector<SDValue> AsmNodeOperands; |
9917 | AsmNodeOperands.push_back(x: SDValue()); // reserve space for input chain |
9918 | AsmNodeOperands.push_back(x: DAG.getTargetExternalSymbol( |
9919 | Sym: IA->getAsmString().c_str(), VT: TLI.getProgramPointerTy(DL: DAG.getDataLayout()))); |
9920 | |
9921 | // If we have a !srcloc metadata node associated with it, we want to attach |
9922 | // this to the ultimately generated inline asm machineinstr. To do this, we |
9923 | // pass in the third operand as this (potentially null) inline asm MDNode. |
9924 | const MDNode *SrcLoc = Call.getMetadata(Kind: "srcloc" ); |
9925 | AsmNodeOperands.push_back(x: DAG.getMDNode(MD: SrcLoc)); |
9926 | |
9927 | // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore |
9928 | // bits as operand 3. |
9929 | AsmNodeOperands.push_back(x: DAG.getTargetConstant( |
9930 | Val: ExtraInfo.get(), DL: getCurSDLoc(), VT: TLI.getPointerTy(DL: DAG.getDataLayout()))); |
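
  // A sketch of the fixed operand layout built so far (see the
  // InlineAsm::Op_* constants in InlineAsm.h):
  //
  //   0: input chain (patched in at the end)
  //   1: the asm string, as a target external symbol
  //   2: the !srcloc MDNode (possibly null)
  //   3: the ExtraInfo bits (side effects, stack alignment, dialect, ...)
  //
  // Each constraint handled below appends a flag word followed by that
  // operand's value(s).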
9931 | |
9932 |   // Third pass: Loop over operands to prepare DAG-level operands. As part of
9933 |   // this, assign virtual and physical registers for inputs and outputs.
9934 | for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { |
9935 | // Assign Registers. |
9936 | SDISelAsmOperandInfo &RefOpInfo = |
9937 | OpInfo.isMatchingInputConstraint() |
9938 | ? ConstraintOperands[OpInfo.getMatchedOperand()] |
9939 | : OpInfo; |
9940 | const auto RegError = |
9941 | getRegistersForValue(DAG, DL: getCurSDLoc(), OpInfo, RefOpInfo); |
9942 | if (RegError) { |
9943 | const MachineFunction &MF = DAG.getMachineFunction(); |
9944 | const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); |
9945 | const char *RegName = TRI.getName(RegNo: *RegError); |
9946 | emitInlineAsmError(Call, Message: "register '" + Twine(RegName) + |
9947 | "' allocated for constraint '" + |
9948 | Twine(OpInfo.ConstraintCode) + |
9949 | "' does not match required type" ); |
9950 | return; |
9951 | } |
9952 | |
9953 | auto DetectWriteToReservedRegister = [&]() { |
9954 | const MachineFunction &MF = DAG.getMachineFunction(); |
9955 | const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); |
9956 | for (unsigned Reg : OpInfo.AssignedRegs.Regs) { |
9957 | if (Register::isPhysicalRegister(Reg) && |
9958 | TRI.isInlineAsmReadOnlyReg(MF, PhysReg: Reg)) { |
9959 | const char *RegName = TRI.getName(RegNo: Reg); |
9960 | emitInlineAsmError(Call, Message: "write to reserved register '" + |
9961 | Twine(RegName) + "'" ); |
9962 | return true; |
9963 | } |
9964 | } |
9965 | return false; |
9966 | }; |
9967 | assert((OpInfo.ConstraintType != TargetLowering::C_Address || |
9968 | (OpInfo.Type == InlineAsm::isInput && |
9969 | !OpInfo.isMatchingInputConstraint())) && |
9970 | "Only address as input operand is allowed." ); |
9971 | |
9972 | switch (OpInfo.Type) { |
9973 | case InlineAsm::isOutput: |
9974 | if (OpInfo.ConstraintType == TargetLowering::C_Memory) { |
9975 | const InlineAsm::ConstraintCode ConstraintID = |
9976 | TLI.getInlineAsmMemConstraint(ConstraintCode: OpInfo.ConstraintCode); |
9977 | assert(ConstraintID != InlineAsm::ConstraintCode::Unknown && |
9978 | "Failed to convert memory constraint code to constraint id." ); |
9979 | |
9980 | // Add information to the INLINEASM node to know about this output. |
9981 | InlineAsm::Flag OpFlags(InlineAsm::Kind::Mem, 1); |
9982 | OpFlags.setMemConstraint(ConstraintID); |
9983 | AsmNodeOperands.push_back(x: DAG.getTargetConstant(Val: OpFlags, DL: getCurSDLoc(), |
9984 | VT: MVT::i32)); |
9985 | AsmNodeOperands.push_back(x: OpInfo.CallOperand); |
9986 | } else { |
9987 | // Otherwise, this outputs to a register (directly for C_Register / |
9988 | // C_RegisterClass, and a target-defined fashion for |
9989 | // C_Immediate/C_Other). Find a register that we can use. |
9990 | if (OpInfo.AssignedRegs.Regs.empty()) { |
9991 | emitInlineAsmError( |
9992 | Call, Message: "couldn't allocate output register for constraint '" + |
9993 | Twine(OpInfo.ConstraintCode) + "'" ); |
9994 | return; |
9995 | } |
9996 | |
9997 | if (DetectWriteToReservedRegister()) |
9998 | return; |
9999 | |
10000 | // Add information to the INLINEASM node to know that this register is |
10001 | // set. |
10002 | OpInfo.AssignedRegs.AddInlineAsmOperands( |
10003 | Code: OpInfo.isEarlyClobber ? InlineAsm::Kind::RegDefEarlyClobber |
10004 | : InlineAsm::Kind::RegDef, |
10005 | HasMatching: false, MatchingIdx: 0, dl: getCurSDLoc(), DAG, Ops&: AsmNodeOperands); |
10006 | } |
10007 | break; |
10008 | |
10009 | case InlineAsm::isInput: |
10010 | case InlineAsm::isLabel: { |
10011 | SDValue InOperandVal = OpInfo.CallOperand; |
10012 | |
10013 | if (OpInfo.isMatchingInputConstraint()) { |
10014 | // If this is required to match an output register we have already set, |
10015 | // just use its register. |
10016 | auto CurOp = findMatchingInlineAsmOperand(OperandNo: OpInfo.getMatchedOperand(), |
10017 | AsmNodeOperands); |
10018 | InlineAsm::Flag Flag(AsmNodeOperands[CurOp]->getAsZExtVal()); |
10019 | if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) { |
10020 | if (OpInfo.isIndirect) { |
10021 | // This happens on gcc/testsuite/gcc.dg/pr8788-1.c |
10022 | emitInlineAsmError(Call, Message: "inline asm not supported yet: " |
10023 | "don't know how to handle tied " |
10024 | "indirect register inputs" ); |
10025 | return; |
10026 | } |
10027 | |
10028 | SmallVector<unsigned, 4> Regs; |
10029 | MachineFunction &MF = DAG.getMachineFunction(); |
10030 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
10031 | const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); |
10032 | auto *R = cast<RegisterSDNode>(Val&: AsmNodeOperands[CurOp+1]); |
10033 | Register TiedReg = R->getReg(); |
10034 | MVT RegVT = R->getSimpleValueType(ResNo: 0); |
10035 | const TargetRegisterClass *RC = |
10036 | TiedReg.isVirtual() ? MRI.getRegClass(Reg: TiedReg) |
10037 | : RegVT != MVT::Untyped ? TLI.getRegClassFor(VT: RegVT) |
10038 | : TRI.getMinimalPhysRegClass(Reg: TiedReg); |
10039 | for (unsigned i = 0, e = Flag.getNumOperandRegisters(); i != e; ++i) |
10040 | Regs.push_back(Elt: MRI.createVirtualRegister(RegClass: RC)); |
10041 | |
10042 | RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType()); |
10043 | |
10044 | SDLoc dl = getCurSDLoc(); |
10045 |           // Use MatchedRegs to copy the input value into the tied registers.
10046 | MatchedRegs.getCopyToRegs(Val: InOperandVal, DAG, dl, Chain, Glue: &Glue, V: &Call); |
10047 | MatchedRegs.AddInlineAsmOperands(Code: InlineAsm::Kind::RegUse, HasMatching: true, |
10048 | MatchingIdx: OpInfo.getMatchedOperand(), dl, DAG, |
10049 | Ops&: AsmNodeOperands); |
10050 | break; |
10051 | } |
10052 | |
10053 | assert(Flag.isMemKind() && "Unknown matching constraint!" ); |
10054 | assert(Flag.getNumOperandRegisters() == 1 && |
10055 | "Unexpected number of operands" ); |
10056 | // Add information to the INLINEASM node to know about this input. |
10057 | // See InlineAsm.h isUseOperandTiedToDef. |
10058 | Flag.clearMemConstraint(); |
10059 | Flag.setMatchingOp(OpInfo.getMatchedOperand()); |
10060 | AsmNodeOperands.push_back(x: DAG.getTargetConstant( |
10061 | Val: Flag, DL: getCurSDLoc(), VT: TLI.getPointerTy(DL: DAG.getDataLayout()))); |
10062 | AsmNodeOperands.push_back(x: AsmNodeOperands[CurOp+1]); |
10063 | break; |
10064 | } |
10065 | |
10066 | // Treat indirect 'X' constraint as memory. |
10067 | if (OpInfo.ConstraintType == TargetLowering::C_Other && |
10068 | OpInfo.isIndirect) |
10069 | OpInfo.ConstraintType = TargetLowering::C_Memory; |
10070 | |
10071 | if (OpInfo.ConstraintType == TargetLowering::C_Immediate || |
10072 | OpInfo.ConstraintType == TargetLowering::C_Other) { |
10073 | std::vector<SDValue> Ops; |
10074 | TLI.LowerAsmOperandForConstraint(Op: InOperandVal, Constraint: OpInfo.ConstraintCode, |
10075 | Ops, DAG); |
10076 | if (Ops.empty()) { |
10077 | if (OpInfo.ConstraintType == TargetLowering::C_Immediate) |
10078 | if (isa<ConstantSDNode>(Val: InOperandVal)) { |
10079 | emitInlineAsmError(Call, Message: "value out of range for constraint '" + |
10080 | Twine(OpInfo.ConstraintCode) + "'" ); |
10081 | return; |
10082 | } |
10083 | |
10084 | emitInlineAsmError(Call, |
10085 | Message: "invalid operand for inline asm constraint '" + |
10086 | Twine(OpInfo.ConstraintCode) + "'" ); |
10087 | return; |
10088 | } |
10089 | |
10090 | // Add information to the INLINEASM node to know about this input. |
10091 | InlineAsm::Flag ResOpType(InlineAsm::Kind::Imm, Ops.size()); |
10092 | AsmNodeOperands.push_back(x: DAG.getTargetConstant( |
10093 | Val: ResOpType, DL: getCurSDLoc(), VT: TLI.getPointerTy(DL: DAG.getDataLayout()))); |
10094 | llvm::append_range(C&: AsmNodeOperands, R&: Ops); |
10095 | break; |
10096 | } |
10097 | |
10098 | if (OpInfo.ConstraintType == TargetLowering::C_Memory) { |
10099 | assert((OpInfo.isIndirect || |
10100 | OpInfo.ConstraintType != TargetLowering::C_Memory) && |
10101 | "Operand must be indirect to be a mem!" ); |
10102 | assert(InOperandVal.getValueType() == |
10103 | TLI.getPointerTy(DAG.getDataLayout()) && |
10104 | "Memory operands expect pointer values" ); |
10105 | |
10106 | const InlineAsm::ConstraintCode ConstraintID = |
10107 | TLI.getInlineAsmMemConstraint(ConstraintCode: OpInfo.ConstraintCode); |
10108 | assert(ConstraintID != InlineAsm::ConstraintCode::Unknown && |
10109 | "Failed to convert memory constraint code to constraint id." ); |
10110 | |
10111 | // Add information to the INLINEASM node to know about this input. |
10112 | InlineAsm::Flag ResOpType(InlineAsm::Kind::Mem, 1); |
10113 | ResOpType.setMemConstraint(ConstraintID); |
10114 | AsmNodeOperands.push_back(x: DAG.getTargetConstant(Val: ResOpType, |
10115 | DL: getCurSDLoc(), |
10116 | VT: MVT::i32)); |
10117 | AsmNodeOperands.push_back(x: InOperandVal); |
10118 | break; |
10119 | } |
10120 | |
10121 | if (OpInfo.ConstraintType == TargetLowering::C_Address) { |
10122 | const InlineAsm::ConstraintCode ConstraintID = |
10123 | TLI.getInlineAsmMemConstraint(ConstraintCode: OpInfo.ConstraintCode); |
10124 | assert(ConstraintID != InlineAsm::ConstraintCode::Unknown && |
10125 | "Failed to convert memory constraint code to constraint id." ); |
10126 | |
10127 | InlineAsm::Flag ResOpType(InlineAsm::Kind::Mem, 1); |
10128 | |
10129 | SDValue AsmOp = InOperandVal; |
10130 | if (isFunction(Op: InOperandVal)) { |
10131 | auto *GA = cast<GlobalAddressSDNode>(Val&: InOperandVal); |
10132 | ResOpType = InlineAsm::Flag(InlineAsm::Kind::Func, 1); |
10133 | AsmOp = DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: getCurSDLoc(), |
10134 | VT: InOperandVal.getValueType(), |
10135 | offset: GA->getOffset()); |
10136 | } |
10137 | |
10138 | // Add information to the INLINEASM node to know about this input. |
10139 | ResOpType.setMemConstraint(ConstraintID); |
10140 | |
10141 | AsmNodeOperands.push_back( |
10142 | x: DAG.getTargetConstant(Val: ResOpType, DL: getCurSDLoc(), VT: MVT::i32)); |
10143 | |
10144 | AsmNodeOperands.push_back(x: AsmOp); |
10145 | break; |
10146 | } |
10147 | |
10148 | if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && |
10149 | OpInfo.ConstraintType != TargetLowering::C_Register) { |
10150 | emitInlineAsmError(Call, Message: "unknown asm constraint '" + |
10151 | Twine(OpInfo.ConstraintCode) + "'" ); |
10152 | return; |
10153 | } |
10154 | |
10155 | // TODO: Support this. |
10156 | if (OpInfo.isIndirect) { |
10157 | emitInlineAsmError( |
10158 | Call, Message: "Don't know how to handle indirect register inputs yet " |
10159 | "for constraint '" + |
10160 | Twine(OpInfo.ConstraintCode) + "'" ); |
10161 | return; |
10162 | } |
10163 | |
10164 | // Copy the input into the appropriate registers. |
10165 | if (OpInfo.AssignedRegs.Regs.empty()) { |
10166 | emitInlineAsmError(Call, |
10167 | Message: "couldn't allocate input reg for constraint '" + |
10168 | Twine(OpInfo.ConstraintCode) + "'" ); |
10169 | return; |
10170 | } |
10171 | |
10172 | if (DetectWriteToReservedRegister()) |
10173 | return; |
10174 | |
10175 | SDLoc dl = getCurSDLoc(); |
10176 | |
10177 | OpInfo.AssignedRegs.getCopyToRegs(Val: InOperandVal, DAG, dl, Chain, Glue: &Glue, |
10178 | V: &Call); |
10179 | |
10180 | OpInfo.AssignedRegs.AddInlineAsmOperands(Code: InlineAsm::Kind::RegUse, HasMatching: false, |
10181 | MatchingIdx: 0, dl, DAG, Ops&: AsmNodeOperands); |
10182 | break; |
10183 | } |
10184 | case InlineAsm::isClobber: |
10185 | // Add the clobbered value to the operand list, so that the register |
10186 | // allocator is aware that the physreg got clobbered. |
10187 | if (!OpInfo.AssignedRegs.Regs.empty()) |
10188 | OpInfo.AssignedRegs.AddInlineAsmOperands(Code: InlineAsm::Kind::Clobber, |
10189 | HasMatching: false, MatchingIdx: 0, dl: getCurSDLoc(), DAG, |
10190 | Ops&: AsmNodeOperands); |
10191 | break; |
10192 | } |
10193 | } |
10194 | |
10195 | // Finish up input operands. Set the input chain and add the flag last. |
10196 | AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; |
10197 | if (Glue.getNode()) AsmNodeOperands.push_back(x: Glue); |
10198 | |
10199 | unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM; |
10200 | Chain = DAG.getNode(Opcode: ISDOpc, DL: getCurSDLoc(), |
10201 | VTList: DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue), Ops: AsmNodeOperands); |
10202 | Glue = Chain.getValue(R: 1); |
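
  // At this point the node has the shape (a sketch):
  //
  //   ch, glue = INLINEASM[_BR] ch, asm-string, srcloc, extra-info,
  //                             (flag, operand...)*, [glue]
  //
  // The glue result lets the CopyFromReg nodes created below read the asm's
  // register outputs before anything can clobber them.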
10203 | |
10204 | // Do additional work to generate outputs. |
10205 | |
10206 | SmallVector<EVT, 1> ResultVTs; |
10207 | SmallVector<SDValue, 1> ResultValues; |
10208 | SmallVector<SDValue, 8> OutChains; |
10209 | |
10210 | llvm::Type *CallResultType = Call.getType(); |
10211 | ArrayRef<Type *> ResultTypes; |
10212 | if (StructType *StructResult = dyn_cast<StructType>(Val: CallResultType)) |
10213 | ResultTypes = StructResult->elements(); |
10214 | else if (!CallResultType->isVoidTy()) |
10215 | ResultTypes = ArrayRef(CallResultType); |
10216 | |
10217 | auto CurResultType = ResultTypes.begin(); |
10218 | auto handleRegAssign = [&](SDValue V) { |
10219 | assert(CurResultType != ResultTypes.end() && "Unexpected value" ); |
10220 | assert((*CurResultType)->isSized() && "Unexpected unsized type" ); |
10221 | EVT ResultVT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: *CurResultType); |
10222 | ++CurResultType; |
10223 |     // If the type of the inline asm call-site return value differs from, but
10224 |     // has the same size as, the type of the asm output, bitcast it. One
10225 |     // example of this is vectors with a different width / number of elements.
10226 |     // This can happen for register classes that can contain multiple
10227 |     // different value types. The preg or vreg allocated may not have the same
10228 |     // VT as was expected.
10229 | // |
10230 | // This can also happen for a return value that disagrees with the register |
10231 |     // class it is put in, e.g. a double in a general-purpose register on a
10232 | // 32-bit machine. |
10233 | if (ResultVT != V.getValueType() && |
10234 | ResultVT.getSizeInBits() == V.getValueSizeInBits()) |
10235 | V = DAG.getNode(Opcode: ISD::BITCAST, DL: getCurSDLoc(), VT: ResultVT, Operand: V); |
10236 | else if (ResultVT != V.getValueType() && ResultVT.isInteger() && |
10237 | V.getValueType().isInteger()) { |
10238 | // If a result value was tied to an input value, the computed result |
10239 | // may have a wider width than the expected result. Extract the |
10240 | // relevant portion. |
10241 | V = DAG.getNode(Opcode: ISD::TRUNCATE, DL: getCurSDLoc(), VT: ResultVT, Operand: V); |
10242 | } |
10243 | assert(ResultVT == V.getValueType() && "Asm result value mismatch!" ); |
10244 | ResultVTs.push_back(Elt: ResultVT); |
10245 | ResultValues.push_back(Elt: V); |
10246 | }; |
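
  // For example (a sketch): an i8 output tied to an i32 input may come back
  // from its register as an i32; the integer branch above then emits
  // (truncate i32 -> i8) so the recorded result matches the call-site type.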
10247 | |
10248 | // Deal with output operands. |
10249 | for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { |
10250 | if (OpInfo.Type == InlineAsm::isOutput) { |
10251 | SDValue Val; |
10252 | // Skip trivial output operands. |
10253 | if (OpInfo.AssignedRegs.Regs.empty()) |
10254 | continue; |
10255 | |
10256 | switch (OpInfo.ConstraintType) { |
10257 | case TargetLowering::C_Register: |
10258 | case TargetLowering::C_RegisterClass: |
10259 | Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, dl: getCurSDLoc(), |
10260 | Chain, Glue: &Glue, V: &Call); |
10261 | break; |
10262 | case TargetLowering::C_Immediate: |
10263 | case TargetLowering::C_Other: |
10264 | Val = TLI.LowerAsmOutputForConstraint(Chain, Glue, DL: getCurSDLoc(), |
10265 | OpInfo, DAG); |
10266 | break; |
10267 | case TargetLowering::C_Memory: |
10268 | break; // Already handled. |
10269 | case TargetLowering::C_Address: |
10270 | break; // Silence warning. |
10271 | case TargetLowering::C_Unknown: |
10272 | assert(false && "Unexpected unknown constraint" ); |
10273 | } |
10274 | |
10275 |       // Indirect outputs manifest as stores. Record the output chains.
10276 | if (OpInfo.isIndirect) { |
10277 | const Value *Ptr = OpInfo.CallOperandVal; |
10278 | assert(Ptr && "Expected value CallOperandVal for indirect asm operand" ); |
10279 | SDValue Store = DAG.getStore(Chain, dl: getCurSDLoc(), Val, Ptr: getValue(V: Ptr), |
10280 | PtrInfo: MachinePointerInfo(Ptr)); |
10281 | OutChains.push_back(Elt: Store); |
10282 | } else { |
10283 |         // Register outputs become the asm's result values.
10284 | assert(!Call.getType()->isVoidTy() && "Bad inline asm!" ); |
10285 | if (Val.getOpcode() == ISD::MERGE_VALUES) { |
10286 | for (const SDValue &V : Val->op_values()) |
10287 | handleRegAssign(V); |
10288 | } else |
10289 | handleRegAssign(Val); |
10290 | } |
10291 | } |
10292 | } |
10293 | |
10294 | // Set results. |
10295 | if (!ResultValues.empty()) { |
10296 | assert(CurResultType == ResultTypes.end() && |
10297 | "Mismatch in number of ResultTypes" ); |
10298 | assert(ResultValues.size() == ResultTypes.size() && |
10299 | "Mismatch in number of output operands in asm result" ); |
10300 | |
10301 | SDValue V = DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: getCurSDLoc(), |
10302 | VTList: DAG.getVTList(VTs: ResultVTs), Ops: ResultValues); |
10303 | setValue(V: &Call, NewN: V); |
10304 | } |
10305 | |
10306 | // Collect store chains. |
10307 | if (!OutChains.empty()) |
10308 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: getCurSDLoc(), VT: MVT::Other, Ops: OutChains); |
10309 | |
10310 | if (EmitEHLabels) { |
10311 | Chain = lowerEndEH(Chain, II: cast<InvokeInst>(Val: &Call), EHPadBB, BeginLabel); |
10312 | } |
10313 | |
10314 |   // Only update the root if the inline assembly has a memory effect.
10315 | if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr || |
10316 | EmitEHLabels) |
10317 | DAG.setRoot(Chain); |
10318 | } |
10319 | |
10320 | void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call, |
10321 | const Twine &Message) { |
10322 | LLVMContext &Ctx = *DAG.getContext(); |
10323 | Ctx.emitError(I: &Call, ErrorStr: Message); |
10324 | |
10325 | // Make sure we leave the DAG in a valid state |
10326 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
10327 | SmallVector<EVT, 1> ValueVTs; |
10328 | ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: Call.getType(), ValueVTs); |
10329 | |
10330 | if (ValueVTs.empty()) |
10331 | return; |
10332 | |
10333 | SmallVector<SDValue, 1> Ops; |
10334 | for (const EVT &VT : ValueVTs) |
10335 | Ops.push_back(Elt: DAG.getUNDEF(VT)); |
10336 | |
10337 | setValue(V: &Call, NewN: DAG.getMergeValues(Ops, dl: getCurSDLoc())); |
10338 | } |
10339 | |
10340 | void SelectionDAGBuilder::visitVAStart(const CallInst &I) { |
10341 | DAG.setRoot(DAG.getNode(Opcode: ISD::VASTART, DL: getCurSDLoc(), |
10342 | VT: MVT::Other, N1: getRoot(), |
10343 | N2: getValue(V: I.getArgOperand(i: 0)), |
10344 | N3: DAG.getSrcValue(v: I.getArgOperand(i: 0)))); |
10345 | } |
10346 | |
10347 | void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { |
10348 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
10349 | const DataLayout &DL = DAG.getDataLayout(); |
10350 | SDValue V = DAG.getVAArg( |
10351 | VT: TLI.getMemValueType(DL: DAG.getDataLayout(), Ty: I.getType()), dl: getCurSDLoc(), |
10352 | Chain: getRoot(), Ptr: getValue(V: I.getOperand(i_nocapture: 0)), SV: DAG.getSrcValue(v: I.getOperand(i_nocapture: 0)), |
10353 | Align: DL.getABITypeAlign(Ty: I.getType()).value()); |
10354 | DAG.setRoot(V.getValue(R: 1)); |
10355 | |
10356 | if (I.getType()->isPointerTy()) |
10357 | V = DAG.getPtrExtOrTrunc( |
10358 | Op: V, DL: getCurSDLoc(), VT: TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType())); |
10359 | setValue(V: &I, NewN: V); |
10360 | } |
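
// A minimal sketch of what visitVAArg lowers: for
//
//   %v = va_arg ptr %ap, i32
//
// a VAARG node yields both the i32 value and a new chain; the chain becomes
// the root so later va_arg uses of the same list stay ordered, and pointer
// results are extended or truncated from the memory VT when the types differ.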
10361 | |
10362 | void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { |
10363 | DAG.setRoot(DAG.getNode(Opcode: ISD::VAEND, DL: getCurSDLoc(), |
10364 | VT: MVT::Other, N1: getRoot(), |
10365 | N2: getValue(V: I.getArgOperand(i: 0)), |
10366 | N3: DAG.getSrcValue(v: I.getArgOperand(i: 0)))); |
10367 | } |
10368 | |
10369 | void SelectionDAGBuilder::visitVACopy(const CallInst &I) { |
10370 | DAG.setRoot(DAG.getNode(Opcode: ISD::VACOPY, DL: getCurSDLoc(), |
10371 | VT: MVT::Other, N1: getRoot(), |
10372 | N2: getValue(V: I.getArgOperand(i: 0)), |
10373 | N3: getValue(V: I.getArgOperand(i: 1)), |
10374 | N4: DAG.getSrcValue(v: I.getArgOperand(i: 0)), |
10375 | N5: DAG.getSrcValue(v: I.getArgOperand(i: 1)))); |
10376 | } |
10377 | |
10378 | SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, |
10379 | const Instruction &I, |
10380 | SDValue Op) { |
10381 | std::optional<ConstantRange> CR = getRange(I); |
10382 | |
10383 | if (!CR || CR->isFullSet() || CR->isEmptySet() || CR->isUpperWrapped()) |
10384 | return Op; |
10385 | |
10386 | APInt Lo = CR->getUnsignedMin(); |
10387 | if (!Lo.isMinValue()) |
10388 | return Op; |
10389 | |
10390 | APInt Hi = CR->getUnsignedMax(); |
10391 | unsigned Bits = std::max(a: Hi.getActiveBits(), |
10392 | b: static_cast<unsigned>(IntegerType::MIN_INT_BITS)); |
10393 | |
10394 | EVT SmallVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: Bits); |
10395 | |
10396 | SDLoc SL = getCurSDLoc(); |
10397 | |
10398 | SDValue ZExt = DAG.getNode(Opcode: ISD::AssertZext, DL: SL, VT: Op.getValueType(), N1: Op, |
10399 | N2: DAG.getValueType(SmallVT)); |
10400 | unsigned NumVals = Op.getNode()->getNumValues(); |
10401 | if (NumVals == 1) |
10402 | return ZExt; |
10403 | |
10404 | SmallVector<SDValue, 4> Ops; |
10405 | |
10406 | Ops.push_back(Elt: ZExt); |
10407 | for (unsigned I = 1; I != NumVals; ++I) |
10408 | Ops.push_back(Elt: Op.getValue(R: I)); |
10409 | |
10410 | return DAG.getMergeValues(Ops, dl: SL); |
10411 | } |
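
// Example for lowerRangeToAssertZExt (a sketch): an i32 call result carrying
// !range !{i32 0, i32 256} has the unsigned range [0, 255], so Bits is 8 and
// the value is wrapped as (AssertZext i32 Op, ValueType:i8), telling later
// combines that the upper 24 bits are known zero.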
10412 | |
10413 | /// Populate a CallLoweringInfo (into \p CLI) based on the properties of
10414 | /// the call being lowered. |
10415 | /// |
10416 | /// This is a helper for lowering intrinsics that follow a target calling |
10417 | /// convention or require stack pointer adjustment. Only a subset of the |
10418 | /// intrinsic's operands need to participate in the calling convention. |
10419 | void SelectionDAGBuilder::populateCallLoweringInfo( |
10420 | TargetLowering::CallLoweringInfo &CLI, const CallBase *Call, |
10421 | unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy, |
10422 | AttributeSet RetAttrs, bool IsPatchPoint) { |
10423 | TargetLowering::ArgListTy Args; |
10424 | Args.reserve(n: NumArgs); |
10425 | |
10426 | // Populate the argument list. |
10427 | // Attributes for args start at offset 1, after the return attribute. |
10428 | for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; |
10429 | ArgI != ArgE; ++ArgI) { |
10430 | const Value *V = Call->getOperand(i_nocapture: ArgI); |
10431 | |
10432 | assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic." ); |
10433 | |
10434 | TargetLowering::ArgListEntry Entry; |
10435 | Entry.Node = getValue(V); |
10436 | Entry.Ty = V->getType(); |
10437 | Entry.setAttributes(Call, ArgIdx: ArgI); |
10438 | Args.push_back(x: Entry); |
10439 | } |
10440 | |
10441 | CLI.setDebugLoc(getCurSDLoc()) |
10442 | .setChain(getRoot()) |
10443 | .setCallee(CC: Call->getCallingConv(), ResultType: ReturnTy, Target: Callee, ArgsList: std::move(Args), |
10444 | ResultAttrs: RetAttrs) |
10445 | .setDiscardResult(Call->use_empty()) |
10446 | .setIsPatchPoint(IsPatchPoint) |
10447 | .setIsPreallocated( |
10448 | Call->countOperandBundlesOfType(ID: LLVMContext::OB_preallocated) != 0); |
10449 | } |
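
// Typical use of populateCallLoweringInfo (see visitPatchpoint below): only
// the NumArgs operands starting at ArgIdx take part in the calling
// convention; meta-operands such as the id and byte counts are appended to
// the target node by the caller instead.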
10450 | |
10451 | /// Add a stack map intrinsic call's live variable operands to a stackmap |
10452 | /// or patchpoint target node's operand list. |
10453 | /// |
10454 | /// Constants are converted to TargetConstants purely as an optimization to |
10455 | /// avoid constant materialization and register allocation. |
10456 | /// |
10457 | /// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not |
10458 | /// generate address computation nodes, and so FinalizeISel can convert the
10459 | /// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids |
10460 | /// address materialization and register allocation, but may also be required |
10461 | /// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an |
10462 | /// alloca in the entry block, then the runtime may assume that the alloca's |
10463 | /// StackMap location can be read immediately after compilation and that the |
10464 | /// location is valid at any point during execution (this is similar to the |
10465 | /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were |
10466 | /// only available in a register, then the runtime would need to trap when |
10467 | /// execution reaches the StackMap in order to read the alloca's location. |
10468 | static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx, |
10469 | const SDLoc &DL, SmallVectorImpl<SDValue> &Ops, |
10470 | SelectionDAGBuilder &Builder) { |
10471 | SelectionDAG &DAG = Builder.DAG; |
10472 | for (unsigned I = StartIdx; I < Call.arg_size(); I++) { |
10473 | SDValue Op = Builder.getValue(V: Call.getArgOperand(i: I)); |
10474 | |
10475 | // Things on the stack are pointer-typed, meaning that they are already |
10476 | // legal and can be emitted directly to target nodes. |
10477 | if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Op)) { |
10478 | Ops.push_back(Elt: DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: Op.getValueType())); |
10479 | } else { |
10480 | // Otherwise emit a target independent node to be legalised. |
10481 | Ops.push_back(Elt: Builder.getValue(V: Call.getArgOperand(i: I))); |
10482 | } |
10483 | } |
10484 | } |
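
// A concrete case for addStackMapLiveVars (a sketch): a live variable that
// is an entry-block alloca arrives here as a FrameIndexSDNode, so the node
// gets a TargetFrameIndex that FinalizeISel can describe as a direct
// frame-register + offset stack map location, as the comment above requires.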
10485 | |
10486 | /// Lower llvm.experimental.stackmap. |
10487 | void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { |
10488 | // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, |
10489 | // [live variables...]) |
10490 | |
10491 | assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value." ); |
10492 | |
10493 | SDValue Chain, InGlue, Callee; |
10494 | SmallVector<SDValue, 32> Ops; |
10495 | |
10496 | SDLoc DL = getCurSDLoc(); |
10497 | Callee = getValue(V: CI.getCalledOperand()); |
10498 | |
10499 | // The stackmap intrinsic only records the live variables (the arguments |
10500 |   // passed to it) and emits NOPs (if requested). Unlike the patchpoint
10501 | // intrinsic, this won't be lowered to a function call. This means we don't |
10502 | // have to worry about calling conventions and target specific lowering code. |
10503 | // Instead we perform the call lowering right here. |
10504 | // |
10505 | // chain, flag = CALLSEQ_START(chain, 0, 0) |
10506 | // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) |
10507 | // chain, flag = CALLSEQ_END(chain, 0, 0, flag) |
10508 | // |
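  // For example (illustrative IR):
  //
  //   call void @llvm.experimental.stackmap(i64 42, i32 8, ptr %p, i64 %x)
  //
  // records %p and %x as live values under stack map id 42 and guarantees at
  // least 8 bytes of instructions at the patch site, padding with nops if the
  // following instructions don't already cover them.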
10509 | Chain = DAG.getCALLSEQ_START(Chain: getRoot(), InSize: 0, OutSize: 0, DL); |
10510 | InGlue = Chain.getValue(R: 1); |
10511 | |
10512 | // Add the STACKMAP operands, starting with DAG house-keeping. |
10513 | Ops.push_back(Elt: Chain); |
10514 | Ops.push_back(Elt: InGlue); |
10515 | |
10516 | // Add the <id>, <numShadowBytes> operands. |
10517 | // |
10518 | // These do not require legalisation, and can be emitted directly to target |
10519 | // constant nodes. |
10520 | SDValue ID = getValue(V: CI.getArgOperand(i: 0)); |
10521 | assert(ID.getValueType() == MVT::i64); |
10522 | SDValue IDConst = |
10523 | DAG.getTargetConstant(Val: ID->getAsZExtVal(), DL, VT: ID.getValueType()); |
10524 | Ops.push_back(Elt: IDConst); |
10525 | |
10526 | SDValue Shad = getValue(V: CI.getArgOperand(i: 1)); |
10527 | assert(Shad.getValueType() == MVT::i32); |
10528 | SDValue ShadConst = |
10529 | DAG.getTargetConstant(Val: Shad->getAsZExtVal(), DL, VT: Shad.getValueType()); |
10530 | Ops.push_back(Elt: ShadConst); |
10531 | |
10532 | // Add the live variables. |
10533 | addStackMapLiveVars(Call: CI, StartIdx: 2, DL, Ops, Builder&: *this); |
10534 | |
10535 | // Create the STACKMAP node. |
10536 | SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue); |
10537 | Chain = DAG.getNode(Opcode: ISD::STACKMAP, DL, VTList: NodeTys, Ops); |
10538 | InGlue = Chain.getValue(R: 1); |
10539 | |
10540 | Chain = DAG.getCALLSEQ_END(Chain, Size1: 0, Size2: 0, Glue: InGlue, DL); |
10541 | |
10542 | // Stackmaps don't generate values, so nothing goes into the NodeMap. |
10543 | |
10544 | // Set the root to the target-lowered call chain. |
10545 | DAG.setRoot(Chain); |
10546 | |
10547 | // Inform the Frame Information that we have a stackmap in this function. |
10548 | FuncInfo.MF->getFrameInfo().setHasStackMap(); |
10549 | } |
10550 | |
10551 | /// Lower llvm.experimental.patchpoint directly to its target opcode. |
10552 | void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, |
10553 | const BasicBlock *EHPadBB) { |
10554 | // <ty> @llvm.experimental.patchpoint.<ty>(i64 <id>, |
10555 | // i32 <numBytes>, |
10556 | // i8* <target>, |
10557 | // i32 <numArgs>, |
10558 | // [Args...], |
10559 | // [live variables...]) |
10560 | |
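  // For example (illustrative IR, non-anyreg):
  //
  //   %r = call i64 (i64, i32, ptr, i32, ...)
  //       @llvm.experimental.patchpoint.i64(i64 7, i32 15, ptr @f, i32 2,
  //                                         i64 %a, i64 %b, i64 %live)
  //
  // lowers %a and %b through the call's calling convention, reserves 15 bytes
  // at the patch site, and records %live in the stack map entry for id 7.
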
10561 | CallingConv::ID CC = CB.getCallingConv(); |
10562 | bool IsAnyRegCC = CC == CallingConv::AnyReg; |
10563 | bool HasDef = !CB.getType()->isVoidTy(); |
10564 | SDLoc dl = getCurSDLoc(); |
10565 | SDValue Callee = getValue(V: CB.getArgOperand(i: PatchPointOpers::TargetPos)); |
10566 | |
10567 | // Handle immediate and symbolic callees. |
10568 | if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Val&: Callee)) |
10569 | Callee = DAG.getIntPtrConstant(Val: ConstCallee->getZExtValue(), DL: dl, |
10570 | /*isTarget=*/true); |
10571 | else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) |
10572 | Callee = DAG.getTargetGlobalAddress(GV: SymbolicCallee->getGlobal(), |
10573 | DL: SDLoc(SymbolicCallee), |
10574 | VT: SymbolicCallee->getValueType(ResNo: 0)); |
10575 | |
10576 | // Get the real number of arguments participating in the call <numArgs> |
10577 | SDValue NArgVal = getValue(V: CB.getArgOperand(i: PatchPointOpers::NArgPos)); |
10578 | unsigned NumArgs = NArgVal->getAsZExtVal(); |
10579 | |
10580 | // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> |
10581 | // Intrinsics include all meta-operands up to but not including CC. |
10582 | unsigned NumMetaOpers = PatchPointOpers::CCPos; |
10583 | assert(CB.arg_size() >= NumMetaOpers + NumArgs && |
10584 | "Not enough arguments provided to the patchpoint intrinsic" ); |
10585 | |
10586 | // For AnyRegCC the arguments are lowered later on manually. |
10587 | unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; |
10588 | Type *ReturnTy = |
10589 | IsAnyRegCC ? Type::getVoidTy(C&: *DAG.getContext()) : CB.getType(); |
10590 | |
10591 | TargetLowering::CallLoweringInfo CLI(DAG); |
10592 | populateCallLoweringInfo(CLI, Call: &CB, ArgIdx: NumMetaOpers, NumArgs: NumCallArgs, Callee, |
10593 | ReturnTy, RetAttrs: CB.getAttributes().getRetAttrs(), IsPatchPoint: true); |
10594 | std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); |
10595 | |
10596 | SDNode *CallEnd = Result.second.getNode(); |
10597 | if (CallEnd->getOpcode() == ISD::EH_LABEL) |
10598 | CallEnd = CallEnd->getOperand(Num: 0).getNode(); |
10599 | if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) |
10600 | CallEnd = CallEnd->getOperand(Num: 0).getNode(); |
10601 | |
10602 |   // Get a call instruction from the call sequence chain. Tail calls are not
10603 |   // allowed.
10604 | assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && |
10605 | "Expected a callseq node." ); |
10606 | SDNode *Call = CallEnd->getOperand(Num: 0).getNode(); |
10607 | bool HasGlue = Call->getGluedNode(); |
10608 | |
10609 | // Replace the target specific call node with the patchable intrinsic. |
10610 | SmallVector<SDValue, 8> Ops; |
10611 | |
10612 | // Push the chain. |
10613 | Ops.push_back(Elt: *(Call->op_begin())); |
10614 | |
10615 | // Optionally, push the glue (if any). |
10616 | if (HasGlue) |
10617 | Ops.push_back(Elt: *(Call->op_end() - 1)); |
10618 | |
10619 | // Push the register mask info. |
10620 | if (HasGlue) |
10621 | Ops.push_back(Elt: *(Call->op_end() - 2)); |
10622 | else |
10623 | Ops.push_back(Elt: *(Call->op_end() - 1)); |
10624 | |
10625 | // Add the <id> and <numBytes> constants. |
10626 | SDValue IDVal = getValue(V: CB.getArgOperand(i: PatchPointOpers::IDPos)); |
10627 | Ops.push_back(Elt: DAG.getTargetConstant(Val: IDVal->getAsZExtVal(), DL: dl, VT: MVT::i64)); |
10628 | SDValue NBytesVal = getValue(V: CB.getArgOperand(i: PatchPointOpers::NBytesPos)); |
10629 | Ops.push_back(Elt: DAG.getTargetConstant(Val: NBytesVal->getAsZExtVal(), DL: dl, VT: MVT::i32)); |
10630 | |
10631 | // Add the callee. |
10632 | Ops.push_back(Elt: Callee); |
10633 | |
10634 | // Adjust <numArgs> to account for any arguments that have been passed on the |
10635 | // stack instead. |
10636 | // Call Node: Chain, Target, {Args}, RegMask, [Glue] |
10637 | unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); |
10638 | NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs; |
10639 | Ops.push_back(Elt: DAG.getTargetConstant(Val: NumCallRegArgs, DL: dl, VT: MVT::i32)); |
10640 | |
10641 | // Add the calling convention |
10642 | Ops.push_back(Elt: DAG.getTargetConstant(Val: (unsigned)CC, DL: dl, VT: MVT::i32)); |
10643 | |
10644 | // Add the arguments we omitted previously. The register allocator should |
10645 | // place these in any free register. |
10646 | if (IsAnyRegCC) |
10647 | for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) |
10648 | Ops.push_back(Elt: getValue(V: CB.getArgOperand(i))); |
10649 | |
10650 | // Push the arguments from the call instruction. |
10651 | SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; |
10652 | Ops.append(in_start: Call->op_begin() + 2, in_end: e); |
10653 | |
10654 | // Push live variables for the stack map. |
10655 | addStackMapLiveVars(Call: CB, StartIdx: NumMetaOpers + NumArgs, DL: dl, Ops, Builder&: *this); |
10656 | |
10657 | SDVTList NodeTys; |
10658 | if (IsAnyRegCC && HasDef) { |
10659 | // Create the return types based on the intrinsic definition |
10660 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
10661 | SmallVector<EVT, 3> ValueVTs; |
10662 | ComputeValueVTs(TLI, DL: DAG.getDataLayout(), Ty: CB.getType(), ValueVTs); |
10663 | assert(ValueVTs.size() == 1 && "Expected only one return value type." ); |
10664 | |
10665 | // There is always a chain and a glue type at the end |
10666 | ValueVTs.push_back(Elt: MVT::Other); |
10667 | ValueVTs.push_back(Elt: MVT::Glue); |
10668 | NodeTys = DAG.getVTList(VTs: ValueVTs); |
10669 | } else |
10670 | NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue); |
10671 | |
10672 | // Replace the target specific call node with a PATCHPOINT node. |
10673 | SDValue PPV = DAG.getNode(Opcode: ISD::PATCHPOINT, DL: dl, VTList: NodeTys, Ops); |
10674 | |
10675 | // Update the NodeMap. |
10676 | if (HasDef) { |
10677 | if (IsAnyRegCC) |
10678 | setValue(V: &CB, NewN: SDValue(PPV.getNode(), 0)); |
10679 | else |
10680 | setValue(V: &CB, NewN: Result.first); |
10681 | } |
10682 | |
10683 | // Fixup the consumers of the intrinsic. The chain and glue may be used in the |
10684 | // call sequence. Furthermore the location of the chain and glue can change |
10685 | // when the AnyReg calling convention is used and the intrinsic returns a |
10686 | // value. |
10687 | if (IsAnyRegCC && HasDef) { |
10688 | SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; |
10689 | SDValue To[] = {PPV.getValue(R: 1), PPV.getValue(R: 2)}; |
10690 | DAG.ReplaceAllUsesOfValuesWith(From, To, Num: 2); |
10691 | } else |
10692 | DAG.ReplaceAllUsesWith(From: Call, To: PPV.getNode()); |
10693 | DAG.DeleteNode(N: Call); |
10694 | |
10695 | // Inform the Frame Information that we have a patchpoint in this function. |
10696 | FuncInfo.MF->getFrameInfo().setHasPatchPoint(); |
10697 | } |
10698 | |
10699 | void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, |
10700 | unsigned Intrinsic) { |
10701 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
10702 | SDValue Op1 = getValue(V: I.getArgOperand(i: 0)); |
10703 | SDValue Op2; |
10704 | if (I.arg_size() > 1) |
10705 | Op2 = getValue(V: I.getArgOperand(i: 1)); |
10706 | SDLoc dl = getCurSDLoc(); |
10707 | EVT VT = TLI.getValueType(DL: DAG.getDataLayout(), Ty: I.getType()); |
10708 | SDValue Res; |
10709 | SDNodeFlags SDFlags; |
10710 | if (auto *FPMO = dyn_cast<FPMathOperator>(Val: &I)) |
10711 | SDFlags.copyFMF(FPMO: *FPMO); |
10712 | |
10713 | switch (Intrinsic) { |
10714 | case Intrinsic::vector_reduce_fadd: |
10715 | if (SDFlags.hasAllowReassociation()) |
10716 | Res = DAG.getNode(Opcode: ISD::FADD, DL: dl, VT, N1: Op1, |
10717 | N2: DAG.getNode(Opcode: ISD::VECREDUCE_FADD, DL: dl, VT, Operand: Op2, Flags: SDFlags), |
10718 | Flags: SDFlags); |
10719 | else |
10720 | Res = DAG.getNode(Opcode: ISD::VECREDUCE_SEQ_FADD, DL: dl, VT, N1: Op1, N2: Op2, Flags: SDFlags); |
10721 | break; |
10722 | case Intrinsic::vector_reduce_fmul: |
10723 | if (SDFlags.hasAllowReassociation()) |
10724 | Res = DAG.getNode(Opcode: ISD::FMUL, DL: dl, VT, N1: Op1, |
10725 | N2: DAG.getNode(Opcode: ISD::VECREDUCE_FMUL, DL: dl, VT, Operand: Op2, Flags: SDFlags), |
10726 | Flags: SDFlags); |
10727 | else |
10728 | Res = DAG.getNode(Opcode: ISD::VECREDUCE_SEQ_FMUL, DL: dl, VT, N1: Op1, N2: Op2, Flags: SDFlags); |
10729 | break; |
10730 | case Intrinsic::vector_reduce_add: |
10731 | Res = DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL: dl, VT, Operand: Op1); |
10732 | break; |
10733 | case Intrinsic::vector_reduce_mul: |
10734 | Res = DAG.getNode(Opcode: ISD::VECREDUCE_MUL, DL: dl, VT, Operand: Op1); |
10735 | break; |
10736 | case Intrinsic::vector_reduce_and: |
10737 | Res = DAG.getNode(Opcode: ISD::VECREDUCE_AND, DL: dl, VT, Operand: Op1); |
10738 | break; |
10739 | case Intrinsic::vector_reduce_or: |
10740 | Res = DAG.getNode(Opcode: ISD::VECREDUCE_OR, DL: dl, VT, Operand: Op1); |
10741 | break; |
10742 | case Intrinsic::vector_reduce_xor: |
10743 | Res = DAG.getNode(Opcode: ISD::VECREDUCE_XOR, DL: dl, VT, Operand: Op1); |
10744 | break; |
10745 | case Intrinsic::vector_reduce_smax: |
10746 | Res = DAG.getNode(Opcode: ISD::VECREDUCE_SMAX, DL: dl, VT, Operand: Op1); |
10747 | break; |
10748 | case Intrinsic::vector_reduce_smin: |
10749 | Res = DAG.getNode(Opcode: ISD::VECREDUCE_SMIN, DL: dl, VT, Operand: Op1); |
10750 | break; |
10751 | case Intrinsic::vector_reduce_umax: |
10752 | Res = DAG.getNode(Opcode: ISD::VECREDUCE_UMAX, DL: dl, VT, Operand: Op1); |
10753 | break; |
10754 | case Intrinsic::vector_reduce_umin: |
10755 | Res = DAG.getNode(Opcode: ISD::VECREDUCE_UMIN, DL: dl, VT, Operand: Op1); |
10756 | break; |
10757 | case Intrinsic::vector_reduce_fmax: |
10758 | Res = DAG.getNode(Opcode: ISD::VECREDUCE_FMAX, DL: dl, VT, Operand: Op1, Flags: SDFlags); |
10759 | break; |
10760 | case Intrinsic::vector_reduce_fmin: |
10761 | Res = DAG.getNode(Opcode: ISD::VECREDUCE_FMIN, DL: dl, VT, Operand: Op1, Flags: SDFlags); |
10762 | break; |
10763 | case Intrinsic::vector_reduce_fmaximum: |
10764 | Res = DAG.getNode(Opcode: ISD::VECREDUCE_FMAXIMUM, DL: dl, VT, Operand: Op1, Flags: SDFlags); |
10765 | break; |
10766 | case Intrinsic::vector_reduce_fminimum: |
10767 | Res = DAG.getNode(Opcode: ISD::VECREDUCE_FMINIMUM, DL: dl, VT, Operand: Op1, Flags: SDFlags); |
10768 | break; |
10769 | default: |
10770 | llvm_unreachable("Unhandled vector reduce intrinsic" ); |
10771 | } |
10772 | setValue(V: &I, NewN: Res); |
10773 | } |
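
// For example (a sketch): with reassociation allowed,
//
//   %r = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %acc,
//                                                          <4 x float> %v)
//
// becomes (fadd %acc, (vecreduce_fadd %v)); without reassoc the ordered
// VECREDUCE_SEQ_FADD form is used, with %acc as the start value.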
10774 | |
10775 | /// Returns an AttributeList representing the attributes applied to the return |
10776 | /// value of the given call. |
10777 | static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { |
10778 | SmallVector<Attribute::AttrKind, 2> Attrs; |
10779 | if (CLI.RetSExt) |
10780 | Attrs.push_back(Elt: Attribute::SExt); |
10781 | if (CLI.RetZExt) |
10782 | Attrs.push_back(Elt: Attribute::ZExt); |
10783 | if (CLI.IsInReg) |
10784 | Attrs.push_back(Elt: Attribute::InReg); |
10785 | |
10786 | return AttributeList::get(C&: CLI.RetTy->getContext(), Index: AttributeList::ReturnIndex, |
10787 | Kinds: Attrs); |
10788 | } |
10789 | |
10790 | /// TargetLowering::LowerCallTo - This is the default LowerCallTo |
10791 | /// implementation, which just calls LowerCall. |
10792 | /// FIXME: When all targets are migrated to using LowerCall, this hook should
10793 | /// be integrated into SDISel.
10794 | std::pair<SDValue, SDValue> |
10795 | TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { |
10796 | // Handle the incoming return values from the call. |
10797 | CLI.Ins.clear(); |
10798 | Type *OrigRetTy = CLI.RetTy; |
10799 | SmallVector<EVT, 4> RetTys; |
10800 | SmallVector<TypeSize, 4> Offsets; |
10801 | auto &DL = CLI.DAG.getDataLayout(); |
10802 | ComputeValueVTs(TLI: *this, DL, Ty: CLI.RetTy, ValueVTs&: RetTys, Offsets: &Offsets); |
10803 | |
10804 | if (CLI.IsPostTypeLegalization) { |
10805 | // If we are lowering a libcall after legalization, split the return type. |
10806 | SmallVector<EVT, 4> OldRetTys; |
10807 | SmallVector<TypeSize, 4> OldOffsets; |
10808 | RetTys.swap(RHS&: OldRetTys); |
10809 | Offsets.swap(RHS&: OldOffsets); |
10810 | |
10811 | for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) { |
10812 | EVT RetVT = OldRetTys[i]; |
10813 | uint64_t Offset = OldOffsets[i]; |
10814 | MVT RegisterVT = getRegisterType(Context&: CLI.RetTy->getContext(), VT: RetVT); |
10815 | unsigned NumRegs = getNumRegisters(Context&: CLI.RetTy->getContext(), VT: RetVT); |
10816 | unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8; |
10817 | RetTys.append(NumInputs: NumRegs, Elt: RegisterVT); |
10818 | for (unsigned j = 0; j != NumRegs; ++j) |
10819 | Offsets.push_back(Elt: TypeSize::getFixed(ExactSize: Offset + j * RegisterVTByteSZ)); |
10820 | } |
10821 | } |
10822 | |
10823 | SmallVector<ISD::OutputArg, 4> Outs; |
10824 | GetReturnInfo(CC: CLI.CallConv, ReturnType: CLI.RetTy, attr: getReturnAttrs(CLI), Outs, TLI: *this, DL); |
10825 | |
10826 | bool CanLowerReturn = |
10827 | this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), |
10828 | CLI.IsVarArg, Outs, CLI.RetTy->getContext()); |
10829 | |
10830 | SDValue DemoteStackSlot; |
10831 | int DemoteStackIdx = -100; |
10832 | if (!CanLowerReturn) { |
10833 | // FIXME: equivalent assert? |
10834 | // assert(!CS.hasInAllocaArgument() && |
10835 | // "sret demotion is incompatible with inalloca"); |
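    //
    // A sketch of the demotion performed here: for a call such as
    //
    //   %res = call { i64, i64, i64, i64, i64 } @f()
    //
    // whose aggregate result the target cannot return in registers, we
    // allocate a stack slot, prepend its address as an sret argument, lower
    // the call as returning void, and reload the pieces from the slot in the
    // !CanLowerReturn block near the end of this function.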
10836 | uint64_t TySize = DL.getTypeAllocSize(Ty: CLI.RetTy); |
10837 | Align Alignment = DL.getPrefTypeAlign(Ty: CLI.RetTy); |
10838 | MachineFunction &MF = CLI.DAG.getMachineFunction(); |
10839 | DemoteStackIdx = |
10840 | MF.getFrameInfo().CreateStackObject(Size: TySize, Alignment, isSpillSlot: false); |
10841 | Type *StackSlotPtrType = PointerType::get(ElementType: CLI.RetTy, |
10842 | AddressSpace: DL.getAllocaAddrSpace()); |
10843 | |
10844 | DemoteStackSlot = CLI.DAG.getFrameIndex(FI: DemoteStackIdx, VT: getFrameIndexTy(DL)); |
10845 | ArgListEntry Entry; |
10846 | Entry.Node = DemoteStackSlot; |
10847 | Entry.Ty = StackSlotPtrType; |
10848 | Entry.IsSExt = false; |
10849 | Entry.IsZExt = false; |
10850 | Entry.IsInReg = false; |
10851 | Entry.IsSRet = true; |
10852 | Entry.IsNest = false; |
10853 | Entry.IsByVal = false; |
10854 | Entry.IsByRef = false; |
10855 | Entry.IsReturned = false; |
10856 | Entry.IsSwiftSelf = false; |
10857 | Entry.IsSwiftAsync = false; |
10858 | Entry.IsSwiftError = false; |
10859 | Entry.IsCFGuardTarget = false; |
10860 | Entry.Alignment = Alignment; |
10861 | CLI.getArgs().insert(position: CLI.getArgs().begin(), x: Entry); |
10862 | CLI.NumFixedArgs += 1; |
10863 | CLI.getArgs()[0].IndirectType = CLI.RetTy; |
10864 | CLI.RetTy = Type::getVoidTy(C&: CLI.RetTy->getContext()); |
10865 | |
10866 | // sret demotion isn't compatible with tail-calls, since the sret argument |
10867 | // points into the callers stack frame. |
10868 | CLI.IsTailCall = false; |
10869 | } else { |
10870 | bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( |
10871 | Ty: CLI.RetTy, CallConv: CLI.CallConv, isVarArg: CLI.IsVarArg, DL); |
10872 | for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { |
10873 | ISD::ArgFlagsTy Flags; |
10874 | if (NeedsRegBlock) { |
10875 | Flags.setInConsecutiveRegs(); |
10876 | if (I == RetTys.size() - 1) |
10877 | Flags.setInConsecutiveRegsLast(); |
10878 | } |
10879 | EVT VT = RetTys[I]; |
10880 | MVT RegisterVT = getRegisterTypeForCallingConv(Context&: CLI.RetTy->getContext(), |
10881 | CC: CLI.CallConv, VT); |
10882 | unsigned NumRegs = getNumRegistersForCallingConv(Context&: CLI.RetTy->getContext(), |
10883 | CC: CLI.CallConv, VT); |
10884 | for (unsigned i = 0; i != NumRegs; ++i) { |
10885 | ISD::InputArg MyFlags; |
10886 | MyFlags.Flags = Flags; |
10887 | MyFlags.VT = RegisterVT; |
10888 | MyFlags.ArgVT = VT; |
10889 | MyFlags.Used = CLI.IsReturnValueUsed; |
10890 | if (CLI.RetTy->isPointerTy()) { |
10891 | MyFlags.Flags.setPointer(); |
10892 | MyFlags.Flags.setPointerAddrSpace( |
10893 | cast<PointerType>(Val: CLI.RetTy)->getAddressSpace()); |
10894 | } |
10895 | if (CLI.RetSExt) |
10896 | MyFlags.Flags.setSExt(); |
10897 | if (CLI.RetZExt) |
10898 | MyFlags.Flags.setZExt(); |
10899 | if (CLI.IsInReg) |
10900 | MyFlags.Flags.setInReg(); |
10901 | CLI.Ins.push_back(Elt: MyFlags); |
10902 | } |
10903 | } |
10904 | } |
10905 | |
10906 | // We push in swifterror return as the last element of CLI.Ins. |
10907 | ArgListTy &Args = CLI.getArgs(); |
10908 | if (supportSwiftError()) { |
10909 | for (const ArgListEntry &Arg : Args) { |
10910 | if (Arg.IsSwiftError) { |
10911 | ISD::InputArg MyFlags; |
10912 | MyFlags.VT = getPointerTy(DL); |
10913 | MyFlags.ArgVT = EVT(getPointerTy(DL)); |
10914 | MyFlags.Flags.setSwiftError(); |
10915 | CLI.Ins.push_back(Elt: MyFlags); |
10916 | } |
10917 | } |
10918 | } |
10919 | |
10920 | // Handle all of the outgoing arguments. |
10921 | CLI.Outs.clear(); |
10922 | CLI.OutVals.clear(); |
10923 | for (unsigned i = 0, e = Args.size(); i != e; ++i) { |
10924 | SmallVector<EVT, 4> ValueVTs; |
10925 | ComputeValueVTs(TLI: *this, DL, Ty: Args[i].Ty, ValueVTs); |
10926 | // FIXME: Split arguments if CLI.IsPostTypeLegalization |
10927 | Type *FinalType = Args[i].Ty; |
10928 | if (Args[i].IsByVal) |
10929 | FinalType = Args[i].IndirectType; |
10930 | bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( |
10931 | Ty: FinalType, CallConv: CLI.CallConv, isVarArg: CLI.IsVarArg, DL); |
10932 | for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; |
10933 | ++Value) { |
10934 | EVT VT = ValueVTs[Value]; |
10935 | Type *ArgTy = VT.getTypeForEVT(Context&: CLI.RetTy->getContext()); |
10936 | SDValue Op = SDValue(Args[i].Node.getNode(), |
10937 | Args[i].Node.getResNo() + Value); |
10938 | ISD::ArgFlagsTy Flags; |
10939 | |
10940 |       // Certain targets (such as MIPS) may have a different ABI alignment
10941 | // for a type depending on the context. Give the target a chance to |
10942 | // specify the alignment it wants. |
10943 | const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL)); |
10944 | Flags.setOrigAlign(OriginalAlignment); |
10945 | |
10946 | if (Args[i].Ty->isPointerTy()) { |
10947 | Flags.setPointer(); |
10948 | Flags.setPointerAddrSpace( |
10949 | cast<PointerType>(Val: Args[i].Ty)->getAddressSpace()); |
10950 | } |
10951 | if (Args[i].IsZExt) |
10952 | Flags.setZExt(); |
10953 | if (Args[i].IsSExt) |
10954 | Flags.setSExt(); |
10955 | if (Args[i].IsInReg) { |
10956 |         // If we are using the vectorcall calling convention, a structure that
10957 |         // is passed InReg is surely an HVA.
10958 | if (CLI.CallConv == CallingConv::X86_VectorCall && |
10959 | isa<StructType>(Val: FinalType)) { |
10960 |           // The first value of a structure is marked as the HVA start.
10961 | if (0 == Value) |
10962 | Flags.setHvaStart(); |
10963 | Flags.setHva(); |
10964 | } |
10965 | // Set InReg Flag |
10966 | Flags.setInReg(); |
10967 | } |
10968 | if (Args[i].IsSRet) |
10969 | Flags.setSRet(); |
10970 | if (Args[i].IsSwiftSelf) |
10971 | Flags.setSwiftSelf(); |
10972 | if (Args[i].IsSwiftAsync) |
10973 | Flags.setSwiftAsync(); |
10974 | if (Args[i].IsSwiftError) |
10975 | Flags.setSwiftError(); |
10976 | if (Args[i].IsCFGuardTarget) |
10977 | Flags.setCFGuardTarget(); |
10978 | if (Args[i].IsByVal) |
10979 | Flags.setByVal(); |
10980 | if (Args[i].IsByRef) |
10981 | Flags.setByRef(); |
10982 | if (Args[i].IsPreallocated) { |
10983 | Flags.setPreallocated(); |
10984 | // Set the byval flag for CCAssignFn callbacks that don't know about |
10985 | // preallocated. This way we can know how many bytes we should've |
10986 | // allocated and how many bytes a callee cleanup function will pop. If |
10987 | // we port preallocated to more targets, we'll have to add custom |
10988 | // preallocated handling in the various CC lowering callbacks. |
10989 | Flags.setByVal(); |
10990 | } |
10991 | if (Args[i].IsInAlloca) { |
10992 | Flags.setInAlloca(); |
10993 | // Set the byval flag for CCAssignFn callbacks that don't know about |
10994 | // inalloca. This way we can know how many bytes we should've allocated |
10995 | // and how many bytes a callee cleanup function will pop. If we port |
10996 | // inalloca to more targets, we'll have to add custom inalloca handling |
10997 | // in the various CC lowering callbacks. |
10998 | Flags.setByVal(); |
10999 | } |
11000 | Align MemAlign; |
11001 | if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) { |
11002 | unsigned FrameSize = DL.getTypeAllocSize(Ty: Args[i].IndirectType); |
11003 | Flags.setByValSize(FrameSize); |
11004 |         // For ByVal, alignment should come from the frontend; the backend
11005 |         // will guess if it is absent, but it cannot always get it right.
11006 | if (auto MA = Args[i].Alignment) |
11007 | MemAlign = *MA; |
11008 | else |
11009 | MemAlign = Align(getByValTypeAlignment(Ty: Args[i].IndirectType, DL)); |
11010 | } else if (auto MA = Args[i].Alignment) { |
11011 | MemAlign = *MA; |
11012 | } else { |
11013 | MemAlign = OriginalAlignment; |
11014 | } |
11015 | Flags.setMemAlign(MemAlign); |
11016 | if (Args[i].IsNest) |
11017 | Flags.setNest(); |
11018 | if (NeedsRegBlock) |
11019 | Flags.setInConsecutiveRegs(); |
11020 | |
11021 | MVT PartVT = getRegisterTypeForCallingConv(Context&: CLI.RetTy->getContext(), |
11022 | CC: CLI.CallConv, VT); |
11023 | unsigned NumParts = getNumRegistersForCallingConv(Context&: CLI.RetTy->getContext(), |
11024 | CC: CLI.CallConv, VT); |
11025 | SmallVector<SDValue, 4> Parts(NumParts); |
11026 | ISD::NodeType ExtendKind = ISD::ANY_EXTEND; |
11027 | |
11028 | if (Args[i].IsSExt) |
11029 | ExtendKind = ISD::SIGN_EXTEND; |
11030 | else if (Args[i].IsZExt) |
11031 | ExtendKind = ISD::ZERO_EXTEND; |
11032 | |
11033 | // Conservatively only handle 'returned' on non-vectors that can be lowered, |
11034 | // for now. |
11035 | if (Args[i].IsReturned && !Op.getValueType().isVector() && |
11036 | CanLowerReturn) { |
11037 | assert((CLI.RetTy == Args[i].Ty || |
11038 | (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() && |
11039 | CLI.RetTy->getPointerAddressSpace() == |
11040 | Args[i].Ty->getPointerAddressSpace())) && |
11041 | RetTys.size() == NumValues && "unexpected use of 'returned'" ); |
11042 | // Before passing 'returned' to the target lowering code, ensure that |
11043 | // either the register MVT and the actual EVT are the same size or that |
11044 | // the return value and argument are extended in the same way; in these |
11045 | // cases it's safe to pass the argument register value unchanged as the |
11046 | // return register value (although it's at the target's option whether |
11047 | // to do so) |
11048 | // TODO: allow code generation to take advantage of partially preserved |
11049 | // registers rather than clobbering the entire register when the |
11050 | // parameter extension method is not compatible with the return |
11051 | // extension method |
11052 | if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) || |
11053 | (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt && |
11054 | CLI.RetZExt == Args[i].IsZExt)) |
11055 | Flags.setReturned(); |
11056 | } |
11057 | |
11058 | getCopyToParts(DAG&: CLI.DAG, DL: CLI.DL, Val: Op, Parts: &Parts[0], NumParts, PartVT, V: CLI.CB, |
11059 | CallConv: CLI.CallConv, ExtendKind); |
11060 | |
11061 | for (unsigned j = 0; j != NumParts; ++j) { |
11062 |         // If it isn't the first piece, the alignment must be 1.
11063 | // For scalable vectors the scalable part is currently handled |
11064 | // by individual targets, so we just use the known minimum size here. |
11065 | ISD::OutputArg MyFlags( |
11066 | Flags, Parts[j].getValueType().getSimpleVT(), VT, |
11067 | i < CLI.NumFixedArgs, i, |
11068 | j * Parts[j].getValueType().getStoreSize().getKnownMinValue()); |
11069 | if (NumParts > 1 && j == 0) |
11070 | MyFlags.Flags.setSplit(); |
11071 | else if (j != 0) { |
11072 | MyFlags.Flags.setOrigAlign(Align(1)); |
11073 | if (j == NumParts - 1) |
11074 | MyFlags.Flags.setSplitEnd(); |
11075 | } |
11076 | |
11077 | CLI.Outs.push_back(Elt: MyFlags); |
11078 | CLI.OutVals.push_back(Elt: Parts[j]); |
11079 | } |
11080 | |
11081 | if (NeedsRegBlock && Value == NumValues - 1) |
11082 | CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast(); |
11083 | } |
11084 | } |
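
  // For example (a sketch): an i64 argument on a 32-bit target is split into
  // two i32 parts above; the first part carries setSplit() and the original
  // alignment, later parts get alignment 1, and the last one also carries
  // setSplitEnd(), so the target's CC lowering can reassemble the value.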
11085 | |
11086 | SmallVector<SDValue, 4> InVals; |
11087 | CLI.Chain = LowerCall(CLI, InVals); |
11088 | |
11089 | // Update CLI.InVals to use outside of this function. |
11090 | CLI.InVals = InVals; |
11091 | |
11092 | // Verify that the target's LowerCall behaved as expected. |
assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
"LowerCall didn't return a valid chain!");
assert((!CLI.IsTailCall || InVals.empty()) &&
"LowerCall emitted a return value for a tail call!");
assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
"LowerCall didn't emit the correct number of values!");
11099 | |
11100 | // For a tail call, the return value is merely live-out and there aren't |
11101 | // any nodes in the DAG representing it. Return a special value to |
11102 | // indicate that a tail call has been emitted and no more Instructions |
11103 | // should be processed in the current block. |
11104 | if (CLI.IsTailCall) { |
11105 | CLI.DAG.setRoot(CLI.Chain); |
return std::make_pair(SDValue(), SDValue());
11107 | } |
11108 | |
11109 | #ifndef NDEBUG |
11110 | for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { |
assert(InVals[i].getNode() && "LowerCall emitted a null value!");
assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
"LowerCall emitted a value with the wrong type!");
11114 | } |
11115 | #endif |
11116 | |
11117 | SmallVector<SDValue, 4> ReturnValues; |
11118 | if (!CanLowerReturn) { |
11119 | // The instruction result is the result of loading from the |
11120 | // hidden sret parameter. |
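// Illustratively, the call was demoted to something like
//   call void @callee(ptr sret(%T) %demote.slot, ...)
// and each member of the aggregate result is reloaded below from
// %demote.slot at its precomputed offset (%T and the names are
// placeholders).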
11121 | SmallVector<EVT, 1> PVTs; |
Type *PtrRetTy =
PointerType::get(OrigRetTy->getContext(), DL.getAllocaAddrSpace());
11124 | |
ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
11127 | EVT PtrVT = PVTs[0]; |
11128 | |
11129 | unsigned NumValues = RetTys.size(); |
ReturnValues.resize(NumValues);
11131 | SmallVector<SDValue, 4> Chains(NumValues); |
11132 | |
11133 | // An aggregate return value cannot wrap around the address space, so |
11134 | // offsets to its parts don't wrap either. |
11135 | SDNodeFlags Flags; |
11136 | Flags.setNoUnsignedWrap(true); |
11137 | |
11138 | MachineFunction &MF = CLI.DAG.getMachineFunction(); |
Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx);
11140 | for (unsigned i = 0; i < NumValues; ++i) { |
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
CLI.DAG.getConstant(Offsets[i], CLI.DL,
PtrVT), Flags);
SDValue L = CLI.DAG.getLoad(
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
DemoteStackIdx, Offsets[i]),
HiddenSRetAlign);
ReturnValues[i] = L;
Chains[i] = L.getValue(1);
11151 | } |
11152 | |
CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
11154 | } else { |
11155 | // Collect the legal value parts into potentially illegal values |
11156 | // that correspond to the original function's return values. |
11157 | std::optional<ISD::NodeType> AssertOp; |
11158 | if (CLI.RetSExt) |
11159 | AssertOp = ISD::AssertSext; |
11160 | else if (CLI.RetZExt) |
11161 | AssertOp = ISD::AssertZext; |
11162 | unsigned CurReg = 0; |
11163 | for (EVT VT : RetTys) { |
MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);

ReturnValues.push_back(getCopyFromParts(
CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, nullptr,
CLI.Chain, CLI.CallConv, AssertOp));
11172 | CurReg += NumRegs; |
11173 | } |
11174 | |
// For a function returning void, there is no return value. We can't create
// such a node, so we just return a null return value in that case. Nothing
// will actually look at the value anyway.
11178 | if (ReturnValues.empty()) |
return std::make_pair(SDValue(), CLI.Chain);
11180 | } |
11181 | |
SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
CLI.DAG.getVTList(RetTys), ReturnValues);
return std::make_pair(Res, CLI.Chain);
11185 | } |
11186 | |
11187 | /// Places new result values for the node in Results (their number |
11188 | /// and types must exactly match those of the original return values of |
11189 | /// the node), or leaves Results empty, which indicates that the node is not |
11190 | /// to be custom lowered after all. |
11191 | void TargetLowering::LowerOperationWrapper(SDNode *N, |
11192 | SmallVectorImpl<SDValue> &Results, |
11193 | SelectionDAG &DAG) const { |
SDValue Res = LowerOperation(SDValue(N, 0), DAG);
11195 | |
11196 | if (!Res.getNode()) |
11197 | return; |
11198 | |
11199 | // If the original node has one result, take the return value from |
11200 | // LowerOperation as is. It might not be result number 0. |
11201 | if (N->getNumValues() == 1) { |
Results.push_back(Res);
11203 | return; |
11204 | } |
11205 | |
11206 | // If the original node has multiple results, then the return node should |
11207 | // have the same number of results. |
assert((N->getNumValues() == Res->getNumValues()) &&
"Lowering returned the wrong number of results!");
11210 | |
// Place the new result values based on the result number of N.
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
Results.push_back(Res.getValue(I));
11214 | } |
11215 | |
11216 | SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { |
llvm_unreachable("LowerOperation not implemented for this target!");
11218 | } |
11219 | |
11220 | void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, |
11221 | unsigned Reg, |
11222 | ISD::NodeType ExtendType) { |
11223 | SDValue Op = getNonRegisterValue(V); |
assert((Op.getOpcode() != ISD::CopyFromReg ||
cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
"Copy from a reg to the same reg!");
assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");
11228 | |
11229 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
// If this is an InlineAsm, we have to match the registers required, not the
// notional registers required by the type.
11232 | |
11233 | RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(), |
11234 | std::nullopt); // This is not an ABI copy. |
11235 | SDValue Chain = DAG.getEntryNode(); |
11236 | |
11237 | if (ExtendType == ISD::ANY_EXTEND) { |
auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V);
11239 | if (PreferredExtendIt != FuncInfo.PreferredExtendType.end()) |
11240 | ExtendType = PreferredExtendIt->second; |
11241 | } |
RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
PendingExports.push_back(Chain);
11244 | } |
11245 | |
11246 | #include "llvm/CodeGen/SelectionDAGISel.h" |
11247 | |
/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true. A use by a switch counts as a use outside the
/// entry block, since the switch may expand into multiple basic blocks.
11251 | static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { |
11252 | // With FastISel active, we may be splitting blocks, so force creation |
11253 | // of virtual registers for all non-dead arguments. |
11254 | if (FastISel) |
11255 | return A->use_empty(); |
11256 | |
11257 | const BasicBlock &Entry = A->getParent()->front(); |
11258 | for (const User *U : A->users()) |
if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U))
11260 | return false; // Use not in entry block. |
11261 | |
11262 | return true; |
11263 | } |
11264 | |
11265 | using ArgCopyElisionMapTy = |
11266 | DenseMap<const Argument *, |
11267 | std::pair<const AllocaInst *, const StoreInst *>>; |
11268 | |
11269 | /// Scan the entry block of the function in FuncInfo for arguments that look |
11270 | /// like copies into a local alloca. Record any copied arguments in |
11271 | /// ArgCopyElisionCandidates. |
11272 | static void |
11273 | findArgumentCopyElisionCandidates(const DataLayout &DL, |
11274 | FunctionLoweringInfo *FuncInfo, |
11275 | ArgCopyElisionMapTy &ArgCopyElisionCandidates) { |
11276 | // Record the state of every static alloca used in the entry block. Argument |
11277 | // allocas are all used in the entry block, so we need approximately as many |
11278 | // entries as we have arguments. |
11279 | enum StaticAllocaInfo { Unknown, Clobbered, Elidable }; |
11280 | SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas; |
11281 | unsigned NumArgs = FuncInfo->Fn->arg_size(); |
StaticAllocas.reserve(NumArgs * 2);
11283 | |
11284 | auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * { |
11285 | if (!V) |
11286 | return nullptr; |
11287 | V = V->stripPointerCasts(); |
const auto *AI = dyn_cast<AllocaInst>(V);
if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
return nullptr;
auto Iter = StaticAllocas.insert({AI, Unknown});
11292 | return &Iter.first->second; |
11293 | }; |
11294 | |
11295 | // Look for stores of arguments to static allocas. Look through bitcasts and |
11296 | // GEPs to handle type coercions, as long as the alloca is fully initialized |
11297 | // by the store. Any non-store use of an alloca escapes it and any subsequent |
11298 | // unanalyzed store might write it. |
11299 | // FIXME: Handle structs initialized with multiple stores. |
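// Illustrative IR for the pattern being matched (names are placeholders):
//   entry:
//     %x.addr = alloca i64
//     store i64 %x, ptr %x.addr
// where the store is the first write to %x.addr and initializes it entirely.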
11300 | for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) { |
11301 | // Look for stores, and handle non-store uses conservatively. |
const auto *SI = dyn_cast<StoreInst>(&I);
11303 | if (!SI) { |
11304 | // We will look through cast uses, so ignore them completely. |
11305 | if (I.isCast()) |
11306 | continue; |
11307 | // Ignore debug info and pseudo op intrinsics, they don't escape or store |
11308 | // to allocas. |
11309 | if (I.isDebugOrPseudoInst()) |
11310 | continue; |
11311 | // This is an unknown instruction. Assume it escapes or writes to all |
11312 | // static alloca operands. |
11313 | for (const Use &U : I.operands()) { |
11314 | if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U)) |
11315 | *Info = StaticAllocaInfo::Clobbered; |
11316 | } |
11317 | continue; |
11318 | } |
11319 | |
11320 | // If the stored value is a static alloca, mark it as escaped. |
11321 | if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand())) |
11322 | *Info = StaticAllocaInfo::Clobbered; |
11323 | |
11324 | // Check if the destination is a static alloca. |
11325 | const Value *Dst = SI->getPointerOperand()->stripPointerCasts(); |
11326 | StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst); |
11327 | if (!Info) |
11328 | continue; |
const AllocaInst *AI = cast<AllocaInst>(Dst);
11330 | |
11331 | // Skip allocas that have been initialized or clobbered. |
11332 | if (*Info != StaticAllocaInfo::Unknown) |
11333 | continue; |
11334 | |
11335 | // Check if the stored value is an argument, and that this store fully |
11336 | // initializes the alloca. |
11337 | // If the argument type has padding bits we can't directly forward a pointer |
11338 | // as the upper bits may contain garbage. |
11339 | // Don't elide copies from the same argument twice. |
11340 | const Value *Val = SI->getValueOperand()->stripPointerCasts(); |
11341 | const auto *Arg = dyn_cast<Argument>(Val); |
11342 | if (!Arg || Arg->hasPassPointeeByValueCopyAttr() || |
11343 | Arg->getType()->isEmptyTy() || |
DL.getTypeStoreSize(Arg->getType()) !=
DL.getTypeAllocSize(AI->getAllocatedType()) ||
!DL.typeSizeEqualsStoreSize(Arg->getType()) ||
ArgCopyElisionCandidates.count(Arg)) {
11348 | *Info = StaticAllocaInfo::Clobbered; |
11349 | continue; |
11350 | } |
11351 | |
11352 | LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI |
11353 | << '\n'); |
11354 | |
11355 | // Mark this alloca and store for argument copy elision. |
11356 | *Info = StaticAllocaInfo::Elidable; |
ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
11358 | |
11359 | // Stop scanning if we've seen all arguments. This will happen early in -O0 |
11360 | // builds, which is useful, because -O0 builds have large entry blocks and |
11361 | // many allocas. |
11362 | if (ArgCopyElisionCandidates.size() == NumArgs) |
11363 | break; |
11364 | } |
11365 | } |
11366 | |
11367 | /// Try to elide argument copies from memory into a local alloca. Succeeds if |
11368 | /// ArgVal is a load from a suitable fixed stack object. |
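/// On success, the alloca's frame index is redirected to the fixed stack
/// object backing the incoming argument, and the store implementing the copy
/// is marked so that no code is emitted for it.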
11369 | static void tryToElideArgumentCopy( |
11370 | FunctionLoweringInfo &FuncInfo, SmallVectorImpl<SDValue> &Chains, |
11371 | DenseMap<int, int> &ArgCopyElisionFrameIndexMap, |
11372 | SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs, |
11373 | ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg, |
11374 | ArrayRef<SDValue> ArgVals, bool &ArgHasUses) { |
11375 | // Check if this is a load from a fixed stack object. |
auto *LNode = dyn_cast<LoadSDNode>(ArgVals[0]);
11377 | if (!LNode) |
11378 | return; |
auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
11380 | if (!FINode) |
11381 | return; |
11382 | |
11383 | // Check that the fixed stack object is the right size and alignment. |
11384 | // Look at the alignment that the user wrote on the alloca instead of looking |
11385 | // at the stack object. |
auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
11387 | assert(ArgCopyIter != ArgCopyElisionCandidates.end()); |
11388 | const AllocaInst *AI = ArgCopyIter->second.first; |
11389 | int FixedIndex = FINode->getIndex(); |
11390 | int &AllocaIndex = FuncInfo.StaticAllocaMap[AI]; |
11391 | int OldIndex = AllocaIndex; |
11392 | MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); |
if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
LLVM_DEBUG(
dbgs() << " argument copy elision failed due to bad fixed stack "
"object size\n");
11397 | return; |
11398 | } |
11399 | Align RequiredAlignment = AI->getAlign(); |
if (MFI.getObjectAlign(FixedIndex) < RequiredAlignment) {
LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
"greater than stack argument alignment ("
<< DebugStr(RequiredAlignment) << " vs "
<< DebugStr(MFI.getObjectAlign(FixedIndex)) << ")\n");
11405 | return; |
11406 | } |
11407 | |
11408 | // Perform the elision. Delete the old stack object and replace its only use |
11409 | // in the variable info map. Mark the stack object as mutable and aliased. |
11410 | LLVM_DEBUG({ |
11411 | dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n' |
11412 | << " Replacing frame index " << OldIndex << " with " << FixedIndex |
11413 | << '\n'; |
11414 | }); |
MFI.RemoveStackObject(OldIndex);
MFI.setIsImmutableObjectIndex(FixedIndex, false);
MFI.setIsAliasedObjectIndex(FixedIndex, true);
AllocaIndex = FixedIndex;
ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
for (SDValue ArgVal : ArgVals)
Chains.push_back(ArgVal.getValue(1));
11422 | |
11423 | // Avoid emitting code for the store implementing the copy. |
11424 | const StoreInst *SI = ArgCopyIter->second.second; |
ElidedArgCopyInstrs.insert(SI);
11426 | |
// Check for uses of the argument again so that we can avoid exporting ArgVal
// if it isn't used by anything other than the store.
11429 | for (const Value *U : Arg.users()) { |
11430 | if (U != SI) { |
11431 | ArgHasUses = true; |
11432 | break; |
11433 | } |
11434 | } |
11435 | } |
11436 | |
11437 | void SelectionDAGISel::LowerArguments(const Function &F) { |
11438 | SelectionDAG &DAG = SDB->DAG; |
11439 | SDLoc dl = SDB->getCurSDLoc(); |
11440 | const DataLayout &DL = DAG.getDataLayout(); |
11441 | SmallVector<ISD::InputArg, 16> Ins; |
11442 | |
11443 | // In Naked functions we aren't going to save any registers. |
if (F.hasFnAttribute(Attribute::Naked))
11445 | return; |
11446 | |
11447 | if (!FuncInfo->CanLowerReturn) { |
11448 | // Put in an sret pointer parameter before all the other parameters. |
11449 | SmallVector<EVT, 1> ValueVTs; |
ComputeValueVTs(*TLI, DAG.getDataLayout(),
PointerType::get(F.getContext(),
DAG.getDataLayout().getAllocaAddrSpace()),
ValueVTs);
11454 | |
11455 | // NOTE: Assuming that a pointer will never break down to more than one VT |
11456 | // or one register. |
11457 | ISD::ArgFlagsTy Flags; |
11458 | Flags.setSRet(); |
MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
11460 | ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, |
11461 | ISD::InputArg::NoArgIndex, 0); |
Ins.push_back(RetArg);
11463 | } |
11464 | |
11465 | // Look for stores of arguments to static allocas. Mark such arguments with a |
11466 | // flag to ask the target to give us the memory location of that argument if |
11467 | // available. |
11468 | ArgCopyElisionMapTy ArgCopyElisionCandidates; |
findArgumentCopyElisionCandidates(DL, FuncInfo.get(),
ArgCopyElisionCandidates);
11471 | |
11472 | // Set up the incoming argument description vector. |
11473 | for (const Argument &Arg : F.args()) { |
11474 | unsigned ArgNo = Arg.getArgNo(); |
11475 | SmallVector<EVT, 4> ValueVTs; |
ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
11477 | bool isArgValueUsed = !Arg.use_empty(); |
11478 | unsigned PartBase = 0; |
11479 | Type *FinalType = Arg.getType(); |
if (Arg.hasAttribute(Attribute::ByVal))
11481 | FinalType = Arg.getParamByValType(); |
bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
FinalType, F.getCallingConv(), F.isVarArg(), DL);
11484 | for (unsigned Value = 0, NumValues = ValueVTs.size(); |
11485 | Value != NumValues; ++Value) { |
11486 | EVT VT = ValueVTs[Value]; |
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
ISD::ArgFlagsTy Flags;

11491 | if (Arg.getType()->isPointerTy()) { |
11492 | Flags.setPointer(); |
Flags.setPointerAddrSpace(
cast<PointerType>(Arg.getType())->getAddressSpace());
11495 | } |
if (Arg.hasAttribute(Attribute::ZExt))
Flags.setZExt();
if (Arg.hasAttribute(Attribute::SExt))
Flags.setSExt();
if (Arg.hasAttribute(Attribute::InReg)) {
// If we are using the vectorcall calling convention, a structure that is
// passed InReg is surely an HVA (Homogeneous Vector Aggregate).
if (F.getCallingConv() == CallingConv::X86_VectorCall &&
isa<StructType>(Arg.getType())) {
// The first value of a structure is marked as the start of the HVA.
if (0 == Value)
11507 | Flags.setHvaStart(); |
11508 | Flags.setHva(); |
11509 | } |
11510 | // Set InReg Flag |
11511 | Flags.setInReg(); |
11512 | } |
if (Arg.hasAttribute(Attribute::StructRet))
Flags.setSRet();
if (Arg.hasAttribute(Attribute::SwiftSelf))
Flags.setSwiftSelf();
if (Arg.hasAttribute(Attribute::SwiftAsync))
Flags.setSwiftAsync();
if (Arg.hasAttribute(Attribute::SwiftError))
Flags.setSwiftError();
if (Arg.hasAttribute(Attribute::ByVal))
Flags.setByVal();
if (Arg.hasAttribute(Attribute::ByRef))
Flags.setByRef();
if (Arg.hasAttribute(Attribute::InAlloca)) {
11526 | Flags.setInAlloca(); |
11527 | // Set the byval flag for CCAssignFn callbacks that don't know about |
11528 | // inalloca. This way we can know how many bytes we should've allocated |
11529 | // and how many bytes a callee cleanup function will pop. If we port |
11530 | // inalloca to more targets, we'll have to add custom inalloca handling |
11531 | // in the various CC lowering callbacks. |
11532 | Flags.setByVal(); |
11533 | } |
if (Arg.hasAttribute(Attribute::Preallocated)) {
11535 | Flags.setPreallocated(); |
11536 | // Set the byval flag for CCAssignFn callbacks that don't know about |
11537 | // preallocated. This way we can know how many bytes we should've |
11538 | // allocated and how many bytes a callee cleanup function will pop. If |
11539 | // we port preallocated to more targets, we'll have to add custom |
11540 | // preallocated handling in the various CC lowering callbacks. |
11541 | Flags.setByVal(); |
11542 | } |
11543 | |
11544 | // Certain targets (such as MIPS), may have a different ABI alignment |
11545 | // for a type depending on the context. Give the target a chance to |
11546 | // specify the alignment it wants. |
11547 | const Align OriginalAlignment( |
11548 | TLI->getABIAlignmentForCallingConv(ArgTy, DL)); |
11549 | Flags.setOrigAlign(OriginalAlignment); |
11550 | |
11551 | Align MemAlign; |
11552 | Type *ArgMemTy = nullptr; |
11553 | if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() || |
11554 | Flags.isByRef()) { |
11555 | if (!ArgMemTy) |
11556 | ArgMemTy = Arg.getPointeeInMemoryValueType(); |
11557 | |
uint64_t MemSize = DL.getTypeAllocSize(ArgMemTy);

// For in-memory arguments, the size and alignment should be passed from the
// frontend. The backend will guess if this info is missing, but there are
// cases it cannot get right.
11563 | if (auto ParamAlign = Arg.getParamStackAlign()) |
11564 | MemAlign = *ParamAlign; |
11565 | else if ((ParamAlign = Arg.getParamAlign())) |
11566 | MemAlign = *ParamAlign; |
11567 | else |
MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL));
11569 | if (Flags.isByRef()) |
11570 | Flags.setByRefSize(MemSize); |
11571 | else |
11572 | Flags.setByValSize(MemSize); |
11573 | } else if (auto ParamAlign = Arg.getParamStackAlign()) { |
11574 | MemAlign = *ParamAlign; |
11575 | } else { |
11576 | MemAlign = OriginalAlignment; |
11577 | } |
11578 | Flags.setMemAlign(MemAlign); |
11579 | |
if (Arg.hasAttribute(Attribute::Nest))
11581 | Flags.setNest(); |
11582 | if (NeedsRegBlock) |
11583 | Flags.setInConsecutiveRegs(); |
if (ArgCopyElisionCandidates.count(&Arg))
11585 | Flags.setCopyElisionCandidate(); |
if (Arg.hasAttribute(Attribute::Returned))
11587 | Flags.setReturned(); |
11588 | |
MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
unsigned NumRegs = TLI->getNumRegistersForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
11593 | for (unsigned i = 0; i != NumRegs; ++i) { |
11594 | // For scalable vectors, use the minimum size; individual targets |
11595 | // are responsible for handling scalable vector arguments and |
11596 | // return values. |
11597 | ISD::InputArg MyFlags( |
11598 | Flags, RegisterVT, VT, isArgValueUsed, ArgNo, |
11599 | PartBase + i * RegisterVT.getStoreSize().getKnownMinValue()); |
11600 | if (NumRegs > 1 && i == 0) |
11601 | MyFlags.Flags.setSplit(); |
// If it isn't the first piece, the alignment must be 1.
11603 | else if (i > 0) { |
11604 | MyFlags.Flags.setOrigAlign(Align(1)); |
11605 | if (i == NumRegs - 1) |
11606 | MyFlags.Flags.setSplitEnd(); |
11607 | } |
Ins.push_back(MyFlags);
11609 | } |
11610 | if (NeedsRegBlock && Value == NumValues - 1) |
11611 | Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); |
11612 | PartBase += VT.getStoreSize().getKnownMinValue(); |
11613 | } |
11614 | } |
11615 | |
11616 | // Call the target to set up the argument values. |
11617 | SmallVector<SDValue, 8> InVals; |
11618 | SDValue NewRoot = TLI->LowerFormalArguments( |
11619 | DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals); |
11620 | |
11621 | // Verify that the target's LowerFormalArguments behaved as expected. |
assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
"LowerFormalArguments didn't return a valid chain!");
assert(InVals.size() == Ins.size() &&
"LowerFormalArguments didn't emit the correct number of values!");
11626 | LLVM_DEBUG({ |
11627 | for (unsigned i = 0, e = Ins.size(); i != e; ++i) { |
assert(InVals[i].getNode() &&
"LowerFormalArguments emitted a null value!");
assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
"LowerFormalArguments emitted a value with the wrong type!");
11632 | } |
11633 | }); |
11634 | |
11635 | // Update the DAG with the new chain value resulting from argument lowering. |
11636 | DAG.setRoot(NewRoot); |
11637 | |
11638 | // Set up the argument values. |
11639 | unsigned i = 0; |
11640 | if (!FuncInfo->CanLowerReturn) { |
11641 | // Create a virtual register for the sret pointer, and put in a copy |
11642 | // from the sret argument into it. |
11643 | SmallVector<EVT, 1> ValueVTs; |
ComputeValueVTs(*TLI, DAG.getDataLayout(),
PointerType::get(F.getContext(),
DAG.getDataLayout().getAllocaAddrSpace()),
ValueVTs);
11648 | MVT VT = ValueVTs[0].getSimpleVT(); |
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
std::optional<ISD::NodeType> AssertOp;
SDValue ArgValue =
getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, nullptr, NewRoot,
F.getCallingConv(), AssertOp);
11654 | |
11655 | MachineFunction& MF = SDB->DAG.getMachineFunction(); |
11656 | MachineRegisterInfo& RegInfo = MF.getRegInfo(); |
Register SRetReg =
RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
FuncInfo->DemoteRegister = SRetReg;
NewRoot =
SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
11662 | DAG.setRoot(NewRoot); |
11663 | |
11664 | // i indexes lowered arguments. Bump it past the hidden sret argument. |
11665 | ++i; |
11666 | } |
11667 | |
11668 | SmallVector<SDValue, 4> Chains; |
11669 | DenseMap<int, int> ArgCopyElisionFrameIndexMap; |
11670 | for (const Argument &Arg : F.args()) { |
11671 | SmallVector<SDValue, 4> ArgValues; |
11672 | SmallVector<EVT, 4> ValueVTs; |
ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
11674 | unsigned NumValues = ValueVTs.size(); |
11675 | if (NumValues == 0) |
11676 | continue; |
11677 | |
11678 | bool ArgHasUses = !Arg.use_empty(); |
11679 | |
11680 | // Elide the copying store if the target loaded this argument from a |
11681 | // suitable fixed stack object. |
11682 | if (Ins[i].Flags.isCopyElisionCandidate()) { |
11683 | unsigned NumParts = 0; |
11684 | for (EVT VT : ValueVTs) |
NumParts += TLI->getNumRegistersForCallingConv(*CurDAG->getContext(),
F.getCallingConv(), VT);

tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
ArrayRef(&InVals[i], NumParts), ArgHasUses);
11691 | } |
11692 | |
// If this argument is unused, remember its value anyway; it is used to
// generate debugging information.
11695 | bool isSwiftErrorArg = |
11696 | TLI->supportSwiftError() && |
Arg.hasAttribute(Attribute::SwiftError);
11698 | if (!ArgHasUses && !isSwiftErrorArg) { |
SDB->setUnusedArgValue(&Arg, InVals[i]);
11700 | |
11701 | // Also remember any frame index for use in FastISel. |
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
11705 | } |
11706 | |
11707 | for (unsigned Val = 0; Val != NumValues; ++Val) { |
11708 | EVT VT = ValueVTs[Val]; |
MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(),
F.getCallingConv(), VT);
unsigned NumParts = TLI->getNumRegistersForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
11713 | |
// Even an apparently unused swifterror argument needs to be returned, so
// we still generate a copy for it that can be used on return from the
// function.
11717 | if (ArgHasUses || isSwiftErrorArg) { |
11718 | std::optional<ISD::NodeType> AssertOp; |
if (Arg.hasAttribute(Attribute::SExt))
AssertOp = ISD::AssertSext;
else if (Arg.hasAttribute(Attribute::ZExt))
AssertOp = ISD::AssertZext;

ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
PartVT, VT, nullptr, NewRoot,
F.getCallingConv(), AssertOp));
11727 | } |
11728 | |
11729 | i += NumParts; |
11730 | } |
11731 | |
11732 | // We don't need to do anything else for unused arguments. |
11733 | if (ArgValues.empty()) |
11734 | continue; |
11735 | |
11736 | // Note down frame index. |
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
11740 | |
SDValue Res = DAG.getMergeValues(ArrayRef(ArgValues.data(), NumValues),
SDB->getCurSDLoc());
11743 | |
SDB->setValue(&Arg, Res);
11745 | if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { |
11746 | // We want to associate the argument with the frame index, among |
11747 | // involved operands, that correspond to the lowest address. The |
11748 | // getCopyFromParts function, called earlier, is swapping the order of |
11749 | // the operands to BUILD_PAIR depending on endianness. The result of |
11750 | // that swapping is that the least significant bits of the argument will |
11751 | // be in the first operand of the BUILD_PAIR node, and the most |
11752 | // significant bits will be in the second operand. |
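// For example, a 64-bit argument split into two 32-bit loads on a
// big-endian target keeps its most significant half (the second operand of
// the BUILD_PAIR) at the lower address, so that operand carries the frame
// index we want to record. (Illustrative layout.)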
11753 | unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0; |
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
11759 | } |
11760 | |
11761 | // Analyses past this point are naive and don't expect an assertion. |
11762 | if (Res.getOpcode() == ISD::AssertZext) |
Res = Res.getOperand(0);
11764 | |
11765 | // Update the SwiftErrorVRegDefMap. |
11766 | if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { |
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (Register::isVirtualRegister(Reg))
SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
Reg);
11771 | } |
11772 | |
11773 | // If this argument is live outside of the entry block, insert a copy from |
11774 | // wherever we got it to the vreg that other BB's will reference it as. |
11775 | if (Res.getOpcode() == ISD::CopyFromReg) { |
11776 | // If we can, though, try to skip creating an unnecessary vreg. |
11777 | // FIXME: This isn't very clean... it would be nice to make this more |
11778 | // general. |
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
11780 | if (Register::isVirtualRegister(Reg)) { |
11781 | FuncInfo->ValueMap[&Arg] = Reg; |
11782 | continue; |
11783 | } |
11784 | } |
if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
FuncInfo->InitializeRegForValue(&Arg);
SDB->CopyToExportRegsIfNeeded(&Arg);
11788 | } |
11789 | } |
11790 | |
11791 | if (!Chains.empty()) { |
Chains.push_back(NewRoot);
NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11794 | } |
11795 | |
11796 | DAG.setRoot(NewRoot); |
11797 | |
assert(i == InVals.size() && "Argument register count mismatch!");
11799 | |
11800 | // If any argument copy elisions occurred and we have debug info, update the |
11801 | // stale frame indices used in the dbg.declare variable info table. |
11802 | if (!ArgCopyElisionFrameIndexMap.empty()) { |
11803 | for (MachineFunction::VariableDbgInfo &VI : |
11804 | MF->getInStackSlotVariableDbgInfo()) { |
auto I = ArgCopyElisionFrameIndexMap.find(VI.getStackSlot());
11806 | if (I != ArgCopyElisionFrameIndexMap.end()) |
VI.updateStackSlot(I->second);
11808 | } |
11809 | } |
11810 | |
11811 | // Finally, if the target has anything special to do, allow it to do so. |
11812 | emitFunctionEntryCode(); |
11813 | } |
11814 | |
11815 | /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to |
11816 | /// ensure constants are generated when needed. Remember the virtual registers |
11817 | /// that need to be added to the Machine PHI nodes as input. We cannot just |
11818 | /// directly add them, because expansion might result in multiple MBB's for one |
11819 | /// BB. As such, the start of the BB might correspond to a different MBB than |
11820 | /// the end. |
11821 | void |
11822 | SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { |
11823 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
11824 | |
11825 | SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; |
11826 | |
11827 | // Check PHI nodes in successors that expect a value to be available from this |
11828 | // block. |
for (const BasicBlock *SuccBB : successors(LLVMBB->getTerminator())) {
if (!isa<PHINode>(SuccBB->begin())) continue;
11831 | MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; |
11832 | |
11833 | // If this terminator has multiple identical successors (common for |
11834 | // switches), only handle each succ once. |
if (!SuccsHandled.insert(SuccMBB).second)
11836 | continue; |
11837 | |
11838 | MachineBasicBlock::iterator MBBI = SuccMBB->begin(); |
11839 | |
11840 | // At this point we know that there is a 1-1 correspondence between LLVM PHI |
11841 | // nodes and Machine PHI nodes, but the incoming operands have not been |
11842 | // emitted yet. |
11843 | for (const PHINode &PN : SuccBB->phis()) { |
// Ignore dead PHIs.
11845 | if (PN.use_empty()) |
11846 | continue; |
11847 | |
11848 | // Skip empty types |
11849 | if (PN.getType()->isEmptyTy()) |
11850 | continue; |
11851 | |
11852 | unsigned Reg; |
const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
11854 | |
if (const auto *C = dyn_cast<Constant>(PHIOp)) {
unsigned &RegOut = ConstantsOut[C];
if (RegOut == 0) {
RegOut = FuncInfo.CreateRegs(C);
11859 | // We need to zero/sign extend ConstantInt phi operands to match |
11860 | // assumptions in FunctionLoweringInfo::ComputePHILiveOutRegInfo. |
11861 | ISD::NodeType ExtendType = ISD::ANY_EXTEND; |
if (auto *CI = dyn_cast<ConstantInt>(C))
ExtendType = TLI.signExtendConstant(CI) ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND;
CopyValueToVirtualRegister(C, RegOut, ExtendType);
11866 | } |
11867 | Reg = RegOut; |
11868 | } else { |
11869 | DenseMap<const Value *, Register>::iterator I = |
FuncInfo.ValueMap.find(PHIOp);
11871 | if (I != FuncInfo.ValueMap.end()) |
11872 | Reg = I->second; |
11873 | else { |
assert(isa<AllocaInst>(PHIOp) &&
FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
"Didn't codegen value into a register!??");
Reg = FuncInfo.CreateRegs(PHIOp);
CopyValueToVirtualRegister(PHIOp, Reg);
11879 | } |
11880 | } |
11881 | |
// Remember that this register needs to be added to the machine PHI node as
// the input for this MBB.
11884 | SmallVector<EVT, 4> ValueVTs; |
ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
11886 | for (EVT VT : ValueVTs) { |
const unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
for (unsigned i = 0; i != NumRegisters; ++i)
FuncInfo.PHINodesToUpdate.push_back(
std::make_pair(&*MBBI++, Reg + i));
11891 | Reg += NumRegisters; |
11892 | } |
11893 | } |
11894 | } |
11895 | |
11896 | ConstantsOut.clear(); |
11897 | } |
11898 | |
11899 | MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { |
11900 | MachineFunction::iterator I(MBB); |
11901 | if (++I == FuncInfo.MF->end()) |
11902 | return nullptr; |
11903 | return &*I; |
11904 | } |
11905 | |
11906 | /// During lowering new call nodes can be created (such as memset, etc.). |
11907 | /// Those will become new roots of the current DAG, but complications arise |
11908 | /// when they are tail calls. In such cases, the call lowering will update |
11909 | /// the root, but the builder still needs to know that a tail call has been |
11910 | /// lowered in order to avoid generating an additional return. |
11911 | void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { |
11912 | // If the node is null, we do have a tail call. |
11913 | if (MaybeTC.getNode() != nullptr) |
11914 | DAG.setRoot(MaybeTC); |
11915 | else |
11916 | HasTailCall = true; |
11917 | } |
11918 | |
11919 | void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, |
11920 | MachineBasicBlock *SwitchMBB, |
11921 | MachineBasicBlock *DefaultMBB) { |
11922 | MachineFunction *CurMF = FuncInfo.MF; |
11923 | MachineBasicBlock *NextMBB = nullptr; |
11924 | MachineFunction::iterator BBI(W.MBB); |
11925 | if (++BBI != FuncInfo.MF->end()) |
11926 | NextMBB = &*BBI; |
11927 | |
11928 | unsigned Size = W.LastCluster - W.FirstCluster + 1; |
11929 | |
11930 | BranchProbabilityInfo *BPI = FuncInfo.BPI; |
11931 | |
11932 | if (Size == 2 && W.MBB == SwitchMBB) { |
11933 | // If any two of the cases has the same destination, and if one value |
11934 | // is the same as the other, but has one bit unset that the other has set, |
11935 | // use bit manipulation to do two compares at once. For example: |
11936 | // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" |
11937 | // TODO: This could be extended to merge any 2 cases in switches with 3 |
11938 | // cases. |
11939 | // TODO: Handle cases where W.CaseBB != SwitchBB. |
11940 | CaseCluster &Small = *W.FirstCluster; |
11941 | CaseCluster &Big = *W.LastCluster; |
11942 | |
11943 | if (Small.Low == Small.High && Big.Low == Big.High && |
11944 | Small.MBB == Big.MBB) { |
11945 | const APInt &SmallValue = Small.Low->getValue(); |
11946 | const APInt &BigValue = Big.Low->getValue(); |
11947 | |
11948 | // Check that there is only one bit different. |
11949 | APInt CommonBit = BigValue ^ SmallValue; |
11950 | if (CommonBit.isPowerOf2()) { |
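// In the example above, 6 ^ 4 == 2 is a power of two, and (X | 2) == 6
// holds exactly for X == 4 and X == 6.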
SDValue CondLHS = getValue(Cond);
EVT VT = CondLHS.getValueType();
SDLoc DL = getCurSDLoc();

SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
DAG.getConstant(CommonBit, DL, VT));
SDValue Cond = DAG.getSetCC(
DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
ISD::SETEQ);
11960 | |
11961 | // Update successor info. |
11962 | // Both Small and Big will jump to Small.BB, so we sum up the |
11963 | // probabilities. |
addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
if (BPI)
addSuccessorWithProb(
SwitchMBB, DefaultMBB,
// The default destination is the first successor in IR.
BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
else
addSuccessorWithProb(SwitchMBB, DefaultMBB);
11972 | |
11973 | // Insert the true branch. |
SDValue BrCond =
DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(Small.MBB));
// Insert the false branch.
BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
DAG.getBasicBlock(DefaultMBB));
11980 | |
11981 | DAG.setRoot(BrCond); |
11982 | return; |
11983 | } |
11984 | } |
11985 | } |
11986 | |
11987 | if (TM.getOptLevel() != CodeGenOptLevel::None) { |
11988 | // Here, we order cases by probability so the most likely case will be |
11989 | // checked first. However, two clusters can have the same probability in |
11990 | // which case their relative ordering is non-deterministic. So we use Low |
11991 | // as a tie-breaker as clusters are guaranteed to never overlap. |
llvm::sort(W.FirstCluster, W.LastCluster + 1,
[](const CaseCluster &a, const CaseCluster &b) {
return a.Prob != b.Prob ?
a.Prob > b.Prob :
a.Low->getValue().slt(b.Low->getValue());
11997 | }); |
11998 | |
11999 | // Rearrange the case blocks so that the last one falls through if possible |
12000 | // without changing the order of probabilities. |
12001 | for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) { |
12002 | --I; |
12003 | if (I->Prob > W.LastCluster->Prob) |
12004 | break; |
12005 | if (I->Kind == CC_Range && I->MBB == NextMBB) { |
std::swap(*I, *W.LastCluster);
12007 | break; |
12008 | } |
12009 | } |
12010 | } |
12011 | |
12012 | // Compute total probability. |
12013 | BranchProbability DefaultProb = W.DefaultProb; |
12014 | BranchProbability UnhandledProbs = DefaultProb; |
12015 | for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) |
12016 | UnhandledProbs += I->Prob; |
12017 | |
12018 | MachineBasicBlock *CurMBB = W.MBB; |
12019 | for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) { |
12020 | bool FallthroughUnreachable = false; |
12021 | MachineBasicBlock *Fallthrough; |
12022 | if (I == W.LastCluster) { |
12023 | // For the last cluster, fall through to the default destination. |
12024 | Fallthrough = DefaultMBB; |
FallthroughUnreachable = isa<UnreachableInst>(
DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
12027 | } else { |
Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
CurMF->insert(BBI, Fallthrough);
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
12032 | } |
12033 | UnhandledProbs -= I->Prob; |
12034 | |
12035 | switch (I->Kind) { |
12036 | case CC_JumpTable: { |
12037 | // FIXME: Optimize away range check based on pivot comparisons. |
12038 | JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first; |
12039 | SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second; |
12040 | |
12041 | // The jump block hasn't been inserted yet; insert it here. |
12042 | MachineBasicBlock *JumpMBB = JT->MBB; |
CurMF->insert(BBI, JumpMBB);
12044 | |
12045 | auto JumpProb = I->Prob; |
12046 | auto FallthroughProb = UnhandledProbs; |
12047 | |
12048 | // If the default statement is a target of the jump table, we evenly |
12049 | // distribute the default probability to successors of CurMBB. Also |
12050 | // update the probability on the edge from JumpMBB to Fallthrough. |
12051 | for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(), |
12052 | SE = JumpMBB->succ_end(); |
12053 | SI != SE; ++SI) { |
12054 | if (*SI == DefaultMBB) { |
12055 | JumpProb += DefaultProb / 2; |
12056 | FallthroughProb -= DefaultProb / 2; |
JumpMBB->setSuccProbability(SI, DefaultProb / 2);
12058 | JumpMBB->normalizeSuccProbs(); |
12059 | break; |
12060 | } |
12061 | } |
12062 | |
12063 | // If the default clause is unreachable, propagate that knowledge into |
12064 | // JTH->FallthroughUnreachable which will use it to suppress the range |
12065 | // check. |
12066 | // |
12067 | // However, don't do this if we're doing branch target enforcement, |
12068 | // because a table branch _without_ a range check can be a tempting JOP |
12069 | // gadget - out-of-bounds inputs that are impossible in correct |
12070 | // execution become possible again if an attacker can influence the |
12071 | // control flow. So if an attacker doesn't already have a BTI bypass |
12072 | // available, we don't want them to be able to get one out of this |
12073 | // table branch. |
12074 | if (FallthroughUnreachable) { |
12075 | Function &CurFunc = CurMF->getFunction(); |
if (!CurFunc.hasFnAttribute("branch-target-enforcement"))
12077 | JTH->FallthroughUnreachable = true; |
12078 | } |
12079 | |
12080 | if (!JTH->FallthroughUnreachable) |
addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
12083 | CurMBB->normalizeSuccProbs(); |
12084 | |
12085 | // The jump table header will be inserted in our current block, do the |
12086 | // range check, and fall through to our fallthrough block. |
12087 | JTH->HeaderBB = CurMBB; |
12088 | JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader. |
12089 | |
12090 | // If we're in the right place, emit the jump table header right now. |
12091 | if (CurMBB == SwitchMBB) { |
visitJumpTableHeader(*JT, *JTH, SwitchMBB);
12093 | JTH->Emitted = true; |
12094 | } |
12095 | break; |
12096 | } |
12097 | case CC_BitTests: { |
12098 | // FIXME: Optimize away range check based on pivot comparisons. |
12099 | BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; |
12100 | |
12101 | // The bit test blocks haven't been inserted yet; insert them here. |
12102 | for (BitTestCase &BTC : BTB->Cases) |
CurMF->insert(BBI, BTC.ThisBB);
12104 | |
12105 | // Fill in fields of the BitTestBlock. |
12106 | BTB->Parent = CurMBB; |
12107 | BTB->Default = Fallthrough; |
12108 | |
12109 | BTB->DefaultProb = UnhandledProbs; |
// If the cases in the bit test don't form a contiguous range, we evenly
// distribute the probability on the edge to Fallthrough between the two
// successors of CurMBB.
12113 | if (!BTB->ContiguousRange) { |
12114 | BTB->Prob += DefaultProb / 2; |
12115 | BTB->DefaultProb -= DefaultProb / 2; |
12116 | } |
12117 | |
12118 | if (FallthroughUnreachable) |
12119 | BTB->FallthroughUnreachable = true; |
12120 | |
12121 | // If we're in the right place, emit the bit test header right now. |
12122 | if (CurMBB == SwitchMBB) { |
visitBitTestHeader(*BTB, SwitchMBB);
12124 | BTB->Emitted = true; |
12125 | } |
12126 | break; |
12127 | } |
12128 | case CC_Range: { |
12129 | const Value *RHS, *LHS, *MHS; |
12130 | ISD::CondCode CC; |
12131 | if (I->Low == I->High) { |
12132 | // Check Cond == I->Low. |
12133 | CC = ISD::SETEQ; |
12134 | LHS = Cond; |
RHS = I->Low;
12136 | MHS = nullptr; |
12137 | } else { |
12138 | // Check I->Low <= Cond <= I->High. |
12139 | CC = ISD::SETLE; |
12140 | LHS = I->Low; |
12141 | MHS = Cond; |
12142 | RHS = I->High; |
12143 | } |
12144 | |
12145 | // If Fallthrough is unreachable, fold away the comparison. |
12146 | if (FallthroughUnreachable) |
12147 | CC = ISD::SETTRUE; |
12148 | |
12149 | // The false probability is the sum of all unhandled cases. |
12150 | CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, |
12151 | getCurSDLoc(), I->Prob, UnhandledProbs); |
12152 | |
12153 | if (CurMBB == SwitchMBB) |
visitSwitchCase(CB, SwitchMBB);
else
SL->SwitchCases.push_back(CB);
12157 | |
12158 | break; |
12159 | } |
12160 | } |
12161 | CurMBB = Fallthrough; |
12162 | } |
12163 | } |
12164 | |
12165 | void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, |
12166 | const SwitchWorkListItem &W, |
12167 | Value *Cond, |
12168 | MachineBasicBlock *SwitchMBB) { |
assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
"Clusters not sorted?");
assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
12172 | |
12173 | auto [LastLeft, FirstRight, LeftProb, RightProb] = |
12174 | SL->computeSplitWorkItemInfo(W); |
12175 | |
12176 | // Use the first element on the right as pivot since we will make less-than |
12177 | // comparisons against it. |
12178 | CaseClusterIt PivotCluster = FirstRight; |
12179 | assert(PivotCluster > W.FirstCluster); |
12180 | assert(PivotCluster <= W.LastCluster); |
12181 | |
12182 | CaseClusterIt FirstLeft = W.FirstCluster; |
12183 | CaseClusterIt LastRight = W.LastCluster; |
12184 | |
12185 | const ConstantInt *Pivot = PivotCluster->Low; |
12186 | |
12187 | // New blocks will be inserted immediately after the current one. |
12188 | MachineFunction::iterator BBI(W.MBB); |
12189 | ++BBI; |
12190 | |
12191 | // We will branch to the LHS if Value < Pivot. If LHS is a single cluster, |
12192 | // we can branch to its destination directly if it's squeezed exactly in |
12193 | // between the known lower bound and Pivot - 1. |
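// For example, with clusters {[0,3], [5,5], [7,9]} and Pivot == 7, the left
// half {[0,3], [5,5]} needs a fresh block; but if the left half were the
// single range cluster [0,6] with W.GE == 0, the pivot comparison already
// proves Value is in [0,6], so we could branch straight to its destination.
// (Illustrative values only.)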
12194 | MachineBasicBlock *LeftMBB; |
12195 | if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range && |
12196 | FirstLeft->Low == W.GE && |
12197 | (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) { |
12198 | LeftMBB = FirstLeft->MBB; |
12199 | } else { |
LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, LeftMBB);
WorkList.push_back(
{LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
12206 | } |
12207 | |
12208 | // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a |
12209 | // single cluster, RHS.Low == Pivot, and we can branch to its destination |
12210 | // directly if RHS.High equals the current upper bound. |
12211 | MachineBasicBlock *RightMBB; |
12212 | if (FirstRight == LastRight && FirstRight->Kind == CC_Range && |
12213 | W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) { |
12214 | RightMBB = FirstRight->MBB; |
12215 | } else { |
RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, RightMBB);
WorkList.push_back(
{RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
12222 | } |
12223 | |
12224 | // Create the CaseBlock record that will be used to lower the branch. |
12225 | CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, |
12226 | getCurSDLoc(), LeftProb, RightProb); |
12227 | |
12228 | if (W.MBB == SwitchMBB) |
visitSwitchCase(CB, SwitchMBB);
else
SL->SwitchCases.push_back(CB);
12232 | } |
12233 | |
// Scale CaseProb after peeling a case with the probability of PeeledCaseProb
// from the switch statement.
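// In effect, the remaining cases are renormalized by the un-peeled
// probability mass: NewProb ~= CaseProb / (1 - PeeledCaseProb), with the
// denominator clamped below so the result never exceeds one.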
12236 | static BranchProbability scaleCaseProbality(BranchProbability CaseProb, |
12237 | BranchProbability PeeledCaseProb) { |
12238 | if (PeeledCaseProb == BranchProbability::getOne()) |
12239 | return BranchProbability::getZero(); |
12240 | BranchProbability SwitchProb = PeeledCaseProb.getCompl(); |
12241 | |
12242 | uint32_t Numerator = CaseProb.getNumerator(); |
uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
return BranchProbability(Numerator, std::max(Numerator, Denominator));
12245 | } |
12246 | |
12247 | // Try to peel the top probability case if it exceeds the threshold. |
12248 | // Return current MachineBasicBlock for the switch statement if the peeling |
12249 | // does not occur. |
12250 | // If the peeling is performed, return the newly created MachineBasicBlock |
12251 | // for the peeled switch statement. Also update Clusters to remove the peeled |
12252 | // case. PeeledCaseProb is the BranchProbability for the peeled case. |
12253 | MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster( |
12254 | const SwitchInst &SI, CaseClusterVector &Clusters, |
12255 | BranchProbability &PeeledCaseProb) { |
12256 | MachineBasicBlock *SwitchMBB = FuncInfo.MBB; |
// Don't peel if there is only one cluster or when optimizing for size.
12258 | if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 || |
12259 | TM.getOptLevel() == CodeGenOptLevel::None || |
12260 | SwitchMBB->getParent()->getFunction().hasMinSize()) |
12261 | return SwitchMBB; |
12262 | |
12263 | BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100); |
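// Seeding TopCaseProb with the threshold means only clusters whose
// probability is at least SwitchPeelThreshold percent can be selected for
// peeling below.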
12264 | unsigned PeeledCaseIndex = 0; |
12265 | bool SwitchPeeled = false; |
12266 | for (unsigned Index = 0; Index < Clusters.size(); ++Index) { |
12267 | CaseCluster &CC = Clusters[Index]; |
12268 | if (CC.Prob < TopCaseProb) |
12269 | continue; |
12270 | TopCaseProb = CC.Prob; |
12271 | PeeledCaseIndex = Index; |
12272 | SwitchPeeled = true; |
12273 | } |
12274 | if (!SwitchPeeled) |
12275 | return SwitchMBB; |
12276 | |
12277 | LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " |
12278 | << TopCaseProb << "\n" ); |
12279 | |
12280 | // Record the MBB for the peeled switch statement. |
12281 | MachineFunction::iterator BBI(SwitchMBB); |
12282 | ++BBI; |
12283 | MachineBasicBlock *PeeledSwitchMBB = |
FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
12286 | |
ExportFromCurrentBlock(SI.getCondition());
auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
nullptr, nullptr, TopCaseProb.getCompl()};
lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
12292 | |
Clusters.erase(PeeledCaseIt);
for (CaseCluster &CC : Clusters) {
LLVM_DEBUG(
dbgs() << "Scale the probability for one cluster, before scaling: "
<< CC.Prob << "\n");
CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
12299 | LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n" ); |
12300 | } |
12301 | PeeledCaseProb = TopCaseProb; |
12302 | return PeeledSwitchMBB; |
12303 | } |
12304 | |
12305 | void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { |
12306 | // Extract cases from the switch. |
12307 | BranchProbabilityInfo *BPI = FuncInfo.BPI; |
12308 | CaseClusterVector Clusters; |
Clusters.reserve(SI.getNumCases());
12310 | for (auto I : SI.cases()) { |
12311 | MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; |
12312 | const ConstantInt *CaseVal = I.getCaseValue(); |
12313 | BranchProbability Prob = |
BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
12315 | : BranchProbability(1, SI.getNumCases() + 1); |
Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
12317 | } |
12318 | |
12319 | MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; |
12320 | |
12321 | // Cluster adjacent cases with the same destination. We do this at all |
12322 | // optimization levels because it's cheap to do and will make codegen faster |
12323 | // if there are many clusters. |
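  // For example (illustrative): cases 0, 1 and 2 all branching to %bb.1, plus
  // case 5 branching to %bb.2, become the clusters [0, 2] -> %bb.1 and
  // [5, 5] -> %bb.2.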
  sortAndRangeify(Clusters);

  // The branch probability of the peeled case.
  BranchProbability PeeledCaseProb = BranchProbability::getZero();
  MachineBasicBlock *PeeledSwitchMBB =
      peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);

  // If there is only the default destination, jump there directly.
  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
  if (Clusters.empty()) {
    assert(PeeledSwitchMBB == SwitchMBB);
    SwitchMBB->addSuccessor(DefaultMBB);
    if (DefaultMBB != NextBlock(SwitchMBB)) {
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
                              getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
    }
    return;
  }

  SL->findJumpTables(Clusters, &SI, getCurSDLoc(), DefaultMBB, DAG.getPSI(),
                     DAG.getBFI());
  SL->findBitTestClusters(Clusters, &SI);

  LLVM_DEBUG({
    dbgs() << "Case clusters: ";
    for (const CaseCluster &C : Clusters) {
      if (C.Kind == CC_JumpTable)
        dbgs() << "JT:";
      if (C.Kind == CC_BitTests)
        dbgs() << "BT:";

      C.Low->getValue().print(dbgs(), true);
      if (C.Low != C.High) {
        dbgs() << '-';
        C.High->getValue().print(dbgs(), true);
      }
      dbgs() << ' ';
    }
    dbgs() << '\n';
  });

  assert(!Clusters.empty());
  SwitchWorkList WorkList;
  CaseClusterIt First = Clusters.begin();
  CaseClusterIt Last = Clusters.end() - 1;
  auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
  // Scale the branch probability for DefaultMBB if peeling occurred and
  // DefaultMBB was not replaced.
  if (PeeledCaseProb != BranchProbability::getZero() &&
      DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
    DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb);
  WorkList.push_back(
      {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});

  while (!WorkList.empty()) {
    SwitchWorkListItem W = WorkList.pop_back_val();
    unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;

    if (NumClusters > 3 && TM.getOptLevel() != CodeGenOptLevel::None &&
        !DefaultMBB->getParent()->getFunction().hasMinSize()) {
      // For optimized builds, lower large range as a balanced binary tree.
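      // E.g. (illustrative) a work item with eight clusters is split into two
      // smaller items around a pivot cluster, so each lowered path performs
      // O(log N) comparisons rather than a linear scan.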
      splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
      continue;
    }

    lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
  }
}

void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto DL = getCurSDLoc();
  EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
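  // E.g. (illustrative) for a <4 x i32> result, the step vector is the
  // constant sequence <0, 1, 2, 3>.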
  setValue(&I, DAG.getStepVector(DL, ResultVT));
}

void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());

  SDLoc DL = getCurSDLoc();
  SDValue V = getValue(I.getOperand(0));
  assert(VT == V.getValueType() && "Malformed vector.reverse!");

  if (VT.isScalableVector()) {
    setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V));
    return;
  }

  // Use VECTOR_SHUFFLE for the fixed-length vector
  // to maintain existing behavior.
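  // E.g. (illustrative) a <4 x i32> input is reversed with the shuffle mask
  // <3, 2, 1, 0>.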
  SmallVector<int, 8> Mask;
  unsigned NumElts = VT.getVectorMinNumElements();
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(NumElts - 1 - i);

  setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
}

void SelectionDAGBuilder::visitVectorDeinterleave(const CallInst &I) {
  auto DL = getCurSDLoc();
  SDValue InVec = getValue(I.getOperand(0));
  EVT OutVT =
      InVec.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());

  unsigned OutNumElts = OutVT.getVectorMinNumElements();

  // The ISD node needs the input vector split into two equal halves.
  SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, InVec,
                           DAG.getVectorIdxConstant(0, DL));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, InVec,
                           DAG.getVectorIdxConstant(OutNumElts, DL));

  // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
  // legalisation and combines.
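  // E.g. (illustrative) for an <8 x i32> input, Lo holds elements 0-3 and Hi
  // elements 4-7; stride-2 masks <0, 2, 4, 6> and <1, 3, 5, 7> over the pair
  // then select the even and odd lanes of the original vector.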
  if (OutVT.isFixedLengthVector()) {
    SDValue Even = DAG.getVectorShuffle(OutVT, DL, Lo, Hi,
                                        createStrideMask(0, 2, OutNumElts));
    SDValue Odd = DAG.getVectorShuffle(OutVT, DL, Lo, Hi,
                                       createStrideMask(1, 2, OutNumElts));
    SDValue Res = DAG.getMergeValues({Even, Odd}, getCurSDLoc());
    setValue(&I, Res);
    return;
  }

  SDValue Res = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
                            DAG.getVTList(OutVT, OutVT), Lo, Hi);
  setValue(&I, Res);
}

void SelectionDAGBuilder::visitVectorInterleave(const CallInst &I) {
  auto DL = getCurSDLoc();
  EVT InVT = getValue(I.getOperand(0)).getValueType();
  SDValue InVec0 = getValue(I.getOperand(0));
  SDValue InVec1 = getValue(I.getOperand(1));
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT OutVT = TLI.getValueType(DAG.getDataLayout(), I.getType());

  // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
  // legalisation and combines.
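  // E.g. (illustrative) two <4 x i32> operands are concatenated into an
  // <8 x i32> value and shuffled with the interleave mask
  // <0, 4, 1, 5, 2, 6, 3, 7>.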
  if (OutVT.isFixedLengthVector()) {
    unsigned NumElts = InVT.getVectorMinNumElements();
    SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, InVec0, InVec1);
    setValue(&I, DAG.getVectorShuffle(OutVT, DL, V, DAG.getUNDEF(OutVT),
                                      createInterleaveMask(NumElts, 2)));
    return;
  }

  SDValue Res = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
                            DAG.getVTList(InVT, InVT), InVec0, InVec1);
  Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Res.getValue(0),
                    Res.getValue(1));
  setValue(&I, Res);
}

void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
                  ValueVTs);
  unsigned NumValues = ValueVTs.size();
  if (NumValues == 0) return;
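
  // E.g. (illustrative) freezing an { i32, double } value yields two value
  // types here; one FREEZE node is built per component, and the results are
  // re-merged below.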
  SmallVector<SDValue, 4> Values(NumValues);
  SDValue Op = getValue(I.getOperand(0));

  for (unsigned i = 0; i != NumValues; ++i)
    Values[i] = DAG.getNode(ISD::FREEZE, getCurSDLoc(), ValueVTs[i],
                            SDValue(Op.getNode(), Op.getResNo() + i));

  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                           DAG.getVTList(ValueVTs), Values));
}

void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());

  SDLoc DL = getCurSDLoc();
  SDValue V1 = getValue(I.getOperand(0));
  SDValue V2 = getValue(I.getOperand(1));
  int64_t Imm = cast<ConstantInt>(I.getOperand(2))->getSExtValue();

  // VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node.
  if (VT.isScalableVector()) {
    setValue(&I, DAG.getNode(ISD::VECTOR_SPLICE, DL, VT, V1, V2,
                             DAG.getVectorIdxConstant(Imm, DL)));
    return;
  }

  unsigned NumElts = VT.getVectorNumElements();

  uint64_t Idx = (NumElts + Imm) % NumElts;

  // Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors.
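  // E.g. (illustrative) for <4 x i32> operands and Imm == -1, Idx is 3 and
  // the mask is <3, 4, 5, 6>: the last element of V1 followed by the first
  // three elements of V2.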
  SmallVector<int, 8> Mask;
  for (unsigned i = 0; i < NumElts; ++i)
    Mask.push_back(Idx + i);
  setValue(&I, DAG.getVectorShuffle(VT, DL, V1, V2, Mask));
}

// Consider the following MIR after SelectionDAG, which produces output in
// physregs in the first case or virtregs in the second case.
//
// INLINEASM_BR ..., implicit-def $ebx, ..., implicit-def $edx
// %5:gr32 = COPY $ebx
// %6:gr32 = COPY $edx
// %1:gr32 = COPY %6:gr32
// %0:gr32 = COPY %5:gr32
//
// INLINEASM_BR ..., def %5:gr32, ..., def %6:gr32
// %1:gr32 = COPY %6:gr32
// %0:gr32 = COPY %5:gr32
//
// Given %0, we'd like to return $ebx in the first case and %5 in the second.
// Given %1, we'd like to return $edx in the first case and %6 in the second.
//
// If a callbr has outputs, it will have a single mapping in FuncInfo.ValueMap
// to a single virtreg (such as %0). The remaining outputs monotonically
// increase in virtreg number from there. If a callbr has no outputs, then it
// should not have a corresponding callbr landingpad; in fact, the callbr
// landingpad would not even be able to refer to such a callbr.
static Register FollowCopyChain(MachineRegisterInfo &MRI, Register Reg) {
  MachineInstr *MI = MRI.def_begin(Reg)->getParent();
  // There is definitely at least one copy.
  assert(MI->getOpcode() == TargetOpcode::COPY &&
         "start of copy chain MUST be COPY");
  Reg = MI->getOperand(1).getReg();
  MI = MRI.def_begin(Reg)->getParent();
  // There may be an optional second copy.
  if (MI->getOpcode() == TargetOpcode::COPY) {
    assert(Reg.isVirtual() && "expected COPY of virtual register");
    Reg = MI->getOperand(1).getReg();
    assert(Reg.isPhysical() && "expected COPY of physical register");
    MI = MRI.def_begin(Reg)->getParent();
  }
  // The start of the chain must be an INLINEASM_BR.
  assert(MI->getOpcode() == TargetOpcode::INLINEASM_BR &&
         "end of copy chain MUST be INLINEASM_BR");
  return Reg;
}

// We must do this walk rather than the simpler
// setValue(&I, getCopyFromRegs(CBR, CBR->getType()));
// otherwise we will end up with copies of virtregs only valid along direct
// edges.
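// For example (illustrative IR, not from any particular test):
//   %0 = callbr { i32, i32 } asm "...", "=r,=r,..."()
//            to label %direct [label %indirect]
// In %indirect, the outputs of %0 must be rebuilt from the registers the
// INLINEASM_BR actually defined, not from the COPYs that are only valid
// along the %direct edge.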
void SelectionDAGBuilder::visitCallBrLandingPad(const CallInst &I) {
  SmallVector<EVT, 8> ResultVTs;
  SmallVector<SDValue, 8> ResultValues;
  const auto *CBR =
      cast<CallBrInst>(I.getParent()->getUniquePredecessor()->getTerminator());

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();

  unsigned InitialDef = FuncInfo.ValueMap[CBR];
  SDValue Chain = DAG.getRoot();

  // Re-parse the asm constraints string.
  TargetLowering::AsmOperandInfoVector TargetConstraints =
      TLI.ParseConstraints(DAG.getDataLayout(), TRI, *CBR);
  for (auto &T : TargetConstraints) {
    SDISelAsmOperandInfo OpInfo(T);
    if (OpInfo.Type != InlineAsm::isOutput)
      continue;

    // Pencil in OpInfo.ConstraintType and OpInfo.ConstraintVT based on the
    // individual constraint.
    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);

    switch (OpInfo.ConstraintType) {
    case TargetLowering::C_Register:
    case TargetLowering::C_RegisterClass: {
      // Fill in OpInfo.AssignedRegs.Regs.
      getRegistersForValue(DAG, getCurSDLoc(), OpInfo, OpInfo);

      // getRegistersForValue may produce one or many registers depending on
      // whether OpInfo.ConstraintVT is legal on the target.
      for (unsigned &Reg : OpInfo.AssignedRegs.Regs) {
        Register OriginalDef = FollowCopyChain(MRI, InitialDef++);
        if (Register::isPhysicalRegister(OriginalDef))
          FuncInfo.MBB->addLiveIn(OriginalDef);
        // Update the assigned registers to use the original defs.
        Reg = OriginalDef;
      }

      SDValue V = OpInfo.AssignedRegs.getCopyFromRegs(
          DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, CBR);
      ResultValues.push_back(V);
      ResultVTs.push_back(OpInfo.ConstraintVT);
      break;
    }
    case TargetLowering::C_Other: {
      SDValue Flag;
      SDValue V = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
                                                  OpInfo, DAG);
      ++InitialDef;
      ResultValues.push_back(V);
      ResultVTs.push_back(OpInfo.ConstraintVT);
      break;
    }
    default:
      break;
    }
  }
  SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
                          DAG.getVTList(ResultVTs), ResultValues);
  setValue(&I, V);
}