1 | //===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file describes how to lower LLVM code to machine code. This has three |
11 | /// main components: |
12 | /// |
13 | /// 1. Which ValueTypes are natively supported by the target. |
14 | /// 2. Which operations are supported for supported ValueTypes. |
15 | /// 3. Cost thresholds for alternative implementations of certain operations. |
16 | /// |
17 | /// In addition it has a few other components, like information about FP |
18 | /// immediates. |
19 | /// |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #ifndef LLVM_CODEGEN_TARGETLOWERING_H |
23 | #define LLVM_CODEGEN_TARGETLOWERING_H |
24 | |
25 | #include "llvm/ADT/APInt.h" |
26 | #include "llvm/ADT/ArrayRef.h" |
27 | #include "llvm/ADT/DenseMap.h" |
28 | #include "llvm/ADT/SmallVector.h" |
29 | #include "llvm/ADT/StringRef.h" |
30 | #include "llvm/CodeGen/DAGCombine.h" |
31 | #include "llvm/CodeGen/ISDOpcodes.h" |
32 | #include "llvm/CodeGen/LowLevelTypeUtils.h" |
33 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
34 | #include "llvm/CodeGen/RuntimeLibcallUtil.h" |
35 | #include "llvm/CodeGen/SelectionDAG.h" |
36 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
37 | #include "llvm/CodeGen/TargetCallingConv.h" |
38 | #include "llvm/CodeGen/ValueTypes.h" |
39 | #include "llvm/CodeGenTypes/MachineValueType.h" |
40 | #include "llvm/IR/Attributes.h" |
41 | #include "llvm/IR/CallingConv.h" |
42 | #include "llvm/IR/DataLayout.h" |
43 | #include "llvm/IR/DerivedTypes.h" |
44 | #include "llvm/IR/Function.h" |
45 | #include "llvm/IR/InlineAsm.h" |
46 | #include "llvm/IR/Instruction.h" |
47 | #include "llvm/IR/Instructions.h" |
48 | #include "llvm/IR/RuntimeLibcalls.h" |
49 | #include "llvm/IR/Type.h" |
50 | #include "llvm/Support/Alignment.h" |
51 | #include "llvm/Support/AtomicOrdering.h" |
52 | #include "llvm/Support/Casting.h" |
53 | #include "llvm/Support/ErrorHandling.h" |
54 | #include <algorithm> |
55 | #include <cassert> |
56 | #include <climits> |
57 | #include <cstdint> |
58 | #include <iterator> |
59 | #include <map> |
60 | #include <string> |
61 | #include <utility> |
62 | #include <vector> |
63 | |
64 | namespace llvm { |
65 | |
66 | class AssumptionCache; |
67 | class CCState; |
68 | class CCValAssign; |
69 | enum class ComplexDeinterleavingOperation; |
70 | enum class ComplexDeinterleavingRotation; |
71 | class Constant; |
72 | class FastISel; |
73 | class FunctionLoweringInfo; |
74 | class GlobalValue; |
75 | class Loop; |
76 | class GISelKnownBits; |
77 | class IntrinsicInst; |
78 | class IRBuilderBase; |
79 | struct KnownBits; |
80 | class LLVMContext; |
81 | class MachineBasicBlock; |
82 | class MachineFunction; |
83 | class MachineInstr; |
84 | class MachineJumpTableInfo; |
85 | class MachineLoop; |
86 | class MachineRegisterInfo; |
87 | class MCContext; |
88 | class MCExpr; |
89 | class Module; |
90 | class ProfileSummaryInfo; |
91 | class TargetLibraryInfo; |
92 | class TargetMachine; |
93 | class TargetRegisterClass; |
94 | class TargetRegisterInfo; |
95 | class TargetTransformInfo; |
96 | class Value; |
97 | |
98 | namespace Sched { |
99 | |
100 | enum Preference : uint8_t { |
101 | None, // No preference |
102 | Source, // Follow source order. |
103 | RegPressure, // Scheduling for lowest register pressure. |
104 | Hybrid, // Scheduling for both latency and register pressure. |
105 | ILP, // Scheduling for ILP in low register pressure mode. |
106 | VLIW, // Scheduling for VLIW targets. |
107 | Fast, // Fast suboptimal list scheduling |
108 | Linearize, // Linearize DAG, no scheduling |
109 | Last = Linearize // Marker for the last Sched::Preference |
110 | }; |
111 | |
112 | } // end namespace Sched |
113 | |
114 | // MemOp models a memory operation, either memset or memcpy/memmove. |
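// For example, the memcpy/memset lowering code builds a query roughly like
// this (sizes and alignments below are illustrative only):
//
//   MemOp Op = MemOp::Copy(/*Size=*/32, /*DstAlignCanChange=*/true,
//                          /*DstAlign=*/Align(8), /*SrcAlign=*/Align(4),
//                          /*IsVolatile=*/false);
//
// and then passes it to hooks such as getOptimalMemOpType().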
115 | struct MemOp { |
116 | private: |
117 | // Shared |
118 | uint64_t Size; |
119 | bool DstAlignCanChange; // true if destination alignment can satisfy any |
120 | // constraint. |
121 | Align DstAlign; // Specified alignment of the memory operation. |
122 | |
123 | bool AllowOverlap; |
124 | // memset only |
125 | bool IsMemset; // If set, this memory operation is a memset. |
126 | bool ZeroMemset; // If set, clears out memory with zeros. |
127 | // memcpy only |
128 | bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register |
129 | // constant so it does not need to be loaded. |
130 | Align SrcAlign; // Inferred alignment of the source or default value if the |
131 | // memory operation does not need to load the value. |
132 | public: |
133 | static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, |
134 | Align SrcAlign, bool IsVolatile, |
135 | bool MemcpyStrSrc = false) { |
136 | MemOp Op; |
137 | Op.Size = Size; |
138 | Op.DstAlignCanChange = DstAlignCanChange; |
139 | Op.DstAlign = DstAlign; |
140 | Op.AllowOverlap = !IsVolatile; |
141 | Op.IsMemset = false; |
142 | Op.ZeroMemset = false; |
143 | Op.MemcpyStrSrc = MemcpyStrSrc; |
144 | Op.SrcAlign = SrcAlign; |
145 | return Op; |
146 | } |
147 | |
148 | static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, |
149 | bool IsZeroMemset, bool IsVolatile) { |
150 | MemOp Op; |
151 | Op.Size = Size; |
152 | Op.DstAlignCanChange = DstAlignCanChange; |
153 | Op.DstAlign = DstAlign; |
154 | Op.AllowOverlap = !IsVolatile; |
155 | Op.IsMemset = true; |
156 | Op.ZeroMemset = IsZeroMemset; |
157 | Op.MemcpyStrSrc = false; |
158 | return Op; |
159 | } |
160 | |
161 | uint64_t size() const { return Size; } |
162 | Align getDstAlign() const { |
163 | assert(!DstAlignCanChange); |
164 | return DstAlign; |
165 | } |
166 | bool isFixedDstAlign() const { return !DstAlignCanChange; } |
167 | bool allowOverlap() const { return AllowOverlap; } |
168 | bool isMemset() const { return IsMemset; } |
169 | bool isMemcpy() const { return !IsMemset; } |
170 | bool isMemcpyWithFixedDstAlign() const { |
171 | return isMemcpy() && !DstAlignCanChange; |
172 | } |
173 | bool isZeroMemset() const { return isMemset() && ZeroMemset; } |
174 | bool isMemcpyStrSrc() const { |
175 | assert(isMemcpy() && "Must be a memcpy"); |
176 | return MemcpyStrSrc; |
177 | } |
178 | Align getSrcAlign() const { |
179 | assert(isMemcpy() && "Must be a memcpy"); |
180 | return SrcAlign; |
181 | } |
182 | bool isSrcAligned(Align AlignCheck) const { |
183 | return isMemset() || llvm::isAligned(AlignCheck, SrcAlign.value()); |
184 | } |
185 | bool isDstAligned(Align AlignCheck) const { |
186 | return DstAlignCanChange || llvm::isAligned(AlignCheck, DstAlign.value()); |
187 | } |
188 | bool isAligned(Align AlignCheck) const { |
189 | return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck); |
190 | } |
191 | }; |
192 | |
193 | /// This base class for TargetLowering contains the SelectionDAG-independent |
194 | /// parts that can be used from the rest of CodeGen. |
195 | class TargetLoweringBase { |
196 | public: |
197 | /// This enum indicates whether operations are valid for a target, and if not, |
198 | /// what action should be used to make them valid. |
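///
/// For example, a target's TargetLowering constructor typically records one of
/// these actions per opcode and type via setOperationAction(); the opcodes and
/// types below are purely illustrative:
/// \code
///   setOperationAction(ISD::SDIV,   MVT::i32,   Expand);  // build from other ops / libcall
///   setOperationAction(ISD::FSIN,   MVT::f32,   LibCall); // always call the runtime
///   setOperationAction(ISD::BSWAP,  MVT::i16,   Promote); // perform in a wider type
///   setOperationAction(ISD::SELECT, MVT::v4i32, Custom);  // handled in LowerOperation
/// \endcode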
199 | enum LegalizeAction : uint8_t { |
200 | Legal, // The target natively supports this operation. |
201 | Promote, // This operation should be executed in a larger type. |
202 | Expand, // Try to expand this to other ops, otherwise use a libcall. |
203 | LibCall, // Don't try to expand this to other ops, always use a libcall. |
204 | Custom // Use the LowerOperation hook to implement custom lowering. |
205 | }; |
206 | |
207 | /// This enum indicates whether a type is legal for a target, and if not, |
208 | /// what action should be used to make it legal. |
209 | enum LegalizeTypeAction : uint8_t { |
210 | TypeLegal, // The target natively supports this type. |
211 | TypePromoteInteger, // Replace this integer with a larger one. |
212 | TypeExpandInteger, // Split this integer into two of half the size. |
213 | TypeSoftenFloat, // Convert this float to a same size integer type. |
214 | TypeExpandFloat, // Split this float into two of half the size. |
215 | TypeScalarizeVector, // Replace this one-element vector with its element. |
216 | TypeSplitVector, // Split this vector into two of half the size. |
217 | TypeWidenVector, // This vector should be widened into a larger vector. |
218 | TypePromoteFloat, // Replace this float with a larger one. |
219 | TypeSoftPromoteHalf, // Soften half to i16 and use float to do arithmetic. |
220 | TypeScalarizeScalableVector, // This action is explicitly left unimplemented. |
221 | // While it is theoretically possible to |
222 | // legalize operations on scalable types with a |
223 | // loop that handles the vscale * #lanes of the |
224 | // vector, this is non-trivial at SelectionDAG |
225 | // level and these types are better to be |
226 | // widened or promoted. |
227 | }; |
228 | |
229 | /// LegalizeKind holds the legalization kind that needs to happen to EVT |
230 | /// in order to type-legalize it. |
231 | using LegalizeKind = std::pair<LegalizeTypeAction, EVT>; |
232 | |
233 | /// Enum that describes how the target represents true/false values. |
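/// For example, with ZeroOrNegativeOneBooleanContent a "true" lane produced by
/// a vector SETCC is all ones and can be consumed directly as an AND/VSELECT
/// mask, whereas ZeroOrOneBooleanContent guarantees only bit 0.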
234 | enum BooleanContent { |
235 | UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage. |
236 | ZeroOrOneBooleanContent, // All bits zero except for bit 0. |
237 | ZeroOrNegativeOneBooleanContent // All bits equal to bit 0. |
238 | }; |
239 | |
240 | /// Enum that describes what type of support for selects the target has. |
241 | enum SelectSupportKind { |
242 | ScalarValSelect, // The target supports scalar selects (ex: cmov). |
243 | ScalarCondVectorVal, // The target supports selects with a scalar condition |
244 | // and vector values (ex: cmov). |
245 | VectorMaskSelect // The target supports vector selects with a vector |
246 | // mask (ex: x86 blends). |
247 | }; |
248 | |
249 | /// Enum that specifies what an atomic load/AtomicRMWInst is expanded |
250 | /// to, if at all. Exists because different targets have different levels of |
251 | /// support for these atomic instructions, and also have different options |
252 | /// w.r.t. what they should expand to. |
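///
/// For example, a target override of the shouldExpandAtomicRMWInIR hook might
/// choose per instruction (the policy shown here is purely illustrative):
/// \code
///   AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override {
///     if (AI->isFloatingPointOperation())
///       return AtomicExpansionKind::CmpXChg; // do FP RMW via a cmpxchg loop
///     return AtomicExpansionKind::None;      // everything else is native
///   }
/// \endcode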
253 | enum class AtomicExpansionKind { |
254 | None, // Don't expand the instruction. |
255 | CastToInteger, // Cast the atomic instruction to another type, e.g. from |
256 | // floating-point to integer type. |
257 | LLSC, // Expand the instruction into loadlinked/storeconditional; used |
258 | // by ARM/AArch64. |
259 | LLOnly, // Expand the (load) instruction into just a load-linked, which has |
260 | // greater atomic guarantees than a normal load. |
261 | CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. |
262 | MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop. |
263 | BitTestIntrinsic, // Use a target-specific intrinsic for special bit |
264 | // operations; used by X86. |
265 | CmpArithIntrinsic,// Use a target-specific intrinsic for special compare |
266 | // operations; used by X86. |
267 | Expand, // Generic expansion in terms of other atomic operations. |
268 | |
269 | // Rewrite to a non-atomic form for use in a known non-preemptible |
270 | // environment. |
271 | NotAtomic |
272 | }; |
273 | |
274 | /// Enum that specifies when a multiplication should be expanded. |
275 | enum class MulExpansionKind { |
276 | Always, // Always expand the instruction. |
277 | OnlyLegalOrCustom, // Only expand when the resulting instructions are legal |
278 | // or custom. |
279 | }; |
280 | |
281 | /// Enum that specifies when a float negation is beneficial. |
282 | enum class NegatibleCost { |
283 | Cheaper = 0, // Negated expression is cheaper. |
284 | Neutral = 1, // Negated expression has the same cost. |
285 | Expensive = 2 // Negated expression is more expensive. |
286 | }; |
287 | |
288 | /// Enum of different potentially desirable ways to fold (and/or (setcc ...), |
289 | /// (setcc ...)). |
290 | enum AndOrSETCCFoldKind : uint8_t { |
291 | None = 0, // No fold is preferable. |
292 | AddAnd = 1, // Fold with `Add` op and `And` op is preferable. |
293 | NotAnd = 2, // Fold with `Not` op and `And` op is preferable. |
294 | ABS = 4, // Fold with `llvm.abs` op is preferable. |
295 | }; |
296 | |
297 | class ArgListEntry { |
298 | public: |
299 | Value *Val = nullptr; |
300 | SDValue Node = SDValue(); |
301 | Type *Ty = nullptr; |
302 | bool IsSExt : 1; |
303 | bool IsZExt : 1; |
304 | bool IsInReg : 1; |
305 | bool IsSRet : 1; |
306 | bool IsNest : 1; |
307 | bool IsByVal : 1; |
308 | bool IsByRef : 1; |
309 | bool IsInAlloca : 1; |
310 | bool IsPreallocated : 1; |
311 | bool IsReturned : 1; |
312 | bool IsSwiftSelf : 1; |
313 | bool IsSwiftAsync : 1; |
314 | bool IsSwiftError : 1; |
315 | bool IsCFGuardTarget : 1; |
316 | MaybeAlign Alignment = std::nullopt; |
317 | Type *IndirectType = nullptr; |
318 | |
319 | ArgListEntry() |
320 | : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false), |
321 | IsNest(false), IsByVal(false), IsByRef(false), IsInAlloca(false), |
322 | IsPreallocated(false), IsReturned(false), IsSwiftSelf(false), |
323 | IsSwiftAsync(false), IsSwiftError(false), IsCFGuardTarget(false) {} |
324 | |
325 | void setAttributes(const CallBase *Call, unsigned ArgIdx); |
326 | }; |
327 | using ArgListTy = std::vector<ArgListEntry>; |
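
// Lowering code usually populates the list one entry at a time, e.g. (a
// sketch; the SDValue "Ptr", context "Ctx", and the chosen type are
// placeholders):
//   ArgListTy Args;
//   ArgListEntry Entry;
//   Entry.Node = Ptr;                        // SDValue being passed
//   Entry.Ty = PointerType::getUnqual(Ctx);  // its IR type
//   Args.push_back(Entry);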
328 | |
329 | virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC, |
330 | ArgListTy &Args) const {}; |
331 | |
332 | static ISD::NodeType getExtendForContent(BooleanContent Content) { |
333 | switch (Content) { |
334 | case UndefinedBooleanContent: |
335 | // Extend by adding rubbish bits. |
336 | return ISD::ANY_EXTEND; |
337 | case ZeroOrOneBooleanContent: |
338 | // Extend by adding zero bits. |
339 | return ISD::ZERO_EXTEND; |
340 | case ZeroOrNegativeOneBooleanContent: |
341 | // Extend by copying the sign bit. |
342 | return ISD::SIGN_EXTEND; |
343 | } |
344 | llvm_unreachable("Invalid content kind" ); |
345 | } |
346 | |
347 | explicit TargetLoweringBase(const TargetMachine &TM); |
348 | TargetLoweringBase(const TargetLoweringBase &) = delete; |
349 | TargetLoweringBase &operator=(const TargetLoweringBase &) = delete; |
350 | virtual ~TargetLoweringBase() = default; |
351 | |
352 | /// Return true if the target supports strict float operations. |
353 | bool isStrictFPEnabled() const { |
354 | return IsStrictFPEnabled; |
355 | } |
356 | |
357 | protected: |
358 | /// Initialize all of the actions to default values. |
359 | void initActions(); |
360 | |
361 | public: |
362 | const TargetMachine &getTargetMachine() const { return TM; } |
363 | |
364 | virtual bool useSoftFloat() const { return false; } |
365 | |
366 | /// Return the pointer type for the given address space, defaults to |
367 | /// the pointer type from the data layout. |
368 | /// FIXME: The default needs to be removed once all the code is updated. |
369 | virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const { |
370 | return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); |
371 | } |
372 | |
373 | /// Return the in-memory pointer type for the given address space, defaults to |
374 | /// the pointer type from the data layout. |
375 | /// FIXME: The default needs to be removed once all the code is updated. |
376 | virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const { |
377 | return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); |
378 | } |
379 | |
380 | /// Return the type for frame index, which is determined by |
381 | /// the alloca address space specified through the data layout. |
382 | MVT getFrameIndexTy(const DataLayout &DL) const { |
383 | return getPointerTy(DL, DL.getAllocaAddrSpace()); |
384 | } |
385 | |
386 | /// Return the type for code pointers, which is determined by the program |
387 | /// address space specified through the data layout. |
388 | MVT getProgramPointerTy(const DataLayout &DL) const { |
389 | return getPointerTy(DL, DL.getProgramAddressSpace()); |
390 | } |
391 | |
392 | /// Return the type for operands of fence. |
393 | /// TODO: Let fence operands be of i32 type and remove this. |
394 | virtual MVT getFenceOperandTy(const DataLayout &DL) const { |
395 | return getPointerTy(DL); |
396 | } |
397 | |
398 | /// Return the type to use for a scalar shift opcode, given the shifted amount |
399 | /// type. Targets should return a legal type if the input type is legal. |
400 | /// Targets can return a type that is too small if the input type is illegal. |
401 | virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; |
402 | |
403 | /// Returns the type for the shift amount of a shift opcode. For vectors, |
404 | /// returns the input type. For scalars, calls getScalarShiftAmountTy. |
405 | /// If getScalarShiftAmountTy type cannot represent all possible shift |
406 | /// amounts, returns MVT::i32. |
407 | EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const; |
408 | |
409 | /// Return the preferred type to use for a shift opcode, given the shifted |
410 | /// amount type is \p ShiftValueTy. |
411 | LLVM_READONLY |
412 | virtual LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const { |
413 | return ShiftValueTy; |
414 | } |
415 | |
416 | /// Returns the type to be used for the index operand of: |
417 | /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, |
418 | /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR |
419 | virtual MVT getVectorIdxTy(const DataLayout &DL) const { |
420 | return getPointerTy(DL); |
421 | } |
422 | |
423 | /// Returns the type to be used for the EVL/AVL operand of VP nodes: |
424 | /// ISD::VP_ADD, ISD::VP_SUB, etc. It must be a legal scalar integer type, |
425 | /// and must be at least as large as i32. The EVL is implicitly zero-extended |
426 | /// to any larger type. |
427 | virtual MVT getVPExplicitVectorLengthTy() const { return MVT::i32; } |
428 | |
429 | /// This callback is used to inspect load/store instructions and add |
430 | /// target-specific MachineMemOperand flags to them. The default |
431 | /// implementation does nothing. |
432 | virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const { |
433 | return MachineMemOperand::MONone; |
434 | } |
435 | |
436 | /// This callback is used to inspect load/store SDNode. |
437 | /// The default implementation does nothing. |
438 | virtual MachineMemOperand::Flags |
439 | getTargetMMOFlags(const MemSDNode &Node) const { |
440 | return MachineMemOperand::MONone; |
441 | } |
442 | |
443 | MachineMemOperand::Flags |
444 | getLoadMemOperandFlags(const LoadInst &LI, const DataLayout &DL, |
445 | AssumptionCache *AC = nullptr, |
446 | const TargetLibraryInfo *LibInfo = nullptr) const; |
447 | MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI, |
448 | const DataLayout &DL) const; |
449 | MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI, |
450 | const DataLayout &DL) const; |
451 | |
452 | virtual bool isSelectSupported(SelectSupportKind /*kind*/) const { |
453 | return true; |
454 | } |
455 | |
456 | /// Return true if the @llvm.get.active.lane.mask intrinsic should be expanded |
457 | /// using generic code in SelectionDAGBuilder. |
458 | virtual bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const { |
459 | return true; |
460 | } |
461 | |
462 | virtual bool shouldExpandGetVectorLength(EVT CountVT, unsigned VF, |
463 | bool IsScalable) const { |
464 | return true; |
465 | } |
466 | |
467 | /// Return true if the @llvm.experimental.cttz.elts intrinsic should be |
468 | /// expanded using generic code in SelectionDAGBuilder. |
469 | virtual bool shouldExpandCttzElements(EVT VT) const { return true; } |
470 | |
471 | /// Return the minimum number of bits required to hold the maximum possible |
472 | /// number of trailing zero vector elements. |
473 | unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, |
474 | bool ZeroIsPoison, |
475 | const ConstantRange *VScaleRange) const; |
476 | |
477 | // Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to |
478 | // vecreduce(op(x, y)) for the reduction opcode RedOpc. |
479 | virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const { |
480 | return true; |
481 | } |
482 | |
483 | /// Return true if it is profitable to convert a select of FP constants into |
484 | /// a constant pool load whose address depends on the select condition. The |
485 | /// parameter may be used to differentiate a select with FP compare from |
486 | /// integer compare. |
487 | virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const { |
488 | return true; |
489 | } |
490 | |
491 | /// Return true if multiple condition registers are available. |
492 | bool hasMultipleConditionRegisters() const { |
493 | return HasMultipleConditionRegisters; |
494 | } |
495 | |
496 | /// Return true if the target has BitExtract instructions. |
497 | bool hasExtractBitsInsn() const { return HasExtractBitsInsn; } |
498 | |
499 | /// Return the preferred vector type legalization action. |
500 | virtual TargetLoweringBase::LegalizeTypeAction |
501 | getPreferredVectorAction(MVT VT) const { |
502 | // The default action for one element vectors is to scalarize |
503 | if (VT.getVectorElementCount().isScalar()) |
504 | return TypeScalarizeVector; |
505 | // The default action for an odd-width vector is to widen. |
506 | if (!VT.isPow2VectorType()) |
507 | return TypeWidenVector; |
508 | // The default action for other vectors is to promote |
509 | return TypePromoteInteger; |
510 | } |
511 | |
512 | // Return true if the half type should be promoted using soft promotion rules |
513 | // where each operation is promoted to f32 individually, then converted to |
514 | // fp16. The default behavior is to promote chains of operations, keeping |
515 | // intermediate results in f32 precision and range. |
516 | virtual bool softPromoteHalfType() const { return false; } |
517 | |
518 | // Return true if, for soft-promoted half, the half type should be passed |
519 | // to and returned from functions as f32. The default behavior is to |
520 | // pass as i16. If soft-promoted half is not used, this function is ignored |
521 | // and values are always passed and returned as f32. |
522 | virtual bool useFPRegsForHalfType() const { return false; } |
523 | |
524 | // There are two general methods for expanding a BUILD_VECTOR node: |
525 | // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle |
526 | // them together. |
527 | // 2. Build the vector on the stack and then load it. |
528 | // If this function returns true, then method (1) will be used, subject to |
529 | // the constraint that all of the necessary shuffles are legal (as determined |
530 | // by isShuffleMaskLegal). If this function returns false, then method (2) is |
531 | // always used. The vector type, and the number of defined values, are |
532 | // provided. |
533 | virtual bool |
534 | shouldExpandBuildVectorWithShuffles(EVT /* VT */, |
535 | unsigned DefinedValues) const { |
536 | return DefinedValues < 3; |
537 | } |
538 | |
539 | /// Return true if integer divide is usually cheaper than a sequence of |
540 | /// several shifts, adds, and multiplies for this target. |
541 | /// The definition of "cheaper" may depend on whether we're optimizing |
542 | /// for speed or for size. |
543 | virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; } |
544 | |
545 | /// Return true if the target can handle a standalone remainder operation. |
546 | virtual bool hasStandaloneRem(EVT VT) const { |
547 | return true; |
548 | } |
549 | |
550 | /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X). |
551 | virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const { |
552 | // Default behavior is to replace SQRT(X) with X*RSQRT(X). |
553 | return false; |
554 | } |
555 | |
556 | /// Reciprocal estimate status values used by the functions below. |
557 | enum ReciprocalEstimate : int { |
558 | Unspecified = -1, |
559 | Disabled = 0, |
560 | Enabled = 1 |
561 | }; |
562 | |
563 | /// Return a ReciprocalEstimate enum value for a square root of the given type |
564 | /// based on the function's attributes. If the operation is not overridden by |
565 | /// the function's attributes, "Unspecified" is returned and target defaults |
566 | /// are expected to be used for instruction selection. |
567 | int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const; |
568 | |
569 | /// Return a ReciprocalEstimate enum value for a division of the given type |
570 | /// based on the function's attributes. If the operation is not overridden by |
571 | /// the function's attributes, "Unspecified" is returned and target defaults |
572 | /// are expected to be used for instruction selection. |
573 | int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const; |
574 | |
575 | /// Return the refinement step count for a square root of the given type based |
576 | /// on the function's attributes. If the operation is not overridden by |
577 | /// the function's attributes, "Unspecified" is returned and target defaults |
578 | /// are expected to be used for instruction selection. |
579 | int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const; |
580 | |
581 | /// Return the refinement step count for a division of the given type based |
582 | /// on the function's attributes. If the operation is not overridden by |
583 | /// the function's attributes, "Unspecified" is returned and target defaults |
584 | /// are expected to be used for instruction selection. |
585 | int getDivRefinementSteps(EVT VT, MachineFunction &MF) const; |
586 | |
587 | /// Returns true if target has indicated at least one type should be bypassed. |
588 | bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); } |
589 | |
590 | /// Returns map of slow types for division or remainder with corresponding |
591 | /// fast types |
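/// Targets populate this map in their constructors via addBypassSlowDiv(),
/// e.g. addBypassSlowDiv(64, 32) to request trying a 32-bit divide first when
/// a 64-bit divide's operands happen to fit (the widths are illustrative).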
592 | const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const { |
593 | return BypassSlowDivWidths; |
594 | } |
595 | |
596 | /// Return true only if vscale must be a power of two. |
597 | virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; } |
598 | |
599 | /// Return true if Flow Control is an expensive operation that should be |
600 | /// avoided. |
601 | bool isJumpExpensive() const { return JumpIsExpensive; } |
602 | |
603 | // Cost parameters used by |
604 | // SelectionDAGBuilder::shouldKeepJumpConditionsTogether. |
605 | // shouldKeepJumpConditionsTogether will use these parameter value to |
606 | // determine if two conditions in the form `br (and/or cond1, cond2)` should |
607 | // be split into two branches or left as one. |
608 | // |
609 | // BaseCost is the cost threshold (in latency). If the estimated latency of |
610 | // computing both `cond1` and `cond2` is below the cost of just computing |
611 | // `cond1` + BaseCost, the two conditions will be kept together. Otherwise |
612 | // they will be split. |
613 | // |
614 | // LikelyBias increases BaseCost if branch probability info indicates that it |
615 | // is likely that both `cond1` and `cond2` will be computed. |
616 | // |
617 | // UnlikelyBias decreases BaseCost if branch probability info indicates that |
618 | // it is unlikely that both `cond1` and `cond2` will be computed. |
619 | // |
620 | // Set any field to -1 to make it ignored (setting BaseCost to -1 results in |
621 | // `shouldKeepJumpConditionsTogether` always returning false). |
622 | struct CondMergingParams { |
623 | int BaseCost; |
624 | int LikelyBias; |
625 | int UnlikelyBias; |
626 | }; |
627 | // Return params for deciding if we should keep two branch conditions merged |
628 | // or split them into two separate branches. |
629 | // Arg0: The binary op joining the two conditions (and/or). |
630 | // Arg1: The first condition (cond1) |
631 | // Arg2: The second condition (cond2) |
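// A target that wants short, cheap conditions kept fused might override this
// to return, say, {/*BaseCost=*/2, /*LikelyBias=*/1, /*UnlikelyBias=*/-1};
// the numbers here are purely illustrative.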
632 | virtual CondMergingParams |
633 | getJumpConditionMergingParams(Instruction::BinaryOps, const Value *, |
634 | const Value *) const { |
635 | // -1 will always result in splitting. |
636 | return {-1, -1, -1}; |
637 | } |
638 | |
639 | /// Return true if selects are only cheaper than branches if the branch is |
640 | /// unlikely to be predicted right. |
641 | bool isPredictableSelectExpensive() const { |
642 | return PredictableSelectIsExpensive; |
643 | } |
644 | |
645 | virtual bool fallBackToDAGISel(const Instruction &Inst) const { |
646 | return false; |
647 | } |
648 | |
649 | /// Return true if the following transform is beneficial: |
650 | /// fold (conv (load x)) -> (load (conv*)x) |
651 | /// On architectures that don't natively support some vector loads |
652 | /// efficiently, casting the load to a smaller vector of larger types and |
653 | /// loading is more efficient; however, this can be undone by optimizations in |
654 | /// the DAG combiner. |
655 | virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, |
656 | const SelectionDAG &DAG, |
657 | const MachineMemOperand &MMO) const; |
658 | |
659 | /// Return true if the following transform is beneficial: |
660 | /// (store (y (conv x)), y*)) -> (store x, (x*)) |
661 | virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, |
662 | const SelectionDAG &DAG, |
663 | const MachineMemOperand &MMO) const { |
664 | // Default to the same logic as loads. |
665 | return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO); |
666 | } |
667 | |
668 | /// Return true if it is expected to be cheaper to do a store of vector |
669 | /// constant with the given size and type for the address space than to |
670 | /// store the individual scalar element constants. |
671 | virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, |
672 | unsigned NumElem, |
673 | unsigned AddrSpace) const { |
674 | return IsZero; |
675 | } |
676 | |
677 | /// Allow store merging for the specified type after legalization in addition |
678 | /// to before legalization. This may transform stores that do not exist |
679 | /// earlier (for example, stores created from intrinsics). |
680 | virtual bool mergeStoresAfterLegalization(EVT MemVT) const { |
681 | return true; |
682 | } |
683 | |
684 | /// Returns true if it's reasonable to merge stores to MemVT size. |
685 | virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, |
686 | const MachineFunction &MF) const { |
687 | return true; |
688 | } |
689 | |
690 | /// Return true if it is cheap to speculate a call to intrinsic cttz. |
691 | virtual bool isCheapToSpeculateCttz(Type *Ty) const { |
692 | return false; |
693 | } |
694 | |
695 | /// Return true if it is cheap to speculate a call to intrinsic ctlz. |
696 | virtual bool isCheapToSpeculateCtlz(Type *Ty) const { |
697 | return false; |
698 | } |
699 | |
700 | /// Return true if ctlz instruction is fast. |
701 | virtual bool isCtlzFast() const { |
702 | return false; |
703 | } |
704 | |
705 | /// Return true if ctpop instruction is fast. |
706 | virtual bool isCtpopFast(EVT VT) const { |
707 | return isOperationLegal(ISD::CTPOP, VT); |
708 | } |
709 | |
710 | /// Return the maximum number of "x & (x - 1)" operations that can be done |
711 | /// instead of deferring to a custom CTPOP. |
712 | virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const { |
713 | return 1; |
714 | } |
715 | |
716 | /// Return true if instruction generated for equality comparison is folded |
717 | /// with instruction generated for signed comparison. |
718 | virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; } |
719 | |
720 | /// Return true if the heuristic to prefer icmp eq zero should be used in code |
721 | /// gen prepare. |
722 | virtual bool preferZeroCompareBranch() const { return false; } |
723 | |
724 | /// Return true if it is cheaper to split the store of a merged int val |
725 | /// from a pair of smaller values into multiple stores. |
726 | virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const { |
727 | return false; |
728 | } |
729 | |
730 | /// Return true if the target supports combining a |
731 | /// chain like: |
732 | /// \code |
733 | /// %andResult = and %val1, #mask |
734 | /// %icmpResult = icmp %andResult, 0 |
735 | /// \endcode |
736 | /// into a single machine instruction of a form like: |
737 | /// \code |
738 | /// cc = test %register, #mask |
739 | /// \endcode |
740 | virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { |
741 | return false; |
742 | } |
743 | |
744 | /// Return true if it is valid to merge the TargetMMOFlags in two SDNodes. |
745 | virtual bool |
746 | areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, |
747 | const MemSDNode &NodeY) const { |
748 | return true; |
749 | } |
750 | |
751 | /// Use bitwise logic to make pairs of compares more efficient. For example: |
752 | /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 |
753 | /// This should be true when it takes more than one instruction to lower |
754 | /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on |
755 | /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win. |
756 | virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const { |
757 | return false; |
758 | } |
759 | |
760 | /// Return the preferred operand type if the target has a quick way to compare |
761 | /// integer values of the given size. Assume that any legal integer type can |
762 | /// be compared efficiently. Targets may override this to allow illegal wide |
763 | /// types to return a vector type if there is support to compare that type. |
764 | virtual MVT hasFastEqualityCompare(unsigned NumBits) const { |
765 | MVT VT = MVT::getIntegerVT(NumBits); |
766 | return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE; |
767 | } |
768 | |
769 | /// Return true if the target should transform: |
770 | /// (X & Y) == Y ---> (~X & Y) == 0 |
771 | /// (X & Y) != Y ---> (~X & Y) != 0 |
772 | /// |
773 | /// This may be profitable if the target has a bitwise and-not operation that |
774 | /// sets comparison flags. A target may want to limit the transformation based |
775 | /// on the type of Y or if Y is a constant. |
776 | /// |
777 | /// Note that the transform will not occur if Y is known to be a power-of-2 |
778 | /// because a mask and compare of a single bit can be handled by inverting the |
779 | /// predicate, for example: |
780 | /// (X & 8) == 8 ---> (X & 8) != 0 |
781 | virtual bool hasAndNotCompare(SDValue Y) const { |
782 | return false; |
783 | } |
784 | |
785 | /// Return true if the target has a bitwise and-not operation: |
786 | /// X = ~A & B |
787 | /// This can be used to simplify select or other instructions. |
788 | virtual bool hasAndNot(SDValue X) const { |
789 | // If the target has the more complex version of this operation, assume that |
790 | // it has this operation too. |
791 | return hasAndNotCompare(X); |
792 | } |
793 | |
794 | /// Return true if the target has a bit-test instruction: |
795 | /// (X & (1 << Y)) ==/!= 0 |
796 | /// This knowledge can be used to prevent breaking the pattern, |
797 | /// or creating it if it could be recognized. |
798 | virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; } |
799 | |
800 | /// There are two ways to clear extreme bits (either low or high): |
801 | /// Mask: x & (-1 << y) (the instcombine canonical form) |
802 | /// Shifts: x >> y << y |
803 | /// Return true if the variant with 2 variable shifts is preferred. |
804 | /// Return false if there is no preference. |
805 | virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const { |
806 | // By default, let's assume that no one prefers shifts. |
807 | return false; |
808 | } |
809 | |
810 | /// Return true if it is profitable to fold a pair of shifts into a mask. |
811 | /// This is usually true on most targets. But some targets, like Thumb1, |
812 | /// have immediate shift instructions, but no immediate "and" instruction; |
813 | /// this makes the fold unprofitable. |
814 | virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, |
815 | CombineLevel Level) const { |
816 | return true; |
817 | } |
818 | |
819 | /// Should we transform the IR-optimal check for whether the given truncation |
820 | /// down into KeptBits would be truncating or not: |
821 | /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) |
822 | /// Into its more traditional form: |
823 | /// ((%x << C) a>> C) dstcond %x |
824 | /// Return true if we should transform. |
825 | /// Return false if there is no preference. |
826 | virtual bool shouldTransformSignedTruncationCheck(EVT XVT, |
827 | unsigned KeptBits) const { |
828 | // By default, let's assume that no one prefers shifts. |
829 | return false; |
830 | } |
831 | |
832 | /// Given the pattern |
833 | /// (X & (C l>>/<< Y)) ==/!= 0 |
834 | /// return true if it should be transformed into: |
835 | /// ((X <</l>> Y) & C) ==/!= 0 |
836 | /// WARNING: if 'X' is a constant, the fold may deadlock! |
837 | /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat() |
838 | /// here because it can end up being not linked in. |
839 | virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
840 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
841 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
842 | SelectionDAG &DAG) const { |
843 | if (hasBitTest(X, Y)) { |
844 | // One interesting pattern that we'd want to form is 'bit test': |
845 | // ((1 << Y) & C) ==/!= 0 |
846 | // But we also need to be careful not to try to reverse that fold. |
847 | |
848 | // Is this '1 << Y' ? |
849 | if (OldShiftOpcode == ISD::SHL && CC->isOne()) |
850 | return false; // Keep the 'bit test' pattern. |
851 | |
852 | // Will it be '1 << Y' after the transform ? |
853 | if (XC && NewShiftOpcode == ISD::SHL && XC->isOne()) |
854 | return true; // Do form the 'bit test' pattern. |
855 | } |
856 | |
857 | // If 'X' is a constant, and we transform, then we will immediately |
858 | // try to undo the fold, thus causing endless combine loop. |
859 | // So by default, let's assume everyone prefers the fold |
860 | // iff 'X' is not a constant. |
861 | return !XC; |
862 | } |
863 | |
864 | // Return true if it is desirable to perform the following transform: |
865 | // (fmul C, (uitofp Pow2)) |
866 | // -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa)) |
867 | // (fdiv C, (uitofp Pow2)) |
868 | // -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa)) |
869 | // |
870 | // This is only queried after we have verified the transform will be bitwise |
871 | // equivalent. |
872 | // |
873 | // SDNode *N : The FDiv/FMul node we want to transform. |
874 | // SDValue FPConst: The Float constant operand in `N`. |
875 | // SDValue IntPow2: The Integer power of 2 operand in `N`. |
876 | virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, |
877 | SDValue IntPow2) const { |
878 | // Default to avoiding fdiv which is often very expensive. |
879 | return N->getOpcode() == ISD::FDIV; |
880 | } |
881 | |
882 | // Given: |
883 | // (icmp eq/ne (and X, C0), (shift X, C1)) |
884 | // or |
885 | // (icmp eq/ne X, (rotate X, CPow2)) |
886 | |
887 | // If C0 is a mask or shifted mask and the shift amt (C1) isolates the |
888 | // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`), |
889 | // do we prefer the shift to be shift-right, shift-left, or rotate? |
890 | // Note: It is only valid to convert the rotate version to the shift version iff |
891 | // the shift-amt (`C1`) is a power of 2 (including 0). |
892 | // If ShiftOpc (current Opcode) is returned, do nothing. |
893 | virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand( |
894 | EVT VT, unsigned ShiftOpc, bool MayTransformRotate, |
895 | const APInt &ShiftOrRotateAmt, |
896 | const std::optional<APInt> &AndMask) const { |
897 | return ShiftOpc; |
898 | } |
899 | |
900 | /// These two forms are equivalent: |
901 | /// sub %y, (xor %x, -1) |
902 | /// add (add %x, 1), %y |
903 | /// The variant with two add's is IR-canonical. |
904 | /// Some targets may prefer one to the other. |
905 | virtual bool preferIncOfAddToSubOfNot(EVT VT) const { |
906 | // By default, let's assume that everyone prefers the form with two add's. |
907 | return true; |
908 | } |
909 | |
910 | // By default prefer folding (abs (sub nsw x, y)) -> abds(x, y). Some targets |
911 | // may want to avoid this to prevent loss of sub_nsw pattern. |
912 | virtual bool preferABDSToABSWithNSW(EVT VT) const { |
913 | return true; |
914 | } |
915 | |
916 | // Return true if the target wants to transform Op(Splat(X)) -> Splat(Op(X)) |
917 | virtual bool preferScalarizeSplat(SDNode *N) const { return true; } |
918 | |
919 | // Return true if the target wants to transform: |
920 | // (TruncVT truncate(sext_in_reg(VT X, ExtVT)) |
921 | // -> (TruncVT sext_in_reg(truncate(VT X), ExtVT)) |
922 | // Some targets might prefer pre-sextinreg to improve truncation/saturation. |
923 | virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const { |
924 | return true; |
925 | } |
926 | |
927 | /// Return true if the target wants to use the optimization that |
928 | /// turns ext(promotableInst1(...(promotableInstN(load)))) into |
929 | /// promotedInst1(...(promotedInstN(ext(load)))). |
930 | bool enableExtLdPromotion() const { return EnableExtLdPromotion; } |
931 | |
932 | /// Return true if the target can combine store(extractelement VectorTy, |
933 | /// Idx). |
934 | /// \p Cost[out] gives the cost of that transformation when this is true. |
935 | virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, |
936 | unsigned &Cost) const { |
937 | return false; |
938 | } |
939 | |
940 | /// Return true if the target shall perform extract vector element and store |
941 | /// given that the vector is known to be splat of constant. |
942 | /// \p Index[out] gives the index of the vector element to be extracted when |
943 | /// this is true. |
944 | virtual bool shallExtractConstSplatVectorElementToStore( |
945 | Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const { |
946 | return false; |
947 | } |
948 | |
949 | /// Return true if inserting a scalar into a variable element of an undef |
950 | /// vector is more efficiently handled by splatting the scalar instead. |
951 | virtual bool shouldSplatInsEltVarIndex(EVT) const { |
952 | return false; |
953 | } |
954 | |
955 | /// Return true if target always benefits from combining into FMA for a |
956 | /// given value type. This must typically return false on targets where FMA |
957 | /// takes more cycles to execute than FADD. |
958 | virtual bool enableAggressiveFMAFusion(EVT VT) const { return false; } |
959 | |
960 | /// Return true if target always benefits from combining into FMA for a |
961 | /// given value type. This must typically return false on targets where FMA |
962 | /// takes more cycles to execute than FADD. |
963 | virtual bool enableAggressiveFMAFusion(LLT Ty) const { return false; } |
964 | |
965 | /// Return the ValueType of the result of SETCC operations. |
966 | virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
967 | EVT VT) const; |
968 | |
969 | /// Return the ValueType for comparison libcalls. Comparison libcalls include |
970 | /// floating point comparison calls, and Ordered/Unordered check calls on |
971 | /// floating point numbers. |
972 | virtual |
973 | MVT::SimpleValueType getCmpLibcallReturnType() const; |
974 | |
975 | /// For targets without i1 registers, this gives the nature of the high-bits |
976 | /// of boolean values held in types wider than i1. |
977 | /// |
978 | /// "Boolean values" are special true/false values produced by nodes like |
979 | /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND. |
980 | /// Not to be confused with general values promoted from i1. Some cpus |
981 | /// distinguish between vectors of boolean and scalars; the isVec parameter |
982 | /// selects between the two kinds. For example on X86 a scalar boolean should |
983 | /// be zero extended from i1, while the elements of a vector of booleans |
984 | /// should be sign extended from i1. |
985 | /// |
986 | /// Some cpus also treat floating point types the same way as they treat |
987 | /// vectors instead of the way they treat scalars. |
988 | BooleanContent getBooleanContents(bool isVec, bool isFloat) const { |
989 | if (isVec) |
990 | return BooleanVectorContents; |
991 | return isFloat ? BooleanFloatContents : BooleanContents; |
992 | } |
993 | |
994 | BooleanContent getBooleanContents(EVT Type) const { |
995 | return getBooleanContents(Type.isVector(), Type.isFloatingPoint()); |
996 | } |
997 | |
998 | /// Promote the given target boolean to a target boolean of the given type. |
999 | /// A target boolean is an integer value, not necessarily of type i1, the bits |
1000 | /// of which conform to getBooleanContents. |
1001 | /// |
1002 | /// ValVT is the type of values that produced the boolean. |
1003 | SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, |
1004 | EVT ValVT) const { |
1005 | SDLoc dl(Bool); |
1006 | EVT BoolVT = |
1007 | getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ValVT); |
1008 | ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(ValVT)); |
1009 | return DAG.getNode(ExtendCode, dl, BoolVT, Bool); |
1010 | } |
1011 | |
1012 | /// Return target scheduling preference. |
1013 | Sched::Preference getSchedulingPreference() const { |
1014 | return SchedPreferenceInfo; |
1015 | } |
1016 | |
1017 | /// Some schedulers, e.g. hybrid, can switch to different scheduling heuristics |
1018 | /// for different nodes. This function returns the preference (or none) for |
1019 | /// the given node. |
1020 | virtual Sched::Preference getSchedulingPreference(SDNode *) const { |
1021 | return Sched::None; |
1022 | } |
1023 | |
1024 | /// Return the register class that should be used for the specified value |
1025 | /// type. |
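/// Targets fill the underlying table in their constructors via
/// addRegisterClass(), e.g. addRegisterClass(MVT::i32, &XYZ::GPR32RegClass)
/// for a hypothetical target "XYZ".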
1026 | virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const { |
1027 | (void)isDivergent; |
1028 | const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; |
1029 | assert(RC && "This value type is not natively supported!" ); |
1030 | return RC; |
1031 | } |
1032 | |
1033 | /// Allows target to decide about the register class of the |
1034 | /// specific value that is live outside the defining block. |
1035 | /// Returns true if the value needs uniform register class. |
1036 | virtual bool requiresUniformRegister(MachineFunction &MF, |
1037 | const Value *) const { |
1038 | return false; |
1039 | } |
1040 | |
1041 | /// Return the 'representative' register class for the specified value |
1042 | /// type. |
1043 | /// |
1044 | /// The 'representative' register class is the largest legal super-reg |
1045 | /// register class for the register class of the value type. For example, on |
1046 | /// i386 the rep register class for i8, i16, and i32 are GR32; while the rep |
1047 | /// register class is GR64 on x86_64. |
1048 | virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { |
1049 | const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy]; |
1050 | return RC; |
1051 | } |
1052 | |
1053 | /// Return the cost of the 'representative' register class for the specified |
1054 | /// value type. |
1055 | virtual uint8_t getRepRegClassCostFor(MVT VT) const { |
1056 | return RepRegClassCostForVT[VT.SimpleTy]; |
1057 | } |
1058 | |
1059 | /// Return the preferred strategy to legalize this SHIFT instruction, with |
1060 | /// \p ExpansionFactor being the recursion depth - how many expansions are needed. |
1061 | enum class ShiftLegalizationStrategy { |
1062 | ExpandToParts, |
1063 | ExpandThroughStack, |
1064 | LowerToLibcall |
1065 | }; |
1066 | virtual ShiftLegalizationStrategy |
1067 | preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, |
1068 | unsigned ExpansionFactor) const { |
1069 | if (ExpansionFactor == 1) |
1070 | return ShiftLegalizationStrategy::ExpandToParts; |
1071 | return ShiftLegalizationStrategy::ExpandThroughStack; |
1072 | } |
1073 | |
1074 | /// Return true if the target has native support for the specified value type. |
1075 | /// This means that it has a register that directly holds it without |
1076 | /// promotions or expansions. |
1077 | bool isTypeLegal(EVT VT) const { |
1078 | assert(!VT.isSimple() || |
1079 | (unsigned)VT.getSimpleVT().SimpleTy < std::size(RegClassForVT)); |
1080 | return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr; |
1081 | } |
1082 | |
1083 | class ValueTypeActionImpl { |
1084 | /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum |
1085 | /// that indicates how instruction selection should deal with the type. |
1086 | LegalizeTypeAction ValueTypeActions[MVT::VALUETYPE_SIZE]; |
1087 | |
1088 | public: |
1089 | ValueTypeActionImpl() { |
1090 | std::fill(std::begin(arr&: ValueTypeActions), std::end(arr&: ValueTypeActions), |
1091 | TypeLegal); |
1092 | } |
1093 | |
1094 | LegalizeTypeAction getTypeAction(MVT VT) const { |
1095 | return ValueTypeActions[VT.SimpleTy]; |
1096 | } |
1097 | |
1098 | void setTypeAction(MVT VT, LegalizeTypeAction Action) { |
1099 | ValueTypeActions[VT.SimpleTy] = Action; |
1100 | } |
1101 | }; |
1102 | |
1103 | const ValueTypeActionImpl &getValueTypeActions() const { |
1104 | return ValueTypeActions; |
1105 | } |
1106 | |
1107 | /// Return pair that represents the legalization kind (first) that needs to |
1108 | /// happen to EVT (second) in order to type-legalize it. |
1109 | /// |
1110 | /// First: how we should legalize values of this type, either it is already |
1111 | /// legal (return 'Legal') or we need to promote it to a larger type (return |
1112 | /// 'Promote'), or we need to expand it into multiple registers of smaller |
1113 | /// integer type (return 'Expand'). 'Custom' is not an option. |
1114 | /// |
1115 | /// Second: for types supported by the target, this is an identity function. |
1116 | /// For types that must be promoted to larger types, this returns the larger |
1117 | /// type to promote to. For integer types that are larger than the largest |
1118 | /// integer register, this contains one step in the expansion to get to the |
1119 | /// smaller register. For illegal floating point types, this returns the |
1120 | /// integer type to transform to. |
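///
/// For example (results are target-dependent; this is just a sketch using a
/// placeholder LLVMContext "Ctx"):
/// \code
///   LegalizeKind LK = getTypeConversion(Ctx, MVT::i128);
///   // On a target whose widest legal integer is i64 this would typically be
///   // (TypeExpandInteger, i64); an odd vector such as v3i32 would commonly
///   // map to (TypeWidenVector, v4i32).
/// \endcode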
1121 | LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const; |
1122 | |
1123 | /// Return how we should legalize values of this type, either it is already |
1124 | /// legal (return 'Legal') or we need to promote it to a larger type (return |
1125 | /// 'Promote'), or we need to expand it into multiple registers of smaller |
1126 | /// integer type (return 'Expand'). 'Custom' is not an option. |
1127 | LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const { |
1128 | return getTypeConversion(Context, VT).first; |
1129 | } |
1130 | LegalizeTypeAction getTypeAction(MVT VT) const { |
1131 | return ValueTypeActions.getTypeAction(VT); |
1132 | } |
1133 | |
1134 | /// For types supported by the target, this is an identity function. For |
1135 | /// types that must be promoted to larger types, this returns the larger type |
1136 | /// to promote to. For integer types that are larger than the largest integer |
1137 | /// register, this contains one step in the expansion to get to the smaller |
1138 | /// register. For illegal floating point types, this returns the integer type |
1139 | /// to transform to. |
1140 | virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { |
1141 | return getTypeConversion(Context, VT).second; |
1142 | } |
1143 | |
1144 | /// For types supported by the target, this is an identity function. For |
1145 | /// types that must be expanded (i.e. integer types that are larger than the |
1146 | /// largest integer register or illegal floating point types), this returns |
1147 | /// the largest legal type it will be expanded to. |
1148 | EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const { |
1149 | assert(!VT.isVector()); |
1150 | while (true) { |
1151 | switch (getTypeAction(Context, VT)) { |
1152 | case TypeLegal: |
1153 | return VT; |
1154 | case TypeExpandInteger: |
1155 | VT = getTypeToTransformTo(Context, VT); |
1156 | break; |
1157 | default: |
1158 | llvm_unreachable("Type is not legal nor is it to be expanded!" ); |
1159 | } |
1160 | } |
1161 | } |
1162 | |
1163 | /// Vector types are broken down into some number of legal first class types. |
1164 | /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8 |
1165 | /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64 |
1166 | /// turns into 4 EVT::i32 values with both PPC and X86. |
1167 | /// |
1168 | /// This method returns the number of registers needed, and the VT for each |
1169 | /// register. It also returns the VT and quantity of the intermediate values |
1170 | /// before they are promoted/expanded. |
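///
/// For example (a sketch; the exact answer depends on the target, and "Ctx"
/// stands in for an LLVMContext):
/// \code
///   EVT IntermediateVT;
///   MVT RegisterVT;
///   unsigned NumIntermediates;
///   unsigned NumRegs = getVectorTypeBreakdown(Ctx, MVT::v8f32, IntermediateVT,
///                                             NumIntermediates, RegisterVT);
///   // With only v4f32 legal this would typically yield NumRegs == 2 and
///   // IntermediateVT == RegisterVT == MVT::v4f32.
/// \endcode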
1171 | unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT, |
1172 | EVT &IntermediateVT, |
1173 | unsigned &NumIntermediates, |
1174 | MVT &RegisterVT) const; |
1175 | |
1176 | /// Certain targets such as MIPS require that some types such as vectors are |
1177 | /// always broken down into scalars in some contexts. This occurs even if the |
1178 | /// vector type is legal. |
1179 | virtual unsigned getVectorTypeBreakdownForCallingConv( |
1180 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
1181 | unsigned &NumIntermediates, MVT &RegisterVT) const { |
1182 | return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates, |
1183 | RegisterVT); |
1184 | } |
1185 | |
1186 | struct IntrinsicInfo { |
1187 | unsigned opc = 0; // target opcode |
1188 | EVT memVT; // memory VT |
1189 | |
1190 | // value representing memory location |
1191 | PointerUnion<const Value *, const PseudoSourceValue *> ptrVal; |
1192 | |
1193 | // Fallback address space for use if ptrVal is nullptr. std::nullopt means |
1194 | // unknown address space. |
1195 | std::optional<unsigned> fallbackAddressSpace; |
1196 | |
1197 | int offset = 0; // offset off of ptrVal |
1198 | uint64_t size = 0; // the size of the memory location |
1199 | // (taken from memVT if zero) |
1200 | MaybeAlign align = Align(1); // alignment |
1201 | |
1202 | MachineMemOperand::Flags flags = MachineMemOperand::MONone; |
1203 | IntrinsicInfo() = default; |
1204 | }; |
1205 | |
1206 | /// Given an intrinsic, checks if on the target the intrinsic will need to map |
1207 | /// to a MemIntrinsicNode (touches memory). If this is the case, it returns |
1208 | /// true and stores the intrinsic information into the IntrinsicInfo that was |
1209 | /// passed to the function. |
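///
/// A typical override for a loading target intrinsic fills the struct roughly
/// like this (the parameter names "Info"/"I" and the field values are
/// illustrative):
/// \code
///   Info.opc = ISD::INTRINSIC_W_CHAIN;
///   Info.memVT = MVT::v4i32;
///   Info.ptrVal = I.getArgOperand(0);
///   Info.offset = 0;
///   Info.align = Align(1);
///   Info.flags = MachineMemOperand::MOLoad;
///   return true;
/// \endcode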
1210 | virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, |
1211 | MachineFunction &, |
1212 | unsigned /*Intrinsic*/) const { |
1213 | return false; |
1214 | } |
1215 | |
1216 | /// Returns true if the target can instruction select the specified FP |
1217 | /// immediate natively. If false, the legalizer will materialize the FP |
1218 | /// immediate as a load from a constant pool. |
1219 | virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/, |
1220 | bool ForCodeSize = false) const { |
1221 | return false; |
1222 | } |
1223 | |
1224 | /// Targets can use this to indicate that they only support *some* |
1225 | /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a |
1226 | /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be |
1227 | /// legal. |
1228 | virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const { |
1229 | return true; |
1230 | } |
1231 | |
1232 | /// Returns true if the operation can trap for the value type. |
1233 | /// |
1234 | /// VT must be a legal type. By default, we optimistically assume most |
1235 | /// operations don't trap except for integer divide and remainder. |
1236 | virtual bool canOpTrap(unsigned Op, EVT VT) const; |
1237 | |
1238 | /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there |
1239 | /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a |
1240 | /// constant pool entry. |
1241 | virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/, |
1242 | EVT /*VT*/) const { |
1243 | return false; |
1244 | } |
1245 | |
1246 | /// How to legalize this custom operation? |
1247 | virtual LegalizeAction getCustomOperationAction(SDNode &Op) const { |
1248 | return Legal; |
1249 | } |
1250 | |
1251 | /// Return how this operation should be treated: either it is legal, needs to |
1252 | /// be promoted to a larger size, needs to be expanded to some other code |
1253 | /// sequence, or the target has a custom expander for it. |
1254 | LegalizeAction getOperationAction(unsigned Op, EVT VT) const { |
1255 | // If a target-specific SDNode requires legalization, require the target |
1256 | // to provide custom legalization for it. |
1257 | if (Op >= std::size(OpActions[0])) |
1258 | return Custom; |
1259 | if (VT.isExtended()) |
1260 | return Expand; |
1261 | return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; |
1262 | } |
1263 | |
1264 | /// Custom method defined by each target to indicate if an operation which |
1265 | /// may require a scale is supported natively by the target. |
1266 | /// If not, the operation is illegal. |
1267 | virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT, |
1268 | unsigned Scale) const { |
1269 | return false; |
1270 | } |
1271 | |
  /// Some fixed point operations may be natively supported by the target but
  /// only for specific scales. This method allows checking whether the target
  /// supports a given operation at a given scale for the given type.
1276 | LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT, |
1277 | unsigned Scale) const { |
1278 | auto Action = getOperationAction(Op, VT); |
1279 | if (Action != Legal) |
1280 | return Action; |
1281 | |
1282 | // This operation is supported in this type but may only work on specific |
1283 | // scales. |
1284 | bool Supported; |
1285 | switch (Op) { |
1286 | default: |
      llvm_unreachable("Unexpected fixed point operation.");
1288 | case ISD::SMULFIX: |
1289 | case ISD::SMULFIXSAT: |
1290 | case ISD::UMULFIX: |
1291 | case ISD::UMULFIXSAT: |
1292 | case ISD::SDIVFIX: |
1293 | case ISD::SDIVFIXSAT: |
1294 | case ISD::UDIVFIX: |
1295 | case ISD::UDIVFIXSAT: |
1296 | Supported = isSupportedFixedPointOperation(Op, VT, Scale); |
1297 | break; |
1298 | } |
1299 | |
1300 | return Supported ? Action : Expand; |
1301 | } |
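
  // Illustrative sketch (the policy is hypothetical): a target that only
  // handles signed/unsigned fixed-point multiplies on i32 with scales below
  // 32 might override the hook above roughly as:
  //
  //   bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
  //                                       unsigned Scale) const override {
  //     return (Op == ISD::SMULFIX || Op == ISD::UMULFIX) &&
  //            VT == MVT::i32 && Scale < 32;
  //   }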
1302 | |
1303 | // If Op is a strict floating-point operation, return the result |
1304 | // of getOperationAction for the equivalent non-strict operation. |
1305 | LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { |
1306 | unsigned EqOpc; |
1307 | switch (Op) { |
    default: llvm_unreachable("Unexpected FP pseudo-opcode");
1309 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
1310 | case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break; |
1311 | #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
1312 | case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break; |
1313 | #include "llvm/IR/ConstrainedOps.def" |
1314 | } |
1315 | |
    return getOperationAction(EqOpc, VT);
1317 | } |
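
  // For example, querying ISD::STRICT_FADD here returns whatever action was
  // registered for ISD::FADD, and the strict compares (STRICT_FSETCC and
  // STRICT_FSETCCS) map to the action registered for ISD::SETCC.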
1318 | |
1319 | /// Return true if the specified operation is legal on this target or can be |
1320 | /// made legal with custom lowering. This is used to help guide high-level |
1321 | /// lowering decisions. LegalOnly is an optional convenience for code paths |
1322 | /// traversed pre and post legalisation. |
1323 | bool isOperationLegalOrCustom(unsigned Op, EVT VT, |
1324 | bool LegalOnly = false) const { |
1325 | if (LegalOnly) |
1326 | return isOperationLegal(Op, VT); |
1327 | |
1328 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1329 | (getOperationAction(Op, VT) == Legal || |
1330 | getOperationAction(Op, VT) == Custom); |
1331 | } |
1332 | |
1333 | /// Return true if the specified operation is legal on this target or can be |
1334 | /// made legal using promotion. This is used to help guide high-level lowering |
1335 | /// decisions. LegalOnly is an optional convenience for code paths traversed |
1336 | /// pre and post legalisation. |
1337 | bool isOperationLegalOrPromote(unsigned Op, EVT VT, |
1338 | bool LegalOnly = false) const { |
1339 | if (LegalOnly) |
1340 | return isOperationLegal(Op, VT); |
1341 | |
1342 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1343 | (getOperationAction(Op, VT) == Legal || |
1344 | getOperationAction(Op, VT) == Promote); |
1345 | } |
1346 | |
1347 | /// Return true if the specified operation is legal on this target or can be |
1348 | /// made legal with custom lowering or using promotion. This is used to help |
1349 | /// guide high-level lowering decisions. LegalOnly is an optional convenience |
1350 | /// for code paths traversed pre and post legalisation. |
1351 | bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, |
1352 | bool LegalOnly = false) const { |
1353 | if (LegalOnly) |
1354 | return isOperationLegal(Op, VT); |
1355 | |
1356 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1357 | (getOperationAction(Op, VT) == Legal || |
1358 | getOperationAction(Op, VT) == Custom || |
1359 | getOperationAction(Op, VT) == Promote); |
1360 | } |
1361 | |
1362 | /// Return true if the operation uses custom lowering, regardless of whether |
1363 | /// the type is legal or not. |
1364 | bool isOperationCustom(unsigned Op, EVT VT) const { |
1365 | return getOperationAction(Op, VT) == Custom; |
1366 | } |
1367 | |
1368 | /// Return true if lowering to a jump table is allowed. |
1369 | virtual bool areJTsAllowed(const Function *Fn) const { |
    if (Fn->getFnAttribute("no-jump-tables").getValueAsBool())
      return false;

    return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
           isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
1375 | } |
1376 | |
1377 | /// Check whether the range [Low,High] fits in a machine word. |
1378 | bool rangeFitsInWord(const APInt &Low, const APInt &High, |
1379 | const DataLayout &DL) const { |
1380 | // FIXME: Using the pointer type doesn't seem ideal. |
    uint64_t BW = DL.getIndexSizeInBits(0u);
1382 | uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; |
1383 | return Range <= BW; |
1384 | } |
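
  // For example, with a 64-bit index type for address space 0, Low = 10 and
  // High = 73 give a range of 64 values, which fits in a machine word, while
  // Low = 0 and High = 64 give a range of 65 values, which does not.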
1385 | |
  /// Return true if lowering to a jump table is suitable for a set of case
  /// clusters which may contain \p NumCases cases spanning a range of
  /// \p Range values.
1388 | virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, |
1389 | uint64_t Range, ProfileSummaryInfo *PSI, |
1390 | BlockFrequencyInfo *BFI) const; |
1391 | |
1392 | /// Returns preferred type for switch condition. |
1393 | virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, |
1394 | EVT ConditionVT) const; |
1395 | |
1396 | /// Return true if lowering to a bit test is suitable for a set of case |
1397 | /// clusters which contains \p NumDests unique destinations, \p Low and |
1398 | /// \p High as its lowest and highest case values, and expects \p NumCmps |
1399 | /// case value comparisons. Check if the number of destinations, comparison |
1400 | /// metric, and range are all suitable. |
1401 | bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, |
1402 | const APInt &Low, const APInt &High, |
1403 | const DataLayout &DL) const { |
1404 | // FIXME: I don't think NumCmps is the correct metric: a single case and a |
1405 | // range of cases both require only one branch to lower. Just looking at the |
1406 | // number of clusters and destinations should be enough to decide whether to |
1407 | // build bit tests. |
1408 | |
1409 | // To lower a range with bit tests, the range must fit the bitwidth of a |
1410 | // machine word. |
1411 | if (!rangeFitsInWord(Low, High, DL)) |
1412 | return false; |
1413 | |
1414 | // Decide whether it's profitable to lower this range with bit tests. Each |
1415 | // destination requires a bit test and branch, and there is an overall range |
1416 | // check branch. For a small number of clusters, separate comparisons might |
1417 | // be cheaper, and for many destinations, splitting the range might be |
1418 | // better. |
1419 | return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) || |
1420 | (NumDests == 3 && NumCmps >= 6); |
1421 | } |
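
  // Worked example: a switch with three case values (NumCmps == 3) that all
  // branch to a single destination (NumDests == 1), and whose values span
  // less than one machine word, satisfies (NumDests == 1 && NumCmps >= 3), so
  // it is lowered as one range check plus a single bit-test-and-branch
  // against a mask of the three case values.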
1422 | |
1423 | /// Return true if the specified operation is illegal on this target or |
1424 | /// unlikely to be made legal with custom lowering. This is used to help guide |
1425 | /// high-level lowering decisions. |
1426 | bool isOperationExpand(unsigned Op, EVT VT) const { |
1427 | return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand); |
1428 | } |
1429 | |
1430 | /// Return true if the specified operation is legal on this target. |
1431 | bool isOperationLegal(unsigned Op, EVT VT) const { |
1432 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1433 | getOperationAction(Op, VT) == Legal; |
1434 | } |
1435 | |
1436 | /// Return how this load with extension should be treated: either it is legal, |
1437 | /// needs to be promoted to a larger size, needs to be expanded to some other |
1438 | /// code sequence, or the target has a custom expander for it. |
1439 | LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, |
1440 | EVT MemVT) const { |
1441 | if (ValVT.isExtended() || MemVT.isExtended()) return Expand; |
1442 | unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; |
1443 | unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; |
1444 | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE && |
           MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!");
1446 | unsigned Shift = 4 * ExtType; |
1447 | return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf); |
1448 | } |
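
  // The LoadExtActions table packs one 4-bit LegalizeAction per
  // ISD::LoadExtType (NON_EXTLOAD, EXTLOAD, SEXTLOAD, ZEXTLOAD), so the shift
  // by 4 * ExtType above selects the nibble for the requested extension kind.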
1449 | |
1450 | /// Return true if the specified load with extension is legal on this target. |
1451 | bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
1452 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal; |
1453 | } |
1454 | |
1455 | /// Return true if the specified load with extension is legal or custom |
1456 | /// on this target. |
1457 | bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
1458 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal || |
1459 | getLoadExtAction(ExtType, ValVT, MemVT) == Custom; |
1460 | } |
1461 | |
1462 | /// Same as getLoadExtAction, but for atomic loads. |
1463 | LegalizeAction getAtomicLoadExtAction(unsigned ExtType, EVT ValVT, |
1464 | EVT MemVT) const { |
1465 | if (ValVT.isExtended() || MemVT.isExtended()) return Expand; |
1466 | unsigned ValI = (unsigned)ValVT.getSimpleVT().SimpleTy; |
1467 | unsigned MemI = (unsigned)MemVT.getSimpleVT().SimpleTy; |
1468 | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE && |
           MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!");
1470 | unsigned Shift = 4 * ExtType; |
1471 | LegalizeAction Action = |
1472 | (LegalizeAction)((AtomicLoadExtActions[ValI][MemI] >> Shift) & 0xf); |
1473 | assert((Action == Legal || Action == Expand) && |
           "Unsupported atomic load extension action.");
1475 | return Action; |
1476 | } |
1477 | |
1478 | /// Return true if the specified atomic load with extension is legal on |
1479 | /// this target. |
1480 | bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
1481 | return getAtomicLoadExtAction(ExtType, ValVT, MemVT) == Legal; |
1482 | } |
1483 | |
1484 | /// Return how this store with truncation should be treated: either it is |
1485 | /// legal, needs to be promoted to a larger size, needs to be expanded to some |
1486 | /// other code sequence, or the target has a custom expander for it. |
1487 | LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const { |
1488 | if (ValVT.isExtended() || MemVT.isExtended()) return Expand; |
1489 | unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; |
1490 | unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; |
1491 | assert(ValI < MVT::VALUETYPE_SIZE && MemI < MVT::VALUETYPE_SIZE && |
           "Table isn't big enough!");
1493 | return TruncStoreActions[ValI][MemI]; |
1494 | } |
1495 | |
1496 | /// Return true if the specified store with truncation is legal on this |
1497 | /// target. |
1498 | bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const { |
    return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal;
1500 | } |
1501 | |
  /// Return true if the specified store with truncation is legal or can be
  /// custom lowered on this target.
  bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const {
    return isTypeLegal(ValVT) &&
1506 | (getTruncStoreAction(ValVT, MemVT) == Legal || |
1507 | getTruncStoreAction(ValVT, MemVT) == Custom); |
1508 | } |
1509 | |
1510 | virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, |
1511 | bool LegalOnly) const { |
1512 | if (LegalOnly) |
1513 | return isTruncStoreLegal(ValVT, MemVT); |
1514 | |
1515 | return isTruncStoreLegalOrCustom(ValVT, MemVT); |
1516 | } |
1517 | |
1518 | /// Return how the indexed load should be treated: either it is legal, needs |
1519 | /// to be promoted to a larger size, needs to be expanded to some other code |
1520 | /// sequence, or the target has a custom expander for it. |
1521 | LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const { |
    return getIndexedModeAction(IdxMode, VT, IMAB_Load);
1523 | } |
1524 | |
1525 | /// Return true if the specified indexed load is legal on this target. |
1526 | bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const { |
1527 | return VT.isSimple() && |
           (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1530 | } |
1531 | |
1532 | /// Return how the indexed store should be treated: either it is legal, needs |
1533 | /// to be promoted to a larger size, needs to be expanded to some other code |
1534 | /// sequence, or the target has a custom expander for it. |
1535 | LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const { |
    return getIndexedModeAction(IdxMode, VT, IMAB_Store);
1537 | } |
1538 | |
  /// Return true if the specified indexed store is legal on this target.
  bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
    return VT.isSimple() &&
           (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1544 | } |
1545 | |
  /// Return how the indexed masked load should be treated: either it is legal,
  /// needs to be promoted to a larger size, needs to be expanded to some other
  /// code sequence, or the target has a custom expander for it.
  LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const {
    return getIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad);
1551 | } |
1552 | |
  /// Return true if the specified indexed masked load is legal on this target.
  bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const {
    return VT.isSimple() &&
           (getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1558 | } |
1559 | |
  /// Return how the indexed masked store should be treated: either it is
  /// legal, needs to be promoted to a larger size, needs to be expanded to
  /// some other code sequence, or the target has a custom expander for it.
  LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const {
    return getIndexedModeAction(IdxMode, VT, IMAB_MaskedStore);
1565 | } |
1566 | |
  /// Return true if the specified indexed masked store is legal on this
  /// target.
  bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const {
    return VT.isSimple() &&
           (getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1572 | } |
1573 | |
  /// Returns true if the index type for a masked gather/scatter requires
  /// extending.
1576 | virtual bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const { return false; } |
1577 | |
  // Returns true if Extend can be folded into the index of a masked
  // gather/scatter on this target.
1580 | virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const { |
1581 | return false; |
1582 | } |
1583 | |
1584 | // Return true if the target supports a scatter/gather instruction with |
1585 | // indices which are scaled by the particular value. Note that all targets |
1586 | // must by definition support scale of 1. |
1587 | virtual bool isLegalScaleForGatherScatter(uint64_t Scale, |
1588 | uint64_t ElemSize) const { |
1589 | // MGATHER/MSCATTER are only required to support scaling by one or by the |
1590 | // element size. |
1591 | if (Scale != ElemSize && Scale != 1) |
1592 | return false; |
1593 | return true; |
1594 | } |
1595 | |
1596 | /// Return how the condition code should be treated: either it is legal, needs |
1597 | /// to be expanded to some other code sequence, or the target has a custom |
1598 | /// expander for it. |
1599 | LegalizeAction |
1600 | getCondCodeAction(ISD::CondCode CC, MVT VT) const { |
1601 | assert((unsigned)CC < std::size(CondCodeActions) && |
1602 | ((unsigned)VT.SimpleTy >> 3) < std::size(CondCodeActions[0]) && |
           "Table isn't big enough!");
1604 | // See setCondCodeAction for how this is encoded. |
1605 | uint32_t Shift = 4 * (VT.SimpleTy & 0x7); |
1606 | uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3]; |
1607 | LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF); |
    assert(Action != Promote && "Can't promote condition code!");
1609 | return Action; |
1610 | } |
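
  // Each uint32_t in CondCodeActions packs the 4-bit actions of eight value
  // types: VT.SimpleTy >> 3 selects the word and 4 * (VT.SimpleTy & 0x7)
  // selects the nibble within it.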
1611 | |
1612 | /// Return true if the specified condition code is legal on this target. |
1613 | bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { |
1614 | return getCondCodeAction(CC, VT) == Legal; |
1615 | } |
1616 | |
1617 | /// Return true if the specified condition code is legal or custom on this |
1618 | /// target. |
1619 | bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const { |
1620 | return getCondCodeAction(CC, VT) == Legal || |
1621 | getCondCodeAction(CC, VT) == Custom; |
1622 | } |
1623 | |
1624 | /// If the action for this operation is to promote, this method returns the |
1625 | /// ValueType to promote to. |
1626 | MVT getTypeToPromoteTo(unsigned Op, MVT VT) const { |
1627 | assert(getOperationAction(Op, VT) == Promote && |
           "This operation isn't promoted!");
1629 | |
1630 | // See if this has an explicit type specified. |
    std::map<std::pair<unsigned, MVT::SimpleValueType>,
             MVT::SimpleValueType>::const_iterator PTTI =
        PromoteToType.find(std::make_pair(Op, VT.SimpleTy));
1634 | if (PTTI != PromoteToType.end()) return PTTI->second; |
1635 | |
1636 | assert((VT.isInteger() || VT.isFloatingPoint()) && |
           "Cannot autopromote this type, add it with AddPromotedToType.");
1638 | |
1639 | uint64_t VTBits = VT.getScalarSizeInBits(); |
1640 | MVT NVT = VT; |
1641 | do { |
1642 | NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1); |
1643 | assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && |
             "Didn't find type to promote to!");
    } while (VTBits >= NVT.getScalarSizeInBits() || !isTypeLegal(NVT) ||
             getOperationAction(Op, NVT) == Promote);
1647 | return NVT; |
1648 | } |
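
  // For example, if an i16 operation is marked Promote and no explicit type
  // was registered with AddPromotedToType, the loop above walks the
  // successively larger integer types (i32, i64, ...) and returns the first
  // legal one whose action for Op is not itself Promote.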
1649 | |
1650 | virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, |
1651 | bool AllowUnknown = false) const { |
1652 | return getValueType(DL, Ty, AllowUnknown); |
1653 | } |
1654 | |
1655 | /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM |
1656 | /// operations except for the pointer size. If AllowUnknown is true, this |
1657 | /// will return MVT::Other for types with no EVT counterpart (e.g. structs), |
1658 | /// otherwise it will assert. |
1659 | EVT getValueType(const DataLayout &DL, Type *Ty, |
1660 | bool AllowUnknown = false) const { |
1661 | // Lower scalar pointers to native pointer types. |
    if (auto *PTy = dyn_cast<PointerType>(Ty))
      return getPointerTy(DL, PTy->getAddressSpace());
1664 | |
    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
      Type *EltTy = VTy->getElementType();
      // Lower vectors of pointers to native pointer types.
      if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
        EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace()));
        EltTy = PointerTy.getTypeForEVT(Ty->getContext());
      }
      return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
                              VTy->getElementCount());
1674 | } |
1675 | |
    return EVT::getEVT(Ty, AllowUnknown);
1677 | } |
1678 | |
1679 | EVT getMemValueType(const DataLayout &DL, Type *Ty, |
1680 | bool AllowUnknown = false) const { |
1681 | // Lower scalar pointers to native pointer types. |
    if (auto *PTy = dyn_cast<PointerType>(Ty))
      return getPointerMemTy(DL, PTy->getAddressSpace());
1684 | |
    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
      Type *EltTy = VTy->getElementType();
      if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
        EVT PointerTy(getPointerMemTy(DL, PTy->getAddressSpace()));
        EltTy = PointerTy.getTypeForEVT(Ty->getContext());
      }
      return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
                              VTy->getElementCount());
1693 | } |
1694 | |
1695 | return getValueType(DL, Ty, AllowUnknown); |
1696 | } |
1697 | |
1698 | |
1699 | /// Return the MVT corresponding to this LLVM type. See getValueType. |
1700 | MVT getSimpleValueType(const DataLayout &DL, Type *Ty, |
1701 | bool AllowUnknown = false) const { |
1702 | return getValueType(DL, Ty, AllowUnknown).getSimpleVT(); |
1703 | } |
1704 | |
1705 | /// Return the desired alignment for ByVal or InAlloca aggregate function |
1706 | /// arguments in the caller parameter area. This is the actual alignment, not |
1707 | /// its logarithm. |
1708 | virtual uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; |
1709 | |
1710 | /// Return the type of registers that this ValueType will eventually require. |
1711 | MVT getRegisterType(MVT VT) const { |
1712 | assert((unsigned)VT.SimpleTy < std::size(RegisterTypeForVT)); |
1713 | return RegisterTypeForVT[VT.SimpleTy]; |
1714 | } |
1715 | |
1716 | /// Return the type of registers that this ValueType will eventually require. |
1717 | MVT getRegisterType(LLVMContext &Context, EVT VT) const { |
1718 | if (VT.isSimple()) |
      return getRegisterType(VT.getSimpleVT());
1720 | if (VT.isVector()) { |
1721 | EVT VT1; |
1722 | MVT RegisterVT; |
1723 | unsigned NumIntermediates; |
      (void)getVectorTypeBreakdown(Context, VT, VT1,
                                   NumIntermediates, RegisterVT);
1726 | return RegisterVT; |
1727 | } |
1728 | if (VT.isInteger()) { |
      return getRegisterType(Context, getTypeToTransformTo(Context, VT));
1730 | } |
    llvm_unreachable("Unsupported extended type!");
1732 | } |
1733 | |
1734 | /// Return the number of registers that this ValueType will eventually |
1735 | /// require. |
1736 | /// |
1737 | /// This is one for any types promoted to live in larger registers, but may be |
1738 | /// more than one for types (like i64) that are split into pieces. For types |
1739 | /// like i140, which are first promoted then expanded, it is the number of |
1740 | /// registers needed to hold all the bits of the original type. For an i140 |
1741 | /// on a 32 bit machine this means 5 registers. |
1742 | /// |
1743 | /// RegisterVT may be passed as a way to override the default settings, for |
1744 | /// instance with i128 inline assembly operands on SystemZ. |
1745 | virtual unsigned |
1746 | getNumRegisters(LLVMContext &Context, EVT VT, |
1747 | std::optional<MVT> RegisterVT = std::nullopt) const { |
1748 | if (VT.isSimple()) { |
1749 | assert((unsigned)VT.getSimpleVT().SimpleTy < |
1750 | std::size(NumRegistersForVT)); |
1751 | return NumRegistersForVT[VT.getSimpleVT().SimpleTy]; |
1752 | } |
1753 | if (VT.isVector()) { |
1754 | EVT VT1; |
1755 | MVT VT2; |
1756 | unsigned NumIntermediates; |
      return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2);
1758 | } |
1759 | if (VT.isInteger()) { |
1760 | unsigned BitWidth = VT.getSizeInBits(); |
1761 | unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits(); |
1762 | return (BitWidth + RegWidth - 1) / RegWidth; |
1763 | } |
    llvm_unreachable("Unsupported extended type!");
1765 | } |
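
  // Worked arithmetic for the i140 example above: on a 32-bit target the
  // register width is 32, so the result is (140 + 31) / 32 = 5 registers.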
1766 | |
1767 | /// Certain combinations of ABIs, Targets and features require that types |
1768 | /// are legal for some operations and not for other operations. |
1769 | /// For MIPS all vector types must be passed through the integer register set. |
1770 | virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, |
1771 | CallingConv::ID CC, EVT VT) const { |
1772 | return getRegisterType(Context, VT); |
1773 | } |
1774 | |
  /// Certain targets require unusual breakdowns of certain types. For MIPS,
  /// this occurs when a vector type is used, as vectors are passed through the
  /// integer register set.
1778 | virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
1779 | CallingConv::ID CC, |
1780 | EVT VT) const { |
1781 | return getNumRegisters(Context, VT); |
1782 | } |
1783 | |
1784 | /// Certain targets have context sensitive alignment requirements, where one |
1785 | /// type has the alignment requirement of another type. |
1786 | virtual Align getABIAlignmentForCallingConv(Type *ArgTy, |
1787 | const DataLayout &DL) const { |
    return DL.getABITypeAlign(ArgTy);
1789 | } |
1790 | |
1791 | /// If true, then instruction selection should seek to shrink the FP constant |
1792 | /// of the specified type to a smaller type in order to save space and / or |
1793 | /// reduce runtime. |
1794 | virtual bool ShouldShrinkFPConstant(EVT) const { return true; } |
1795 | |
1796 | /// Return true if it is profitable to reduce a load to a smaller type. |
  /// Example: (i16 (trunc (i32 (load x)))) -> i16 load x
1798 | virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, |
1799 | EVT NewVT) const { |
1800 | // By default, assume that it is cheaper to extract a subvector from a wide |
1801 | // vector load rather than creating multiple narrow vector loads. |
1802 | if (NewVT.isVector() && !Load->hasOneUse()) |
1803 | return false; |
1804 | |
1805 | return true; |
1806 | } |
1807 | |
1808 | /// Return true (the default) if it is profitable to remove a sext_inreg(x) |
1809 | /// where the sext is redundant, and use x directly. |
1810 | virtual bool shouldRemoveRedundantExtend(SDValue Op) const { return true; } |
1811 | |
1812 | /// Indicates if any padding is guaranteed to go at the most significant bits |
1813 | /// when storing the type to memory and the type size isn't equal to the store |
1814 | /// size. |
1815 | bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const { |
1816 | return VT.isScalarInteger() && !VT.isByteSized(); |
1817 | } |
1818 | |
1819 | /// When splitting a value of the specified type into parts, does the Lo |
1820 | /// or Hi part come first? This usually follows the endianness, except |
1821 | /// for ppcf128, where the Hi part always comes first. |
1822 | bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const { |
1823 | return DL.isBigEndian() || VT == MVT::ppcf128; |
1824 | } |
1825 | |
1826 | /// If true, the target has custom DAG combine transformations that it can |
1827 | /// perform for the specified node. |
1828 | bool hasTargetDAGCombine(ISD::NodeType NT) const { |
1829 | assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray)); |
1830 | return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7)); |
1831 | } |
1832 | |
1833 | unsigned getGatherAllAliasesMaxDepth() const { |
1834 | return GatherAllAliasesMaxDepth; |
1835 | } |
1836 | |
1837 | /// Returns the size of the platform's va_list object. |
1838 | virtual unsigned getVaListSizeInBits(const DataLayout &DL) const { |
1839 | return getPointerTy(DL).getSizeInBits(); |
1840 | } |
1841 | |
1842 | /// Get maximum # of store operations permitted for llvm.memset |
1843 | /// |
1844 | /// This function returns the maximum number of store operations permitted |
1845 | /// to replace a call to llvm.memset. The value is set by the target at the |
1846 | /// performance threshold for such a replacement. If OptSize is true, |
1847 | /// return the limit for functions that have OptSize attribute. |
1848 | unsigned getMaxStoresPerMemset(bool OptSize) const { |
1849 | return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset; |
1850 | } |
1851 | |
1852 | /// Get maximum # of store operations permitted for llvm.memcpy |
1853 | /// |
1854 | /// This function returns the maximum number of store operations permitted |
1855 | /// to replace a call to llvm.memcpy. The value is set by the target at the |
1856 | /// performance threshold for such a replacement. If OptSize is true, |
1857 | /// return the limit for functions that have OptSize attribute. |
1858 | unsigned getMaxStoresPerMemcpy(bool OptSize) const { |
1859 | return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy; |
1860 | } |
1861 | |
1862 | /// \brief Get maximum # of store operations to be glued together |
1863 | /// |
1864 | /// This function returns the maximum number of store operations permitted |
1865 | /// to glue together during lowering of llvm.memcpy. The value is set by |
  /// the target at the performance threshold for such a replacement.
1867 | virtual unsigned getMaxGluedStoresPerMemcpy() const { |
1868 | return MaxGluedStoresPerMemcpy; |
1869 | } |
1870 | |
1871 | /// Get maximum # of load operations permitted for memcmp |
1872 | /// |
1873 | /// This function returns the maximum number of load operations permitted |
1874 | /// to replace a call to memcmp. The value is set by the target at the |
1875 | /// performance threshold for such a replacement. If OptSize is true, |
1876 | /// return the limit for functions that have OptSize attribute. |
1877 | unsigned getMaxExpandSizeMemcmp(bool OptSize) const { |
1878 | return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; |
1879 | } |
1880 | |
1881 | /// Get maximum # of store operations permitted for llvm.memmove |
1882 | /// |
1883 | /// This function returns the maximum number of store operations permitted |
1884 | /// to replace a call to llvm.memmove. The value is set by the target at the |
1885 | /// performance threshold for such a replacement. If OptSize is true, |
1886 | /// return the limit for functions that have OptSize attribute. |
1887 | unsigned getMaxStoresPerMemmove(bool OptSize) const { |
1888 | return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove; |
1889 | } |
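
  // Illustrative sketch (the numbers are hypothetical): backends normally
  // configure these thresholds in their TargetLowering constructor, e.g.
  //
  //   MaxStoresPerMemset = 16; MaxStoresPerMemsetOptSize = 8;
  //   MaxStoresPerMemcpy = 8;  MaxStoresPerMemcpyOptSize = 4;
  //   MaxStoresPerMemmove = 8; MaxStoresPerMemmoveOptSize = 4;
  //   MaxLoadsPerMemcmp = 8;   MaxLoadsPerMemcmpOptSize = 4;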
1890 | |
1891 | /// Determine if the target supports unaligned memory accesses. |
1892 | /// |
1893 | /// This function returns true if the target allows unaligned memory accesses |
1894 | /// of the specified type in the given address space. If true, it also returns |
1895 | /// a relative speed of the unaligned memory access in the last argument by |
1896 | /// reference. The higher the speed number the faster the operation comparing |
1897 | /// to a number returned by another such call. This is used, for example, in |
1898 | /// situations where an array copy/move/set is converted to a sequence of |
1899 | /// store operations. Its use helps to ensure that such replacements don't |
1900 | /// generate code that causes an alignment error (trap) on the target machine. |
1901 | virtual bool allowsMisalignedMemoryAccesses( |
1902 | EVT, unsigned AddrSpace = 0, Align Alignment = Align(1), |
1903 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
1904 | unsigned * /*Fast*/ = nullptr) const { |
1905 | return false; |
1906 | } |
1907 | |
1908 | /// LLT handling variant. |
1909 | virtual bool allowsMisalignedMemoryAccesses( |
1910 | LLT, unsigned AddrSpace = 0, Align Alignment = Align(1), |
1911 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
1912 | unsigned * /*Fast*/ = nullptr) const { |
1913 | return false; |
1914 | } |
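
  // Illustrative sketch of an override (the policy is hypothetical): a target
  // on which only unaligned i32 accesses are acceptable could implement the
  // EVT variant roughly as:
  //
  //   bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
  //                                       Align Alignment,
  //                                       MachineMemOperand::Flags Flags,
  //                                       unsigned *Fast) const override {
  //     if (VT != MVT::i32)
  //       return false;
  //     if (Fast)
  //       *Fast = 1; // relative speed; higher means faster
  //     return true;
  //   }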
1915 | |
1916 | /// This function returns true if the memory access is aligned or if the |
1917 | /// target allows this specific unaligned memory access. If the access is |
1918 | /// allowed, the optional final parameter returns a relative speed of the |
1919 | /// access (as defined by the target). |
1920 | bool allowsMemoryAccessForAlignment( |
1921 | LLVMContext &Context, const DataLayout &DL, EVT VT, |
1922 | unsigned AddrSpace = 0, Align Alignment = Align(1), |
1923 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
1924 | unsigned *Fast = nullptr) const; |
1925 | |
1926 | /// Return true if the memory access of this type is aligned or if the target |
1927 | /// allows this specific unaligned access for the given MachineMemOperand. |
1928 | /// If the access is allowed, the optional final parameter returns a relative |
1929 | /// speed of the access (as defined by the target). |
1930 | bool allowsMemoryAccessForAlignment(LLVMContext &Context, |
1931 | const DataLayout &DL, EVT VT, |
1932 | const MachineMemOperand &MMO, |
1933 | unsigned *Fast = nullptr) const; |
1934 | |
1935 | /// Return true if the target supports a memory access of this type for the |
1936 | /// given address space and alignment. If the access is allowed, the optional |
1937 | /// final parameter returns the relative speed of the access (as defined by |
1938 | /// the target). |
1939 | virtual bool |
1940 | allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, |
1941 | unsigned AddrSpace = 0, Align Alignment = Align(1), |
1942 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
1943 | unsigned *Fast = nullptr) const; |
1944 | |
1945 | /// Return true if the target supports a memory access of this type for the |
1946 | /// given MachineMemOperand. If the access is allowed, the optional |
1947 | /// final parameter returns the relative access speed (as defined by the |
1948 | /// target). |
1949 | bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, |
1950 | const MachineMemOperand &MMO, |
1951 | unsigned *Fast = nullptr) const; |
1952 | |
1953 | /// LLT handling variant. |
1954 | bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty, |
1955 | const MachineMemOperand &MMO, |
1956 | unsigned *Fast = nullptr) const; |
1957 | |
1958 | /// Returns the target specific optimal type for load and store operations as |
1959 | /// a result of memset, memcpy, and memmove lowering. |
1960 | /// It returns EVT::Other if the type should be determined using generic |
1961 | /// target-independent logic. |
1962 | virtual EVT |
1963 | getOptimalMemOpType(const MemOp &Op, |
1964 | const AttributeList & /*FuncAttributes*/) const { |
1965 | return MVT::Other; |
1966 | } |
1967 | |
1968 | /// LLT returning variant. |
1969 | virtual LLT |
1970 | getOptimalMemOpLLT(const MemOp &Op, |
1971 | const AttributeList & /*FuncAttributes*/) const { |
1972 | return LLT(); |
1973 | } |
1974 | |
1975 | /// Returns true if it's safe to use load / store of the specified type to |
1976 | /// expand memcpy / memset inline. |
1977 | /// |
1978 | /// This is mostly true for all types except for some special cases. For |
1979 | /// example, on X86 targets without SSE2 f64 load / store are done with fldl / |
1980 | /// fstpl which also does type conversion. Note the specified type doesn't |
1981 | /// have to be legal as the hook is used before type legalization. |
1982 | virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; } |
1983 | |
1984 | /// Return lower limit for number of blocks in a jump table. |
1985 | virtual unsigned getMinimumJumpTableEntries() const; |
1986 | |
1987 | /// Return lower limit of the density in a jump table. |
1988 | unsigned getMinimumJumpTableDensity(bool OptForSize) const; |
1989 | |
1990 | /// Return upper limit for number of entries in a jump table. |
1991 | /// Zero if no limit. |
1992 | unsigned getMaximumJumpTableSize() const; |
1993 | |
1994 | virtual bool isJumpTableRelative() const; |
1995 | |
  /// If a physical register, this specifies the register that
  /// llvm.stacksave/llvm.stackrestore should save and restore.
1998 | Register getStackPointerRegisterToSaveRestore() const { |
1999 | return StackPointerRegisterToSaveRestore; |
2000 | } |
2001 | |
2002 | /// If a physical register, this returns the register that receives the |
2003 | /// exception address on entry to an EH pad. |
2004 | virtual Register |
2005 | getExceptionPointerRegister(const Constant *PersonalityFn) const { |
2006 | return Register(); |
2007 | } |
2008 | |
2009 | /// If a physical register, this returns the register that receives the |
2010 | /// exception typeid on entry to a landing pad. |
2011 | virtual Register |
2012 | getExceptionSelectorRegister(const Constant *PersonalityFn) const { |
2013 | return Register(); |
2014 | } |
2015 | |
2016 | virtual bool needsFixedCatchObjects() const { |
    report_fatal_error("Funclet EH is not implemented for this target");
2018 | } |
2019 | |
2020 | /// Return the minimum stack alignment of an argument. |
2021 | Align getMinStackArgumentAlignment() const { |
2022 | return MinStackArgumentAlignment; |
2023 | } |
2024 | |
2025 | /// Return the minimum function alignment. |
2026 | Align getMinFunctionAlignment() const { return MinFunctionAlignment; } |
2027 | |
2028 | /// Return the preferred function alignment. |
2029 | Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; } |
2030 | |
2031 | /// Return the preferred loop alignment. |
2032 | virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const; |
2033 | |
2034 | /// Return the maximum amount of bytes allowed to be emitted when padding for |
2035 | /// alignment |
2036 | virtual unsigned |
2037 | getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const; |
2038 | |
2039 | /// Should loops be aligned even when the function is marked OptSize (but not |
2040 | /// MinSize). |
2041 | virtual bool alignLoopsWithOptSize() const { return false; } |
2042 | |
2043 | /// If the target has a standard location for the stack protector guard, |
2044 | /// returns the address of that location. Otherwise, returns nullptr. |
2045 | /// DEPRECATED: please override useLoadStackGuardNode and customize |
2046 | /// LOAD_STACK_GUARD, or customize \@llvm.stackguard(). |
2047 | virtual Value *getIRStackGuard(IRBuilderBase &IRB) const; |
2048 | |
2049 | /// Inserts necessary declarations for SSP (stack protection) purpose. |
2050 | /// Should be used only when getIRStackGuard returns nullptr. |
2051 | virtual void insertSSPDeclarations(Module &M) const; |
2052 | |
2053 | /// Return the variable that's previously inserted by insertSSPDeclarations, |
2054 | /// if any, otherwise return nullptr. Should be used only when |
2055 | /// getIRStackGuard returns nullptr. |
2056 | virtual Value *getSDagStackGuard(const Module &M) const; |
2057 | |
2058 | /// If this function returns true, stack protection checks should XOR the |
2059 | /// frame pointer (or whichever pointer is used to address locals) into the |
2060 | /// stack guard value before checking it. getIRStackGuard must return nullptr |
2061 | /// if this returns true. |
2062 | virtual bool useStackGuardXorFP() const { return false; } |
2063 | |
2064 | /// If the target has a standard stack protection check function that |
2065 | /// performs validation and error handling, returns the function. Otherwise, |
2066 | /// returns nullptr. Must be previously inserted by insertSSPDeclarations. |
2067 | /// Should be used only when getIRStackGuard returns nullptr. |
2068 | virtual Function *getSSPStackGuardCheck(const Module &M) const; |
2069 | |
2070 | protected: |
2071 | Value *getDefaultSafeStackPointerLocation(IRBuilderBase &IRB, |
2072 | bool UseTLS) const; |
2073 | |
2074 | public: |
2075 | /// Returns the target-specific address of the unsafe stack pointer. |
2076 | virtual Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const; |
2077 | |
2078 | /// Returns the name of the symbol used to emit stack probes or the empty |
2079 | /// string if not applicable. |
2080 | virtual bool hasStackProbeSymbol(const MachineFunction &MF) const { return false; } |
2081 | |
2082 | virtual bool hasInlineStackProbe(const MachineFunction &MF) const { return false; } |
2083 | |
2084 | virtual StringRef getStackProbeSymbolName(const MachineFunction &MF) const { |
    return "";
2086 | } |
2087 | |
2088 | /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we |
2089 | /// are happy to sink it into basic blocks. A cast may be free, but not |
2090 | /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer. |
2091 | virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const; |
2092 | |
2093 | /// Return true if the pointer arguments to CI should be aligned by aligning |
2094 | /// the object whose address is being passed. If so then MinSize is set to the |
2095 | /// minimum size the object must be to be aligned and PrefAlign is set to the |
2096 | /// preferred alignment. |
2097 | virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/, |
2098 | Align & /*PrefAlign*/) const { |
2099 | return false; |
2100 | } |
2101 | |
2102 | //===--------------------------------------------------------------------===// |
2103 | /// \name Helpers for TargetTransformInfo implementations |
2104 | /// @{ |
2105 | |
2106 | /// Get the ISD node that corresponds to the Instruction class opcode. |
2107 | int InstructionOpcodeToISD(unsigned Opcode) const; |
2108 | |
2109 | /// @} |
2110 | |
2111 | //===--------------------------------------------------------------------===// |
2112 | /// \name Helpers for atomic expansion. |
2113 | /// @{ |
2114 | |
2115 | /// Returns the maximum atomic operation size (in bits) supported by |
2116 | /// the backend. Atomic operations greater than this size (as well |
2117 | /// as ones that are not naturally aligned), will be expanded by |
2118 | /// AtomicExpandPass into an __atomic_* library call. |
2119 | unsigned getMaxAtomicSizeInBitsSupported() const { |
2120 | return MaxAtomicSizeInBitsSupported; |
2121 | } |
2122 | |
2123 | /// Returns the size in bits of the maximum div/rem the backend supports. |
2124 | /// Larger operations will be expanded by ExpandLargeDivRem. |
2125 | unsigned getMaxDivRemBitWidthSupported() const { |
2126 | return MaxDivRemBitWidthSupported; |
2127 | } |
2128 | |
  /// Returns the size in bits of the maximum fp convert the backend supports.
  /// Larger operations will be expanded by ExpandLargeFPConvert.
2131 | unsigned getMaxLargeFPConvertBitWidthSupported() const { |
2132 | return MaxLargeFPConvertBitWidthSupported; |
2133 | } |
2134 | |
2135 | /// Returns the size of the smallest cmpxchg or ll/sc instruction |
2136 | /// the backend supports. Any smaller operations are widened in |
2137 | /// AtomicExpandPass. |
2138 | /// |
2139 | /// Note that *unlike* operations above the maximum size, atomic ops |
2140 | /// are still natively supported below the minimum; they just |
2141 | /// require a more complex expansion. |
2142 | unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; } |
2143 | |
2144 | /// Whether the target supports unaligned atomic operations. |
2145 | bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; } |
2146 | |
2147 | /// Whether AtomicExpandPass should automatically insert fences and reduce |
2148 | /// ordering for this atomic. This should be true for most architectures with |
2149 | /// weak memory ordering. Defaults to false. |
2150 | virtual bool shouldInsertFencesForAtomic(const Instruction *I) const { |
2151 | return false; |
2152 | } |
2153 | |
2154 | /// Whether AtomicExpandPass should automatically insert a trailing fence |
2155 | /// without reducing the ordering for this atomic. Defaults to false. |
2156 | virtual bool |
2157 | shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const { |
2158 | return false; |
2159 | } |
2160 | |
2161 | /// Perform a load-linked operation on Addr, returning a "Value *" with the |
2162 | /// corresponding pointee type. This may entail some non-trivial operations to |
2163 | /// truncate or reconstruct types that will be illegal in the backend. See |
2164 | /// ARMISelLowering for an example implementation. |
2165 | virtual Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, |
2166 | Value *Addr, AtomicOrdering Ord) const { |
    llvm_unreachable("Load linked unimplemented on this target");
2168 | } |
2169 | |
2170 | /// Perform a store-conditional operation to Addr. Return the status of the |
2171 | /// store. This should be 0 if the store succeeded, non-zero otherwise. |
2172 | virtual Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, |
2173 | Value *Addr, AtomicOrdering Ord) const { |
    llvm_unreachable("Store conditional unimplemented on this target");
2175 | } |
2176 | |
2177 | /// Perform a masked atomicrmw using a target-specific intrinsic. This |
2178 | /// represents the core LL/SC loop which will be lowered at a late stage by |
2179 | /// the backend. The target-specific intrinsic returns the loaded value and |
2180 | /// is not responsible for masking and shifting the result. |
2181 | virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, |
2182 | AtomicRMWInst *AI, |
2183 | Value *AlignedAddr, Value *Incr, |
2184 | Value *Mask, Value *ShiftAmt, |
2185 | AtomicOrdering Ord) const { |
    llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
2187 | } |
2188 | |
  /// Perform an atomicrmw expansion in a target-specific way. This is
  /// expected to be called when masked atomicrmw and bit test atomicrmw don't
  /// work, and the target supports another way to lower atomicrmw.
2192 | virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const { |
2193 | llvm_unreachable( |
        "Generic atomicrmw expansion unimplemented on this target");
2195 | } |
2196 | |
2197 | /// Perform a bit test atomicrmw using a target-specific intrinsic. This |
2198 | /// represents the combined bit test intrinsic which will be lowered at a late |
2199 | /// stage by the backend. |
2200 | virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const { |
2201 | llvm_unreachable( |
        "Bit test atomicrmw expansion unimplemented on this target");
2203 | } |
2204 | |
  /// Perform an atomicrmw whose result is only used by a comparison, using a
  /// target-specific intrinsic. This represents the combined atomic and
  /// compare intrinsic which will be lowered at a late stage by the backend.
2208 | virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const { |
2209 | llvm_unreachable( |
        "Compare arith atomicrmw expansion unimplemented on this target");
2211 | } |
2212 | |
2213 | /// Perform a masked cmpxchg using a target-specific intrinsic. This |
2214 | /// represents the core LL/SC loop which will be lowered at a late stage by |
2215 | /// the backend. The target-specific intrinsic returns the loaded value and |
2216 | /// is not responsible for masking and shifting the result. |
2217 | virtual Value *emitMaskedAtomicCmpXchgIntrinsic( |
2218 | IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, |
2219 | Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { |
    llvm_unreachable("Masked cmpxchg expansion unimplemented on this target");
2221 | } |
2222 | |
2223 | //===--------------------------------------------------------------------===// |
2224 | /// \name KCFI check lowering. |
2225 | /// @{ |
2226 | |
2227 | virtual MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, |
2228 | MachineBasicBlock::instr_iterator &MBBI, |
2229 | const TargetInstrInfo *TII) const { |
    llvm_unreachable("KCFI is not supported on this target");
2231 | } |
2232 | |
2233 | /// @} |
2234 | |
2235 | /// Inserts in the IR a target-specific intrinsic specifying a fence. |
2236 | /// It is called by AtomicExpandPass before expanding an |
2237 | /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad |
2238 | /// if shouldInsertFencesForAtomic returns true. |
2239 | /// |
2240 | /// Inst is the original atomic instruction, prior to other expansions that |
2241 | /// may be performed. |
2242 | /// |
2243 | /// This function should either return a nullptr, or a pointer to an IR-level |
2244 | /// Instruction*. Even complex fence sequences can be represented by a |
2245 | /// single Instruction* through an intrinsic to be lowered later. |
2246 | /// |
2247 | /// The default implementation emits an IR fence before any release (or |
2248 | /// stronger) operation that stores, and after any acquire (or stronger) |
2249 | /// operation. This is generally a correct implementation, but backends may |
2250 | /// override if they wish to use alternative schemes (e.g. the PowerPC |
2251 | /// standard ABI uses a fence before a seq_cst load instead of after a |
2252 | /// seq_cst store). |
2253 | /// @{ |
2254 | virtual Instruction *emitLeadingFence(IRBuilderBase &Builder, |
2255 | Instruction *Inst, |
2256 | AtomicOrdering Ord) const; |
2257 | |
2258 | virtual Instruction *emitTrailingFence(IRBuilderBase &Builder, |
2259 | Instruction *Inst, |
2260 | AtomicOrdering Ord) const; |
2261 | /// @} |
2262 | |
2263 | // Emits code that executes when the comparison result in the ll/sc |
2264 | // expansion of a cmpxchg instruction is such that the store-conditional will |
2265 | // not execute. This makes it possible to balance out the load-linked with |
2266 | // a dedicated instruction, if desired. |
2267 | // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would |
2268 | // be unnecessarily held, except if clrex, inserted by this hook, is executed. |
2269 | virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const {} |
2270 | |
2271 | /// Returns true if arguments should be sign-extended in lib calls. |
2272 | virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { |
2273 | return IsSigned; |
2274 | } |
2275 | |
2276 | /// Returns true if arguments should be extended in lib calls. |
2277 | virtual bool shouldExtendTypeInLibCall(EVT Type) const { |
2278 | return true; |
2279 | } |
2280 | |
2281 | /// Returns how the given (atomic) load should be expanded by the |
2282 | /// IR-level AtomicExpand pass. |
2283 | virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { |
2284 | return AtomicExpansionKind::None; |
2285 | } |
2286 | |
2287 | /// Returns how the given (atomic) load should be cast by the IR-level |
2288 | /// AtomicExpand pass. |
2289 | virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const { |
2290 | if (LI->getType()->isFloatingPointTy()) |
2291 | return AtomicExpansionKind::CastToInteger; |
2292 | return AtomicExpansionKind::None; |
2293 | } |
2294 | |
  /// Returns how the given (atomic) store should be expanded by the IR-level
  /// AtomicExpand pass. For instance AtomicExpansionKind::Expand will try to
  /// use an atomicrmw xchg.
2298 | virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const { |
2299 | return AtomicExpansionKind::None; |
2300 | } |
2301 | |
  /// Returns how the given (atomic) store should be cast by the IR-level
  /// AtomicExpand pass. For instance AtomicExpansionKind::CastToInteger will
  /// try to cast the operands to integer values.
2305 | virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const { |
2306 | if (SI->getValueOperand()->getType()->isFloatingPointTy()) |
2307 | return AtomicExpansionKind::CastToInteger; |
2308 | return AtomicExpansionKind::None; |
2309 | } |
2310 | |
2311 | /// Returns how the given atomic cmpxchg should be expanded by the IR-level |
2312 | /// AtomicExpand pass. |
2313 | virtual AtomicExpansionKind |
2314 | shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { |
2315 | return AtomicExpansionKind::None; |
2316 | } |
2317 | |
2318 | /// Returns how the IR-level AtomicExpand pass should expand the given |
2319 | /// AtomicRMW, if at all. Default is to never expand. |
2320 | virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { |
2321 | return RMW->isFloatingPointOperation() ? |
2322 | AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; |
2323 | } |
2324 | |
  /// Returns how the given atomicrmw should be cast by the IR-level
  /// AtomicExpand pass.
2327 | virtual AtomicExpansionKind |
2328 | shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const { |
2329 | if (RMWI->getOperation() == AtomicRMWInst::Xchg && |
2330 | (RMWI->getValOperand()->getType()->isFloatingPointTy() || |
2331 | RMWI->getValOperand()->getType()->isPointerTy())) |
2332 | return AtomicExpansionKind::CastToInteger; |
2333 | |
2334 | return AtomicExpansionKind::None; |
2335 | } |
2336 | |
2337 | /// On some platforms, an AtomicRMW that never actually modifies the value |
2338 | /// (such as fetch_add of 0) can be turned into a fence followed by an |
2339 | /// atomic load. This may sound useless, but it makes it possible for the |
2340 | /// processor to keep the cacheline shared, dramatically improving |
2341 | /// performance. And such idempotent RMWs are useful for implementing some |
2342 | /// kinds of locks, see for example (justification + benchmarks): |
2343 | /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf |
2344 | /// This method tries doing that transformation, returning the atomic load if |
2345 | /// it succeeds, and nullptr otherwise. |
2346 | /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo |
2347 | /// another round of expansion. |
2348 | virtual LoadInst * |
2349 | lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const { |
2350 | return nullptr; |
2351 | } |
2352 | |
2353 | /// Returns how the platform's atomic operations are extended (ZERO_EXTEND, |
2354 | /// SIGN_EXTEND, or ANY_EXTEND). |
2355 | virtual ISD::NodeType getExtendForAtomicOps() const { |
2356 | return ISD::ZERO_EXTEND; |
2357 | } |
2358 | |
2359 | /// Returns how the platform's atomic compare and swap expects its comparison |
2360 | /// value to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). This is |
2361 | /// separate from getExtendForAtomicOps, which is concerned with the |
2362 | /// sign-extension of the instruction's output, whereas here we are concerned |
2363 | /// with the sign-extension of the input. For targets with compare-and-swap |
2364 | /// instructions (or sub-word comparisons in their LL/SC loop expansions), |
2365 | /// the input can be ANY_EXTEND, but the output will still have a specific |
2366 | /// extension. |
2367 | virtual ISD::NodeType getExtendForAtomicCmpSwapArg() const { |
2368 | return ISD::ANY_EXTEND; |
2369 | } |
2370 | |
2371 | /// @} |
2372 | |
2373 | /// Returns true if we should normalize |
2374 | /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and |
  /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
2376 | /// that it saves us from materializing N0 and N1 in an integer register. |
2377 | /// Targets that are able to perform and/or on flags should return false here. |
2378 | virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, |
2379 | EVT VT) const { |
2380 | // If a target has multiple condition registers, then it likely has logical |
2381 | // operations on those registers. |
2382 | if (hasMultipleConditionRegisters()) |
2383 | return false; |
2384 | // Only do the transform if the value won't be split into multiple |
2385 | // registers. |
2386 | LegalizeTypeAction Action = getTypeAction(Context, VT); |
2387 | return Action != TypeExpandInteger && Action != TypeExpandFloat && |
2388 | Action != TypeSplitVector; |
2389 | } |
2390 | |
2391 | virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; } |
2392 | |
2393 | /// Return true if a select of constants (select Cond, C1, C2) should be |
2394 | /// transformed into simple math ops with the condition value. For example: |
2395 | /// select Cond, C1, C1-1 --> add (zext Cond), C1-1 |
2396 | virtual bool convertSelectOfConstantsToMath(EVT VT) const { |
2397 | return false; |
2398 | } |
2399 | |
2400 | /// Return true if it is profitable to transform an integer |
2401 | /// multiplication-by-constant into simpler operations like shifts and adds. |
2402 | /// This may be true if the target does not directly support the |
2403 | /// multiplication operation for the specified type or the sequence of simpler |
2404 | /// ops is faster than the multiply. |
2405 | virtual bool decomposeMulByConstant(LLVMContext &Context, |
2406 | EVT VT, SDValue C) const { |
2407 | return false; |
2408 | } |
2409 | |
2410 | /// Return true if it may be profitable to transform |
2411 | /// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). |
2412 | /// This may not be true if c1 and c2 can be represented as immediates but |
2413 | /// c1*c2 cannot, for example. |
2414 | /// The target should check if c1, c2 and c1*c2 can be represented as |
2415 | /// immediates, or have to be materialized into registers. If it is not sure |
2416 | /// about some cases, a default true can be returned to let the DAGCombiner |
2417 | /// decide. |
2418 | /// AddNode is (add x, c1), and ConstNode is c2. |
2419 | virtual bool isMulAddWithConstProfitable(SDValue AddNode, |
2420 | SDValue ConstNode) const { |
2421 | return true; |
2422 | } |
2423 | |
2424 | /// Return true if it is more correct/profitable to use strict FP_TO_INT |
2425 | /// conversion operations - canonicalizing the FP source value instead of |
2426 | /// converting all cases and then selecting based on value. |
2427 | /// This may be true if the target throws exceptions for out of bounds |
2428 | /// conversions or has fast FP CMOV. |
2429 | virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, |
2430 | bool IsSigned) const { |
2431 | return false; |
2432 | } |
2433 | |
2434 | /// Return true if it is beneficial to expand an @llvm.powi.* intrinsic. |
2435 | /// If not optimizing for size, expanding @llvm.powi.* intrinsics is always |
2436 | /// considered beneficial. |
  /// If optimizing for size, expansion is only considered beneficial for up to
2438 | /// 5 multiplies and a divide (if the exponent is negative). |
2439 | bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const { |
2440 | if (Exponent < 0) |
2441 | Exponent = -Exponent; |
2442 | uint64_t E = static_cast<uint64_t>(Exponent); |
    return !OptForSize || (llvm::popcount(E) + Log2_64(E) < 7);
2444 | } |
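
  // Worked example: for an exponent of 19, popcount(19) + Log2_64(19) == 3 + 4
  // == 7, i.e. 6 multiplies under binary exponentiation, so the expansion is
  // rejected when optimizing for size; for an exponent of 16 the sum is
  // 1 + 4 == 5 (4 multiplies), so it is accepted.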
2445 | |
2446 | //===--------------------------------------------------------------------===// |
2447 | // TargetLowering Configuration Methods - These methods should be invoked by |
2448 | // the derived class constructor to configure this object for the target. |
2449 | // |
2450 | protected: |
2451 | /// Specify how the target extends the result of integer and floating point |
2452 | /// boolean values from i1 to a wider type. See getBooleanContents. |
2453 | void setBooleanContents(BooleanContent Ty) { |
2454 | BooleanContents = Ty; |
2455 | BooleanFloatContents = Ty; |
2456 | } |
2457 | |
2458 | /// Specify how the target extends the result of integer and floating point |
2459 | /// boolean values from i1 to a wider type. See getBooleanContents. |
2460 | void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) { |
2461 | BooleanContents = IntTy; |
2462 | BooleanFloatContents = FloatTy; |
2463 | } |
2464 | |
2465 | /// Specify how the target extends the result of a vector boolean value from a |
2466 | /// vector of i1 to a wider type. See getBooleanContents. |
2467 | void setBooleanVectorContents(BooleanContent Ty) { |
2468 | BooleanVectorContents = Ty; |
2469 | } |
2470 | |
2471 | /// Specify the target scheduling preference. |
2472 | void setSchedulingPreference(Sched::Preference Pref) { |
2473 | SchedPreferenceInfo = Pref; |
2474 | } |
2475 | |
  /// Indicate the minimum number of blocks required to generate a jump table.
2477 | void setMinimumJumpTableEntries(unsigned Val); |
2478 | |
2479 | /// Indicate the maximum number of entries in jump tables. |
2480 | /// Set to zero to generate unlimited jump tables. |
2481 | void setMaximumJumpTableSize(unsigned); |
2482 | |
2483 | /// If set to a physical register, this specifies the register that |
  /// llvm.stacksave/llvm.stackrestore should save and restore.
2485 | void setStackPointerRegisterToSaveRestore(Register R) { |
2486 | StackPointerRegisterToSaveRestore = R; |
2487 | } |
2488 | |
2489 | /// Tells the code generator that the target has multiple (allocatable) |
2490 | /// condition registers that can be used to store the results of comparisons |
2491 | /// for use by selects and conditional branches. With multiple condition |
2492 | /// registers, the code generator will not aggressively sink comparisons into |
2493 | /// the blocks of their users. |
2494 | void setHasMultipleConditionRegisters(bool hasManyRegs = true) { |
2495 | HasMultipleConditionRegisters = hasManyRegs; |
2496 | } |
2497 | |
2498 | /// Tells the code generator that the target has BitExtract instructions. |
2499 | /// The code generator will aggressively sink "shift"s into the blocks of |
2500 | /// their users if the users will generate "and" instructions which can be |
2501 | /// combined with "shift" to BitExtract instructions. |
  void setHasExtractBitsInsn(bool hasExtractInsn = true) {
2503 | HasExtractBitsInsn = hasExtractInsn; |
2504 | } |
2505 | |
2506 | /// Tells the code generator not to expand logic operations on comparison |
2507 | /// predicates into separate sequences that increase the amount of flow |
2508 | /// control. |
2509 | void setJumpIsExpensive(bool isExpensive = true); |
2510 | |
2511 | /// Tells the code generator which bitwidths to bypass. |
2512 | void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) { |
2513 | BypassSlowDivWidths[SlowBitWidth] = FastBitWidth; |
2514 | } |
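
  // For example (illustrative values), a target whose 64-bit divide is
  // microcoded but whose 32-bit divide is fast might call, in its derived
  // constructor:
  //
  //   addBypassSlowDiv(64, 32); // try a 32-bit div/rem when the values fit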
2515 | |
2516 | /// Add the specified register class as an available regclass for the |
2517 | /// specified value type. This indicates the selector can handle values of |
2518 | /// that class natively. |
2519 | void addRegisterClass(MVT VT, const TargetRegisterClass *RC) { |
2520 | assert((unsigned)VT.SimpleTy < std::size(RegClassForVT)); |
2521 | RegClassForVT[VT.SimpleTy] = RC; |
2522 | } |
2523 | |
2524 | /// Return the largest legal super-reg register class of the register class |
2525 | /// for the specified type and its associated "cost". |
2526 | virtual std::pair<const TargetRegisterClass *, uint8_t> |
2527 | findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const; |
2528 | |
2529 | /// Once all of the register classes are added, this allows us to compute |
2530 | /// derived properties we expose. |
2531 | void computeRegisterProperties(const TargetRegisterInfo *TRI); |
2532 | |
2533 | /// Indicate that the specified operation does not work with the specified |
2534 | /// type and indicate what to do about it. Note that VT may refer to either |
2535 | /// the type of a result or that of an operand of Op. |
2536 | void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) { |
    assert(Op < std::size(OpActions[0]) && "Table isn't big enough!");
2538 | OpActions[(unsigned)VT.SimpleTy][Op] = Action; |
2539 | } |
2540 | void setOperationAction(ArrayRef<unsigned> Ops, MVT VT, |
2541 | LegalizeAction Action) { |
2542 | for (auto Op : Ops) |
2543 | setOperationAction(Op, VT, Action); |
2544 | } |
2545 | void setOperationAction(ArrayRef<unsigned> Ops, ArrayRef<MVT> VTs, |
2546 | LegalizeAction Action) { |
2547 | for (auto VT : VTs) |
2548 | setOperationAction(Ops, VT, Action); |
2549 | } |
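
  // Illustrative sketch (hypothetical target and register class names): a
  // derived constructor usually registers its legal types first and then
  // describes the operations that need help, e.g.
  //
  //   addRegisterClass(MVT::i32, &MyTarget::GPR32RegClass);
  //   setOperationAction(ISD::SDIV, MVT::i32, Expand);       // no divider
  //   setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
  //   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);  // hand lowered
  //   computeRegisterProperties(Subtarget.getRegisterInfo());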
2550 | |
2551 | /// Indicate that the specified load with extension does not work with the |
2552 | /// specified type and indicate what to do about it. |
2553 | void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, |
2554 | LegalizeAction Action) { |
2555 | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && |
           MemVT.isValid() && "Table isn't big enough!");
    assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2558 | unsigned Shift = 4 * ExtType; |
2559 | LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift); |
2560 | LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift; |
2561 | } |
2562 | void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT, |
2563 | LegalizeAction Action) { |
2564 | for (auto ExtType : ExtTypes) |
2565 | setLoadExtAction(ExtType, ValVT, MemVT, Action); |
2566 | } |
2567 | void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, |
2568 | ArrayRef<MVT> MemVTs, LegalizeAction Action) { |
2569 | for (auto MemVT : MemVTs) |
2570 | setLoadExtAction(ExtTypes, ValVT, MemVT, Action); |
2571 | } |
2572 | |
  /// Let the target indicate that an extending atomic load of the specified
2574 | /// is legal. |
2575 | void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, |
2576 | LegalizeAction Action) { |
2577 | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && |
           MemVT.isValid() && "Table isn't big enough!");
    assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2580 | unsigned Shift = 4 * ExtType; |
2581 | AtomicLoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= |
2582 | ~((uint16_t)0xF << Shift); |
2583 | AtomicLoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= |
2584 | ((uint16_t)Action << Shift); |
2585 | } |
2586 | void setAtomicLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT, |
2587 | LegalizeAction Action) { |
2588 | for (auto ExtType : ExtTypes) |
2589 | setAtomicLoadExtAction(ExtType, ValVT, MemVT, Action); |
2590 | } |
2591 | void setAtomicLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, |
2592 | ArrayRef<MVT> MemVTs, LegalizeAction Action) { |
2593 | for (auto MemVT : MemVTs) |
2594 | setAtomicLoadExtAction(ExtTypes, ValVT, MemVT, Action); |
2595 | } |
2596 | |
2597 | /// Indicate that the specified truncating store does not work with the |
2598 | /// specified type and indicate what to do about it. |
2599 | void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) { |
    assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
2601 | TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; |
2602 | } |
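
  // For instance (illustrative), a target without extending f32->f64 loads or
  // truncating f64->f32 stores would mark both in its constructor:
  //
  //   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  //   setTruncStoreAction(MVT::f64, MVT::f32, Expand);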
2603 | |
2604 | /// Indicate that the specified indexed load does or does not work with the |
  /// specified type and indicate what to do about it.
2606 | /// |
2607 | /// NOTE: All indexed mode loads are initialized to Expand in |
2608 | /// TargetLowering.cpp |
2609 | void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, MVT VT, |
2610 | LegalizeAction Action) { |
2611 | for (auto IdxMode : IdxModes) |
      setIndexedModeAction(IdxMode, VT, IMAB_Load, Action);
2613 | } |
2614 | |
2615 | void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs, |
2616 | LegalizeAction Action) { |
2617 | for (auto VT : VTs) |
2618 | setIndexedLoadAction(IdxModes, VT, Action); |
2619 | } |
2620 | |
2621 | /// Indicate that the specified indexed store does or does not work with the |
2622 | /// specified type and indicate what to do about it. |
2623 | /// |
2624 | /// NOTE: All indexed mode stores are initialized to Expand in |
2625 | /// TargetLowering.cpp |
2626 | void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, MVT VT, |
2627 | LegalizeAction Action) { |
2628 | for (auto IdxMode : IdxModes) |
      setIndexedModeAction(IdxMode, VT, IMAB_Store, Action);
2630 | } |
2631 | |
2632 | void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs, |
2633 | LegalizeAction Action) { |
2634 | for (auto VT : VTs) |
2635 | setIndexedStoreAction(IdxModes, VT, Action); |
2636 | } |
2637 | |
2638 | /// Indicate that the specified indexed masked load does or does not work with |
2639 | /// the specified type and indicate what to do about it. |
2640 | /// |
2641 | /// NOTE: All indexed mode masked loads are initialized to Expand in |
2642 | /// TargetLowering.cpp |
2643 | void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, |
2644 | LegalizeAction Action) { |
    setIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad, Action);
2646 | } |
2647 | |
2648 | /// Indicate that the specified indexed masked store does or does not work |
2649 | /// with the specified type and indicate what to do about it. |
2650 | /// |
2651 | /// NOTE: All indexed mode masked stores are initialized to Expand in |
2652 | /// TargetLowering.cpp |
2653 | void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, |
2654 | LegalizeAction Action) { |
    setIndexedModeAction(IdxMode, VT, IMAB_MaskedStore, Action);
2656 | } |
2657 | |
2658 | /// Indicate that the specified condition code is or isn't supported on the |
2659 | /// target and indicate what to do about it. |
2660 | void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, MVT VT, |
2661 | LegalizeAction Action) { |
2662 | for (auto CC : CCs) { |
2663 | assert(VT.isValid() && (unsigned)CC < std::size(CondCodeActions) && |
             "Table isn't big enough!");
      assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2666 | /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the |
2667 | /// 32-bit value and the upper 29 bits index into the second dimension of |
2668 | /// the array to select what 32-bit value to use. |
2669 | uint32_t Shift = 4 * (VT.SimpleTy & 0x7); |
2670 | CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); |
2671 | CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; |
2672 | } |
2673 | } |
2674 | void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, ArrayRef<MVT> VTs, |
2675 | LegalizeAction Action) { |
2676 | for (auto VT : VTs) |
2677 | setCondCodeAction(CCs, VT, Action); |
2678 | } |
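
  // Worked example of the packing: for VT.SimpleTy == 10 and CC == ISD::SETLT,
  // the action is stored in CondCodeActions[ISD::SETLT][10 >> 3], i.e. word 1,
  // at bit offset 4 * (10 & 0x7) == 8, so it occupies bits 8-11 of that word.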
2679 | |
2680 | /// If Opc/OrigVT is specified as being promoted, the promotion code defaults |
2681 | /// to trying a larger integer/fp until it can find one that works. If that |
2682 | /// default is insufficient, this method can be used by the target to override |
2683 | /// the default. |
2684 | void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { |
    PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
2686 | } |
2687 | |
2688 | /// Convenience method to set an operation to Promote and specify the type |
2689 | /// in a single call. |
2690 | void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { |
    setOperationAction(Opc, OrigVT, Promote);
2692 | AddPromotedToType(Opc, OrigVT, DestVT); |
2693 | } |
2694 | void setOperationPromotedToType(ArrayRef<unsigned> Ops, MVT OrigVT, |
2695 | MVT DestVT) { |
2696 | for (auto Op : Ops) { |
      setOperationAction(Op, OrigVT, Promote);
      AddPromotedToType(Op, OrigVT, DestVT);
2699 | } |
2700 | } |
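
  // For example (illustrative), a target with only 32-bit bit-counting
  // instructions could promote the narrower forms:
  //
  //   setOperationPromotedToType(ISD::CTLZ, MVT::i8, MVT::i32);
  //   setOperationPromotedToType(ISD::CTLZ, MVT::i16, MVT::i32);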
2701 | |
2702 | /// Targets should invoke this method for each target independent node that |
2703 | /// they want to provide a custom DAG combiner for by implementing the |
2704 | /// PerformDAGCombine virtual method. |
2705 | void setTargetDAGCombine(ArrayRef<ISD::NodeType> NTs) { |
2706 | for (auto NT : NTs) { |
2707 | assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray)); |
2708 | TargetDAGCombineArray[NT >> 3] |= 1 << (NT & 7); |
2709 | } |
2710 | } |
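
  // For example (illustrative), a target that folds shift-and-mask patterns in
  // its own combine would request callbacks in its constructor:
  //
  //   setTargetDAGCombine({ISD::SHL, ISD::SRL, ISD::AND});
  //
  // and then handle those opcodes in its PerformDAGCombine override.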
2711 | |
2712 | /// Set the target's minimum function alignment. |
2713 | void setMinFunctionAlignment(Align Alignment) { |
2714 | MinFunctionAlignment = Alignment; |
2715 | } |
2716 | |
2717 | /// Set the target's preferred function alignment. This should be set if |
  /// there is a performance benefit to higher-than-minimum alignment.
2719 | void setPrefFunctionAlignment(Align Alignment) { |
2720 | PrefFunctionAlignment = Alignment; |
2721 | } |
2722 | |
  /// Set the target's preferred loop alignment. The default alignment is one,
  /// which means the target does not care about loop alignment. The target may
  /// also override getPrefLoopAlignment to provide per-loop values.
2726 | void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; } |
2727 | void setMaxBytesForAlignment(unsigned MaxBytes) { |
2728 | MaxBytesForAlignment = MaxBytes; |
2729 | } |
2730 | |
2731 | /// Set the minimum stack alignment of an argument. |
2732 | void setMinStackArgumentAlignment(Align Alignment) { |
2733 | MinStackArgumentAlignment = Alignment; |
2734 | } |
2735 | |
2736 | /// Set the maximum atomic operation size supported by the |
2737 | /// backend. Atomic operations greater than this size (as well as |
  /// ones that are not naturally aligned) will be expanded by
2739 | /// AtomicExpandPass into an __atomic_* library call. |
2740 | void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) { |
2741 | MaxAtomicSizeInBitsSupported = SizeInBits; |
2742 | } |
2743 | |
2744 | /// Set the size in bits of the maximum div/rem the backend supports. |
2745 | /// Larger operations will be expanded by ExpandLargeDivRem. |
2746 | void setMaxDivRemBitWidthSupported(unsigned SizeInBits) { |
2747 | MaxDivRemBitWidthSupported = SizeInBits; |
2748 | } |
2749 | |
2750 | /// Set the size in bits of the maximum fp convert the backend supports. |
2751 | /// Larger operations will be expanded by ExpandLargeFPConvert. |
2752 | void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits) { |
2753 | MaxLargeFPConvertBitWidthSupported = SizeInBits; |
2754 | } |
2755 | |
2756 | /// Sets the minimum cmpxchg or ll/sc size supported by the backend. |
2757 | void setMinCmpXchgSizeInBits(unsigned SizeInBits) { |
2758 | MinCmpXchgSizeInBits = SizeInBits; |
2759 | } |
2760 | |
2761 | /// Sets whether unaligned atomic operations are supported. |
2762 | void setSupportsUnalignedAtomics(bool UnalignedSupported) { |
2763 | SupportsUnalignedAtomics = UnalignedSupported; |
2764 | } |
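
  // For example (illustrative), a 32-bit target with word-sized ll/sc but no
  // 64-bit atomics might configure:
  //
  //   setMaxAtomicSizeInBitsSupported(32); // wider ops become __atomic_* calls
  //   setMinCmpXchgSizeInBits(32);         // narrower cmpxchg ops get widened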
2765 | |
2766 | public: |
2767 | //===--------------------------------------------------------------------===// |
2768 | // Addressing mode description hooks (used by LSR etc). |
2769 | // |
2770 | |
2771 | /// CodeGenPrepare sinks address calculations into the same BB as Load/Store |
2772 | /// instructions reading the address. This allows as much computation as |
2773 | /// possible to be done in the address mode for that operand. This hook lets |
2774 | /// targets also pass back when this should be done on intrinsics which |
2775 | /// load/store. |
2776 | virtual bool getAddrModeArguments(IntrinsicInst * /*I*/, |
2777 | SmallVectorImpl<Value*> &/*Ops*/, |
2778 | Type *&/*AccessTy*/) const { |
2779 | return false; |
2780 | } |
2781 | |
2782 | /// This represents an addressing mode of: |
2783 | /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale |
2784 | /// If BaseGV is null, there is no BaseGV. |
2785 | /// If BaseOffs is zero, there is no base offset. |
2786 | /// If HasBaseReg is false, there is no base register. |
2787 | /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with |
2788 | /// no scale. |
2789 | /// If ScalableOffset is zero, there is no scalable offset. |
2790 | struct AddrMode { |
2791 | GlobalValue *BaseGV = nullptr; |
2792 | int64_t BaseOffs = 0; |
2793 | bool HasBaseReg = false; |
2794 | int64_t Scale = 0; |
2795 | int64_t ScalableOffset = 0; |
2796 | AddrMode() = default; |
2797 | }; |
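
  // Worked example: the address expression "GV + 16 + BaseReg + 4*ScaleReg" is
  // described by BaseGV = GV, BaseOffs = 16, HasBaseReg = true, Scale = 4 and
  // ScalableOffset = 0.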
2798 | |
2799 | /// Return true if the addressing mode represented by AM is legal for this |
2800 | /// target, for a load/store of the specified type. |
2801 | /// |
2802 | /// The type may be VoidTy, in which case only return true if the addressing |
2803 | /// mode is legal for a load/store of any legal type. TODO: Handle |
2804 | /// pre/postinc as well. |
2805 | /// |
2806 | /// If the address space cannot be determined, it will be -1. |
2807 | /// |
2808 | /// TODO: Remove default argument |
2809 | virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, |
2810 | Type *Ty, unsigned AddrSpace, |
2811 | Instruction *I = nullptr) const; |
2812 | |
  /// Returns true if the target's addressing mode can target thread local
2814 | /// storage (TLS). |
2815 | virtual bool addressingModeSupportsTLS(const GlobalValue &) const { |
2816 | return false; |
2817 | } |
2818 | |
  /// Return the preferred common base offset.
2820 | virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, |
2821 | int64_t MaxOffset) const { |
2822 | return 0; |
2823 | } |
2824 | |
  /// Return true if the specified immediate is a legal icmp immediate, that is,
  /// the target has icmp instructions which can compare a register against the
  /// immediate without having to materialize the immediate into a register.
2828 | virtual bool isLegalICmpImmediate(int64_t) const { |
2829 | return true; |
2830 | } |
2831 | |
  /// Return true if the specified immediate is a legal add immediate, that is,
  /// the target has add instructions which can add a register with the
  /// immediate without having to materialize the immediate into a register.
2835 | virtual bool isLegalAddImmediate(int64_t) const { |
2836 | return true; |
2837 | } |
2838 | |
2839 | /// Return true if adding the specified scalable immediate is legal, that is |
2840 | /// the target has add instructions which can add a register with the |
2841 | /// immediate (multiplied by vscale) without having to materialize the |
2842 | /// immediate into a register. |
2843 | virtual bool isLegalAddScalableImmediate(int64_t) const { return false; } |
2844 | |
2845 | /// Return true if the specified immediate is legal for the value input of a |
2846 | /// store instruction. |
2847 | virtual bool isLegalStoreImmediate(int64_t Value) const { |
2848 | // Default implementation assumes that at least 0 works since it is likely |
2849 | // that a zero register exists or a zero immediate is allowed. |
2850 | return Value == 0; |
2851 | } |
2852 | |
2853 | /// Return true if it's significantly cheaper to shift a vector by a uniform |
2854 | /// scalar than by an amount which will vary across each lane. On x86 before |
2855 | /// AVX2 for example, there is a "psllw" instruction for the former case, but |
2856 | /// no simple instruction for a general "a << b" operation on vectors. |
2857 | /// This should also apply to lowering for vector funnel shifts (rotates). |
2858 | virtual bool isVectorShiftByScalarCheap(Type *Ty) const { |
2859 | return false; |
2860 | } |
2861 | |
2862 | /// Given a shuffle vector SVI representing a vector splat, return a new |
2863 | /// scalar type of size equal to SVI's scalar type if the new type is more |
2864 | /// profitable. Returns nullptr otherwise. For example under MVE float splats |
2865 | /// are converted to integer to prevent the need to move from SPR to GPR |
2866 | /// registers. |
2867 | virtual Type* shouldConvertSplatType(ShuffleVectorInst* SVI) const { |
2868 | return nullptr; |
2869 | } |
2870 | |
  /// Given a set of interconnected phis of type 'From' that are loaded/stored
2872 | /// or bitcast to type 'To', return true if the set should be converted to |
2873 | /// 'To'. |
2874 | virtual bool shouldConvertPhiType(Type *From, Type *To) const { |
2875 | return (From->isIntegerTy() || From->isFloatingPointTy()) && |
2876 | (To->isIntegerTy() || To->isFloatingPointTy()); |
2877 | } |
2878 | |
2879 | /// Returns true if the opcode is a commutative binary operation. |
2880 | virtual bool isCommutativeBinOp(unsigned Opcode) const { |
2881 | // FIXME: This should get its info from the td file. |
2882 | switch (Opcode) { |
2883 | case ISD::ADD: |
2884 | case ISD::SMIN: |
2885 | case ISD::SMAX: |
2886 | case ISD::UMIN: |
2887 | case ISD::UMAX: |
2888 | case ISD::MUL: |
2889 | case ISD::MULHU: |
2890 | case ISD::MULHS: |
2891 | case ISD::SMUL_LOHI: |
2892 | case ISD::UMUL_LOHI: |
2893 | case ISD::FADD: |
2894 | case ISD::FMUL: |
2895 | case ISD::AND: |
2896 | case ISD::OR: |
2897 | case ISD::XOR: |
2898 | case ISD::SADDO: |
2899 | case ISD::UADDO: |
2900 | case ISD::ADDC: |
2901 | case ISD::ADDE: |
2902 | case ISD::SADDSAT: |
2903 | case ISD::UADDSAT: |
2904 | case ISD::FMINNUM: |
2905 | case ISD::FMAXNUM: |
2906 | case ISD::FMINNUM_IEEE: |
2907 | case ISD::FMAXNUM_IEEE: |
2908 | case ISD::FMINIMUM: |
2909 | case ISD::FMAXIMUM: |
2910 | case ISD::AVGFLOORS: |
2911 | case ISD::AVGFLOORU: |
2912 | case ISD::AVGCEILS: |
2913 | case ISD::AVGCEILU: |
2914 | case ISD::ABDS: |
2915 | case ISD::ABDU: |
2916 | return true; |
2917 | default: return false; |
2918 | } |
2919 | } |
2920 | |
2921 | /// Return true if the node is a math/logic binary operator. |
2922 | virtual bool isBinOp(unsigned Opcode) const { |
2923 | // A commutative binop must be a binop. |
2924 | if (isCommutativeBinOp(Opcode)) |
2925 | return true; |
2926 | // These are non-commutative binops. |
2927 | switch (Opcode) { |
2928 | case ISD::SUB: |
2929 | case ISD::SHL: |
2930 | case ISD::SRL: |
2931 | case ISD::SRA: |
2932 | case ISD::ROTL: |
2933 | case ISD::ROTR: |
2934 | case ISD::SDIV: |
2935 | case ISD::UDIV: |
2936 | case ISD::SREM: |
2937 | case ISD::UREM: |
2938 | case ISD::SSUBSAT: |
2939 | case ISD::USUBSAT: |
2940 | case ISD::FSUB: |
2941 | case ISD::FDIV: |
2942 | case ISD::FREM: |
2943 | return true; |
2944 | default: |
2945 | return false; |
2946 | } |
2947 | } |
2948 | |
  /// Return true if it's free to truncate a value of type FromTy to type ToTy.
  /// e.g. On x86 it's free to truncate an i32 value in register EAX to i16 by
  /// referencing its sub-register AX.
2952 | /// Targets must return false when FromTy <= ToTy. |
2953 | virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const { |
2954 | return false; |
2955 | } |
2956 | |
2957 | /// Return true if a truncation from FromTy to ToTy is permitted when deciding |
2958 | /// whether a call is in tail position. Typically this means that both results |
2959 | /// would be assigned to the same register or stack slot, but it could mean |
2960 | /// the target performs adequate checks of its own before proceeding with the |
2961 | /// tail call. Targets must return false when FromTy <= ToTy. |
2962 | virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const { |
2963 | return false; |
2964 | } |
2965 | |
2966 | virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; } |
2967 | virtual bool isTruncateFree(LLT FromTy, LLT ToTy, const DataLayout &DL, |
2968 | LLVMContext &Ctx) const { |
    return isTruncateFree(getApproximateEVTForLLT(FromTy, DL, Ctx),
                          getApproximateEVTForLLT(ToTy, DL, Ctx));
2971 | } |
2972 | |
2973 | /// Return true if truncating the specific node Val to type VT2 is free. |
2974 | virtual bool isTruncateFree(SDValue Val, EVT VT2) const { |
2975 | // Fallback to type matching. |
    return isTruncateFree(Val.getValueType(), VT2);
2977 | } |
2978 | |
2979 | virtual bool isProfitableToHoist(Instruction *I) const { return true; } |
2980 | |
2981 | /// Return true if the extension represented by \p I is free. |
  /// Unlike the is[Z|FP]ExtFree family, which is based on types,
  /// this method can use the context provided by \p I to decide
  /// whether or not \p I is free.
  /// This method extends the behavior of the is[Z|FP]ExtFree family.
  /// In other words, if is[Z|FP]ExtFree returns true, then this method
2987 | /// returns true as well. The converse is not true. |
2988 | /// The target can perform the adequate checks by overriding isExtFreeImpl. |
2989 | /// \pre \p I must be a sign, zero, or fp extension. |
2990 | bool isExtFree(const Instruction *I) const { |
2991 | switch (I->getOpcode()) { |
2992 | case Instruction::FPExt: |
      if (isFPExtFree(EVT::getEVT(I->getType()),
                      EVT::getEVT(I->getOperand(0)->getType())))
        return true;
      break;
    case Instruction::ZExt:
      if (isZExtFree(I->getOperand(0)->getType(), I->getType()))
        return true;
      break;
    case Instruction::SExt:
      break;
    default:
      llvm_unreachable("Instruction is not an extension");
3005 | } |
3006 | return isExtFreeImpl(I); |
3007 | } |
3008 | |
3009 | /// Return true if \p Load and \p Ext can form an ExtLoad. |
3010 | /// For example, in AArch64 |
3011 | /// %L = load i8, i8* %ptr |
3012 | /// %E = zext i8 %L to i32 |
3013 | /// can be lowered into one load instruction |
3014 | /// ldrb w0, [x0] |
3015 | bool isExtLoad(const LoadInst *Load, const Instruction *Ext, |
3016 | const DataLayout &DL) const { |
    EVT VT = getValueType(DL, Ext->getType());
    EVT LoadVT = getValueType(DL, Load->getType());

    // If the load has other users and the truncate is not free, the ext
    // probably isn't free.
    if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) &&
        !isTruncateFree(Ext->getType(), Load->getType()))
      return false;

    // Check whether the target supports casts folded into loads.
    unsigned LType;
    if (isa<ZExtInst>(Ext))
      LType = ISD::ZEXTLOAD;
    else {
      assert(isa<SExtInst>(Ext) && "Unexpected ext type!");
      LType = ISD::SEXTLOAD;
    }

    return isLoadExtLegal(LType, VT, LoadVT);
3036 | } |
3037 | |
3038 | /// Return true if any actual instruction that defines a value of type FromTy |
3039 | /// implicitly zero-extends the value to ToTy in the result register. |
3040 | /// |
3041 | /// The function should return true when it is likely that the truncate can |
3042 | /// be freely folded with an instruction defining a value of FromTy. If |
3043 | /// the defining instruction is unknown (because you're looking at a |
3044 | /// function argument, PHI, etc.) then the target may require an |
3045 | /// explicit truncate, which is not necessarily free, but this function |
3046 | /// does not deal with those cases. |
3047 | /// Targets must return false when FromTy >= ToTy. |
3048 | virtual bool isZExtFree(Type *FromTy, Type *ToTy) const { |
3049 | return false; |
3050 | } |
3051 | |
3052 | virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; } |
3053 | virtual bool isZExtFree(LLT FromTy, LLT ToTy, const DataLayout &DL, |
3054 | LLVMContext &Ctx) const { |
    return isZExtFree(getApproximateEVTForLLT(FromTy, DL, Ctx),
                      getApproximateEVTForLLT(ToTy, DL, Ctx));
3057 | } |
3058 | |
3059 | /// Return true if zero-extending the specific node Val to type VT2 is free |
3060 | /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or |
3061 | /// because it's folded such as X86 zero-extending loads). |
3062 | virtual bool isZExtFree(SDValue Val, EVT VT2) const { |
    return isZExtFree(Val.getValueType(), VT2);
3064 | } |
3065 | |
3066 | /// Return true if sign-extension from FromTy to ToTy is cheaper than |
3067 | /// zero-extension. |
3068 | virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const { |
3069 | return false; |
3070 | } |
3071 | |
3072 | /// Return true if this constant should be sign extended when promoting to |
3073 | /// a larger type. |
3074 | virtual bool signExtendConstant(const ConstantInt *C) const { return false; } |
3075 | |
3076 | /// Return true if sinking I's operands to the same basic block as I is |
3077 | /// profitable, e.g. because the operands can be folded into a target |
3078 | /// instruction during instruction selection. After calling the function |
3079 | /// \p Ops contains the Uses to sink ordered by dominance (dominating users |
3080 | /// come first). |
3081 | virtual bool shouldSinkOperands(Instruction *I, |
3082 | SmallVectorImpl<Use *> &Ops) const { |
3083 | return false; |
3084 | } |
3085 | |
3086 | /// Try to optimize extending or truncating conversion instructions (like |
3087 | /// zext, trunc, fptoui, uitofp) for the target. |
3088 | virtual bool |
3089 | optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, |
3090 | const TargetTransformInfo &TTI) const { |
3091 | return false; |
3092 | } |
3093 | |
3094 | /// Return true if the target supplies and combines to a paired load |
3095 | /// two loaded values of type LoadedType next to each other in memory. |
3096 | /// RequiredAlignment gives the minimal alignment constraints that must be met |
3097 | /// to be able to select this paired load. |
3098 | /// |
3099 | /// This information is *not* used to generate actual paired loads, but it is |
3100 | /// used to generate a sequence of loads that is easier to combine into a |
3101 | /// paired load. |
3102 | /// For instance, something like this: |
3103 | /// a = load i64* addr |
3104 | /// b = trunc i64 a to i32 |
3105 | /// c = lshr i64 a, 32 |
3106 | /// d = trunc i64 c to i32 |
3107 | /// will be optimized into: |
3108 | /// b = load i32* addr1 |
3109 | /// d = load i32* addr2 |
3110 | /// Where addr1 = addr2 +/- sizeof(i32). |
3111 | /// |
3112 | /// In other words, unless the target performs a post-isel load combining, |
3113 | /// this information should not be provided because it will generate more |
3114 | /// loads. |
3115 | virtual bool hasPairedLoad(EVT /*LoadedType*/, |
3116 | Align & /*RequiredAlignment*/) const { |
3117 | return false; |
3118 | } |
3119 | |
3120 | /// Return true if the target has a vector blend instruction. |
3121 | virtual bool hasVectorBlend() const { return false; } |
3122 | |
3123 | /// Get the maximum supported factor for interleaved memory accesses. |
3124 | /// Default to be the minimum interleave factor: 2. |
3125 | virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; } |
3126 | |
3127 | /// Lower an interleaved load to target specific intrinsics. Return |
3128 | /// true on success. |
3129 | /// |
3130 | /// \p LI is the vector load instruction. |
3131 | /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector. |
3132 | /// \p Indices is the corresponding indices for each shufflevector. |
3133 | /// \p Factor is the interleave factor. |
3134 | virtual bool lowerInterleavedLoad(LoadInst *LI, |
3135 | ArrayRef<ShuffleVectorInst *> Shuffles, |
3136 | ArrayRef<unsigned> Indices, |
3137 | unsigned Factor) const { |
3138 | return false; |
3139 | } |
3140 | |
3141 | /// Lower an interleaved store to target specific intrinsics. Return |
3142 | /// true on success. |
3143 | /// |
3144 | /// \p SI is the vector store instruction. |
3145 | /// \p SVI is the shufflevector to RE-interleave the stored vector. |
3146 | /// \p Factor is the interleave factor. |
3147 | virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, |
3148 | unsigned Factor) const { |
3149 | return false; |
3150 | } |
3151 | |
3152 | /// Lower a deinterleave intrinsic to a target specific load intrinsic. |
3153 | /// Return true on success. Currently only supports |
3154 | /// llvm.vector.deinterleave2 |
3155 | /// |
3156 | /// \p DI is the deinterleave intrinsic. |
3157 | /// \p LI is the accompanying load instruction |
3158 | virtual bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI, |
3159 | LoadInst *LI) const { |
3160 | return false; |
3161 | } |
3162 | |
3163 | /// Lower an interleave intrinsic to a target specific store intrinsic. |
3164 | /// Return true on success. Currently only supports |
3165 | /// llvm.vector.interleave2 |
3166 | /// |
3167 | /// \p II is the interleave intrinsic. |
3168 | /// \p SI is the accompanying store instruction |
3169 | virtual bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, |
3170 | StoreInst *SI) const { |
3171 | return false; |
3172 | } |
3173 | |
3174 | /// Return true if an fpext operation is free (for instance, because |
3175 | /// single-precision floating-point numbers are implicitly extended to |
3176 | /// double-precision). |
3177 | virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const { |
3178 | assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() && |
           "invalid fpext types");
3180 | return false; |
3181 | } |
3182 | |
3183 | /// Return true if an fpext operation input to an \p Opcode operation is free |
3184 | /// (for instance, because half-precision floating-point numbers are |
3185 | /// implicitly extended to float-precision) for an FMA instruction. |
3186 | virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, |
3187 | LLT DestTy, LLT SrcTy) const { |
3188 | return false; |
3189 | } |
3190 | |
3191 | /// Return true if an fpext operation input to an \p Opcode operation is free |
3192 | /// (for instance, because half-precision floating-point numbers are |
3193 | /// implicitly extended to float-precision) for an FMA instruction. |
3194 | virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, |
3195 | EVT DestVT, EVT SrcVT) const { |
3196 | assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && |
           "invalid fpext types");
3198 | return isFPExtFree(DestVT, SrcVT); |
3199 | } |
3200 | |
3201 | /// Return true if folding a vector load into ExtVal (a sign, zero, or any |
3202 | /// extend node) is profitable. |
3203 | virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; } |
3204 | |
3205 | /// Return true if an fneg operation is free to the point where it is never |
3206 | /// worthwhile to replace it with a bitwise operation. |
3207 | virtual bool isFNegFree(EVT VT) const { |
3208 | assert(VT.isFloatingPoint()); |
3209 | return false; |
3210 | } |
3211 | |
3212 | /// Return true if an fabs operation is free to the point where it is never |
3213 | /// worthwhile to replace it with a bitwise operation. |
3214 | virtual bool isFAbsFree(EVT VT) const { |
3215 | assert(VT.isFloatingPoint()); |
3216 | return false; |
3217 | } |
3218 | |
3219 | /// Return true if an FMA operation is faster than a pair of fmul and fadd |
3220 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this method |
3221 | /// returns true, otherwise fmuladd is expanded to fmul + fadd. |
3222 | /// |
3223 | /// NOTE: This may be called before legalization on types for which FMAs are |
3224 | /// not legal, but should return true if those types will eventually legalize |
3225 | /// to types that support FMAs. After legalization, it will only be called on |
3226 | /// types that support FMAs (via Legal or Custom actions) |
3227 | virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
3228 | EVT) const { |
3229 | return false; |
3230 | } |
3231 | |
3232 | /// Return true if an FMA operation is faster than a pair of fmul and fadd |
3233 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this method |
3234 | /// returns true, otherwise fmuladd is expanded to fmul + fadd. |
3235 | /// |
3236 | /// NOTE: This may be called before legalization on types for which FMAs are |
3237 | /// not legal, but should return true if those types will eventually legalize |
3238 | /// to types that support FMAs. After legalization, it will only be called on |
3239 | /// types that support FMAs (via Legal or Custom actions) |
3240 | virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
3241 | LLT) const { |
3242 | return false; |
3243 | } |
3244 | |
3245 | /// IR version |
3246 | virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const { |
3247 | return false; |
3248 | } |
3249 | |
  /// Returns true if \p MI can be combined with another instruction to
  /// form TargetOpcode::G_FMAD. \p MI may be a TargetOpcode::G_FADD,
  /// TargetOpcode::G_FSUB, or a TargetOpcode::G_FMUL which will be
  /// distributed into an fadd/fsub.
3254 | virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const { |
3255 | assert((MI.getOpcode() == TargetOpcode::G_FADD || |
3256 | MI.getOpcode() == TargetOpcode::G_FSUB || |
3257 | MI.getOpcode() == TargetOpcode::G_FMUL) && |
           "unexpected node in FMAD forming combine");
    switch (Ty.getScalarSizeInBits()) {
    case 16:
      return isOperationLegal(TargetOpcode::G_FMAD, MVT::f16);
    case 32:
      return isOperationLegal(TargetOpcode::G_FMAD, MVT::f32);
    case 64:
      return isOperationLegal(TargetOpcode::G_FMAD, MVT::f64);
3266 | default: |
3267 | break; |
3268 | } |
3269 | |
3270 | return false; |
3271 | } |
3272 | |
  /// Returns true if \p N can be combined with another node to form an
  /// ISD::FMAD. \p N may be an ISD::FADD, ISD::FSUB, or an ISD::FMUL which
  /// will be distributed into an fadd/fsub.
  virtual bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const {
    assert((N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB ||
            N->getOpcode() == ISD::FMUL) &&
           "unexpected node in FMAD forming combine");
    return isOperationLegal(ISD::FMAD, N->getValueType(0));
3281 | } |
3282 | |
3283 | // Return true when the decision to generate FMA's (or FMS, FMLA etc) rather |
3284 | // than FMUL and ADD is delegated to the machine combiner. |
3285 | virtual bool generateFMAsInMachineCombiner(EVT VT, |
3286 | CodeGenOptLevel OptLevel) const { |
3287 | return false; |
3288 | } |
3289 | |
3290 | /// Return true if it's profitable to narrow operations of type SrcVT to |
3291 | /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not from |
3292 | /// i32 to i16. |
3293 | virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const { |
3294 | return false; |
3295 | } |
3296 | |
3297 | /// Return true if pulling a binary operation into a select with an identity |
3298 | /// constant is profitable. This is the inverse of an IR transform. |
3299 | /// Example: X + (Cond ? Y : 0) --> Cond ? (X + Y) : X |
3300 | virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, |
3301 | EVT VT) const { |
3302 | return false; |
3303 | } |
3304 | |
3305 | /// Return true if it is beneficial to convert a load of a constant to |
3306 | /// just the constant itself. |
3307 | /// On some targets it might be more efficient to use a combination of |
3308 | /// arithmetic instructions to materialize the constant instead of loading it |
3309 | /// from a constant pool. |
3310 | virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
3311 | Type *Ty) const { |
3312 | return false; |
3313 | } |
3314 | |
3315 | /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type |
3316 | /// from this source type with this index. This is needed because |
3317 | /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of |
3318 | /// the first element, and only the target knows which lowering is cheap. |
  virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                       unsigned Index) const {
3321 | return false; |
3322 | } |
3323 | |
3324 | /// Try to convert an extract element of a vector binary operation into an |
3325 | /// extract element followed by a scalar operation. |
3326 | virtual bool shouldScalarizeBinop(SDValue VecOp) const { |
3327 | return false; |
3328 | } |
3329 | |
3330 | /// Return true if extraction of a scalar element from the given vector type |
3331 | /// at the given index is cheap. For example, if scalar operations occur on |
3332 | /// the same register file as vector operations, then an extract element may |
3333 | /// be a sub-register rename rather than an actual instruction. |
  virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const {
3335 | return false; |
3336 | } |
3337 | |
3338 | /// Try to convert math with an overflow comparison into the corresponding DAG |
3339 | /// node operation. Targets may want to override this independently of whether |
3340 | /// the operation is legal/custom for the given type because it may obscure |
3341 | /// matching of other patterns. |
3342 | virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, |
3343 | bool MathUsed) const { |
3344 | // TODO: The default logic is inherited from code in CodeGenPrepare. |
3345 | // The opcode should not make a difference by default? |
3346 | if (Opcode != ISD::UADDO) |
3347 | return false; |
3348 | |
    // Allow the transform as long as we have an integer type that is not
    // obviously illegal or unsupported and the math result is used besides
    // the overflow check. On some targets (e.g. SPARC), it is not
    // profitable to form an overflow op if the math result has no
    // concrete users.
3354 | if (VT.isVector()) |
3355 | return false; |
    return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
3357 | } |
3358 | |
3359 | // Return true if it is profitable to use a scalar input to a BUILD_VECTOR |
3360 | // even if the vector itself has multiple uses. |
3361 | virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const { |
3362 | return false; |
3363 | } |
3364 | |
  // Return true if CodeGenPrepare should consider splitting the large offset
  // of a GEP so that the GEP fits into the addressing mode and can be sunk
  // into the same blocks as its users.
3368 | virtual bool shouldConsiderGEPOffsetSplit() const { return false; } |
3369 | |
3370 | /// Return true if creating a shift of the type by the given |
3371 | /// amount is not profitable. |
3372 | virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const { |
3373 | return false; |
3374 | } |
3375 | |
3376 | // Should we fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) |
3377 | // A) where y has a single bit set? |
3378 | virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, |
3379 | const APInt &AndMask) const { |
3380 | unsigned ShCt = AndMask.getBitWidth() - 1; |
    return !shouldAvoidTransformToShift(VT, ShCt);
3382 | } |
3383 | |
3384 | /// Does this target require the clearing of high-order bits in a register |
3385 | /// passed to the fp16 to fp conversion library function. |
3386 | virtual bool shouldKeepZExtForFP16Conv() const { return false; } |
3387 | |
3388 | /// Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT |
3389 | /// from min(max(fptoi)) saturation patterns. |
3390 | virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const { |
3391 | return isOperationLegalOrCustom(Op, VT); |
3392 | } |
3393 | |
3394 | /// Should we expand [US]CMP nodes using two selects and two compares, or by |
3395 | /// doing arithmetic on boolean types |
3396 | virtual bool shouldExpandCmpUsingSelects() const { return false; } |
3397 | |
3398 | /// Does this target support complex deinterleaving |
3399 | virtual bool isComplexDeinterleavingSupported() const { return false; } |
3400 | |
3401 | /// Does this target support complex deinterleaving with the given operation |
3402 | /// and type |
3403 | virtual bool isComplexDeinterleavingOperationSupported( |
3404 | ComplexDeinterleavingOperation Operation, Type *Ty) const { |
3405 | return false; |
3406 | } |
3407 | |
3408 | /// Create the IR node for the given complex deinterleaving operation. |
3409 | /// If one cannot be created using all the given inputs, nullptr should be |
3410 | /// returned. |
3411 | virtual Value *createComplexDeinterleavingIR( |
3412 | IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, |
3413 | ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, |
3414 | Value *Accumulator = nullptr) const { |
3415 | return nullptr; |
3416 | } |
3417 | |
3418 | /// Rename the default libcall routine name for the specified libcall. |
3419 | void setLibcallName(RTLIB::Libcall Call, const char *Name) { |
3420 | Libcalls.setLibcallName(Call, Name); |
3421 | } |
3422 | |
3423 | void setLibcallName(ArrayRef<RTLIB::Libcall> Calls, const char *Name) { |
3424 | Libcalls.setLibcallName(Calls, Name); |
3425 | } |
3426 | |
3427 | /// Get the libcall routine name for the specified libcall. |
3428 | const char *getLibcallName(RTLIB::Libcall Call) const { |
3429 | return Libcalls.getLibcallName(Call); |
3430 | } |
3431 | |
3432 | /// Override the default CondCode to be used to test the result of the |
3433 | /// comparison libcall against zero. |
3434 | /// FIXME: This can't be merged with 'RuntimeLibcallsInfo' because of the ISD. |
3435 | void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) { |
3436 | CmpLibcallCCs[Call] = CC; |
3437 | } |
3438 | |
3439 | |
3440 | /// Get the CondCode that's to be used to test the result of the comparison |
3441 | /// libcall against zero. |
3442 | /// FIXME: This can't be merged with 'RuntimeLibcallsInfo' because of the ISD. |
3443 | ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const { |
3444 | return CmpLibcallCCs[Call]; |
3445 | } |
3446 | |
3447 | |
3448 | /// Set the CallingConv that should be used for the specified libcall. |
3449 | void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { |
3450 | Libcalls.setLibcallCallingConv(Call, CC); |
3451 | } |
3452 | |
3453 | /// Get the CallingConv that should be used for the specified libcall. |
3454 | CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { |
3455 | return Libcalls.getLibcallCallingConv(Call); |
3456 | } |
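
  // For example (illustrative names), a target whose runtime provides a custom
  // single-precision divide helper with a fast calling convention might do:
  //
  //   setLibcallName(RTLIB::DIV_F32, "__mytarget_fdivsf3");
  //   setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::Fast);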
3457 | |
3458 | /// Execute target specific actions to finalize target lowering. |
  /// This is used to set extra flags in MachineFrameInformation and to freeze
  /// the set of reserved registers.
3461 | /// The default implementation just freezes the set of reserved registers. |
3462 | virtual void finalizeLowering(MachineFunction &MF) const; |
3463 | |
3464 | //===----------------------------------------------------------------------===// |
3465 | // GlobalISel Hooks |
3466 | //===----------------------------------------------------------------------===// |
3467 | /// Check whether or not \p MI needs to be moved close to its uses. |
  virtual bool shouldLocalize(const MachineInstr &MI,
                              const TargetTransformInfo *TTI) const;
3469 | |
3470 | |
3471 | private: |
3472 | const TargetMachine &TM; |
3473 | |
3474 | /// Tells the code generator that the target has multiple (allocatable) |
3475 | /// condition registers that can be used to store the results of comparisons |
3476 | /// for use by selects and conditional branches. With multiple condition |
3477 | /// registers, the code generator will not aggressively sink comparisons into |
3478 | /// the blocks of their users. |
3479 | bool HasMultipleConditionRegisters; |
3480 | |
3481 | /// Tells the code generator that the target has BitExtract instructions. |
3482 | /// The code generator will aggressively sink "shift"s into the blocks of |
3483 | /// their users if the users will generate "and" instructions which can be |
3484 | /// combined with "shift" to BitExtract instructions. |
  bool HasExtractBitsInsn;
3486 | |
3487 | /// Tells the code generator to bypass slow divide or remainder |
3488 | /// instructions. For example, BypassSlowDivWidths[32,8] tells the code |
3489 | /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer |
3490 | /// div/rem when the operands are positive and less than 256. |
3491 | DenseMap <unsigned int, unsigned int> BypassSlowDivWidths; |
3492 | |
3493 | /// Tells the code generator that it shouldn't generate extra flow control |
3494 | /// instructions and should attempt to combine flow control instructions via |
3495 | /// predication. |
3496 | bool JumpIsExpensive; |
3497 | |
3498 | /// Information about the contents of the high-bits in boolean values held in |
3499 | /// a type wider than i1. See getBooleanContents. |
3500 | BooleanContent BooleanContents; |
3501 | |
3502 | /// Information about the contents of the high-bits in boolean values held in |
3503 | /// a type wider than i1. See getBooleanContents. |
3504 | BooleanContent BooleanFloatContents; |
3505 | |
3506 | /// Information about the contents of the high-bits in boolean vector values |
3507 | /// when the element type is wider than i1. See getBooleanContents. |
3508 | BooleanContent BooleanVectorContents; |
3509 | |
3510 | /// The target scheduling preference: shortest possible total cycles or lowest |
3511 | /// register usage. |
3512 | Sched::Preference SchedPreferenceInfo; |
3513 | |
3514 | /// The minimum alignment that any argument on the stack needs to have. |
3515 | Align MinStackArgumentAlignment; |
3516 | |
3517 | /// The minimum function alignment (used when optimizing for size, and to |
3518 | /// prevent explicitly provided alignment from leading to incorrect code). |
3519 | Align MinFunctionAlignment; |
3520 | |
3521 | /// The preferred function alignment (used when alignment unspecified and |
3522 | /// optimizing for speed). |
3523 | Align PrefFunctionAlignment; |
3524 | |
  /// The preferred loop alignment.
3526 | Align PrefLoopAlignment; |
3527 | /// The maximum amount of bytes permitted to be emitted for alignment. |
3528 | unsigned MaxBytesForAlignment; |
3529 | |
3530 | /// Size in bits of the maximum atomics size the backend supports. |
3531 | /// Accesses larger than this will be expanded by AtomicExpandPass. |
3532 | unsigned MaxAtomicSizeInBitsSupported; |
3533 | |
3534 | /// Size in bits of the maximum div/rem size the backend supports. |
3535 | /// Larger operations will be expanded by ExpandLargeDivRem. |
3536 | unsigned MaxDivRemBitWidthSupported; |
3537 | |
  /// Size in bits of the maximum fp convert the backend
3539 | /// supports. Larger operations will be expanded by ExpandLargeFPConvert. |
3540 | unsigned MaxLargeFPConvertBitWidthSupported; |
3541 | |
3542 | /// Size in bits of the minimum cmpxchg or ll/sc operation the |
3543 | /// backend supports. |
3544 | unsigned MinCmpXchgSizeInBits; |
3545 | |
3546 | /// This indicates if the target supports unaligned atomic operations. |
3547 | bool SupportsUnalignedAtomics; |
3548 | |
3549 | /// If set to a physical register, this specifies the register that |
  /// llvm.stacksave/llvm.stackrestore should save and restore.
3551 | Register StackPointerRegisterToSaveRestore; |
3552 | |
3553 | /// This indicates the default register class to use for each ValueType the |
3554 | /// target supports natively. |
3555 | const TargetRegisterClass *RegClassForVT[MVT::VALUETYPE_SIZE]; |
3556 | uint16_t NumRegistersForVT[MVT::VALUETYPE_SIZE]; |
3557 | MVT RegisterTypeForVT[MVT::VALUETYPE_SIZE]; |
3558 | |
3559 | /// This indicates the "representative" register class to use for each |
3560 | /// ValueType the target supports natively. This information is used by the |
3561 | /// scheduler to track register pressure. By default, the representative |
3562 | /// register class is the largest legal super-reg register class of the |
3563 | /// register class of the specified type. e.g. On x86, i8, i16, and i32's |
3564 | /// representative class would be GR32. |
3565 | const TargetRegisterClass *RepRegClassForVT[MVT::VALUETYPE_SIZE] = {0}; |
3566 | |
3567 | /// This indicates the "cost" of the "representative" register class for each |
3568 | /// ValueType. The cost is used by the scheduler to approximate register |
3569 | /// pressure. |
3570 | uint8_t RepRegClassCostForVT[MVT::VALUETYPE_SIZE]; |
3571 | |
3572 | /// For any value types we are promoting or expanding, this contains the value |
3573 | /// type that we are changing to. For Expanded types, this contains one step |
3574 | /// of the expand (e.g. i64 -> i32), even if there are multiple steps required |
3575 | /// (e.g. i64 -> i16). For types natively supported by the system, this holds |
3576 | /// the same type (e.g. i32 -> i32). |
3577 | MVT TransformToType[MVT::VALUETYPE_SIZE]; |
3578 | |
3579 | /// For each operation and each value type, keep a LegalizeAction that |
3580 | /// indicates how instruction selection should deal with the operation. Most |
3581 | /// operations are Legal (aka, supported natively by the target), but |
3582 | /// operations that are not should be described. Note that operations on |
3583 | /// non-legal value types are not described here. |
3584 | LegalizeAction OpActions[MVT::VALUETYPE_SIZE][ISD::BUILTIN_OP_END]; |
3585 | |
3586 | /// For each load extension type and each value type, keep a LegalizeAction |
3587 | /// that indicates how instruction selection should deal with a load of a |
3588 | /// specific value type and extension type. Uses 4-bits to store the action |
3589 | /// for each of the 4 load ext types. |
3590 | uint16_t LoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; |
3591 | |
3592 | /// Similar to LoadExtActions, but for atomic loads. Only Legal or Expand |
3593 | /// (default) values are supported. |
3594 | uint16_t AtomicLoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; |
3595 | |
3596 | /// For each value type pair keep a LegalizeAction that indicates whether a |
3597 | /// truncating store of a specific value type and truncating type is legal. |
3598 | LegalizeAction TruncStoreActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; |
3599 | |
3600 | /// For each indexed mode and each value type, keep a quad of LegalizeAction |
3601 | /// that indicates how instruction selection should deal with the load / |
3602 | /// store / maskedload / maskedstore. |
3603 | /// |
3604 | /// The first dimension is the value_type for the reference. The second |
3605 | /// dimension represents the various modes for load store. |
3606 | uint16_t IndexedModeActions[MVT::VALUETYPE_SIZE][ISD::LAST_INDEXED_MODE]; |
3607 | |
3608 | /// For each condition code (ISD::CondCode) keep a LegalizeAction that |
3609 | /// indicates how instruction selection should deal with the condition code. |
3610 | /// |
3611 | /// Because each CC action takes up 4 bits, we need to have the array size be |
3612 | /// large enough to fit all of the value types. This can be done by rounding |
3613 | /// up the MVT::VALUETYPE_SIZE value to the next multiple of 8. |
3614 | uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::VALUETYPE_SIZE + 7) / 8]; |
3615 | |
3616 | ValueTypeActionImpl ValueTypeActions; |
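     | 
     | // Illustrative sketch: targets do not write to the tables above directly.
     | // A target's constructor typically populates them through the public
     | // setters declared elsewhere in this class, e.g. (the choices below are
     | // hypothetical, not taken from any in-tree backend):
     | //
     | //   setOperationAction(ISD::SDIV, MVT::i64, Expand);
     | //   setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
     | //   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
     | //   setCondCodeAction(ISD::SETUGT, MVT::i32, Custom);
     | //
     | // The legalizer and DAG combiner then consult OpActions, LoadExtActions,
     | // TruncStoreActions and CondCodeActions via getOperationAction() and
     | // friends.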
3617 | |
3618 | private: |
3619 | /// Targets can specify ISD nodes that they would like PerformDAGCombine |
3620 | /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this |
3621 | /// array. |
3622 | unsigned char |
3623 | TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT]; |
3624 | |
3625 | /// For operations that must be promoted to a specific type, this holds the |
3626 | /// destination type. This map should be sparse, so don't hold it as an |
3627 | /// array. |
3628 | /// |
3629 | /// Targets add entries to this map with AddPromotedToType(..), clients access |
3630 | /// this with getTypeToPromoteTo(..). |
3631 | std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType> |
3632 | PromoteToType; |
3633 | |
3634 | /// The list of libcalls that the target will use. |
3635 | RTLIB::RuntimeLibcallsInfo Libcalls; |
3636 | |
3637 | /// The ISD::CondCode that should be used to test the result of each of the |
3638 | /// comparison libcall against zero. |
3639 | ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL]; |
3640 | |
3641 | /// The bits of IndexedModeActions used to store the legalization actions.
3642 | /// We store the data as | ML | MS | L | S |, each taking 4 bits.
3643 | enum IndexedModeActionsBits { |
3644 | IMAB_Store = 0, |
3645 | IMAB_Load = 4, |
3646 | IMAB_MaskedStore = 8, |
3647 | IMAB_MaskedLoad = 12 |
3648 | }; |
3649 | |
3650 | void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift, |
3651 | LegalizeAction Action) { |
3652 | assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && |
3653 | (unsigned)Action < 0xf && "Table isn't big enough!" ); |
3654 | unsigned Ty = (unsigned)VT.SimpleTy; |
3655 | IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift); |
3656 | IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift; |
3657 | } |
3658 | |
3659 | LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT, |
3660 | unsigned Shift) const { |
3661 | assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && |
3662 | "Table isn't big enough!" ); |
3663 | unsigned Ty = (unsigned)VT.SimpleTy; |
3664 | return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf); |
3665 | } |
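     | 
     | // Worked example of the nibble packing above (illustrative only): with the
     | // IMAB_* shifts, the 16-bit entry for a given value type and indexed mode
     | // holds, from the least significant bits upwards, the Store, Load,
     | // MaskedStore and MaskedLoad actions, 4 bits each. Reading the plain load
     | // action for a pre-incremented i32 access therefore looks like:
     | //
     | //   LegalizeAction A = getIndexedModeAction(ISD::PRE_INC, MVT::i32,
     | //                                           IMAB_Load); // bits [4,8)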
3666 | |
3667 | protected: |
3668 | /// Return true if the extension represented by \p I is free. |
3669 | /// \pre \p I is a sign, zero, or fp extension and |
3670 | /// is[Z|FP]ExtFree of the related types is not true. |
3671 | virtual bool isExtFreeImpl(const Instruction *I) const { return false; } |
3672 | |
3673 | /// Depth that GatherAllAliases should continue looking for chain |
3674 | /// dependencies when trying to find a more preferable chain. As an |
3675 | /// approximation, this should be more than the number of consecutive stores |
3676 | /// expected to be merged. |
3677 | unsigned GatherAllAliasesMaxDepth; |
3678 | |
3679 | /// \brief Specify maximum number of store instructions per memset call. |
3680 | /// |
3681 | /// When lowering \@llvm.memset this field specifies the maximum number of |
3682 | /// store operations that may be substituted for the call to memset. Targets |
3683 | /// must set this value based on the cost threshold for that target. Targets |
3684 | /// should assume that the memset will be done using as many of the largest |
3685 | /// store operations first, followed by smaller ones, if necessary, per |
3686 | /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine |
3687 | /// with 16-bit alignment would result in four 2-byte stores and one 1-byte |
3688 | /// store. This only applies to setting a constant array of a constant size. |
3689 | unsigned MaxStoresPerMemset; |
3690 | /// Likewise for functions with the OptSize attribute. |
3691 | unsigned MaxStoresPerMemsetOptSize; |
3692 | |
3693 | /// \brief Specify maximum number of store instructions per memcpy call. |
3694 | /// |
3695 | /// When lowering \@llvm.memcpy this field specifies the maximum number of |
3696 | /// store operations that may be substituted for a call to memcpy. Targets |
3697 | /// must set this value based on the cost threshold for that target. Targets |
3698 | /// should assume that the memcpy will be done using as many of the largest |
3699 | /// store operations first, followed by smaller ones, if necessary, per |
3700 | /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine |
3701 | /// with 32-bit alignment would result in one 4-byte store, one 2-byte store,
3702 | /// and one 1-byte store. This only applies to copying a constant array of
3703 | /// constant size. |
3704 | unsigned MaxStoresPerMemcpy; |
3705 | /// Likewise for functions with the OptSize attribute. |
3706 | unsigned MaxStoresPerMemcpyOptSize; |
3707 | /// \brief Specify max number of store instructions to glue in inlined memcpy. |
3708 | /// |
3709 | /// When memcpy is inlined based on MaxStoresPerMemcpy, specify the maximum
3710 | /// number of store instructions to keep together. This helps in pairing and
3711 | /// vectorization later on.
3712 | unsigned MaxGluedStoresPerMemcpy = 0; |
3713 | |
3714 | /// \brief Specify maximum number of load instructions per memcmp call. |
3715 | /// |
3716 | /// When lowering \@llvm.memcmp this field specifies the maximum number of |
3717 | /// pairs of load operations that may be substituted for a call to memcmp. |
3718 | /// Targets must set this value based on the cost threshold for that target. |
3719 | /// Targets should assume that the memcmp will be done using as many of the |
3720 | /// largest load operations first, followed by smaller ones, if necessary, per |
3721 | /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine |
3722 | /// with 32-bit alignment would result in one 4-byte load, one 2-byte load,
3723 | /// and one 1-byte load. This only applies to copying a constant array of
3724 | /// constant size. |
3725 | unsigned MaxLoadsPerMemcmp; |
3726 | /// Likewise for functions with the OptSize attribute. |
3727 | unsigned MaxLoadsPerMemcmpOptSize; |
3728 | |
3729 | /// \brief Specify maximum number of store instructions per memmove call. |
3730 | /// |
3731 | /// When lowering \@llvm.memmove this field specifies the maximum number of |
3732 | /// store instructions that may be substituted for a call to memmove. Targets |
3733 | /// must set this value based on the cost threshold for that target. Targets |
3734 | /// should assume that the memmove will be done using as many of the largest |
3735 | /// store operations first, followed by smaller ones, if necessary, per |
3736 | /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine |
3737 | /// with 8-bit alignment would result in nine 1-byte stores. This only |
3738 | /// applies to copying a constant array of constant size. |
3739 | unsigned MaxStoresPerMemmove; |
3740 | /// Likewise for functions with the OptSize attribute. |
3741 | unsigned MaxStoresPerMemmoveOptSize; |
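     | 
     | // Illustrative sketch (the numbers are hypothetical, not taken from any
     | // in-tree target): a backend tunes these thresholds in its TargetLowering
     | // constructor, typically with smaller limits for size-optimized functions:
     | //
     | //   MaxStoresPerMemset  = 16; MaxStoresPerMemsetOptSize  = 4;
     | //   MaxStoresPerMemcpy  = 8;  MaxStoresPerMemcpyOptSize  = 4;
     | //   MaxStoresPerMemmove = 8;  MaxStoresPerMemmoveOptSize = 4;
     | //   MaxLoadsPerMemcmp   = 8;  MaxLoadsPerMemcmpOptSize   = 4;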
3742 | |
3743 | /// Tells the code generator that select is more expensive than a branch if |
3744 | /// the branch is usually predicted right. |
3745 | bool PredictableSelectIsExpensive; |
3746 | |
3747 | /// \see enableExtLdPromotion. |
3748 | bool EnableExtLdPromotion; |
3749 | |
3750 | /// Return true if the value types that can be represented by the specified |
3751 | /// register class are all legal. |
3752 | bool isLegalRC(const TargetRegisterInfo &TRI, |
3753 | const TargetRegisterClass &RC) const; |
3754 | |
3755 | /// Replace/modify any TargetFrameIndex operands with a target-dependent
3756 | /// sequence of memory operands that is recognized by PrologEpilogInserter. |
3757 | MachineBasicBlock *emitPatchPoint(MachineInstr &MI, |
3758 | MachineBasicBlock *MBB) const; |
3759 | |
3760 | bool IsStrictFPEnabled; |
3761 | }; |
3762 | |
3763 | /// This class defines information used to lower LLVM code to legal SelectionDAG |
3764 | /// operators that the target instruction selector can accept natively. |
3765 | /// |
3766 | /// This class also defines callbacks that targets must implement to lower |
3767 | /// target-specific constructs to SelectionDAG operators. |
3768 | class TargetLowering : public TargetLoweringBase { |
3769 | public: |
3770 | struct DAGCombinerInfo; |
3771 | struct MakeLibCallOptions; |
3772 | |
3773 | TargetLowering(const TargetLowering &) = delete; |
3774 | TargetLowering &operator=(const TargetLowering &) = delete; |
3775 | |
3776 | explicit TargetLowering(const TargetMachine &TM); |
3777 | |
3778 | bool isPositionIndependent() const; |
3779 | |
3780 | virtual bool isSDNodeSourceOfDivergence(const SDNode *N, |
3781 | FunctionLoweringInfo *FLI, |
3782 | UniformityInfo *UA) const { |
3783 | return false; |
3784 | } |
3785 | |
3786 | // Lets the target control the following reassociation of operands:
3787 | // (op (op x, c1), y) -> (op (op x, y), c1), where N0 is (op x, c1) and N1 is
3788 | // y. By default, any case where N0 has a single use is considered profitable.
3789 | // This behavior reflects the condition replaced by this target hook call in
3790 | // the DAGCombiner. Any particular target can implement its own heuristic to
3791 | // restrict the common combiner.
3792 | virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, |
3793 | SDValue N1) const { |
3794 | return N0.hasOneUse(); |
3795 | } |
3796 | |
3797 | // Lets the target control the following reassociation of operands:
3798 | // (op (op x, c1), y) -> (op (op x, y), c1), where N0 is (op x, c1) and N1 is
3799 | // y. By default, any case where N0 has a single use is considered profitable.
3800 | // This behavior reflects the condition replaced by this target hook call in
3801 | // the combiner. Any particular target can implement its own heuristic to
3802 | // restrict the common combiner.
3803 | virtual bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0, |
3804 | Register N1) const { |
3805 | return MRI.hasOneNonDBGUse(N0);
3806 | } |
3807 | |
3808 | virtual bool isSDNodeAlwaysUniform(const SDNode * N) const { |
3809 | return false; |
3810 | } |
3811 | |
3812 | /// Returns true, and sets the base pointer, offset pointer, and addressing
3813 | /// mode by reference, if the node's address can be legally represented as a
3814 | /// pre-indexed load / store address.
3815 | virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/, |
3816 | SDValue &/*Offset*/, |
3817 | ISD::MemIndexedMode &/*AM*/, |
3818 | SelectionDAG &/*DAG*/) const { |
3819 | return false; |
3820 | } |
3821 | |
3822 | /// Returns true, and sets the base pointer, offset pointer, and addressing
3823 | /// mode by reference, if this node can be combined with a load / store to
3824 | /// form a post-indexed load / store.
3825 | virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/, |
3826 | SDValue &/*Base*/, |
3827 | SDValue &/*Offset*/, |
3828 | ISD::MemIndexedMode &/*AM*/, |
3829 | SelectionDAG &/*DAG*/) const { |
3830 | return false; |
3831 | } |
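     | 
     | // Minimal sketch of an override, assuming a hypothetical target whose only
     | // post-indexed form adds a constant to the pointer after the access (a real
     | // implementation would also check that Op's other operand is N's pointer):
     | //
     | //   bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
     | //                                   SDValue &Offset, ISD::MemIndexedMode &AM,
     | //                                   SelectionDAG &DAG) const override {
     | //     if (Op->getOpcode() != ISD::ADD ||
     | //         !isa<ConstantSDNode>(Op->getOperand(1)))
     | //       return false;
     | //     Base = Op->getOperand(0);
     | //     Offset = Op->getOperand(1);
     | //     AM = ISD::POST_INC;
     | //     return true;
     | //   }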
3832 | |
3833 | /// Returns true if the specified base+offset is a legal indexed addressing |
3834 | /// mode for this target. \p MI is the load or store instruction that is being |
3835 | /// considered for transformation. |
3836 | virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, |
3837 | bool IsPre, MachineRegisterInfo &MRI) const { |
3838 | return false; |
3839 | } |
3840 | |
3841 | /// Return the entry encoding for a jump table in the current function. The |
3842 | /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. |
3843 | virtual unsigned getJumpTableEncoding() const; |
3844 | |
3845 | virtual const MCExpr * |
3846 | LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/, |
3847 | const MachineBasicBlock * /*MBB*/, unsigned /*uid*/, |
3848 | MCContext &/*Ctx*/) const { |
3849 | llvm_unreachable("Need to implement this hook if target has custom JTIs" ); |
3850 | } |
3851 | |
3852 | /// Returns relocation base for the given PIC jumptable. |
3853 | virtual SDValue getPICJumpTableRelocBase(SDValue Table, |
3854 | SelectionDAG &DAG) const; |
3855 | |
3856 | /// This returns the relocation base for the given PIC jumptable, the same as |
3857 | /// getPICJumpTableRelocBase, but as an MCExpr. |
3858 | virtual const MCExpr * |
3859 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, |
3860 | unsigned JTI, MCContext &Ctx) const; |
3861 | |
3862 | /// Return true if folding a constant offset with the given GlobalAddress is |
3863 | /// legal. It is frequently not legal in PIC relocation models. |
3864 | virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; |
3865 | |
3866 | /// On x86, return true if the operand with index OpNo is a CALL or JUMP |
3867 | /// instruction, which can use either a memory constraint or an address |
3868 | /// constraint. -fasm-blocks "__asm call foo" lowers to |
3869 | /// call void asm sideeffect inteldialect "call ${0:P}", "*m..." |
3870 | /// |
3871 | /// This function is used by a hack to choose the address constraint, |
3872 | /// lowering to a direct call. |
3873 | virtual bool |
3874 | isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs, |
3875 | unsigned OpNo) const { |
3876 | return false; |
3877 | } |
3878 | |
3879 | bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, |
3880 | SDValue &Chain) const; |
3881 | |
3882 | void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, |
3883 | SDValue &NewRHS, ISD::CondCode &CCCode, |
3884 | const SDLoc &DL, const SDValue OldLHS, |
3885 | const SDValue OldRHS) const; |
3886 | |
3887 | void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, |
3888 | SDValue &NewRHS, ISD::CondCode &CCCode, |
3889 | const SDLoc &DL, const SDValue OldLHS, |
3890 | const SDValue OldRHS, SDValue &Chain, |
3891 | bool IsSignaling = false) const; |
3892 | |
3893 | virtual SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, |
3894 | SDValue Chain, MachineMemOperand *MMO, |
3895 | SDValue &NewLoad, SDValue Ptr, |
3896 | SDValue PassThru, SDValue Mask) const { |
3897 | llvm_unreachable("Not Implemented" ); |
3898 | } |
3899 | |
3900 | virtual SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, |
3901 | SDValue Chain, MachineMemOperand *MMO, |
3902 | SDValue Ptr, SDValue Val, |
3903 | SDValue Mask) const { |
3904 | llvm_unreachable("Not Implemented" ); |
3905 | } |
3906 | |
3907 | /// Returns a pair of (return value, chain). |
3908 | /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. |
3909 | std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, |
3910 | EVT RetVT, ArrayRef<SDValue> Ops, |
3911 | MakeLibCallOptions CallOptions, |
3912 | const SDLoc &dl, |
3913 | SDValue Chain = SDValue()) const; |
3914 | |
3915 | /// Check whether parameters to a call that are passed in callee saved |
3916 | /// registers are the same as from the calling function. This needs to be |
3917 | /// checked for tail call eligibility. |
3918 | bool parametersInCSRMatch(const MachineRegisterInfo &MRI, |
3919 | const uint32_t *CallerPreservedMask, |
3920 | const SmallVectorImpl<CCValAssign> &ArgLocs, |
3921 | const SmallVectorImpl<SDValue> &OutVals) const; |
3922 | |
3923 | //===--------------------------------------------------------------------===// |
3924 | // TargetLowering Optimization Methods |
3925 | // |
3926 | |
3927 | /// A convenience struct that encapsulates a DAG, and two SDValues for |
3928 | /// returning information from TargetLowering to its clients that want to |
3929 | /// combine. |
3930 | struct TargetLoweringOpt { |
3931 | SelectionDAG &DAG; |
3932 | bool LegalTys; |
3933 | bool LegalOps; |
3934 | SDValue Old; |
3935 | SDValue New; |
3936 | |
3937 | explicit TargetLoweringOpt(SelectionDAG &InDAG, |
3938 | bool LT, bool LO) : |
3939 | DAG(InDAG), LegalTys(LT), LegalOps(LO) {} |
3940 | |
3941 | bool LegalTypes() const { return LegalTys; } |
3942 | bool LegalOperations() const { return LegalOps; } |
3943 | |
3944 | bool CombineTo(SDValue O, SDValue N) { |
3945 | Old = O; |
3946 | New = N; |
3947 | return true; |
3948 | } |
3949 | }; |
3950 | |
3951 | /// Determines the optimal series of memory ops to replace the memset / memcpy. |
3952 | /// Return true if the number of memory ops is below the threshold (Limit). |
3953 | /// Note that this is always the case when Limit is ~0. |
3954 | /// It returns, by reference, the types of the sequence of memory ops used
3955 | /// to perform the memset / memcpy.
3956 | virtual bool |
3957 | findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, |
3958 | const MemOp &Op, unsigned DstAS, unsigned SrcAS, |
3959 | const AttributeList &FuncAttributes) const; |
3960 | |
3961 | /// Check to see if the specified operand of the specified instruction is a |
3962 | /// constant integer. If so, check to see if there are any bits set in the |
3963 | /// constant that are not demanded. If so, shrink the constant and return |
3964 | /// true. |
3965 | bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
3966 | const APInt &DemandedElts, |
3967 | TargetLoweringOpt &TLO) const; |
3968 | |
3969 | /// Helper wrapper around ShrinkDemandedConstant, demanding all elements. |
3970 | bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
3971 | TargetLoweringOpt &TLO) const; |
3972 | |
3973 | // Target hook to do target-specific const optimization, which is called by |
3974 | // ShrinkDemandedConstant. This function should return true if the target |
3975 | // doesn't want ShrinkDemandedConstant to further optimize the constant. |
3976 | virtual bool targetShrinkDemandedConstant(SDValue Op, |
3977 | const APInt &DemandedBits, |
3978 | const APInt &DemandedElts, |
3979 | TargetLoweringOpt &TLO) const { |
3980 | return false; |
3981 | } |
3982 | |
3983 | /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. |
3984 | /// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast, |
3985 | /// but it could be generalized for targets with other types of implicit |
3986 | /// widening casts. |
3987 | bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, |
3988 | const APInt &DemandedBits, |
3989 | TargetLoweringOpt &TLO) const; |
3990 | |
3991 | /// Look at Op. At this point, we know that only the DemandedBits bits of the |
3992 | /// result of Op are ever used downstream. If we can use this information to |
3993 | /// simplify Op, create a new simplified DAG node and return true, returning |
3994 | /// the original and new nodes in Old and New. Otherwise, analyze the |
3995 | /// expression and return a mask of KnownOne and KnownZero bits for the |
3996 | /// expression (used to simplify the caller). The KnownZero/One bits may only |
3997 | /// be accurate for those bits in the Demanded masks. |
3998 | /// \p AssumeSingleUse When this parameter is true, this function will |
3999 | /// attempt to simplify \p Op even if there are multiple uses. |
4000 | /// Callers are responsible for correctly updating the DAG based on the |
4001 | /// results of this function, because simply replacing TLO.Old |
4002 | /// with TLO.New will be incorrect when this parameter is true and TLO.Old |
4003 | /// has multiple uses. |
4004 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
4005 | const APInt &DemandedElts, KnownBits &Known, |
4006 | TargetLoweringOpt &TLO, unsigned Depth = 0, |
4007 | bool AssumeSingleUse = false) const; |
4008 | |
4009 | /// Helper wrapper around SimplifyDemandedBits, demanding all elements. |
4010 | /// Adds Op back to the worklist upon success. |
4011 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
4012 | KnownBits &Known, TargetLoweringOpt &TLO, |
4013 | unsigned Depth = 0, |
4014 | bool AssumeSingleUse = false) const; |
4015 | |
4016 | /// Helper wrapper around SimplifyDemandedBits. |
4017 | /// Adds Op back to the worklist upon success. |
4018 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
4019 | DAGCombinerInfo &DCI) const; |
4020 | |
4021 | /// Helper wrapper around SimplifyDemandedBits. |
4022 | /// Adds Op back to the worklist upon success. |
4023 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
4024 | const APInt &DemandedElts, |
4025 | DAGCombinerInfo &DCI) const; |
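     | 
     | // Typical use from a target combine (illustrative; the bit width and the
     | // surrounding combine are hypothetical):
     | //
     | //   // Only the low 16 bits of the first operand are observed downstream.
     | //   APInt DemandedBits = APInt::getLowBitsSet(32, 16);
     | //   if (SimplifyDemandedBits(N->getOperand(0), DemandedBits, DCI))
     | //     return SDValue(N, 0); // the DAG was updated in place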
4026 | |
4027 | /// More limited version of SimplifyDemandedBits that can be used to "look |
4028 | /// through" ops that don't contribute to the DemandedBits/DemandedElts - |
4029 | /// bitwise ops etc. |
4030 | SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, |
4031 | const APInt &DemandedElts, |
4032 | SelectionDAG &DAG, |
4033 | unsigned Depth = 0) const; |
4034 | |
4035 | /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all |
4036 | /// elements. |
4037 | SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, |
4038 | SelectionDAG &DAG, |
4039 | unsigned Depth = 0) const; |
4040 | |
4041 | /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all |
4042 | /// bits from only some vector elements. |
4043 | SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, |
4044 | const APInt &DemandedElts, |
4045 | SelectionDAG &DAG, |
4046 | unsigned Depth = 0) const; |
4047 | |
4048 | /// Look at Vector Op. At this point, we know that only the DemandedElts |
4049 | /// elements of the result of Op are ever used downstream. If we can use |
4050 | /// this information to simplify Op, create a new simplified DAG node and |
4051 | /// return true, storing the original and new nodes in TLO. |
4052 | /// Otherwise, analyze the expression and return a mask of KnownUndef and |
4053 | /// KnownZero elements for the expression (used to simplify the caller). |
4054 | /// The KnownUndef/Zero elements may only be accurate for those bits |
4055 | /// in the DemandedMask. |
4056 | /// \p AssumeSingleUse When this parameter is true, this function will |
4057 | /// attempt to simplify \p Op even if there are multiple uses. |
4058 | /// Callers are responsible for correctly updating the DAG based on the |
4059 | /// results of this function, because simply replacing TLO.Old |
4060 | /// with TLO.New will be incorrect when this parameter is true and TLO.Old |
4061 | /// has multiple uses. |
4062 | bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, |
4063 | APInt &KnownUndef, APInt &KnownZero, |
4064 | TargetLoweringOpt &TLO, unsigned Depth = 0, |
4065 | bool AssumeSingleUse = false) const; |
4066 | |
4067 | /// Helper wrapper around SimplifyDemandedVectorElts. |
4068 | /// Adds Op back to the worklist upon success. |
4069 | bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, |
4070 | DAGCombinerInfo &DCI) const; |
4071 | |
4072 | /// Return true if the target supports simplifying demanded vector elements by |
4073 | /// converting them to undefs. |
4074 | virtual bool |
4075 | shouldSimplifyDemandedVectorElts(SDValue Op, |
4076 | const TargetLoweringOpt &TLO) const { |
4077 | return true; |
4078 | } |
4079 | |
4080 | /// Determine which of the bits specified in Mask are known to be either zero |
4081 | /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts |
4082 | /// argument allows us to only collect the known bits that are shared by the |
4083 | /// requested vector elements. |
4084 | virtual void computeKnownBitsForTargetNode(const SDValue Op, |
4085 | KnownBits &Known, |
4086 | const APInt &DemandedElts, |
4087 | const SelectionDAG &DAG, |
4088 | unsigned Depth = 0) const; |
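     | 
     | // Sketch of a conventional override (MYISD::LOAD_ZEXT8 is a hypothetical
     | // target opcode): a node that zero-extends an 8-bit value to the full
     | // register width would report its upper bits as known zero so generic
     | // combines can exploit them:
     | //
     | //   if (Op.getOpcode() == MYISD::LOAD_ZEXT8)
     | //     Known.Zero.setBitsFrom(8);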
4089 | |
4090 | /// Determine which of the bits specified in Mask are known to be either zero |
4091 | /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts |
4092 | /// argument allows us to only collect the known bits that are shared by the |
4093 | /// requested vector elements. This is for GISel. |
4094 | virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, |
4095 | Register R, KnownBits &Known, |
4096 | const APInt &DemandedElts, |
4097 | const MachineRegisterInfo &MRI, |
4098 | unsigned Depth = 0) const; |
4099 | |
4100 | /// Determine the known alignment for the pointer value \p R. This can
4101 | /// typically be inferred from the number of low known 0 bits. However, for a |
4102 | /// pointer with a non-integral address space, the alignment value may be |
4103 | /// independent from the known low bits. |
4104 | virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, |
4105 | Register R, |
4106 | const MachineRegisterInfo &MRI, |
4107 | unsigned Depth = 0) const; |
4108 | |
4109 | /// Determine which of the bits of FrameIndex \p FIOp are known to be 0. |
4110 | /// Default implementation computes low bits based on alignment |
4111 | /// information. This should preserve known bits passed into it. |
4112 | virtual void computeKnownBitsForFrameIndex(int FIOp, |
4113 | KnownBits &Known, |
4114 | const MachineFunction &MF) const; |
4115 | |
4116 | /// This method can be implemented by targets that want to expose additional |
4117 | /// information about sign bits to the DAG Combiner. The DemandedElts |
4118 | /// argument allows us to only collect the minimum sign bits that are shared |
4119 | /// by the requested vector elements. |
4120 | virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, |
4121 | const APInt &DemandedElts, |
4122 | const SelectionDAG &DAG, |
4123 | unsigned Depth = 0) const; |
4124 | |
4125 | /// This method can be implemented by targets that want to expose additional |
4126 | /// information about sign bits to GlobalISel combiners. The DemandedElts |
4127 | /// argument allows us to only collect the minimum sign bits that are shared |
4128 | /// by the requested vector elements. |
4129 | virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, |
4130 | Register R, |
4131 | const APInt &DemandedElts, |
4132 | const MachineRegisterInfo &MRI, |
4133 | unsigned Depth = 0) const; |
4134 | |
4135 | /// Attempt to simplify any target nodes based on the demanded vector |
4136 | /// elements, returning true on success. Otherwise, analyze the expression and |
4137 | /// return a mask of KnownUndef and KnownZero elements for the expression |
4138 | /// (used to simplify the caller). The KnownUndef/Zero elements may only be |
4139 | /// accurate for those bits in the DemandedMask. |
4140 | virtual bool SimplifyDemandedVectorEltsForTargetNode( |
4141 | SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, |
4142 | APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const; |
4143 | |
4144 | /// Attempt to simplify any target nodes based on the demanded bits/elts, |
4145 | /// returning true on success. Otherwise, analyze the |
4146 | /// expression and return a mask of KnownOne and KnownZero bits for the |
4147 | /// expression (used to simplify the caller). The KnownZero/One bits may only |
4148 | /// be accurate for those bits in the Demanded masks. |
4149 | virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, |
4150 | const APInt &DemandedBits, |
4151 | const APInt &DemandedElts, |
4152 | KnownBits &Known, |
4153 | TargetLoweringOpt &TLO, |
4154 | unsigned Depth = 0) const; |
4155 | |
4156 | /// More limited version of SimplifyDemandedBits that can be used to "look |
4157 | /// through" ops that don't contribute to the DemandedBits/DemandedElts - |
4158 | /// bitwise ops etc. |
4159 | virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode( |
4160 | SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, |
4161 | SelectionDAG &DAG, unsigned Depth) const; |
4162 | |
4163 | /// Return true if this function can prove that \p Op is never poison |
4164 | /// and, if \p PoisonOnly is false, does not have undef bits. The DemandedElts |
4165 | /// argument limits the check to the requested vector elements. |
4166 | virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode( |
4167 | SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, |
4168 | bool PoisonOnly, unsigned Depth) const; |
4169 | |
4170 | /// Return true if Op can create undef or poison from non-undef & non-poison |
4171 | /// operands. The DemandedElts argument limits the check to the requested |
4172 | /// vector elements. |
4173 | virtual bool |
4174 | canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, |
4175 | const SelectionDAG &DAG, bool PoisonOnly, |
4176 | bool ConsiderFlags, unsigned Depth) const; |
4177 | |
4178 | /// Tries to build a legal vector shuffle using the provided parameters |
4179 | /// or equivalent variations. The Mask argument may be modified as the
4180 | /// function tries different variations. |
4181 | /// Returns an empty SDValue if the operation fails. |
4182 | SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, |
4183 | SDValue N1, MutableArrayRef<int> Mask, |
4184 | SelectionDAG &DAG) const; |
4185 | |
4186 | /// This method returns the constant pool value that will be loaded by LD. |
4187 | /// NOTE: You must check for implicit extensions of the constant by LD. |
4188 | virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const; |
4189 | |
4190 | /// If \p SNaN is false, \returns true if \p Op is known to never be any |
4191 | /// NaN. If \p SNaN is true, returns true if \p Op is known to never be a
4192 | /// signaling NaN.
4193 | virtual bool isKnownNeverNaNForTargetNode(SDValue Op, |
4194 | const SelectionDAG &DAG, |
4195 | bool SNaN = false, |
4196 | unsigned Depth = 0) const; |
4197 | |
4198 | /// Return true if vector \p Op has the same value across all \p DemandedElts, |
4199 | /// indicating any elements which may be undef in the output \p UndefElts. |
4200 | virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, |
4201 | APInt &UndefElts, |
4202 | const SelectionDAG &DAG, |
4203 | unsigned Depth = 0) const; |
4204 | |
4205 | /// Returns true if the given Opc is considered a canonical constant for the |
4206 | /// target, which should not be transformed back into a BUILD_VECTOR. |
4207 | virtual bool isTargetCanonicalConstantNode(SDValue Op) const { |
4208 | return Op.getOpcode() == ISD::SPLAT_VECTOR || |
4209 | Op.getOpcode() == ISD::SPLAT_VECTOR_PARTS; |
4210 | } |
4211 | |
4212 | struct DAGCombinerInfo { |
4213 | void *DC; // The DAG Combiner object. |
4214 | CombineLevel Level; |
4215 | bool CalledByLegalizer; |
4216 | |
4217 | public: |
4218 | SelectionDAG &DAG; |
4219 | |
4220 | DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc) |
4221 | : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {} |
4222 | |
4223 | bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; } |
4224 | bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; } |
4225 | bool isAfterLegalizeDAG() const { return Level >= AfterLegalizeDAG; } |
4226 | CombineLevel getDAGCombineLevel() { return Level; } |
4227 | bool isCalledByLegalizer() const { return CalledByLegalizer; } |
4228 | |
4229 | void AddToWorklist(SDNode *N); |
4230 | SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true); |
4231 | SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true); |
4232 | SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true); |
4233 | |
4234 | bool recursivelyDeleteUnusedNodes(SDNode *N); |
4235 | |
4236 | void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO); |
4237 | }; |
4238 | |
4239 | /// Return true if N is a constant or constant vector equal to the true value
4240 | /// from getBooleanContents(). |
4241 | bool isConstTrueVal(SDValue N) const; |
4242 | |
4243 | /// Return true if N is a constant or constant vector equal to the false value
4244 | /// from getBooleanContents(). |
4245 | bool isConstFalseVal(SDValue N) const; |
4246 | |
4247 | /// Return if \p N is a True value when extended to \p VT. |
4248 | bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const; |
4249 | |
4250 | /// Try to simplify a setcc built with the specified operands and cc. If it is |
4251 | /// unable to simplify it, return a null SDValue. |
4252 | SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, |
4253 | bool foldBooleans, DAGCombinerInfo &DCI, |
4254 | const SDLoc &dl) const; |
4255 | |
4256 | // For targets which wrap addresses, unwrap for analysis.
4257 | virtual SDValue unwrapAddress(SDValue N) const { return N; } |
4258 | |
4259 | /// Returns true (and the GlobalValue and the offset) if the node is a |
4260 | /// GlobalAddress + offset. |
4261 | virtual bool |
4262 | isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const; |
4263 | |
4264 | /// This method will be invoked for all target nodes and for any |
4265 | /// target-independent nodes that the target has registered with invoke it |
4266 | /// for. |
4267 | /// |
4268 | /// The semantics are as follows: |
4269 | /// Return Value: |
4270 | /// SDValue.Val == 0 - No change was made |
4271 | /// SDValue.Val == N - N was replaced, is dead, and is already handled. |
4272 | /// otherwise - N should be replaced by the returned Operand. |
4273 | /// |
4274 | /// In addition, methods provided by DAGCombinerInfo may be used to perform |
4275 | /// more complex transformations. |
4276 | /// |
4277 | virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
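     | 
     | // Skeleton of a typical override (MYISD::SOME_NODE and combineSomeNode are
     | // hypothetical placeholders):
     | //
     | //   SDValue MyTargetLowering::PerformDAGCombine(SDNode *N,
     | //                                               DAGCombinerInfo &DCI) const {
     | //     switch (N->getOpcode()) {
     | //     case MYISD::SOME_NODE:
     | //       return combineSomeNode(N, DCI);
     | //     default:
     | //       return SDValue(); // no change
     | //     }
     | //   }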
4278 | |
4279 | /// Return true if it is profitable to move this shift by a constant amount |
4280 | /// through its operand, adjusting any immediate operands as necessary to |
4281 | /// preserve semantics. This transformation may not be desirable if it |
4282 | /// disrupts a particularly auspicious target-specific tree (e.g. bitfield |
4283 | /// extraction in AArch64). By default, it returns true. |
4284 | /// |
4285 | /// @param N the shift node |
4286 | /// @param Level the current DAGCombine legalization level. |
4287 | virtual bool isDesirableToCommuteWithShift(const SDNode *N, |
4288 | CombineLevel Level) const { |
4289 | return true; |
4290 | } |
4291 | |
4292 | /// GlobalISel - return true if it is profitable to move this shift by a |
4293 | /// constant amount through its operand, adjusting any immediate operands as |
4294 | /// necessary to preserve semantics. This transformation may not be desirable |
4295 | /// if it disrupts a particularly auspicious target-specific tree (e.g. |
4296 | /// bitfield extraction in AArch64). By default, it returns true. |
4297 | /// |
4298 | /// @param MI the shift instruction |
4299 | /// @param IsAfterLegal true if running after legalization. |
4300 | virtual bool isDesirableToCommuteWithShift(const MachineInstr &MI, |
4301 | bool IsAfterLegal) const { |
4302 | return true; |
4303 | } |
4304 | |
4305 | /// GlobalISel - return true if it's profitable to perform the combine: |
4306 | /// shl ([sza]ext x), y => zext (shl x, y) |
4307 | virtual bool isDesirableToPullExtFromShl(const MachineInstr &MI) const { |
4308 | return true; |
4309 | } |
4310 | |
4311 | // Return AndOrSETCCFoldKind::{AddAnd, ABS} if it is desirable to try and
4312 | // optimize LogicOp(SETCC0, SETCC1). An example (what is implemented as of |
4313 | // writing this) is: |
4314 | // With C as a power of 2 and C != 0 and C != INT_MIN: |
4315 | // AddAnd: |
4316 | // (icmp eq A, C) | (icmp eq A, -C) |
4317 | // -> (icmp eq and(add(A, C), ~(C + C)), 0) |
4318 | // (icmp ne A, C) & (icmp ne A, -C)
4319 | // -> (icmp ne and(add(A, C), ~(C + C)), 0) |
4320 | // ABS: |
4321 | // (icmp eq A, C) | (icmp eq A, -C) |
4322 | // -> (icmp eq Abs(A), C) |
4323 | // (icmp ne A, C) & (icmp ne A, -C)
4324 | // -> (icmp ne Abs(A), C) |
4325 | // |
4326 | // @param LogicOp the logic op |
4327 | // @param SETCC0 the first of the SETCC nodes |
4328 | // @param SETCC1 the second of the SETCC nodes
4329 | virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC( |
4330 | const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const { |
4331 | return AndOrSETCCFoldKind::None; |
4332 | } |
4333 | |
4334 | /// Return true if it is profitable to combine an XOR of a logical shift |
4335 | /// to create a logical shift of NOT. This transformation may not be desirable |
4336 | /// if it disrupts a particularly auspicious target-specific tree (e.g. |
4337 | /// BIC on ARM/AArch64). By default, it returns true. |
4338 | virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const { |
4339 | return true; |
4340 | } |
4341 | |
4342 | /// Return true if the target has native support for the specified value type |
4343 | /// and it is 'desirable' to use the type for the given node type. e.g. On x86 |
4344 | /// i16 is legal, but undesirable since i16 instruction encodings are longer |
4345 | /// and some i16 instructions are slow. |
4346 | virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const { |
4347 | // By default, assume all legal types are desirable. |
4348 | return isTypeLegal(VT); |
4349 | } |
4350 | |
4351 | /// Return true if it is profitable for the dag combiner to transform a
4352 | /// floating point op of the specified opcode to an equivalent op of an
4353 | /// integer type. e.g. f32 load -> i32 load can be profitable on ARM.
4354 | virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/, |
4355 | EVT /*VT*/) const { |
4356 | return false; |
4357 | } |
4358 | |
4359 | /// This method queries the target whether it is beneficial for the dag
4360 | /// combiner to promote the specified node. If true, it should return the
4361 | /// desired promotion type by reference.
4362 | virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const { |
4363 | return false; |
4364 | } |
4365 | |
4366 | /// Return true if the target supports the swifterror attribute. It optimizes
4367 | /// loads and stores to reading and writing a specific register. |
4368 | virtual bool supportSwiftError() const { |
4369 | return false; |
4370 | } |
4371 | |
4372 | /// Return true if the target supports that a subset of CSRs for the given |
4373 | /// machine function is handled explicitly via copies. |
4374 | virtual bool supportSplitCSR(MachineFunction *MF) const { |
4375 | return false; |
4376 | } |
4377 | |
4378 | /// Return true if the target supports kcfi operand bundles. |
4379 | virtual bool supportKCFIBundles() const { return false; } |
4380 | |
4381 | /// Return true if the target supports ptrauth operand bundles. |
4382 | virtual bool supportPtrAuthBundles() const { return false; } |
4383 | |
4384 | /// Perform necessary initialization to handle a subset of CSRs explicitly |
4385 | /// via copies. This function is called at the beginning of instruction |
4386 | /// selection. |
4387 | virtual void initializeSplitCSR(MachineBasicBlock *Entry) const { |
4388 | llvm_unreachable("Not Implemented" ); |
4389 | } |
4390 | |
4391 | /// Insert explicit copies in entry and exit blocks. We copy a subset of |
4392 | /// CSRs to virtual registers in the entry block, and copy them back to |
4393 | /// physical registers in the exit blocks. This function is called at the end |
4394 | /// of instruction selection. |
4395 | virtual void insertCopiesSplitCSR( |
4396 | MachineBasicBlock *Entry, |
4397 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const { |
4398 | llvm_unreachable("Not Implemented" ); |
4399 | } |
4400 | |
4401 | /// Return the newly negated expression if the cost is not expensive, and
4402 | /// set \p Cost to indicate whether it is cheaper or neutral to do the
4403 | /// negation.
4404 | virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, |
4405 | bool LegalOps, bool OptForSize, |
4406 | NegatibleCost &Cost, |
4407 | unsigned Depth = 0) const; |
4408 | |
4409 | SDValue getCheaperOrNeutralNegatedExpression( |
4410 | SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, |
4411 | const NegatibleCost CostThreshold = NegatibleCost::Neutral, |
4412 | unsigned Depth = 0) const { |
4413 | NegatibleCost Cost = NegatibleCost::Expensive; |
4414 | SDValue Neg = |
4415 | getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); |
4416 | if (!Neg) |
4417 | return SDValue(); |
4418 | |
4419 | if (Cost <= CostThreshold) |
4420 | return Neg; |
4421 | |
4422 | // Remove the newly created node to avoid side effects on the DAG.
4423 | if (Neg->use_empty())
4424 | DAG.RemoveDeadNode(Neg.getNode());
4425 | return SDValue(); |
4426 | } |
4427 | |
4428 | /// This is the helper function to return the newly negated expression only |
4429 | /// when the cost is cheaper. |
4430 | SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, |
4431 | bool LegalOps, bool OptForSize, |
4432 | unsigned Depth = 0) const { |
4433 | return getCheaperOrNeutralNegatedExpression(Op, DAG, LegalOps, OptForSize, |
4434 | NegatibleCost::Cheaper, Depth);
4435 | } |
4436 | |
4437 | /// This is the helper function to return the newly negated expression if |
4438 | /// the cost is not expensive. |
4439 | SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, |
4440 | bool OptForSize, unsigned Depth = 0) const { |
4441 | NegatibleCost Cost = NegatibleCost::Expensive; |
4442 | return getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); |
4443 | } |
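     | 
     | // Illustrative use from a combine that only wants the negation when it is
     | // not more expensive to materialize (X, Y, DL, VT, LegalOperations and
     | // ForCodeSize are hypothetical values available at the call site):
     | //
     | //   // fold (fadd X, Y) -> (fsub X, (-Y)) when -Y is cheap, e.g. when Y is
     | //   // itself an fneg.
     | //   if (SDValue NegY = getCheaperNegatedExpression(Y, DAG, LegalOperations,
     | //                                                  ForCodeSize))
     | //     return DAG.getNode(ISD::FSUB, DL, VT, X, NegY);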
4444 | |
4445 | //===--------------------------------------------------------------------===// |
4446 | // Lowering methods - These methods must be implemented by targets so that |
4447 | // the SelectionDAGBuilder code knows how to lower these. |
4448 | // |
4449 | |
4450 | /// Target-specific splitting of values into parts that fit a register |
4451 | /// storing a legal type |
4452 | virtual bool splitValueIntoRegisterParts( |
4453 | SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, |
4454 | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { |
4455 | return false; |
4456 | } |
4457 | |
4458 | /// Allows the target to handle physreg-carried dependencies
4459 | /// in a target-specific way. Used from the ScheduleDAGSDNodes to decide
4460 | /// whether to add the edge to the dependency graph.
4461 | /// Def - input: Selection DAG node defining the physical register
4462 | /// User - input: Selection DAG node using the physical register
4463 | /// Op - input: Number of User operand
4464 | /// PhysReg - inout: set to the physical register if the edge is
4465 | /// necessary, unchanged otherwise
4466 | /// Cost - inout: physical register copy cost.
4467 | /// Returns 'true' if the edge is necessary, 'false' otherwise
4468 | virtual bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, |
4469 | const TargetRegisterInfo *TRI, |
4470 | const TargetInstrInfo *TII, |
4471 | unsigned &PhysReg, int &Cost) const { |
4472 | return false; |
4473 | } |
4474 | |
4475 | /// Target-specific combining of register parts into their original value.
4476 | virtual SDValue |
4477 | joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, |
4478 | const SDValue *Parts, unsigned NumParts, |
4479 | MVT PartVT, EVT ValueVT, |
4480 | std::optional<CallingConv::ID> CC) const { |
4481 | return SDValue(); |
4482 | } |
4483 | |
4484 | /// This hook must be implemented to lower the incoming (formal) arguments, |
4485 | /// described by the Ins array, into the specified DAG. The implementation |
4486 | /// should fill in the InVals array with legal-type argument values, and |
4487 | /// return the resulting token chain value. |
4488 | virtual SDValue LowerFormalArguments( |
4489 | SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/, |
4490 | const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/, |
4491 | SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const { |
4492 | llvm_unreachable("Not Implemented" ); |
4493 | } |
4494 | |
4495 | /// This structure contains the information necessary for lowering |
4496 | /// pointer-authenticating indirect calls. It is equivalent to the "ptrauth" |
4497 | /// operand bundle found on the call instruction, if any. |
4498 | struct PtrAuthInfo { |
4499 | uint64_t Key; |
4500 | SDValue Discriminator; |
4501 | }; |
4502 | |
4503 | /// This structure contains all information that is necessary for lowering |
4504 | /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder |
4505 | /// needs to lower a call, and targets will see this struct in their LowerCall |
4506 | /// implementation. |
4507 | struct CallLoweringInfo { |
4508 | SDValue Chain; |
4509 | Type *RetTy = nullptr; |
4510 | bool RetSExt : 1; |
4511 | bool RetZExt : 1; |
4512 | bool IsVarArg : 1; |
4513 | bool IsInReg : 1; |
4514 | bool DoesNotReturn : 1; |
4515 | bool IsReturnValueUsed : 1; |
4516 | bool IsConvergent : 1; |
4517 | bool IsPatchPoint : 1; |
4518 | bool IsPreallocated : 1; |
4519 | bool NoMerge : 1; |
4520 | |
4521 | // IsTailCall should be modified by implementations of |
4522 | // TargetLowering::LowerCall that perform tail call conversions. |
4523 | bool IsTailCall = false; |
4524 | |
4525 | // Is Call lowering done post SelectionDAG type legalization. |
4526 | bool IsPostTypeLegalization = false; |
4527 | |
4528 | unsigned NumFixedArgs = -1; |
4529 | CallingConv::ID CallConv = CallingConv::C; |
4530 | SDValue Callee; |
4531 | ArgListTy Args; |
4532 | SelectionDAG &DAG; |
4533 | SDLoc DL; |
4534 | const CallBase *CB = nullptr; |
4535 | SmallVector<ISD::OutputArg, 32> Outs; |
4536 | SmallVector<SDValue, 32> OutVals; |
4537 | SmallVector<ISD::InputArg, 32> Ins; |
4538 | SmallVector<SDValue, 4> InVals; |
4539 | const ConstantInt *CFIType = nullptr; |
4540 | SDValue ConvergenceControlToken; |
4541 | |
4542 | std::optional<PtrAuthInfo> PAI; |
4543 | |
4544 | CallLoweringInfo(SelectionDAG &DAG) |
4545 | : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), |
4546 | DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false), |
4547 | IsPatchPoint(false), IsPreallocated(false), NoMerge(false), |
4548 | DAG(DAG) {} |
4549 | |
4550 | CallLoweringInfo &setDebugLoc(const SDLoc &dl) { |
4551 | DL = dl; |
4552 | return *this; |
4553 | } |
4554 | |
4555 | CallLoweringInfo &setChain(SDValue InChain) { |
4556 | Chain = InChain; |
4557 | return *this; |
4558 | } |
4559 | |
4560 | // setCallee with target/module-specific attributes |
4561 | CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType, |
4562 | SDValue Target, ArgListTy &&ArgsList) { |
4563 | RetTy = ResultType; |
4564 | Callee = Target; |
4565 | CallConv = CC; |
4566 | NumFixedArgs = ArgsList.size(); |
4567 | Args = std::move(ArgsList); |
4568 | |
4569 | DAG.getTargetLoweringInfo().markLibCallAttributes( |
4570 | &(DAG.getMachineFunction()), CC, Args);
4571 | return *this; |
4572 | } |
4573 | |
4574 | CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType, |
4575 | SDValue Target, ArgListTy &&ArgsList, |
4576 | AttributeSet ResultAttrs = {}) { |
4577 | RetTy = ResultType; |
4578 | IsInReg = ResultAttrs.hasAttribute(Attribute::InReg);
4579 | RetSExt = ResultAttrs.hasAttribute(Attribute::SExt);
4580 | RetZExt = ResultAttrs.hasAttribute(Attribute::ZExt);
4581 | NoMerge = ResultAttrs.hasAttribute(Attribute::NoMerge);
4582 | |
4583 | Callee = Target; |
4584 | CallConv = CC; |
4585 | NumFixedArgs = ArgsList.size(); |
4586 | Args = std::move(ArgsList); |
4587 | return *this; |
4588 | } |
4589 | |
4590 | CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy, |
4591 | SDValue Target, ArgListTy &&ArgsList, |
4592 | const CallBase &Call) { |
4593 | RetTy = ResultType; |
4594 | |
4595 | IsInReg = Call.hasRetAttr(Attribute::InReg);
4596 | DoesNotReturn =
4597 | Call.doesNotReturn() ||
4598 | (!isa<InvokeInst>(Call) && isa<UnreachableInst>(Call.getNextNode()));
4599 | IsVarArg = FTy->isVarArg();
4600 | IsReturnValueUsed = !Call.use_empty();
4601 | RetSExt = Call.hasRetAttr(Attribute::SExt);
4602 | RetZExt = Call.hasRetAttr(Attribute::ZExt);
4603 | NoMerge = Call.hasFnAttr(Attribute::NoMerge);
4604 | |
4605 | Callee = Target; |
4606 | |
4607 | CallConv = Call.getCallingConv(); |
4608 | NumFixedArgs = FTy->getNumParams(); |
4609 | Args = std::move(ArgsList); |
4610 | |
4611 | CB = &Call; |
4612 | |
4613 | return *this; |
4614 | } |
4615 | |
4616 | CallLoweringInfo &setInRegister(bool Value = true) { |
4617 | IsInReg = Value; |
4618 | return *this; |
4619 | } |
4620 | |
4621 | CallLoweringInfo &setNoReturn(bool Value = true) { |
4622 | DoesNotReturn = Value; |
4623 | return *this; |
4624 | } |
4625 | |
4626 | CallLoweringInfo &setVarArg(bool Value = true) { |
4627 | IsVarArg = Value; |
4628 | return *this; |
4629 | } |
4630 | |
4631 | CallLoweringInfo &setTailCall(bool Value = true) { |
4632 | IsTailCall = Value; |
4633 | return *this; |
4634 | } |
4635 | |
4636 | CallLoweringInfo &setDiscardResult(bool Value = true) { |
4637 | IsReturnValueUsed = !Value; |
4638 | return *this; |
4639 | } |
4640 | |
4641 | CallLoweringInfo &setConvergent(bool Value = true) { |
4642 | IsConvergent = Value; |
4643 | return *this; |
4644 | } |
4645 | |
4646 | CallLoweringInfo &setSExtResult(bool Value = true) { |
4647 | RetSExt = Value; |
4648 | return *this; |
4649 | } |
4650 | |
4651 | CallLoweringInfo &setZExtResult(bool Value = true) { |
4652 | RetZExt = Value; |
4653 | return *this; |
4654 | } |
4655 | |
4656 | CallLoweringInfo &setIsPatchPoint(bool Value = true) { |
4657 | IsPatchPoint = Value; |
4658 | return *this; |
4659 | } |
4660 | |
4661 | CallLoweringInfo &setIsPreallocated(bool Value = true) { |
4662 | IsPreallocated = Value; |
4663 | return *this; |
4664 | } |
4665 | |
4666 | CallLoweringInfo &setPtrAuth(PtrAuthInfo Value) { |
4667 | PAI = Value; |
4668 | return *this; |
4669 | } |
4670 | |
4671 | CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) { |
4672 | IsPostTypeLegalization = Value; |
4673 | return *this; |
4674 | } |
4675 | |
4676 | CallLoweringInfo &setCFIType(const ConstantInt *Type) { |
4677 | CFIType = Type; |
4678 | return *this; |
4679 | } |
4680 | |
4681 | CallLoweringInfo &setConvergenceControlToken(SDValue Token) { |
4682 | ConvergenceControlToken = Token; |
4683 | return *this; |
4684 | } |
4685 | |
4686 | ArgListTy &getArgs() { |
4687 | return Args; |
4688 | } |
4689 | }; |
4690 | |
4691 | /// This structure is used to pass arguments to makeLibCall function. |
4692 | struct MakeLibCallOptions { |
4693 | // By passing the list of types before softening to makeLibCall, the target
4694 | // hook shouldExtendTypeInLibCall can get the original types before softening.
4695 | ArrayRef<EVT> OpsVTBeforeSoften; |
4696 | EVT RetVTBeforeSoften; |
4697 | bool IsSExt : 1; |
4698 | bool DoesNotReturn : 1; |
4699 | bool IsReturnValueUsed : 1; |
4700 | bool IsPostTypeLegalization : 1; |
4701 | bool IsSoften : 1; |
4702 | |
4703 | MakeLibCallOptions() |
4704 | : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true), |
4705 | IsPostTypeLegalization(false), IsSoften(false) {} |
4706 | |
4707 | MakeLibCallOptions &setSExt(bool Value = true) { |
4708 | IsSExt = Value; |
4709 | return *this; |
4710 | } |
4711 | |
4712 | MakeLibCallOptions &setNoReturn(bool Value = true) { |
4713 | DoesNotReturn = Value; |
4714 | return *this; |
4715 | } |
4716 | |
4717 | MakeLibCallOptions &setDiscardResult(bool Value = true) { |
4718 | IsReturnValueUsed = !Value; |
4719 | return *this; |
4720 | } |
4721 | |
4722 | MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) { |
4723 | IsPostTypeLegalization = Value; |
4724 | return *this; |
4725 | } |
4726 | |
4727 | MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT, |
4728 | bool Value = true) { |
4729 | OpsVTBeforeSoften = OpsVT; |
4730 | RetVTBeforeSoften = RetVT; |
4731 | IsSoften = Value; |
4732 | return *this; |
4733 | } |
4734 | }; |
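     | 
     | // Illustrative use together with makeLibCall (the libcall, types, operands
     | // and chain are placeholders):
     | //
     | //   MakeLibCallOptions CallOptions;
     | //   CallOptions.setSExt(true);
     | //   SDValue Ops[2] = {LHS, RHS};
     | //   std::pair<SDValue, SDValue> Res =
     | //       makeLibCall(DAG, RTLIB::SDIV_I128, MVT::i128, Ops, CallOptions, dl,
     | //                   Chain);
     | //   // Res.first is the return value, Res.second the outgoing chain.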
4735 | |
4736 | /// This function lowers an abstract call to a function into an actual call. |
4737 | /// This returns a pair of operands. The first element is the return value |
4738 | /// for the function (if RetTy is not VoidTy). The second element is the |
4739 | /// outgoing token chain. It calls LowerCall to do the actual lowering. |
4740 | std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const; |
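     | 
     | // Sketch of how callers typically drive LowerCallTo when they need to emit
     | // an explicit call (Chain, Callee, RetTy, Args and dl are placeholders for
     | // values available at the call site):
     | //
     | //   TargetLowering::CallLoweringInfo CLI(DAG);
     | //   CLI.setDebugLoc(dl)
     | //      .setChain(Chain)
     | //      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
     | //   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
     | //   // CallResult.first: return value; CallResult.second: token chain.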
4741 | |
4742 | /// This hook must be implemented to lower calls into the specified |
4743 | /// DAG. The outgoing arguments to the call are described by the Outs array, |
4744 | /// and the values to be returned by the call are described by the Ins |
4745 | /// array. The implementation should fill in the InVals array with legal-type |
4746 | /// return values from the call, and return the resulting token chain value. |
4747 | virtual SDValue |
4748 | LowerCall(CallLoweringInfo &/*CLI*/, |
4749 | SmallVectorImpl<SDValue> &/*InVals*/) const { |
4750 | llvm_unreachable("Not Implemented" ); |
4751 | } |
4752 | |
4753 | /// Target-specific cleanup for formal ByVal parameters. |
4754 | virtual void HandleByVal(CCState *, unsigned &, Align) const {} |
4755 | |
4756 | /// This hook should be implemented to check whether the return values |
4757 | /// described by the Outs array can fit into the return registers. If false |
4758 | /// is returned, an sret-demotion is performed. |
4759 | virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/, |
4760 | MachineFunction &/*MF*/, bool /*isVarArg*/, |
4761 | const SmallVectorImpl<ISD::OutputArg> &/*Outs*/, |
4762 | LLVMContext &/*Context*/) const |
4763 | { |
4764 | // Return true by default to get preexisting behavior. |
4765 | return true; |
4766 | } |
4767 | |
4768 | /// This hook must be implemented to lower outgoing return values, described |
4769 | /// by the Outs array, into the specified DAG. The implementation should |
4770 | /// return the resulting token chain value. |
4771 | virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/, |
4772 | bool /*isVarArg*/, |
4773 | const SmallVectorImpl<ISD::OutputArg> & /*Outs*/, |
4774 | const SmallVectorImpl<SDValue> & /*OutVals*/, |
4775 | const SDLoc & /*dl*/, |
4776 | SelectionDAG & /*DAG*/) const { |
4777 | llvm_unreachable("Not Implemented" ); |
4778 | } |
4779 | |
4780 | /// Return true if the result of the specified node is used by a return node
4781 | /// only. It also computes and returns the input chain for the tail call.
4782 | /// |
4783 | /// This is used to determine whether it is possible to codegen a libcall as |
4784 | /// tail call at legalization time. |
4785 | virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const { |
4786 | return false; |
4787 | } |
4788 | |
4789 | /// Return true if the target may be able to emit the call instruction as a
4790 | /// tail call. This is used by optimization passes to determine if it's profitable
4791 | /// to duplicate return instructions to enable tailcall optimization. |
4792 | virtual bool mayBeEmittedAsTailCall(const CallInst *) const { |
4793 | return false; |
4794 | } |
4795 | |
4796 | /// Return the register ID of the name passed in. Used by the named register
4797 | /// global variables extension. There is no target-independent behavior,
4798 | /// so the default action is to bail.
4799 | virtual Register getRegisterByName(const char* RegName, LLT Ty, |
4800 | const MachineFunction &MF) const { |
4801 | report_fatal_error("Named registers not implemented for this target");
4802 | } |
4803 | |
4804 | /// Return the type that should be used to zero or sign extend a |
4805 | /// zeroext/signext integer return value. FIXME: Some C calling conventions |
4806 | /// require the return type to be promoted, but this is not true all the time, |
4807 | /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling |
4808 | /// conventions. The frontend should handle this and include all of the |
4809 | /// necessary information. |
4810 | virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, |
4811 | ISD::NodeType /*ExtendKind*/) const { |
    EVT MinVT = getRegisterType(MVT::i32);
    return VT.bitsLT(MinVT) ? MinVT : VT;
4814 | } |
4815 | |
4816 | /// For some targets, an LLVM struct type must be broken down into multiple |
4817 | /// simple types, but the calling convention specifies that the entire struct |
4818 | /// must be passed in a block of consecutive registers. |
4819 | virtual bool |
4820 | functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, |
4821 | bool isVarArg, |
4822 | const DataLayout &DL) const { |
4823 | return false; |
4824 | } |
4825 | |
  /// For most targets, an LLVM type must be broken down into multiple
  /// smaller types. Usually the halves are ordered according to the endianness,
  /// but for some platforms that would break. So this method defaults to
  /// matching the endianness and can be overridden.
4830 | virtual bool |
4831 | shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const { |
4832 | return DL.isLittleEndian(); |
4833 | } |
4834 | |
  /// Returns a 0-terminated array of registers that can be safely used as
4836 | /// scratch registers. |
4837 | virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const { |
4838 | return nullptr; |
4839 | } |
4840 | |
  /// Returns a 0-terminated array of rounding control registers that can be
  /// attached to a strict FP call.
4843 | virtual ArrayRef<MCPhysReg> getRoundingControlRegisters() const { |
4844 | return ArrayRef<MCPhysReg>(); |
4845 | } |
4846 | |
4847 | /// This callback is used to prepare for a volatile or atomic load. |
4848 | /// It takes a chain node as input and returns the chain for the load itself. |
4849 | /// |
4850 | /// Having a callback like this is necessary for targets like SystemZ, |
4851 | /// which allows a CPU to reuse the result of a previous load indefinitely, |
4852 | /// even if a cache-coherent store is performed by another CPU. The default |
4853 | /// implementation does nothing. |
4854 | virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL, |
4855 | SelectionDAG &DAG) const { |
4856 | return Chain; |
4857 | } |
4858 | |
4859 | /// This callback is invoked by the type legalizer to legalize nodes with an |
4860 | /// illegal operand type but legal result types. It replaces the |
4861 | /// LowerOperation callback in the type Legalizer. The reason we can not do |
4862 | /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to |
4863 | /// use this callback. |
4864 | /// |
4865 | /// TODO: Consider merging with ReplaceNodeResults. |
4866 | /// |
4867 | /// The target places new result values for the node in Results (their number |
4868 | /// and types must exactly match those of the original return values of |
4869 | /// the node), or leaves Results empty, which indicates that the node is not |
4870 | /// to be custom lowered after all. |
4871 | /// The default implementation calls LowerOperation. |
4872 | virtual void LowerOperationWrapper(SDNode *N, |
4873 | SmallVectorImpl<SDValue> &Results, |
4874 | SelectionDAG &DAG) const; |
4875 | |
4876 | /// This callback is invoked for operations that are unsupported by the |
4877 | /// target, which are registered to use 'custom' lowering, and whose defined |
4878 | /// values are all legal. If the target has no operations that require custom |
4879 | /// lowering, it need not implement this. The default implementation of this |
4880 | /// aborts. |
4881 | virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; |
4882 | |
4883 | /// This callback is invoked when a node result type is illegal for the |
4884 | /// target, and the operation was registered to use 'custom' lowering for that |
4885 | /// result type. The target places new result values for the node in Results |
4886 | /// (their number and types must exactly match those of the original return |
4887 | /// values of the node), or leaves Results empty, which indicates that the |
4888 | /// node is not to be custom lowered after all. |
4889 | /// |
4890 | /// If the target has no operations that require custom lowering, it need not |
4891 | /// implement this. The default implementation aborts. |
4892 | virtual void ReplaceNodeResults(SDNode * /*N*/, |
4893 | SmallVectorImpl<SDValue> &/*Results*/, |
4894 | SelectionDAG &/*DAG*/) const { |
4895 | llvm_unreachable("ReplaceNodeResults not implemented for this target!" ); |
4896 | } |
4897 | |
4898 | /// This method returns the name of a target specific DAG node. |
4899 | virtual const char *getTargetNodeName(unsigned Opcode) const; |
4900 | |
4901 | /// This method returns a target specific FastISel object, or null if the |
4902 | /// target does not support "fast" ISel. |
4903 | virtual FastISel *createFastISel(FunctionLoweringInfo &, |
4904 | const TargetLibraryInfo *) const { |
4905 | return nullptr; |
4906 | } |
4907 | |
4908 | bool verifyReturnAddressArgumentIsConstant(SDValue Op, |
4909 | SelectionDAG &DAG) const; |
4910 | |
4911 | #ifndef NDEBUG |
4912 | /// Check the given SDNode. Aborts if it is invalid. |
  virtual void verifyTargetSDNode(const SDNode *N) const {}
4914 | #endif |
4915 | |
4916 | //===--------------------------------------------------------------------===// |
4917 | // Inline Asm Support hooks |
4918 | // |
4919 | |
4920 | /// This hook allows the target to expand an inline asm call to be explicit |
4921 | /// llvm code if it wants to. This is useful for turning simple inline asms |
4922 | /// into LLVM intrinsics, which gives the compiler more information about the |
4923 | /// behavior of the code. |
4924 | virtual bool ExpandInlineAsm(CallInst *) const { |
4925 | return false; |
4926 | } |
4927 | |
4928 | enum ConstraintType { |
4929 | C_Register, // Constraint represents specific register(s). |
    C_RegisterClass, // Constraint represents any register in a class.
4931 | C_Memory, // Memory constraint. |
4932 | C_Address, // Address constraint. |
4933 | C_Immediate, // Requires an immediate. |
4934 | C_Other, // Something else. |
4935 | C_Unknown // Unsupported constraint. |
4936 | }; |
4937 | |
4938 | enum ConstraintWeight { |
4939 | // Generic weights. |
4940 | CW_Invalid = -1, // No match. |
4941 | CW_Okay = 0, // Acceptable. |
4942 | CW_Good = 1, // Good weight. |
4943 | CW_Better = 2, // Better weight. |
4944 | CW_Best = 3, // Best weight. |
4945 | |
4946 | // Well-known weights. |
4947 | CW_SpecificReg = CW_Okay, // Specific register operands. |
4948 | CW_Register = CW_Good, // Register operands. |
4949 | CW_Memory = CW_Better, // Memory operands. |
4950 | CW_Constant = CW_Best, // Constant operand. |
4951 | CW_Default = CW_Okay // Default or don't know type. |
4952 | }; |
4953 | |
4954 | /// This contains information for each constraint that we are lowering. |
4955 | struct AsmOperandInfo : public InlineAsm::ConstraintInfo { |
4956 | /// This contains the actual string for the code, like "m". TargetLowering |
4957 | /// picks the 'best' code from ConstraintInfo::Codes that most closely |
4958 | /// matches the operand. |
4959 | std::string ConstraintCode; |
4960 | |
4961 | /// Information about the constraint code, e.g. Register, RegisterClass, |
4962 | /// Memory, Other, Unknown. |
4963 | TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown; |
4964 | |
4965 | /// If this is the result output operand or a clobber, this is null, |
4966 | /// otherwise it is the incoming operand to the CallInst. This gets |
4967 | /// modified as the asm is processed. |
4968 | Value *CallOperandVal = nullptr; |
4969 | |
4970 | /// The ValueType for the operand value. |
4971 | MVT ConstraintVT = MVT::Other; |
4972 | |
    /// Converting constructor from an InlineAsm::ConstraintInfo.
4974 | AsmOperandInfo(InlineAsm::ConstraintInfo Info) |
4975 | : InlineAsm::ConstraintInfo(std::move(Info)) {} |
4976 | |
    /// Return true if this is an input operand that is a matching constraint
4978 | /// like "4". |
4979 | bool isMatchingInputConstraint() const; |
4980 | |
4981 | /// If this is an input matching constraint, this method returns the output |
4982 | /// operand it matches. |
4983 | unsigned getMatchedOperand() const; |
4984 | }; |
4985 | |
4986 | using AsmOperandInfoVector = std::vector<AsmOperandInfo>; |
4987 | |
4988 | /// Split up the constraint string from the inline assembly value into the |
4989 | /// specific constraints and their prefixes, and also tie in the associated |
4990 | /// operand values. If this returns an empty vector, and if the constraint |
4991 | /// string itself isn't empty, there was an error parsing. |
4992 | virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, |
4993 | const TargetRegisterInfo *TRI, |
4994 | const CallBase &Call) const; |
4995 | |
4996 | /// Examine constraint type and operand type and determine a weight value. |
4997 | /// The operand object must already have been set up with the operand type. |
4998 | virtual ConstraintWeight getMultipleConstraintMatchWeight( |
4999 | AsmOperandInfo &info, int maIndex) const; |
5000 | |
5001 | /// Examine constraint string and operand type and determine a weight value. |
5002 | /// The operand object must already have been set up with the operand type. |
5003 | virtual ConstraintWeight getSingleConstraintMatchWeight( |
5004 | AsmOperandInfo &info, const char *constraint) const; |
5005 | |
5006 | /// Determines the constraint code and constraint type to use for the specific |
5007 | /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. |
5008 | /// If the actual operand being passed in is available, it can be passed in as |
5009 | /// Op, otherwise an empty SDValue can be passed. |
5010 | virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, |
5011 | SDValue Op, |
5012 | SelectionDAG *DAG = nullptr) const; |
5013 | |
5014 | /// Given a constraint, return the type of constraint it is for this target. |
5015 | virtual ConstraintType getConstraintType(StringRef Constraint) const; |
5016 | |
5017 | using ConstraintPair = std::pair<StringRef, TargetLowering::ConstraintType>; |
5018 | using ConstraintGroup = SmallVector<ConstraintPair>; |
  /// Given an OpInfo with a list of constraint codes as strings, return a
  /// sorted vector of pairs of constraint codes and their types, in priority
  /// order of what we'd prefer to lower them as. This may contain immediates
  /// that cannot be lowered, but it is meant to be a machine-agnostic order of
  /// preferences.
5024 | ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const; |
5025 | |
5026 | /// Given a physical register constraint (e.g. {edx}), return the register |
5027 | /// number and the register class for the register. |
5028 | /// |
5029 | /// Given a register class constraint, like 'r', if this corresponds directly |
  /// to an LLVM register class, return a register number of 0 and the
  /// register class pointer.
5032 | /// |
5033 | /// This should only be used for C_Register constraints. On error, this |
5034 | /// returns a register number of 0 and a null register class pointer. |
5035 | virtual std::pair<unsigned, const TargetRegisterClass *> |
5036 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
5037 | StringRef Constraint, MVT VT) const; |
5038 | |
5039 | virtual InlineAsm::ConstraintCode |
5040 | getInlineAsmMemConstraint(StringRef ConstraintCode) const { |
5041 | if (ConstraintCode == "m" ) |
5042 | return InlineAsm::ConstraintCode::m; |
5043 | if (ConstraintCode == "o" ) |
5044 | return InlineAsm::ConstraintCode::o; |
5045 | if (ConstraintCode == "X" ) |
5046 | return InlineAsm::ConstraintCode::X; |
5047 | if (ConstraintCode == "p" ) |
5048 | return InlineAsm::ConstraintCode::p; |
5049 | return InlineAsm::ConstraintCode::Unknown; |
5050 | } |
5051 | |
5052 | /// Try to replace an X constraint, which matches anything, with another that |
5053 | /// has more specific requirements based on the type of the corresponding |
5054 | /// operand. This returns null if there is no replacement to make. |
5055 | virtual const char *LowerXConstraint(EVT ConstraintVT) const; |
5056 | |
5057 | /// Lower the specified operand into the Ops vector. If it is invalid, don't |
5058 | /// add anything to Ops. |
5059 | virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
5060 | std::vector<SDValue> &Ops, |
5061 | SelectionDAG &DAG) const; |
5062 | |
5063 | // Lower custom output constraints. If invalid, return SDValue(). |
5064 | virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, |
5065 | const SDLoc &DL, |
5066 | const AsmOperandInfo &OpInfo, |
5067 | SelectionDAG &DAG) const; |
5068 | |
  // Targets may override this function to collect operands from the CallInst
  // and, for example, lower them into SelectionDAG operands.
5071 | virtual void CollectTargetIntrinsicOperands(const CallInst &I, |
5072 | SmallVectorImpl<SDValue> &Ops, |
5073 | SelectionDAG &DAG) const; |
5074 | |
5075 | //===--------------------------------------------------------------------===// |
5076 | // Div utility functions |
5077 | // |
5078 | |
5079 | SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, |
5080 | SmallVectorImpl<SDNode *> &Created) const; |
5081 | SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, |
5082 | SmallVectorImpl<SDNode *> &Created) const; |
5083 | // Build sdiv by power-of-2 with conditional move instructions |
5084 | SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, |
5085 | SelectionDAG &DAG, |
5086 | SmallVectorImpl<SDNode *> &Created) const; |
5087 | |
5088 | /// Targets may override this function to provide custom SDIV lowering for |
5089 | /// power-of-2 denominators. If the target returns an empty SDValue, LLVM |
5090 | /// assumes SDIV is expensive and replaces it with a series of other integer |
5091 | /// operations. |
5092 | virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, |
5093 | SelectionDAG &DAG, |
5094 | SmallVectorImpl<SDNode *> &Created) const; |
5095 | |
5096 | /// Targets may override this function to provide custom SREM lowering for |
5097 | /// power-of-2 denominators. If the target returns an empty SDValue, LLVM |
5098 | /// assumes SREM is expensive and replaces it with a series of other integer |
5099 | /// operations. |
5100 | virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, |
5101 | SelectionDAG &DAG, |
5102 | SmallVectorImpl<SDNode *> &Created) const; |
5103 | |
5104 | /// Indicate whether this target prefers to combine FDIVs with the same |
5105 | /// divisor. If the transform should never be done, return zero. If the |
5106 | /// transform should be done, return the minimum number of divisor uses |
5107 | /// that must exist. |
5108 | virtual unsigned combineRepeatedFPDivisors() const { |
5109 | return 0; |
5110 | } |
5111 | |
5112 | /// Hooks for building estimates in place of slower divisions and square |
5113 | /// roots. |
5114 | |
  /// Return either a square root estimate or a reciprocal square root estimate
  /// value for the input operand.
5117 | /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or |
5118 | /// 'Enabled' as set by a potential default override attribute. |
5119 | /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson |
5120 | /// refinement iterations required to generate a sufficient (though not |
5121 | /// necessarily IEEE-754 compliant) estimate is returned in that parameter. |
5122 | /// The boolean UseOneConstNR output is used to select a Newton-Raphson |
5123 | /// algorithm implementation that uses either one or two constants. |
5124 | /// The boolean Reciprocal is used to select whether the estimate is for the |
5125 | /// square root of the input operand or the reciprocal of its square root. |
5126 | /// A target may choose to implement its own refinement within this function. |
5127 | /// If that's true, then return '0' as the number of RefinementSteps to avoid |
5128 | /// any further refinement of the estimate. |
5129 | /// An empty SDValue return means no estimate sequence can be created. |
5130 | virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, |
5131 | int Enabled, int &RefinementSteps, |
5132 | bool &UseOneConstNR, bool Reciprocal) const { |
5133 | return SDValue(); |
5134 | } |
5135 | |
5136 | /// Try to convert the fminnum/fmaxnum to a compare/select sequence. This is |
5137 | /// required for correctness since InstCombine might have canonicalized a |
5138 | /// fcmp+select sequence to a FMINNUM/FMAXNUM intrinsic. If we were to fall |
5139 | /// through to the default expansion/soften to libcall, we might introduce a |
5140 | /// link-time dependency on libm into a file that originally did not have one. |
5141 | SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const; |
5142 | |
5143 | /// Return a reciprocal estimate value for the input operand. |
5144 | /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or |
5145 | /// 'Enabled' as set by a potential default override attribute. |
5146 | /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson |
5147 | /// refinement iterations required to generate a sufficient (though not |
5148 | /// necessarily IEEE-754 compliant) estimate is returned in that parameter. |
5149 | /// A target may choose to implement its own refinement within this function. |
5150 | /// If that's true, then return '0' as the number of RefinementSteps to avoid |
5151 | /// any further refinement of the estimate. |
5152 | /// An empty SDValue return means no estimate sequence can be created. |
5153 | virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, |
5154 | int Enabled, int &RefinementSteps) const { |
5155 | return SDValue(); |
5156 | } |
5157 | |
5158 | /// Return a target-dependent comparison result if the input operand is |
5159 | /// suitable for use with a square root estimate calculation. For example, the |
5160 | /// comparison may check if the operand is NAN, INF, zero, normal, etc. The |
5161 | /// result should be used as the condition operand for a select or branch. |
5162 | virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, |
5163 | const DenormalMode &Mode) const; |
5164 | |
5165 | /// Return a target-dependent result if the input operand is not suitable for |
5166 | /// use with a square root estimate calculation. |
5167 | virtual SDValue getSqrtResultForDenormInput(SDValue Operand, |
5168 | SelectionDAG &DAG) const { |
    return DAG.getConstantFP(0.0, SDLoc(Operand), Operand.getValueType());
5170 | } |
5171 | |
5172 | //===--------------------------------------------------------------------===// |
5173 | // Legalization utility functions |
5174 | // |
5175 | |
5176 | /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, |
5177 | /// respectively, each computing an n/2-bit part of the result. |
5178 | /// \param Result A vector that will be filled with the parts of the result |
5179 | /// in little-endian order. |
5180 | /// \param LL Low bits of the LHS of the MUL. You can use this parameter |
5181 | /// if you want to control how low bits are extracted from the LHS. |
5182 | /// \param LH High bits of the LHS of the MUL. See LL for meaning. |
5183 | /// \param RL Low bits of the RHS of the MUL. See LL for meaning |
5184 | /// \param RH High bits of the RHS of the MUL. See LL for meaning. |
5185 | /// \returns true if the node has been expanded, false if it has not |
5186 | bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, |
5187 | SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT, |
5188 | SelectionDAG &DAG, MulExpansionKind Kind, |
5189 | SDValue LL = SDValue(), SDValue LH = SDValue(), |
5190 | SDValue RL = SDValue(), SDValue RH = SDValue()) const; |
5191 | |
5192 | /// Expand a MUL into two nodes. One that computes the high bits of |
5193 | /// the result and one that computes the low bits. |
5194 | /// \param HiLoVT The value type to use for the Lo and Hi nodes. |
5195 | /// \param LL Low bits of the LHS of the MUL. You can use this parameter |
5196 | /// if you want to control how low bits are extracted from the LHS. |
5197 | /// \param LH High bits of the LHS of the MUL. See LL for meaning. |
5198 | /// \param RL Low bits of the RHS of the MUL. See LL for meaning |
5199 | /// \param RH High bits of the RHS of the MUL. See LL for meaning. |
  /// \returns true if the node has been expanded, false if it has not.
5201 | bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, |
5202 | SelectionDAG &DAG, MulExpansionKind Kind, |
5203 | SDValue LL = SDValue(), SDValue LH = SDValue(), |
5204 | SDValue RL = SDValue(), SDValue RH = SDValue()) const; |
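  // Illustrative use from a target's custom lowering (a sketch; the
  // surrounding names are assumed): split an illegal i64 MUL into i32 halves
  // and rebuild the wide value.
  //
  //   SDValue Lo, Hi;
  //   if (expandMUL(N, Lo, Hi, /*HiLoVT=*/MVT::i32, DAG,
  //                 MulExpansionKind::OnlyLegalOrCustom))
  //     return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0), Lo, Hi);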
5205 | |
  /// Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit
5207 | /// urem by constant and other arithmetic ops. The n/2-bit urem by constant |
5208 | /// will be expanded by DAGCombiner. This is not possible for all constant |
5209 | /// divisors. |
5210 | /// \param N Node to expand |
  /// \param Result A vector that will be filled with the low and high parts of
5212 | /// the results. For *DIVREM, this will be the quotient parts followed |
5213 | /// by the remainder parts. |
5214 | /// \param HiLoVT The value type to use for the Lo and Hi parts. Should be |
5215 | /// half of VT. |
5216 | /// \param LL Low bits of the LHS of the operation. You can use this |
5217 | /// parameter if you want to control how low bits are extracted from |
5218 | /// the LHS. |
5219 | /// \param LH High bits of the LHS of the operation. See LL for meaning. |
5220 | /// \returns true if the node has been expanded, false if it has not. |
5221 | bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl<SDValue> &Result, |
5222 | EVT HiLoVT, SelectionDAG &DAG, |
5223 | SDValue LL = SDValue(), |
5224 | SDValue LH = SDValue()) const; |
5225 | |
5226 | /// Expand funnel shift. |
5227 | /// \param N Node to expand |
5228 | /// \returns The expansion if successful, SDValue() otherwise |
5229 | SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const; |
5230 | |
5231 | /// Expand rotations. |
5232 | /// \param N Node to expand |
5233 | /// \param AllowVectorOps expand vector rotate, this should only be performed |
5234 | /// if the legalization is happening outside of LegalizeVectorOps |
5235 | /// \returns The expansion if successful, SDValue() otherwise |
5236 | SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const; |
5237 | |
5238 | /// Expand shift-by-parts. |
5239 | /// \param N Node to expand |
5240 | /// \param Lo lower-output-part after conversion |
5241 | /// \param Hi upper-output-part after conversion |
5242 | void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, |
5243 | SelectionDAG &DAG) const; |
5244 | |
5245 | /// Expand float(f32) to SINT(i64) conversion |
5246 | /// \param N Node to expand |
5247 | /// \param Result output after conversion |
5248 | /// \returns True, if the expansion was successful, false otherwise |
5249 | bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
5250 | |
5251 | /// Expand float to UINT conversion |
5252 | /// \param N Node to expand |
5253 | /// \param Result output after conversion |
5254 | /// \param Chain output chain after conversion |
5255 | /// \returns True, if the expansion was successful, false otherwise |
5256 | bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, |
5257 | SelectionDAG &DAG) const; |
5258 | |
5259 | /// Expand UINT(i64) to double(f64) conversion |
5260 | /// \param N Node to expand |
5261 | /// \param Result output after conversion |
5262 | /// \param Chain output chain after conversion |
5263 | /// \returns True, if the expansion was successful, false otherwise |
5264 | bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, |
5265 | SelectionDAG &DAG) const; |
5266 | |
5267 | /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs. |
5268 | SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const; |
5269 | |
  /// Expand fminimum/fmaximum into multiple comparisons with selects.
5271 | SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const; |
5272 | |
5273 | /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max. |
5274 | /// \param N Node to expand |
5275 | /// \returns The expansion result |
5276 | SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const; |
5277 | |
5278 | /// Truncate Op to ResultVT. If the result is exact, leave it alone. If it is |
5279 | /// not exact, force the result to be odd. |
5280 | /// \param ResultVT The type of result. |
5281 | /// \param Op The value to round. |
5282 | /// \returns The expansion result |
5283 | SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, |
5284 | SelectionDAG &DAG) const; |
5285 | |
5286 | /// Expand round(fp) to fp conversion |
5287 | /// \param N Node to expand |
5288 | /// \returns The expansion result |
5289 | SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const; |
5290 | |
5291 | /// Expand check for floating point class. |
5292 | /// \param ResultVT The type of intrinsic call result. |
5293 | /// \param Op The tested value. |
5294 | /// \param Test The test to perform. |
5295 | /// \param Flags The optimization flags. |
5296 | /// \returns The expansion result or SDValue() if it fails. |
5297 | SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, |
5298 | SDNodeFlags Flags, const SDLoc &DL, |
5299 | SelectionDAG &DAG) const; |
5300 | |
  /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes;
5302 | /// vector nodes can only succeed if all operations are legal/custom. |
5303 | /// \param N Node to expand |
5304 | /// \returns The expansion result or SDValue() if it fails. |
5305 | SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const; |
5306 | |
5307 | /// Expand VP_CTPOP nodes. |
5308 | /// \returns The expansion result or SDValue() if it fails. |
5309 | SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const; |
5310 | |
  /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes;
5312 | /// vector nodes can only succeed if all operations are legal/custom. |
5313 | /// \param N Node to expand |
5314 | /// \returns The expansion result or SDValue() if it fails. |
5315 | SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const; |
5316 | |
5317 | /// Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes. |
5318 | /// \param N Node to expand |
5319 | /// \returns The expansion result or SDValue() if it fails. |
5320 | SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const; |
5321 | |
5322 | /// Expand CTTZ via Table Lookup. |
5323 | /// \param N Node to expand |
5324 | /// \returns The expansion result or SDValue() if it fails. |
5325 | SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, |
5326 | SDValue Op, unsigned NumBitsPerElt) const; |
5327 | |
  /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes;
5329 | /// vector nodes can only succeed if all operations are legal/custom. |
5330 | /// \param N Node to expand |
5331 | /// \returns The expansion result or SDValue() if it fails. |
5332 | SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const; |
5333 | |
5334 | /// Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes. |
5335 | /// \param N Node to expand |
5336 | /// \returns The expansion result or SDValue() if it fails. |
5337 | SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const; |
5338 | |
5339 | /// Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes. |
5340 | /// \param N Node to expand |
5341 | /// \returns The expansion result or SDValue() if it fails. |
5342 | SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const; |
5343 | |
  /// Expand ABS nodes. Expands vector/scalar ABS nodes;
5345 | /// vector nodes can only succeed if all operations are legal/custom. |
5346 | /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size)) |
5347 | /// \param N Node to expand |
5348 | /// \param IsNegative indicate negated abs |
5349 | /// \returns The expansion result or SDValue() if it fails. |
5350 | SDValue expandABS(SDNode *N, SelectionDAG &DAG, |
5351 | bool IsNegative = false) const; |
5352 | |
5353 | /// Expand ABDS/ABDU nodes. Expands vector/scalar ABDS/ABDU nodes. |
5354 | /// \param N Node to expand |
5355 | /// \returns The expansion result or SDValue() if it fails. |
5356 | SDValue expandABD(SDNode *N, SelectionDAG &DAG) const; |
5357 | |
5358 | /// Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes. |
5359 | /// \param N Node to expand |
5360 | /// \returns The expansion result or SDValue() if it fails. |
5361 | SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const; |
5362 | |
5363 | /// Expand BSWAP nodes. Expands scalar/vector BSWAP nodes with i16/i32/i64 |
  /// scalar types. Returns SDValue() if the expansion fails.
5365 | /// \param N Node to expand |
5366 | /// \returns The expansion result or SDValue() if it fails. |
5367 | SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const; |
5368 | |
  /// Expand VP_BSWAP nodes. Expands VP_BSWAP nodes with i16/i32/i64 scalar
  /// types. Returns SDValue() if the expansion fails.
  /// \param N Node to expand
  /// \returns The expansion result or SDValue() if it fails.
5372 | SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const; |
5373 | |
5374 | /// Expand BITREVERSE nodes. Expands scalar/vector BITREVERSE nodes. |
  /// Returns SDValue() if the expansion fails.
5376 | /// \param N Node to expand |
5377 | /// \returns The expansion result or SDValue() if it fails. |
5378 | SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const; |
5379 | |
  /// Expand VP_BITREVERSE nodes. Expands VP_BITREVERSE nodes with
  /// i8/i16/i32/i64 scalar types.
  /// \param N Node to expand
  /// \returns The expansion result or SDValue() if it fails.
5383 | SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const; |
5384 | |
5385 | /// Turn load of vector type into a load of the individual elements. |
5386 | /// \param LD load to expand |
5387 | /// \returns BUILD_VECTOR and TokenFactor nodes. |
5388 | std::pair<SDValue, SDValue> scalarizeVectorLoad(LoadSDNode *LD, |
5389 | SelectionDAG &DAG) const; |
5390 | |
  /// Turn a store of a vector type into stores of the individual elements.
5392 | /// \param ST Store with a vector value type |
5393 | /// \returns TokenFactor of the individual store chains. |
5394 | SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const; |
5395 | |
5396 | /// Expands an unaligned load to 2 half-size loads for an integer, and |
5397 | /// possibly more for vectors. |
5398 | std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD, |
5399 | SelectionDAG &DAG) const; |
5400 | |
5401 | /// Expands an unaligned store to 2 half-size stores for integer values, and |
5402 | /// possibly more for vectors. |
5403 | SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const; |
5404 | |
5405 | /// Increments memory address \p Addr according to the type of the value |
5406 | /// \p DataVT that should be stored. If the data is stored in compressed |
5407 | /// form, the memory address should be incremented according to the number of |
  /// the stored elements. This number is equal to the number of set bits
5409 | /// in the \p Mask. |
5410 | /// \p DataVT is a vector type. \p Mask is a vector value. |
5411 | /// \p DataVT and \p Mask have the same number of vector elements. |
5412 | SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, |
5413 | EVT DataVT, SelectionDAG &DAG, |
5414 | bool IsCompressedMemory) const; |
5415 | |
5416 | /// Get a pointer to vector element \p Idx located in memory for a vector of |
5417 | /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of |
5418 | /// bounds the returned pointer is unspecified, but will be within the vector |
5419 | /// bounds. |
5420 | SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, |
5421 | SDValue Index) const; |
5422 | |
5423 | /// Get a pointer to a sub-vector of type \p SubVecVT at index \p Idx located |
5424 | /// in memory for a vector of type \p VecVT starting at a base address of |
5425 | /// \p VecPtr. If \p Idx plus the size of \p SubVecVT is out of bounds the |
5426 | /// returned pointer is unspecified, but the value returned will be such that |
5427 | /// the entire subvector would be within the vector bounds. |
5428 | SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, |
5429 | EVT SubVecVT, SDValue Index) const; |
5430 | |
5431 | /// Method for building the DAG expansion of ISD::[US][MIN|MAX]. This |
5432 | /// method accepts integers as its arguments. |
5433 | SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const; |
5434 | |
5435 | /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This |
5436 | /// method accepts integers as its arguments. |
5437 | SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const; |
5438 | |
5439 | /// Method for building the DAG expansion of ISD::[US]CMP. This |
  /// method accepts integers as its arguments.
5441 | SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const; |
5442 | |
5443 | /// Method for building the DAG expansion of ISD::[US]SHLSAT. This |
5444 | /// method accepts integers as its arguments. |
5445 | SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const; |
5446 | |
5447 | /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This |
5448 | /// method accepts integers as its arguments. |
5449 | SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const; |
5450 | |
5451 | /// Method for building the DAG expansion of ISD::[US]DIVFIX[SAT]. This |
5452 | /// method accepts integers as its arguments. |
5453 | /// Note: This method may fail if the division could not be performed |
5454 | /// within the type. Clients must retry with a wider type if this happens. |
5455 | SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, |
5456 | SDValue LHS, SDValue RHS, |
5457 | unsigned Scale, SelectionDAG &DAG) const; |
5458 | |
5459 | /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion |
  /// always succeeds and populates the Result and Overflow arguments.
5461 | void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, |
5462 | SelectionDAG &DAG) const; |
5463 | |
5464 | /// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion |
  /// always succeeds and populates the Result and Overflow arguments.
5466 | void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, |
5467 | SelectionDAG &DAG) const; |
5468 | |
5469 | /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether |
5470 | /// expansion was successful and populates the Result and Overflow arguments. |
5471 | bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, |
5472 | SelectionDAG &DAG) const; |
5473 | |
5474 | /// forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or |
5475 | /// brute force via a wide multiplication. The expansion works by |
5476 | /// attempting to do a multiplication on a wider type twice the size of the |
5477 | /// original operands. LL and LH represent the lower and upper halves of the |
5478 | /// first operand. RL and RH represent the lower and upper halves of the |
5479 | /// second operand. The upper and lower halves of the result are stored in Lo |
5480 | /// and Hi. |
5481 | void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, |
5482 | EVT WideVT, const SDValue LL, const SDValue LH, |
5483 | const SDValue RL, const SDValue RH, SDValue &Lo, |
5484 | SDValue &Hi) const; |
5485 | |
5486 | /// Same as above, but creates the upper halves of each operand by |
5487 | /// sign/zero-extending the operands. |
5488 | void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, |
5489 | const SDValue LHS, const SDValue RHS, SDValue &Lo, |
5490 | SDValue &Hi) const; |
5491 | |
5492 | /// Expand a VECREDUCE_* into an explicit calculation. If Count is specified, |
5493 | /// only the first Count elements of the vector are used. |
5494 | SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const; |
5495 | |
5496 | /// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation. |
5497 | SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const; |
5498 | |
5499 | /// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal. |
5500 | /// Returns true if the expansion was successful. |
5501 | bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const; |
5502 | |
5503 | /// Method for building the DAG expansion of ISD::VECTOR_SPLICE. This |
5504 | /// method accepts vectors as its arguments. |
5505 | SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const; |
5506 | |
  /// Expand a VECTOR_COMPRESS into a sequence that extracts each element,
  /// stores it temporarily, advances the store position, and finally re-loads
  /// the result vector.
5509 | SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const; |
5510 | |
5511 | /// Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC |
5512 | /// on the current target. A VP_SETCC will additionally be given a Mask |
5513 | /// and/or EVL not equal to SDValue(). |
5514 | /// |
5515 | /// If the SETCC has been legalized using AND / OR, then the legalized node |
5516 | /// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert |
5517 | /// will be set to false. This will also hold if the VP_SETCC has been |
5518 | /// legalized using VP_AND / VP_OR. |
5519 | /// |
5520 | /// If the SETCC / VP_SETCC has been legalized by using |
5521 | /// getSetCCSwappedOperands(), then the values of LHS and RHS will be |
5522 | /// swapped, CC will be set to the new condition, and NeedInvert will be set |
5523 | /// to false. |
5524 | /// |
5525 | /// If the SETCC / VP_SETCC has been legalized using the inverse condcode, |
5526 | /// then LHS and RHS will be unchanged, CC will set to the inverted condcode, |
5527 | /// and NeedInvert will be set to true. The caller must invert the result of |
5528 | /// the SETCC with SelectionDAG::getLogicalNOT() or take equivalent action to |
5529 | /// swap the effect of a true/false result. |
5530 | /// |
5531 | /// \returns true if the SETCC / VP_SETCC has been legalized, false if it |
5532 | /// hasn't. |
5533 | bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, |
5534 | SDValue &RHS, SDValue &CC, SDValue Mask, |
5535 | SDValue EVL, bool &NeedInvert, const SDLoc &dl, |
5536 | SDValue &Chain, bool IsSignaling = false) const; |
5537 | |
5538 | //===--------------------------------------------------------------------===// |
5539 | // Instruction Emitting Hooks |
5540 | // |
5541 | |
5542 | /// This method should be implemented by targets that mark instructions with |
5543 | /// the 'usesCustomInserter' flag. These instructions are special in various |
5544 | /// ways, which require special support to insert. The specified MachineInstr |
5545 | /// is created but not inserted into any basic blocks, and this method is |
5546 | /// called to expand it into a sequence of instructions, potentially also |
5547 | /// creating new basic blocks and control flow. |
5548 | /// As long as the returned basic block is different (i.e., we created a new |
5549 | /// one), the custom inserter is free to modify the rest of \p MBB. |
5550 | virtual MachineBasicBlock * |
5551 | EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const; |
5552 | |
5553 | /// This method should be implemented by targets that mark instructions with |
5554 | /// the 'hasPostISelHook' flag. These instructions must be adjusted after |
  /// instruction selection by target hooks, e.g. to fill in optional defs for
  /// ARM 's'-setting instructions.
5557 | virtual void AdjustInstrPostInstrSelection(MachineInstr &MI, |
5558 | SDNode *Node) const; |
5559 | |
5560 | /// If this function returns true, SelectionDAGBuilder emits a |
5561 | /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector. |
5562 | virtual bool useLoadStackGuardNode() const { |
5563 | return false; |
5564 | } |
5565 | |
5566 | virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, |
5567 | const SDLoc &DL) const { |
5568 | llvm_unreachable("not implemented for this target" ); |
5569 | } |
5570 | |
5571 | /// Lower TLS global address SDNode for target independent emulated TLS model. |
5572 | virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, |
5573 | SelectionDAG &DAG) const; |
5574 | |
5575 | /// Expands target specific indirect branch for the case of JumpTable |
5576 | /// expansion. |
5577 | virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, |
5578 | SDValue Addr, int JTI, |
5579 | SelectionDAG &DAG) const; |
5580 | |
5581 | // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits))) |
5582 | // If we're comparing for equality to zero and isCtlzFast is true, expose the |
5583 | // fact that this can be implemented as a ctlz/srl pair, so that the dag |
5584 | // combiner can fold the new nodes. |
5585 | SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const; |
5586 | |
5587 | // Return true if `X & Y eq/ne 0` is preferable to `X & Y ne/eq Y` |
5588 | virtual bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode, EVT) const { |
5589 | return true; |
5590 | } |
5591 | |
5592 | private: |
5593 | SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, |
5594 | const SDLoc &DL, DAGCombinerInfo &DCI) const; |
5595 | SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, |
5596 | const SDLoc &DL, DAGCombinerInfo &DCI) const; |
5597 | |
5598 | SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0, |
5599 | SDValue N1, ISD::CondCode Cond, |
5600 | DAGCombinerInfo &DCI, |
5601 | const SDLoc &DL) const; |
5602 | |
5603 | // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 |
5604 | SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift( |
5605 | EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, |
5606 | DAGCombinerInfo &DCI, const SDLoc &DL) const; |
5607 | |
5608 | SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, |
5609 | SDValue CompTargetNode, ISD::CondCode Cond, |
5610 | DAGCombinerInfo &DCI, const SDLoc &DL, |
5611 | SmallVectorImpl<SDNode *> &Created) const; |
5612 | SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, |
5613 | ISD::CondCode Cond, DAGCombinerInfo &DCI, |
5614 | const SDLoc &DL) const; |
5615 | |
5616 | SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, |
5617 | SDValue CompTargetNode, ISD::CondCode Cond, |
5618 | DAGCombinerInfo &DCI, const SDLoc &DL, |
5619 | SmallVectorImpl<SDNode *> &Created) const; |
5620 | SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, |
5621 | ISD::CondCode Cond, DAGCombinerInfo &DCI, |
5622 | const SDLoc &DL) const; |
5623 | }; |
5624 | |
5625 | /// Given an LLVM IR type and return type attributes, compute the return value |
5626 | /// EVTs and flags, and optionally also the offsets, if the return value is |
5627 | /// being lowered to memory. |
5628 | void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, |
5629 | SmallVectorImpl<ISD::OutputArg> &Outs, |
5630 | const TargetLowering &TLI, const DataLayout &DL); |
5631 | |
5632 | } // end namespace llvm |
5633 | |
5634 | #endif // LLVM_CODEGEN_TARGETLOWERING_H |
5635 | |