1//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
/// This file describes how to lower LLVM code to machine code. This has three
/// main components:
12///
13/// 1. Which ValueTypes are natively supported by the target.
14/// 2. Which operations are supported for supported ValueTypes.
15/// 3. Cost thresholds for alternative implementations of certain operations.
16///
17/// In addition it has a few other components, like information about FP
18/// immediates.
19///
20//===----------------------------------------------------------------------===//
21
22#ifndef LLVM_CODEGEN_TARGETLOWERING_H
23#define LLVM_CODEGEN_TARGETLOWERING_H
24
25#include "llvm/ADT/APInt.h"
26#include "llvm/ADT/ArrayRef.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/CodeGen/DAGCombine.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/LowLevelTypeUtils.h"
33#include "llvm/CodeGen/MachineRegisterInfo.h"
34#include "llvm/CodeGen/RuntimeLibcallUtil.h"
35#include "llvm/CodeGen/SelectionDAG.h"
36#include "llvm/CodeGen/SelectionDAGNodes.h"
37#include "llvm/CodeGen/TargetCallingConv.h"
38#include "llvm/CodeGen/ValueTypes.h"
39#include "llvm/CodeGenTypes/MachineValueType.h"
40#include "llvm/IR/Attributes.h"
41#include "llvm/IR/CallingConv.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/InlineAsm.h"
46#include "llvm/IR/Instruction.h"
47#include "llvm/IR/Instructions.h"
48#include "llvm/IR/RuntimeLibcalls.h"
49#include "llvm/IR/Type.h"
50#include "llvm/Support/Alignment.h"
51#include "llvm/Support/AtomicOrdering.h"
52#include "llvm/Support/Casting.h"
53#include "llvm/Support/ErrorHandling.h"
54#include <algorithm>
55#include <cassert>
56#include <climits>
57#include <cstdint>
58#include <iterator>
59#include <map>
60#include <string>
61#include <utility>
62#include <vector>
63
64namespace llvm {
65
66class AssumptionCache;
67class CCState;
68class CCValAssign;
69enum class ComplexDeinterleavingOperation;
70enum class ComplexDeinterleavingRotation;
71class Constant;
72class FastISel;
73class FunctionLoweringInfo;
74class GlobalValue;
75class Loop;
76class GISelKnownBits;
77class IntrinsicInst;
78class IRBuilderBase;
79struct KnownBits;
80class LLVMContext;
81class MachineBasicBlock;
82class MachineFunction;
83class MachineInstr;
84class MachineJumpTableInfo;
85class MachineLoop;
86class MachineRegisterInfo;
87class MCContext;
88class MCExpr;
89class Module;
90class ProfileSummaryInfo;
91class TargetLibraryInfo;
92class TargetMachine;
93class TargetRegisterClass;
94class TargetRegisterInfo;
95class TargetTransformInfo;
96class Value;
97
98namespace Sched {
99
100enum Preference : uint8_t {
101 None, // No preference
102 Source, // Follow source order.
103 RegPressure, // Scheduling for lowest register pressure.
104 Hybrid, // Scheduling for both latency and register pressure.
105 ILP, // Scheduling for ILP in low register pressure mode.
106 VLIW, // Scheduling for VLIW targets.
107 Fast, // Fast suboptimal list scheduling
108 Linearize, // Linearize DAG, no scheduling
109 Last = Linearize // Marker for the last Sched::Preference
110};
111
112} // end namespace Sched
113
114// MemOp models a memory operation, either memset or memcpy/memmove.
115struct MemOp {
116private:
117 // Shared
118 uint64_t Size;
119 bool DstAlignCanChange; // true if destination alignment can satisfy any
120 // constraint.
121 Align DstAlign; // Specified alignment of the memory operation.
122
123 bool AllowOverlap;
124 // memset only
  bool IsMemset;   // If set, this memory operation is a memset.
  bool ZeroMemset; // If set, clears out memory with zeros.
127 // memcpy only
128 bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register
129 // constant so it does not need to be loaded.
130 Align SrcAlign; // Inferred alignment of the source or default value if the
131 // memory operation does not need to load the value.
132public:
133 static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
134 Align SrcAlign, bool IsVolatile,
135 bool MemcpyStrSrc = false) {
136 MemOp Op;
137 Op.Size = Size;
138 Op.DstAlignCanChange = DstAlignCanChange;
139 Op.DstAlign = DstAlign;
140 Op.AllowOverlap = !IsVolatile;
141 Op.IsMemset = false;
142 Op.ZeroMemset = false;
143 Op.MemcpyStrSrc = MemcpyStrSrc;
144 Op.SrcAlign = SrcAlign;
145 return Op;
146 }
147
148 static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
149 bool IsZeroMemset, bool IsVolatile) {
150 MemOp Op;
151 Op.Size = Size;
152 Op.DstAlignCanChange = DstAlignCanChange;
153 Op.DstAlign = DstAlign;
154 Op.AllowOverlap = !IsVolatile;
155 Op.IsMemset = true;
156 Op.ZeroMemset = IsZeroMemset;
157 Op.MemcpyStrSrc = false;
158 return Op;
159 }
160
161 uint64_t size() const { return Size; }
162 Align getDstAlign() const {
163 assert(!DstAlignCanChange);
164 return DstAlign;
165 }
166 bool isFixedDstAlign() const { return !DstAlignCanChange; }
167 bool allowOverlap() const { return AllowOverlap; }
168 bool isMemset() const { return IsMemset; }
169 bool isMemcpy() const { return !IsMemset; }
170 bool isMemcpyWithFixedDstAlign() const {
171 return isMemcpy() && !DstAlignCanChange;
172 }
173 bool isZeroMemset() const { return isMemset() && ZeroMemset; }
174 bool isMemcpyStrSrc() const {
175 assert(isMemcpy() && "Must be a memcpy");
176 return MemcpyStrSrc;
177 }
178 Align getSrcAlign() const {
179 assert(isMemcpy() && "Must be a memcpy");
180 return SrcAlign;
181 }
182 bool isSrcAligned(Align AlignCheck) const {
    return isMemset() || llvm::isAligned(AlignCheck, SrcAlign.value());
184 }
185 bool isDstAligned(Align AlignCheck) const {
    return DstAlignCanChange || llvm::isAligned(AlignCheck, DstAlign.value());
187 }
188 bool isAligned(Align AlignCheck) const {
189 return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck);
190 }
191};
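
// Illustrative usage sketch (the concrete values are made up): a lowering
// routine describing a small memcpy via the factory above and then querying
// the result.
//
//   MemOp Op = MemOp::Copy(/*Size=*/16, /*DstAlignCanChange=*/false,
//                          /*DstAlign=*/Align(8), /*SrcAlign=*/Align(4),
//                          /*IsVolatile=*/false);
//   if (Op.isMemcpyWithFixedDstAlign() && Op.isSrcAligned(Align(4)))
//     ...; // e.g. emit 4-byte-wide loads/stores instead of byte copies.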
192
193/// This base class for TargetLowering contains the SelectionDAG-independent
194/// parts that can be used from the rest of CodeGen.
195class TargetLoweringBase {
196public:
197 /// This enum indicates whether operations are valid for a target, and if not,
198 /// what action should be used to make them valid.
199 enum LegalizeAction : uint8_t {
200 Legal, // The target natively supports this operation.
201 Promote, // This operation should be executed in a larger type.
202 Expand, // Try to expand this to other ops, otherwise use a libcall.
203 LibCall, // Don't try to expand this to other ops, always use a libcall.
204 Custom // Use the LowerOperation hook to implement custom lowering.
205 };
206
  /// This enum indicates whether types are legal for a target, and if not,
208 /// what action should be used to make them valid.
209 enum LegalizeTypeAction : uint8_t {
210 TypeLegal, // The target natively supports this type.
211 TypePromoteInteger, // Replace this integer with a larger one.
212 TypeExpandInteger, // Split this integer into two of half the size.
213 TypeSoftenFloat, // Convert this float to a same size integer type.
214 TypeExpandFloat, // Split this float into two of half the size.
215 TypeScalarizeVector, // Replace this one-element vector with its element.
216 TypeSplitVector, // Split this vector into two of half the size.
217 TypeWidenVector, // This vector should be widened into a larger vector.
218 TypePromoteFloat, // Replace this float with a larger one.
219 TypeSoftPromoteHalf, // Soften half to i16 and use float to do arithmetic.
220 TypeScalarizeScalableVector, // This action is explicitly left unimplemented.
221 // While it is theoretically possible to
222 // legalize operations on scalable types with a
223 // loop that handles the vscale * #lanes of the
224 // vector, this is non-trivial at SelectionDAG
225 // level and these types are better to be
226 // widened or promoted.
227 };
228
229 /// LegalizeKind holds the legalization kind that needs to happen to EVT
230 /// in order to type-legalize it.
231 using LegalizeKind = std::pair<LegalizeTypeAction, EVT>;
232
233 /// Enum that describes how the target represents true/false values.
234 enum BooleanContent {
235 UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage.
236 ZeroOrOneBooleanContent, // All bits zero except for bit 0.
237 ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
238 };
239
240 /// Enum that describes what type of support for selects the target has.
241 enum SelectSupportKind {
242 ScalarValSelect, // The target supports scalar selects (ex: cmov).
243 ScalarCondVectorVal, // The target supports selects with a scalar condition
244 // and vector values (ex: cmov).
245 VectorMaskSelect // The target supports vector selects with a vector
246 // mask (ex: x86 blends).
247 };
248
249 /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
250 /// to, if at all. Exists because different targets have different levels of
251 /// support for these atomic instructions, and also have different options
252 /// w.r.t. what they should expand to.
253 enum class AtomicExpansionKind {
254 None, // Don't expand the instruction.
255 CastToInteger, // Cast the atomic instruction to another type, e.g. from
256 // floating-point to integer type.
    LLSC,     // Expand the instruction into load-linked/store-conditional; used
258 // by ARM/AArch64.
259 LLOnly, // Expand the (load) instruction into just a load-linked, which has
260 // greater atomic guarantees than a normal load.
261 CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
262 MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
263 BitTestIntrinsic, // Use a target-specific intrinsic for special bit
264 // operations; used by X86.
265 CmpArithIntrinsic,// Use a target-specific intrinsic for special compare
266 // operations; used by X86.
267 Expand, // Generic expansion in terms of other atomic operations.
268
269 // Rewrite to a non-atomic form for use in a known non-preemptible
270 // environment.
271 NotAtomic
272 };
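
  // For illustration only: a target normally selects one of these kinds from
  // its atomic-expansion hooks (declared further down in this header). An
  // LL/SC-based target might do something like:
  //
  //   AtomicExpansionKind
  //   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override {
  //     return AI->getType()->getPrimitiveSizeInBits() <= 64
  //                ? AtomicExpansionKind::LLSC
  //                : AtomicExpansionKind::CmpXChg;
  //   }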
273
274 /// Enum that specifies when a multiplication should be expanded.
275 enum class MulExpansionKind {
276 Always, // Always expand the instruction.
277 OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
278 // or custom.
279 };
280
281 /// Enum that specifies when a float negation is beneficial.
282 enum class NegatibleCost {
283 Cheaper = 0, // Negated expression is cheaper.
284 Neutral = 1, // Negated expression has the same cost.
285 Expensive = 2 // Negated expression is more expensive.
286 };
287
288 /// Enum of different potentially desirable ways to fold (and/or (setcc ...),
289 /// (setcc ...)).
290 enum AndOrSETCCFoldKind : uint8_t {
291 None = 0, // No fold is preferable.
292 AddAnd = 1, // Fold with `Add` op and `And` op is preferable.
293 NotAnd = 2, // Fold with `Not` op and `And` op is preferable.
294 ABS = 4, // Fold with `llvm.abs` op is preferable.
295 };
296
297 class ArgListEntry {
298 public:
299 Value *Val = nullptr;
300 SDValue Node = SDValue();
301 Type *Ty = nullptr;
302 bool IsSExt : 1;
303 bool IsZExt : 1;
304 bool IsInReg : 1;
305 bool IsSRet : 1;
306 bool IsNest : 1;
307 bool IsByVal : 1;
308 bool IsByRef : 1;
309 bool IsInAlloca : 1;
310 bool IsPreallocated : 1;
311 bool IsReturned : 1;
312 bool IsSwiftSelf : 1;
313 bool IsSwiftAsync : 1;
314 bool IsSwiftError : 1;
315 bool IsCFGuardTarget : 1;
316 MaybeAlign Alignment = std::nullopt;
317 Type *IndirectType = nullptr;
318
319 ArgListEntry()
320 : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
321 IsNest(false), IsByVal(false), IsByRef(false), IsInAlloca(false),
322 IsPreallocated(false), IsReturned(false), IsSwiftSelf(false),
323 IsSwiftAsync(false), IsSwiftError(false), IsCFGuardTarget(false) {}
324
325 void setAttributes(const CallBase *Call, unsigned ArgIdx);
326 };
327 using ArgListTy = std::vector<ArgListEntry>;
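
  // Sketch of the common fill pattern (illustrative, not prescriptive):
  // lowering code builds up an ArgListTy entry by entry before lowering a
  // call or libcall.
  //
  //   ArgListTy Args;
  //   ArgListEntry Entry;
  //   Entry.Node = Op;                 // the SDValue being passed
  //   Entry.Ty = Op.getValueType().getTypeForEVT(*DAG.getContext());
  //   Entry.IsSExt = true;             // pass the value sign-extended
  //   Args.push_back(Entry);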
328
329 virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC,
330 ArgListTy &Args) const {};
331
332 static ISD::NodeType getExtendForContent(BooleanContent Content) {
333 switch (Content) {
334 case UndefinedBooleanContent:
335 // Extend by adding rubbish bits.
336 return ISD::ANY_EXTEND;
337 case ZeroOrOneBooleanContent:
338 // Extend by adding zero bits.
339 return ISD::ZERO_EXTEND;
340 case ZeroOrNegativeOneBooleanContent:
341 // Extend by copying the sign bit.
342 return ISD::SIGN_EXTEND;
343 }
344 llvm_unreachable("Invalid content kind");
345 }
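
  // For example, a target reporting ZeroOrNegativeOneBooleanContent widens
  // its boolean results with a sign extension:
  //
  //   ISD::NodeType Ext = getExtendForContent(ZeroOrNegativeOneBooleanContent);
  //   // Ext == ISD::SIGN_EXTEND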
346
347 explicit TargetLoweringBase(const TargetMachine &TM);
348 TargetLoweringBase(const TargetLoweringBase &) = delete;
349 TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
350 virtual ~TargetLoweringBase() = default;
351
  /// Return true if the target supports strict float operations.
353 bool isStrictFPEnabled() const {
354 return IsStrictFPEnabled;
355 }
356
357protected:
358 /// Initialize all of the actions to default values.
359 void initActions();
360
361public:
362 const TargetMachine &getTargetMachine() const { return TM; }
363
364 virtual bool useSoftFloat() const { return false; }
365
366 /// Return the pointer type for the given address space, defaults to
367 /// the pointer type from the data layout.
368 /// FIXME: The default needs to be removed once all the code is updated.
369 virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const {
    return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
371 }
372
373 /// Return the in-memory pointer type for the given address space, defaults to
374 /// the pointer type from the data layout.
375 /// FIXME: The default needs to be removed once all the code is updated.
376 virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const {
    return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
378 }
379
380 /// Return the type for frame index, which is determined by
381 /// the alloca address space specified through the data layout.
382 MVT getFrameIndexTy(const DataLayout &DL) const {
    return getPointerTy(DL, DL.getAllocaAddrSpace());
384 }
385
386 /// Return the type for code pointers, which is determined by the program
387 /// address space specified through the data layout.
388 MVT getProgramPointerTy(const DataLayout &DL) const {
    return getPointerTy(DL, DL.getProgramAddressSpace());
390 }
391
392 /// Return the type for operands of fence.
393 /// TODO: Let fence operands be of i32 type and remove this.
394 virtual MVT getFenceOperandTy(const DataLayout &DL) const {
395 return getPointerTy(DL);
396 }
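
  // Typical usage (illustrative; FrameIdx is a placeholder variable): lowering
  // code queries these through the DAG's DataLayout rather than caching MVTs.
  //
  //   MVT PtrVT = getPointerTy(DAG.getDataLayout());
  //   MVT FIVT = getFrameIndexTy(DAG.getDataLayout());
  //   SDValue FI = DAG.getFrameIndex(FrameIdx, FIVT);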
397
398 /// Return the type to use for a scalar shift opcode, given the shifted amount
399 /// type. Targets should return a legal type if the input type is legal.
400 /// Targets can return a type that is too small if the input type is illegal.
401 virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
402
403 /// Returns the type for the shift amount of a shift opcode. For vectors,
404 /// returns the input type. For scalars, calls getScalarShiftAmountTy.
  /// If the type returned by getScalarShiftAmountTy cannot represent all
  /// possible shift amounts, returns MVT::i32.
407 EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const;
408
409 /// Return the preferred type to use for a shift opcode, given the shifted
410 /// amount type is \p ShiftValueTy.
411 LLVM_READONLY
412 virtual LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const {
413 return ShiftValueTy;
414 }
415
416 /// Returns the type to be used for the index operand of:
417 /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
418 /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR
419 virtual MVT getVectorIdxTy(const DataLayout &DL) const {
420 return getPointerTy(DL);
421 }
422
423 /// Returns the type to be used for the EVL/AVL operand of VP nodes:
424 /// ISD::VP_ADD, ISD::VP_SUB, etc. It must be a legal scalar integer type,
425 /// and must be at least as large as i32. The EVL is implicitly zero-extended
426 /// to any larger type.
427 virtual MVT getVPExplicitVectorLengthTy() const { return MVT::i32; }
428
429 /// This callback is used to inspect load/store instructions and add
430 /// target-specific MachineMemOperand flags to them. The default
431 /// implementation does nothing.
432 virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const {
433 return MachineMemOperand::MONone;
434 }
435
436 /// This callback is used to inspect load/store SDNode.
437 /// The default implementation does nothing.
438 virtual MachineMemOperand::Flags
439 getTargetMMOFlags(const MemSDNode &Node) const {
440 return MachineMemOperand::MONone;
441 }
442
443 MachineMemOperand::Flags
444 getLoadMemOperandFlags(const LoadInst &LI, const DataLayout &DL,
445 AssumptionCache *AC = nullptr,
446 const TargetLibraryInfo *LibInfo = nullptr) const;
447 MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI,
448 const DataLayout &DL) const;
449 MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI,
450 const DataLayout &DL) const;
451
452 virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
453 return true;
454 }
455
456 /// Return true if the @llvm.get.active.lane.mask intrinsic should be expanded
457 /// using generic code in SelectionDAGBuilder.
458 virtual bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const {
459 return true;
460 }
461
462 virtual bool shouldExpandGetVectorLength(EVT CountVT, unsigned VF,
463 bool IsScalable) const {
464 return true;
465 }
466
467 /// Return true if the @llvm.experimental.cttz.elts intrinsic should be
468 /// expanded using generic code in SelectionDAGBuilder.
469 virtual bool shouldExpandCttzElements(EVT VT) const { return true; }
470
471 /// Return the minimum number of bits required to hold the maximum possible
472 /// number of trailing zero vector elements.
473 unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC,
474 bool ZeroIsPoison,
475 const ConstantRange *VScaleRange) const;
476
477 // Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to
478 // vecreduce(op(x, y)) for the reduction opcode RedOpc.
479 virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const {
480 return true;
481 }
482
483 /// Return true if it is profitable to convert a select of FP constants into
484 /// a constant pool load whose address depends on the select condition. The
485 /// parameter may be used to differentiate a select with FP compare from
486 /// integer compare.
487 virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
488 return true;
489 }
490
491 /// Return true if multiple condition registers are available.
492 bool hasMultipleConditionRegisters() const {
493 return HasMultipleConditionRegisters;
494 }
495
496 /// Return true if the target has BitExtract instructions.
497 bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
498
499 /// Return the preferred vector type legalization action.
500 virtual TargetLoweringBase::LegalizeTypeAction
501 getPreferredVectorAction(MVT VT) const {
502 // The default action for one element vectors is to scalarize
503 if (VT.getVectorElementCount().isScalar())
504 return TypeScalarizeVector;
505 // The default action for an odd-width vector is to widen.
506 if (!VT.isPow2VectorType())
507 return TypeWidenVector;
508 // The default action for other vectors is to promote
509 return TypePromoteInteger;
510 }
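
  // A subclass override might special-case a few types while deferring to the
  // defaults above, e.g. (illustrative):
  //
  //   TargetLoweringBase::LegalizeTypeAction
  //   getPreferredVectorAction(MVT VT) const override {
  //     if (VT == MVT::v2f16)
  //       return TypeWidenVector; // prefer widening over integer promotion
  //     return TargetLoweringBase::getPreferredVectorAction(VT);
  //   }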
511
512 // Return true if the half type should be promoted using soft promotion rules
513 // where each operation is promoted to f32 individually, then converted to
514 // fp16. The default behavior is to promote chains of operations, keeping
515 // intermediate results in f32 precision and range.
516 virtual bool softPromoteHalfType() const { return false; }
517
  // Return true if, for soft-promoted half, the half type should be passed
  // to and returned from functions as f32. The default behavior is to
520 // pass as i16. If soft-promoted half is not used, this function is ignored
521 // and values are always passed and returned as f32.
522 virtual bool useFPRegsForHalfType() const { return false; }
523
524 // There are two general methods for expanding a BUILD_VECTOR node:
525 // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
526 // them together.
527 // 2. Build the vector on the stack and then load it.
528 // If this function returns true, then method (1) will be used, subject to
529 // the constraint that all of the necessary shuffles are legal (as determined
530 // by isShuffleMaskLegal). If this function returns false, then method (2) is
531 // always used. The vector type, and the number of defined values, are
532 // provided.
533 virtual bool
534 shouldExpandBuildVectorWithShuffles(EVT /* VT */,
535 unsigned DefinedValues) const {
536 return DefinedValues < 3;
537 }
538
539 /// Return true if integer divide is usually cheaper than a sequence of
540 /// several shifts, adds, and multiplies for this target.
541 /// The definition of "cheaper" may depend on whether we're optimizing
542 /// for speed or for size.
543 virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }
544
545 /// Return true if the target can handle a standalone remainder operation.
546 virtual bool hasStandaloneRem(EVT VT) const {
547 return true;
548 }
549
550 /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
551 virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
552 // Default behavior is to replace SQRT(X) with X*RSQRT(X).
553 return false;
554 }
555
556 /// Reciprocal estimate status values used by the functions below.
557 enum ReciprocalEstimate : int {
558 Unspecified = -1,
559 Disabled = 0,
560 Enabled = 1
561 };
562
563 /// Return a ReciprocalEstimate enum value for a square root of the given type
564 /// based on the function's attributes. If the operation is not overridden by
565 /// the function's attributes, "Unspecified" is returned and target defaults
566 /// are expected to be used for instruction selection.
567 int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;
568
569 /// Return a ReciprocalEstimate enum value for a division of the given type
570 /// based on the function's attributes. If the operation is not overridden by
571 /// the function's attributes, "Unspecified" is returned and target defaults
572 /// are expected to be used for instruction selection.
573 int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;
574
575 /// Return the refinement step count for a square root of the given type based
576 /// on the function's attributes. If the operation is not overridden by
577 /// the function's attributes, "Unspecified" is returned and target defaults
578 /// are expected to be used for instruction selection.
579 int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;
580
581 /// Return the refinement step count for a division of the given type based
582 /// on the function's attributes. If the operation is not overridden by
583 /// the function's attributes, "Unspecified" is returned and target defaults
584 /// are expected to be used for instruction selection.
585 int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;
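
  // All four queries above are driven by the function-level
  // "reciprocal-estimates" attribute. As an illustrative (unverified) example,
  // "reciprocal-estimates"="sqrtf:2,!divf" would request a float square-root
  // estimate refined with two steps while disabling the float division
  // estimate; operations not mentioned report Unspecified and fall back to
  // target defaults.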
586
587 /// Returns true if target has indicated at least one type should be bypassed.
588 bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
589
590 /// Returns map of slow types for division or remainder with corresponding
591 /// fast types
592 const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const {
593 return BypassSlowDivWidths;
594 }
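
  // For example (illustrative), a target with a slow 64-bit divider can
  // register a bypass to 32-bit division in its TargetLowering constructor via
  // the protected addBypassSlowDiv() helper declared later in this header; the
  // map would then contain {64 -> 32}:
  //
  //   addBypassSlowDiv(64, 32);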
595
596 /// Return true only if vscale must be a power of two.
597 virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
598
599 /// Return true if Flow Control is an expensive operation that should be
600 /// avoided.
601 bool isJumpExpensive() const { return JumpIsExpensive; }
602
603 // Costs parameters used by
604 // SelectionDAGBuilder::shouldKeepJumpConditionsTogether.
605 // shouldKeepJumpConditionsTogether will use these parameter value to
606 // determine if two conditions in the form `br (and/or cond1, cond2)` should
607 // be split into two branches or left as one.
608 //
609 // BaseCost is the cost threshold (in latency). If the estimated latency of
610 // computing both `cond1` and `cond2` is below the cost of just computing
611 // `cond1` + BaseCost, the two conditions will be kept together. Otherwise
612 // they will be split.
613 //
614 // LikelyBias increases BaseCost if branch probability info indicates that it
615 // is likely that both `cond1` and `cond2` will be computed.
616 //
617 // UnlikelyBias decreases BaseCost if branch probability info indicates that
  // it is unlikely that both `cond1` and `cond2` will be computed.
619 //
620 // Set any field to -1 to make it ignored (setting BaseCost to -1 results in
621 // `shouldKeepJumpConditionsTogether` always returning false).
622 struct CondMergingParams {
623 int BaseCost;
624 int LikelyBias;
625 int UnlikelyBias;
626 };
627 // Return params for deciding if we should keep two branch conditions merged
628 // or split them into two separate branches.
629 // Arg0: The binary op joining the two conditions (and/or).
630 // Arg1: The first condition (cond1)
631 // Arg2: The second condition (cond2)
632 virtual CondMergingParams
633 getJumpConditionMergingParams(Instruction::BinaryOps, const Value *,
634 const Value *) const {
635 // -1 will always result in splitting.
    return {-1, -1, -1};
637 }
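
  // A target that opts in might return something like the following
  // (illustrative numbers, not a tuning recommendation):
  //
  //   CondMergingParams
  //   getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *,
  //                                 const Value *) const override {
  //     if (Opc == Instruction::And)
  //       return {/*BaseCost=*/2, /*LikelyBias=*/1, /*UnlikelyBias=*/-1};
  //     return {-1, -1, -1}; // keep splitting everything else
  //   }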
638
639 /// Return true if selects are only cheaper than branches if the branch is
640 /// unlikely to be predicted right.
641 bool isPredictableSelectExpensive() const {
642 return PredictableSelectIsExpensive;
643 }
644
645 virtual bool fallBackToDAGISel(const Instruction &Inst) const {
646 return false;
647 }
648
649 /// Return true if the following transform is beneficial:
650 /// fold (conv (load x)) -> (load (conv*)x)
651 /// On architectures that don't natively support some vector loads
652 /// efficiently, casting the load to a smaller vector of larger types and
/// loading is more efficient; however, this can be undone by optimizations
/// in the DAG combiner.
655 virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
656 const SelectionDAG &DAG,
657 const MachineMemOperand &MMO) const;
658
659 /// Return true if the following transform is beneficial:
660 /// (store (y (conv x)), y*)) -> (store x, (x*))
661 virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,
662 const SelectionDAG &DAG,
663 const MachineMemOperand &MMO) const {
664 // Default to the same logic as loads.
    return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO);
666 }
667
668 /// Return true if it is expected to be cheaper to do a store of vector
669 /// constant with the given size and type for the address space than to
670 /// store the individual scalar element constants.
671 virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
672 unsigned NumElem,
673 unsigned AddrSpace) const {
674 return IsZero;
675 }
676
677 /// Allow store merging for the specified type after legalization in addition
678 /// to before legalization. This may transform stores that do not exist
679 /// earlier (for example, stores created from intrinsics).
680 virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
681 return true;
682 }
683
  /// Returns true if it's reasonable to merge stores to MemVT size.
685 virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
686 const MachineFunction &MF) const {
687 return true;
688 }
689
690 /// Return true if it is cheap to speculate a call to intrinsic cttz.
691 virtual bool isCheapToSpeculateCttz(Type *Ty) const {
692 return false;
693 }
694
695 /// Return true if it is cheap to speculate a call to intrinsic ctlz.
696 virtual bool isCheapToSpeculateCtlz(Type *Ty) const {
697 return false;
698 }
699
700 /// Return true if ctlz instruction is fast.
701 virtual bool isCtlzFast() const {
702 return false;
703 }
704
705 /// Return true if ctpop instruction is fast.
706 virtual bool isCtpopFast(EVT VT) const {
    return isOperationLegal(ISD::CTPOP, VT);
708 }
709
710 /// Return the maximum number of "x & (x - 1)" operations that can be done
711 /// instead of deferring to a custom CTPOP.
712 virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const {
713 return 1;
714 }
715
716 /// Return true if instruction generated for equality comparison is folded
717 /// with instruction generated for signed comparison.
718 virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; }
719
720 /// Return true if the heuristic to prefer icmp eq zero should be used in code
721 /// gen prepare.
722 virtual bool preferZeroCompareBranch() const { return false; }
723
724 /// Return true if it is cheaper to split the store of a merged int val
725 /// from a pair of smaller values into multiple stores.
726 virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
727 return false;
728 }
729
  /// Return true if the target supports combining a
731 /// chain like:
732 /// \code
733 /// %andResult = and %val1, #mask
734 /// %icmpResult = icmp %andResult, 0
735 /// \endcode
736 /// into a single machine instruction of a form like:
737 /// \code
738 /// cc = test %register, #mask
739 /// \endcode
740 virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
741 return false;
742 }
743
744 /// Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
745 virtual bool
746 areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX,
747 const MemSDNode &NodeY) const {
748 return true;
749 }
750
751 /// Use bitwise logic to make pairs of compares more efficient. For example:
752 /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
753 /// This should be true when it takes more than one instruction to lower
754 /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
755 /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
756 virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
757 return false;
758 }
759
760 /// Return the preferred operand type if the target has a quick way to compare
761 /// integer values of the given size. Assume that any legal integer type can
762 /// be compared efficiently. Targets may override this to allow illegal wide
763 /// types to return a vector type if there is support to compare that type.
764 virtual MVT hasFastEqualityCompare(unsigned NumBits) const {
    MVT VT = MVT::getIntegerVT(NumBits);
766 return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE;
767 }
768
769 /// Return true if the target should transform:
770 /// (X & Y) == Y ---> (~X & Y) == 0
771 /// (X & Y) != Y ---> (~X & Y) != 0
772 ///
773 /// This may be profitable if the target has a bitwise and-not operation that
774 /// sets comparison flags. A target may want to limit the transformation based
775 /// on the type of Y or if Y is a constant.
776 ///
777 /// Note that the transform will not occur if Y is known to be a power-of-2
778 /// because a mask and compare of a single bit can be handled by inverting the
779 /// predicate, for example:
780 /// (X & 8) == 8 ---> (X & 8) != 0
781 virtual bool hasAndNotCompare(SDValue Y) const {
782 return false;
783 }
784
785 /// Return true if the target has a bitwise and-not operation:
786 /// X = ~A & B
787 /// This can be used to simplify select or other instructions.
788 virtual bool hasAndNot(SDValue X) const {
789 // If the target has the more complex version of this operation, assume that
790 // it has this operation too.
    return hasAndNotCompare(X);
792 }
793
794 /// Return true if the target has a bit-test instruction:
795 /// (X & (1 << Y)) ==/!= 0
796 /// This knowledge can be used to prevent breaking the pattern,
797 /// or creating it if it could be recognized.
798 virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }
799
800 /// There are two ways to clear extreme bits (either low or high):
801 /// Mask: x & (-1 << y) (the instcombine canonical form)
802 /// Shifts: x >> y << y
803 /// Return true if the variant with 2 variable shifts is preferred.
804 /// Return false if there is no preference.
805 virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
806 // By default, let's assume that no one prefers shifts.
807 return false;
808 }
809
810 /// Return true if it is profitable to fold a pair of shifts into a mask.
811 /// This is usually true on most targets. But some targets, like Thumb1,
812 /// have immediate shift instructions, but no immediate "and" instruction;
813 /// this makes the fold unprofitable.
814 virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
815 CombineLevel Level) const {
816 return true;
817 }
818
  /// Should we transform the IR-optimal check for whether given truncation
820 /// down into KeptBits would be truncating or not:
821 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
  /// Into its more traditional form:
823 /// ((%x << C) a>> C) dstcond %x
824 /// Return true if we should transform.
825 /// Return false if there is no preference.
826 virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
827 unsigned KeptBits) const {
828 // By default, let's assume that no one prefers shifts.
829 return false;
830 }
831
832 /// Given the pattern
833 /// (X & (C l>>/<< Y)) ==/!= 0
834 /// return true if it should be transformed into:
835 /// ((X <</l>> Y) & C) ==/!= 0
836 /// WARNING: if 'X' is a constant, the fold may deadlock!
837 /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
838 /// here because it can end up being not linked in.
839 virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
840 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
841 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
842 SelectionDAG &DAG) const {
843 if (hasBitTest(X, Y)) {
844 // One interesting pattern that we'd want to form is 'bit test':
845 // ((1 << Y) & C) ==/!= 0
846 // But we also need to be careful not to try to reverse that fold.
847
848 // Is this '1 << Y' ?
849 if (OldShiftOpcode == ISD::SHL && CC->isOne())
850 return false; // Keep the 'bit test' pattern.
851
852 // Will it be '1 << Y' after the transform ?
853 if (XC && NewShiftOpcode == ISD::SHL && XC->isOne())
854 return true; // Do form the 'bit test' pattern.
855 }
856
857 // If 'X' is a constant, and we transform, then we will immediately
858 // try to undo the fold, thus causing endless combine loop.
859 // So by default, let's assume everyone prefers the fold
860 // iff 'X' is not a constant.
861 return !XC;
862 }
863
  // Return true if it's desirable to perform the following transform:
865 // (fmul C, (uitofp Pow2))
866 // -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
867 // (fdiv C, (uitofp Pow2))
868 // -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
869 //
870 // This is only queried after we have verified the transform will be bitwise
871 // equals.
872 //
873 // SDNode *N : The FDiv/FMul node we want to transform.
874 // SDValue FPConst: The Float constant operand in `N`.
875 // SDValue IntPow2: The Integer power of 2 operand in `N`.
876 virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
877 SDValue IntPow2) const {
878 // Default to avoiding fdiv which is often very expensive.
879 return N->getOpcode() == ISD::FDIV;
880 }
881
882 // Given:
883 // (icmp eq/ne (and X, C0), (shift X, C1))
884 // or
885 // (icmp eq/ne X, (rotate X, CPow2))
886
887 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
  // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`),
  // do we prefer the shift to be shift-right, shift-left, or rotate?
  // Note: It's only valid to convert the rotate version to the shift version iff
891 // the shift-amt (`C1`) is a power of 2 (including 0).
892 // If ShiftOpc (current Opcode) is returned, do nothing.
893 virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(
894 EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
895 const APInt &ShiftOrRotateAmt,
896 const std::optional<APInt> &AndMask) const {
897 return ShiftOpc;
898 }
899
900 /// These two forms are equivalent:
901 /// sub %y, (xor %x, -1)
902 /// add (add %x, 1), %y
903 /// The variant with two add's is IR-canonical.
904 /// Some targets may prefer one to the other.
905 virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
906 // By default, let's assume that everyone prefers the form with two add's.
907 return true;
908 }
909
910 // By default prefer folding (abs (sub nsw x, y)) -> abds(x, y). Some targets
911 // may want to avoid this to prevent loss of sub_nsw pattern.
912 virtual bool preferABDSToABSWithNSW(EVT VT) const {
913 return true;
914 }
915
916 // Return true if the target wants to transform Op(Splat(X)) -> Splat(Op(X))
917 virtual bool preferScalarizeSplat(SDNode *N) const { return true; }
918
919 // Return true if the target wants to transform:
920 // (TruncVT truncate(sext_in_reg(VT X, ExtVT))
921 // -> (TruncVT sext_in_reg(truncate(VT X), ExtVT))
922 // Some targets might prefer pre-sextinreg to improve truncation/saturation.
923 virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const {
924 return true;
925 }
926
927 /// Return true if the target wants to use the optimization that
928 /// turns ext(promotableInst1(...(promotableInstN(load)))) into
929 /// promotedInst1(...(promotedInstN(ext(load)))).
930 bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
931
932 /// Return true if the target can combine store(extractelement VectorTy,
933 /// Idx).
934 /// \p Cost[out] gives the cost of that transformation when this is true.
935 virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
936 unsigned &Cost) const {
937 return false;
938 }
939
940 /// Return true if the target shall perform extract vector element and store
941 /// given that the vector is known to be splat of constant.
942 /// \p Index[out] gives the index of the vector element to be extracted when
943 /// this is true.
944 virtual bool shallExtractConstSplatVectorElementToStore(
945 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
946 return false;
947 }
948
949 /// Return true if inserting a scalar into a variable element of an undef
950 /// vector is more efficiently handled by splatting the scalar instead.
951 virtual bool shouldSplatInsEltVarIndex(EVT) const {
952 return false;
953 }
954
955 /// Return true if target always benefits from combining into FMA for a
956 /// given value type. This must typically return false on targets where FMA
957 /// takes more cycles to execute than FADD.
958 virtual bool enableAggressiveFMAFusion(EVT VT) const { return false; }
959
960 /// Return true if target always benefits from combining into FMA for a
961 /// given value type. This must typically return false on targets where FMA
962 /// takes more cycles to execute than FADD.
963 virtual bool enableAggressiveFMAFusion(LLT Ty) const { return false; }
964
965 /// Return the ValueType of the result of SETCC operations.
966 virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
967 EVT VT) const;
968
969 /// Return the ValueType for comparison libcalls. Comparison libcalls include
970 /// floating point comparison calls, and Ordered/Unordered check calls on
971 /// floating point numbers.
972 virtual
973 MVT::SimpleValueType getCmpLibcallReturnType() const;
974
975 /// For targets without i1 registers, this gives the nature of the high-bits
976 /// of boolean values held in types wider than i1.
977 ///
978 /// "Boolean values" are special true/false values produced by nodes like
979 /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND.
980 /// Not to be confused with general values promoted from i1. Some cpus
981 /// distinguish between vectors of boolean and scalars; the isVec parameter
982 /// selects between the two kinds. For example on X86 a scalar boolean should
983 /// be zero extended from i1, while the elements of a vector of booleans
984 /// should be sign extended from i1.
985 ///
986 /// Some cpus also treat floating point types the same way as they treat
987 /// vectors instead of the way they treat scalars.
988 BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
989 if (isVec)
990 return BooleanVectorContents;
991 return isFloat ? BooleanFloatContents : BooleanContents;
992 }
993
994 BooleanContent getBooleanContents(EVT Type) const {
    return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
996 }
997
998 /// Promote the given target boolean to a target boolean of the given type.
999 /// A target boolean is an integer value, not necessarily of type i1, the bits
1000 /// of which conform to getBooleanContents.
1001 ///
1002 /// ValVT is the type of values that produced the boolean.
1003 SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool,
1004 EVT ValVT) const {
1005 SDLoc dl(Bool);
1006 EVT BoolVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ValVT);
    ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(ValVT));
    return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
1010 }
1011
1012 /// Return target scheduling preference.
1013 Sched::Preference getSchedulingPreference() const {
1014 return SchedPreferenceInfo;
1015 }
1016
  /// Some schedulers, e.g. hybrid, can switch to different scheduling heuristics
1018 /// for different nodes. This function returns the preference (or none) for
1019 /// the given node.
1020 virtual Sched::Preference getSchedulingPreference(SDNode *) const {
1021 return Sched::None;
1022 }
1023
1024 /// Return the register class that should be used for the specified value
1025 /// type.
1026 virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
1027 (void)isDivergent;
1028 const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
1029 assert(RC && "This value type is not natively supported!");
1030 return RC;
1031 }
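
  // The RegClassForVT table consulted above is normally populated in the
  // target's TargetLowering constructor via the protected addRegisterClass()
  // helper declared later in this header, e.g. (illustrative target "Foo"):
  //
  //   addRegisterClass(MVT::i32, &Foo::GPR32RegClass);
  //   addRegisterClass(MVT::f64, &Foo::FPR64RegClass);
  //
  // after which getRegClassFor(MVT::i32) returns &Foo::GPR32RegClass.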
1032
1033 /// Allows target to decide about the register class of the
1034 /// specific value that is live outside the defining block.
1035 /// Returns true if the value needs uniform register class.
1036 virtual bool requiresUniformRegister(MachineFunction &MF,
1037 const Value *) const {
1038 return false;
1039 }
1040
1041 /// Return the 'representative' register class for the specified value
1042 /// type.
1043 ///
1044 /// The 'representative' register class is the largest legal super-reg
1045 /// register class for the register class of the value type. For example, on
  /// i386 the rep register class for i8, i16, and i32 is GR32; while the rep
1047 /// register class is GR64 on x86_64.
1048 virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
1049 const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
1050 return RC;
1051 }
1052
1053 /// Return the cost of the 'representative' register class for the specified
1054 /// value type.
1055 virtual uint8_t getRepRegClassCostFor(MVT VT) const {
1056 return RepRegClassCostForVT[VT.SimpleTy];
1057 }
1058
  /// Return the preferred strategy to legalize this SHIFT instruction, with
  /// \p ExpansionFactor being the recursion depth - how many expansions are
  /// needed.
1061 enum class ShiftLegalizationStrategy {
1062 ExpandToParts,
1063 ExpandThroughStack,
1064 LowerToLibcall
1065 };
1066 virtual ShiftLegalizationStrategy
1067 preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
1068 unsigned ExpansionFactor) const {
1069 if (ExpansionFactor == 1)
1070 return ShiftLegalizationStrategy::ExpandToParts;
1071 return ShiftLegalizationStrategy::ExpandThroughStack;
1072 }
1073
1074 /// Return true if the target has native support for the specified value type.
1075 /// This means that it has a register that directly holds it without
1076 /// promotions or expansions.
1077 bool isTypeLegal(EVT VT) const {
1078 assert(!VT.isSimple() ||
1079 (unsigned)VT.getSimpleVT().SimpleTy < std::size(RegClassForVT));
1080 return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
1081 }
1082
1083 class ValueTypeActionImpl {
1084 /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum
1085 /// that indicates how instruction selection should deal with the type.
1086 LegalizeTypeAction ValueTypeActions[MVT::VALUETYPE_SIZE];
1087
1088 public:
1089 ValueTypeActionImpl() {
      std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions),
1091 TypeLegal);
1092 }
1093
1094 LegalizeTypeAction getTypeAction(MVT VT) const {
1095 return ValueTypeActions[VT.SimpleTy];
1096 }
1097
1098 void setTypeAction(MVT VT, LegalizeTypeAction Action) {
1099 ValueTypeActions[VT.SimpleTy] = Action;
1100 }
1101 };
1102
1103 const ValueTypeActionImpl &getValueTypeActions() const {
1104 return ValueTypeActions;
1105 }
1106
1107 /// Return pair that represents the legalization kind (first) that needs to
1108 /// happen to EVT (second) in order to type-legalize it.
1109 ///
1110 /// First: how we should legalize values of this type, either it is already
1111 /// legal (return 'Legal') or we need to promote it to a larger type (return
1112 /// 'Promote'), or we need to expand it into multiple registers of smaller
1113 /// integer type (return 'Expand'). 'Custom' is not an option.
1114 ///
1115 /// Second: for types supported by the target, this is an identity function.
1116 /// For types that must be promoted to larger types, this returns the larger
1117 /// type to promote to. For integer types that are larger than the largest
1118 /// integer register, this contains one step in the expansion to get to the
1119 /// smaller register. For illegal floating point types, this returns the
1120 /// integer type to transform to.
1121 LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const;
1122
1123 /// Return how we should legalize values of this type, either it is already
1124 /// legal (return 'Legal') or we need to promote it to a larger type (return
1125 /// 'Promote'), or we need to expand it into multiple registers of smaller
1126 /// integer type (return 'Expand'). 'Custom' is not an option.
1127 LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const {
1128 return getTypeConversion(Context, VT).first;
1129 }
1130 LegalizeTypeAction getTypeAction(MVT VT) const {
1131 return ValueTypeActions.getTypeAction(VT);
1132 }
1133
1134 /// For types supported by the target, this is an identity function. For
1135 /// types that must be promoted to larger types, this returns the larger type
1136 /// to promote to. For integer types that are larger than the largest integer
1137 /// register, this contains one step in the expansion to get to the smaller
1138 /// register. For illegal floating point types, this returns the integer type
1139 /// to transform to.
1140 virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const {
1141 return getTypeConversion(Context, VT).second;
1142 }
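
  // Worked examples (assuming a typical 32-bit target with no native f16 and
  // no vector registers): getTypeToTransformTo(Ctx, MVT::i64) is MVT::i32
  // (one expansion step), getTypeToTransformTo(Ctx, MVT::f16) is MVT::f32
  // (promotion), and getTypeToTransformTo(Ctx, MVT::v1i32) is MVT::i32
  // (scalarization).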
1143
1144 /// For types supported by the target, this is an identity function. For
1145 /// types that must be expanded (i.e. integer types that are larger than the
1146 /// largest integer register or illegal floating point types), this returns
1147 /// the largest legal type it will be expanded to.
1148 EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
1149 assert(!VT.isVector());
1150 while (true) {
1151 switch (getTypeAction(Context, VT)) {
1152 case TypeLegal:
1153 return VT;
1154 case TypeExpandInteger:
1155 VT = getTypeToTransformTo(Context, VT);
1156 break;
1157 default:
1158 llvm_unreachable("Type is not legal nor is it to be expanded!");
1159 }
1160 }
1161 }
1162
1163 /// Vector types are broken down into some number of legal first class types.
1164 /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
1165 /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64
1166 /// turns into 4 EVT::i32 values with both PPC and X86.
1167 ///
1168 /// This method returns the number of registers needed, and the VT for each
1169 /// register. It also returns the VT and quantity of the intermediate values
1170 /// before they are promoted/expanded.
1171 unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
1172 EVT &IntermediateVT,
1173 unsigned &NumIntermediates,
1174 MVT &RegisterVT) const;
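
  // For example, matching the comment above (illustrative, SSE1-only x86;
  // Ctx is the LLVMContext):
  //
  //   EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates;
  //   unsigned NumRegs = getVectorTypeBreakdown(Ctx, MVT::v8f32, IntermediateVT,
  //                                             NumIntermediates, RegisterVT);
  //   // NumRegs == 2, IntermediateVT == v4f32, NumIntermediates == 2,
  //   // RegisterVT == v4f32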
1175
1176 /// Certain targets such as MIPS require that some types such as vectors are
1177 /// always broken down into scalars in some contexts. This occurs even if the
1178 /// vector type is legal.
1179 virtual unsigned getVectorTypeBreakdownForCallingConv(
1180 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1181 unsigned &NumIntermediates, MVT &RegisterVT) const {
1182 return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
1183 RegisterVT);
1184 }
1185
1186 struct IntrinsicInfo {
1187 unsigned opc = 0; // target opcode
1188 EVT memVT; // memory VT
1189
1190 // value representing memory location
1191 PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;
1192
1193 // Fallback address space for use if ptrVal is nullptr. std::nullopt means
1194 // unknown address space.
1195 std::optional<unsigned> fallbackAddressSpace;
1196
1197 int offset = 0; // offset off of ptrVal
1198 uint64_t size = 0; // the size of the memory location
1199 // (taken from memVT if zero)
1200 MaybeAlign align = Align(1); // alignment
1201
1202 MachineMemOperand::Flags flags = MachineMemOperand::MONone;
1203 IntrinsicInfo() = default;
1204 };
1205
1206 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1207 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
  /// true and stores the intrinsic information into the IntrinsicInfo that was
1209 /// passed to the function.
1210 virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
1211 MachineFunction &,
1212 unsigned /*Intrinsic*/) const {
1213 return false;
1214 }
1215
1216 /// Returns true if the target can instruction select the specified FP
1217 /// immediate natively. If false, the legalizer will materialize the FP
1218 /// immediate as a load from a constant pool.
1219 virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/,
1220 bool ForCodeSize = false) const {
1221 return false;
1222 }
1223
1224 /// Targets can use this to indicate that they only support *some*
1225 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1226 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be
1227 /// legal.
1228 virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
1229 return true;
1230 }
1231
1232 /// Returns true if the operation can trap for the value type.
1233 ///
1234 /// VT must be a legal type. By default, we optimistically assume most
1235 /// operations don't trap except for integer divide and remainder.
1236 virtual bool canOpTrap(unsigned Op, EVT VT) const;
1237
1238 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1239 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1240 /// constant pool entry.
1241 virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
1242 EVT /*VT*/) const {
1243 return false;
1244 }
1245
1246 /// How to legalize this custom operation?
1247 virtual LegalizeAction getCustomOperationAction(SDNode &Op) const {
1248 return Legal;
1249 }
1250
1251 /// Return how this operation should be treated: either it is legal, needs to
1252 /// be promoted to a larger size, needs to be expanded to some other code
1253 /// sequence, or the target has a custom expander for it.
1254 LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
1255 // If a target-specific SDNode requires legalization, require the target
1256 // to provide custom legalization for it.
1257 if (Op >= std::size(OpActions[0]))
1258 return Custom;
1259 if (VT.isExtended())
1260 return Expand;
1261 return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
1262 }
1263
1264 /// Custom method defined by each target to indicate if an operation which
1265 /// may require a scale is supported natively by the target.
1266 /// If not, the operation is illegal.
1267 virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
1268 unsigned Scale) const {
1269 return false;
1270 }
1271
1272 /// Some fixed point operations may be natively supported by the target but
1273 /// only for specific scales. This method allows for checking
1274 /// if the width is supported by the target for a given operation that may
1275 /// depend on scale.
1276 LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
1277 unsigned Scale) const {
1278 auto Action = getOperationAction(Op, VT);
1279 if (Action != Legal)
1280 return Action;
1281
1282 // This operation is supported in this type but may only work on specific
1283 // scales.
1284 bool Supported;
1285 switch (Op) {
1286 default:
1287 llvm_unreachable("Unexpected fixed point operation.");
1288 case ISD::SMULFIX:
1289 case ISD::SMULFIXSAT:
1290 case ISD::UMULFIX:
1291 case ISD::UMULFIXSAT:
1292 case ISD::SDIVFIX:
1293 case ISD::SDIVFIXSAT:
1294 case ISD::UDIVFIX:
1295 case ISD::UDIVFIXSAT:
1296 Supported = isSupportedFixedPointOperation(Op, VT, Scale);
1297 break;
1298 }
1299
1300 return Supported ? Action : Expand;
1301 }
1302
1303 // If Op is a strict floating-point operation, return the result
1304 // of getOperationAction for the equivalent non-strict operation.
1305 LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
1306 unsigned EqOpc;
1307 switch (Op) {
1308 default: llvm_unreachable("Unexpected FP pseudo-opcode");
1309#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
1310 case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break;
1311#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
1312 case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break;
1313#include "llvm/IR/ConstrainedOps.def"
1314 }
1315
    return getOperationAction(EqOpc, VT);
1317 }
1318
1319 /// Return true if the specified operation is legal on this target or can be
1320 /// made legal with custom lowering. This is used to help guide high-level
1321 /// lowering decisions. LegalOnly is an optional convenience for code paths
1322 /// traversed pre and post legalisation.
1323 bool isOperationLegalOrCustom(unsigned Op, EVT VT,
1324 bool LegalOnly = false) const {
1325 if (LegalOnly)
1326 return isOperationLegal(Op, VT);
1327
1328 return (VT == MVT::Other || isTypeLegal(VT)) &&
1329 (getOperationAction(Op, VT) == Legal ||
1330 getOperationAction(Op, VT) == Custom);
1331 }
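
  // Typical DAG-combine guard (illustrative): only form an FMA node when the
  // target can select or custom-lower it for this type.
  //
  //   if (!TLI.isOperationLegalOrCustom(ISD::FMA, VT))
  //     return SDValue();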
1332
1333 /// Return true if the specified operation is legal on this target or can be
1334 /// made legal using promotion. This is used to help guide high-level lowering
1335 /// decisions. LegalOnly is an optional convenience for code paths traversed
1336 /// pre and post legalisation.
1337 bool isOperationLegalOrPromote(unsigned Op, EVT VT,
1338 bool LegalOnly = false) const {
1339 if (LegalOnly)
1340 return isOperationLegal(Op, VT);
1341
1342 return (VT == MVT::Other || isTypeLegal(VT)) &&
1343 (getOperationAction(Op, VT) == Legal ||
1344 getOperationAction(Op, VT) == Promote);
1345 }
1346
1347 /// Return true if the specified operation is legal on this target or can be
1348 /// made legal with custom lowering or using promotion. This is used to help
1349 /// guide high-level lowering decisions. LegalOnly is an optional convenience
1350 /// for code paths traversed pre and post legalisation.
1351 bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT,
1352 bool LegalOnly = false) const {
1353 if (LegalOnly)
1354 return isOperationLegal(Op, VT);
1355
1356 return (VT == MVT::Other || isTypeLegal(VT)) &&
1357 (getOperationAction(Op, VT) == Legal ||
1358 getOperationAction(Op, VT) == Custom ||
1359 getOperationAction(Op, VT) == Promote);
1360 }
1361
1362 /// Return true if the operation uses custom lowering, regardless of whether
1363 /// the type is legal or not.
1364 bool isOperationCustom(unsigned Op, EVT VT) const {
1365 return getOperationAction(Op, VT) == Custom;
1366 }
1367
1368 /// Return true if lowering to a jump table is allowed.
1369 virtual bool areJTsAllowed(const Function *Fn) const {
1370 if (Fn->getFnAttribute("no-jump-tables").getValueAsBool())
1371 return false;
1372
1373 return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
1374 isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
1375 }
1376
1377 /// Check whether the range [Low,High] fits in a machine word.
1378 bool rangeFitsInWord(const APInt &Low, const APInt &High,
1379 const DataLayout &DL) const {
1380 // FIXME: Using the pointer type doesn't seem ideal.
1381 uint64_t BW = DL.getIndexSizeInBits(0u);
1382 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
1383 return Range <= BW;
1384 }
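  // Worked example: with a 64-bit index type, Low = 0 and High = 63 give
  // Range = 64, which fits in a word; High = 64 gives Range = 65 and the
  // check fails, so the bit-test lowering below cannot be used.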
1385
1386 /// Return true if lowering to a jump table is suitable for a set of case
1387 /// clusters which may contain \p NumCases cases spanning a range of \p Range values.
1388 virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
1389 uint64_t Range, ProfileSummaryInfo *PSI,
1390 BlockFrequencyInfo *BFI) const;
1391
1392 /// Returns preferred type for switch condition.
1393 virtual MVT getPreferredSwitchConditionType(LLVMContext &Context,
1394 EVT ConditionVT) const;
1395
1396 /// Return true if lowering to a bit test is suitable for a set of case
1397 /// clusters which contains \p NumDests unique destinations, \p Low and
1398 /// \p High as its lowest and highest case values, and expects \p NumCmps
1399 /// case value comparisons. Check if the number of destinations, comparison
1400 /// metric, and range are all suitable.
1401 bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
1402 const APInt &Low, const APInt &High,
1403 const DataLayout &DL) const {
1404 // FIXME: I don't think NumCmps is the correct metric: a single case and a
1405 // range of cases both require only one branch to lower. Just looking at the
1406 // number of clusters and destinations should be enough to decide whether to
1407 // build bit tests.
1408
1409 // To lower a range with bit tests, the range must fit the bitwidth of a
1410 // machine word.
1411 if (!rangeFitsInWord(Low, High, DL))
1412 return false;
1413
1414 // Decide whether it's profitable to lower this range with bit tests. Each
1415 // destination requires a bit test and branch, and there is an overall range
1416 // check branch. For a small number of clusters, separate comparisons might
1417 // be cheaper, and for many destinations, splitting the range might be
1418 // better.
1419 return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
1420 (NumDests == 3 && NumCmps >= 6);
1421 }
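  // Worked example (assuming a 64-bit index width): three destinations
  // reached by six case values in [0, 31] pass both the word-size check and
  // the (NumDests == 3 && NumCmps >= 6) threshold, i.e.
  //
  //   isSuitableForBitTests(/*NumDests=*/3, /*NumCmps=*/6,
  //                         APInt(64, 0), APInt(64, 31), DL); // true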
1422
1423 /// Return true if the specified operation is illegal on this target or
1424 /// unlikely to be made legal with custom lowering. This is used to help guide
1425 /// high-level lowering decisions.
1426 bool isOperationExpand(unsigned Op, EVT VT) const {
1427 return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand);
1428 }
1429
1430 /// Return true if the specified operation is legal on this target.
1431 bool isOperationLegal(unsigned Op, EVT VT) const {
1432 return (VT == MVT::Other || isTypeLegal(VT)) &&
1433 getOperationAction(Op, VT) == Legal;
1434 }
1435
1436 /// Return how this load with extension should be treated: either it is legal,
1437 /// needs to be promoted to a larger size, needs to be expanded to some other
1438 /// code sequence, or the target has a custom expander for it.
1439 LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT,
1440 EVT MemVT) const {
1441 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1442 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1443 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1444 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE &&
1445 MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!");
1446 unsigned Shift = 4 * ExtType;
1447 return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf);
1448 }
1449
1450 /// Return true if the specified load with extension is legal on this target.
1451 bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1452 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal;
1453 }
1454
1455 /// Return true if the specified load with extension is legal or custom
1456 /// on this target.
1457 bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1458 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal ||
1459 getLoadExtAction(ExtType, ValVT, MemVT) == Custom;
1460 }
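  // Usage sketch (hypothetical target): a backend with a native
  // sign-extending i16 -> i32 load would configure and query it as
  //
  //   setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i16, Legal); // in ctor
  //   ...
  //   isLoadExtLegal(ISD::SEXTLOAD, MVT::i32, MVT::i16);          // true
  //
  // using the setLoadExtAction hook declared later in this class.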
1461
1462 /// Same as getLoadExtAction, but for atomic loads.
1463 LegalizeAction getAtomicLoadExtAction(unsigned ExtType, EVT ValVT,
1464 EVT MemVT) const {
1465 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1466 unsigned ValI = (unsigned)ValVT.getSimpleVT().SimpleTy;
1467 unsigned MemI = (unsigned)MemVT.getSimpleVT().SimpleTy;
1468 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE &&
1469 MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!");
1470 unsigned Shift = 4 * ExtType;
1471 LegalizeAction Action =
1472 (LegalizeAction)((AtomicLoadExtActions[ValI][MemI] >> Shift) & 0xf);
1473 assert((Action == Legal || Action == Expand) &&
1474 "Unsupported atomic load extension action.");
1475 return Action;
1476 }
1477
1478 /// Return true if the specified atomic load with extension is legal on
1479 /// this target.
1480 bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1481 return getAtomicLoadExtAction(ExtType, ValVT, MemVT) == Legal;
1482 }
1483
1484 /// Return how this store with truncation should be treated: either it is
1485 /// legal, needs to be promoted to a larger size, needs to be expanded to some
1486 /// other code sequence, or the target has a custom expander for it.
1487 LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
1488 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1489 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1490 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1491 assert(ValI < MVT::VALUETYPE_SIZE && MemI < MVT::VALUETYPE_SIZE &&
1492 "Table isn't big enough!");
1493 return TruncStoreActions[ValI][MemI];
1494 }
1495
1496 /// Return true if the specified store with truncation is legal on this
1497 /// target.
1498 bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const {
1499 return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal;
1500 }
1501
1502 /// Return true if the specified store with truncation is legal or has a
1503 /// custom lowering on this target.
1504 bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const {
1505 return isTypeLegal(ValVT) &&
1506 (getTruncStoreAction(ValVT, MemVT) == Legal ||
1507 getTruncStoreAction(ValVT, MemVT) == Custom);
1508 }
1509
1510 virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT,
1511 bool LegalOnly) const {
1512 if (LegalOnly)
1513 return isTruncStoreLegal(ValVT, MemVT);
1514
1515 return isTruncStoreLegalOrCustom(ValVT, MemVT);
1516 }
1517
1518 /// Return how the indexed load should be treated: either it is legal, needs
1519 /// to be promoted to a larger size, needs to be expanded to some other code
1520 /// sequence, or the target has a custom expander for it.
1521 LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
1522 return getIndexedModeAction(IdxMode, VT, IMAB_Load);
1523 }
1524
1525 /// Return true if the specified indexed load is legal on this target.
1526 bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const {
1527 return VT.isSimple() &&
1528 (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
1529 getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1530 }
1531
1532 /// Return how the indexed store should be treated: either it is legal, needs
1533 /// to be promoted to a larger size, needs to be expanded to some other code
1534 /// sequence, or the target has a custom expander for it.
1535 LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
1536 return getIndexedModeAction(IdxMode, VT, IMAB_Store);
1537 }
1538
1539 /// Return true if the specified indexed store is legal on this target.
1540 bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
1541 return VT.isSimple() &&
1542 (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
1543 getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1544 }
1545
1546 /// Return how the indexed masked load should be treated: either it is legal,
1547 /// needs to be promoted to a larger size, needs to be expanded to some other
1548 /// code sequence, or the target has a custom expander for it.
1549 LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const {
1550 return getIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad);
1551 }
1552
1553 /// Return true if the specified indexed masked load is legal on this target.
1554 bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const {
1555 return VT.isSimple() &&
1556 (getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
1557 getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1558 }
1559
1560 /// Return how the indexed masked store should be treated: either it is legal,
1561 /// needs to be promoted to a larger size, needs to be expanded to some other
1562 /// code sequence, or the target has a custom expander for it.
1563 LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const {
1564 return getIndexedModeAction(IdxMode, VT, IMAB_MaskedStore);
1565 }
1566
1567 /// Return true if the specified indexed masked store is legal on this target.
1568 bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const {
1569 return VT.isSimple() &&
1570 (getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
1571 getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1572 }
1573
1574 /// Returns true if the index type for a masked gather/scatter requires
1575 /// extending.
1576 virtual bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const { return false; }
1577
1578 // Returns true if Extend can be folded into the index of a masked gather/scatter
1579 // on this target.
1580 virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const {
1581 return false;
1582 }
1583
1584 // Return true if the target supports a scatter/gather instruction with
1585 // indices which are scaled by a particular value. Note that all targets
1586 // must, by definition, support a scale of 1.
1587 virtual bool isLegalScaleForGatherScatter(uint64_t Scale,
1588 uint64_t ElemSize) const {
1589 // MGATHER/MSCATTER are only required to support scaling by one or by the
1590 // element size.
1591 if (Scale != ElemSize && Scale != 1)
1592 return false;
1593 return true;
1594 }
1595
1596 /// Return how the condition code should be treated: either it is legal, needs
1597 /// to be expanded to some other code sequence, or the target has a custom
1598 /// expander for it.
1599 LegalizeAction
1600 getCondCodeAction(ISD::CondCode CC, MVT VT) const {
1601 assert((unsigned)CC < std::size(CondCodeActions) &&
1602 ((unsigned)VT.SimpleTy >> 3) < std::size(CondCodeActions[0]) &&
1603 "Table isn't big enough!");
1604 // See setCondCodeAction for how this is encoded.
1605 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
1606 uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3];
1607 LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF);
1608 assert(Action != Promote && "Can't promote condition code!");
1609 return Action;
1610 }
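  // Worked example of the packing: for a type whose SimpleTy happens to be
  // 13 (value chosen purely for illustration), the action is stored in
  // CondCodeActions[CC][13 >> 3] == CondCodeActions[CC][1], in the nibble at
  // Shift = 4 * (13 & 0x7) = 20.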
1611
1612 /// Return true if the specified condition code is legal on this target.
1613 bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const {
1614 return getCondCodeAction(CC, VT) == Legal;
1615 }
1616
1617 /// Return true if the specified condition code is legal or custom on this
1618 /// target.
1619 bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const {
1620 return getCondCodeAction(CC, VT) == Legal ||
1621 getCondCodeAction(CC, VT) == Custom;
1622 }
1623
1624 /// If the action for this operation is to promote, this method returns the
1625 /// ValueType to promote to.
1626 MVT getTypeToPromoteTo(unsigned Op, MVT VT) const {
1627 assert(getOperationAction(Op, VT) == Promote &&
1628 "This operation isn't promoted!");
1629
1630 // See if this has an explicit type specified.
1631 std::map<std::pair<unsigned, MVT::SimpleValueType>,
1632 MVT::SimpleValueType>::const_iterator PTTI =
1633 PromoteToType.find(std::make_pair(Op, VT.SimpleTy));
1634 if (PTTI != PromoteToType.end()) return PTTI->second;
1635
1636 assert((VT.isInteger() || VT.isFloatingPoint()) &&
1637 "Cannot autopromote this type, add it with AddPromotedToType.");
1638
1639 uint64_t VTBits = VT.getScalarSizeInBits();
1640 MVT NVT = VT;
1641 do {
1642 NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1);
1643 assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&
1644 "Didn't find type to promote to!");
1645 } while (VTBits >= NVT.getScalarSizeInBits() || !isTypeLegal(NVT) ||
1646 getOperationAction(Op, NVT) == Promote);
1647 return NVT;
1648 }
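  // Example: on a target where i32 is legal, marking an i16 operation as
  // Promote without an explicit AddPromotedToType entry makes the loop above
  // settle on MVT::i32. A target can instead pin the choice explicitly, e.g.
  //
  //   setOperationAction(ISD::CTLZ, MVT::i16, Promote);
  //   AddPromotedToType(ISD::CTLZ, MVT::i16, MVT::i32);
  //
  // (assuming CTLZ on i32 is itself legal or custom for that target).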
1649
1650 virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
1651 bool AllowUnknown = false) const {
1652 return getValueType(DL, Ty, AllowUnknown);
1653 }
1654
1655 /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM
1656 /// operations except for the pointer size. If AllowUnknown is true, this
1657 /// will return MVT::Other for types with no EVT counterpart (e.g. structs),
1658 /// otherwise it will assert.
1659 EVT getValueType(const DataLayout &DL, Type *Ty,
1660 bool AllowUnknown = false) const {
1661 // Lower scalar pointers to native pointer types.
1662 if (auto *PTy = dyn_cast<PointerType>(Ty))
1663 return getPointerTy(DL, PTy->getAddressSpace());
1664
1665 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
1666 Type *EltTy = VTy->getElementType();
1667 // Lower vectors of pointers to native pointer types.
1668 if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
1669 EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace()));
1670 EltTy = PointerTy.getTypeForEVT(Ty->getContext());
1671 }
1672 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
1673 VTy->getElementCount());
1674 }
1675
1676 return EVT::getEVT(Ty, AllowUnknown);
1677 }
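  // Example: on a target whose address-space-0 pointers are 64 bits wide, a
  // scalar 'ptr' maps to MVT::i64 here and '<4 x ptr>' maps to MVT::v4i64;
  // a struct type yields MVT::Other only when AllowUnknown is true.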
1678
1679 EVT getMemValueType(const DataLayout &DL, Type *Ty,
1680 bool AllowUnknown = false) const {
1681 // Lower scalar pointers to native pointer types.
1682 if (auto *PTy = dyn_cast<PointerType>(Ty))
1683 return getPointerMemTy(DL, PTy->getAddressSpace());
1684
1685 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
1686 Type *EltTy = VTy->getElementType();
1687 if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
1688 EVT PointerTy(getPointerMemTy(DL, PTy->getAddressSpace()));
1689 EltTy = PointerTy.getTypeForEVT(Ty->getContext());
1690 }
1691 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
1692 VTy->getElementCount());
1693 }
1694
1695 return getValueType(DL, Ty, AllowUnknown);
1696 }
1697
1698
1699 /// Return the MVT corresponding to this LLVM type. See getValueType.
1700 MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
1701 bool AllowUnknown = false) const {
1702 return getValueType(DL, Ty, AllowUnknown).getSimpleVT();
1703 }
1704
1705 /// Return the desired alignment for ByVal or InAlloca aggregate function
1706 /// arguments in the caller parameter area. This is the actual alignment, not
1707 /// its logarithm.
1708 virtual uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;
1709
1710 /// Return the type of registers that this ValueType will eventually require.
1711 MVT getRegisterType(MVT VT) const {
1712 assert((unsigned)VT.SimpleTy < std::size(RegisterTypeForVT));
1713 return RegisterTypeForVT[VT.SimpleTy];
1714 }
1715
1716 /// Return the type of registers that this ValueType will eventually require.
1717 MVT getRegisterType(LLVMContext &Context, EVT VT) const {
1718 if (VT.isSimple())
1719 return getRegisterType(VT.getSimpleVT());
1720 if (VT.isVector()) {
1721 EVT VT1;
1722 MVT RegisterVT;
1723 unsigned NumIntermediates;
1724 (void)getVectorTypeBreakdown(Context, VT, VT1,
1725 NumIntermediates, RegisterVT);
1726 return RegisterVT;
1727 }
1728 if (VT.isInteger()) {
1729 return getRegisterType(Context, getTypeToTransformTo(Context, VT));
1730 }
1731 llvm_unreachable("Unsupported extended type!");
1732 }
1733
1734 /// Return the number of registers that this ValueType will eventually
1735 /// require.
1736 ///
1737 /// This is one for any types promoted to live in larger registers, but may be
1738 /// more than one for types (like i64) that are split into pieces. For types
1739 /// like i140, which are first promoted then expanded, it is the number of
1740 /// registers needed to hold all the bits of the original type. For an i140
1741 /// on a 32 bit machine this means 5 registers.
1742 ///
1743 /// RegisterVT may be passed as a way to override the default settings, for
1744 /// instance with i128 inline assembly operands on SystemZ.
1745 virtual unsigned
1746 getNumRegisters(LLVMContext &Context, EVT VT,
1747 std::optional<MVT> RegisterVT = std::nullopt) const {
1748 if (VT.isSimple()) {
1749 assert((unsigned)VT.getSimpleVT().SimpleTy <
1750 std::size(NumRegistersForVT));
1751 return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
1752 }
1753 if (VT.isVector()) {
1754 EVT VT1;
1755 MVT VT2;
1756 unsigned NumIntermediates;
1757 return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2);
1758 }
1759 if (VT.isInteger()) {
1760 unsigned BitWidth = VT.getSizeInBits();
1761 unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits();
1762 return (BitWidth + RegWidth - 1) / RegWidth;
1763 }
1764 llvm_unreachable("Unsupported extended type!");
1765 }
1766
1767 /// Certain combinations of ABIs, Targets and features require that types
1768 /// are legal for some operations and not for other operations.
1769 /// For MIPS all vector types must be passed through the integer register set.
1770 virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
1771 CallingConv::ID CC, EVT VT) const {
1772 return getRegisterType(Context, VT);
1773 }
1774
1775 /// Certain targets require unusual breakdowns of certain types. For MIPS,
1776 /// this occurs when a vector type is used, as vectors are passed through the
1777 /// integer register set.
1778 virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1779 CallingConv::ID CC,
1780 EVT VT) const {
1781 return getNumRegisters(Context, VT);
1782 }
1783
1784 /// Certain targets have context sensitive alignment requirements, where one
1785 /// type has the alignment requirement of another type.
1786 virtual Align getABIAlignmentForCallingConv(Type *ArgTy,
1787 const DataLayout &DL) const {
1788 return DL.getABITypeAlign(ArgTy);
1789 }
1790
1791 /// If true, then instruction selection should seek to shrink the FP constant
1792 /// of the specified type to a smaller type in order to save space and / or
1793 /// reduce runtime.
1794 virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
1795
1796 /// Return true if it is profitable to reduce a load to a smaller type.
1797 /// Example: (i16 (trunc (i32 (load x)))) -> (i16 load x)
1798 virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1799 EVT NewVT) const {
1800 // By default, assume that it is cheaper to extract a subvector from a wide
1801 // vector load rather than creating multiple narrow vector loads.
1802 if (NewVT.isVector() && !Load->hasOneUse())
1803 return false;
1804
1805 return true;
1806 }
1807
1808 /// Return true (the default) if it is profitable to remove a sext_inreg(x)
1809 /// where the sext is redundant, and use x directly.
1810 virtual bool shouldRemoveRedundantExtend(SDValue Op) const { return true; }
1811
1812 /// Indicates if any padding is guaranteed to go at the most significant bits
1813 /// when storing the type to memory and the type size isn't equal to the store
1814 /// size.
1815 bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const {
1816 return VT.isScalarInteger() && !VT.isByteSized();
1817 }
1818
1819 /// When splitting a value of the specified type into parts, does the Lo
1820 /// or Hi part come first? This usually follows the endianness, except
1821 /// for ppcf128, where the Hi part always comes first.
1822 bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const {
1823 return DL.isBigEndian() || VT == MVT::ppcf128;
1824 }
1825
1826 /// If true, the target has custom DAG combine transformations that it can
1827 /// perform for the specified node.
1828 bool hasTargetDAGCombine(ISD::NodeType NT) const {
1829 assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray));
1830 return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
1831 }
1832
1833 unsigned getGatherAllAliasesMaxDepth() const {
1834 return GatherAllAliasesMaxDepth;
1835 }
1836
1837 /// Returns the size of the platform's va_list object.
1838 virtual unsigned getVaListSizeInBits(const DataLayout &DL) const {
1839 return getPointerTy(DL).getSizeInBits();
1840 }
1841
1842 /// Get maximum # of store operations permitted for llvm.memset
1843 ///
1844 /// This function returns the maximum number of store operations permitted
1845 /// to replace a call to llvm.memset. The value is set by the target at the
1846 /// performance threshold for such a replacement. If OptSize is true,
1847 /// return the limit for functions that have OptSize attribute.
1848 unsigned getMaxStoresPerMemset(bool OptSize) const {
1849 return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
1850 }
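  // Configuration sketch: this limit comes from protected members that a
  // target's TargetLowering constructor sets, for example
  //
  //   MaxStoresPerMemset = 8;        // speed-optimized limit
  //   MaxStoresPerMemsetOptSize = 4; // limit under the optsize attribute
  //
  // (values are illustrative, not a recommendation).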
1851
1852 /// Get maximum # of store operations permitted for llvm.memcpy
1853 ///
1854 /// This function returns the maximum number of store operations permitted
1855 /// to replace a call to llvm.memcpy. The value is set by the target at the
1856 /// performance threshold for such a replacement. If OptSize is true,
1857 /// return the limit for functions that have OptSize attribute.
1858 unsigned getMaxStoresPerMemcpy(bool OptSize) const {
1859 return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
1860 }
1861
1862 /// \brief Get maximum # of store operations to be glued together
1863 ///
1864 /// This function returns the maximum number of store operations permitted
1865 /// to be glued together during lowering of llvm.memcpy. The value is set by
1866 /// the target at the performance threshold for such a replacement.
1867 virtual unsigned getMaxGluedStoresPerMemcpy() const {
1868 return MaxGluedStoresPerMemcpy;
1869 }
1870
1871 /// Get maximum # of load operations permitted for memcmp
1872 ///
1873 /// This function returns the maximum number of load operations permitted
1874 /// to replace a call to memcmp. The value is set by the target at the
1875 /// performance threshold for such a replacement. If OptSize is true,
1876 /// return the limit for functions that have OptSize attribute.
1877 unsigned getMaxExpandSizeMemcmp(bool OptSize) const {
1878 return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
1879 }
1880
1881 /// Get maximum # of store operations permitted for llvm.memmove
1882 ///
1883 /// This function returns the maximum number of store operations permitted
1884 /// to replace a call to llvm.memmove. The value is set by the target at the
1885 /// performance threshold for such a replacement. If OptSize is true,
1886 /// return the limit for functions that have OptSize attribute.
1887 unsigned getMaxStoresPerMemmove(bool OptSize) const {
1888 return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
1889 }
1890
1891 /// Determine if the target supports unaligned memory accesses.
1892 ///
1893 /// This function returns true if the target allows unaligned memory accesses
1894 /// of the specified type in the given address space. If true, it also returns
1895 /// a relative speed of the unaligned memory access in the last argument by
1896 /// reference. The higher the speed number, the faster the operation compared
1897 /// to a number returned by another such call. This is used, for example, in
1898 /// situations where an array copy/move/set is converted to a sequence of
1899 /// store operations. Its use helps to ensure that such replacements don't
1900 /// generate code that causes an alignment error (trap) on the target machine.
1901 virtual bool allowsMisalignedMemoryAccesses(
1902 EVT, unsigned AddrSpace = 0, Align Alignment = Align(1),
1903 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1904 unsigned * /*Fast*/ = nullptr) const {
1905 return false;
1906 }
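  // Override sketch (hypothetical target): a target on which word-sized
  // unaligned accesses are merely slower, not trapping, might implement the
  // EVT variant as
  //
  //   bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
  //                                       Align Alignment,
  //                                       MachineMemOperand::Flags Flags,
  //                                       unsigned *Fast) const override {
  //     if (VT != MVT::i32 && VT != MVT::i64)
  //       return false;
  //     if (Fast)
  //       *Fast = 1; // allowed, but slower than an aligned access
  //     return true;
  //   }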
1907
1908 /// LLT handling variant.
1909 virtual bool allowsMisalignedMemoryAccesses(
1910 LLT, unsigned AddrSpace = 0, Align Alignment = Align(1),
1911 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1912 unsigned * /*Fast*/ = nullptr) const {
1913 return false;
1914 }
1915
1916 /// This function returns true if the memory access is aligned or if the
1917 /// target allows this specific unaligned memory access. If the access is
1918 /// allowed, the optional final parameter returns a relative speed of the
1919 /// access (as defined by the target).
1920 bool allowsMemoryAccessForAlignment(
1921 LLVMContext &Context, const DataLayout &DL, EVT VT,
1922 unsigned AddrSpace = 0, Align Alignment = Align(1),
1923 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1924 unsigned *Fast = nullptr) const;
1925
1926 /// Return true if the memory access of this type is aligned or if the target
1927 /// allows this specific unaligned access for the given MachineMemOperand.
1928 /// If the access is allowed, the optional final parameter returns a relative
1929 /// speed of the access (as defined by the target).
1930 bool allowsMemoryAccessForAlignment(LLVMContext &Context,
1931 const DataLayout &DL, EVT VT,
1932 const MachineMemOperand &MMO,
1933 unsigned *Fast = nullptr) const;
1934
1935 /// Return true if the target supports a memory access of this type for the
1936 /// given address space and alignment. If the access is allowed, the optional
1937 /// final parameter returns the relative speed of the access (as defined by
1938 /// the target).
1939 virtual bool
1940 allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1941 unsigned AddrSpace = 0, Align Alignment = Align(1),
1942 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1943 unsigned *Fast = nullptr) const;
1944
1945 /// Return true if the target supports a memory access of this type for the
1946 /// given MachineMemOperand. If the access is allowed, the optional
1947 /// final parameter returns the relative access speed (as defined by the
1948 /// target).
1949 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1950 const MachineMemOperand &MMO,
1951 unsigned *Fast = nullptr) const;
1952
1953 /// LLT handling variant.
1954 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty,
1955 const MachineMemOperand &MMO,
1956 unsigned *Fast = nullptr) const;
1957
1958 /// Returns the target specific optimal type for load and store operations as
1959 /// a result of memset, memcpy, and memmove lowering.
1960 /// It returns EVT::Other if the type should be determined using generic
1961 /// target-independent logic.
1962 virtual EVT
1963 getOptimalMemOpType(const MemOp &Op,
1964 const AttributeList & /*FuncAttributes*/) const {
1965 return MVT::Other;
1966 }
1967
1968 /// LLT returning variant.
1969 virtual LLT
1970 getOptimalMemOpLLT(const MemOp &Op,
1971 const AttributeList & /*FuncAttributes*/) const {
1972 return LLT();
1973 }
1974
1975 /// Returns true if it's safe to use load / store of the specified type to
1976 /// expand memcpy / memset inline.
1977 ///
1978 /// This is mostly true for all types except for some special cases. For
1979 /// example, on X86 targets without SSE2 f64 load / store are done with fldl /
1980 /// fstpl which also does type conversion. Note the specified type doesn't
1981 /// have to be legal as the hook is used before type legalization.
1982 virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
1983
1984 /// Return lower limit for number of blocks in a jump table.
1985 virtual unsigned getMinimumJumpTableEntries() const;
1986
1987 /// Return lower limit of the density in a jump table.
1988 unsigned getMinimumJumpTableDensity(bool OptForSize) const;
1989
1990 /// Return upper limit for number of entries in a jump table.
1991 /// Zero if no limit.
1992 unsigned getMaximumJumpTableSize() const;
1993
1994 virtual bool isJumpTableRelative() const;
1995
1996 /// If a physical register, this specifies the register that
1997 /// llvm.stacksave/llvm.stackrestore should save and restore.
1998 Register getStackPointerRegisterToSaveRestore() const {
1999 return StackPointerRegisterToSaveRestore;
2000 }
2001
2002 /// If a physical register, this returns the register that receives the
2003 /// exception address on entry to an EH pad.
2004 virtual Register
2005 getExceptionPointerRegister(const Constant *PersonalityFn) const {
2006 return Register();
2007 }
2008
2009 /// If a physical register, this returns the register that receives the
2010 /// exception typeid on entry to a landing pad.
2011 virtual Register
2012 getExceptionSelectorRegister(const Constant *PersonalityFn) const {
2013 return Register();
2014 }
2015
2016 virtual bool needsFixedCatchObjects() const {
2017 report_fatal_error("Funclet EH is not implemented for this target");
2018 }
2019
2020 /// Return the minimum stack alignment of an argument.
2021 Align getMinStackArgumentAlignment() const {
2022 return MinStackArgumentAlignment;
2023 }
2024
2025 /// Return the minimum function alignment.
2026 Align getMinFunctionAlignment() const { return MinFunctionAlignment; }
2027
2028 /// Return the preferred function alignment.
2029 Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
2030
2031 /// Return the preferred loop alignment.
2032 virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const;
2033
2034 /// Return the maximum amount of bytes allowed to be emitted when padding for
2035 /// alignment
2036 virtual unsigned
2037 getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const;
2038
2039 /// Should loops be aligned even when the function is marked OptSize (but not
2040 /// MinSize).
2041 virtual bool alignLoopsWithOptSize() const { return false; }
2042
2043 /// If the target has a standard location for the stack protector guard,
2044 /// returns the address of that location. Otherwise, returns nullptr.
2045 /// DEPRECATED: please override useLoadStackGuardNode and customize
2046 /// LOAD_STACK_GUARD, or customize \@llvm.stackguard().
2047 virtual Value *getIRStackGuard(IRBuilderBase &IRB) const;
2048
2049 /// Inserts necessary declarations for SSP (stack protection) purpose.
2050 /// Should be used only when getIRStackGuard returns nullptr.
2051 virtual void insertSSPDeclarations(Module &M) const;
2052
2053 /// Return the variable that's previously inserted by insertSSPDeclarations,
2054 /// if any, otherwise return nullptr. Should be used only when
2055 /// getIRStackGuard returns nullptr.
2056 virtual Value *getSDagStackGuard(const Module &M) const;
2057
2058 /// If this function returns true, stack protection checks should XOR the
2059 /// frame pointer (or whichever pointer is used to address locals) into the
2060 /// stack guard value before checking it. getIRStackGuard must return nullptr
2061 /// if this returns true.
2062 virtual bool useStackGuardXorFP() const { return false; }
2063
2064 /// If the target has a standard stack protection check function that
2065 /// performs validation and error handling, returns the function. Otherwise,
2066 /// returns nullptr. Must be previously inserted by insertSSPDeclarations.
2067 /// Should be used only when getIRStackGuard returns nullptr.
2068 virtual Function *getSSPStackGuardCheck(const Module &M) const;
2069
2070protected:
2071 Value *getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
2072 bool UseTLS) const;
2073
2074public:
2075 /// Returns the target-specific address of the unsafe stack pointer.
2076 virtual Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const;
2077
2078 /// Returns the name of the symbol used to emit stack probes or the empty
2079 /// string if not applicable.
2080 virtual bool hasStackProbeSymbol(const MachineFunction &MF) const { return false; }
2081
2082 virtual bool hasInlineStackProbe(const MachineFunction &MF) const { return false; }
2083
2084 virtual StringRef getStackProbeSymbolName(const MachineFunction &MF) const {
2085 return "";
2086 }
2087
2088 /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
2089 /// are happy to sink it into basic blocks. A cast may be free, but not
2090 /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer.
2091 virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const;
2092
2093 /// Return true if the pointer arguments to CI should be aligned by aligning
2094 /// the object whose address is being passed. If so then MinSize is set to the
2095 /// minimum size the object must be to be aligned and PrefAlign is set to the
2096 /// preferred alignment.
2097 virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
2098 Align & /*PrefAlign*/) const {
2099 return false;
2100 }
2101
2102 //===--------------------------------------------------------------------===//
2103 /// \name Helpers for TargetTransformInfo implementations
2104 /// @{
2105
2106 /// Get the ISD node that corresponds to the Instruction class opcode.
2107 int InstructionOpcodeToISD(unsigned Opcode) const;
2108
2109 /// @}
2110
2111 //===--------------------------------------------------------------------===//
2112 /// \name Helpers for atomic expansion.
2113 /// @{
2114
2115 /// Returns the maximum atomic operation size (in bits) supported by
2116 /// the backend. Atomic operations greater than this size (as well
2117 /// as ones that are not naturally aligned), will be expanded by
2118 /// AtomicExpandPass into an __atomic_* library call.
2119 unsigned getMaxAtomicSizeInBitsSupported() const {
2120 return MaxAtomicSizeInBitsSupported;
2121 }
2122
2123 /// Returns the size in bits of the maximum div/rem the backend supports.
2124 /// Larger operations will be expanded by ExpandLargeDivRem.
2125 unsigned getMaxDivRemBitWidthSupported() const {
2126 return MaxDivRemBitWidthSupported;
2127 }
2128
2129 /// Returns the size in bits of the largest FP conversion the backend
2130 /// supports. Larger operations will be expanded by ExpandLargeFPConvert.
2131 unsigned getMaxLargeFPConvertBitWidthSupported() const {
2132 return MaxLargeFPConvertBitWidthSupported;
2133 }
2134
2135 /// Returns the size of the smallest cmpxchg or ll/sc instruction
2136 /// the backend supports. Any smaller operations are widened in
2137 /// AtomicExpandPass.
2138 ///
2139 /// Note that *unlike* operations above the maximum size, atomic ops
2140 /// are still natively supported below the minimum; they just
2141 /// require a more complex expansion.
2142 unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; }
2143
2144 /// Whether the target supports unaligned atomic operations.
2145 bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; }
2146
2147 /// Whether AtomicExpandPass should automatically insert fences and reduce
2148 /// ordering for this atomic. This should be true for most architectures with
2149 /// weak memory ordering. Defaults to false.
2150 virtual bool shouldInsertFencesForAtomic(const Instruction *I) const {
2151 return false;
2152 }
2153
2154 /// Whether AtomicExpandPass should automatically insert a trailing fence
2155 /// without reducing the ordering for this atomic. Defaults to false.
2156 virtual bool
2157 shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const {
2158 return false;
2159 }
2160
2161 /// Perform a load-linked operation on Addr, returning a "Value *" with the
2162 /// corresponding pointee type. This may entail some non-trivial operations to
2163 /// truncate or reconstruct types that will be illegal in the backend. See
2164 /// ARMISelLowering for an example implementation.
2165 virtual Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
2166 Value *Addr, AtomicOrdering Ord) const {
2167 llvm_unreachable("Load linked unimplemented on this target");
2168 }
2169
2170 /// Perform a store-conditional operation to Addr. Return the status of the
2171 /// store. This should be 0 if the store succeeded, non-zero otherwise.
2172 virtual Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val,
2173 Value *Addr, AtomicOrdering Ord) const {
2174 llvm_unreachable("Store conditional unimplemented on this target");
2175 }
2176
2177 /// Perform a masked atomicrmw using a target-specific intrinsic. This
2178 /// represents the core LL/SC loop which will be lowered at a late stage by
2179 /// the backend. The target-specific intrinsic returns the loaded value and
2180 /// is not responsible for masking and shifting the result.
2181 virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder,
2182 AtomicRMWInst *AI,
2183 Value *AlignedAddr, Value *Incr,
2184 Value *Mask, Value *ShiftAmt,
2185 AtomicOrdering Ord) const {
2186 llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
2187 }
2188
2189 /// Perform an atomicrmw expansion in a target-specific way. This is
2190 /// expected to be called when masked atomicrmw and bit test atomicrmw don't
2191 /// work, and the target supports another way to lower atomicrmw.
2192 virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const {
2193 llvm_unreachable(
2194 "Generic atomicrmw expansion unimplemented on this target");
2195 }
2196
2197 /// Perform a bit test atomicrmw using a target-specific intrinsic. This
2198 /// represents the combined bit test intrinsic which will be lowered at a late
2199 /// stage by the backend.
2200 virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
2201 llvm_unreachable(
2202 "Bit test atomicrmw expansion unimplemented on this target");
2203 }
2204
2205 /// Perform an atomicrmw whose result is only used by a comparison, using a
2206 /// target-specific intrinsic. This represents the combined atomic and compare
2207 /// intrinsic which will be lowered at a late stage by the backend.
2208 virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
2209 llvm_unreachable(
2210 "Compare arith atomicrmw expansion unimplemented on this target");
2211 }
2212
2213 /// Perform a masked cmpxchg using a target-specific intrinsic. This
2214 /// represents the core LL/SC loop which will be lowered at a late stage by
2215 /// the backend. The target-specific intrinsic returns the loaded value and
2216 /// is not responsible for masking and shifting the result.
2217 virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
2218 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
2219 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
2220 llvm_unreachable("Masked cmpxchg expansion unimplemented on this target");
2221 }
2222
2223 //===--------------------------------------------------------------------===//
2224 /// \name KCFI check lowering.
2225 /// @{
2226
2227 virtual MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
2228 MachineBasicBlock::instr_iterator &MBBI,
2229 const TargetInstrInfo *TII) const {
2230 llvm_unreachable("KCFI is not supported on this target");
2231 }
2232
2233 /// @}
2234
2235 /// Inserts in the IR a target-specific intrinsic specifying a fence.
2236 /// It is called by AtomicExpandPass before expanding an
2237 /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
2238 /// if shouldInsertFencesForAtomic returns true.
2239 ///
2240 /// Inst is the original atomic instruction, prior to other expansions that
2241 /// may be performed.
2242 ///
2243 /// This function should either return a nullptr, or a pointer to an IR-level
2244 /// Instruction*. Even complex fence sequences can be represented by a
2245 /// single Instruction* through an intrinsic to be lowered later.
2246 ///
2247 /// The default implementation emits an IR fence before any release (or
2248 /// stronger) operation that stores, and after any acquire (or stronger)
2249 /// operation. This is generally a correct implementation, but backends may
2250 /// override if they wish to use alternative schemes (e.g. the PowerPC
2251 /// standard ABI uses a fence before a seq_cst load instead of after a
2252 /// seq_cst store).
2253 /// @{
2254 virtual Instruction *emitLeadingFence(IRBuilderBase &Builder,
2255 Instruction *Inst,
2256 AtomicOrdering Ord) const;
2257
2258 virtual Instruction *emitTrailingFence(IRBuilderBase &Builder,
2259 Instruction *Inst,
2260 AtomicOrdering Ord) const;
2261 /// @}
2262
2263 // Emits code that executes when the comparison result in the ll/sc
2264 // expansion of a cmpxchg instruction is such that the store-conditional will
2265 // not execute. This makes it possible to balance out the load-linked with
2266 // a dedicated instruction, if desired.
2267 // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would
2268 // be unnecessarily held, except if clrex, inserted by this hook, is executed.
2269 virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const {}
2270
2271 /// Returns true if arguments should be sign-extended in lib calls.
2272 virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
2273 return IsSigned;
2274 }
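  // Override sketch (hypothetical target): a 64-bit target whose ABI passes
  // 32-bit libcall arguments sign-extended regardless of their signedness
  // could implement this as
  //
  //   bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
  //       override {
  //     return Type == MVT::i32 ? true : IsSigned;
  //   }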
2275
2276 /// Returns true if arguments should be extended in lib calls.
2277 virtual bool shouldExtendTypeInLibCall(EVT Type) const {
2278 return true;
2279 }
2280
2281 /// Returns how the given (atomic) load should be expanded by the
2282 /// IR-level AtomicExpand pass.
2283 virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
2284 return AtomicExpansionKind::None;
2285 }
2286
2287 /// Returns how the given (atomic) load should be cast by the IR-level
2288 /// AtomicExpand pass.
2289 virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const {
2290 if (LI->getType()->isFloatingPointTy())
2291 return AtomicExpansionKind::CastToInteger;
2292 return AtomicExpansionKind::None;
2293 }
2294
2295 /// Returns how the given (atomic) store should be expanded by the IR-level
2296 /// AtomicExpand pass. For instance, AtomicExpansionKind::Expand will try
2297 /// to use an atomicrmw xchg.
2298 virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const {
2299 return AtomicExpansionKind::None;
2300 }
2301
2302 /// Returns how the given (atomic) store should be cast by the IR-level
2303 /// AtomicExpand pass. For instance, AtomicExpansionKind::CastToInteger
2304 /// will try to cast the operands to integer values.
2305 virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const {
2306 if (SI->getValueOperand()->getType()->isFloatingPointTy())
2307 return AtomicExpansionKind::CastToInteger;
2308 return AtomicExpansionKind::None;
2309 }
2310
2311 /// Returns how the given atomic cmpxchg should be expanded by the IR-level
2312 /// AtomicExpand pass.
2313 virtual AtomicExpansionKind
2314 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
2315 return AtomicExpansionKind::None;
2316 }
2317
2318 /// Returns how the IR-level AtomicExpand pass should expand the given
2319 /// AtomicRMW, if at all. Default is to never expand.
2320 virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
2321 return RMW->isFloatingPointOperation() ?
2322 AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
2323 }
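  // Override sketch (hypothetical target): a backend with load-linked /
  // store-conditional instructions but no native read-modify-write atomics
  // might request LL/SC expansion for everything it handles natively:
  //
  //   AtomicExpansionKind
  //   shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override {
  //     uint64_t Size = RMW->getType()->getPrimitiveSizeInBits();
  //     return Size <= 64 ? AtomicExpansionKind::LLSC
  //                       : AtomicExpansionKind::None;
  //   }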
2324
2325 /// Returns how the given atomicrmw should be cast by the IR-level
2326 /// AtomicExpand pass.
2327 virtual AtomicExpansionKind
2328 shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const {
2329 if (RMWI->getOperation() == AtomicRMWInst::Xchg &&
2330 (RMWI->getValOperand()->getType()->isFloatingPointTy() ||
2331 RMWI->getValOperand()->getType()->isPointerTy()))
2332 return AtomicExpansionKind::CastToInteger;
2333
2334 return AtomicExpansionKind::None;
2335 }
2336
2337 /// On some platforms, an AtomicRMW that never actually modifies the value
2338 /// (such as fetch_add of 0) can be turned into a fence followed by an
2339 /// atomic load. This may sound useless, but it makes it possible for the
2340 /// processor to keep the cacheline shared, dramatically improving
2341 /// performance. And such idempotent RMWs are useful for implementing some
2342 /// kinds of locks, see for example (justification + benchmarks):
2343 /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
2344 /// This method tries doing that transformation, returning the atomic load if
2345 /// it succeeds, and nullptr otherwise.
2346 /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo
2347 /// another round of expansion.
2348 virtual LoadInst *
2349 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
2350 return nullptr;
2351 }
2352
2353 /// Returns how the platform's atomic operations are extended (ZERO_EXTEND,
2354 /// SIGN_EXTEND, or ANY_EXTEND).
2355 virtual ISD::NodeType getExtendForAtomicOps() const {
2356 return ISD::ZERO_EXTEND;
2357 }
2358
2359 /// Returns how the platform's atomic compare and swap expects its comparison
2360 /// value to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). This is
2361 /// separate from getExtendForAtomicOps, which is concerned with the
2362 /// sign-extension of the instruction's output, whereas here we are concerned
2363 /// with the sign-extension of the input. For targets with compare-and-swap
2364 /// instructions (or sub-word comparisons in their LL/SC loop expansions),
2365 /// the input can be ANY_EXTEND, but the output will still have a specific
2366 /// extension.
2367 virtual ISD::NodeType getExtendForAtomicCmpSwapArg() const {
2368 return ISD::ANY_EXTEND;
2369 }
2370
2371 /// @}
2372
2373 /// Returns true if we should normalize
2374 /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
2375 /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
2376 /// that it saves us from materializing N0 and N1 in an integer register.
2377 /// Targets that are able to perform and/or on flags should return false here.
2378 virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
2379 EVT VT) const {
2380 // If a target has multiple condition registers, then it likely has logical
2381 // operations on those registers.
2382 if (hasMultipleConditionRegisters())
2383 return false;
2384 // Only do the transform if the value won't be split into multiple
2385 // registers.
2386 LegalizeTypeAction Action = getTypeAction(Context, VT);
2387 return Action != TypeExpandInteger && Action != TypeExpandFloat &&
2388 Action != TypeSplitVector;
2389 }
2390
2391 virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }
2392
2393 /// Return true if a select of constants (select Cond, C1, C2) should be
2394 /// transformed into simple math ops with the condition value. For example:
2395 /// select Cond, C1, C1-1 --> add (zext Cond), C1-1
2396 virtual bool convertSelectOfConstantsToMath(EVT VT) const {
2397 return false;
2398 }
2399
2400 /// Return true if it is profitable to transform an integer
2401 /// multiplication-by-constant into simpler operations like shifts and adds.
2402 /// This may be true if the target does not directly support the
2403 /// multiplication operation for the specified type or the sequence of simpler
2404 /// ops is faster than the multiply.
2405 virtual bool decomposeMulByConstant(LLVMContext &Context,
2406 EVT VT, SDValue C) const {
2407 return false;
2408 }
2409
2410 /// Return true if it may be profitable to transform
2411 /// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
2412 /// This may not be true if c1 and c2 can be represented as immediates but
2413 /// c1*c2 cannot, for example.
2414 /// The target should check if c1, c2 and c1*c2 can be represented as
2415 /// immediates, or have to be materialized into registers. If it is not sure
2416 /// about some cases, a default true can be returned to let the DAGCombiner
2417 /// decide.
2418 /// AddNode is (add x, c1), and ConstNode is c2.
2419 virtual bool isMulAddWithConstProfitable(SDValue AddNode,
2420 SDValue ConstNode) const {
2421 return true;
2422 }
2423
2424 /// Return true if it is more correct/profitable to use strict FP_TO_INT
2425 /// conversion operations - canonicalizing the FP source value instead of
2426 /// converting all cases and then selecting based on value.
2427 /// This may be true if the target throws exceptions for out of bounds
2428 /// conversions or has fast FP CMOV.
2429 virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
2430 bool IsSigned) const {
2431 return false;
2432 }
2433
2434 /// Return true if it is beneficial to expand an @llvm.powi.* intrinsic.
2435 /// If not optimizing for size, expanding @llvm.powi.* intrinsics is always
2436 /// considered beneficial.
2437 /// If optimizing for size, expansion is only considered beneficial for up to
2438 /// 5 multiplies and a divide (if the exponent is negative).
2439 bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const {
2440 if (Exponent < 0)
2441 Exponent = -Exponent;
2442 uint64_t E = static_cast<uint64_t>(Exponent);
2443 return !OptForSize || (llvm::popcount(E) + Log2_64(E) < 7);
2444 }
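  // Worked example: for exponent 16, E = 16, popcount(E) = 1 and
  // Log2_64(E) = 4, so 1 + 4 = 5 < 7 and expansion is beneficial even when
  // optimizing for size. For exponent -15, E = 15, popcount(E) = 4 and
  // Log2_64(E) = 3, so the sum is 7 and expansion is only beneficial when
  // not optimizing for size.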
2445
2446 //===--------------------------------------------------------------------===//
2447 // TargetLowering Configuration Methods - These methods should be invoked by
2448 // the derived class constructor to configure this object for the target.
2449 //
2450protected:
2451 /// Specify how the target extends the result of integer and floating point
2452 /// boolean values from i1 to a wider type. See getBooleanContents.
2453 void setBooleanContents(BooleanContent Ty) {
2454 BooleanContents = Ty;
2455 BooleanFloatContents = Ty;
2456 }
2457
2458 /// Specify how the target extends the result of integer and floating point
2459 /// boolean values from i1 to a wider type. See getBooleanContents.
2460 void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) {
2461 BooleanContents = IntTy;
2462 BooleanFloatContents = FloatTy;
2463 }
2464
2465 /// Specify how the target extends the result of a vector boolean value from a
2466 /// vector of i1 to a wider type. See getBooleanContents.
2467 void setBooleanVectorContents(BooleanContent Ty) {
2468 BooleanVectorContents = Ty;
2469 }
2470
2471 /// Specify the target scheduling preference.
2472 void setSchedulingPreference(Sched::Preference Pref) {
2473 SchedPreferenceInfo = Pref;
2474 }
2475
2476 /// Indicate the minimum number of blocks to generate jump tables.
2477 void setMinimumJumpTableEntries(unsigned Val);
2478
2479 /// Indicate the maximum number of entries in jump tables.
2480 /// Set to zero to generate unlimited jump tables.
2481 void setMaximumJumpTableSize(unsigned);
2482
2483 /// If set to a physical register, this specifies the register that
2484 /// llvm.stacksave/llvm.stackrestore should save and restore.
2485 void setStackPointerRegisterToSaveRestore(Register R) {
2486 StackPointerRegisterToSaveRestore = R;
2487 }
2488
2489 /// Tells the code generator that the target has multiple (allocatable)
2490 /// condition registers that can be used to store the results of comparisons
2491 /// for use by selects and conditional branches. With multiple condition
2492 /// registers, the code generator will not aggressively sink comparisons into
2493 /// the blocks of their users.
2494 void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
2495 HasMultipleConditionRegisters = hasManyRegs;
2496 }
2497
2498 /// Tells the code generator that the target has BitExtract instructions.
2499 /// The code generator will aggressively sink "shift"s into the blocks of
2500 /// their users if the users will generate "and" instructions which can be
2501 /// combined with "shift" to BitExtract instructions.
2502 void setHasExtractBitsInsn(bool hasExtractInsn = true) {
2503 HasExtractBitsInsn = hasExtractInsn;
2504 }
2505
2506 /// Tells the code generator not to expand logic operations on comparison
2507 /// predicates into separate sequences that increase the amount of flow
2508 /// control.
2509 void setJumpIsExpensive(bool isExpensive = true);
2510
2511 /// Tells the code generator which bitwidths to bypass.
2512 void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
2513 BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
2514 }
2515
2516 /// Add the specified register class as an available regclass for the
2517 /// specified value type. This indicates the selector can handle values of
2518 /// that class natively.
2519 void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
2520 assert((unsigned)VT.SimpleTy < std::size(RegClassForVT));
2521 RegClassForVT[VT.SimpleTy] = RC;
2522 }
2523
2524 /// Return the largest legal super-reg register class of the register class
2525 /// for the specified type and its associated "cost".
2526 virtual std::pair<const TargetRegisterClass *, uint8_t>
2527 findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const;
2528
2529 /// Once all of the register classes are added, this allows us to compute
2530 /// derived properties we expose.
2531 void computeRegisterProperties(const TargetRegisterInfo *TRI);
2532
2533 /// Indicate that the specified operation does not work with the specified
2534 /// type and indicate what to do about it. Note that VT may refer to either
2535 /// the type of a result or that of an operand of Op.
2536 void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) {
2537 assert(Op < std::size(OpActions[0]) && "Table isn't big enough!");
2538 OpActions[(unsigned)VT.SimpleTy][Op] = Action;
2539 }
2540 void setOperationAction(ArrayRef<unsigned> Ops, MVT VT,
2541 LegalizeAction Action) {
2542 for (auto Op : Ops)
2543 setOperationAction(Op, VT, Action);
2544 }
2545 void setOperationAction(ArrayRef<unsigned> Ops, ArrayRef<MVT> VTs,
2546 LegalizeAction Action) {
2547 for (auto VT : VTs)
2548 setOperationAction(Ops, VT, Action);
2549 }
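  // Usage sketch: a target constructor typically issues a batch of these
  // calls, for example
  //
  //   setOperationAction(ISD::SELECT, MVT::f32, Custom);
  //   setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);
  //   setOperationAction({ISD::SMIN, ISD::SMAX}, {MVT::v4i32, MVT::v8i16},
  //                      Legal);
  //
  // (opcodes and types are illustrative, not taken from a specific backend).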
2550
2551 /// Indicate that the specified load with extension does not work with the
2552 /// specified type and indicate what to do about it.
2553 void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
2554 LegalizeAction Action) {
2555 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
2556 MemVT.isValid() && "Table isn't big enough!");
2557 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2558 unsigned Shift = 4 * ExtType;
2559 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
2560 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
2561 }
2562 void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT,
2563 LegalizeAction Action) {
2564 for (auto ExtType : ExtTypes)
2565 setLoadExtAction(ExtType, ValVT, MemVT, Action);
2566 }
2567 void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT,
2568 ArrayRef<MVT> MemVTs, LegalizeAction Action) {
2569 for (auto MemVT : MemVTs)
2570 setLoadExtAction(ExtTypes, ValVT, MemVT, Action);
2571 }
2572
2573 /// Let target indicate that an extending atomic load of the specified type
2574 /// is legal.
2575 void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
2576 LegalizeAction Action) {
2577 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
2578 MemVT.isValid() && "Table isn't big enough!");
2579 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2580 unsigned Shift = 4 * ExtType;
2581 AtomicLoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &=
2582 ~((uint16_t)0xF << Shift);
2583 AtomicLoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |=
2584 ((uint16_t)Action << Shift);
2585 }
2586 void setAtomicLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT,
2587 LegalizeAction Action) {
2588 for (auto ExtType : ExtTypes)
2589 setAtomicLoadExtAction(ExtType, ValVT, MemVT, Action);
2590 }
2591 void setAtomicLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT,
2592 ArrayRef<MVT> MemVTs, LegalizeAction Action) {
2593 for (auto MemVT : MemVTs)
2594 setAtomicLoadExtAction(ExtTypes, ValVT, MemVT, Action);
2595 }
2596
2597 /// Indicate that the specified truncating store does not work with the
2598 /// specified type and indicate what to do about it.
2599 void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) {
2600 assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
2601 TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
2602 }
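// For illustration (hypothetical target): truncating f64 stores to f32 are
// often not directly supported and would be configured as
//
//   setTruncStoreAction(MVT::f64, MVT::f32, Expand);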
2603
2604 /// Indicate that the specified indexed load does or does not work with the
2605 /// specified type and indicate what to do about it.
2606 ///
2607 /// NOTE: All indexed mode loads are initialized to Expand in
2608 /// TargetLowering.cpp
2609 void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, MVT VT,
2610 LegalizeAction Action) {
2611 for (auto IdxMode : IdxModes)
2612 setIndexedModeAction(IdxMode, VT, IMAB_Load, Action);
2613 }
2614
2615 void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs,
2616 LegalizeAction Action) {
2617 for (auto VT : VTs)
2618 setIndexedLoadAction(IdxModes, VT, Action);
2619 }
2620
2621 /// Indicate that the specified indexed store does or does not work with the
2622 /// specified type and indicate what to do about it.
2623 ///
2624 /// NOTE: All indexed mode stores are initialized to Expand in
2625 /// TargetLowering.cpp
2626 void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, MVT VT,
2627 LegalizeAction Action) {
2628 for (auto IdxMode : IdxModes)
2629 setIndexedModeAction(IdxMode, VT, IMAB_Store, Action);
2630 }
2631
2632 void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs,
2633 LegalizeAction Action) {
2634 for (auto VT : VTs)
2635 setIndexedStoreAction(IdxModes, VT, Action);
2636 }
2637
2638 /// Indicate that the specified indexed masked load does or does not work with
2639 /// the specified type and indicate what to do about it.
2640 ///
2641 /// NOTE: All indexed mode masked loads are initialized to Expand in
2642 /// TargetLowering.cpp
2643 void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT,
2644 LegalizeAction Action) {
2645 setIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad, Action);
2646 }
2647
2648 /// Indicate that the specified indexed masked store does or does not work
2649 /// with the specified type and indicate what to do about it.
2650 ///
2651 /// NOTE: All indexed mode masked stores are initialized to Expand in
2652 /// TargetLowering.cpp
2653 void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT,
2654 LegalizeAction Action) {
2655 setIndexedModeAction(IdxMode, VT, IMAB_MaskedStore, Action);
2656 }
2657
2658 /// Indicate that the specified condition code is or isn't supported on the
2659 /// target and indicate what to do about it.
2660 void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, MVT VT,
2661 LegalizeAction Action) {
2662 for (auto CC : CCs) {
2663 assert(VT.isValid() && (unsigned)CC < std::size(CondCodeActions) &&
2664 "Table isn't big enough!");
2665 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2666 /// The lower 3 bits of the SimpleTy select the Nth 4-bit group within the
2667 /// 32-bit value, and the remaining upper bits index into the second
2668 /// dimension of the array to select which 32-bit value to use.
2669 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
2670 CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
2671 CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
2672 }
2673 }
2674 void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, ArrayRef<MVT> VTs,
2675 LegalizeAction Action) {
2676 for (auto VT : VTs)
2677 setCondCodeAction(CCs, VT, Action);
2678 }
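// Sketch of typical usage (hypothetical target): unordered FP comparisons
// that the hardware cannot encode directly are commonly expanded, e.g.
//
//   setCondCodeAction({ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE},
//                     MVT::f32, Expand);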
2679
2680 /// If Opc/OrigVT is specified as being promoted, the promotion code defaults
2681 /// to trying a larger integer/fp until it can find one that works. If that
2682 /// default is insufficient, this method can be used by the target to override
2683 /// the default.
2684 void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2685 PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
2686 }
2687
2688 /// Convenience method to set an operation to Promote and specify the type
2689 /// in a single call.
2690 void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2691 setOperationAction(Opc, OrigVT, Promote);
2692 AddPromotedToType(Opc, OrigVT, DestVT);
2693 }
2694 void setOperationPromotedToType(ArrayRef<unsigned> Ops, MVT OrigVT,
2695 MVT DestVT) {
2696 for (auto Op : Ops) {
2697 setOperationAction(Op, OrigVT, Promote);
2698 AddPromotedToType(Op, OrigVT, DestVT);
2699 }
2700 }
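// Minimal sketch, assuming a hypothetical target whose only legal integer
// type is i32: i8/i16 CTLZ would be performed in the wider type via
//
//   setOperationPromotedToType(ISD::CTLZ, MVT::i8, MVT::i32);
//   setOperationPromotedToType(ISD::CTLZ, MVT::i16, MVT::i32);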
2701
2702 /// Targets should invoke this method for each target independent node that
2703 /// they want to provide a custom DAG combiner for by implementing the
2704 /// PerformDAGCombine virtual method.
2705 void setTargetDAGCombine(ArrayRef<ISD::NodeType> NTs) {
2706 for (auto NT : NTs) {
2707 assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray));
2708 TargetDAGCombineArray[NT >> 3] |= 1 << (NT & 7);
2709 }
2710 }
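// Illustrative only: a target wanting PerformDAGCombine callbacks for shifts
// and truncates would register them once in its constructor:
//
//   setTargetDAGCombine({ISD::SHL, ISD::SRL, ISD::TRUNCATE});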
2711
2712 /// Set the target's minimum function alignment.
2713 void setMinFunctionAlignment(Align Alignment) {
2714 MinFunctionAlignment = Alignment;
2715 }
2716
2717 /// Set the target's preferred function alignment. This should be set if
2718 /// there is a performance benefit to higher-than-minimum alignment.
2719 void setPrefFunctionAlignment(Align Alignment) {
2720 PrefFunctionAlignment = Alignment;
2721 }
2722
2723 /// Set the target's preferred loop alignment. The default alignment is one,
2724 /// which means the target does not care about loop alignment. The target may
2725 /// also override getPrefLoopAlignment to provide per-loop values.
2726 void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; }
2727 void setMaxBytesForAlignment(unsigned MaxBytes) {
2728 MaxBytesForAlignment = MaxBytes;
2729 }
2730
2731 /// Set the minimum stack alignment of an argument.
2732 void setMinStackArgumentAlignment(Align Alignment) {
2733 MinStackArgumentAlignment = Alignment;
2734 }
2735
2736 /// Set the maximum atomic operation size supported by the
2737 /// backend. Atomic operations greater than this size (as well as
2738 /// ones that are not naturally aligned) will be expanded by
2739 /// AtomicExpandPass into an __atomic_* library call.
2740 void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) {
2741 MaxAtomicSizeInBitsSupported = SizeInBits;
2742 }
2743
2744 /// Set the size in bits of the maximum div/rem the backend supports.
2745 /// Larger operations will be expanded by ExpandLargeDivRem.
2746 void setMaxDivRemBitWidthSupported(unsigned SizeInBits) {
2747 MaxDivRemBitWidthSupported = SizeInBits;
2748 }
2749
2750 /// Set the size in bits of the maximum fp convert the backend supports.
2751 /// Larger operations will be expanded by ExpandLargeFPConvert.
2752 void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits) {
2753 MaxLargeFPConvertBitWidthSupported = SizeInBits;
2754 }
2755
2756 /// Sets the minimum cmpxchg or ll/sc size supported by the backend.
2757 void setMinCmpXchgSizeInBits(unsigned SizeInBits) {
2758 MinCmpXchgSizeInBits = SizeInBits;
2759 }
2760
2761 /// Sets whether unaligned atomic operations are supported.
2762 void setSupportsUnalignedAtomics(bool UnalignedSupported) {
2763 SupportsUnalignedAtomics = UnalignedSupported;
2764 }
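// Rough sketch (the values below are made up for illustration, not target
// guidance): a 32-bit target with 64-bit cmpxchg but no wider atomics might
// configure, in its constructor,
//
//   setMaxAtomicSizeInBitsSupported(64);
//   setMinCmpXchgSizeInBits(32);
//   setMaxDivRemBitWidthSupported(64);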
2765
2766public:
2767 //===--------------------------------------------------------------------===//
2768 // Addressing mode description hooks (used by LSR etc).
2769 //
2770
2771 /// CodeGenPrepare sinks address calculations into the same BB as Load/Store
2772 /// instructions reading the address. This allows as much computation as
2773 /// possible to be done in the address mode for that operand. This hook lets
2774 /// targets also pass back when this should be done on intrinsics which
2775 /// load/store.
2776 virtual bool getAddrModeArguments(IntrinsicInst * /*I*/,
2777 SmallVectorImpl<Value*> &/*Ops*/,
2778 Type *&/*AccessTy*/) const {
2779 return false;
2780 }
2781
2782 /// This represents an addressing mode of:
2783 /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale
2784 /// If BaseGV is null, there is no BaseGV.
2785 /// If BaseOffs is zero, there is no base offset.
2786 /// If HasBaseReg is false, there is no base register.
2787 /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
2788 /// no scale.
2789 /// If ScalableOffset is zero, there is no scalable offset.
2790 struct AddrMode {
2791 GlobalValue *BaseGV = nullptr;
2792 int64_t BaseOffs = 0;
2793 bool HasBaseReg = false;
2794 int64_t Scale = 0;
2795 int64_t ScalableOffset = 0;
2796 AddrMode() = default;
2797 };
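// Worked example (for illustration): the address expression
//   &GV + 16 + Reg1 + 4*Reg2
// corresponds to an AddrMode with BaseGV = GV, BaseOffs = 16,
// HasBaseReg = true, Scale = 4 and ScalableOffset = 0; a target's
// isLegalAddressingMode() decides whether such a mode can be folded into its
// load/store instructions.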
2798
2799 /// Return true if the addressing mode represented by AM is legal for this
2800 /// target, for a load/store of the specified type.
2801 ///
2802 /// The type may be VoidTy, in which case only return true if the addressing
2803 /// mode is legal for a load/store of any legal type. TODO: Handle
2804 /// pre/postinc as well.
2805 ///
2806 /// If the address space cannot be determined, it will be -1.
2807 ///
2808 /// TODO: Remove default argument
2809 virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
2810 Type *Ty, unsigned AddrSpace,
2811 Instruction *I = nullptr) const;
2812
2813 /// Returns true if the target's addressing mode can target thread local
2814 /// storage (TLS).
2815 virtual bool addressingModeSupportsTLS(const GlobalValue &) const {
2816 return false;
2817 }
2818
2819 /// Return the preferred common base offset.
2820 virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
2821 int64_t MaxOffset) const {
2822 return 0;
2823 }
2824
2825 /// Return true if the specified immediate is a legal icmp immediate, that is,
2826 /// the target has icmp instructions which can compare a register against the
2827 /// immediate without having to materialize the immediate into a register.
2828 virtual bool isLegalICmpImmediate(int64_t) const {
2829 return true;
2830 }
2831
2832 /// Return true if the specified immediate is a legal add immediate, that is,
2833 /// the target has add instructions which can add a register with the immediate
2834 /// without having to materialize the immediate into a register.
2835 virtual bool isLegalAddImmediate(int64_t) const {
2836 return true;
2837 }
2838
2839 /// Return true if adding the specified scalable immediate is legal, that is
2840 /// the target has add instructions which can add a register with the
2841 /// immediate (multiplied by vscale) without having to materialize the
2842 /// immediate into a register.
2843 virtual bool isLegalAddScalableImmediate(int64_t) const { return false; }
2844
2845 /// Return true if the specified immediate is legal for the value input of a
2846 /// store instruction.
2847 virtual bool isLegalStoreImmediate(int64_t Value) const {
2848 // Default implementation assumes that at least 0 works since it is likely
2849 // that a zero register exists or a zero immediate is allowed.
2850 return Value == 0;
2851 }
2852
2853 /// Return true if it's significantly cheaper to shift a vector by a uniform
2854 /// scalar than by an amount which will vary across each lane. On x86 before
2855 /// AVX2 for example, there is a "psllw" instruction for the former case, but
2856 /// no simple instruction for a general "a << b" operation on vectors.
2857 /// This should also apply to lowering for vector funnel shifts (rotates).
2858 virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
2859 return false;
2860 }
2861
2862 /// Given a shuffle vector SVI representing a vector splat, return a new
2863 /// scalar type of size equal to SVI's scalar type if the new type is more
2864 /// profitable. Returns nullptr otherwise. For example under MVE float splats
2865 /// are converted to integer to prevent the need to move from SPR to GPR
2866 /// registers.
2867 virtual Type* shouldConvertSplatType(ShuffleVectorInst* SVI) const {
2868 return nullptr;
2869 }
2870
2871 /// Given a set of interconnected phis of type 'From' that are loaded/stored
2872 /// or bitcast to type 'To', return true if the set should be converted to
2873 /// 'To'.
2874 virtual bool shouldConvertPhiType(Type *From, Type *To) const {
2875 return (From->isIntegerTy() || From->isFloatingPointTy()) &&
2876 (To->isIntegerTy() || To->isFloatingPointTy());
2877 }
2878
2879 /// Returns true if the opcode is a commutative binary operation.
2880 virtual bool isCommutativeBinOp(unsigned Opcode) const {
2881 // FIXME: This should get its info from the td file.
2882 switch (Opcode) {
2883 case ISD::ADD:
2884 case ISD::SMIN:
2885 case ISD::SMAX:
2886 case ISD::UMIN:
2887 case ISD::UMAX:
2888 case ISD::MUL:
2889 case ISD::MULHU:
2890 case ISD::MULHS:
2891 case ISD::SMUL_LOHI:
2892 case ISD::UMUL_LOHI:
2893 case ISD::FADD:
2894 case ISD::FMUL:
2895 case ISD::AND:
2896 case ISD::OR:
2897 case ISD::XOR:
2898 case ISD::SADDO:
2899 case ISD::UADDO:
2900 case ISD::ADDC:
2901 case ISD::ADDE:
2902 case ISD::SADDSAT:
2903 case ISD::UADDSAT:
2904 case ISD::FMINNUM:
2905 case ISD::FMAXNUM:
2906 case ISD::FMINNUM_IEEE:
2907 case ISD::FMAXNUM_IEEE:
2908 case ISD::FMINIMUM:
2909 case ISD::FMAXIMUM:
2910 case ISD::AVGFLOORS:
2911 case ISD::AVGFLOORU:
2912 case ISD::AVGCEILS:
2913 case ISD::AVGCEILU:
2914 case ISD::ABDS:
2915 case ISD::ABDU:
2916 return true;
2917 default: return false;
2918 }
2919 }
2920
2921 /// Return true if the node is a math/logic binary operator.
2922 virtual bool isBinOp(unsigned Opcode) const {
2923 // A commutative binop must be a binop.
2924 if (isCommutativeBinOp(Opcode))
2925 return true;
2926 // These are non-commutative binops.
2927 switch (Opcode) {
2928 case ISD::SUB:
2929 case ISD::SHL:
2930 case ISD::SRL:
2931 case ISD::SRA:
2932 case ISD::ROTL:
2933 case ISD::ROTR:
2934 case ISD::SDIV:
2935 case ISD::UDIV:
2936 case ISD::SREM:
2937 case ISD::UREM:
2938 case ISD::SSUBSAT:
2939 case ISD::USUBSAT:
2940 case ISD::FSUB:
2941 case ISD::FDIV:
2942 case ISD::FREM:
2943 return true;
2944 default:
2945 return false;
2946 }
2947 }
2948
2949 /// Return true if it's free to truncate a value of type FromTy to type
2950 /// ToTy. e.g. On x86 it's free to truncate an i32 value in register EAX to i16
2951 /// by referencing its sub-register AX.
2952 /// Targets must return false when FromTy <= ToTy.
2953 virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
2954 return false;
2955 }
2956
2957 /// Return true if a truncation from FromTy to ToTy is permitted when deciding
2958 /// whether a call is in tail position. Typically this means that both results
2959 /// would be assigned to the same register or stack slot, but it could mean
2960 /// the target performs adequate checks of its own before proceeding with the
2961 /// tail call. Targets must return false when FromTy <= ToTy.
2962 virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
2963 return false;
2964 }
2965
2966 virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; }
2967 virtual bool isTruncateFree(LLT FromTy, LLT ToTy, const DataLayout &DL,
2968 LLVMContext &Ctx) const {
2969 return isTruncateFree(getApproximateEVTForLLT(FromTy, DL, Ctx),
2970 getApproximateEVTForLLT(ToTy, DL, Ctx));
2971 }
2972
2973 /// Return true if truncating the specific node Val to type VT2 is free.
2974 virtual bool isTruncateFree(SDValue Val, EVT VT2) const {
2975 // Fallback to type matching.
2976 return isTruncateFree(Val.getValueType(), VT2);
2977 }
2978
2979 virtual bool isProfitableToHoist(Instruction *I) const { return true; }
2980
2981 /// Return true if the extension represented by \p I is free.
2982 /// Unlike the is[Z|FP]ExtFree family, which is based on types,
2983 /// this method can use the context provided by \p I to decide
2984 /// whether or not \p I is free.
2985 /// This method extends the behavior of the is[Z|FP]ExtFree family.
2986 /// In other words, if is[Z|FP]ExtFree returns true, then this method
2987 /// returns true as well. The converse is not true.
2988 /// The target can perform the adequate checks by overriding isExtFreeImpl.
2989 /// \pre \p I must be a sign, zero, or fp extension.
2990 bool isExtFree(const Instruction *I) const {
2991 switch (I->getOpcode()) {
2992 case Instruction::FPExt:
2993 if (isFPExtFree(EVT::getEVT(I->getType()),
2994 EVT::getEVT(I->getOperand(0)->getType())))
2995 return true;
2996 break;
2997 case Instruction::ZExt:
2998 if (isZExtFree(I->getOperand(0)->getType(), I->getType()))
2999 return true;
3000 break;
3001 case Instruction::SExt:
3002 break;
3003 default:
3004 llvm_unreachable("Instruction is not an extension");
3005 }
3006 return isExtFreeImpl(I);
3007 }
3008
3009 /// Return true if \p Load and \p Ext can form an ExtLoad.
3010 /// For example, in AArch64
3011 /// %L = load i8, i8* %ptr
3012 /// %E = zext i8 %L to i32
3013 /// can be lowered into one load instruction
3014 /// ldrb w0, [x0]
3015 bool isExtLoad(const LoadInst *Load, const Instruction *Ext,
3016 const DataLayout &DL) const {
3017 EVT VT = getValueType(DL, Ext->getType());
3018 EVT LoadVT = getValueType(DL, Load->getType());
3019
3020 // If the load has other users and the truncate is not free, the ext
3021 // probably isn't free.
3022 if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) &&
3023 !isTruncateFree(Ext->getType(), Load->getType()))
3024 return false;
3025
3026 // Check whether the target supports casts folded into loads.
3027 unsigned LType;
3028 if (isa<ZExtInst>(Ext))
3029 LType = ISD::ZEXTLOAD;
3030 else {
3031 assert(isa<SExtInst>(Ext) && "Unexpected ext type!");
3032 LType = ISD::SEXTLOAD;
3033 }
3034
3035 return isLoadExtLegal(LType, VT, LoadVT);
3036 }
3037
3038 /// Return true if any actual instruction that defines a value of type FromTy
3039 /// implicitly zero-extends the value to ToTy in the result register.
3040 ///
3041 /// The function should return true when it is likely that the truncate can
3042 /// be freely folded with an instruction defining a value of FromTy. If
3043 /// the defining instruction is unknown (because you're looking at a
3044 /// function argument, PHI, etc.) then the target may require an
3045 /// explicit truncate, which is not necessarily free, but this function
3046 /// does not deal with those cases.
3047 /// Targets must return false when FromTy >= ToTy.
3048 virtual bool isZExtFree(Type *FromTy, Type *ToTy) const {
3049 return false;
3050 }
3051
3052 virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; }
3053 virtual bool isZExtFree(LLT FromTy, LLT ToTy, const DataLayout &DL,
3054 LLVMContext &Ctx) const {
3055 return isZExtFree(getApproximateEVTForLLT(FromTy, DL, Ctx),
3056 getApproximateEVTForLLT(ToTy, DL, Ctx));
3057 }
3058
3059 /// Return true if zero-extending the specific node Val to type VT2 is free
3060 /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or
3061 /// because it's folded such as X86 zero-extending loads).
3062 virtual bool isZExtFree(SDValue Val, EVT VT2) const {
3063 return isZExtFree(Val.getValueType(), VT2);
3064 }
3065
3066 /// Return true if sign-extension from FromTy to ToTy is cheaper than
3067 /// zero-extension.
3068 virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
3069 return false;
3070 }
3071
3072 /// Return true if this constant should be sign extended when promoting to
3073 /// a larger type.
3074 virtual bool signExtendConstant(const ConstantInt *C) const { return false; }
3075
3076 /// Return true if sinking I's operands to the same basic block as I is
3077 /// profitable, e.g. because the operands can be folded into a target
3078 /// instruction during instruction selection. After calling the function
3079 /// \p Ops contains the Uses to sink ordered by dominance (dominating users
3080 /// come first).
3081 virtual bool shouldSinkOperands(Instruction *I,
3082 SmallVectorImpl<Use *> &Ops) const {
3083 return false;
3084 }
3085
3086 /// Try to optimize extending or truncating conversion instructions (like
3087 /// zext, trunc, fptoui, uitofp) for the target.
3088 virtual bool
3089 optimizeExtendOrTruncateConversion(Instruction *I, Loop *L,
3090 const TargetTransformInfo &TTI) const {
3091 return false;
3092 }
3093
3094 /// Return true if the target supplies and combines to a paired load
3095 /// two loaded values of type LoadedType next to each other in memory.
3096 /// RequiredAlignment gives the minimal alignment constraints that must be met
3097 /// to be able to select this paired load.
3098 ///
3099 /// This information is *not* used to generate actual paired loads, but it is
3100 /// used to generate a sequence of loads that is easier to combine into a
3101 /// paired load.
3102 /// For instance, something like this:
3103 /// a = load i64* addr
3104 /// b = trunc i64 a to i32
3105 /// c = lshr i64 a, 32
3106 /// d = trunc i64 c to i32
3107 /// will be optimized into:
3108 /// b = load i32* addr1
3109 /// d = load i32* addr2
3110 /// Where addr1 = addr2 +/- sizeof(i32).
3111 ///
3112 /// In other words, unless the target performs a post-isel load combining,
3113 /// this information should not be provided because it will generate more
3114 /// loads.
3115 virtual bool hasPairedLoad(EVT /*LoadedType*/,
3116 Align & /*RequiredAlignment*/) const {
3117 return false;
3118 }
3119
3120 /// Return true if the target has a vector blend instruction.
3121 virtual bool hasVectorBlend() const { return false; }
3122
3123 /// Get the maximum supported factor for interleaved memory accesses.
3124 /// Default to be the minimum interleave factor: 2.
3125 virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; }
3126
3127 /// Lower an interleaved load to target specific intrinsics. Return
3128 /// true on success.
3129 ///
3130 /// \p LI is the vector load instruction.
3131 /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector.
3132 /// \p Indices is the corresponding indices for each shufflevector.
3133 /// \p Factor is the interleave factor.
3134 virtual bool lowerInterleavedLoad(LoadInst *LI,
3135 ArrayRef<ShuffleVectorInst *> Shuffles,
3136 ArrayRef<unsigned> Indices,
3137 unsigned Factor) const {
3138 return false;
3139 }
3140
3141 /// Lower an interleaved store to target specific intrinsics. Return
3142 /// true on success.
3143 ///
3144 /// \p SI is the vector store instruction.
3145 /// \p SVI is the shufflevector to RE-interleave the stored vector.
3146 /// \p Factor is the interleave factor.
3147 virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
3148 unsigned Factor) const {
3149 return false;
3150 }
3151
3152 /// Lower a deinterleave intrinsic to a target specific load intrinsic.
3153 /// Return true on success. Currently only supports
3154 /// llvm.vector.deinterleave2
3155 ///
3156 /// \p DI is the deinterleave intrinsic.
3157 /// \p LI is the accompanying load instruction
3158 virtual bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
3159 LoadInst *LI) const {
3160 return false;
3161 }
3162
3163 /// Lower an interleave intrinsic to a target specific store intrinsic.
3164 /// Return true on success. Currently only supports
3165 /// llvm.vector.interleave2
3166 ///
3167 /// \p II is the interleave intrinsic.
3168 /// \p SI is the accompanying store instruction
3169 virtual bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
3170 StoreInst *SI) const {
3171 return false;
3172 }
3173
3174 /// Return true if an fpext operation is free (for instance, because
3175 /// single-precision floating-point numbers are implicitly extended to
3176 /// double-precision).
3177 virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const {
3178 assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() &&
3179 "invalid fpext types");
3180 return false;
3181 }
3182
3183 /// Return true if an fpext operation input to an \p Opcode operation is free
3184 /// (for instance, because half-precision floating-point numbers are
3185 /// implicitly extended to float-precision) for an FMA instruction.
3186 virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
3187 LLT DestTy, LLT SrcTy) const {
3188 return false;
3189 }
3190
3191 /// Return true if an fpext operation input to an \p Opcode operation is free
3192 /// (for instance, because half-precision floating-point numbers are
3193 /// implicitly extended to float-precision) for an FMA instruction.
3194 virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
3195 EVT DestVT, EVT SrcVT) const {
3196 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
3197 "invalid fpext types");
3198 return isFPExtFree(DestVT, SrcVT);
3199 }
3200
3201 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
3202 /// extend node) is profitable.
3203 virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; }
3204
3205 /// Return true if an fneg operation is free to the point where it is never
3206 /// worthwhile to replace it with a bitwise operation.
3207 virtual bool isFNegFree(EVT VT) const {
3208 assert(VT.isFloatingPoint());
3209 return false;
3210 }
3211
3212 /// Return true if an fabs operation is free to the point where it is never
3213 /// worthwhile to replace it with a bitwise operation.
3214 virtual bool isFAbsFree(EVT VT) const {
3215 assert(VT.isFloatingPoint());
3216 return false;
3217 }
3218
3219 /// Return true if an FMA operation is faster than a pair of fmul and fadd
3220 /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
3221 /// returns true, otherwise fmuladd is expanded to fmul + fadd.
3222 ///
3223 /// NOTE: This may be called before legalization on types for which FMAs are
3224 /// not legal, but should return true if those types will eventually legalize
3225 /// to types that support FMAs. After legalization, it will only be called on
3226 /// types that support FMAs (via Legal or Custom actions)
3227 virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
3228 EVT) const {
3229 return false;
3230 }
3231
3232 /// Return true if an FMA operation is faster than a pair of fmul and fadd
3233 /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
3234 /// returns true, otherwise fmuladd is expanded to fmul + fadd.
3235 ///
3236 /// NOTE: This may be called before legalization on types for which FMAs are
3237 /// not legal, but should return true if those types will eventually legalize
3238 /// to types that support FMAs. After legalization, it will only be called on
3239 /// types that support FMAs (via Legal or Custom actions)
3240 virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
3241 LLT) const {
3242 return false;
3243 }
3244
3245 /// IR version
3246 virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const {
3247 return false;
3248 }
3249
3250 /// Returns true if \p MI can be combined with another instruction to
3251 /// form TargetOpcode::G_FMAD. \p N may be an TargetOpcode::G_FADD,
3252 /// TargetOpcode::G_FSUB, or an TargetOpcode::G_FMUL which will be
3253 /// distributed into an fadd/fsub.
3254 virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const {
3255 assert((MI.getOpcode() == TargetOpcode::G_FADD ||
3256 MI.getOpcode() == TargetOpcode::G_FSUB ||
3257 MI.getOpcode() == TargetOpcode::G_FMUL) &&
3258 "unexpected node in FMAD forming combine");
3259 switch (Ty.getScalarSizeInBits()) {
3260 case 16:
3261 return isOperationLegal(TargetOpcode::G_FMAD, MVT::f16);
3262 case 32:
3263 return isOperationLegal(TargetOpcode::G_FMAD, MVT::f32);
3264 case 64:
3265 return isOperationLegal(TargetOpcode::G_FMAD, MVT::f64);
3266 default:
3267 break;
3268 }
3269
3270 return false;
3271 }
3272
3273 /// Returns true if \p N can be combined with another node to form an
3274 /// ISD::FMAD. \p N may be an ISD::FADD, ISD::FSUB, or an ISD::FMUL which
3275 /// will be distributed into an fadd/fsub.
3276 virtual bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const {
3277 assert((N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB ||
3278 N->getOpcode() == ISD::FMUL) &&
3279 "unexpected node in FMAD forming combine");
3280 return isOperationLegal(ISD::FMAD, N->getValueType(0));
3281 }
3282
3283 // Return true when the decision to generate FMA's (or FMS, FMLA etc) rather
3284 // than FMUL and ADD is delegated to the machine combiner.
3285 virtual bool generateFMAsInMachineCombiner(EVT VT,
3286 CodeGenOptLevel OptLevel) const {
3287 return false;
3288 }
3289
3290 /// Return true if it's profitable to narrow operations of type SrcVT to
3291 /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not from
3292 /// i32 to i16.
3293 virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
3294 return false;
3295 }
3296
3297 /// Return true if pulling a binary operation into a select with an identity
3298 /// constant is profitable. This is the inverse of an IR transform.
3299 /// Example: X + (Cond ? Y : 0) --> Cond ? (X + Y) : X
3300 virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
3301 EVT VT) const {
3302 return false;
3303 }
3304
3305 /// Return true if it is beneficial to convert a load of a constant to
3306 /// just the constant itself.
3307 /// On some targets it might be more efficient to use a combination of
3308 /// arithmetic instructions to materialize the constant instead of loading it
3309 /// from a constant pool.
3310 virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
3311 Type *Ty) const {
3312 return false;
3313 }
3314
3315 /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type
3316 /// from this source type with this index. This is needed because
3317 /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of
3318 /// the first element, and only the target knows which lowering is cheap.
3319 virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
3320 unsigned Index) const {
3321 return false;
3322 }
3323
3324 /// Try to convert an extract element of a vector binary operation into an
3325 /// extract element followed by a scalar operation.
3326 virtual bool shouldScalarizeBinop(SDValue VecOp) const {
3327 return false;
3328 }
3329
3330 /// Return true if extraction of a scalar element from the given vector type
3331 /// at the given index is cheap. For example, if scalar operations occur on
3332 /// the same register file as vector operations, then an extract element may
3333 /// be a sub-register rename rather than an actual instruction.
3334 virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const {
3335 return false;
3336 }
3337
3338 /// Try to convert math with an overflow comparison into the corresponding DAG
3339 /// node operation. Targets may want to override this independently of whether
3340 /// the operation is legal/custom for the given type because it may obscure
3341 /// matching of other patterns.
3342 virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
3343 bool MathUsed) const {
3344 // TODO: The default logic is inherited from code in CodeGenPrepare.
3345 // The opcode should not make a difference by default?
3346 if (Opcode != ISD::UADDO)
3347 return false;
3348
3349 // Allow the transform as long as we have an integer type that is not
3350 // obviously illegal and unsupported and if the math result is used
3351 // besides the overflow check. On some targets (e.g. SPARC), it is
3352 // not profitable to form an overflow op if the math result has no
3353 // concrete users.
3354 if (VT.isVector())
3355 return false;
3356 return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
3357 }
3358
3359 // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
3360 // even if the vector itself has multiple uses.
3361 virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
3362 return false;
3363 }
3364
3365 // Return true if CodeGenPrepare should consider splitting large offset of a
3366 // GEP to make the GEP fit into the addressing mode and can be sunk into the
3367 // same blocks of its users.
3368 virtual bool shouldConsiderGEPOffsetSplit() const { return false; }
3369
3370 /// Return true if creating a shift of the type by the given
3371 /// amount is not profitable.
3372 virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const {
3373 return false;
3374 }
3375
3376 // Should we fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x))
3377 // A) where y has a single bit set?
3378 virtual bool shouldFoldSelectWithSingleBitTest(EVT VT,
3379 const APInt &AndMask) const {
3380 unsigned ShCt = AndMask.getBitWidth() - 1;
3381 return !shouldAvoidTransformToShift(VT, ShCt);
3382 }
3383
3384 /// Does this target require the clearing of high-order bits in a register
3385 /// passed to the fp16 to fp conversion library function?
3386 virtual bool shouldKeepZExtForFP16Conv() const { return false; }
3387
3388 /// Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT
3389 /// from min(max(fptoi)) saturation patterns.
3390 virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const {
3391 return isOperationLegalOrCustom(Op, VT);
3392 }
3393
3394 /// Should we expand [US]CMP nodes using two selects and two compares, or by
3395 /// doing arithmetic on boolean types?
3396 virtual bool shouldExpandCmpUsingSelects() const { return false; }
3397
3398 /// Does this target support complex deinterleaving?
3399 virtual bool isComplexDeinterleavingSupported() const { return false; }
3400
3401 /// Does this target support complex deinterleaving with the given operation
3402 /// and type?
3403 virtual bool isComplexDeinterleavingOperationSupported(
3404 ComplexDeinterleavingOperation Operation, Type *Ty) const {
3405 return false;
3406 }
3407
3408 /// Create the IR node for the given complex deinterleaving operation.
3409 /// If one cannot be created using all the given inputs, nullptr should be
3410 /// returned.
3411 virtual Value *createComplexDeinterleavingIR(
3412 IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
3413 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
3414 Value *Accumulator = nullptr) const {
3415 return nullptr;
3416 }
3417
3418 /// Rename the default libcall routine name for the specified libcall.
3419 void setLibcallName(RTLIB::Libcall Call, const char *Name) {
3420 Libcalls.setLibcallName(Call, Name);
3421 }
3422
3423 void setLibcallName(ArrayRef<RTLIB::Libcall> Calls, const char *Name) {
3424 Libcalls.setLibcallName(Calls, Name);
3425 }
3426
3427 /// Get the libcall routine name for the specified libcall.
3428 const char *getLibcallName(RTLIB::Libcall Call) const {
3429 return Libcalls.getLibcallName(Call);
3430 }
3431
3432 /// Override the default CondCode to be used to test the result of the
3433 /// comparison libcall against zero.
3434 /// FIXME: This can't be merged with 'RuntimeLibcallsInfo' because of the ISD.
3435 void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) {
3436 CmpLibcallCCs[Call] = CC;
3437 }
3438
3439
3440 /// Get the CondCode that's to be used to test the result of the comparison
3441 /// libcall against zero.
3442 /// FIXME: This can't be merged with 'RuntimeLibcallsInfo' because of the ISD.
3443 ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const {
3444 return CmpLibcallCCs[Call];
3445 }
3446
3447
3448 /// Set the CallingConv that should be used for the specified libcall.
3449 void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) {
3450 Libcalls.setLibcallCallingConv(Call, CC);
3451 }
3452
3453 /// Get the CallingConv that should be used for the specified libcall.
3454 CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const {
3455 return Libcalls.getLibcallCallingConv(Call);
3456 }
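// Sketch only (the routine name below is hypothetical): a target using a
// vendor runtime could rename and re-tune an individual libcall like so:
//
//   setLibcallName(RTLIB::MEMCPY, "__vendor_memcpy");
//   setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::C);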
3457
3458 /// Execute target specific actions to finalize target lowering.
3459 /// This is used to set extra flags in MachineFrameInformation and freezing
3460 /// the set of reserved registers.
3461 /// The default implementation just freezes the set of reserved registers.
3462 virtual void finalizeLowering(MachineFunction &MF) const;
3463
3464 //===----------------------------------------------------------------------===//
3465 // GlobalISel Hooks
3466 //===----------------------------------------------------------------------===//
3467 /// Check whether or not \p MI needs to be moved close to its uses.
3468 virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const;
3469
3470
3471private:
3472 const TargetMachine &TM;
3473
3474 /// Tells the code generator that the target has multiple (allocatable)
3475 /// condition registers that can be used to store the results of comparisons
3476 /// for use by selects and conditional branches. With multiple condition
3477 /// registers, the code generator will not aggressively sink comparisons into
3478 /// the blocks of their users.
3479 bool HasMultipleConditionRegisters;
3480
3481 /// Tells the code generator that the target has BitExtract instructions.
3482 /// The code generator will aggressively sink "shift"s into the blocks of
3483 /// their users if the users will generate "and" instructions which can be
3484 /// combined with "shift" to BitExtract instructions.
3485 bool HasExtractBitsInsn;
3486
3487 /// Tells the code generator to bypass slow divide or remainder
3488 /// instructions. For example, BypassSlowDivWidths[32,8] tells the code
3489 /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer
3490 /// div/rem when the operands are positive and less than 256.
3491 DenseMap <unsigned int, unsigned int> BypassSlowDivWidths;
3492
3493 /// Tells the code generator that it shouldn't generate extra flow control
3494 /// instructions and should attempt to combine flow control instructions via
3495 /// predication.
3496 bool JumpIsExpensive;
3497
3498 /// Information about the contents of the high-bits in boolean values held in
3499 /// a type wider than i1. See getBooleanContents.
3500 BooleanContent BooleanContents;
3501
3502 /// Information about the contents of the high-bits in boolean values held in
3503 /// a type wider than i1. See getBooleanContents.
3504 BooleanContent BooleanFloatContents;
3505
3506 /// Information about the contents of the high-bits in boolean vector values
3507 /// when the element type is wider than i1. See getBooleanContents.
3508 BooleanContent BooleanVectorContents;
3509
3510 /// The target scheduling preference: shortest possible total cycles or lowest
3511 /// register usage.
3512 Sched::Preference SchedPreferenceInfo;
3513
3514 /// The minimum alignment that any argument on the stack needs to have.
3515 Align MinStackArgumentAlignment;
3516
3517 /// The minimum function alignment (used when optimizing for size, and to
3518 /// prevent explicitly provided alignment from leading to incorrect code).
3519 Align MinFunctionAlignment;
3520
3521 /// The preferred function alignment (used when alignment unspecified and
3522 /// optimizing for speed).
3523 Align PrefFunctionAlignment;
3524
3525 /// The preferred loop alignment (not in log2, but in bytes).
3526 Align PrefLoopAlignment;
3527 /// The maximum amount of bytes permitted to be emitted for alignment.
3528 unsigned MaxBytesForAlignment;
3529
3530 /// Size in bits of the maximum atomics size the backend supports.
3531 /// Accesses larger than this will be expanded by AtomicExpandPass.
3532 unsigned MaxAtomicSizeInBitsSupported;
3533
3534 /// Size in bits of the maximum div/rem size the backend supports.
3535 /// Larger operations will be expanded by ExpandLargeDivRem.
3536 unsigned MaxDivRemBitWidthSupported;
3537
3538 /// Size in bits of the maximum fp convert the backend supports.
3539 /// Larger operations will be expanded by ExpandLargeFPConvert.
3540 unsigned MaxLargeFPConvertBitWidthSupported;
3541
3542 /// Size in bits of the minimum cmpxchg or ll/sc operation the
3543 /// backend supports.
3544 unsigned MinCmpXchgSizeInBits;
3545
3546 /// This indicates if the target supports unaligned atomic operations.
3547 bool SupportsUnalignedAtomics;
3548
3549 /// If set to a physical register, this specifies the register that
3550 /// llvm.savestack/llvm.restorestack should save and restore.
3551 Register StackPointerRegisterToSaveRestore;
3552
3553 /// This indicates the default register class to use for each ValueType the
3554 /// target supports natively.
3555 const TargetRegisterClass *RegClassForVT[MVT::VALUETYPE_SIZE];
3556 uint16_t NumRegistersForVT[MVT::VALUETYPE_SIZE];
3557 MVT RegisterTypeForVT[MVT::VALUETYPE_SIZE];
3558
3559 /// This indicates the "representative" register class to use for each
3560 /// ValueType the target supports natively. This information is used by the
3561 /// scheduler to track register pressure. By default, the representative
3562 /// register class is the largest legal super-reg register class of the
3563 /// register class of the specified type. e.g. On x86, i8, i16, and i32's
3564 /// representative class would be GR32.
3565 const TargetRegisterClass *RepRegClassForVT[MVT::VALUETYPE_SIZE] = {0};
3566
3567 /// This indicates the "cost" of the "representative" register class for each
3568 /// ValueType. The cost is used by the scheduler to approximate register
3569 /// pressure.
3570 uint8_t RepRegClassCostForVT[MVT::VALUETYPE_SIZE];
3571
3572 /// For any value types we are promoting or expanding, this contains the value
3573 /// type that we are changing to. For Expanded types, this contains one step
3574 /// of the expand (e.g. i64 -> i32), even if there are multiple steps required
3575 /// (e.g. i64 -> i16). For types natively supported by the system, this holds
3576 /// the same type (e.g. i32 -> i32).
3577 MVT TransformToType[MVT::VALUETYPE_SIZE];
3578
3579 /// For each operation and each value type, keep a LegalizeAction that
3580 /// indicates how instruction selection should deal with the operation. Most
3581 /// operations are Legal (aka, supported natively by the target), but
3582 /// operations that are not should be described. Note that operations on
3583 /// non-legal value types are not described here.
3584 LegalizeAction OpActions[MVT::VALUETYPE_SIZE][ISD::BUILTIN_OP_END];
3585
3586 /// For each load extension type and each value type, keep a LegalizeAction
3587 /// that indicates how instruction selection should deal with a load of a
3588 /// specific value type and extension type. Uses 4-bits to store the action
3589 /// for each of the 4 load ext types.
3590 uint16_t LoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE];
3591
3592 /// Similar to LoadExtActions, but for atomic loads. Only Legal or Expand
3593 /// (default) values are supported.
3594 uint16_t AtomicLoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE];
3595
3596 /// For each value type pair keep a LegalizeAction that indicates whether a
3597 /// truncating store of a specific value type and truncating type is legal.
3598 LegalizeAction TruncStoreActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE];
3599
3600 /// For each indexed mode and each value type, keep a quad of LegalizeAction
3601 /// that indicates how instruction selection should deal with the load /
3602 /// store / maskedload / maskedstore.
3603 ///
3604 /// The first dimension is the value_type for the reference. The second
3605 /// dimension represents the various modes for load store.
3606 uint16_t IndexedModeActions[MVT::VALUETYPE_SIZE][ISD::LAST_INDEXED_MODE];
3607
3608 /// For each condition code (ISD::CondCode) keep a LegalizeAction that
3609 /// indicates how instruction selection should deal with the condition code.
3610 ///
3611 /// Because each CC action takes up 4 bits, we need to have the array size be
3612 /// large enough to fit all of the value types. This can be done by rounding
3613 /// up the MVT::VALUETYPE_SIZE value to the next multiple of 8.
3614 uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::VALUETYPE_SIZE + 7) / 8];
3615
3616 ValueTypeActionImpl ValueTypeActions;
3617
3618private:
3619 /// Targets can specify ISD nodes that they would like PerformDAGCombine
3620 /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this
3621 /// array.
3622 unsigned char
3623 TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT];
3624
3625 /// For operations that must be promoted to a specific type, this holds the
3626 /// destination type. This map should be sparse, so don't hold it as an
3627 /// array.
3628 ///
3629 /// Targets add entries to this map with AddPromotedToType(..), clients access
3630 /// this with getTypeToPromoteTo(..).
3631 std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType>
3632 PromoteToType;
3633
3634 /// The list of libcalls that the target will use.
3635 RTLIB::RuntimeLibcallsInfo Libcalls;
3636
3637 /// The ISD::CondCode that should be used to test the result of each of the
3638 /// comparison libcall against zero.
3639 ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL];
3640
3641 /// The bits of IndexedModeActions used to store the legalisation actions.
3642 /// We store the data as | ML | MS | L | S |, each field taking 4 bits.
3643 enum IndexedModeActionsBits {
3644 IMAB_Store = 0,
3645 IMAB_Load = 4,
3646 IMAB_MaskedStore = 8,
3647 IMAB_MaskedLoad = 12
3648 };
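// Packing illustration (assumed layout per the enum above): for a given
// (VT, IdxMode) entry the 16-bit value decomposes into four 4-bit actions,
//
//   bits 15..12   bits 11..8    bits 7..4   bits 3..0
//   MaskedLoad    MaskedStore   Load        Store
//
// so e.g. reading the load action is (Entry >> IMAB_Load) & 0xf.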
3649
3650 void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift,
3651 LegalizeAction Action) {
3652 assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
3653 (unsigned)Action < 0xf && "Table isn't big enough!");
3654 unsigned Ty = (unsigned)VT.SimpleTy;
3655 IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift);
3656 IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift;
3657 }
3658
3659 LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT,
3660 unsigned Shift) const {
3661 assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
3662 "Table isn't big enough!");
3663 unsigned Ty = (unsigned)VT.SimpleTy;
3664 return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf);
3665 }
3666
3667protected:
3668 /// Return true if the extension represented by \p I is free.
3669 /// \pre \p I is a sign, zero, or fp extension and
3670 /// is[Z|FP]ExtFree of the related types is not true.
3671 virtual bool isExtFreeImpl(const Instruction *I) const { return false; }
3672
3673 /// Depth that GatherAllAliases should continue looking for chain
3674 /// dependencies when trying to find a more preferable chain. As an
3675 /// approximation, this should be more than the number of consecutive stores
3676 /// expected to be merged.
3677 unsigned GatherAllAliasesMaxDepth;
3678
3679 /// \brief Specify maximum number of store instructions per memset call.
3680 ///
3681 /// When lowering \@llvm.memset this field specifies the maximum number of
3682 /// store operations that may be substituted for the call to memset. Targets
3683 /// must set this value based on the cost threshold for that target. Targets
3684 /// should assume that the memset will be done using as many of the largest
3685 /// store operations first, followed by smaller ones, if necessary, per
3686 /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine
3687 /// with 16-bit alignment would result in four 2-byte stores and one 1-byte
3688 /// store. This only applies to setting a constant array of a constant size.
3689 unsigned MaxStoresPerMemset;
3690 /// Likewise for functions with the OptSize attribute.
3691 unsigned MaxStoresPerMemsetOptSize;
3692
3693 /// \brief Specify maximum number of store instructions per memcpy call.
3694 ///
3695 /// When lowering \@llvm.memcpy this field specifies the maximum number of
3696 /// store operations that may be substituted for a call to memcpy. Targets
3697 /// must set this value based on the cost threshold for that target. Targets
3698 /// should assume that the memcpy will be done using as many of the largest
3699 /// store operations first, followed by smaller ones, if necessary, per
3700 /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine
3701 /// with 32-bit alignment would result in one 4-byte store, one 2-byte store
3702 /// and one 1-byte store. This only applies to copying a constant array of
3703 /// constant size.
3704 unsigned MaxStoresPerMemcpy;
3705 /// Likewise for functions with the OptSize attribute.
3706 unsigned MaxStoresPerMemcpyOptSize;
3707 /// \brief Specify max number of store instructions to glue in inlined memcpy.
3708 ///
3709 /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number
3710 /// of store instructions to keep together. This helps in pairing and
3711 /// vectorization later on.
3712 unsigned MaxGluedStoresPerMemcpy = 0;
3713
3714 /// \brief Specify maximum number of load instructions per memcmp call.
3715 ///
3716 /// When lowering \@llvm.memcmp this field specifies the maximum number of
3717 /// pairs of load operations that may be substituted for a call to memcmp.
3718 /// Targets must set this value based on the cost threshold for that target.
3719 /// Targets should assume that the memcmp will be done using as many of the
3720 /// largest load operations first, followed by smaller ones, if necessary, per
3721 /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine
3722 /// with 32-bit alignment would result in one 4-byte load, one 2-byte load
3723 /// and one 1-byte load. This only applies to copying a constant array of
3724 /// constant size.
3725 unsigned MaxLoadsPerMemcmp;
3726 /// Likewise for functions with the OptSize attribute.
3727 unsigned MaxLoadsPerMemcmpOptSize;
3728
3729 /// \brief Specify maximum number of store instructions per memmove call.
3730 ///
3731 /// When lowering \@llvm.memmove this field specifies the maximum number of
3732 /// store instructions that may be substituted for a call to memmove. Targets
3733 /// must set this value based on the cost threshold for that target. Targets
3734 /// should assume that the memmove will be done using as many of the largest
3735 /// store operations first, followed by smaller ones, if necessary, per
3736 /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine
3737 /// with 8-bit alignment would result in nine 1-byte stores. This only
3738 /// applies to copying a constant array of constant size.
3739 unsigned MaxStoresPerMemmove;
3740 /// Likewise for functions with the OptSize attribute.
3741 unsigned MaxStoresPerMemmoveOptSize;
3742
3743 /// Tells the code generator that select is more expensive than a branch if
3744 /// the branch is usually predicted right.
3745 bool PredictableSelectIsExpensive;
3746
3747 /// \see enableExtLdPromotion.
3748 bool EnableExtLdPromotion;
3749
3750 /// Return true if the value types that can be represented by the specified
3751 /// register class are all legal.
3752 bool isLegalRC(const TargetRegisterInfo &TRI,
3753 const TargetRegisterClass &RC) const;
3754
3755 /// Replace/modify any TargetFrameIndex operands with a target-dependent
3756 /// sequence of memory operands that is recognized by PrologEpilogInserter.
3757 MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
3758 MachineBasicBlock *MBB) const;
3759
3760 bool IsStrictFPEnabled;
3761};
3762
3763/// This class defines information used to lower LLVM code to legal SelectionDAG
3764/// operators that the target instruction selector can accept natively.
3765///
3766/// This class also defines callbacks that targets must implement to lower
3767/// target-specific constructs to SelectionDAG operators.
3768class TargetLowering : public TargetLoweringBase {
3769public:
3770 struct DAGCombinerInfo;
3771 struct MakeLibCallOptions;
3772
3773 TargetLowering(const TargetLowering &) = delete;
3774 TargetLowering &operator=(const TargetLowering &) = delete;
3775
3776 explicit TargetLowering(const TargetMachine &TM);
3777
3778 bool isPositionIndependent() const;
3779
3780 virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
3781 FunctionLoweringInfo *FLI,
3782 UniformityInfo *UA) const {
3783 return false;
3784 }
3785
3786 // Lets the target control the following reassociation of operands: (op (op x,
3787 // c1), y) -> (op (op x, y), c1) where N0 is (op x, c1) and N1 is y. By
3788 // default consider profitable any case where N0 has single use. This
3789 // behavior reflects the condition replaced by this target hook call in the
3790 // DAGCombiner. Any particular target can implement its own heuristic to
3791 // restrict common combiner.
3792 virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
3793 SDValue N1) const {
3794 return N0.hasOneUse();
3795 }
3796
3797 // Lets the target control the following reassociation of operands: (op (op x,
3798 // c1), y) -> (op (op x, y), c1) where N0 is (op x, c1) and N1 is y. By
3799 // default consider profitable any case where N0 has single use. This
3800 // behavior reflects the condition replaced by this target hook call in the
3801 // combiner. Any particular target can implement its own heuristic to
3802 // restrict common combiner.
3803 virtual bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
3804 Register N1) const {
3805 return MRI.hasOneNonDBGUse(N0);
3806 }
3807
3808 virtual bool isSDNodeAlwaysUniform(const SDNode * N) const {
3809 return false;
3810 }
3811
3812 /// Returns true by value, base pointer and offset pointer and addressing mode
3813 /// by reference if the node's address can be legally represented as
3814 /// a pre-indexed load / store address.
3815 virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/,
3816 SDValue &/*Offset*/,
3817 ISD::MemIndexedMode &/*AM*/,
3818 SelectionDAG &/*DAG*/) const {
3819 return false;
3820 }
3821
3822 /// Returns true by value, base pointer and offset pointer and addressing mode
3823 /// by reference if this node can be combined with a load / store to form a
3824 /// post-indexed load / store.
3825 virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/,
3826 SDValue &/*Base*/,
3827 SDValue &/*Offset*/,
3828 ISD::MemIndexedMode &/*AM*/,
3829 SelectionDAG &/*DAG*/) const {
3830 return false;
3831 }
3832
3833 /// Returns true if the specified base+offset is a legal indexed addressing
3834 /// mode for this target. \p MI is the load or store instruction that is being
3835 /// considered for transformation.
3836 virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
3837 bool IsPre, MachineRegisterInfo &MRI) const {
3838 return false;
3839 }
3840
3841 /// Return the entry encoding for a jump table in the current function. The
3842 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
3843 virtual unsigned getJumpTableEncoding() const;
3844
3845 virtual const MCExpr *
3846 LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/,
3847 const MachineBasicBlock * /*MBB*/, unsigned /*uid*/,
3848 MCContext &/*Ctx*/) const {
3849 llvm_unreachable("Need to implement this hook if target has custom JTIs");
3850 }
3851
3852 /// Returns relocation base for the given PIC jumptable.
3853 virtual SDValue getPICJumpTableRelocBase(SDValue Table,
3854 SelectionDAG &DAG) const;
3855
3856 /// This returns the relocation base for the given PIC jumptable, the same as
3857 /// getPICJumpTableRelocBase, but as an MCExpr.
3858 virtual const MCExpr *
3859 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3860 unsigned JTI, MCContext &Ctx) const;
3861
3862 /// Return true if folding a constant offset with the given GlobalAddress is
3863 /// legal. It is frequently not legal in PIC relocation models.
3864 virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
3865
3866 /// On x86, return true if the operand with index OpNo is a CALL or JUMP
3867 /// instruction, which can use either a memory constraint or an address
3868 /// constraint. -fasm-blocks "__asm call foo" lowers to
3869 /// call void asm sideeffect inteldialect "call ${0:P}", "*m..."
3870 ///
3871 /// This function is used by a hack to choose the address constraint,
3872 /// lowering to a direct call.
3873 virtual bool
3874 isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
3875 unsigned OpNo) const {
3876 return false;
3877 }
3878
3879 bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
3880 SDValue &Chain) const;
3881
3882 void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
3883 SDValue &NewRHS, ISD::CondCode &CCCode,
3884 const SDLoc &DL, const SDValue OldLHS,
3885 const SDValue OldRHS) const;
3886
3887 void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
3888 SDValue &NewRHS, ISD::CondCode &CCCode,
3889 const SDLoc &DL, const SDValue OldLHS,
3890 const SDValue OldRHS, SDValue &Chain,
3891 bool IsSignaling = false) const;
3892
3893 virtual SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL,
3894 SDValue Chain, MachineMemOperand *MMO,
3895 SDValue &NewLoad, SDValue Ptr,
3896 SDValue PassThru, SDValue Mask) const {
3897 llvm_unreachable("Not Implemented");
3898 }
3899
3900 virtual SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL,
3901 SDValue Chain, MachineMemOperand *MMO,
3902 SDValue Ptr, SDValue Val,
3903 SDValue Mask) const {
3904 llvm_unreachable("Not Implemented");
3905 }
3906
3907 /// Returns a pair of (return value, chain).
3908 /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC.
3909 std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
3910 EVT RetVT, ArrayRef<SDValue> Ops,
3911 MakeLibCallOptions CallOptions,
3912 const SDLoc &dl,
3913 SDValue Chain = SDValue()) const;
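  // An illustrative sketch of makeLibCall (written from within a target's
  // lowering code; N and DAG come from the surrounding context, and the choice
  // of expanding an f32 FADD is only an example): the first element of the
  // returned pair is the libcall's result, the second its output chain.
  //
  //   SDLoc dl(N);
  //   MakeLibCallOptions CallOptions;
  //   SDValue Ops[] = {N->getOperand(0), N->getOperand(1)};
  //   std::pair<SDValue, SDValue> Tmp =
  //       makeLibCall(DAG, RTLIB::ADD_F32, MVT::f32, Ops, CallOptions, dl);
  //   SDValue Result = Tmp.first; // value returned by the libcall
  //   SDValue Chain = Tmp.second; // output chain of the call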
3914
3915 /// Check whether parameters to a call that are passed in callee saved
3916 /// registers are the same as from the calling function. This needs to be
3917 /// checked for tail call eligibility.
3918 bool parametersInCSRMatch(const MachineRegisterInfo &MRI,
3919 const uint32_t *CallerPreservedMask,
3920 const SmallVectorImpl<CCValAssign> &ArgLocs,
3921 const SmallVectorImpl<SDValue> &OutVals) const;
3922
3923 //===--------------------------------------------------------------------===//
3924 // TargetLowering Optimization Methods
3925 //
3926
3927 /// A convenience struct that encapsulates a DAG, and two SDValues for
3928 /// returning information from TargetLowering to its clients that want to
3929 /// combine.
3930 struct TargetLoweringOpt {
3931 SelectionDAG &DAG;
3932 bool LegalTys;
3933 bool LegalOps;
3934 SDValue Old;
3935 SDValue New;
3936
3937 explicit TargetLoweringOpt(SelectionDAG &InDAG,
3938 bool LT, bool LO) :
3939 DAG(InDAG), LegalTys(LT), LegalOps(LO) {}
3940
3941 bool LegalTypes() const { return LegalTys; }
3942 bool LegalOperations() const { return LegalOps; }
3943
3944 bool CombineTo(SDValue O, SDValue N) {
3945 Old = O;
3946 New = N;
3947 return true;
3948 }
3949 };
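  // Typical usage (a sketch; Op, N and DCI come from the surrounding combine):
  // construct a TargetLoweringOpt, call one of the SimplifyDemanded* helpers
  // declared below, and on success ask DAGCombinerInfo (defined later in this
  // class) to commit the recorded replacement:
  //
  //   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
  //                         !DCI.isBeforeLegalizeOps());
  //   unsigned BitWidth = Op.getScalarValueSizeInBits();
  //   APInt DemandedBits = APInt::getLowBitsSet(BitWidth, 8); // example mask
  //   KnownBits Known;
  //   if (SimplifyDemandedBits(Op, DemandedBits, Known, TLO)) {
  //     DCI.CommitTargetLoweringOpt(TLO);
  //     return SDValue(N, 0);
  //   }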
3950
  /// Determines the optimal series of memory ops to replace the memset /
  /// memcpy. Return true if the number of memory ops is below the threshold
  /// (Limit). Note that this is always the case when Limit is ~0.
  /// The types of the sequence of memory ops needed to perform the
  /// memset / memcpy are returned by reference in MemOps.
3956 virtual bool
3957 findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
3958 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
3959 const AttributeList &FuncAttributes) const;
3960
3961 /// Check to see if the specified operand of the specified instruction is a
3962 /// constant integer. If so, check to see if there are any bits set in the
3963 /// constant that are not demanded. If so, shrink the constant and return
3964 /// true.
3965 bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
3966 const APInt &DemandedElts,
3967 TargetLoweringOpt &TLO) const;
3968
3969 /// Helper wrapper around ShrinkDemandedConstant, demanding all elements.
3970 bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
3971 TargetLoweringOpt &TLO) const;
3972
3973 // Target hook to do target-specific const optimization, which is called by
3974 // ShrinkDemandedConstant. This function should return true if the target
3975 // doesn't want ShrinkDemandedConstant to further optimize the constant.
3976 virtual bool targetShrinkDemandedConstant(SDValue Op,
3977 const APInt &DemandedBits,
3978 const APInt &DemandedElts,
3979 TargetLoweringOpt &TLO) const {
3980 return false;
3981 }
3982
3983 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
3984 /// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
3985 /// but it could be generalized for targets with other types of implicit
3986 /// widening casts.
3987 bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
3988 const APInt &DemandedBits,
3989 TargetLoweringOpt &TLO) const;
3990
3991 /// Look at Op. At this point, we know that only the DemandedBits bits of the
3992 /// result of Op are ever used downstream. If we can use this information to
3993 /// simplify Op, create a new simplified DAG node and return true, returning
3994 /// the original and new nodes in Old and New. Otherwise, analyze the
3995 /// expression and return a mask of KnownOne and KnownZero bits for the
3996 /// expression (used to simplify the caller). The KnownZero/One bits may only
3997 /// be accurate for those bits in the Demanded masks.
3998 /// \p AssumeSingleUse When this parameter is true, this function will
3999 /// attempt to simplify \p Op even if there are multiple uses.
4000 /// Callers are responsible for correctly updating the DAG based on the
4001 /// results of this function, because simply replacing TLO.Old
4002 /// with TLO.New will be incorrect when this parameter is true and TLO.Old
4003 /// has multiple uses.
4004 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
4005 const APInt &DemandedElts, KnownBits &Known,
4006 TargetLoweringOpt &TLO, unsigned Depth = 0,
4007 bool AssumeSingleUse = false) const;
4008
4009 /// Helper wrapper around SimplifyDemandedBits, demanding all elements.
4010 /// Adds Op back to the worklist upon success.
4011 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
4012 KnownBits &Known, TargetLoweringOpt &TLO,
4013 unsigned Depth = 0,
4014 bool AssumeSingleUse = false) const;
4015
4016 /// Helper wrapper around SimplifyDemandedBits.
4017 /// Adds Op back to the worklist upon success.
4018 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
4019 DAGCombinerInfo &DCI) const;
4020
4021 /// Helper wrapper around SimplifyDemandedBits.
4022 /// Adds Op back to the worklist upon success.
4023 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
4024 const APInt &DemandedElts,
4025 DAGCombinerInfo &DCI) const;
4026
4027 /// More limited version of SimplifyDemandedBits that can be used to "look
4028 /// through" ops that don't contribute to the DemandedBits/DemandedElts -
4029 /// bitwise ops etc.
4030 SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
4031 const APInt &DemandedElts,
4032 SelectionDAG &DAG,
4033 unsigned Depth = 0) const;
4034
4035 /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
4036 /// elements.
4037 SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
4038 SelectionDAG &DAG,
4039 unsigned Depth = 0) const;
4040
4041 /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
4042 /// bits from only some vector elements.
4043 SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op,
4044 const APInt &DemandedElts,
4045 SelectionDAG &DAG,
4046 unsigned Depth = 0) const;
4047
4048 /// Look at Vector Op. At this point, we know that only the DemandedElts
4049 /// elements of the result of Op are ever used downstream. If we can use
4050 /// this information to simplify Op, create a new simplified DAG node and
4051 /// return true, storing the original and new nodes in TLO.
4052 /// Otherwise, analyze the expression and return a mask of KnownUndef and
4053 /// KnownZero elements for the expression (used to simplify the caller).
4054 /// The KnownUndef/Zero elements may only be accurate for those bits
4055 /// in the DemandedMask.
4056 /// \p AssumeSingleUse When this parameter is true, this function will
4057 /// attempt to simplify \p Op even if there are multiple uses.
4058 /// Callers are responsible for correctly updating the DAG based on the
4059 /// results of this function, because simply replacing TLO.Old
4060 /// with TLO.New will be incorrect when this parameter is true and TLO.Old
4061 /// has multiple uses.
4062 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask,
4063 APInt &KnownUndef, APInt &KnownZero,
4064 TargetLoweringOpt &TLO, unsigned Depth = 0,
4065 bool AssumeSingleUse = false) const;
4066
4067 /// Helper wrapper around SimplifyDemandedVectorElts.
4068 /// Adds Op back to the worklist upon success.
4069 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
4070 DAGCombinerInfo &DCI) const;
4071
4072 /// Return true if the target supports simplifying demanded vector elements by
4073 /// converting them to undefs.
4074 virtual bool
4075 shouldSimplifyDemandedVectorElts(SDValue Op,
4076 const TargetLoweringOpt &TLO) const {
4077 return true;
4078 }
4079
4080 /// Determine which of the bits specified in Mask are known to be either zero
4081 /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
4082 /// argument allows us to only collect the known bits that are shared by the
4083 /// requested vector elements.
4084 virtual void computeKnownBitsForTargetNode(const SDValue Op,
4085 KnownBits &Known,
4086 const APInt &DemandedElts,
4087 const SelectionDAG &DAG,
4088 unsigned Depth = 0) const;
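  // A sketch of an override of the hook above, assuming a hypothetical
  // MyTargetISD::HI16 node that always produces a value whose low 16 bits are
  // zero:
  //
  //   void MyTargetTargetLowering::computeKnownBitsForTargetNode(
  //       const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
  //       const SelectionDAG &DAG, unsigned Depth) const {
  //     Known.resetAll();
  //     if (Op.getOpcode() == MyTargetISD::HI16)
  //       Known.Zero.setLowBits(16);
  //   }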
4089
4090 /// Determine which of the bits specified in Mask are known to be either zero
4091 /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
4092 /// argument allows us to only collect the known bits that are shared by the
4093 /// requested vector elements. This is for GISel.
4094 virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis,
4095 Register R, KnownBits &Known,
4096 const APInt &DemandedElts,
4097 const MachineRegisterInfo &MRI,
4098 unsigned Depth = 0) const;
4099
  /// Determine the known alignment for the pointer value \p R. This can
4101 /// typically be inferred from the number of low known 0 bits. However, for a
4102 /// pointer with a non-integral address space, the alignment value may be
  /// independent of the known low bits.
4104 virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis,
4105 Register R,
4106 const MachineRegisterInfo &MRI,
4107 unsigned Depth = 0) const;
4108
4109 /// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
4110 /// Default implementation computes low bits based on alignment
4111 /// information. This should preserve known bits passed into it.
4112 virtual void computeKnownBitsForFrameIndex(int FIOp,
4113 KnownBits &Known,
4114 const MachineFunction &MF) const;
4115
4116 /// This method can be implemented by targets that want to expose additional
4117 /// information about sign bits to the DAG Combiner. The DemandedElts
4118 /// argument allows us to only collect the minimum sign bits that are shared
4119 /// by the requested vector elements.
4120 virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
4121 const APInt &DemandedElts,
4122 const SelectionDAG &DAG,
4123 unsigned Depth = 0) const;
4124
4125 /// This method can be implemented by targets that want to expose additional
4126 /// information about sign bits to GlobalISel combiners. The DemandedElts
4127 /// argument allows us to only collect the minimum sign bits that are shared
4128 /// by the requested vector elements.
4129 virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis,
4130 Register R,
4131 const APInt &DemandedElts,
4132 const MachineRegisterInfo &MRI,
4133 unsigned Depth = 0) const;
4134
4135 /// Attempt to simplify any target nodes based on the demanded vector
4136 /// elements, returning true on success. Otherwise, analyze the expression and
4137 /// return a mask of KnownUndef and KnownZero elements for the expression
4138 /// (used to simplify the caller). The KnownUndef/Zero elements may only be
4139 /// accurate for those bits in the DemandedMask.
4140 virtual bool SimplifyDemandedVectorEltsForTargetNode(
4141 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
4142 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;
4143
4144 /// Attempt to simplify any target nodes based on the demanded bits/elts,
4145 /// returning true on success. Otherwise, analyze the
4146 /// expression and return a mask of KnownOne and KnownZero bits for the
4147 /// expression (used to simplify the caller). The KnownZero/One bits may only
4148 /// be accurate for those bits in the Demanded masks.
4149 virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op,
4150 const APInt &DemandedBits,
4151 const APInt &DemandedElts,
4152 KnownBits &Known,
4153 TargetLoweringOpt &TLO,
4154 unsigned Depth = 0) const;
4155
4156 /// More limited version of SimplifyDemandedBits that can be used to "look
4157 /// through" ops that don't contribute to the DemandedBits/DemandedElts -
4158 /// bitwise ops etc.
4159 virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
4160 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4161 SelectionDAG &DAG, unsigned Depth) const;
4162
4163 /// Return true if this function can prove that \p Op is never poison
4164 /// and, if \p PoisonOnly is false, does not have undef bits. The DemandedElts
4165 /// argument limits the check to the requested vector elements.
4166 virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4167 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4168 bool PoisonOnly, unsigned Depth) const;
4169
4170 /// Return true if Op can create undef or poison from non-undef & non-poison
4171 /// operands. The DemandedElts argument limits the check to the requested
4172 /// vector elements.
4173 virtual bool
4174 canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts,
4175 const SelectionDAG &DAG, bool PoisonOnly,
4176 bool ConsiderFlags, unsigned Depth) const;
4177
4178 /// Tries to build a legal vector shuffle using the provided parameters
  /// or equivalent variations. The Mask argument may be modified as the
4180 /// function tries different variations.
4181 /// Returns an empty SDValue if the operation fails.
4182 SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
4183 SDValue N1, MutableArrayRef<int> Mask,
4184 SelectionDAG &DAG) const;
4185
4186 /// This method returns the constant pool value that will be loaded by LD.
4187 /// NOTE: You must check for implicit extensions of the constant by LD.
4188 virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;
4189
4190 /// If \p SNaN is false, \returns true if \p Op is known to never be any
  /// NaN. If \p SNaN is true, returns true if \p Op is known to never be a
  /// signaling NaN.
4193 virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
4194 const SelectionDAG &DAG,
4195 bool SNaN = false,
4196 unsigned Depth = 0) const;
4197
4198 /// Return true if vector \p Op has the same value across all \p DemandedElts,
4199 /// indicating any elements which may be undef in the output \p UndefElts.
4200 virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
4201 APInt &UndefElts,
4202 const SelectionDAG &DAG,
4203 unsigned Depth = 0) const;
4204
4205 /// Returns true if the given Opc is considered a canonical constant for the
4206 /// target, which should not be transformed back into a BUILD_VECTOR.
4207 virtual bool isTargetCanonicalConstantNode(SDValue Op) const {
4208 return Op.getOpcode() == ISD::SPLAT_VECTOR ||
4209 Op.getOpcode() == ISD::SPLAT_VECTOR_PARTS;
4210 }
4211
4212 struct DAGCombinerInfo {
4213 void *DC; // The DAG Combiner object.
4214 CombineLevel Level;
4215 bool CalledByLegalizer;
4216
4217 public:
4218 SelectionDAG &DAG;
4219
4220 DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc)
4221 : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {}
4222
4223 bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
4224 bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
4225 bool isAfterLegalizeDAG() const { return Level >= AfterLegalizeDAG; }
4226 CombineLevel getDAGCombineLevel() { return Level; }
4227 bool isCalledByLegalizer() const { return CalledByLegalizer; }
4228
4229 void AddToWorklist(SDNode *N);
4230 SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true);
4231 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
4232 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);
4233
4234 bool recursivelyDeleteUnusedNodes(SDNode *N);
4235
4236 void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
4237 };
4238
  /// Return true if \p N is a constant or constant vector equal to the true
  /// value from getBooleanContents().
  bool isConstTrueVal(SDValue N) const;

  /// Return true if \p N is a constant or constant vector equal to the false
  /// value from getBooleanContents().
  bool isConstFalseVal(SDValue N) const;
4246
4247 /// Return if \p N is a True value when extended to \p VT.
4248 bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const;
4249
4250 /// Try to simplify a setcc built with the specified operands and cc. If it is
4251 /// unable to simplify it, return a null SDValue.
4252 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
4253 bool foldBooleans, DAGCombinerInfo &DCI,
4254 const SDLoc &dl) const;
4255
4256 // For targets which wrap address, unwrap for analysis.
4257 virtual SDValue unwrapAddress(SDValue N) const { return N; }
4258
4259 /// Returns true (and the GlobalValue and the offset) if the node is a
4260 /// GlobalAddress + offset.
4261 virtual bool
4262 isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
4263
4264 /// This method will be invoked for all target nodes and for any
4265 /// target-independent nodes that the target has registered with invoke it
4266 /// for.
4267 ///
4268 /// The semantics are as follows:
4269 /// Return Value:
4270 /// SDValue.Val == 0 - No change was made
4271 /// SDValue.Val == N - N was replaced, is dead, and is already handled.
4272 /// otherwise - N should be replaced by the returned Operand.
4273 ///
4274 /// In addition, methods provided by DAGCombinerInfo may be used to perform
4275 /// more complex transformations.
4276 ///
4277 virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
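  // A schematic override (the particular fold is illustrative only) showing
  // the "no change" and "replace N" result conventions described above:
  //
  //   SDValue
  //   MyTargetTargetLowering::PerformDAGCombine(SDNode *N,
  //                                             DAGCombinerInfo &DCI) const {
  //     SelectionDAG &DAG = DCI.DAG;
  //     switch (N->getOpcode()) {
  //     case ISD::AND:
  //       // Replace (and x, 0) with 0: return the replacement value for N.
  //       if (isNullConstant(N->getOperand(1)))
  //         return DAG.getConstant(0, SDLoc(N), N->getValueType(0));
  //       break;
  //     default:
  //       break;
  //     }
  //     return SDValue(); // No change was made.
  //   }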
4278
4279 /// Return true if it is profitable to move this shift by a constant amount
4280 /// through its operand, adjusting any immediate operands as necessary to
4281 /// preserve semantics. This transformation may not be desirable if it
4282 /// disrupts a particularly auspicious target-specific tree (e.g. bitfield
4283 /// extraction in AArch64). By default, it returns true.
4284 ///
4285 /// @param N the shift node
4286 /// @param Level the current DAGCombine legalization level.
4287 virtual bool isDesirableToCommuteWithShift(const SDNode *N,
4288 CombineLevel Level) const {
4289 return true;
4290 }
4291
4292 /// GlobalISel - return true if it is profitable to move this shift by a
4293 /// constant amount through its operand, adjusting any immediate operands as
4294 /// necessary to preserve semantics. This transformation may not be desirable
4295 /// if it disrupts a particularly auspicious target-specific tree (e.g.
4296 /// bitfield extraction in AArch64). By default, it returns true.
4297 ///
4298 /// @param MI the shift instruction
4299 /// @param IsAfterLegal true if running after legalization.
4300 virtual bool isDesirableToCommuteWithShift(const MachineInstr &MI,
4301 bool IsAfterLegal) const {
4302 return true;
4303 }
4304
4305 /// GlobalISel - return true if it's profitable to perform the combine:
4306 /// shl ([sza]ext x), y => zext (shl x, y)
4307 virtual bool isDesirableToPullExtFromShl(const MachineInstr &MI) const {
4308 return true;
4309 }
4310
  // Return AndOrSETCCFoldKind::{AddAnd, ABS} if it's desirable to try and
4312 // optimize LogicOp(SETCC0, SETCC1). An example (what is implemented as of
4313 // writing this) is:
4314 // With C as a power of 2 and C != 0 and C != INT_MIN:
4315 // AddAnd:
4316 // (icmp eq A, C) | (icmp eq A, -C)
4317 // -> (icmp eq and(add(A, C), ~(C + C)), 0)
  //      (icmp ne A, C) & (icmp ne A, -C)
4319 // -> (icmp ne and(add(A, C), ~(C + C)), 0)
4320 // ABS:
4321 // (icmp eq A, C) | (icmp eq A, -C)
4322 // -> (icmp eq Abs(A), C)
  //      (icmp ne A, C) & (icmp ne A, -C)
4324 // -> (icmp ne Abs(A), C)
4325 //
4326 // @param LogicOp the logic op
4327 // @param SETCC0 the first of the SETCC nodes
  // @param SETCC1 the second of the SETCC nodes
4329 virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(
4330 const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const {
4331 return AndOrSETCCFoldKind::None;
4332 }
4333
4334 /// Return true if it is profitable to combine an XOR of a logical shift
4335 /// to create a logical shift of NOT. This transformation may not be desirable
4336 /// if it disrupts a particularly auspicious target-specific tree (e.g.
4337 /// BIC on ARM/AArch64). By default, it returns true.
4338 virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const {
4339 return true;
4340 }
4341
4342 /// Return true if the target has native support for the specified value type
4343 /// and it is 'desirable' to use the type for the given node type. e.g. On x86
4344 /// i16 is legal, but undesirable since i16 instruction encodings are longer
4345 /// and some i16 instructions are slow.
4346 virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const {
4347 // By default, assume all legal types are desirable.
4348 return isTypeLegal(VT);
4349 }
4350
4351 /// Return true if it is profitable for dag combiner to transform a floating
  /// point op of the specified opcode to an equivalent op of an integer
4353 /// type. e.g. f32 load -> i32 load can be profitable on ARM.
4354 virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/,
4355 EVT /*VT*/) const {
4356 return false;
4357 }
4358
  /// This method queries the target whether it is beneficial for dag combiner
4360 /// promote the specified node. If true, it should return the desired
4361 /// promotion type by reference.
4362 virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const {
4363 return false;
4364 }
4365
4366 /// Return true if the target supports swifterror attribute. It optimizes
4367 /// loads and stores to reading and writing a specific register.
4368 virtual bool supportSwiftError() const {
4369 return false;
4370 }
4371
4372 /// Return true if the target supports that a subset of CSRs for the given
4373 /// machine function is handled explicitly via copies.
4374 virtual bool supportSplitCSR(MachineFunction *MF) const {
4375 return false;
4376 }
4377
4378 /// Return true if the target supports kcfi operand bundles.
4379 virtual bool supportKCFIBundles() const { return false; }
4380
4381 /// Return true if the target supports ptrauth operand bundles.
4382 virtual bool supportPtrAuthBundles() const { return false; }
4383
4384 /// Perform necessary initialization to handle a subset of CSRs explicitly
4385 /// via copies. This function is called at the beginning of instruction
4386 /// selection.
4387 virtual void initializeSplitCSR(MachineBasicBlock *Entry) const {
4388 llvm_unreachable("Not Implemented");
4389 }
4390
4391 /// Insert explicit copies in entry and exit blocks. We copy a subset of
4392 /// CSRs to virtual registers in the entry block, and copy them back to
4393 /// physical registers in the exit blocks. This function is called at the end
4394 /// of instruction selection.
4395 virtual void insertCopiesSplitCSR(
4396 MachineBasicBlock *Entry,
4397 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
4398 llvm_unreachable("Not Implemented");
4399 }
4400
4401 /// Return the newly negated expression if the cost is not expensive and
4402 /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
4403 /// do the negation.
4404 virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
4405 bool LegalOps, bool OptForSize,
4406 NegatibleCost &Cost,
4407 unsigned Depth = 0) const;
4408
4409 SDValue getCheaperOrNeutralNegatedExpression(
4410 SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize,
4411 const NegatibleCost CostThreshold = NegatibleCost::Neutral,
4412 unsigned Depth = 0) const {
4413 NegatibleCost Cost = NegatibleCost::Expensive;
4414 SDValue Neg =
4415 getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
4416 if (!Neg)
4417 return SDValue();
4418
4419 if (Cost <= CostThreshold)
4420 return Neg;
4421
    // Remove the newly created node to avoid the side effect to the DAG.
    if (Neg->use_empty())
      DAG.RemoveDeadNode(Neg.getNode());
4425 return SDValue();
4426 }
4427
4428 /// This is the helper function to return the newly negated expression only
4429 /// when the cost is cheaper.
4430 SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG,
4431 bool LegalOps, bool OptForSize,
4432 unsigned Depth = 0) const {
    return getCheaperOrNeutralNegatedExpression(Op, DAG, LegalOps, OptForSize,
                                                NegatibleCost::Cheaper, Depth);
4435 }
4436
4437 /// This is the helper function to return the newly negated expression if
4438 /// the cost is not expensive.
4439 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
4440 bool OptForSize, unsigned Depth = 0) const {
4441 NegatibleCost Cost = NegatibleCost::Expensive;
4442 return getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
4443 }
4444
4445 //===--------------------------------------------------------------------===//
4446 // Lowering methods - These methods must be implemented by targets so that
4447 // the SelectionDAGBuilder code knows how to lower these.
4448 //
4449
4450 /// Target-specific splitting of values into parts that fit a register
4451 /// storing a legal type
4452 virtual bool splitValueIntoRegisterParts(
4453 SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4454 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4455 return false;
4456 }
4457
4458 /// Allows the target to handle physreg-carried dependency
  /// in a target-specific way. Used from ScheduleDAGSDNodes to decide whether
4460 /// to add the edge to the dependency graph.
  /// Def - input: Selection DAG node defining physical register
4462 /// User - input: Selection DAG node using physical register
4463 /// Op - input: Number of User operand
4464 /// PhysReg - inout: set to the physical register if the edge is
4465 /// necessary, unchanged otherwise
4466 /// Cost - inout: physical register copy cost.
  /// Returns 'true' if the edge is necessary, 'false' otherwise
4468 virtual bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
4469 const TargetRegisterInfo *TRI,
4470 const TargetInstrInfo *TII,
4471 unsigned &PhysReg, int &Cost) const {
4472 return false;
4473 }
4474
  /// Target-specific combining of register parts back into the original value
4476 virtual SDValue
4477 joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
4478 const SDValue *Parts, unsigned NumParts,
4479 MVT PartVT, EVT ValueVT,
4480 std::optional<CallingConv::ID> CC) const {
4481 return SDValue();
4482 }
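  // A sketch of the splitting side, assuming a hypothetical target that passes
  // f16 values in the low bits of i32 register parts
  // (joinRegisterPartsIntoValue would perform the inverse transformation):
  //
  //   bool MyTargetTargetLowering::splitValueIntoRegisterParts(
  //       SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
  //       unsigned NumParts, MVT PartVT,
  //       std::optional<CallingConv::ID> CC) const {
  //     if (Val.getValueType() == MVT::f16 && PartVT == MVT::i32 &&
  //         NumParts == 1) {
  //       Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
  //       Parts[0] = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
  //       return true;
  //     }
  //     return false; // Use the default splitting logic.
  //   }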
4483
4484 /// This hook must be implemented to lower the incoming (formal) arguments,
4485 /// described by the Ins array, into the specified DAG. The implementation
4486 /// should fill in the InVals array with legal-type argument values, and
4487 /// return the resulting token chain value.
4488 virtual SDValue LowerFormalArguments(
4489 SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/,
4490 const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/,
4491 SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const {
4492 llvm_unreachable("Not Implemented");
4493 }
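  // A highly simplified sketch of an implementation that only handles register
  // arguments, assuming a hypothetical MyTarget with a tablegen-generated
  // CC_MyTarget assignment function and a GPR register class:
  //
  //   SDValue MyTargetTargetLowering::LowerFormalArguments(
  //       SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
  //       const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
  //       SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  //     MachineFunction &MF = DAG.getMachineFunction();
  //     SmallVector<CCValAssign, 16> ArgLocs;
  //     CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  //     CCInfo.AnalyzeFormalArguments(Ins, CC_MyTarget);
  //     for (CCValAssign &VA : ArgLocs) {
  //       assert(VA.isRegLoc() && "stack args not handled in this sketch");
  //       Register VReg =
  //           MF.getRegInfo().createVirtualRegister(&MyTarget::GPRRegClass);
  //       MF.getRegInfo().addLiveIn(VA.getLocReg(), VReg);
  //       InVals.push_back(DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT()));
  //     }
  //     return Chain;
  //   }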
4494
4495 /// This structure contains the information necessary for lowering
4496 /// pointer-authenticating indirect calls. It is equivalent to the "ptrauth"
4497 /// operand bundle found on the call instruction, if any.
4498 struct PtrAuthInfo {
4499 uint64_t Key;
4500 SDValue Discriminator;
4501 };
4502
4503 /// This structure contains all information that is necessary for lowering
4504 /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder
4505 /// needs to lower a call, and targets will see this struct in their LowerCall
4506 /// implementation.
4507 struct CallLoweringInfo {
4508 SDValue Chain;
4509 Type *RetTy = nullptr;
4510 bool RetSExt : 1;
4511 bool RetZExt : 1;
4512 bool IsVarArg : 1;
4513 bool IsInReg : 1;
4514 bool DoesNotReturn : 1;
4515 bool IsReturnValueUsed : 1;
4516 bool IsConvergent : 1;
4517 bool IsPatchPoint : 1;
4518 bool IsPreallocated : 1;
4519 bool NoMerge : 1;
4520
4521 // IsTailCall should be modified by implementations of
4522 // TargetLowering::LowerCall that perform tail call conversions.
4523 bool IsTailCall = false;
4524
4525 // Is Call lowering done post SelectionDAG type legalization.
4526 bool IsPostTypeLegalization = false;
4527
4528 unsigned NumFixedArgs = -1;
4529 CallingConv::ID CallConv = CallingConv::C;
4530 SDValue Callee;
4531 ArgListTy Args;
4532 SelectionDAG &DAG;
4533 SDLoc DL;
4534 const CallBase *CB = nullptr;
4535 SmallVector<ISD::OutputArg, 32> Outs;
4536 SmallVector<SDValue, 32> OutVals;
4537 SmallVector<ISD::InputArg, 32> Ins;
4538 SmallVector<SDValue, 4> InVals;
4539 const ConstantInt *CFIType = nullptr;
4540 SDValue ConvergenceControlToken;
4541
4542 std::optional<PtrAuthInfo> PAI;
4543
4544 CallLoweringInfo(SelectionDAG &DAG)
4545 : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
4546 DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
4547 IsPatchPoint(false), IsPreallocated(false), NoMerge(false),
4548 DAG(DAG) {}
4549
4550 CallLoweringInfo &setDebugLoc(const SDLoc &dl) {
4551 DL = dl;
4552 return *this;
4553 }
4554
4555 CallLoweringInfo &setChain(SDValue InChain) {
4556 Chain = InChain;
4557 return *this;
4558 }
4559
4560 // setCallee with target/module-specific attributes
4561 CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType,
4562 SDValue Target, ArgListTy &&ArgsList) {
4563 RetTy = ResultType;
4564 Callee = Target;
4565 CallConv = CC;
4566 NumFixedArgs = ArgsList.size();
4567 Args = std::move(ArgsList);
4568
      DAG.getTargetLoweringInfo().markLibCallAttributes(
          &(DAG.getMachineFunction()), CC, Args);
4571 return *this;
4572 }
4573
4574 CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType,
4575 SDValue Target, ArgListTy &&ArgsList,
4576 AttributeSet ResultAttrs = {}) {
4577 RetTy = ResultType;
      IsInReg = ResultAttrs.hasAttribute(Attribute::InReg);
      RetSExt = ResultAttrs.hasAttribute(Attribute::SExt);
      RetZExt = ResultAttrs.hasAttribute(Attribute::ZExt);
      NoMerge = ResultAttrs.hasAttribute(Attribute::NoMerge);
4582
4583 Callee = Target;
4584 CallConv = CC;
4585 NumFixedArgs = ArgsList.size();
4586 Args = std::move(ArgsList);
4587 return *this;
4588 }
4589
4590 CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy,
4591 SDValue Target, ArgListTy &&ArgsList,
4592 const CallBase &Call) {
4593 RetTy = ResultType;
4594
      IsInReg = Call.hasRetAttr(Attribute::InReg);
      DoesNotReturn =
          Call.doesNotReturn() ||
          (!isa<InvokeInst>(Call) && isa<UnreachableInst>(Call.getNextNode()));
      IsVarArg = FTy->isVarArg();
      IsReturnValueUsed = !Call.use_empty();
      RetSExt = Call.hasRetAttr(Attribute::SExt);
      RetZExt = Call.hasRetAttr(Attribute::ZExt);
      NoMerge = Call.hasFnAttr(Attribute::NoMerge);
4604
4605 Callee = Target;
4606
4607 CallConv = Call.getCallingConv();
4608 NumFixedArgs = FTy->getNumParams();
4609 Args = std::move(ArgsList);
4610
4611 CB = &Call;
4612
4613 return *this;
4614 }
4615
4616 CallLoweringInfo &setInRegister(bool Value = true) {
4617 IsInReg = Value;
4618 return *this;
4619 }
4620
4621 CallLoweringInfo &setNoReturn(bool Value = true) {
4622 DoesNotReturn = Value;
4623 return *this;
4624 }
4625
4626 CallLoweringInfo &setVarArg(bool Value = true) {
4627 IsVarArg = Value;
4628 return *this;
4629 }
4630
4631 CallLoweringInfo &setTailCall(bool Value = true) {
4632 IsTailCall = Value;
4633 return *this;
4634 }
4635
4636 CallLoweringInfo &setDiscardResult(bool Value = true) {
4637 IsReturnValueUsed = !Value;
4638 return *this;
4639 }
4640
4641 CallLoweringInfo &setConvergent(bool Value = true) {
4642 IsConvergent = Value;
4643 return *this;
4644 }
4645
4646 CallLoweringInfo &setSExtResult(bool Value = true) {
4647 RetSExt = Value;
4648 return *this;
4649 }
4650
4651 CallLoweringInfo &setZExtResult(bool Value = true) {
4652 RetZExt = Value;
4653 return *this;
4654 }
4655
4656 CallLoweringInfo &setIsPatchPoint(bool Value = true) {
4657 IsPatchPoint = Value;
4658 return *this;
4659 }
4660
4661 CallLoweringInfo &setIsPreallocated(bool Value = true) {
4662 IsPreallocated = Value;
4663 return *this;
4664 }
4665
4666 CallLoweringInfo &setPtrAuth(PtrAuthInfo Value) {
4667 PAI = Value;
4668 return *this;
4669 }
4670
    CallLoweringInfo &setIsPostTypeLegalization(bool Value = true) {
4672 IsPostTypeLegalization = Value;
4673 return *this;
4674 }
4675
4676 CallLoweringInfo &setCFIType(const ConstantInt *Type) {
4677 CFIType = Type;
4678 return *this;
4679 }
4680
4681 CallLoweringInfo &setConvergenceControlToken(SDValue Token) {
4682 ConvergenceControlToken = Token;
4683 return *this;
4684 }
4685
4686 ArgListTy &getArgs() {
4687 return Args;
4688 }
4689 };
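  // Usage sketch (from within a target's lowering code; the callee name,
  // argument and surrounding variables are illustrative): build the
  // CallLoweringInfo with the setter chain and hand it to LowerCallTo,
  // declared below:
  //
  //   TargetLowering::ArgListTy Args;
  //   TargetLowering::ArgListEntry Entry;
  //   Entry.Node = Size; // some SDValue argument
  //   Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
  //   Args.push_back(Entry);
  //
  //   TargetLowering::CallLoweringInfo CLI(DAG);
  //   CLI.setDebugLoc(dl)
  //       .setChain(Chain)
  //       .setLibCallee(
  //           CallingConv::C, Type::getVoidTy(*DAG.getContext()),
  //           DAG.getExternalSymbol("some_runtime_fn",
  //                                 getPointerTy(DAG.getDataLayout())),
  //           std::move(Args));
  //   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  //   Chain = CallResult.second; // first is the (here void) return value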
4690
4691 /// This structure is used to pass arguments to makeLibCall function.
4692 struct MakeLibCallOptions {
    // By passing the list of operand types before softening to makeLibCall,
    // the target hook shouldExtendTypeInLibCall can query the original types
    // before they were softened.
4695 ArrayRef<EVT> OpsVTBeforeSoften;
4696 EVT RetVTBeforeSoften;
4697 bool IsSExt : 1;
4698 bool DoesNotReturn : 1;
4699 bool IsReturnValueUsed : 1;
4700 bool IsPostTypeLegalization : 1;
4701 bool IsSoften : 1;
4702
4703 MakeLibCallOptions()
4704 : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true),
4705 IsPostTypeLegalization(false), IsSoften(false) {}
4706
4707 MakeLibCallOptions &setSExt(bool Value = true) {
4708 IsSExt = Value;
4709 return *this;
4710 }
4711
4712 MakeLibCallOptions &setNoReturn(bool Value = true) {
4713 DoesNotReturn = Value;
4714 return *this;
4715 }
4716
4717 MakeLibCallOptions &setDiscardResult(bool Value = true) {
4718 IsReturnValueUsed = !Value;
4719 return *this;
4720 }
4721
4722 MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) {
4723 IsPostTypeLegalization = Value;
4724 return *this;
4725 }
4726
4727 MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT,
4728 bool Value = true) {
4729 OpsVTBeforeSoften = OpsVT;
4730 RetVTBeforeSoften = RetVT;
4731 IsSoften = Value;
4732 return *this;
4733 }
4734 };
4735
4736 /// This function lowers an abstract call to a function into an actual call.
4737 /// This returns a pair of operands. The first element is the return value
4738 /// for the function (if RetTy is not VoidTy). The second element is the
4739 /// outgoing token chain. It calls LowerCall to do the actual lowering.
4740 std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const;
4741
4742 /// This hook must be implemented to lower calls into the specified
4743 /// DAG. The outgoing arguments to the call are described by the Outs array,
4744 /// and the values to be returned by the call are described by the Ins
4745 /// array. The implementation should fill in the InVals array with legal-type
4746 /// return values from the call, and return the resulting token chain value.
4747 virtual SDValue
4748 LowerCall(CallLoweringInfo &/*CLI*/,
4749 SmallVectorImpl<SDValue> &/*InVals*/) const {
4750 llvm_unreachable("Not Implemented");
4751 }
4752
4753 /// Target-specific cleanup for formal ByVal parameters.
4754 virtual void HandleByVal(CCState *, unsigned &, Align) const {}
4755
4756 /// This hook should be implemented to check whether the return values
4757 /// described by the Outs array can fit into the return registers. If false
4758 /// is returned, an sret-demotion is performed.
4759 virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/,
4760 MachineFunction &/*MF*/, bool /*isVarArg*/,
4761 const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
4762 LLVMContext &/*Context*/) const
4763 {
4764 // Return true by default to get preexisting behavior.
4765 return true;
4766 }
4767
4768 /// This hook must be implemented to lower outgoing return values, described
4769 /// by the Outs array, into the specified DAG. The implementation should
4770 /// return the resulting token chain value.
4771 virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/,
4772 bool /*isVarArg*/,
4773 const SmallVectorImpl<ISD::OutputArg> & /*Outs*/,
4774 const SmallVectorImpl<SDValue> & /*OutVals*/,
4775 const SDLoc & /*dl*/,
4776 SelectionDAG & /*DAG*/) const {
4777 llvm_unreachable("Not Implemented");
4778 }
4779
4780 /// Return true if result of the specified node is used by a return node
  /// only. It also computes and returns the input chain for the tail call.
4782 ///
4783 /// This is used to determine whether it is possible to codegen a libcall as
4784 /// tail call at legalization time.
4785 virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const {
4786 return false;
4787 }
4788
  /// Return true if the target may be able to emit the call instruction as a
  /// tail call. This is used by optimization passes to determine if it's
  /// profitable to duplicate return instructions to enable tailcall
  /// optimization.
4792 virtual bool mayBeEmittedAsTailCall(const CallInst *) const {
4793 return false;
4794 }
4795
4796 /// Return the register ID of the name passed in. Used by named register
4797 /// global variables extension. There is no target-independent behaviour
4798 /// so the default action is to bail.
4799 virtual Register getRegisterByName(const char* RegName, LLT Ty,
4800 const MachineFunction &MF) const {
    report_fatal_error("Named registers not implemented for this target");
4802 }
4803
4804 /// Return the type that should be used to zero or sign extend a
4805 /// zeroext/signext integer return value. FIXME: Some C calling conventions
4806 /// require the return type to be promoted, but this is not true all the time,
4807 /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling
4808 /// conventions. The frontend should handle this and include all of the
4809 /// necessary information.
4810 virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
4811 ISD::NodeType /*ExtendKind*/) const {
    EVT MinVT = getRegisterType(MVT::i32);
    return VT.bitsLT(MinVT) ? MinVT : VT;
4814 }
4815
4816 /// For some targets, an LLVM struct type must be broken down into multiple
4817 /// simple types, but the calling convention specifies that the entire struct
4818 /// must be passed in a block of consecutive registers.
4819 virtual bool
4820 functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv,
4821 bool isVarArg,
4822 const DataLayout &DL) const {
4823 return false;
4824 }
4825
4826 /// For most targets, an LLVM type must be broken down into multiple
4827 /// smaller types. Usually the halves are ordered according to the endianness
  /// but for some platforms that would break. So this method will default to
4829 /// matching the endianness but can be overridden.
4830 virtual bool
4831 shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const {
4832 return DL.isLittleEndian();
4833 }
4834
4835 /// Returns a 0 terminated array of registers that can be safely used as
4836 /// scratch registers.
4837 virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
4838 return nullptr;
4839 }
4840
4841 /// Returns a 0 terminated array of rounding control registers that can be
  /// attached to a strict FP call.
4843 virtual ArrayRef<MCPhysReg> getRoundingControlRegisters() const {
4844 return ArrayRef<MCPhysReg>();
4845 }
4846
4847 /// This callback is used to prepare for a volatile or atomic load.
4848 /// It takes a chain node as input and returns the chain for the load itself.
4849 ///
4850 /// Having a callback like this is necessary for targets like SystemZ,
4851 /// which allows a CPU to reuse the result of a previous load indefinitely,
4852 /// even if a cache-coherent store is performed by another CPU. The default
4853 /// implementation does nothing.
4854 virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL,
4855 SelectionDAG &DAG) const {
4856 return Chain;
4857 }
4858
4859 /// This callback is invoked by the type legalizer to legalize nodes with an
4860 /// illegal operand type but legal result types. It replaces the
  /// LowerOperation callback in the type Legalizer. The reason we cannot do
4862 /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to
4863 /// use this callback.
4864 ///
4865 /// TODO: Consider merging with ReplaceNodeResults.
4866 ///
4867 /// The target places new result values for the node in Results (their number
4868 /// and types must exactly match those of the original return values of
4869 /// the node), or leaves Results empty, which indicates that the node is not
4870 /// to be custom lowered after all.
4871 /// The default implementation calls LowerOperation.
4872 virtual void LowerOperationWrapper(SDNode *N,
4873 SmallVectorImpl<SDValue> &Results,
4874 SelectionDAG &DAG) const;
4875
4876 /// This callback is invoked for operations that are unsupported by the
4877 /// target, which are registered to use 'custom' lowering, and whose defined
4878 /// values are all legal. If the target has no operations that require custom
4879 /// lowering, it need not implement this. The default implementation of this
4880 /// aborts.
4881 virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
4882
4883 /// This callback is invoked when a node result type is illegal for the
4884 /// target, and the operation was registered to use 'custom' lowering for that
4885 /// result type. The target places new result values for the node in Results
4886 /// (their number and types must exactly match those of the original return
4887 /// values of the node), or leaves Results empty, which indicates that the
4888 /// node is not to be custom lowered after all.
4889 ///
4890 /// If the target has no operations that require custom lowering, it need not
4891 /// implement this. The default implementation aborts.
4892 virtual void ReplaceNodeResults(SDNode * /*N*/,
4893 SmallVectorImpl<SDValue> &/*Results*/,
4894 SelectionDAG &/*DAG*/) const {
4895 llvm_unreachable("ReplaceNodeResults not implemented for this target!");
4896 }
4897
4898 /// This method returns the name of a target specific DAG node.
4899 virtual const char *getTargetNodeName(unsigned Opcode) const;
4900
4901 /// This method returns a target specific FastISel object, or null if the
4902 /// target does not support "fast" ISel.
4903 virtual FastISel *createFastISel(FunctionLoweringInfo &,
4904 const TargetLibraryInfo *) const {
4905 return nullptr;
4906 }
4907
4908 bool verifyReturnAddressArgumentIsConstant(SDValue Op,
4909 SelectionDAG &DAG) const;
4910
4911#ifndef NDEBUG
4912 /// Check the given SDNode. Aborts if it is invalid.
  virtual void verifyTargetSDNode(const SDNode *N) const {}
4914#endif
4915
4916 //===--------------------------------------------------------------------===//
4917 // Inline Asm Support hooks
4918 //
4919
4920 /// This hook allows the target to expand an inline asm call to be explicit
4921 /// llvm code if it wants to. This is useful for turning simple inline asms
4922 /// into LLVM intrinsics, which gives the compiler more information about the
4923 /// behavior of the code.
4924 virtual bool ExpandInlineAsm(CallInst *) const {
4925 return false;
4926 }
4927
4928 enum ConstraintType {
4929 C_Register, // Constraint represents specific register(s).
4930 C_RegisterClass, // Constraint represents any of register(s) in class.
4931 C_Memory, // Memory constraint.
4932 C_Address, // Address constraint.
4933 C_Immediate, // Requires an immediate.
4934 C_Other, // Something else.
4935 C_Unknown // Unsupported constraint.
4936 };
4937
4938 enum ConstraintWeight {
4939 // Generic weights.
4940 CW_Invalid = -1, // No match.
4941 CW_Okay = 0, // Acceptable.
4942 CW_Good = 1, // Good weight.
4943 CW_Better = 2, // Better weight.
4944 CW_Best = 3, // Best weight.
4945
4946 // Well-known weights.
4947 CW_SpecificReg = CW_Okay, // Specific register operands.
4948 CW_Register = CW_Good, // Register operands.
4949 CW_Memory = CW_Better, // Memory operands.
4950 CW_Constant = CW_Best, // Constant operand.
4951 CW_Default = CW_Okay // Default or don't know type.
4952 };
4953
4954 /// This contains information for each constraint that we are lowering.
4955 struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
4956 /// This contains the actual string for the code, like "m". TargetLowering
4957 /// picks the 'best' code from ConstraintInfo::Codes that most closely
4958 /// matches the operand.
4959 std::string ConstraintCode;
4960
4961 /// Information about the constraint code, e.g. Register, RegisterClass,
4962 /// Memory, Other, Unknown.
4963 TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown;
4964
4965 /// If this is the result output operand or a clobber, this is null,
4966 /// otherwise it is the incoming operand to the CallInst. This gets
4967 /// modified as the asm is processed.
4968 Value *CallOperandVal = nullptr;
4969
4970 /// The ValueType for the operand value.
4971 MVT ConstraintVT = MVT::Other;
4972
4973 /// Copy constructor for copying from a ConstraintInfo.
4974 AsmOperandInfo(InlineAsm::ConstraintInfo Info)
4975 : InlineAsm::ConstraintInfo(std::move(Info)) {}
4976
    /// Return true if this is an input operand that is a matching constraint
4978 /// like "4".
4979 bool isMatchingInputConstraint() const;
4980
4981 /// If this is an input matching constraint, this method returns the output
4982 /// operand it matches.
4983 unsigned getMatchedOperand() const;
4984 };
4985
4986 using AsmOperandInfoVector = std::vector<AsmOperandInfo>;
4987
4988 /// Split up the constraint string from the inline assembly value into the
4989 /// specific constraints and their prefixes, and also tie in the associated
4990 /// operand values. If this returns an empty vector, and if the constraint
4991 /// string itself isn't empty, there was an error parsing.
4992 virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL,
4993 const TargetRegisterInfo *TRI,
4994 const CallBase &Call) const;
4995
4996 /// Examine constraint type and operand type and determine a weight value.
4997 /// The operand object must already have been set up with the operand type.
4998 virtual ConstraintWeight getMultipleConstraintMatchWeight(
4999 AsmOperandInfo &info, int maIndex) const;
5000
5001 /// Examine constraint string and operand type and determine a weight value.
5002 /// The operand object must already have been set up with the operand type.
5003 virtual ConstraintWeight getSingleConstraintMatchWeight(
5004 AsmOperandInfo &info, const char *constraint) const;
5005
5006 /// Determines the constraint code and constraint type to use for the specific
5007 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
5008 /// If the actual operand being passed in is available, it can be passed in as
5009 /// Op, otherwise an empty SDValue can be passed.
5010 virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo,
5011 SDValue Op,
5012 SelectionDAG *DAG = nullptr) const;
5013
5014 /// Given a constraint, return the type of constraint it is for this target.
5015 virtual ConstraintType getConstraintType(StringRef Constraint) const;
5016
5017 using ConstraintPair = std::pair<StringRef, TargetLowering::ConstraintType>;
5018 using ConstraintGroup = SmallVector<ConstraintPair>;
  /// Given an OpInfo with a list of constraint codes as strings, return a
5020 /// sorted Vector of pairs of constraint codes and their types in priority of
5021 /// what we'd prefer to lower them as. This may contain immediates that
5022 /// cannot be lowered, but it is meant to be a machine agnostic order of
5023 /// preferences.
5024 ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const;
5025
5026 /// Given a physical register constraint (e.g. {edx}), return the register
5027 /// number and the register class for the register.
5028 ///
5029 /// Given a register class constraint, like 'r', if this corresponds directly
5030 /// to an LLVM register class, return a register of 0 and the register class
5031 /// pointer.
5032 ///
5033 /// This should only be used for C_Register constraints. On error, this
5034 /// returns a register number of 0 and a null register class pointer.
5035 virtual std::pair<unsigned, const TargetRegisterClass *>
5036 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
5037 StringRef Constraint, MVT VT) const;
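  // A sketch of an override of the hook above, assuming a hypothetical
  // MyTarget with a GPR register class used for the 'r' constraint:
  //
  //   std::pair<unsigned, const TargetRegisterClass *>
  //   MyTargetTargetLowering::getRegForInlineAsmConstraint(
  //       const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  //     if (Constraint.size() == 1 && Constraint[0] == 'r')
  //       return std::make_pair(0U, &MyTarget::GPRRegClass);
  //     // Fall back to the common handling, e.g. of named registers ("{r5}").
  //     return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint,
  //                                                         VT);
  //   }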
5038
5039 virtual InlineAsm::ConstraintCode
5040 getInlineAsmMemConstraint(StringRef ConstraintCode) const {
5041 if (ConstraintCode == "m")
5042 return InlineAsm::ConstraintCode::m;
5043 if (ConstraintCode == "o")
5044 return InlineAsm::ConstraintCode::o;
5045 if (ConstraintCode == "X")
5046 return InlineAsm::ConstraintCode::X;
5047 if (ConstraintCode == "p")
5048 return InlineAsm::ConstraintCode::p;
5049 return InlineAsm::ConstraintCode::Unknown;
5050 }
5051
5052 /// Try to replace an X constraint, which matches anything, with another that
5053 /// has more specific requirements based on the type of the corresponding
5054 /// operand. This returns null if there is no replacement to make.
5055 virtual const char *LowerXConstraint(EVT ConstraintVT) const;
5056
5057 /// Lower the specified operand into the Ops vector. If it is invalid, don't
5058 /// add anything to Ops.
5059 virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
5060 std::vector<SDValue> &Ops,
5061 SelectionDAG &DAG) const;
5062
5063 // Lower custom output constraints. If invalid, return SDValue().
5064 virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue,
5065 const SDLoc &DL,
5066 const AsmOperandInfo &OpInfo,
5067 SelectionDAG &DAG) const;
5068
5069 // Targets may override this function to collect operands from the CallInst
5070 // and for example, lower them into the SelectionDAG operands.
5071 virtual void CollectTargetIntrinsicOperands(const CallInst &I,
5072 SmallVectorImpl<SDValue> &Ops,
5073 SelectionDAG &DAG) const;
5074
5075 //===--------------------------------------------------------------------===//
5076 // Div utility functions
5077 //
5078
5079 SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
5080 SmallVectorImpl<SDNode *> &Created) const;
5081 SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
5082 SmallVectorImpl<SDNode *> &Created) const;
5083 // Build sdiv by power-of-2 with conditional move instructions
5084 SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor,
5085 SelectionDAG &DAG,
5086 SmallVectorImpl<SDNode *> &Created) const;
5087
5088 /// Targets may override this function to provide custom SDIV lowering for
5089 /// power-of-2 denominators. If the target returns an empty SDValue, LLVM
5090 /// assumes SDIV is expensive and replaces it with a series of other integer
5091 /// operations.
5092 virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
5093 SelectionDAG &DAG,
5094 SmallVectorImpl<SDNode *> &Created) const;
5095
5096 /// Targets may override this function to provide custom SREM lowering for
5097 /// power-of-2 denominators. If the target returns an empty SDValue, LLVM
5098 /// assumes SREM is expensive and replaces it with a series of other integer
5099 /// operations.
5100 virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor,
5101 SelectionDAG &DAG,
5102 SmallVectorImpl<SDNode *> &Created) const;
5103
5104 /// Indicate whether this target prefers to combine FDIVs with the same
5105 /// divisor. If the transform should never be done, return zero. If the
5106 /// transform should be done, return the minimum number of divisor uses
5107 /// that must exist.
5108 virtual unsigned combineRepeatedFPDivisors() const {
5109 return 0;
5110 }
5111
5112 /// Hooks for building estimates in place of slower divisions and square
5113 /// roots.
5114
5115 /// Return either a square root or its reciprocal estimate value for the input
5116 /// operand.
5117 /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
5118 /// 'Enabled' as set by a potential default override attribute.
5119 /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
5120 /// refinement iterations required to generate a sufficient (though not
5121 /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
5122 /// The boolean UseOneConstNR output is used to select a Newton-Raphson
5123 /// algorithm implementation that uses either one or two constants.
5124 /// The boolean Reciprocal is used to select whether the estimate is for the
5125 /// square root of the input operand or the reciprocal of its square root.
5126 /// A target may choose to implement its own refinement within this function.
5127 /// If that's true, then return '0' as the number of RefinementSteps to avoid
5128 /// any further refinement of the estimate.
5129 /// An empty SDValue return means no estimate sequence can be created.
5130 virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
5131 int Enabled, int &RefinementSteps,
5132 bool &UseOneConstNR, bool Reciprocal) const {
5133 return SDValue();
5134 }
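  // For reference (a standard identity, not a statement about any particular
  // implementation): the Newton-Raphson refinement that generic code can wrap
  // around this estimate computes, for input d and current reciprocal square
  // root estimate x,
  //
  //   x' = x * (1.5 - 0.5 * d * x * x)
  //
  // and, when Reciprocal is false, recovers sqrt(d) as d * x after the final
  // iteration.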
5135
5136 /// Try to convert the fminnum/fmaxnum to a compare/select sequence. This is
  /// required for correctness since InstCombine might have canonicalized an
5138 /// fcmp+select sequence to a FMINNUM/FMAXNUM intrinsic. If we were to fall
5139 /// through to the default expansion/soften to libcall, we might introduce a
5140 /// link-time dependency on libm into a file that originally did not have one.
5141 SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const;
5142
5143 /// Return a reciprocal estimate value for the input operand.
5144 /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
5145 /// 'Enabled' as set by a potential default override attribute.
5146 /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
5147 /// refinement iterations required to generate a sufficient (though not
5148 /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
5149 /// A target may choose to implement its own refinement within this function.
5150 /// If that's true, then return '0' as the number of RefinementSteps to avoid
5151 /// any further refinement of the estimate.
5152 /// An empty SDValue return means no estimate sequence can be created.
5153 virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
5154 int Enabled, int &RefinementSteps) const {
5155 return SDValue();
5156 }
5157
5158 /// Return a target-dependent comparison result if the input operand is
5159 /// suitable for use with a square root estimate calculation. For example, the
5160 /// comparison may check if the operand is NAN, INF, zero, normal, etc. The
5161 /// result should be used as the condition operand for a select or branch.
5162 virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
5163 const DenormalMode &Mode) const;
5164
5165 /// Return a target-dependent result if the input operand is not suitable for
5166 /// use with a square root estimate calculation.
5167 virtual SDValue getSqrtResultForDenormInput(SDValue Operand,
5168 SelectionDAG &DAG) const {
    return DAG.getConstantFP(0.0, SDLoc(Operand), Operand.getValueType());
5170 }
5171
5172 //===--------------------------------------------------------------------===//
5173 // Legalization utility functions
5174 //
5175
5176 /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes,
5177 /// respectively, each computing an n/2-bit part of the result.
5178 /// \param Result A vector that will be filled with the parts of the result
5179 /// in little-endian order.
5180 /// \param LL Low bits of the LHS of the MUL. You can use this parameter
5181 /// if you want to control how low bits are extracted from the LHS.
5182 /// \param LH High bits of the LHS of the MUL. See LL for meaning.
5183 /// \param RL Low bits of the RHS of the MUL. See LL for meaning
5184 /// \param RH High bits of the RHS of the MUL. See LL for meaning.
5185 /// \returns true if the node has been expanded, false if it has not
5186 bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS,
5187 SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
5188 SelectionDAG &DAG, MulExpansionKind Kind,
5189 SDValue LL = SDValue(), SDValue LH = SDValue(),
5190 SDValue RL = SDValue(), SDValue RH = SDValue()) const;
5191
5192 /// Expand a MUL into two nodes. One that computes the high bits of
5193 /// the result and one that computes the low bits.
5194 /// \param HiLoVT The value type to use for the Lo and Hi nodes.
5195 /// \param LL Low bits of the LHS of the MUL. You can use this parameter
5196 /// if you want to control how low bits are extracted from the LHS.
5197 /// \param LH High bits of the LHS of the MUL. See LL for meaning.
/// \param RL Low bits of the RHS of the MUL. See LL for meaning.
/// \param RH High bits of the RHS of the MUL. See LL for meaning.
/// \returns true if the node has been expanded, false if it has not.
5201 bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
5202 SelectionDAG &DAG, MulExpansionKind Kind,
5203 SDValue LL = SDValue(), SDValue LH = SDValue(),
5204 SDValue RL = SDValue(), SDValue RH = SDValue()) const;
5205
/// Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit
5207 /// urem by constant and other arithmetic ops. The n/2-bit urem by constant
5208 /// will be expanded by DAGCombiner. This is not possible for all constant
5209 /// divisors.
5210 /// \param N Node to expand
/// \param Result A vector that will be filled with the low and high parts of
5212 /// the results. For *DIVREM, this will be the quotient parts followed
5213 /// by the remainder parts.
5214 /// \param HiLoVT The value type to use for the Lo and Hi parts. Should be
5215 /// half of VT.
5216 /// \param LL Low bits of the LHS of the operation. You can use this
5217 /// parameter if you want to control how low bits are extracted from
5218 /// the LHS.
5219 /// \param LH High bits of the LHS of the operation. See LL for meaning.
5220 /// \returns true if the node has been expanded, false if it has not.
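/// As a worked example of the underlying arithmetic (an assumption about one
/// of the cases this can handle, not a full description): for an i64 urem by
/// 3 split into 32-bit halves Hi:Lo, 2^32 mod 3 == 1, so
/// \code
///   (Hi * 2^32 + Lo) mod 3 == (Hi * (2^32 mod 3) + Lo) mod 3
///                          == (Hi + Lo) mod 3
/// \endcode
/// which reduces the problem to a 32-bit urem by 3 (plus carry handling).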
5221 bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl<SDValue> &Result,
5222 EVT HiLoVT, SelectionDAG &DAG,
5223 SDValue LL = SDValue(),
5224 SDValue LH = SDValue()) const;
5225
5226 /// Expand funnel shift.
5227 /// \param N Node to expand
5228 /// \returns The expansion if successful, SDValue() otherwise
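/// For illustration, the scalar identity being materialized, where BW is the
/// bit width (this is the generic definition of FSHL; FSHR is analogous):
/// \code
///   fshl(X, Y, Z) == X                                          // (Z % BW) == 0
///   fshl(X, Y, Z) == (X << (Z % BW)) | (Y >> (BW - (Z % BW)))   // otherwise
/// \endcode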
5229 SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const;
5230
5231 /// Expand rotations.
5232 /// \param N Node to expand
/// \param AllowVectorOps whether to expand vector rotates; this should only
///   be done if the legalization is happening outside of LegalizeVectorOps
5235 /// \returns The expansion if successful, SDValue() otherwise
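/// For illustration, the scalar identity for a rotate-left by Z on a
/// power-of-two bit width BW (rotate-right is analogous):
/// \code
///   rotl(X, Z) == (X << (Z & (BW - 1))) | (X >> ((BW - Z) & (BW - 1)))
/// \endcode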
5236 SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const;
5237
5238 /// Expand shift-by-parts.
5239 /// \param N Node to expand
5240 /// \param Lo lower-output-part after conversion
5241 /// \param Hi upper-output-part after conversion
5242 void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi,
5243 SelectionDAG &DAG) const;
5244
5245 /// Expand float(f32) to SINT(i64) conversion
5246 /// \param N Node to expand
5247 /// \param Result output after conversion
/// \returns true if the expansion was successful, false otherwise
5249 bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
5250
5251 /// Expand float to UINT conversion
5252 /// \param N Node to expand
5253 /// \param Result output after conversion
5254 /// \param Chain output chain after conversion
/// \returns true if the expansion was successful, false otherwise
5256 bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
5257 SelectionDAG &DAG) const;
5258
5259 /// Expand UINT(i64) to double(f64) conversion
5260 /// \param N Node to expand
5261 /// \param Result output after conversion
5262 /// \param Chain output chain after conversion
/// \returns true if the expansion was successful, false otherwise
5264 bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
5265 SelectionDAG &DAG) const;
5266
5267 /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
5268 SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
5269
/// Expand fminimum/fmaximum into multiple comparisons with selects.
5271 SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const;
5272
5273 /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
5274 /// \param N Node to expand
5275 /// \returns The expansion result
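/// Conceptually (a simplified scalar sketch for a signed f32 -> i32
/// conversion, written in plain C; the real expansion is built from
/// FP_TO_SINT plus min/max or selects):
/// \code
///   if (isnan(x))             return 0;
///   if (x < -2147483648.0f)   return INT32_MIN;
///   if (x >=  2147483648.0f)  return INT32_MAX;
///   return (int32_t)x;
/// \endcode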
5276 SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const;
5277
5278 /// Truncate Op to ResultVT. If the result is exact, leave it alone. If it is
5279 /// not exact, force the result to be odd.
5280 /// \param ResultVT The type of result.
5281 /// \param Op The value to round.
5282 /// \returns The expansion result
5283 SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL,
5284 SelectionDAG &DAG) const;
5285
5286 /// Expand round(fp) to fp conversion
5287 /// \param N Node to expand
5288 /// \returns The expansion result
5289 SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const;
5290
5291 /// Expand check for floating point class.
5292 /// \param ResultVT The type of intrinsic call result.
5293 /// \param Op The tested value.
5294 /// \param Test The test to perform.
5295 /// \param Flags The optimization flags.
5296 /// \returns The expansion result or SDValue() if it fails.
5297 SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test,
5298 SDNodeFlags Flags, const SDLoc &DL,
5299 SelectionDAG &DAG) const;
5300
/// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes; the vector
/// expansion can only succeed if all operations are legal/custom.
5303 /// \param N Node to expand
5304 /// \returns The expansion result or SDValue() if it fails.
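/// For illustration, the classic bit-parallel population count for i32 that
/// this kind of expansion is based on:
/// \code
///   x = x - ((x >> 1) & 0x55555555);
///   x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
///   x = (x + (x >> 4)) & 0x0F0F0F0F;
///   x = (x * 0x01010101) >> 24;   // sum the per-byte counts
/// \endcode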
5305 SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const;
5306
5307 /// Expand VP_CTPOP nodes.
5308 /// \returns The expansion result or SDValue() if it fails.
5309 SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const;
5310
/// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes; the
/// vector expansion can only succeed if all operations are legal/custom.
5313 /// \param N Node to expand
5314 /// \returns The expansion result or SDValue() if it fails.
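/// For illustration, a common CTLZ fallback for i32 when CTPOP is available:
/// smear the highest set bit downwards, then count the remaining zeros:
/// \code
///   x |= (x >> 1); x |= (x >> 2); x |= (x >> 4);
///   x |= (x >> 8); x |= (x >> 16);
///   // ctlz(x) == 32 - ctpop(x)
/// \endcode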
5315 SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const;
5316
5317 /// Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
5318 /// \param N Node to expand
5319 /// \returns The expansion result or SDValue() if it fails.
5320 SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const;
5321
5322 /// Expand CTTZ via Table Lookup.
5323 /// \param N Node to expand
5324 /// \returns The expansion result or SDValue() if it fails.
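/// For illustration, the classic de Bruijn multiply-and-lookup scheme for
/// i32 (the exact constant and table layout used here are an implementation
/// detail; this only shows the idea):
/// \code
///   unsigned Idx = ((x & -x) * 0x077CB531u) >> 27;
///   // cttz(x) == Table[Idx], a 32-entry table of bit positions
/// \endcode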
5325 SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5326 SDValue Op, unsigned NumBitsPerElt) const;
5327
/// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes; the
/// vector expansion can only succeed if all operations are legal/custom.
5330 /// \param N Node to expand
5331 /// \returns The expansion result or SDValue() if it fails.
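/// For illustration, a common CTTZ fallback when CTPOP is available:
/// \code
///   cttz(x) == ctpop(~x & (x - 1))
/// \endcode
/// i.e. count the ones below the lowest set bit (for x == 0 this yields the
/// bit width, matching CTTZ semantics).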
5332 SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const;
5333
5334 /// Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
5335 /// \param N Node to expand
5336 /// \returns The expansion result or SDValue() if it fails.
5337 SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const;
5338
5339 /// Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
5340 /// \param N Node to expand
5341 /// \returns The expansion result or SDValue() if it fails.
5342 SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const;
5343
/// Expand ABS nodes. Expands vector/scalar ABS nodes; the vector expansion
/// can only succeed if all operations are legal/custom.
/// (ABS x) -> (XOR (ADD x, (SRA x, type_size - 1)), (SRA x, type_size - 1))
5347 /// \param N Node to expand
5348 /// \param IsNegative indicate negated abs
5349 /// \returns The expansion result or SDValue() if it fails.
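/// For illustration, the scalar sequence for an i32 input:
/// \code
///   Sign = x >> 31;                // arithmetic shift: 0 or -1
///   // abs(x) == (x + Sign) ^ Sign
/// \endcode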
5350 SDValue expandABS(SDNode *N, SelectionDAG &DAG,
5351 bool IsNegative = false) const;
5352
5353 /// Expand ABDS/ABDU nodes. Expands vector/scalar ABDS/ABDU nodes.
5354 /// \param N Node to expand
5355 /// \returns The expansion result or SDValue() if it fails.
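/// For illustration, two equivalent scalar forms for the unsigned case (the
/// signed case uses smax/smin or a signed compare instead):
/// \code
///   abdu(a, b) == umax(a, b) - umin(a, b)
///   abdu(a, b) == (a > b) ? a - b : b - a
/// \endcode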
5356 SDValue expandABD(SDNode *N, SelectionDAG &DAG) const;
5357
5358 /// Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
5359 /// \param N Node to expand
5360 /// \returns The expansion result or SDValue() if it fails.
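/// For illustration, overflow-free scalar identities for the unsigned
/// flavours (the signed flavours are analogous with arithmetic shifts):
/// \code
///   avgflooru(a, b) == (a & b) + ((a ^ b) >> 1)
///   avgceilu(a, b)  == (a | b) - ((a ^ b) >> 1)
/// \endcode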
5361 SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const;
5362
/// Expand BSWAP nodes. Expands scalar/vector BSWAP nodes with i16/i32/i64
/// scalar types.
5365 /// \param N Node to expand
5366 /// \returns The expansion result or SDValue() if it fails.
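/// For illustration, the i32 case as plain shifts and masks:
/// \code
///   bswap(x) == ((x & 0x000000FF) << 24) | ((x & 0x0000FF00) << 8) |
///               ((x & 0x00FF0000) >>  8) | ((x & 0xFF000000) >> 24)
/// \endcode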
5367 SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const;
5368
/// Expand VP_BSWAP nodes. Expands VP_BSWAP nodes with i16/i32/i64 scalar
/// types.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
5372 SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const;
5373
/// Expand BITREVERSE nodes. Expands scalar/vector BITREVERSE nodes.
5376 /// \param N Node to expand
5377 /// \returns The expansion result or SDValue() if it fails.
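/// For illustration, a classic i32 sequence: swap adjacent bits, then bit
/// pairs, then nibbles, and finish with a byte swap:
/// \code
///   x = ((x & 0x55555555) << 1) | ((x >> 1) & 0x55555555);
///   x = ((x & 0x33333333) << 2) | ((x >> 2) & 0x33333333);
///   x = ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F);
///   x = bswap(x);
/// \endcode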
5378 SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const;
5379
/// Expand VP_BITREVERSE nodes. Expands VP_BITREVERSE nodes with
/// i8/i16/i32/i64 scalar types.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
5383 SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const;
5384
5385 /// Turn load of vector type into a load of the individual elements.
5386 /// \param LD load to expand
5387 /// \returns BUILD_VECTOR and TokenFactor nodes.
5388 std::pair<SDValue, SDValue> scalarizeVectorLoad(LoadSDNode *LD,
5389 SelectionDAG &DAG) const;
5390
/// Turn a store of a vector type into stores of the individual elements.
5392 /// \param ST Store with a vector value type
5393 /// \returns TokenFactor of the individual store chains.
5394 SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const;
5395
5396 /// Expands an unaligned load to 2 half-size loads for an integer, and
5397 /// possibly more for vectors.
5398 std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD,
5399 SelectionDAG &DAG) const;
5400
5401 /// Expands an unaligned store to 2 half-size stores for integer values, and
5402 /// possibly more for vectors.
5403 SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const;
5404
5405 /// Increments memory address \p Addr according to the type of the value
5406 /// \p DataVT that should be stored. If the data is stored in compressed
5407 /// form, the memory address should be incremented according to the number of
/// the stored elements. This number is equal to the number of set ('1') bits
/// in \p Mask.
5410 /// \p DataVT is a vector type. \p Mask is a vector value.
5411 /// \p DataVT and \p Mask have the same number of vector elements.
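/// For illustration (a sketch of the address arithmetic only, not the exact
/// node sequence): for compressed stores the increment is
/// \code
///   NewAddr = Addr + ctpop(Mask) * (element size in bytes)
/// \endcode
/// while otherwise it is simply \p Addr plus the store size of \p DataVT.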
5412 SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL,
5413 EVT DataVT, SelectionDAG &DAG,
5414 bool IsCompressedMemory) const;
5415
5416 /// Get a pointer to vector element \p Idx located in memory for a vector of
5417 /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of
5418 /// bounds the returned pointer is unspecified, but will be within the vector
5419 /// bounds.
5420 SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
5421 SDValue Index) const;
5422
5423 /// Get a pointer to a sub-vector of type \p SubVecVT at index \p Idx located
5424 /// in memory for a vector of type \p VecVT starting at a base address of
5425 /// \p VecPtr. If \p Idx plus the size of \p SubVecVT is out of bounds the
5426 /// returned pointer is unspecified, but the value returned will be such that
5427 /// the entire subvector would be within the vector bounds.
5428 SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
5429 EVT SubVecVT, SDValue Index) const;
5430
5431 /// Method for building the DAG expansion of ISD::[US][MIN|MAX]. This
5432 /// method accepts integers as its arguments.
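/// For illustration, the generic compare/select form, e.g. for SMAX:
/// \code
///   smax(a, b) == select (setcc a, b, setgt), a, b
/// \endcode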
5433 SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const;
5434
5435 /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This
5436 /// method accepts integers as its arguments.
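/// For illustration, a scalar sketch of the unsigned saturating add (the
/// other flavours follow the same overflow-check-plus-select pattern):
/// \code
///   Sum = a + b;
///   // uaddsat(a, b) == (Sum < a) ? UINT_MAX : Sum   // clamp on overflow
/// \endcode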
5437 SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;
5438
5439 /// Method for building the DAG expansion of ISD::[US]CMP. This
5440 /// method accepts integers as its arguments
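/// For illustration, a scalar form of the three-way compare (UCMP shown;
/// SCMP is the same with signed comparisons):
/// \code
///   ucmp(a, b) == (int)(a > b) - (int)(a < b)   // -1, 0 or +1
/// \endcode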
5441 SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const;
5442
5443 /// Method for building the DAG expansion of ISD::[US]SHLSAT. This
5444 /// method accepts integers as its arguments.
5445 SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const;
5446
5447 /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This
5448 /// method accepts integers as its arguments.
5449 SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;
5450
5451 /// Method for building the DAG expansion of ISD::[US]DIVFIX[SAT]. This
5452 /// method accepts integers as its arguments.
5453 /// Note: This method may fail if the division could not be performed
5454 /// within the type. Clients must retry with a wider type if this happens.
5455 SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
5456 SDValue LHS, SDValue RHS,
5457 unsigned Scale, SelectionDAG &DAG) const;
5458
5459 /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion
/// always succeeds and populates the Result and Overflow arguments.
5461 void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
5462 SelectionDAG &DAG) const;
5463
5464 /// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion
/// always succeeds and populates the Result and Overflow arguments.
5466 void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
5467 SelectionDAG &DAG) const;
5468
5469 /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether
5470 /// expansion was successful and populates the Result and Overflow arguments.
5471 bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
5472 SelectionDAG &DAG) const;
5473
/// Unconditionally expand a MUL into either a libcall or brute-force code
/// via a wide multiplication. The expansion works by
5476 /// attempting to do a multiplication on a wider type twice the size of the
5477 /// original operands. LL and LH represent the lower and upper halves of the
5478 /// first operand. RL and RH represent the lower and upper halves of the
5479 /// second operand. The upper and lower halves of the result are stored in Lo
5480 /// and Hi.
5481 void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
5482 EVT WideVT, const SDValue LL, const SDValue LH,
5483 const SDValue RL, const SDValue RH, SDValue &Lo,
5484 SDValue &Hi) const;
5485
5486 /// Same as above, but creates the upper halves of each operand by
5487 /// sign/zero-extending the operands.
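/// For illustration, a sketch of the wide-type strategy (assuming the wide
/// type is legal or can itself be expanded; N is the original bit width):
/// \code
///   Wide = mul (ext LHS), (ext RHS)   // 2*N-bit multiply
///   Lo   = trunc Wide
///   Hi   = trunc (srl Wide, N)
/// \endcode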
5488 void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
5489 const SDValue LHS, const SDValue RHS, SDValue &Lo,
5490 SDValue &Hi) const;
5491
5492 /// Expand a VECREDUCE_* into an explicit calculation. If Count is specified,
5493 /// only the first Count elements of the vector are used.
5494 SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
5495
5496 /// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
5497 SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const;
5498
5499 /// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
5500 /// Returns true if the expansion was successful.
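/// For illustration, the identity used when only the division is available:
/// \code
///   X rem Y == X - (X div Y) * Y
/// \endcode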
5501 bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const;
5502
5503 /// Method for building the DAG expansion of ISD::VECTOR_SPLICE. This
5504 /// method accepts vectors as its arguments.
5505 SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const;
5506
/// Expand a VECTOR_COMPRESS into a sequence that extracts each element,
/// stores it temporarily while advancing the store position, and then
/// reloads the final vector.
5509 SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const;
5510
5511 /// Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC
5512 /// on the current target. A VP_SETCC will additionally be given a Mask
5513 /// and/or EVL not equal to SDValue().
5514 ///
5515 /// If the SETCC has been legalized using AND / OR, then the legalized node
5516 /// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
5517 /// will be set to false. This will also hold if the VP_SETCC has been
5518 /// legalized using VP_AND / VP_OR.
5519 ///
5520 /// If the SETCC / VP_SETCC has been legalized by using
5521 /// getSetCCSwappedOperands(), then the values of LHS and RHS will be
5522 /// swapped, CC will be set to the new condition, and NeedInvert will be set
5523 /// to false.
5524 ///
5525 /// If the SETCC / VP_SETCC has been legalized using the inverse condcode,
/// then LHS and RHS will be unchanged, CC will be set to the inverted condcode,
5527 /// and NeedInvert will be set to true. The caller must invert the result of
5528 /// the SETCC with SelectionDAG::getLogicalNOT() or take equivalent action to
5529 /// swap the effect of a true/false result.
5530 ///
5531 /// \returns true if the SETCC / VP_SETCC has been legalized, false if it
5532 /// hasn't.
5533 bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS,
5534 SDValue &RHS, SDValue &CC, SDValue Mask,
5535 SDValue EVL, bool &NeedInvert, const SDLoc &dl,
5536 SDValue &Chain, bool IsSignaling = false) const;
5537
5538 //===--------------------------------------------------------------------===//
5539 // Instruction Emitting Hooks
5540 //
5541
5542 /// This method should be implemented by targets that mark instructions with
5543 /// the 'usesCustomInserter' flag. These instructions are special in various
5544 /// ways, which require special support to insert. The specified MachineInstr
5545 /// is created but not inserted into any basic blocks, and this method is
5546 /// called to expand it into a sequence of instructions, potentially also
5547 /// creating new basic blocks and control flow.
5548 /// As long as the returned basic block is different (i.e., we created a new
5549 /// one), the custom inserter is free to modify the rest of \p MBB.
5550 virtual MachineBasicBlock *
5551 EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
5552
5553 /// This method should be implemented by targets that mark instructions with
5554 /// the 'hasPostISelHook' flag. These instructions must be adjusted after
/// instruction selection by target hooks, e.g., to fill in optional defs for
5556 /// ARM 's' setting instructions.
5557 virtual void AdjustInstrPostInstrSelection(MachineInstr &MI,
5558 SDNode *Node) const;
5559
5560 /// If this function returns true, SelectionDAGBuilder emits a
5561 /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector.
5562 virtual bool useLoadStackGuardNode() const {
5563 return false;
5564 }
5565
5566 virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
5567 const SDLoc &DL) const {
5568 llvm_unreachable("not implemented for this target");
5569 }
5570
5571 /// Lower TLS global address SDNode for target independent emulated TLS model.
5572 virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
5573 SelectionDAG &DAG) const;
5574
5575 /// Expands target specific indirect branch for the case of JumpTable
5576 /// expansion.
5577 virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
5578 SDValue Addr, int JTI,
5579 SelectionDAG &DAG) const;
5580
5581 // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
5582 // If we're comparing for equality to zero and isCtlzFast is true, expose the
// fact that this can be implemented as a ctlz/srl pair, so that the DAG
5584 // combiner can fold the new nodes.
5585 SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
5586
5587 // Return true if `X & Y eq/ne 0` is preferable to `X & Y ne/eq Y`
5588 virtual bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode, EVT) const {
5589 return true;
5590 }
5591
5592private:
5593 SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
5594 const SDLoc &DL, DAGCombinerInfo &DCI) const;
5595 SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
5596 const SDLoc &DL, DAGCombinerInfo &DCI) const;
5597
5598 SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
5599 SDValue N1, ISD::CondCode Cond,
5600 DAGCombinerInfo &DCI,
5601 const SDLoc &DL) const;
5602
5603 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5604 SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift(
5605 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
5606 DAGCombinerInfo &DCI, const SDLoc &DL) const;
5607
5608 SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
5609 SDValue CompTargetNode, ISD::CondCode Cond,
5610 DAGCombinerInfo &DCI, const SDLoc &DL,
5611 SmallVectorImpl<SDNode *> &Created) const;
5612 SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
5613 ISD::CondCode Cond, DAGCombinerInfo &DCI,
5614 const SDLoc &DL) const;
5615
5616 SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
5617 SDValue CompTargetNode, ISD::CondCode Cond,
5618 DAGCombinerInfo &DCI, const SDLoc &DL,
5619 SmallVectorImpl<SDNode *> &Created) const;
5620 SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
5621 ISD::CondCode Cond, DAGCombinerInfo &DCI,
5622 const SDLoc &DL) const;
5623};
5624
5625/// Given an LLVM IR type and return type attributes, compute the return value
5626/// EVTs and flags, and optionally also the offsets, if the return value is
5627/// being lowered to memory.
5628void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr,
5629 SmallVectorImpl<ISD::OutputArg> &Outs,
5630 const TargetLowering &TLI, const DataLayout &DL);
5631
5632} // end namespace llvm
5633
5634#endif // LLVM_CODEGEN_TARGETLOWERING_H
5635