//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the NVPTX target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H

#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>

namespace llvm {

class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
  typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const NVPTXSubtarget *ST;
  const NVPTXTargetLowering *TLI;

  const NVPTXSubtarget *getST() const { return ST; }
  const NVPTXTargetLowering *getTLI() const { return TLI; }

public:
  explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl()),
        TLI(ST->getTargetLowering()) {}

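  // All NVPTX code executes in SIMT fashion, so control flow may diverge
  // across the threads of a warp.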
  bool hasBranchDivergence(const Function *F = nullptr) { return true; }

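  // Reports whether a value may take different values in different threads of
  // a warp (for example, reads of the thread index or atomic results).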
  bool isSourceOfDivergence(const Value *V);

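  // The generic address space serves as NVPTX's flat address space: generic
  // pointers may refer to global, shared, or local memory.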
  unsigned getFlatAddressSpace() const {
    return AddressSpace::ADDRESS_SPACE_GENERIC;
  }

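  // PTX does not allow initializers on variables placed in the shared, local,
  // or param address spaces.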
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
           AS != AddressSpace::ADDRESS_SPACE_LOCAL &&
           AS != ADDRESS_SPACE_PARAM;
  }

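  // Target hook for InstCombine: folds NVVM intrinsic calls into simpler IR
  // when their NVPTX-specific semantics permit it.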
  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;

  // Loads and stores can be vectorized if the alignment is at least as big as
  // the load/store we want to vectorize.
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return Alignment >= ChainSizeInBytes;
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
  }

  // NVPTX has infinite registers of all kinds, but the actual machine doesn't.
  // We conservatively return 1 here which is just enough to enable the
  // vectorizers but disables heuristics based on the number of registers.
  // FIXME: Return a more reasonable number, while keeping an eye on
  // LoopVectorizer's unrolling heuristics.
  unsigned getNumberOfRegisters(bool Vector) const { return 1; }

  // Only <2 x half> should be vectorized, so always return 32 for the vector
  // register size.
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }
  unsigned getMinVectorRegisterBitWidth() const { return 32; }

  // We don't want to prevent inlining because of target-cpu and -features
  // attributes that were added to newer versions of LLVM/Clang: there are
  // no incompatible functions in PTX; ptxas will throw errors in such cases.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return true;
  }

  // Increase the inlining cost threshold by a factor of 11, reflecting that
  // calls are particularly expensive in NVPTX.
  unsigned getInliningThresholdMultiplier() const { return 11; }

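  // Cost model for arithmetic instructions. 64-bit integer arithmetic is
  // costed higher than 32-bit, since the hardware emulates it with 32-bit
  // operations.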
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = std::nullopt,
      const Instruction *CxtI = nullptr);

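  // Unrolling preferences: loop overhead is comparatively expensive on the
  // GPU, and small loops are often fully unrolled by ptxas anyway.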
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

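  // Peeling preferences; NVPTX currently defers to the generic heuristics.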
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
    // Volatile loads/stores are only supported for shared and global address
    // spaces, or for generic AS that maps to them.
    if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
          AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
          AddrSpace == llvm::ADDRESS_SPACE_SHARED))
      return false;

    switch (I->getOpcode()) {
    default:
      return false;
    case Instruction::Load:
    case Instruction::Store:
      return true;
    }
  }
};

} // end namespace llvm

#endif