1//===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// This file implements a TargetTransformInfo analysis pass specific to the
11// R600 target machine. It uses the target's detailed information to provide
12// more precise answers to certain TTI queries, while letting the target
13// independent and default TTI implementations handle the rest.
14//
15//===----------------------------------------------------------------------===//
16
17#include "R600TargetTransformInfo.h"
18#include "AMDGPU.h"
19#include "AMDGPUTargetMachine.h"
20#include "R600Subtarget.h"
21
22using namespace llvm;
23
24#define DEBUG_TYPE "R600tti"
25
26R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
27 : BaseT(TM, F.getDataLayout()),
28 ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
29 TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
30
31unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
32 return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
33}
34
35unsigned R600TTIImpl::getNumberOfRegisters(unsigned ClassID) const {
36 bool Vec = ClassID == 1;
37 return getHardwareNumberOfRegisters(Vec);
38}
39
40TypeSize
41R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
42 return TypeSize::getFixed(ExactSize: 32);
43}
44
45unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }
46
47unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
48 if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
49 AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
50 return 128;
51 if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
52 AddrSpace == AMDGPUAS::REGION_ADDRESS)
53 return 64;
54 if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
55 return 32;
56
57 if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
58 AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
59 (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
60 AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
61 return 128;
62 llvm_unreachable("unhandled address space");
63}
64
65bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
66 Align Alignment,
67 unsigned AddrSpace) const {
68 // We allow vectorization of flat stores, even though we may need to decompose
69 // them later if they may access private memory. We don't have enough context
70 // here, and legalization can handle it.
71 return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
72}
73
74bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
75 Align Alignment,
76 unsigned AddrSpace) const {
77 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
78}
79
80bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
81 Align Alignment,
82 unsigned AddrSpace) const {
83 return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
84}
85
86unsigned
87R600TTIImpl::getMaxInterleaveFactor(ElementCount VF,
88 bool HasUnorderedReductions) const {
89 // Disable unrolling if the loop is not vectorized.
90 // TODO: Enable this again.
91 if (VF.isScalar())
92 return 1;
93
94 return 8;
95}
96
97InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
98 TTI::TargetCostKind CostKind,
99 const Instruction *I) const {
100 if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
101 return Opcode == Instruction::PHI ? 0 : 1;
102
103 // XXX - For some reason this isn't called for switch.
104 switch (Opcode) {
105 case Instruction::UncondBr:
106 case Instruction::CondBr:
107 case Instruction::Ret:
108 return 10;
109 default:
110 return BaseT::getCFInstrCost(Opcode, CostKind, I);
111 }
112}
113
114InstructionCost R600TTIImpl::getVectorInstrCost(
115 unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind, unsigned Index,
116 const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC) const {
117 switch (Opcode) {
118 case Instruction::ExtractElement:
119 case Instruction::InsertElement: {
120 unsigned EltSize =
121 DL.getTypeSizeInBits(Ty: cast<VectorType>(Val: ValTy)->getElementType());
122 if (EltSize < 32) {
123 return BaseT::getVectorInstrCost(Opcode, Val: ValTy, CostKind, Index, Op0, Op1,
124 VIC);
125 }
126
127 // Extracts are just reads of a subregister, so are free. Inserts are
128 // considered free because we don't want to have any cost for scalarizing
129 // operations, and we don't have to copy into a different register class.
130
131 // Dynamic indexing isn't free and is best avoided.
132 return Index == ~0u ? 2 : 0;
133 }
134 default:
135 return BaseT::getVectorInstrCost(Opcode, Val: ValTy, CostKind, Index, Op0, Op1,
136 VIC);
137 }
138}
139
140void R600TTIImpl::getUnrollingPreferences(
141 Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
142 OptimizationRemarkEmitter *ORE) const {
143 CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
144}
145
146void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
147 TTI::PeelingPreferences &PP) const {
148 CommonTTI.getPeelingPreferences(L, SE, PP);
149}
150