R600TargetTransformInfo.cpp source code [llvm_projects/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp]

1	//===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// \file
10	// This file implements a TargetTransformInfo analysis pass specific to the
11	// R600 target machine. It uses the target's detailed information to provide
12	// more precise answers to certain TTI queries, while letting the target
13	// independent and default TTI implementations handle the rest.
14	//
15	//===----------------------------------------------------------------------===//
16
17	#include "R600TargetTransformInfo.h"
18	#include "AMDGPU.h"
19	#include "AMDGPUTargetMachine.h"
20	#include "R600Subtarget.h"
21
22	using namespace llvm;
23
24	#define DEBUG_TYPE "R600tti"
25
26	R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine TM, const* Function &F)
27	: BaseT (TM, F.getDataLayout()),
28	ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
29	TLI(ST->getTargetLowering()), CommonTTI (TM, F) {}
30
31	unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
32	return `4` * `128`; // XXX - 4 channels. Should these count as vector instead?
33	}
34
35	unsigned R600TTIImpl::getNumberOfRegisters(unsigned ClassID) const {
36	bool Vec = ClassID == `1`;
37	return getHardwareNumberOfRegisters(Vec);
38	}
39
40	TypeSize
41	R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
42	return TypeSize::getFixed(ExactSize: `32`);
43	}
44
45	unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return `32`; }
46
47	unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
48	if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS \|\|
49	AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
50	return `128`;
51	if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS \|\|
52	AddrSpace == AMDGPUAS::REGION_ADDRESS)
53	return `64`;
54	if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
55	return `32`;
56
57	if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS \|\|
58	AddrSpace == AMDGPUAS::PARAM_I_ADDRESS \|\|
59	(AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
60	AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
61	return `128`;
62	llvm_unreachable("unhandled address space");
63	}
64
65	bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
66	Align Alignment,
67	unsigned AddrSpace) const {
68	// We allow vectorization of flat stores, even though we may need to decompose
69	// them later if they may access private memory. We don't have enough context
70	// here, and legalization can handle it.
71	return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
72	}
73
74	bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
75	Align Alignment,
76	unsigned AddrSpace) const {
77	return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
78	}
79
80	bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
81	Align Alignment,
82	unsigned AddrSpace) const {
83	return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
84	}
85
86	unsigned
87	R600TTIImpl::getMaxInterleaveFactor(ElementCount VF,
88	bool HasUnorderedReductions) const {
89	// Disable unrolling if the loop is not vectorized.
90	// TODO: Enable this again.
91	if (VF.isScalar())
92	return `1`;
93
94	return `8`;
95	}
96
97	InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
98	TTI::TargetCostKind CostKind,
99	const Instruction I) const* {
100	if (CostKind == TTI::TCK_CodeSize \|\| CostKind == TTI::TCK_SizeAndLatency)
101	return Opcode == Instruction::PHI ? `0` : `1`;
102
103	// XXX - For some reason this isn't called for switch.
104	switch (Opcode) {
105	case Instruction::UncondBr:
106	case Instruction::CondBr:
107	case Instruction::Ret:
108	return `10`;
109	default:
110	return BaseT::getCFInstrCost(Opcode, CostKind, I);
111	}
112	}
113
114	InstructionCost R600TTIImpl::getVectorInstrCost(
115	unsigned Opcode, Type ValTy, TTI::TargetCostKind CostKind, unsigned* Index,
116	const Value Op0, const* Value Op1, TTI::VectorInstrContext VIC) const* {
117	switch (Opcode) {
118	case Instruction::ExtractElement:
119	case Instruction::InsertElement: {
120	unsigned EltSize =
121	DL.getTypeSizeInBits(Ty: cast<VectorType>(Val: ValTy)->getElementType());
122	if (EltSize < `32`) {
123	return BaseT::getVectorInstrCost(Opcode, Val: ValTy, CostKind, Index, Op0, Op1,
124	VIC);
125	}
126
127	// Extracts are just reads of a subregister, so are free. Inserts are
128	// considered free because we don't want to have any cost for scalarizing
129	// operations, and we don't have to copy into a different register class.
130
131	// Dynamic indexing isn't free and is best avoided.
132	return Index == ~`0u` ? `2` : `0`;
133	}
134	default:
135	return BaseT::getVectorInstrCost(Opcode, Val: ValTy, CostKind, Index, Op0, Op1,
136	VIC);
137	}
138	}
139
140	void R600TTIImpl::getUnrollingPreferences(
141	Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
142	OptimizationRemarkEmitter ORE) const* {
143	CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
144	}
145
146	void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
147	TTI::PeelingPreferences &PP) const {
148	CommonTTI.getPeelingPreferences(L, SE, PP);
149	}
150

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp