//===- HexagonSubtarget.cpp - Hexagon Subtarget Information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Hexagon specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "HexagonSubtarget.h"
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <map>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "hexagon-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "HexagonGenSubtargetInfo.inc"

static cl::opt<bool> EnableBSBSched("enable-bsb-sched", cl::Hidden,
                                    cl::init(true));

static cl::opt<bool> EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden,
                                          cl::init(false));

static cl::opt<bool>
    EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::init(true),
                      cl::desc("Enable the scheduler to generate .cur"));

static cl::opt<bool>
    DisableHexagonMISched("disable-hexagon-misched", cl::Hidden,
                          cl::desc("Disable Hexagon MI Scheduling"));

static cl::opt<bool> OverrideLongCalls(
    "hexagon-long-calls", cl::Hidden,
    cl::desc("If present, forces/disables the use of long calls"));

static cl::opt<bool>
    EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden,
                          cl::desc("Consider calls to be predicable"));

static cl::opt<bool> SchedPredsCloser("sched-preds-closer", cl::Hidden,
                                      cl::init(true));

static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
                                             cl::Hidden, cl::init(true));

static cl::opt<bool> EnableCheckBankConflict(
    "hexagon-check-bank-conflict", cl::Hidden, cl::init(true),
    cl::desc("Enable checking for cache bank conflicts"));

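// Construct the subtarget from the target triple, CPU name, and feature
// string, resolving the effective CPU and initializing the instruction,
// register, and lowering info objects.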
HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
                                   StringRef FS, const TargetMachine &TM)
    : HexagonGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
      OptLevel(TM.getOptLevel()),
      CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))),
      TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
      RegInfo(getHwMode()), TLInfo(TM, *this),
      InstrItins(getInstrItineraryForCPU(CPUString)) {
  Hexagon_MC::addArchSubtarget(this, FS);
  // Beware of the default constructor of InstrItineraryData: it will
  // reset all members to 0.
  assert(InstrItins.Itineraries != nullptr && "InstrItins not initialized");
}

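// Derive the architecture version and feature-dependent settings from the CPU
// name and feature string; this runs before the other members are constructed.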
HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
  std::optional<Hexagon::ArchEnum> ArchVer = Hexagon::getCpu(CPUString);
  if (ArchVer)
    HexagonArchVersion = *ArchVer;
  else
    llvm_unreachable("Unrecognized Hexagon processor version");

  UseHVX128BOps = false;
  UseHVX64BOps = false;
  UseAudioOps = false;
  UseLongCalls = false;

  SubtargetFeatures Features(FS);

  // Turn on QFloat if the HVX version is v68+.
  // The function ParseSubtargetFeatures will set feature bits and initialize
  // subtarget's variables all in one, so there isn't a good way to preprocess
  // the feature string, other than by tinkering with it directly.
  auto IsQFloatFS = [](StringRef F) {
    return F == "+hvx-qfloat" || F == "-hvx-qfloat";
  };
  if (!llvm::count_if(Features.getFeatures(), IsQFloatFS)) {
    auto getHvxVersion = [&Features](StringRef FS) -> StringRef {
      for (StringRef F : llvm::reverse(Features.getFeatures())) {
        if (F.starts_with("+hvxv"))
          return F;
      }
      for (StringRef F : llvm::reverse(Features.getFeatures())) {
        if (F == "-hvx")
          return StringRef();
        if (F.starts_with("+hvx") || F == "-hvx")
          return F.take_front(4); // Return "+hvx" or "-hvx".
      }
      return StringRef();
    };

    bool AddQFloat = false;
    StringRef HvxVer = getHvxVersion(FS);
    if (HvxVer.starts_with("+hvxv")) {
      int Ver = 0;
      if (!HvxVer.drop_front(5).consumeInteger(10, Ver) && Ver >= 68)
        AddQFloat = true;
    } else if (HvxVer == "+hvx") {
      if (hasV68Ops())
        AddQFloat = true;
    }

    if (AddQFloat)
      Features.AddFeature("+hvx-qfloat");
  }

  std::string FeatureString = Features.getString();
  ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FeatureString);

  if (useHVXV68Ops())
    UseHVXFloatingPoint = UseHVXIEEEFPOps || UseHVXQFloatOps;

  if (UseHVXQFloatOps && UseHVXIEEEFPOps && UseHVXFloatingPoint)
    LLVM_DEBUG(
        dbgs() << "Behavior is undefined for simultaneous qfloat and ieee hvx codegen...");

  if (OverrideLongCalls.getPosition())
    UseLongCalls = OverrideLongCalls;

  UseBSBScheduling = hasV60Ops() && EnableBSBSched;

  if (isTinyCore()) {
    // Tiny core has a single thread, so back-to-back scheduling is enabled by
    // default.
    if (!EnableBSBSched.getPosition())
      UseBSBScheduling = false;
  }

  FeatureBitset FeatureBits = getFeatureBits();
  if (HexagonDisableDuplex)
    setFeatureBits(FeatureBits.reset(Hexagon::FeatureDuplex));
  setFeatureBits(Hexagon_MC::completeHVXFeatures(FeatureBits));

  return *this;
}

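// Return true if Ty (or its vector element type) is a legal HVX element type,
// optionally treating i1 (predicate) elements as HVX element types too.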
bool HexagonSubtarget::isHVXElementType(MVT Ty, bool IncludeBool) const {
  if (!useHVXOps())
    return false;
  if (Ty.isVector())
    Ty = Ty.getVectorElementType();
  if (IncludeBool && Ty == MVT::i1)
    return true;
  ArrayRef<MVT> ElemTypes = getHVXElementTypes();
  return llvm::is_contained(ElemTypes, Ty);
}

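// Return true if VecTy is a vector type that maps onto an HVX vector register
// or register pair for the current HVX vector length.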
bool HexagonSubtarget::isHVXVectorType(EVT VecTy, bool IncludeBool) const {
  if (!VecTy.isSimple())
    return false;
  if (!VecTy.isVector() || !useHVXOps() || VecTy.isScalableVector())
    return false;
  MVT ElemTy = VecTy.getSimpleVT().getVectorElementType();
  if (!IncludeBool && ElemTy == MVT::i1)
    return false;

  unsigned HwLen = getVectorLength();
  unsigned NumElems = VecTy.getVectorNumElements();
  ArrayRef<MVT> ElemTypes = getHVXElementTypes();

  if (IncludeBool && ElemTy == MVT::i1) {
    // Boolean HVX vector types are formed from regular HVX vector types
    // by replacing the element type with i1.
    for (MVT T : ElemTypes)
      if (NumElems * T.getSizeInBits() == 8 * HwLen)
        return true;
    return false;
  }

  unsigned VecWidth = VecTy.getSizeInBits();
  if (VecWidth != 8 * HwLen && VecWidth != 16 * HwLen)
    return false;
  return llvm::is_contained(ElemTypes, ElemTy);
}

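// Return true if the IR type VecTy can be legalized to some HVX vector type,
// possibly after widening the element count to a power of 2.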
bool HexagonSubtarget::isTypeForHVX(Type *VecTy, bool IncludeBool) const {
  if (!VecTy->isVectorTy() || isa<ScalableVectorType>(VecTy))
    return false;
  // Avoid types like <2 x i32*>.
  Type *ScalTy = VecTy->getScalarType();
  if (!ScalTy->isIntegerTy() &&
      !(ScalTy->isFloatingPointTy() && useHVXFloatingPoint()))
    return false;
  // The given type may be something like <17 x i32>, which is not MVT,
  // but can be represented as (non-simple) EVT.
  EVT Ty = EVT::getEVT(VecTy, /*HandleUnknown*/ false);
  if (!Ty.getVectorElementType().isSimple())
    return false;

  auto isHvxTy = [this, IncludeBool](MVT SimpleTy) {
    if (isHVXVectorType(SimpleTy, IncludeBool))
      return true;
    auto Action = getTargetLowering()->getPreferredVectorAction(SimpleTy);
    return Action == TargetLoweringBase::TypeWidenVector;
  };

  // Round up EVT to have power-of-2 elements, and keep checking if it
  // qualifies for HVX, dividing it in half after each step.
  MVT ElemTy = Ty.getVectorElementType().getSimpleVT();
  unsigned VecLen = PowerOf2Ceil(Ty.getVectorNumElements());
  while (VecLen > 1) {
    MVT SimpleTy = MVT::getVectorVT(ElemTy, VecLen);
    if (SimpleTy.isValid() && isHvxTy(SimpleTy))
      return true;
    VecLen /= 2;
  }

  return false;
}

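// Remove output dependences on the USR overflow bit (USR_OVF): instructions
// that only set the sticky overflow flag should not be serialized by it.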
void HexagonSubtarget::UsrOverflowMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &SU : DAG->SUnits) {
    if (!SU.isInstr())
      continue;
    SmallVector<SDep, 4> Erase;
    for (auto &D : SU.Preds)
      if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF)
        Erase.push_back(D);
    for (auto &E : Erase)
      SU.removePred(E);
  }
}

void HexagonSubtarget::HVXMemLatencyMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &SU : DAG->SUnits) {
    // Update the latency of chain edges between v60 vector load or store
    // instructions to be 1. These instructions cannot be scheduled in the
    // same packet.
    MachineInstr &MI1 = *SU.getInstr();
    auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII);
    bool IsStoreMI1 = MI1.mayStore();
    bool IsLoadMI1 = MI1.mayLoad();
    if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1))
      continue;
    for (SDep &SI : SU.Succs) {
      if (SI.getKind() != SDep::Order || SI.getLatency() != 0)
        continue;
      MachineInstr &MI2 = *SI.getSUnit()->getInstr();
      if (!QII->isHVXVec(MI2))
        continue;
      if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) {
        SI.setLatency(1);
        SU.setHeightDirty();
        // Change the dependence in the opposite direction too.
        for (SDep &PI : SI.getSUnit()->Preds) {
          if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order)
            continue;
          PI.setLatency(1);
          SI.getSUnit()->setDepthDirty();
        }
      }
    }
  }
}

// Check if a call and subsequent A2_tfrpi instructions should maintain
// scheduling affinity. We are looking for the TFRI to be consumed in
// the next instruction. This should help reduce the instances of
// double register pairs being allocated and scheduled before a call
// when not used until after the call. This situation is exacerbated
// by the fact that we allocate the pair from the callee saves list,
// leading to excess spills and restores.
bool HexagonSubtarget::CallMutation::shouldTFRICallBind(
    const HexagonInstrInfo &HII, const SUnit &Inst1,
    const SUnit &Inst2) const {
  if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
    return false;

  // TypeXTYPE are 64 bit operations.
  unsigned Type = HII.getType(*Inst2.getInstr());
  return Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op ||
         Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM;
}

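// Add barrier edges around calls so that compares, TFRI/call pairs, and
// physical-register copies are not scheduled across them; see the individual
// cases below.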
void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
  SUnit* LastSequentialCall = nullptr;
  // Map from virtual register to physical register from the copy.
  DenseMap<unsigned, unsigned> VRegHoldingReg;
  // Map from the physical register to the instruction that uses virtual
  // register. This is used to create the barrier edge.
  DenseMap<unsigned, SUnit *> LastVRegUse;
  auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
  auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();

  // Currently we only catch the situation when compare gets scheduled
  // before preceding call.
  for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
    // Remember the call.
    if (DAG->SUnits[su].getInstr()->isCall())
      LastSequentialCall = &DAG->SUnits[su];
    // Look for a compare that defines a predicate.
    else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
      DAG->addEdge(&DAG->SUnits[su], SDep(LastSequentialCall, SDep::Barrier));
    // Look for call and tfri* instructions.
    else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
             shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
      DAG->addEdge(&DAG->SUnits[su], SDep(&DAG->SUnits[su-1], SDep::Barrier));
    // Prevent redundant register copies due to reads and writes of physical
    // registers. The original motivation for this was the code generated
    // between two calls, which is caused by both the return value and the
    // argument for the next call being in %r0.
    // Example:
    //   1: <call1>
    //   2: %vreg = COPY %r0
    //   3: <use of %vreg>
    //   4: %r0 = ...
    //   5: <call2>
    // The scheduler would often swap 3 and 4, so an additional register is
    // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
    // this.
    // The code below checks for all the physical registers, not just R0/D0/V0.
    else if (SchedRetvalOptimization) {
      const MachineInstr *MI = DAG->SUnits[su].getInstr();
      if (MI->isCopy() && MI->getOperand(1).getReg().isPhysical()) {
        // %vregX = COPY %r0
        VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg();
        LastVRegUse.erase(MI->getOperand(1).getReg());
      } else {
        for (const MachineOperand &MO : MI->operands()) {
          if (!MO.isReg())
            continue;
          if (MO.isUse() && !MI->isCopy() &&
              VRegHoldingReg.count(MO.getReg())) {
            // <use of %vregX>
            LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->SUnits[su];
          } else if (MO.isDef() && MO.getReg().isPhysical()) {
            for (MCRegAliasIterator AI(MO.getReg(), &TRI, true); AI.isValid();
                 ++AI) {
              if (LastVRegUse.count(*AI) &&
                  LastVRegUse[*AI] != &DAG->SUnits[su])
                // %r0 = ...
                DAG->addEdge(&DAG->SUnits[su], SDep(LastVRegUse[*AI], SDep::Barrier));
              LastVRegUse.erase(*AI);
            }
          }
        }
      }
    }
  }
}

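// Add artificial edges with extra latency between base+immediate loads whose
// offsets are likely to fall into the same cache bank, so the scheduler keeps
// them apart.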
void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
  if (!EnableCheckBankConflict)
    return;

  const auto &HII = static_cast<const HexagonInstrInfo&>(*DAG->TII);

  // Create artificial edges between loads that could likely cause a bank
  // conflict. Since such loads would normally not have any dependency
  // between them, we cannot rely on existing edges.
  for (unsigned i = 0, e = DAG->SUnits.size(); i != e; ++i) {
    SUnit &S0 = DAG->SUnits[i];
    MachineInstr &L0 = *S0.getInstr();
    if (!L0.mayLoad() || L0.mayStore() ||
        HII.getAddrMode(L0) != HexagonII::BaseImmOffset)
      continue;
    int64_t Offset0;
    LocationSize Size0 = 0;
    MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0);
    // If the access size is longer than the L1 cache line, skip the check.
    if (BaseOp0 == nullptr || !BaseOp0->isReg() || !Size0.hasValue() ||
        Size0.getValue() >= 32)
      continue;
    // Scan only up to 32 instructions ahead (to avoid n^2 complexity).
    for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) {
      SUnit &S1 = DAG->SUnits[j];
      MachineInstr &L1 = *S1.getInstr();
      if (!L1.mayLoad() || L1.mayStore() ||
          HII.getAddrMode(L1) != HexagonII::BaseImmOffset)
        continue;
      int64_t Offset1;
      LocationSize Size1 = 0;
      MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1);
      if (BaseOp1 == nullptr || !BaseOp1->isReg() || !Size1.hasValue() ||
          Size1.getValue() >= 32 || BaseOp0->getReg() != BaseOp1->getReg())
        continue;
      // Check bits 3 and 4 of the offset: if they differ, a bank conflict
      // is unlikely.
      if (((Offset0 ^ Offset1) & 0x18) != 0)
        continue;
      // Bits 3 and 4 are the same, add an artificial edge and set extra
      // latency.
      SDep A(&S0, SDep::Artificial);
      A.setLatency(1);
      S1.addPred(A, true);
    }
  }
}

/// Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
bool HexagonSubtarget::useAA() const {
  if (OptLevel != CodeGenOptLevel::None)
    return true;
  return false;
}

/// Perform target specific adjustments to the latency of a schedule
/// dependency.
void HexagonSubtarget::adjustSchedDependency(
    SUnit *Src, int SrcOpIdx, SUnit *Dst, int DstOpIdx, SDep &Dep,
    const TargetSchedModel *SchedModel) const {
  if (!Src->isInstr() || !Dst->isInstr())
    return;

  MachineInstr *SrcInst = Src->getInstr();
  MachineInstr *DstInst = Dst->getInstr();
  const HexagonInstrInfo *QII = getInstrInfo();

  // Instructions with .new operands have zero latency.
  SmallSet<SUnit *, 4> ExclSrc;
  SmallSet<SUnit *, 4> ExclDst;
  if (QII->canExecuteInBundle(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
    Dep.setLatency(0);
    return;
  }

  // Set the latency for a copy to zero since we hope that it will get
  // removed.
  if (DstInst->isCopy())
    Dep.setLatency(0);

  // If it's a REG_SEQUENCE/COPY, use its destination instruction to determine
  // the correct latency.
  // If there are multiple uses of the def of COPY/REG_SEQUENCE, set the latency
  // only if the latencies on all the uses are equal, otherwise set it to
  // default.
  if ((DstInst->isRegSequence() || DstInst->isCopy())) {
    Register DReg = DstInst->getOperand(0).getReg();
    std::optional<unsigned> DLatency;
    for (const auto &DDep : Dst->Succs) {
      MachineInstr *DDst = DDep.getSUnit()->getInstr();
      int UseIdx = -1;
      for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) {
        const MachineOperand &MO = DDst->getOperand(OpNum);
        if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) {
          UseIdx = OpNum;
          break;
        }
      }

      if (UseIdx == -1)
        continue;

      std::optional<unsigned> Latency =
          InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0, *DDst, UseIdx);

      // Set DLatency for the first time.
      if (!DLatency)
        DLatency = Latency;

      // For multiple uses, if the Latency is different across uses, reset
      // DLatency.
      if (DLatency != Latency) {
        DLatency = std::nullopt;
        break;
      }
    }
    Dep.setLatency(DLatency ? *DLatency : 0);
  }

  // Try to schedule uses near definitions to generate .cur.
  ExclSrc.clear();
  ExclDst.clear();
  if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
    Dep.setLatency(0);
    return;
  }
  int Latency = Dep.getLatency();
  bool IsArtificial = Dep.isArtificial();
  Latency = updateLatency(*SrcInst, *DstInst, IsArtificial, Latency);
  Dep.setLatency(Latency);
}

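// Mutations applied to the scheduling DAG by the post-RA scheduler.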
void HexagonSubtarget::getPostRAMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(std::make_unique<UsrOverflowMutation>());
  Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
  Mutations.push_back(std::make_unique<BankConflictMutation>());
}

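// Mutations applied to the scheduling DAG by the software pipeliner (SMS).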
void HexagonSubtarget::getSMSMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(std::make_unique<UsrOverflowMutation>());
  Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
}

// Pin the vtable to this file.
void HexagonSubtarget::anchor() {}

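// Use the MachineScheduler by default unless it is explicitly disabled on the
// command line.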
bool HexagonSubtarget::enableMachineScheduler() const {
  if (DisableHexagonMISched.getNumOccurrences())
    return !DisableHexagonMISched;
  return true;
}

bool HexagonSubtarget::usePredicatedCalls() const {
  return EnablePredicatedCalls;
}

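// Adjust the latency of an edge: artificial edges get latency 1, and on V60+
// the latency is halved (rounded up) when the source is an HVX vector
// instruction or back-to-back scheduling is in use.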
int HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
                                    MachineInstr &DstInst, bool IsArtificial,
                                    int Latency) const {
  if (IsArtificial)
    return 1;
  if (!hasV60Ops())
    return Latency;

  auto &QII = static_cast<const HexagonInstrInfo &>(*getInstrInfo());
  // BSB scheduling.
  if (QII.isHVXVec(SrcInst) || useBSBScheduling())
    Latency = (Latency + 1) >> 1;
  return Latency;
}

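// Recompute the latency of the register dependence between Src and Dst from
// the instruction itineraries, updating both directions of the edge.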
void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
  MachineInstr *SrcI = Src->getInstr();
  for (auto &I : Src->Succs) {
    if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
      continue;
    Register DepR = I.getReg();
    int DefIdx = -1;
    for (unsigned OpNum = 0; OpNum < SrcI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = SrcI->getOperand(OpNum);
      bool IsSameOrSubReg = false;
      if (MO.isReg()) {
        Register MOReg = MO.getReg();
        if (DepR.isVirtual()) {
          IsSameOrSubReg = (MOReg == DepR);
        } else {
          IsSameOrSubReg = getRegisterInfo()->isSubRegisterEq(DepR, MOReg);
        }
        if (MO.isDef() && IsSameOrSubReg)
          DefIdx = OpNum;
      }
    }
    assert(DefIdx >= 0 && "Def Reg not found in Src MI");
    MachineInstr *DstI = Dst->getInstr();
    SDep T = I;
    for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = DstI->getOperand(OpNum);
      if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) {
        std::optional<unsigned> Latency = InstrInfo.getOperandLatency(
            &InstrItins, *SrcI, DefIdx, *DstI, OpNum);

        // For some instructions (ex: COPY), we might end up with < 0 latency
        // as they don't have any Itinerary class associated with them.
        if (!Latency)
          Latency = 0;
        bool IsArtificial = I.isArtificial();
        Latency = updateLatency(*SrcI, *DstI, IsArtificial, *Latency);
        I.setLatency(*Latency);
      }
    }

    // Update the latency of opposite edge too.
    T.setSUnit(Src);
    auto F = find(Dst->Preds, T);
    assert(F != Dst->Preds.end());
    F->setLatency(I.getLatency());
  }
}

/// Change the latency between the two SUnits.
void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat)
      const {
  for (auto &I : Src->Succs) {
    if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
      continue;
    SDep T = I;
    I.setLatency(Lat);

    // Update the latency of opposite edge too.
    T.setSUnit(Src);
    auto F = find(Dst->Preds, T);
    assert(F != Dst->Preds.end());
    F->setLatency(Lat);
  }
}

/// If the SUnit has a zero latency edge, return the other SUnit.
static SUnit *getZeroLatency(SUnit *N, SmallVector<SDep, 4> &Deps) {
  for (auto &I : Deps)
    if (I.isAssignedRegDep() && I.getLatency() == 0 &&
        !I.getSUnit()->getInstr()->isPseudo())
      return I.getSUnit();
  return nullptr;
}

// Return true if these are the best two instructions to schedule
// together with a zero latency. Only one dependence should have a zero
// latency. If there are multiple choices, choose the best, and change
// the others, if needed.
bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst,
      const HexagonInstrInfo *TII, SmallSet<SUnit*, 4> &ExclSrc,
      SmallSet<SUnit*, 4> &ExclDst) const {
  MachineInstr &SrcInst = *Src->getInstr();
  MachineInstr &DstInst = *Dst->getInstr();

  // Ignore Boundary SU nodes as these have null instructions.
  if (Dst->isBoundaryNode())
    return false;

  if (SrcInst.isPHI() || DstInst.isPHI())
    return false;

  if (!TII->isToBeScheduledASAP(SrcInst, DstInst) &&
      !TII->canExecuteInBundle(SrcInst, DstInst))
    return false;

  // The architecture doesn't allow three dependent instructions in the same
  // packet. So, if the destination has a zero latency successor, then it's
  // not a candidate for a zero latency predecessor.
  if (getZeroLatency(Dst, Dst->Succs) != nullptr)
    return false;

  // Check if the Dst instruction is the best candidate first.
  SUnit *Best = nullptr;
  SUnit *DstBest = nullptr;
  SUnit *SrcBest = getZeroLatency(Dst, Dst->Preds);
  if (SrcBest == nullptr || Src->NodeNum >= SrcBest->NodeNum) {
    // Check that Src doesn't have a better candidate.
    DstBest = getZeroLatency(Src, Src->Succs);
    if (DstBest == nullptr || Dst->NodeNum <= DstBest->NodeNum)
      Best = Dst;
  }
  if (Best != Dst)
    return false;

  // The caller frequently adds the same dependence twice. If so, then
  // return true for this case too.
  if ((Src == SrcBest && Dst == DstBest ) ||
      (SrcBest == nullptr && Dst == DstBest) ||
      (Src == SrcBest && Dst == nullptr))
    return true;

  // Reassign the latency for the previous bests, which requires setting
  // the dependence edge in both directions.
  if (SrcBest != nullptr) {
    if (!hasV60Ops())
      changeLatency(SrcBest, Dst, 1);
    else
      restoreLatency(SrcBest, Dst);
  }
  if (DstBest != nullptr) {
    if (!hasV60Ops())
      changeLatency(Src, DstBest, 1);
    else
      restoreLatency(Src, DstBest);
  }

  // Attempt to find another opportunity for zero latency in a different
  // dependence.
  if (SrcBest && DstBest)
    // If there is an edge from SrcBest to DstBest, then try to change that
    // to 0 now.
    changeLatency(SrcBest, DstBest, 0);
  else if (DstBest) {
    // Check if the previous best destination instruction has a new zero
    // latency dependence opportunity.
    ExclSrc.insert(Src);
    for (auto &I : DstBest->Preds)
      if (ExclSrc.count(I.getSUnit()) == 0 &&
          isBestZeroLatency(I.getSUnit(), DstBest, TII, ExclSrc, ExclDst))
        changeLatency(I.getSUnit(), DstBest, 0);
  } else if (SrcBest) {
    // Check if previous best source instruction has a new zero latency
    // dependence opportunity.
    ExclDst.insert(Dst);
    for (auto &I : SrcBest->Succs)
      if (ExclDst.count(I.getSUnit()) == 0 &&
          isBestZeroLatency(SrcBest, I.getSUnit(), TII, ExclSrc, ExclDst))
        changeLatency(SrcBest, I.getSUnit(), 0);
  }

  return true;
}

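// L1 cache parameters used by the prefetching and bank-conflict heuristics.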
unsigned HexagonSubtarget::getL1CacheLineSize() const {
  return 32;
}

unsigned HexagonSubtarget::getL1PrefetchDistance() const {
  return 32;
}

bool HexagonSubtarget::enableSubRegLiveness() const { return true; }

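// Map a machine opcode to the corresponding Hexagon intrinsic ID using tables
// generated from the instruction definitions; HVX entries are selected by the
// current HVX vector length.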
Intrinsic::ID HexagonSubtarget::getIntrinsicId(unsigned Opc) const {
  struct Scalar {
    unsigned Opcode;
    Intrinsic::ID IntId;
  };
  struct Hvx {
    unsigned Opcode;
    Intrinsic::ID Int64Id, Int128Id;
  };

  static Scalar ScalarInts[] = {
#define GET_SCALAR_INTRINSICS
#include "HexagonDepInstrIntrinsics.inc"
#undef GET_SCALAR_INTRINSICS
  };

  static Hvx HvxInts[] = {
#define GET_HVX_INTRINSICS
#include "HexagonDepInstrIntrinsics.inc"
#undef GET_HVX_INTRINSICS
  };

  const auto CmpOpcode = [](auto A, auto B) { return A.Opcode < B.Opcode; };
  [[maybe_unused]] static bool SortedScalar =
      (llvm::sort(ScalarInts, CmpOpcode), true);
  [[maybe_unused]] static bool SortedHvx =
      (llvm::sort(HvxInts, CmpOpcode), true);

  auto [BS, ES] = std::make_pair(std::begin(ScalarInts), std::end(ScalarInts));
  auto [BH, EH] = std::make_pair(std::begin(HvxInts), std::end(HvxInts));

  auto FoundScalar = std::lower_bound(BS, ES, Scalar{Opc, 0}, CmpOpcode);
  if (FoundScalar != ES && FoundScalar->Opcode == Opc)
    return FoundScalar->IntId;

  auto FoundHvx = std::lower_bound(BH, EH, Hvx{Opc, 0, 0}, CmpOpcode);
  if (FoundHvx != EH && FoundHvx->Opcode == Opc) {
    unsigned HwLen = getVectorLength();
    if (HwLen == 64)
      return FoundHvx->Int64Id;
    if (HwLen == 128)
      return FoundHvx->Int128Id;
  }

  std::string error = "Invalid opcode (" + std::to_string(Opc) + ")";
  llvm_unreachable(error.c_str());
  return 0;
}