//===- ARMLatencyMutations.cpp - ARM Latency Mutations -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains the ARM definition DAG scheduling mutations which
/// change inter-instruction latencies
//
//===----------------------------------------------------------------------===//

#include "ARMLatencyMutations.h"
#include "ARMSubtarget.h"
#include "Thumb2InstrInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include <algorithm>
#include <array>
#include <initializer_list>
#include <memory>

namespace llvm {

namespace {

// Precompute information about opcodes to speed up pass

class InstructionInformation {
protected:
  struct IInfo {
    bool HasBRegAddr : 1;      // B-side of addr gen is a register
    bool HasBRegAddrShift : 1; // B-side of addr gen has a shift
    bool IsDivide : 1;         // Some form of integer divide
    bool IsInlineShiftALU : 1; // Inline shift+ALU
    bool IsMultiply : 1;       // Some form of integer multiply
    bool IsMVEIntMAC : 1;      // MVE 8/16/32-bit integer MAC operation
    bool IsNonSubwordLoad : 1; // Load which is a word or larger
    bool IsShift : 1;          // Shift operation
    bool IsRev : 1;            // REV operation
    bool ProducesQP : 1;       // Produces a vector register result
    bool ProducesDP : 1;       // Produces a double-precision register result
    bool ProducesSP : 1;       // Produces a single-precision register result
    bool ConsumesQP : 1;       // Consumes a vector register result
    bool ConsumesDP : 1;       // Consumes a double-precision register result
    bool ConsumesSP : 1;       // Consumes a single-precision register result
    unsigned MVEIntMACMatched; // Matched operand type (for MVE)
    unsigned AddressOpMask;    // Mask indicating which operands go into AGU
    IInfo()
        : HasBRegAddr(false), HasBRegAddrShift(false), IsDivide(false),
          IsInlineShiftALU(false), IsMultiply(false), IsMVEIntMAC(false),
          IsNonSubwordLoad(false), IsShift(false), IsRev(false),
          ProducesQP(false), ProducesDP(false), ProducesSP(false),
          ConsumesQP(false), ConsumesDP(false), ConsumesSP(false),
          MVEIntMACMatched(0), AddressOpMask(0) {}
  };
  typedef std::array<IInfo, ARM::INSTRUCTION_LIST_END> IInfoArray;
  IInfoArray Info;

public:
  // Always available information
  unsigned getAddressOpMask(unsigned Op) { return Info[Op].AddressOpMask; }
  bool hasBRegAddr(unsigned Op) { return Info[Op].HasBRegAddr; }
  bool hasBRegAddrShift(unsigned Op) { return Info[Op].HasBRegAddrShift; }
  bool isDivide(unsigned Op) { return Info[Op].IsDivide; }
  bool isInlineShiftALU(unsigned Op) { return Info[Op].IsInlineShiftALU; }
  bool isMultiply(unsigned Op) { return Info[Op].IsMultiply; }
  bool isMVEIntMAC(unsigned Op) { return Info[Op].IsMVEIntMAC; }
  bool isNonSubwordLoad(unsigned Op) { return Info[Op].IsNonSubwordLoad; }
  bool isRev(unsigned Op) { return Info[Op].IsRev; }
  bool isShift(unsigned Op) { return Info[Op].IsShift; }

  // Information available only if markDPProducersConsumers is called.
  bool producesQP(unsigned Op) { return Info[Op].ProducesQP; }
  bool producesDP(unsigned Op) { return Info[Op].ProducesDP; }
  bool producesSP(unsigned Op) { return Info[Op].ProducesSP; }
  bool consumesQP(unsigned Op) { return Info[Op].ConsumesQP; }
  bool consumesDP(unsigned Op) { return Info[Op].ConsumesDP; }
  bool consumesSP(unsigned Op) { return Info[Op].ConsumesSP; }

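  // True when an accumulator produced by SrcOp can feed the MVE MAC DstOp:
  // either the two opcodes are identical, or DstOp explicitly records SrcOp
  // as a matched producer.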
  bool isMVEIntMACMatched(unsigned SrcOp, unsigned DstOp) {
    return SrcOp == DstOp || Info[DstOp].MVEIntMACMatched == SrcOp;
  }

  InstructionInformation(const ARMBaseInstrInfo *TII);

protected:
  void markDPProducersConsumers(const ARMBaseInstrInfo *TII);
};

InstructionInformation::InstructionInformation(const ARMBaseInstrInfo *TII) {
  using namespace ARM;

  std::initializer_list<unsigned> hasBRegAddrList = {
      t2LDRs, t2LDRBs, t2LDRHs, t2STRs, t2STRBs, t2STRHs,
      tLDRr, tLDRBr, tLDRHr, tSTRr, tSTRBr, tSTRHr,
  };
  for (auto op : hasBRegAddrList) {
    Info[op].HasBRegAddr = true;
  }

  std::initializer_list<unsigned> hasBRegAddrShiftList = {
      t2LDRs, t2LDRBs, t2LDRHs, t2STRs, t2STRBs, t2STRHs,
  };
  for (auto op : hasBRegAddrShiftList) {
    Info[op].HasBRegAddrShift = true;
  }

  Info[t2SDIV].IsDivide = Info[t2UDIV].IsDivide = true;

  std::initializer_list<unsigned> isInlineShiftALUList = {
      t2ADCrs, t2ADDSrs, t2ADDrs, t2BICrs, t2EORrs,
      t2ORNrs, t2RSBSrs, t2RSBrs, t2SBCrs, t2SUBrs,
      t2SUBSrs, t2CMPrs, t2CMNzrs, t2TEQrs, t2TSTrs,
  };
  for (auto op : isInlineShiftALUList) {
    Info[op].IsInlineShiftALU = true;
  }

  std::initializer_list<unsigned> isMultiplyList = {
      t2MUL, t2MLA, t2MLS, t2SMLABB, t2SMLABT, t2SMLAD, t2SMLADX,
      t2SMLAL, t2SMLALBB, t2SMLALBT, t2SMLALD, t2SMLALDX, t2SMLALTB, t2SMLALTT,
      t2SMLATB, t2SMLATT, t2SMLAWT, t2SMLSD, t2SMLSDX, t2SMLSLD, t2SMLSLDX,
      t2SMMLA, t2SMMLAR, t2SMMLS, t2SMMLSR, t2SMMUL, t2SMMULR, t2SMUAD,
      t2SMUADX, t2SMULBB, t2SMULBT, t2SMULL, t2SMULTB, t2SMULTT, t2SMULWT,
      t2SMUSD, t2SMUSDX, t2UMAAL, t2UMLAL, t2UMULL, tMUL,
  };
  for (auto op : isMultiplyList) {
    Info[op].IsMultiply = true;
  }

  std::initializer_list<unsigned> isMVEIntMACList = {
      MVE_VMLAS_qr_i16, MVE_VMLAS_qr_i32, MVE_VMLAS_qr_i8,
      MVE_VMLA_qr_i16, MVE_VMLA_qr_i32, MVE_VMLA_qr_i8,
      MVE_VQDMLAH_qrs16, MVE_VQDMLAH_qrs32, MVE_VQDMLAH_qrs8,
      MVE_VQDMLASH_qrs16, MVE_VQDMLASH_qrs32, MVE_VQDMLASH_qrs8,
      MVE_VQRDMLAH_qrs16, MVE_VQRDMLAH_qrs32, MVE_VQRDMLAH_qrs8,
      MVE_VQRDMLASH_qrs16, MVE_VQRDMLASH_qrs32, MVE_VQRDMLASH_qrs8,
      MVE_VQDMLADHXs16, MVE_VQDMLADHXs32, MVE_VQDMLADHXs8,
      MVE_VQDMLADHs16, MVE_VQDMLADHs32, MVE_VQDMLADHs8,
      MVE_VQDMLSDHXs16, MVE_VQDMLSDHXs32, MVE_VQDMLSDHXs8,
      MVE_VQDMLSDHs16, MVE_VQDMLSDHs32, MVE_VQDMLSDHs8,
      MVE_VQRDMLADHXs16, MVE_VQRDMLADHXs32, MVE_VQRDMLADHXs8,
      MVE_VQRDMLADHs16, MVE_VQRDMLADHs32, MVE_VQRDMLADHs8,
      MVE_VQRDMLSDHXs16, MVE_VQRDMLSDHXs32, MVE_VQRDMLSDHXs8,
      MVE_VQRDMLSDHs16, MVE_VQRDMLSDHs32, MVE_VQRDMLSDHs8,
  };
  for (auto op : isMVEIntMACList) {
    Info[op].IsMVEIntMAC = true;
  }

  std::initializer_list<unsigned> isNonSubwordLoadList = {
      t2LDRi12, t2LDRi8, t2LDR_POST, t2LDR_PRE, t2LDRpci,
      t2LDRs, t2LDRDi8, t2LDRD_POST, t2LDRD_PRE, tLDRi,
      tLDRpci, tLDRr, tLDRspi,
  };
  for (auto op : isNonSubwordLoadList) {
    Info[op].IsNonSubwordLoad = true;
  }

  std::initializer_list<unsigned> isRevList = {
      t2REV, t2REV16, t2REVSH, t2RBIT, tREV, tREV16, tREVSH,
  };
  for (auto op : isRevList) {
    Info[op].IsRev = true;
  }

  std::initializer_list<unsigned> isShiftList = {
      t2ASRri, t2ASRrr, t2LSLri, t2LSLrr, t2LSRri, t2LSRrr, t2RORri, t2RORrr,
      tASRri, tASRrr, tLSLSri, tLSLri, tLSLrr, tLSRri, tLSRrr, tROR,
  };
  for (auto op : isShiftList) {
    Info[op].IsShift = true;
  }

  std::initializer_list<unsigned> Address1List = {
      t2LDRBi12,
      t2LDRBi8,
      t2LDRBpci,
      t2LDRBs,
      t2LDRHi12,
      t2LDRHi8,
      t2LDRHpci,
      t2LDRHs,
      t2LDRSBi12,
      t2LDRSBi8,
      t2LDRSBpci,
      t2LDRSBs,
      t2LDRSHi12,
      t2LDRSHi8,
      t2LDRSHpci,
      t2LDRSHs,
      t2LDRi12,
      t2LDRi8,
      t2LDRpci,
      t2LDRs,
      tLDRBi,
      tLDRBr,
      tLDRHi,
      tLDRHr,
      tLDRSB,
      tLDRSH,
      tLDRi,
      tLDRpci,
      tLDRr,
      tLDRspi,
      t2STRBi12,
      t2STRBi8,
      t2STRBs,
      t2STRHi12,
      t2STRHi8,
      t2STRHs,
      t2STRi12,
      t2STRi8,
      t2STRs,
      tSTRBi,
      tSTRBr,
      tSTRHi,
      tSTRHr,
      tSTRi,
      tSTRr,
      tSTRspi,
      VLDRD,
      VLDRH,
      VLDRS,
      VSTRD,
      VSTRH,
      VSTRS,
      MVE_VLD20_16,
      MVE_VLD20_32,
      MVE_VLD20_8,
      MVE_VLD21_16,
      MVE_VLD21_32,
      MVE_VLD21_8,
      MVE_VLD40_16,
      MVE_VLD40_32,
      MVE_VLD40_8,
      MVE_VLD41_16,
      MVE_VLD41_32,
      MVE_VLD41_8,
      MVE_VLD42_16,
      MVE_VLD42_32,
      MVE_VLD42_8,
      MVE_VLD43_16,
      MVE_VLD43_32,
      MVE_VLD43_8,
      MVE_VLDRBS16,
      MVE_VLDRBS16_rq,
      MVE_VLDRBS32,
      MVE_VLDRBS32_rq,
      MVE_VLDRBU16,
      MVE_VLDRBU16_rq,
      MVE_VLDRBU32,
      MVE_VLDRBU32_rq,
      MVE_VLDRBU8,
      MVE_VLDRBU8_rq,
      MVE_VLDRDU64_qi,
      MVE_VLDRDU64_rq,
      MVE_VLDRDU64_rq_u,
      MVE_VLDRHS32,
      MVE_VLDRHS32_rq,
      MVE_VLDRHS32_rq_u,
      MVE_VLDRHU16,
      MVE_VLDRHU16_rq,
      MVE_VLDRHU16_rq_u,
      MVE_VLDRHU32,
      MVE_VLDRHU32_rq,
      MVE_VLDRHU32_rq_u,
      MVE_VLDRWU32,
      MVE_VLDRWU32_qi,
      MVE_VLDRWU32_rq,
      MVE_VLDRWU32_rq_u,
      MVE_VST20_16,
      MVE_VST20_32,
      MVE_VST20_8,
      MVE_VST21_16,
      MVE_VST21_32,
      MVE_VST21_8,
      MVE_VST40_16,
      MVE_VST40_32,
      MVE_VST40_8,
      MVE_VST41_16,
      MVE_VST41_32,
      MVE_VST41_8,
      MVE_VST42_16,
      MVE_VST42_32,
      MVE_VST42_8,
      MVE_VST43_16,
      MVE_VST43_32,
      MVE_VST43_8,
      MVE_VSTRB16,
      MVE_VSTRB16_rq,
      MVE_VSTRB32,
      MVE_VSTRB32_rq,
      MVE_VSTRBU8,
      MVE_VSTRB8_rq,
      MVE_VSTRD64_qi,
      MVE_VSTRD64_rq,
      MVE_VSTRD64_rq_u,
      MVE_VSTRH32,
      MVE_VSTRH32_rq,
      MVE_VSTRH32_rq_u,
      MVE_VSTRHU16,
      MVE_VSTRH16_rq,
      MVE_VSTRH16_rq_u,
      MVE_VSTRWU32,
      MVE_VSTRW32_qi,
      MVE_VSTRW32_rq,
      MVE_VSTRW32_rq_u,
  };
  std::initializer_list<unsigned> Address2List = {
      t2LDRB_POST,
      t2LDRB_PRE,
      t2LDRDi8,
      t2LDRH_POST,
      t2LDRH_PRE,
      t2LDRSB_POST,
      t2LDRSB_PRE,
      t2LDRSH_POST,
      t2LDRSH_PRE,
      t2LDR_POST,
      t2LDR_PRE,
      t2STRB_POST,
      t2STRB_PRE,
      t2STRDi8,
      t2STRH_POST,
      t2STRH_PRE,
      t2STR_POST,
      t2STR_PRE,
      MVE_VLD20_16_wb,
      MVE_VLD20_32_wb,
      MVE_VLD20_8_wb,
      MVE_VLD21_16_wb,
      MVE_VLD21_32_wb,
      MVE_VLD21_8_wb,
      MVE_VLD40_16_wb,
      MVE_VLD40_32_wb,
      MVE_VLD40_8_wb,
      MVE_VLD41_16_wb,
      MVE_VLD41_32_wb,
      MVE_VLD41_8_wb,
      MVE_VLD42_16_wb,
      MVE_VLD42_32_wb,
      MVE_VLD42_8_wb,
      MVE_VLD43_16_wb,
      MVE_VLD43_32_wb,
      MVE_VLD43_8_wb,
      MVE_VLDRBS16_post,
      MVE_VLDRBS16_pre,
      MVE_VLDRBS32_post,
      MVE_VLDRBS32_pre,
      MVE_VLDRBU16_post,
      MVE_VLDRBU16_pre,
      MVE_VLDRBU32_post,
      MVE_VLDRBU32_pre,
      MVE_VLDRBU8_post,
      MVE_VLDRBU8_pre,
      MVE_VLDRDU64_qi_pre,
      MVE_VLDRHS32_post,
      MVE_VLDRHS32_pre,
      MVE_VLDRHU16_post,
      MVE_VLDRHU16_pre,
      MVE_VLDRHU32_post,
      MVE_VLDRHU32_pre,
      MVE_VLDRWU32_post,
      MVE_VLDRWU32_pre,
      MVE_VLDRWU32_qi_pre,
      MVE_VST20_16_wb,
      MVE_VST20_32_wb,
      MVE_VST20_8_wb,
      MVE_VST21_16_wb,
      MVE_VST21_32_wb,
      MVE_VST21_8_wb,
      MVE_VST40_16_wb,
      MVE_VST40_32_wb,
      MVE_VST40_8_wb,
      MVE_VST41_16_wb,
      MVE_VST41_32_wb,
      MVE_VST41_8_wb,
      MVE_VST42_16_wb,
      MVE_VST42_32_wb,
      MVE_VST42_8_wb,
      MVE_VST43_16_wb,
      MVE_VST43_32_wb,
      MVE_VST43_8_wb,
      MVE_VSTRB16_post,
      MVE_VSTRB16_pre,
      MVE_VSTRB32_post,
      MVE_VSTRB32_pre,
      MVE_VSTRBU8_post,
      MVE_VSTRBU8_pre,
      MVE_VSTRD64_qi_pre,
      MVE_VSTRH32_post,
      MVE_VSTRH32_pre,
      MVE_VSTRHU16_post,
      MVE_VSTRHU16_pre,
      MVE_VSTRWU32_post,
      MVE_VSTRWU32_pre,
      MVE_VSTRW32_qi_pre,
  };
  std::initializer_list<unsigned> Address3List = {
      t2LDRD_POST,
      t2LDRD_PRE,
      t2STRD_POST,
      t2STRD_PRE,
  };
  // Compute a mask of which operands are involved in address computation
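  // Bit N of the mask marks MachineInstr operand N as feeding address
  // generation: 0x6 marks operands 1-2, 0xc operands 2-3, and 0x18
  // operands 3-4 (instructions with extra defs or writeback push the
  // address operands to higher indices).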
  for (auto &op : Address1List) {
    Info[op].AddressOpMask = 0x6;
  }
  for (auto &op : Address2List) {
    Info[op].AddressOpMask = 0xc;
  }
  for (auto &op : Address3List) {
    Info[op].AddressOpMask = 0x18;
  }
  for (auto &op : hasBRegAddrShiftList) {
    Info[op].AddressOpMask |= 0x8;
  }
}

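// Record, for every opcode, whether it defines or uses single-precision,
// double-precision, or vector (MVE/Q) register classes. Only subtargets that
// model mixed-width FP forwarding (Cortex-M85 below) need this, so it is not
// run from the constructor.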
void InstructionInformation::markDPProducersConsumers(
    const ARMBaseInstrInfo *TII) {
  // Learn about all instructions which have FP source/dest registers
  for (unsigned MI = 0; MI < ARM::INSTRUCTION_LIST_END; ++MI) {
    const MCInstrDesc &MID = TII->get(MI);
    auto Operands = MID.operands();
    for (unsigned OI = 0, OIE = MID.getNumOperands(); OI != OIE; ++OI) {
      bool MarkQP = false, MarkDP = false, MarkSP = false;
      switch (Operands[OI].RegClass) {
      case ARM::MQPRRegClassID:
      case ARM::DPRRegClassID:
      case ARM::DPR_8RegClassID:
      case ARM::DPR_VFP2RegClassID:
      case ARM::DPairRegClassID:
      case ARM::DPairSpcRegClassID:
      case ARM::DQuadRegClassID:
      case ARM::DQuadSpcRegClassID:
      case ARM::DTripleRegClassID:
      case ARM::DTripleSpcRegClassID:
        MarkDP = true;
        break;
      case ARM::QPRRegClassID:
      case ARM::QPR_8RegClassID:
      case ARM::QPR_VFP2RegClassID:
      case ARM::QQPRRegClassID:
      case ARM::QQQQPRRegClassID:
        MarkQP = true;
        break;
      case ARM::SPRRegClassID:
      case ARM::SPR_8RegClassID:
      case ARM::FPWithVPRRegClassID:
        MarkSP = true;
        break;
      default:
        break;
      }
      if (MarkQP) {
        if (OI < MID.getNumDefs())
          Info[MI].ProducesQP = true;
        else
          Info[MI].ConsumesQP = true;
      }
      if (MarkDP) {
        if (OI < MID.getNumDefs())
          Info[MI].ProducesDP = true;
        else
          Info[MI].ConsumesDP = true;
      }
      if (MarkSP) {
        if (OI < MID.getNumDefs())
          Info[MI].ProducesSP = true;
        else
          Info[MI].ConsumesSP = true;
      }
    }
  }
}

} // anonymous namespace

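// Predicated instructions read CPSR through their explicit predicate
// operands; an additional implicit CPSR use marks a "true" flags consumer
// (see the comment in CortexM7Overrides::modifyBypasses below).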
static bool hasImplicitCPSRUse(const MachineInstr *MI) {
  return MI->getDesc().hasImplicitUseOfPhysReg(ARM::CPSR);
}

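// Each DAG edge is stored twice: as a successor edge on the producing SUnit
// and as the mirrored predecessor edge on the consuming SUnit. Update the
// latency on both copies and mark the cached depth/height values dirty so
// the scheduler recomputes them.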
void ARMOverrideBypasses::setBidirLatencies(SUnit &SrcSU, SDep &SrcDep,
                                            unsigned latency) {
  SDep Reverse = SrcDep;
  Reverse.setSUnit(&SrcSU);
  for (SDep &PDep : SrcDep.getSUnit()->Preds) {
    if (PDep == Reverse) {
      PDep.setLatency(latency);
      SrcDep.getSUnit()->setDepthDirty();
      break;
    }
  }
  SrcDep.setLatency(latency);
  SrcSU.setHeightDirty();
}

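// ARMCC encodes each condition and its inverse as adjacent values, so masking
// off the low bit groups a predicate with its complement; two predicates are
// "mismatched" only when they belong to different groups.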
static bool mismatchedPred(ARMCC::CondCodes a, ARMCC::CondCodes b) {
  return (a & 0xe) != (b & 0xe);
}

// Set output dependences to zero latency for processors which can
// simultaneously issue to the same register. Returns true if a change
// was made.
bool ARMOverrideBypasses::zeroOutputDependences(SUnit &ISU, SDep &Dep) {
  if (Dep.getKind() == SDep::Output) {
    setBidirLatencies(ISU, Dep, 0);
    return true;
  }
  return false;
}

// The graph doesn't look inside of bundles to determine their
// scheduling boundaries and reports zero latency into and out of them
// (except for CPSR into the bundle, which has latency 1).
// Make some better scheduling assumptions:
// 1) CPSR uses have zero latency; other uses have incoming latency 1
// 2) CPSR defs retain a latency of zero; others have a latency of 1.
//
// Returns 1 if a use change was made; 2 if a def change was made; 0 otherwise
unsigned ARMOverrideBypasses::makeBundleAssumptions(SUnit &ISU, SDep &Dep) {

  SUnit &DepSU = *Dep.getSUnit();
  const MachineInstr *SrcMI = ISU.getInstr();
  unsigned SrcOpcode = SrcMI->getOpcode();
  const MachineInstr *DstMI = DepSU.getInstr();
  unsigned DstOpcode = DstMI->getOpcode();

  if (DstOpcode == ARM::BUNDLE && TII->isPredicated(*DstMI)) {
    setBidirLatencies(
        ISU, Dep,
        (Dep.isAssignedRegDep() && Dep.getReg() == ARM::CPSR) ? 0 : 1);
    return 1;
  }
  if (SrcOpcode == ARM::BUNDLE && TII->isPredicated(*SrcMI) &&
      Dep.isAssignedRegDep() && Dep.getReg() != ARM::CPSR) {
    setBidirLatencies(ISU, Dep, 1);
    return 2;
  }
  return 0;
}

// Determine whether there is a memory RAW hazard here and set up latency
// accordingly
bool ARMOverrideBypasses::memoryRAWHazard(SUnit &ISU, SDep &Dep,
                                          unsigned latency) {
  if (!Dep.isNormalMemory())
    return false;
  auto &SrcInst = *ISU.getInstr();
  auto &DstInst = *Dep.getSUnit()->getInstr();
  if (!SrcInst.mayStore() || !DstInst.mayLoad())
    return false;

  auto SrcMO = *SrcInst.memoperands().begin();
  auto DstMO = *DstInst.memoperands().begin();
  auto SrcVal = SrcMO->getValue();
  auto DstVal = DstMO->getValue();
  auto SrcPseudoVal = SrcMO->getPseudoValue();
  auto DstPseudoVal = DstMO->getPseudoValue();
  if (SrcVal && DstVal && AA->alias(SrcVal, DstVal) == AliasResult::MustAlias &&
      SrcMO->getOffset() == DstMO->getOffset()) {
    setBidirLatencies(ISU, Dep, latency);
    return true;
  } else if (SrcPseudoVal && DstPseudoVal &&
             SrcPseudoVal->kind() == DstPseudoVal->kind() &&
             SrcPseudoVal->kind() == PseudoSourceValue::FixedStack) {
    // Spills/fills
    auto FS0 = cast<FixedStackPseudoSourceValue>(SrcPseudoVal);
    auto FS1 = cast<FixedStackPseudoSourceValue>(DstPseudoVal);
    if (FS0 == FS1) {
      setBidirLatencies(ISU, Dep, latency);
      return true;
    }
  }
  return false;
}

namespace {

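// Shared, lazily constructed opcode-property table. The first subtarget
// override to be created builds it; later overrides reuse the same instance.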
std::unique_ptr<InstructionInformation> II;

class CortexM7InstructionInformation : public InstructionInformation {
public:
  CortexM7InstructionInformation(const ARMBaseInstrInfo *TII)
      : InstructionInformation(TII) {}
};

class CortexM7Overrides : public ARMOverrideBypasses {
public:
  CortexM7Overrides(const ARMBaseInstrInfo *TII, AAResults *AA)
      : ARMOverrideBypasses(TII, AA) {
    if (!II)
      II.reset(new CortexM7InstructionInformation(TII));
  }

  void modifyBypasses(SUnit &) override;
};

void CortexM7Overrides::modifyBypasses(SUnit &ISU) {
  const MachineInstr *SrcMI = ISU.getInstr();
  unsigned SrcOpcode = SrcMI->getOpcode();
  bool isNSWload = II->isNonSubwordLoad(SrcOpcode);

  // Walk the successors looking for latency overrides that are needed
  for (SDep &Dep : ISU.Succs) {

    // Output dependences should have 0 latency, as M7 is able to
    // schedule writers to the same register for simultaneous issue.
    if (zeroOutputDependences(ISU, Dep))
      continue;

    if (memoryRAWHazard(ISU, Dep, 4))
      continue;

    // Ignore dependencies other than data
    if (Dep.getKind() != SDep::Data)
      continue;

    SUnit &DepSU = *Dep.getSUnit();
    if (DepSU.isBoundaryNode())
      continue;

    if (makeBundleAssumptions(ISU, Dep) == 1)
      continue;

    const MachineInstr *DstMI = DepSU.getInstr();
    unsigned DstOpcode = DstMI->getOpcode();

    // Word loads into any multiply or divide instruction are considered
    // unable to bypass their scheduling stage. This isn't done in the .td
    // file because we cannot easily create a ReadAdvance that is 0 from
    // certain writer classes and 1 from all the rest.
    // (The other way around would have been easy.)
    if (isNSWload && (II->isMultiply(DstOpcode) || II->isDivide(DstOpcode)))
      setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);

    // Word loads into the B operand of a load/store are considered unable to
    // bypass their scheduling stage. This cannot be done in the .td file
    // because it requires choosing between -1 and -2 for the ReadAdvance.
    if (isNSWload && II->hasBRegAddr(DstOpcode) &&
        DstMI->getOperand(2).getReg() == Dep.getReg())
      setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);

    // Multiplies into any address generation cannot bypass from EX3. This
    // cannot be done in the .td file because it requires choosing between -1
    // and -2 for the ReadAdvance.
    if (II->isMultiply(SrcOpcode)) {
      unsigned OpMask = II->getAddressOpMask(DstOpcode) >> 1;
      for (unsigned i = 1; OpMask; ++i, OpMask >>= 1) {
        if ((OpMask & 1) && DstMI->getOperand(i).isReg() &&
            DstMI->getOperand(i).getReg() == Dep.getReg()) {
          setBidirLatencies(ISU, Dep, 4); // first legal bypass is EX4->EX1
          break;
        }
      }
    }

    // Mismatched conditional producers take longer on M7; they end up looking
    // like they were produced at EX3 and read at IS.
    if (TII->isPredicated(*SrcMI) && Dep.isAssignedRegDep() &&
        (SrcOpcode == ARM::BUNDLE ||
         mismatchedPred(TII->getPredicate(*SrcMI),
                        TII->getPredicate(*DstMI)))) {
      unsigned Lat = 1;
      // Operand A of shift+ALU is treated as an EX1 read instead of EX2.
      if (II->isInlineShiftALU(DstOpcode) && DstMI->getOperand(3).getImm() &&
          DstMI->getOperand(1).getReg() == Dep.getReg())
        Lat = 2;
      Lat = std::min(3u, Dep.getLatency() + Lat);
      setBidirLatencies(ISU, Dep, std::max(Dep.getLatency(), Lat));
    }

    // CC setter into conditional producer shouldn't have a latency of more
    // than 1 unless it's due to an implicit read. (All the "true" readers
    // of the condition code use an implicit read, and predicates use an
    // explicit.)
    if (Dep.isAssignedRegDep() && Dep.getReg() == ARM::CPSR &&
        TII->isPredicated(*DstMI) && !hasImplicitCPSRUse(DstMI))
      setBidirLatencies(ISU, Dep, 1);

    // REV instructions cannot bypass directly into the EX1 shifter. The
    // code is slightly inexact as it doesn't attempt to ensure that the bypass
    // is to the shifter operands.
    if (II->isRev(SrcOpcode)) {
      if (II->isInlineShiftALU(DstOpcode))
        setBidirLatencies(ISU, Dep, 2);
      else if (II->isShift(DstOpcode))
        setBidirLatencies(ISU, Dep, 1);
    }
  }
}

class M85InstructionInformation : public InstructionInformation {
public:
  M85InstructionInformation(const ARMBaseInstrInfo *t)
      : InstructionInformation(t) {
    markDPProducersConsumers(t);
  }
};

class M85Overrides : public ARMOverrideBypasses {
public:
  M85Overrides(const ARMBaseInstrInfo *t, AAResults *a)
      : ARMOverrideBypasses(t, a) {
    if (!II)
      II.reset(new M85InstructionInformation(t));
  }

  void modifyBypasses(SUnit &) override;

private:
  unsigned computeBypassStage(const MCSchedClassDesc *SCD);
  signed modifyMixedWidthFP(const MachineInstr *SrcMI,
                            const MachineInstr *DstMI, unsigned RegID,
                            const MCSchedClassDesc *SCD);
};

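// Estimate the pipeline stage in which the first result of this scheduling
// class becomes available, derived from the scheduling model's write latency.
// modifyMixedWidthFP uses it to translate a cross-width forward into a cycle
// adjustment.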
unsigned M85Overrides::computeBypassStage(const MCSchedClassDesc *SCDesc) {
  auto SM = DAG->getSchedModel();
  unsigned DefIdx = 0; // just look for the first output's timing
  if (DefIdx < SCDesc->NumWriteLatencyEntries) {
    // Lookup the definition's write latency in SubtargetInfo.
    const MCWriteLatencyEntry *WLEntry =
        SM->getSubtargetInfo()->getWriteLatencyEntry(SCDesc, DefIdx);
    unsigned Latency = WLEntry->Cycles >= 0 ? WLEntry->Cycles : 1000;
    if (Latency == 4)
      return 2;
    else if (Latency == 5)
      return 3;
    else if (Latency > 3)
      return 3;
    else
      return Latency;
  }
  return 2;
}

// Latency changes for bypassing between FP registers of different sizes:
//
// Note that mixed DP/SP are unlikely because of the semantics
// of C. Mixed MVE/SP are quite common when MVE intrinsics are used.
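//
// The return value is a signed adjustment applied on top of the existing edge
// latency by the caller: positive values add cycles for a cross-width
// forward, negative values remove them, and zero means no penalty applies.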
signed M85Overrides::modifyMixedWidthFP(const MachineInstr *SrcMI,
                                        const MachineInstr *DstMI,
                                        unsigned RegID,
                                        const MCSchedClassDesc *SCD) {

  if (!II->producesSP(SrcMI->getOpcode()) &&
      !II->producesDP(SrcMI->getOpcode()) &&
      !II->producesQP(SrcMI->getOpcode()))
    return 0;

  if (Register::isVirtualRegister(RegID)) {
    if (II->producesSP(SrcMI->getOpcode()) &&
        II->consumesDP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() == RegID &&
            OP.getSubReg() == ARM::ssub_1)
          return 5 - computeBypassStage(SCD);
    } else if (II->producesSP(SrcMI->getOpcode()) &&
               II->consumesQP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() == RegID &&
            (OP.getSubReg() == ARM::ssub_1 || OP.getSubReg() == ARM::ssub_3))
          return 5 - computeBypassStage(SCD) -
                 ((OP.getSubReg() == ARM::ssub_2 ||
                   OP.getSubReg() == ARM::ssub_3)
                      ? 1
                      : 0);
    } else if (II->producesDP(SrcMI->getOpcode()) &&
               II->consumesQP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() == RegID &&
            OP.getSubReg() == ARM::ssub_1)
          return -1;
    } else if (II->producesDP(SrcMI->getOpcode()) &&
               II->consumesSP(DstMI->getOpcode())) {
      for (auto &OP : DstMI->operands())
        if (OP.isReg() && OP.isUse() && OP.getReg() == RegID &&
            OP.getSubReg() == ARM::ssub_1)
          return 5 - computeBypassStage(SCD);
    } else if (II->producesQP(SrcMI->getOpcode()) &&
               II->consumesSP(DstMI->getOpcode())) {
      for (auto &OP : DstMI->operands())
        if (OP.isReg() && OP.isUse() && OP.getReg() == RegID &&
            (OP.getSubReg() == ARM::ssub_1 || OP.getSubReg() == ARM::ssub_3))
          return 5 - computeBypassStage(SCD) +
                 ((OP.getSubReg() == ARM::ssub_2 ||
                   OP.getSubReg() == ARM::ssub_3)
                      ? 1
                      : 0);
    } else if (II->producesQP(SrcMI->getOpcode()) &&
               II->consumesDP(DstMI->getOpcode())) {
      for (auto &OP : DstMI->operands())
        if (OP.isReg() && OP.isUse() && OP.getReg() == RegID &&
            OP.getSubReg() == ARM::ssub_1)
          return 1;
    }
  } else if (Register::isPhysicalRegister(RegID)) {
    // Note that when the producer is narrower, not all of the producers
    // may be present in the scheduling graph; somewhere earlier in the
    // compiler, an implicit def/use of the aliased full register gets
    // added to the producer, and so only that producer is seen as *the*
    // single producer. This behavior also has the unfortunate effect of
    // serializing the producers in the compiler's view of things.
    if (II->producesSP(SrcMI->getOpcode()) &&
        II->consumesDP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() >= ARM::S1 &&
            OP.getReg() <= ARM::S31 && (OP.getReg() - ARM::S0) % 2 &&
            (OP.getReg() == RegID ||
             (OP.getReg() - ARM::S0) / 2 + ARM::D0 == RegID ||
             (OP.getReg() - ARM::S0) / 4 + ARM::Q0 == RegID))
          return 5 - computeBypassStage(SCD);
    } else if (II->producesSP(SrcMI->getOpcode()) &&
               II->consumesQP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() >= ARM::S1 &&
            OP.getReg() <= ARM::S31 && (OP.getReg() - ARM::S0) % 2 &&
            (OP.getReg() == RegID ||
             (OP.getReg() - ARM::S0) / 2 + ARM::D0 == RegID ||
             (OP.getReg() - ARM::S0) / 4 + ARM::Q0 == RegID))
          return 5 - computeBypassStage(SCD) -
                 (((OP.getReg() - ARM::S0) / 2) % 2 ? 1 : 0);
    } else if (II->producesDP(SrcMI->getOpcode()) &&
               II->consumesQP(DstMI->getOpcode())) {
      for (auto &OP : SrcMI->operands())
        if (OP.isReg() && OP.isDef() && OP.getReg() >= ARM::D0 &&
            OP.getReg() <= ARM::D15 && (OP.getReg() - ARM::D0) % 2 &&
            (OP.getReg() == RegID ||
             (OP.getReg() - ARM::D0) / 2 + ARM::Q0 == RegID))
          return -1;
    } else if (II->producesDP(SrcMI->getOpcode()) &&
               II->consumesSP(DstMI->getOpcode())) {
      if (RegID >= ARM::S1 && RegID <= ARM::S31 && (RegID - ARM::S0) % 2)
        return 5 - computeBypassStage(SCD);
    } else if (II->producesQP(SrcMI->getOpcode()) &&
               II->consumesSP(DstMI->getOpcode())) {
      if (RegID >= ARM::S1 && RegID <= ARM::S31 && (RegID - ARM::S0) % 2)
        return 5 - computeBypassStage(SCD) +
               (((RegID - ARM::S0) / 2) % 2 ? 1 : 0);
    } else if (II->producesQP(SrcMI->getOpcode()) &&
               II->consumesDP(DstMI->getOpcode())) {
      if (RegID >= ARM::D1 && RegID <= ARM::D15 && (RegID - ARM::D0) % 2)
        return 1;
    }
  }
  return 0;
}


void M85Overrides::modifyBypasses(SUnit &ISU) {
  const MachineInstr *SrcMI = ISU.getInstr();
  unsigned SrcOpcode = SrcMI->getOpcode();
  bool isNSWload = II->isNonSubwordLoad(SrcOpcode);

  // Walk the successors looking for latency overrides that are needed
  for (SDep &Dep : ISU.Succs) {

    // Output dependences should have 0 latency, as CortexM85 is able to
    // schedule writers to the same register for simultaneous issue.
    if (zeroOutputDependences(ISU, Dep))
      continue;

    if (memoryRAWHazard(ISU, Dep, 3))
      continue;

    // Ignore dependencies other than data.
    if (Dep.getKind() != SDep::Data)
      continue;

    SUnit &DepSU = *Dep.getSUnit();
    if (DepSU.isBoundaryNode())
      continue;

    if (makeBundleAssumptions(ISU, Dep) == 1)
      continue;

    const MachineInstr *DstMI = DepSU.getInstr();
    unsigned DstOpcode = DstMI->getOpcode();

    // Word loads into a shifted B operand of a load/store cannot bypass
    // their scheduling stage. This cannot be done in the .td file because
    // it requires choosing between -1 and -2 for the ReadAdvance.
    if (isNSWload && II->hasBRegAddrShift(DstOpcode) &&
        DstMI->getOperand(3).getImm() != 0 && // shift operand
        DstMI->getOperand(2).getReg() == Dep.getReg())
      setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);

    if (isNSWload && isMVEVectorInstruction(DstMI)) {
      setBidirLatencies(ISU, Dep, Dep.getLatency() + 1);
    }

    if (II->isMVEIntMAC(DstOpcode) &&
        II->isMVEIntMACMatched(SrcOpcode, DstOpcode) &&
        DstMI->getOperand(0).isReg() &&
        DstMI->getOperand(0).getReg() == Dep.getReg())
      setBidirLatencies(ISU, Dep, Dep.getLatency() - 1);

    // CC setter into conditional producer shouldn't have a latency of more
    // than 0 unless it's due to an implicit read.
    if (Dep.isAssignedRegDep() && Dep.getReg() == ARM::CPSR &&
        TII->isPredicated(*DstMI) && !hasImplicitCPSRUse(DstMI))
      setBidirLatencies(ISU, Dep, 0);

    if (signed ALat = modifyMixedWidthFP(SrcMI, DstMI, Dep.getReg(),
                                         DAG->getSchedClass(&ISU)))
      setBidirLatencies(ISU, Dep, std::max(0, signed(Dep.getLatency()) + ALat));

    if (II->isRev(SrcOpcode)) {
      if (II->isInlineShiftALU(DstOpcode))
        setBidirLatencies(ISU, Dep, 1);
      else if (II->isShift(DstOpcode))
        setBidirLatencies(ISU, Dep, 1);
    }
  }
}

// Add M55 specific overrides for latencies between instructions. Currently it:
// - Adds an extra cycle latency between MVE VMLAV and scalar instructions.
class CortexM55Overrides : public ARMOverrideBypasses {
public:
  CortexM55Overrides(const ARMBaseInstrInfo *TII, AAResults *AA)
      : ARMOverrideBypasses(TII, AA) {}

  void modifyBypasses(SUnit &SU) override {
    MachineInstr *SrcMI = SU.getInstr();
    if (!(SrcMI->getDesc().TSFlags & ARMII::HorizontalReduction))
      return;

    for (SDep &Dep : SU.Succs) {
      if (Dep.getKind() != SDep::Data)
        continue;
      SUnit &DepSU = *Dep.getSUnit();
      if (DepSU.isBoundaryNode())
        continue;
      MachineInstr *DstMI = DepSU.getInstr();

      if (!isMVEVectorInstruction(DstMI) && !DstMI->mayStore())
        setBidirLatencies(SU, Dep, 3);
    }
  }
};

} // end anonymous namespace

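// Apply the subtarget-specific overrides to every real SUnit in the region,
// plus ExitSU when it wraps an instruction, so latencies into the region exit
// can also be adjusted.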
void ARMOverrideBypasses::apply(ScheduleDAGInstrs *DAGInstrs) {
  DAG = DAGInstrs;
  for (SUnit &ISU : DAGInstrs->SUnits) {
    if (ISU.isBoundaryNode())
      continue;
    modifyBypasses(ISU);
  }
  if (DAGInstrs->ExitSU.getInstr())
    modifyBypasses(DAGInstrs->ExitSU);
}

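// Create the mutation for the given subtarget, or return null for cores that
// have no latency overrides.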
std::unique_ptr<ScheduleDAGMutation>
createARMLatencyMutations(const ARMSubtarget &ST, AAResults *AA) {
  if (ST.isCortexM85())
    return std::make_unique<M85Overrides>(ST.getInstrInfo(), AA);
  else if (ST.isCortexM7())
    return std::make_unique<CortexM7Overrides>(ST.getInstrInfo(), AA);
  else if (ST.isCortexM55())
    return std::make_unique<CortexM55Overrides>(ST.getInstrInfo(), AA);

  return nullptr;
}

} // end namespace llvm