1 | //===- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This implements the TargetLoweringBase class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/ADT/BitVector.h" |
14 | #include "llvm/ADT/STLExtras.h" |
15 | #include "llvm/ADT/SmallVector.h" |
16 | #include "llvm/ADT/StringExtras.h" |
17 | #include "llvm/ADT/StringRef.h" |
18 | #include "llvm/ADT/Twine.h" |
19 | #include "llvm/Analysis/Loads.h" |
20 | #include "llvm/Analysis/TargetTransformInfo.h" |
21 | #include "llvm/CodeGen/Analysis.h" |
22 | #include "llvm/CodeGen/ISDOpcodes.h" |
23 | #include "llvm/CodeGen/MachineBasicBlock.h" |
24 | #include "llvm/CodeGen/MachineFrameInfo.h" |
25 | #include "llvm/CodeGen/MachineFunction.h" |
26 | #include "llvm/CodeGen/MachineInstr.h" |
27 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
28 | #include "llvm/CodeGen/MachineMemOperand.h" |
29 | #include "llvm/CodeGen/MachineOperand.h" |
30 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
31 | #include "llvm/CodeGen/RuntimeLibcallUtil.h" |
32 | #include "llvm/CodeGen/StackMaps.h" |
33 | #include "llvm/CodeGen/TargetLowering.h" |
34 | #include "llvm/CodeGen/TargetOpcodes.h" |
35 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
36 | #include "llvm/CodeGen/ValueTypes.h" |
37 | #include "llvm/CodeGenTypes/MachineValueType.h" |
38 | #include "llvm/IR/Attributes.h" |
39 | #include "llvm/IR/CallingConv.h" |
40 | #include "llvm/IR/DataLayout.h" |
41 | #include "llvm/IR/DerivedTypes.h" |
42 | #include "llvm/IR/Function.h" |
43 | #include "llvm/IR/GlobalValue.h" |
44 | #include "llvm/IR/GlobalVariable.h" |
45 | #include "llvm/IR/IRBuilder.h" |
46 | #include "llvm/IR/Module.h" |
47 | #include "llvm/IR/Type.h" |
48 | #include "llvm/Support/Casting.h" |
49 | #include "llvm/Support/CommandLine.h" |
50 | #include "llvm/Support/Compiler.h" |
51 | #include "llvm/Support/ErrorHandling.h" |
52 | #include "llvm/Support/MathExtras.h" |
53 | #include "llvm/Target/TargetMachine.h" |
54 | #include "llvm/Target/TargetOptions.h" |
55 | #include "llvm/TargetParser/Triple.h" |
56 | #include "llvm/Transforms/Utils/SizeOpts.h" |
57 | #include <algorithm> |
58 | #include <cassert> |
59 | #include <cstdint> |
60 | #include <cstring> |
61 | #include <iterator> |
62 | #include <string> |
63 | #include <tuple> |
64 | #include <utility> |
65 | |
66 | using namespace llvm; |
67 | |
68 | static cl::opt<bool> JumpIsExpensiveOverride( |
69 | "jump-is-expensive" , cl::init(Val: false), |
70 | cl::desc("Do not create extra branches to split comparison logic." ), |
71 | cl::Hidden); |
72 | |
73 | static cl::opt<unsigned> MinimumJumpTableEntries |
74 | ("min-jump-table-entries" , cl::init(Val: 4), cl::Hidden, |
75 | cl::desc("Set minimum number of entries to use a jump table." )); |
76 | |
77 | static cl::opt<unsigned> MaximumJumpTableSize |
78 | ("max-jump-table-size" , cl::init(UINT_MAX), cl::Hidden, |
79 | cl::desc("Set maximum size of jump tables." )); |
80 | |
81 | /// Minimum jump table density for normal functions. |
82 | static cl::opt<unsigned> |
83 | JumpTableDensity("jump-table-density" , cl::init(Val: 10), cl::Hidden, |
84 | cl::desc("Minimum density for building a jump table in " |
85 | "a normal function" )); |
86 | |
87 | /// Minimum jump table density for -Os or -Oz functions. |
88 | static cl::opt<unsigned> OptsizeJumpTableDensity( |
89 | "optsize-jump-table-density" , cl::init(Val: 40), cl::Hidden, |
90 | cl::desc("Minimum density for building a jump table in " |
91 | "an optsize function" )); |
92 | |
93 | // FIXME: This option is only to test if the strict fp operation processed |
94 | // correctly by preventing mutating strict fp operation to normal fp operation |
95 | // during development. When the backend supports strict float operation, this |
96 | // option will be meaningless. |
97 | static cl::opt<bool> DisableStrictNodeMutation("disable-strictnode-mutation" , |
98 | cl::desc("Don't mutate strict-float node to a legalize node" ), |
99 | cl::init(Val: false), cl::Hidden); |
100 | |
101 | /// GetFPLibCall - Helper to return the right libcall for the given floating |
102 | /// point type, or UNKNOWN_LIBCALL if there is none. |
103 | RTLIB::Libcall RTLIB::getFPLibCall(EVT VT, |
104 | RTLIB::Libcall Call_F32, |
105 | RTLIB::Libcall Call_F64, |
106 | RTLIB::Libcall Call_F80, |
107 | RTLIB::Libcall Call_F128, |
108 | RTLIB::Libcall Call_PPCF128) { |
109 | return |
110 | VT == MVT::f32 ? Call_F32 : |
111 | VT == MVT::f64 ? Call_F64 : |
112 | VT == MVT::f80 ? Call_F80 : |
113 | VT == MVT::f128 ? Call_F128 : |
114 | VT == MVT::ppcf128 ? Call_PPCF128 : |
115 | RTLIB::UNKNOWN_LIBCALL; |
116 | } |
117 | |
118 | /// getFPEXT - Return the FPEXT_*_* value for the given types, or |
119 | /// UNKNOWN_LIBCALL if there is none. |
120 | RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { |
121 | if (OpVT == MVT::f16) { |
122 | if (RetVT == MVT::f32) |
123 | return FPEXT_F16_F32; |
124 | if (RetVT == MVT::f64) |
125 | return FPEXT_F16_F64; |
126 | if (RetVT == MVT::f80) |
127 | return FPEXT_F16_F80; |
128 | if (RetVT == MVT::f128) |
129 | return FPEXT_F16_F128; |
130 | } else if (OpVT == MVT::f32) { |
131 | if (RetVT == MVT::f64) |
132 | return FPEXT_F32_F64; |
133 | if (RetVT == MVT::f128) |
134 | return FPEXT_F32_F128; |
135 | if (RetVT == MVT::ppcf128) |
136 | return FPEXT_F32_PPCF128; |
137 | } else if (OpVT == MVT::f64) { |
138 | if (RetVT == MVT::f128) |
139 | return FPEXT_F64_F128; |
140 | else if (RetVT == MVT::ppcf128) |
141 | return FPEXT_F64_PPCF128; |
142 | } else if (OpVT == MVT::f80) { |
143 | if (RetVT == MVT::f128) |
144 | return FPEXT_F80_F128; |
145 | } else if (OpVT == MVT::bf16) { |
146 | if (RetVT == MVT::f32) |
147 | return FPEXT_BF16_F32; |
148 | } |
149 | |
150 | return UNKNOWN_LIBCALL; |
151 | } |
152 | |
153 | /// getFPROUND - Return the FPROUND_*_* value for the given types, or |
154 | /// UNKNOWN_LIBCALL if there is none. |
155 | RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { |
156 | if (RetVT == MVT::f16) { |
157 | if (OpVT == MVT::f32) |
158 | return FPROUND_F32_F16; |
159 | if (OpVT == MVT::f64) |
160 | return FPROUND_F64_F16; |
161 | if (OpVT == MVT::f80) |
162 | return FPROUND_F80_F16; |
163 | if (OpVT == MVT::f128) |
164 | return FPROUND_F128_F16; |
165 | if (OpVT == MVT::ppcf128) |
166 | return FPROUND_PPCF128_F16; |
167 | } else if (RetVT == MVT::bf16) { |
168 | if (OpVT == MVT::f32) |
169 | return FPROUND_F32_BF16; |
170 | if (OpVT == MVT::f64) |
171 | return FPROUND_F64_BF16; |
172 | } else if (RetVT == MVT::f32) { |
173 | if (OpVT == MVT::f64) |
174 | return FPROUND_F64_F32; |
175 | if (OpVT == MVT::f80) |
176 | return FPROUND_F80_F32; |
177 | if (OpVT == MVT::f128) |
178 | return FPROUND_F128_F32; |
179 | if (OpVT == MVT::ppcf128) |
180 | return FPROUND_PPCF128_F32; |
181 | } else if (RetVT == MVT::f64) { |
182 | if (OpVT == MVT::f80) |
183 | return FPROUND_F80_F64; |
184 | if (OpVT == MVT::f128) |
185 | return FPROUND_F128_F64; |
186 | if (OpVT == MVT::ppcf128) |
187 | return FPROUND_PPCF128_F64; |
188 | } else if (RetVT == MVT::f80) { |
189 | if (OpVT == MVT::f128) |
190 | return FPROUND_F128_F80; |
191 | } |
192 | |
193 | return UNKNOWN_LIBCALL; |
194 | } |
195 | |
196 | /// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or |
197 | /// UNKNOWN_LIBCALL if there is none. |
198 | RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { |
199 | if (OpVT == MVT::f16) { |
200 | if (RetVT == MVT::i32) |
201 | return FPTOSINT_F16_I32; |
202 | if (RetVT == MVT::i64) |
203 | return FPTOSINT_F16_I64; |
204 | if (RetVT == MVT::i128) |
205 | return FPTOSINT_F16_I128; |
206 | } else if (OpVT == MVT::f32) { |
207 | if (RetVT == MVT::i32) |
208 | return FPTOSINT_F32_I32; |
209 | if (RetVT == MVT::i64) |
210 | return FPTOSINT_F32_I64; |
211 | if (RetVT == MVT::i128) |
212 | return FPTOSINT_F32_I128; |
213 | } else if (OpVT == MVT::f64) { |
214 | if (RetVT == MVT::i32) |
215 | return FPTOSINT_F64_I32; |
216 | if (RetVT == MVT::i64) |
217 | return FPTOSINT_F64_I64; |
218 | if (RetVT == MVT::i128) |
219 | return FPTOSINT_F64_I128; |
220 | } else if (OpVT == MVT::f80) { |
221 | if (RetVT == MVT::i32) |
222 | return FPTOSINT_F80_I32; |
223 | if (RetVT == MVT::i64) |
224 | return FPTOSINT_F80_I64; |
225 | if (RetVT == MVT::i128) |
226 | return FPTOSINT_F80_I128; |
227 | } else if (OpVT == MVT::f128) { |
228 | if (RetVT == MVT::i32) |
229 | return FPTOSINT_F128_I32; |
230 | if (RetVT == MVT::i64) |
231 | return FPTOSINT_F128_I64; |
232 | if (RetVT == MVT::i128) |
233 | return FPTOSINT_F128_I128; |
234 | } else if (OpVT == MVT::ppcf128) { |
235 | if (RetVT == MVT::i32) |
236 | return FPTOSINT_PPCF128_I32; |
237 | if (RetVT == MVT::i64) |
238 | return FPTOSINT_PPCF128_I64; |
239 | if (RetVT == MVT::i128) |
240 | return FPTOSINT_PPCF128_I128; |
241 | } |
242 | return UNKNOWN_LIBCALL; |
243 | } |
244 | |
245 | /// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or |
246 | /// UNKNOWN_LIBCALL if there is none. |
247 | RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { |
248 | if (OpVT == MVT::f16) { |
249 | if (RetVT == MVT::i32) |
250 | return FPTOUINT_F16_I32; |
251 | if (RetVT == MVT::i64) |
252 | return FPTOUINT_F16_I64; |
253 | if (RetVT == MVT::i128) |
254 | return FPTOUINT_F16_I128; |
255 | } else if (OpVT == MVT::f32) { |
256 | if (RetVT == MVT::i32) |
257 | return FPTOUINT_F32_I32; |
258 | if (RetVT == MVT::i64) |
259 | return FPTOUINT_F32_I64; |
260 | if (RetVT == MVT::i128) |
261 | return FPTOUINT_F32_I128; |
262 | } else if (OpVT == MVT::f64) { |
263 | if (RetVT == MVT::i32) |
264 | return FPTOUINT_F64_I32; |
265 | if (RetVT == MVT::i64) |
266 | return FPTOUINT_F64_I64; |
267 | if (RetVT == MVT::i128) |
268 | return FPTOUINT_F64_I128; |
269 | } else if (OpVT == MVT::f80) { |
270 | if (RetVT == MVT::i32) |
271 | return FPTOUINT_F80_I32; |
272 | if (RetVT == MVT::i64) |
273 | return FPTOUINT_F80_I64; |
274 | if (RetVT == MVT::i128) |
275 | return FPTOUINT_F80_I128; |
276 | } else if (OpVT == MVT::f128) { |
277 | if (RetVT == MVT::i32) |
278 | return FPTOUINT_F128_I32; |
279 | if (RetVT == MVT::i64) |
280 | return FPTOUINT_F128_I64; |
281 | if (RetVT == MVT::i128) |
282 | return FPTOUINT_F128_I128; |
283 | } else if (OpVT == MVT::ppcf128) { |
284 | if (RetVT == MVT::i32) |
285 | return FPTOUINT_PPCF128_I32; |
286 | if (RetVT == MVT::i64) |
287 | return FPTOUINT_PPCF128_I64; |
288 | if (RetVT == MVT::i128) |
289 | return FPTOUINT_PPCF128_I128; |
290 | } |
291 | return UNKNOWN_LIBCALL; |
292 | } |
293 | |
294 | /// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or |
295 | /// UNKNOWN_LIBCALL if there is none. |
296 | RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { |
297 | if (OpVT == MVT::i32) { |
298 | if (RetVT == MVT::f16) |
299 | return SINTTOFP_I32_F16; |
300 | if (RetVT == MVT::f32) |
301 | return SINTTOFP_I32_F32; |
302 | if (RetVT == MVT::f64) |
303 | return SINTTOFP_I32_F64; |
304 | if (RetVT == MVT::f80) |
305 | return SINTTOFP_I32_F80; |
306 | if (RetVT == MVT::f128) |
307 | return SINTTOFP_I32_F128; |
308 | if (RetVT == MVT::ppcf128) |
309 | return SINTTOFP_I32_PPCF128; |
310 | } else if (OpVT == MVT::i64) { |
311 | if (RetVT == MVT::f16) |
312 | return SINTTOFP_I64_F16; |
313 | if (RetVT == MVT::f32) |
314 | return SINTTOFP_I64_F32; |
315 | if (RetVT == MVT::f64) |
316 | return SINTTOFP_I64_F64; |
317 | if (RetVT == MVT::f80) |
318 | return SINTTOFP_I64_F80; |
319 | if (RetVT == MVT::f128) |
320 | return SINTTOFP_I64_F128; |
321 | if (RetVT == MVT::ppcf128) |
322 | return SINTTOFP_I64_PPCF128; |
323 | } else if (OpVT == MVT::i128) { |
324 | if (RetVT == MVT::f16) |
325 | return SINTTOFP_I128_F16; |
326 | if (RetVT == MVT::f32) |
327 | return SINTTOFP_I128_F32; |
328 | if (RetVT == MVT::f64) |
329 | return SINTTOFP_I128_F64; |
330 | if (RetVT == MVT::f80) |
331 | return SINTTOFP_I128_F80; |
332 | if (RetVT == MVT::f128) |
333 | return SINTTOFP_I128_F128; |
334 | if (RetVT == MVT::ppcf128) |
335 | return SINTTOFP_I128_PPCF128; |
336 | } |
337 | return UNKNOWN_LIBCALL; |
338 | } |
339 | |
340 | /// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or |
341 | /// UNKNOWN_LIBCALL if there is none. |
342 | RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { |
343 | if (OpVT == MVT::i32) { |
344 | if (RetVT == MVT::f16) |
345 | return UINTTOFP_I32_F16; |
346 | if (RetVT == MVT::f32) |
347 | return UINTTOFP_I32_F32; |
348 | if (RetVT == MVT::f64) |
349 | return UINTTOFP_I32_F64; |
350 | if (RetVT == MVT::f80) |
351 | return UINTTOFP_I32_F80; |
352 | if (RetVT == MVT::f128) |
353 | return UINTTOFP_I32_F128; |
354 | if (RetVT == MVT::ppcf128) |
355 | return UINTTOFP_I32_PPCF128; |
356 | } else if (OpVT == MVT::i64) { |
357 | if (RetVT == MVT::f16) |
358 | return UINTTOFP_I64_F16; |
359 | if (RetVT == MVT::f32) |
360 | return UINTTOFP_I64_F32; |
361 | if (RetVT == MVT::f64) |
362 | return UINTTOFP_I64_F64; |
363 | if (RetVT == MVT::f80) |
364 | return UINTTOFP_I64_F80; |
365 | if (RetVT == MVT::f128) |
366 | return UINTTOFP_I64_F128; |
367 | if (RetVT == MVT::ppcf128) |
368 | return UINTTOFP_I64_PPCF128; |
369 | } else if (OpVT == MVT::i128) { |
370 | if (RetVT == MVT::f16) |
371 | return UINTTOFP_I128_F16; |
372 | if (RetVT == MVT::f32) |
373 | return UINTTOFP_I128_F32; |
374 | if (RetVT == MVT::f64) |
375 | return UINTTOFP_I128_F64; |
376 | if (RetVT == MVT::f80) |
377 | return UINTTOFP_I128_F80; |
378 | if (RetVT == MVT::f128) |
379 | return UINTTOFP_I128_F128; |
380 | if (RetVT == MVT::ppcf128) |
381 | return UINTTOFP_I128_PPCF128; |
382 | } |
383 | return UNKNOWN_LIBCALL; |
384 | } |
385 | |
386 | RTLIB::Libcall RTLIB::getPOWI(EVT RetVT) { |
387 | return getFPLibCall(VT: RetVT, Call_F32: POWI_F32, Call_F64: POWI_F64, Call_F80: POWI_F80, Call_F128: POWI_F128, |
388 | Call_PPCF128: POWI_PPCF128); |
389 | } |
390 | |
391 | RTLIB::Libcall RTLIB::getLDEXP(EVT RetVT) { |
392 | return getFPLibCall(VT: RetVT, Call_F32: LDEXP_F32, Call_F64: LDEXP_F64, Call_F80: LDEXP_F80, Call_F128: LDEXP_F128, |
393 | Call_PPCF128: LDEXP_PPCF128); |
394 | } |
395 | |
396 | RTLIB::Libcall RTLIB::getFREXP(EVT RetVT) { |
397 | return getFPLibCall(VT: RetVT, Call_F32: FREXP_F32, Call_F64: FREXP_F64, Call_F80: FREXP_F80, Call_F128: FREXP_F128, |
398 | Call_PPCF128: FREXP_PPCF128); |
399 | } |
400 | |
401 | RTLIB::Libcall RTLIB::getOutlineAtomicHelper(const Libcall (&LC)[5][4], |
402 | AtomicOrdering Order, |
403 | uint64_t MemSize) { |
404 | unsigned ModeN, ModelN; |
405 | switch (MemSize) { |
406 | case 1: |
407 | ModeN = 0; |
408 | break; |
409 | case 2: |
410 | ModeN = 1; |
411 | break; |
412 | case 4: |
413 | ModeN = 2; |
414 | break; |
415 | case 8: |
416 | ModeN = 3; |
417 | break; |
418 | case 16: |
419 | ModeN = 4; |
420 | break; |
421 | default: |
422 | return RTLIB::UNKNOWN_LIBCALL; |
423 | } |
424 | |
425 | switch (Order) { |
426 | case AtomicOrdering::Monotonic: |
427 | ModelN = 0; |
428 | break; |
429 | case AtomicOrdering::Acquire: |
430 | ModelN = 1; |
431 | break; |
432 | case AtomicOrdering::Release: |
433 | ModelN = 2; |
434 | break; |
435 | case AtomicOrdering::AcquireRelease: |
436 | case AtomicOrdering::SequentiallyConsistent: |
437 | ModelN = 3; |
438 | break; |
439 | default: |
440 | return UNKNOWN_LIBCALL; |
441 | } |
442 | |
443 | return LC[ModeN][ModelN]; |
444 | } |
445 | |
446 | RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, |
447 | MVT VT) { |
448 | if (!VT.isScalarInteger()) |
449 | return UNKNOWN_LIBCALL; |
450 | uint64_t MemSize = VT.getScalarSizeInBits() / 8; |
451 | |
452 | #define LCALLS(A, B) \ |
453 | { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL } |
454 | #define LCALL5(A) \ |
455 | LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16) |
456 | switch (Opc) { |
457 | case ISD::ATOMIC_CMP_SWAP: { |
458 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_CAS)}; |
459 | return getOutlineAtomicHelper(LC, Order, MemSize); |
460 | } |
461 | case ISD::ATOMIC_SWAP: { |
462 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_SWP)}; |
463 | return getOutlineAtomicHelper(LC, Order, MemSize); |
464 | } |
465 | case ISD::ATOMIC_LOAD_ADD: { |
466 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDADD)}; |
467 | return getOutlineAtomicHelper(LC, Order, MemSize); |
468 | } |
469 | case ISD::ATOMIC_LOAD_OR: { |
470 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDSET)}; |
471 | return getOutlineAtomicHelper(LC, Order, MemSize); |
472 | } |
473 | case ISD::ATOMIC_LOAD_CLR: { |
474 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDCLR)}; |
475 | return getOutlineAtomicHelper(LC, Order, MemSize); |
476 | } |
477 | case ISD::ATOMIC_LOAD_XOR: { |
478 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDEOR)}; |
479 | return getOutlineAtomicHelper(LC, Order, MemSize); |
480 | } |
481 | default: |
482 | return UNKNOWN_LIBCALL; |
483 | } |
484 | #undef LCALLS |
485 | #undef LCALL5 |
486 | } |
487 | |
488 | RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) { |
489 | #define OP_TO_LIBCALL(Name, Enum) \ |
490 | case Name: \ |
491 | switch (VT.SimpleTy) { \ |
492 | default: \ |
493 | return UNKNOWN_LIBCALL; \ |
494 | case MVT::i8: \ |
495 | return Enum##_1; \ |
496 | case MVT::i16: \ |
497 | return Enum##_2; \ |
498 | case MVT::i32: \ |
499 | return Enum##_4; \ |
500 | case MVT::i64: \ |
501 | return Enum##_8; \ |
502 | case MVT::i128: \ |
503 | return Enum##_16; \ |
504 | } |
505 | |
506 | switch (Opc) { |
507 | OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET) |
508 | OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP) |
509 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD) |
510 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB) |
511 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND) |
512 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR) |
513 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR) |
514 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND) |
515 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX) |
516 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX) |
517 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN) |
518 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN) |
519 | } |
520 | |
521 | #undef OP_TO_LIBCALL |
522 | |
523 | return UNKNOWN_LIBCALL; |
524 | } |
525 | |
526 | RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { |
527 | switch (ElementSize) { |
528 | case 1: |
529 | return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1; |
530 | case 2: |
531 | return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2; |
532 | case 4: |
533 | return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4; |
534 | case 8: |
535 | return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8; |
536 | case 16: |
537 | return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16; |
538 | default: |
539 | return UNKNOWN_LIBCALL; |
540 | } |
541 | } |
542 | |
543 | RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { |
544 | switch (ElementSize) { |
545 | case 1: |
546 | return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1; |
547 | case 2: |
548 | return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2; |
549 | case 4: |
550 | return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4; |
551 | case 8: |
552 | return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8; |
553 | case 16: |
554 | return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16; |
555 | default: |
556 | return UNKNOWN_LIBCALL; |
557 | } |
558 | } |
559 | |
560 | RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { |
561 | switch (ElementSize) { |
562 | case 1: |
563 | return MEMSET_ELEMENT_UNORDERED_ATOMIC_1; |
564 | case 2: |
565 | return MEMSET_ELEMENT_UNORDERED_ATOMIC_2; |
566 | case 4: |
567 | return MEMSET_ELEMENT_UNORDERED_ATOMIC_4; |
568 | case 8: |
569 | return MEMSET_ELEMENT_UNORDERED_ATOMIC_8; |
570 | case 16: |
571 | return MEMSET_ELEMENT_UNORDERED_ATOMIC_16; |
572 | default: |
573 | return UNKNOWN_LIBCALL; |
574 | } |
575 | } |
576 | |
577 | void RTLIB::initCmpLibcallCCs(ISD::CondCode *CmpLibcallCCs) { |
578 | std::fill(first: CmpLibcallCCs, last: CmpLibcallCCs + RTLIB::UNKNOWN_LIBCALL, |
579 | value: ISD::SETCC_INVALID); |
580 | CmpLibcallCCs[RTLIB::OEQ_F32] = ISD::SETEQ; |
581 | CmpLibcallCCs[RTLIB::OEQ_F64] = ISD::SETEQ; |
582 | CmpLibcallCCs[RTLIB::OEQ_F128] = ISD::SETEQ; |
583 | CmpLibcallCCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ; |
584 | CmpLibcallCCs[RTLIB::UNE_F32] = ISD::SETNE; |
585 | CmpLibcallCCs[RTLIB::UNE_F64] = ISD::SETNE; |
586 | CmpLibcallCCs[RTLIB::UNE_F128] = ISD::SETNE; |
587 | CmpLibcallCCs[RTLIB::UNE_PPCF128] = ISD::SETNE; |
588 | CmpLibcallCCs[RTLIB::OGE_F32] = ISD::SETGE; |
589 | CmpLibcallCCs[RTLIB::OGE_F64] = ISD::SETGE; |
590 | CmpLibcallCCs[RTLIB::OGE_F128] = ISD::SETGE; |
591 | CmpLibcallCCs[RTLIB::OGE_PPCF128] = ISD::SETGE; |
592 | CmpLibcallCCs[RTLIB::OLT_F32] = ISD::SETLT; |
593 | CmpLibcallCCs[RTLIB::OLT_F64] = ISD::SETLT; |
594 | CmpLibcallCCs[RTLIB::OLT_F128] = ISD::SETLT; |
595 | CmpLibcallCCs[RTLIB::OLT_PPCF128] = ISD::SETLT; |
596 | CmpLibcallCCs[RTLIB::OLE_F32] = ISD::SETLE; |
597 | CmpLibcallCCs[RTLIB::OLE_F64] = ISD::SETLE; |
598 | CmpLibcallCCs[RTLIB::OLE_F128] = ISD::SETLE; |
599 | CmpLibcallCCs[RTLIB::OLE_PPCF128] = ISD::SETLE; |
600 | CmpLibcallCCs[RTLIB::OGT_F32] = ISD::SETGT; |
601 | CmpLibcallCCs[RTLIB::OGT_F64] = ISD::SETGT; |
602 | CmpLibcallCCs[RTLIB::OGT_F128] = ISD::SETGT; |
603 | CmpLibcallCCs[RTLIB::OGT_PPCF128] = ISD::SETGT; |
604 | CmpLibcallCCs[RTLIB::UO_F32] = ISD::SETNE; |
605 | CmpLibcallCCs[RTLIB::UO_F64] = ISD::SETNE; |
606 | CmpLibcallCCs[RTLIB::UO_F128] = ISD::SETNE; |
607 | CmpLibcallCCs[RTLIB::UO_PPCF128] = ISD::SETNE; |
608 | } |
609 | |
610 | /// NOTE: The TargetMachine owns TLOF. |
611 | TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) |
612 | : TM(tm), Libcalls(TM.getTargetTriple()) { |
613 | initActions(); |
614 | |
615 | // Perform these initializations only once. |
616 | MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = |
617 | MaxLoadsPerMemcmp = 8; |
618 | MaxGluedStoresPerMemcpy = 0; |
619 | MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = |
620 | MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4; |
621 | HasMultipleConditionRegisters = false; |
622 | HasExtractBitsInsn = false; |
623 | JumpIsExpensive = JumpIsExpensiveOverride; |
624 | PredictableSelectIsExpensive = false; |
625 | EnableExtLdPromotion = false; |
626 | StackPointerRegisterToSaveRestore = 0; |
627 | BooleanContents = UndefinedBooleanContent; |
628 | BooleanFloatContents = UndefinedBooleanContent; |
629 | BooleanVectorContents = UndefinedBooleanContent; |
630 | SchedPreferenceInfo = Sched::ILP; |
631 | GatherAllAliasesMaxDepth = 18; |
632 | IsStrictFPEnabled = DisableStrictNodeMutation; |
633 | MaxBytesForAlignment = 0; |
634 | MaxAtomicSizeInBitsSupported = 0; |
635 | |
636 | // Assume that even with libcalls, no target supports wider than 128 bit |
637 | // division. |
638 | MaxDivRemBitWidthSupported = 128; |
639 | |
640 | MaxLargeFPConvertBitWidthSupported = llvm::IntegerType::MAX_INT_BITS; |
641 | |
642 | MinCmpXchgSizeInBits = 0; |
643 | SupportsUnalignedAtomics = false; |
644 | |
645 | RTLIB::initCmpLibcallCCs(CmpLibcallCCs); |
646 | } |
647 | |
648 | void TargetLoweringBase::initActions() { |
649 | // All operations default to being supported. |
650 | memset(s: OpActions, c: 0, n: sizeof(OpActions)); |
651 | memset(s: LoadExtActions, c: 0, n: sizeof(LoadExtActions)); |
652 | memset(s: TruncStoreActions, c: 0, n: sizeof(TruncStoreActions)); |
653 | memset(s: IndexedModeActions, c: 0, n: sizeof(IndexedModeActions)); |
654 | memset(s: CondCodeActions, c: 0, n: sizeof(CondCodeActions)); |
655 | std::fill(first: std::begin(arr&: RegClassForVT), last: std::end(arr&: RegClassForVT), value: nullptr); |
656 | std::fill(first: std::begin(arr&: TargetDAGCombineArray), |
657 | last: std::end(arr&: TargetDAGCombineArray), value: 0); |
658 | |
659 | // Let extending atomic loads be unsupported by default. |
660 | for (MVT ValVT : MVT::all_valuetypes()) |
661 | for (MVT MemVT : MVT::all_valuetypes()) |
662 | setAtomicLoadExtAction(ExtTypes: {ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT, MemVT, |
663 | Action: Expand); |
664 | |
665 | // We're somewhat special casing MVT::i2 and MVT::i4. Ideally we want to |
666 | // remove this and targets should individually set these types if not legal. |
667 | for (ISD::NodeType NT : enum_seq(Begin: ISD::DELETED_NODE, End: ISD::BUILTIN_OP_END, |
668 | force_iteration_on_noniterable_enum)) { |
669 | for (MVT VT : {MVT::i2, MVT::i4}) |
670 | OpActions[(unsigned)VT.SimpleTy][NT] = Expand; |
671 | } |
672 | for (MVT AVT : MVT::all_valuetypes()) { |
673 | for (MVT VT : {MVT::i2, MVT::i4, MVT::v128i2, MVT::v64i4}) { |
674 | setTruncStoreAction(ValVT: AVT, MemVT: VT, Action: Expand); |
675 | setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: AVT, MemVT: VT, Action: Expand); |
676 | setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: AVT, MemVT: VT, Action: Expand); |
677 | } |
678 | } |
679 | for (unsigned IM = (unsigned)ISD::PRE_INC; |
680 | IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { |
681 | for (MVT VT : {MVT::i2, MVT::i4}) { |
682 | setIndexedLoadAction(IdxModes: IM, VT, Action: Expand); |
683 | setIndexedStoreAction(IdxModes: IM, VT, Action: Expand); |
684 | setIndexedMaskedLoadAction(IdxMode: IM, VT, Action: Expand); |
685 | setIndexedMaskedStoreAction(IdxMode: IM, VT, Action: Expand); |
686 | } |
687 | } |
688 | |
689 | for (MVT VT : MVT::fp_valuetypes()) { |
690 | MVT IntVT = MVT::getIntegerVT(BitWidth: VT.getFixedSizeInBits()); |
691 | if (IntVT.isValid()) { |
692 | setOperationAction(Op: ISD::ATOMIC_SWAP, VT, Action: Promote); |
693 | AddPromotedToType(Opc: ISD::ATOMIC_SWAP, OrigVT: VT, DestVT: IntVT); |
694 | } |
695 | } |
696 | |
697 | // Set default actions for various operations. |
698 | for (MVT VT : MVT::all_valuetypes()) { |
699 | // Default all indexed load / store to expand. |
700 | for (unsigned IM = (unsigned)ISD::PRE_INC; |
701 | IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { |
702 | setIndexedLoadAction(IdxModes: IM, VT, Action: Expand); |
703 | setIndexedStoreAction(IdxModes: IM, VT, Action: Expand); |
704 | setIndexedMaskedLoadAction(IdxMode: IM, VT, Action: Expand); |
705 | setIndexedMaskedStoreAction(IdxMode: IM, VT, Action: Expand); |
706 | } |
707 | |
708 | // Most backends expect to see the node which just returns the value loaded. |
709 | setOperationAction(Op: ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Action: Expand); |
710 | |
711 | // These operations default to expand. |
712 | setOperationAction(Ops: {ISD::FGETSIGN, ISD::CONCAT_VECTORS, |
713 | ISD::FMINNUM, ISD::FMAXNUM, |
714 | ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE, |
715 | ISD::FMINIMUM, ISD::FMAXIMUM, |
716 | ISD::FMAD, ISD::SMIN, |
717 | ISD::SMAX, ISD::UMIN, |
718 | ISD::UMAX, ISD::ABS, |
719 | ISD::FSHL, ISD::FSHR, |
720 | ISD::SADDSAT, ISD::UADDSAT, |
721 | ISD::SSUBSAT, ISD::USUBSAT, |
722 | ISD::SSHLSAT, ISD::USHLSAT, |
723 | ISD::SMULFIX, ISD::SMULFIXSAT, |
724 | ISD::UMULFIX, ISD::UMULFIXSAT, |
725 | ISD::SDIVFIX, ISD::SDIVFIXSAT, |
726 | ISD::UDIVFIX, ISD::UDIVFIXSAT, |
727 | ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, |
728 | ISD::IS_FPCLASS}, |
729 | VT, Action: Expand); |
730 | |
731 | // Overflow operations default to expand |
732 | setOperationAction(Ops: {ISD::SADDO, ISD::SSUBO, ISD::UADDO, ISD::USUBO, |
733 | ISD::SMULO, ISD::UMULO}, |
734 | VT, Action: Expand); |
735 | |
736 | // Carry-using overflow operations default to expand. |
737 | setOperationAction(Ops: {ISD::UADDO_CARRY, ISD::USUBO_CARRY, ISD::SETCCCARRY, |
738 | ISD::SADDO_CARRY, ISD::SSUBO_CARRY}, |
739 | VT, Action: Expand); |
740 | |
741 | // ADDC/ADDE/SUBC/SUBE default to expand. |
742 | setOperationAction(Ops: {ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}, VT, |
743 | Action: Expand); |
744 | |
745 | // [US]CMP default to expand |
746 | setOperationAction(Ops: {ISD::UCMP, ISD::SCMP}, VT, Action: Expand); |
747 | |
748 | // Halving adds |
749 | setOperationAction( |
750 | Ops: {ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, ISD::AVGCEILU}, VT, |
751 | Action: Expand); |
752 | |
753 | // Absolute difference |
754 | setOperationAction(Ops: {ISD::ABDS, ISD::ABDU}, VT, Action: Expand); |
755 | |
756 | // These default to Expand so they will be expanded to CTLZ/CTTZ by default. |
757 | setOperationAction(Ops: {ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, |
758 | Action: Expand); |
759 | |
760 | setOperationAction(Ops: {ISD::BITREVERSE, ISD::PARITY}, VT, Action: Expand); |
761 | |
762 | // These library functions default to expand. |
763 | setOperationAction(Ops: {ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP}, VT, |
764 | Action: Expand); |
765 | |
766 | // These operations default to expand for vector types. |
767 | if (VT.isVector()) |
768 | setOperationAction( |
769 | Ops: {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG, |
770 | ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG, |
771 | ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN, ISD::FACOS, |
772 | ISD::FASIN, ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH}, |
773 | VT, Action: Expand); |
774 | |
775 | // Constrained floating-point operations default to expand. |
776 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
777 | setOperationAction(ISD::STRICT_##DAGN, VT, Expand); |
778 | #include "llvm/IR/ConstrainedOps.def" |
779 | |
780 | // For most targets @llvm.get.dynamic.area.offset just returns 0. |
781 | setOperationAction(Op: ISD::GET_DYNAMIC_AREA_OFFSET, VT, Action: Expand); |
782 | |
783 | // Vector reduction default to expand. |
784 | setOperationAction( |
785 | Ops: {ISD::VECREDUCE_FADD, ISD::VECREDUCE_FMUL, ISD::VECREDUCE_ADD, |
786 | ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, |
787 | ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, |
788 | ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_FMAX, |
789 | ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAXIMUM, ISD::VECREDUCE_FMINIMUM, |
790 | ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL}, |
791 | VT, Action: Expand); |
792 | |
793 | // Named vector shuffles default to expand. |
794 | setOperationAction(Op: ISD::VECTOR_SPLICE, VT, Action: Expand); |
795 | |
796 | // Only some target support this vector operation. Most need to expand it. |
797 | setOperationAction(Op: ISD::VECTOR_COMPRESS, VT, Action: Expand); |
798 | |
799 | // VP operations default to expand. |
800 | #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \ |
801 | setOperationAction(ISD::SDOPC, VT, Expand); |
802 | #include "llvm/IR/VPIntrinsics.def" |
803 | |
804 | // FP environment operations default to expand. |
805 | setOperationAction(Op: ISD::GET_FPENV, VT, Action: Expand); |
806 | setOperationAction(Op: ISD::SET_FPENV, VT, Action: Expand); |
807 | setOperationAction(Op: ISD::RESET_FPENV, VT, Action: Expand); |
808 | } |
809 | |
810 | // Most targets ignore the @llvm.prefetch intrinsic. |
811 | setOperationAction(Op: ISD::PREFETCH, VT: MVT::Other, Action: Expand); |
812 | |
813 | // Most targets also ignore the @llvm.readcyclecounter intrinsic. |
814 | setOperationAction(Op: ISD::READCYCLECOUNTER, VT: MVT::i64, Action: Expand); |
815 | |
816 | // Most targets also ignore the @llvm.readsteadycounter intrinsic. |
817 | setOperationAction(Op: ISD::READSTEADYCOUNTER, VT: MVT::i64, Action: Expand); |
818 | |
819 | // ConstantFP nodes default to expand. Targets can either change this to |
820 | // Legal, in which case all fp constants are legal, or use isFPImmLegal() |
821 | // to optimize expansions for certain constants. |
822 | setOperationAction(Ops: ISD::ConstantFP, |
823 | VTs: {MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128}, |
824 | Action: Expand); |
825 | |
826 | // These library functions default to expand. |
827 | setOperationAction(Ops: {ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, |
828 | ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, |
829 | ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, |
830 | ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT, |
831 | ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN, |
832 | ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH}, |
833 | VTs: {MVT::f32, MVT::f64, MVT::f128}, Action: Expand); |
834 | |
835 | setOperationAction(Ops: {ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH, |
836 | ISD::FSINH, ISD::FTANH}, |
837 | VT: MVT::f16, Action: Promote); |
838 | // Default ISD::TRAP to expand (which turns it into abort). |
839 | setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Expand); |
840 | |
841 | // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand" |
842 | // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. |
843 | setOperationAction(Op: ISD::DEBUGTRAP, VT: MVT::Other, Action: Expand); |
844 | |
845 | setOperationAction(Op: ISD::UBSANTRAP, VT: MVT::Other, Action: Expand); |
846 | |
847 | setOperationAction(Op: ISD::GET_FPENV_MEM, VT: MVT::Other, Action: Expand); |
848 | setOperationAction(Op: ISD::SET_FPENV_MEM, VT: MVT::Other, Action: Expand); |
849 | |
850 | for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { |
851 | setOperationAction(Op: ISD::GET_FPMODE, VT, Action: Expand); |
852 | setOperationAction(Op: ISD::SET_FPMODE, VT, Action: Expand); |
853 | } |
854 | setOperationAction(Op: ISD::RESET_FPMODE, VT: MVT::Other, Action: Expand); |
855 | |
856 | // This one by default will call __clear_cache unless the target |
857 | // wants something different. |
858 | setOperationAction(Op: ISD::CLEAR_CACHE, VT: MVT::Other, Action: LibCall); |
859 | } |
860 | |
861 | MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL, |
862 | EVT) const { |
863 | return MVT::getIntegerVT(BitWidth: DL.getPointerSizeInBits(AS: 0)); |
864 | } |
865 | |
866 | EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, |
867 | const DataLayout &DL) const { |
868 | assert(LHSTy.isInteger() && "Shift amount is not an integer type!" ); |
869 | if (LHSTy.isVector()) |
870 | return LHSTy; |
871 | MVT ShiftVT = getScalarShiftAmountTy(DL, LHSTy); |
872 | // If any possible shift value won't fit in the prefered type, just use |
873 | // something safe. Assume it will be legalized when the shift is expanded. |
874 | if (ShiftVT.getSizeInBits() < Log2_32_Ceil(Value: LHSTy.getSizeInBits())) |
875 | ShiftVT = MVT::i32; |
876 | assert(ShiftVT.getSizeInBits() >= Log2_32_Ceil(LHSTy.getSizeInBits()) && |
877 | "ShiftVT is still too small!" ); |
878 | return ShiftVT; |
879 | } |
880 | |
881 | bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { |
882 | assert(isTypeLegal(VT)); |
883 | switch (Op) { |
884 | default: |
885 | return false; |
886 | case ISD::SDIV: |
887 | case ISD::UDIV: |
888 | case ISD::SREM: |
889 | case ISD::UREM: |
890 | return true; |
891 | } |
892 | } |
893 | |
894 | bool TargetLoweringBase::isFreeAddrSpaceCast(unsigned SrcAS, |
895 | unsigned DestAS) const { |
896 | return TM.isNoopAddrSpaceCast(SrcAS, DestAS); |
897 | } |
898 | |
899 | unsigned TargetLoweringBase::getBitWidthForCttzElements( |
900 | Type *RetTy, ElementCount EC, bool ZeroIsPoison, |
901 | const ConstantRange *VScaleRange) const { |
902 | // Find the smallest "sensible" element type to use for the expansion. |
903 | ConstantRange CR(APInt(64, EC.getKnownMinValue())); |
904 | if (EC.isScalable()) |
905 | CR = CR.umul_sat(Other: *VScaleRange); |
906 | |
907 | if (ZeroIsPoison) |
908 | CR = CR.subtract(CI: APInt(64, 1)); |
909 | |
910 | unsigned EltWidth = RetTy->getScalarSizeInBits(); |
911 | EltWidth = std::min(a: EltWidth, b: (unsigned)CR.getActiveBits()); |
912 | EltWidth = std::max(a: llvm::bit_ceil(Value: EltWidth), b: (unsigned)8); |
913 | |
914 | return EltWidth; |
915 | } |
916 | |
917 | void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) { |
918 | // If the command-line option was specified, ignore this request. |
919 | if (!JumpIsExpensiveOverride.getNumOccurrences()) |
920 | JumpIsExpensive = isExpensive; |
921 | } |
922 | |
923 | TargetLoweringBase::LegalizeKind |
924 | TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { |
925 | // If this is a simple type, use the ComputeRegisterProp mechanism. |
926 | if (VT.isSimple()) { |
927 | MVT SVT = VT.getSimpleVT(); |
928 | assert((unsigned)SVT.SimpleTy < std::size(TransformToType)); |
929 | MVT NVT = TransformToType[SVT.SimpleTy]; |
930 | LegalizeTypeAction LA = ValueTypeActions.getTypeAction(VT: SVT); |
931 | |
932 | assert((LA == TypeLegal || LA == TypeSoftenFloat || |
933 | LA == TypeSoftPromoteHalf || |
934 | (NVT.isVector() || |
935 | ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) && |
936 | "Promote may not follow Expand or Promote" ); |
937 | |
938 | if (LA == TypeSplitVector) |
939 | return LegalizeKind(LA, EVT(SVT).getHalfNumVectorElementsVT(Context)); |
940 | if (LA == TypeScalarizeVector) |
941 | return LegalizeKind(LA, SVT.getVectorElementType()); |
942 | return LegalizeKind(LA, NVT); |
943 | } |
944 | |
945 | // Handle Extended Scalar Types. |
946 | if (!VT.isVector()) { |
947 | assert(VT.isInteger() && "Float types must be simple" ); |
948 | unsigned BitSize = VT.getSizeInBits(); |
949 | // First promote to a power-of-two size, then expand if necessary. |
950 | if (BitSize < 8 || !isPowerOf2_32(Value: BitSize)) { |
951 | EVT NVT = VT.getRoundIntegerType(Context); |
952 | assert(NVT != VT && "Unable to round integer VT" ); |
953 | LegalizeKind NextStep = getTypeConversion(Context, VT: NVT); |
954 | // Avoid multi-step promotion. |
955 | if (NextStep.first == TypePromoteInteger) |
956 | return NextStep; |
957 | // Return rounded integer type. |
958 | return LegalizeKind(TypePromoteInteger, NVT); |
959 | } |
960 | |
961 | return LegalizeKind(TypeExpandInteger, |
962 | EVT::getIntegerVT(Context, BitWidth: VT.getSizeInBits() / 2)); |
963 | } |
964 | |
965 | // Handle vector types. |
966 | ElementCount NumElts = VT.getVectorElementCount(); |
967 | EVT EltVT = VT.getVectorElementType(); |
968 | |
969 | // Vectors with only one element are always scalarized. |
970 | if (NumElts.isScalar()) |
971 | return LegalizeKind(TypeScalarizeVector, EltVT); |
972 | |
973 | // Try to widen vector elements until the element type is a power of two and |
974 | // promote it to a legal type later on, for example: |
975 | // <3 x i8> -> <4 x i8> -> <4 x i32> |
976 | if (EltVT.isInteger()) { |
977 | // Vectors with a number of elements that is not a power of two are always |
978 | // widened, for example <3 x i8> -> <4 x i8>. |
979 | if (!VT.isPow2VectorType()) { |
980 | NumElts = NumElts.coefficientNextPowerOf2(); |
981 | EVT NVT = EVT::getVectorVT(Context, VT: EltVT, EC: NumElts); |
982 | return LegalizeKind(TypeWidenVector, NVT); |
983 | } |
984 | |
985 | // Examine the element type. |
986 | LegalizeKind LK = getTypeConversion(Context, VT: EltVT); |
987 | |
988 | // If type is to be expanded, split the vector. |
989 | // <4 x i140> -> <2 x i140> |
990 | if (LK.first == TypeExpandInteger) { |
991 | if (VT.getVectorElementCount().isScalable()) |
992 | return LegalizeKind(TypeScalarizeScalableVector, EltVT); |
993 | return LegalizeKind(TypeSplitVector, |
994 | VT.getHalfNumVectorElementsVT(Context)); |
995 | } |
996 | |
997 | // Promote the integer element types until a legal vector type is found |
998 | // or until the element integer type is too big. If a legal type was not |
999 | // found, fallback to the usual mechanism of widening/splitting the |
1000 | // vector. |
1001 | EVT OldEltVT = EltVT; |
1002 | while (true) { |
1003 | // Increase the bitwidth of the element to the next pow-of-two |
1004 | // (which is greater than 8 bits). |
1005 | EltVT = EVT::getIntegerVT(Context, BitWidth: 1 + EltVT.getSizeInBits()) |
1006 | .getRoundIntegerType(Context); |
1007 | |
1008 | // Stop trying when getting a non-simple element type. |
1009 | // Note that vector elements may be greater than legal vector element |
1010 | // types. Example: X86 XMM registers hold 64bit element on 32bit |
1011 | // systems. |
1012 | if (!EltVT.isSimple()) |
1013 | break; |
1014 | |
1015 | // Build a new vector type and check if it is legal. |
1016 | MVT NVT = MVT::getVectorVT(VT: EltVT.getSimpleVT(), EC: NumElts); |
1017 | // Found a legal promoted vector type. |
1018 | if (NVT != MVT() && ValueTypeActions.getTypeAction(VT: NVT) == TypeLegal) |
1019 | return LegalizeKind(TypePromoteInteger, |
1020 | EVT::getVectorVT(Context, VT: EltVT, EC: NumElts)); |
1021 | } |
1022 | |
1023 | // Reset the type to the unexpanded type if we did not find a legal vector |
1024 | // type with a promoted vector element type. |
1025 | EltVT = OldEltVT; |
1026 | } |
1027 | |
1028 | // Try to widen the vector until a legal type is found. |
1029 | // If there is no wider legal type, split the vector. |
1030 | while (true) { |
1031 | // Round up to the next power of 2. |
1032 | NumElts = NumElts.coefficientNextPowerOf2(); |
1033 | |
1034 | // If there is no simple vector type with this many elements then there |
1035 | // cannot be a larger legal vector type. Note that this assumes that |
1036 | // there are no skipped intermediate vector types in the simple types. |
1037 | if (!EltVT.isSimple()) |
1038 | break; |
1039 | MVT LargerVector = MVT::getVectorVT(VT: EltVT.getSimpleVT(), EC: NumElts); |
1040 | if (LargerVector == MVT()) |
1041 | break; |
1042 | |
1043 | // If this type is legal then widen the vector. |
1044 | if (ValueTypeActions.getTypeAction(VT: LargerVector) == TypeLegal) |
1045 | return LegalizeKind(TypeWidenVector, LargerVector); |
1046 | } |
1047 | |
1048 | // Widen odd vectors to next power of two. |
1049 | if (!VT.isPow2VectorType()) { |
1050 | EVT NVT = VT.getPow2VectorType(Context); |
1051 | return LegalizeKind(TypeWidenVector, NVT); |
1052 | } |
1053 | |
1054 | if (VT.getVectorElementCount() == ElementCount::getScalable(MinVal: 1)) |
1055 | return LegalizeKind(TypeScalarizeScalableVector, EltVT); |
1056 | |
1057 | // Vectors with illegal element types are expanded. |
1058 | EVT NVT = EVT::getVectorVT(Context, VT: EltVT, |
1059 | EC: VT.getVectorElementCount().divideCoefficientBy(RHS: 2)); |
1060 | return LegalizeKind(TypeSplitVector, NVT); |
1061 | } |
1062 | |
1063 | static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, |
1064 | unsigned &NumIntermediates, |
1065 | MVT &RegisterVT, |
1066 | TargetLoweringBase *TLI) { |
1067 | // Figure out the right, legal destination reg to copy into. |
1068 | ElementCount EC = VT.getVectorElementCount(); |
1069 | MVT EltTy = VT.getVectorElementType(); |
1070 | |
1071 | unsigned NumVectorRegs = 1; |
1072 | |
1073 | // Scalable vectors cannot be scalarized, so splitting or widening is |
1074 | // required. |
1075 | if (VT.isScalableVector() && !isPowerOf2_32(Value: EC.getKnownMinValue())) |
1076 | llvm_unreachable( |
1077 | "Splitting or widening of non-power-of-2 MVTs is not implemented." ); |
1078 | |
1079 | // FIXME: We don't support non-power-of-2-sized vectors for now. |
1080 | // Ideally we could break down into LHS/RHS like LegalizeDAG does. |
1081 | if (!isPowerOf2_32(Value: EC.getKnownMinValue())) { |
1082 | // Split EC to unit size (scalable property is preserved). |
1083 | NumVectorRegs = EC.getKnownMinValue(); |
1084 | EC = ElementCount::getFixed(MinVal: 1); |
1085 | } |
1086 | |
1087 | // Divide the input until we get to a supported size. This will |
1088 | // always end up with an EC that represent a scalar or a scalable |
1089 | // scalar. |
1090 | while (EC.getKnownMinValue() > 1 && |
1091 | !TLI->isTypeLegal(VT: MVT::getVectorVT(VT: EltTy, EC))) { |
1092 | EC = EC.divideCoefficientBy(RHS: 2); |
1093 | NumVectorRegs <<= 1; |
1094 | } |
1095 | |
1096 | NumIntermediates = NumVectorRegs; |
1097 | |
1098 | MVT NewVT = MVT::getVectorVT(VT: EltTy, EC); |
1099 | if (!TLI->isTypeLegal(VT: NewVT)) |
1100 | NewVT = EltTy; |
1101 | IntermediateVT = NewVT; |
1102 | |
1103 | unsigned LaneSizeInBits = NewVT.getScalarSizeInBits(); |
1104 | |
1105 | // Convert sizes such as i33 to i64. |
1106 | LaneSizeInBits = llvm::bit_ceil(Value: LaneSizeInBits); |
1107 | |
1108 | MVT DestVT = TLI->getRegisterType(VT: NewVT); |
1109 | RegisterVT = DestVT; |
1110 | if (EVT(DestVT).bitsLT(VT: NewVT)) // Value is expanded, e.g. i64 -> i16. |
1111 | return NumVectorRegs * (LaneSizeInBits / DestVT.getScalarSizeInBits()); |
1112 | |
1113 | // Otherwise, promotion or legal types use the same number of registers as |
1114 | // the vector decimated to the appropriate level. |
1115 | return NumVectorRegs; |
1116 | } |
1117 | |
1118 | /// isLegalRC - Return true if the value types that can be represented by the |
1119 | /// specified register class are all legal. |
1120 | bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI, |
1121 | const TargetRegisterClass &RC) const { |
1122 | for (const auto *I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I) |
1123 | if (isTypeLegal(VT: *I)) |
1124 | return true; |
1125 | return false; |
1126 | } |
1127 | |
/// Replace/modify any TargetFrameIndex operands with a target-dependent
/// sequence of memory operands that is recognized by PrologEpilogInserter.
///
/// If \p InitialMI has no frame-index operands, \p MBB is returned untouched.
/// Otherwise a new instruction with the same descriptor and debug location is
/// built operand by operand, inserted into \p MBB at the position of
/// \p InitialMI, and \p InitialMI is erased.
///
/// \returns \p MBB in both cases.
MachineBasicBlock *
TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
                                   MachineBasicBlock *MBB) const {
  MachineInstr *MI = &InitialMI;
  MachineFunction &MF = *MI->getMF();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // We're handling multiple types of operands here:
  // PATCHPOINT MetaArgs - live-in, read only, direct
  // STATEPOINT Deopt Spill - live-through, read only, indirect
  // STATEPOINT Deopt Alloca - live-through, read only, direct
  // (We're currently conservative and mark the deopt slots read/write in
  // practice.)
  // STATEPOINT GC Spill - live-through, read/write, indirect
  // STATEPOINT GC Alloca - live-through, read/write, direct
  // The live-in vs live-through is handled already (the live through ones are
  // all stack slots), but we need to handle the different type of stackmap
  // operands and memory effects here.

  // Nothing to rewrite when no operand is a frame index.
  if (llvm::none_of(Range: MI->operands(),
                    P: [](MachineOperand &Operand) { return Operand.isFI(); }))
    return MBB;

  // Start a fresh instruction with the same descriptor and debug location.
  MachineInstrBuilder MIB = BuildMI(MF, MIMD: MI->getDebugLoc(), MCID: MI->getDesc());

  // Inherit previous memory operands.
  MIB.cloneMemRefs(OtherMI: *MI);

  for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isFI()) {
      // Non-frame-index operands are copied over unchanged.
      //
      // Index of Def operand this Use is tied to.
      // Since Defs are coming before Uses, if Use is tied, then
      // index of Def must be smaller than index of that Use.
      // Also, Defs preserve their position in new MI.
      unsigned TiedTo = i;
      if (MO.isReg() && MO.isTied())
        TiedTo = MI->findTiedOperandIdx(OpIdx: i);
      MIB.add(MO);
      // Re-establish the tie against the operand that was just appended; its
      // index in the new instruction may differ from i because frame-index
      // operands expand into several operands.
      if (TiedTo < i)
        MIB->tieOperands(DefIdx: TiedTo, UseIdx: MIB->getNumOperands() - 1);
      continue;
    }

    // NOTE(review): the original remark here described foldMemoryOperand and
    // x86 addressing-mode operands; the code below instead emits stackmap
    // direct/indirect memory-reference operand groups for the frame index.
    int FI = MO.getIndex();

    // Add frame index operands recognized by stackmaps.cpp
    if (MFI.isStatepointSpillSlotObjectIndex(ObjectIdx: FI)) {
      // indirect-mem-ref tag, size, #FI, offset.
      // Used for spills inserted by StatepointLowering. This codepath is not
      // used for patchpoints/stackmaps at all, for these spilling is done via
      // foldMemoryOperand callback only.
      assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
      MIB.addImm(Val: StackMaps::IndirectMemRefOp);
      MIB.addImm(Val: MFI.getObjectSize(ObjectIdx: FI));
      MIB.add(MO);
      MIB.addImm(Val: 0);
    } else {
      // direct-mem-ref tag, #FI, offset.
      // Used by patchpoint, and direct alloca arguments to statepoints
      MIB.addImm(Val: StackMaps::DirectMemRefOp);
      MIB.add(MO);
      MIB.addImm(Val: 0);
    }

    assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");

    // Add a new memory operand for this FI.
    assert(MFI.getObjectOffset(FI) != -1);

    // Note: STATEPOINT MMOs are added during SelectionDAG. STACKMAP, and
    // PATCHPOINT should be updated to do the same. (TODO)
    if (MI->getOpcode() != TargetOpcode::STATEPOINT) {
      // Pointer-sized load from the fixed stack slot, using the slot's
      // alignment.
      auto Flags = MachineMemOperand::MOLoad;
      MachineMemOperand *MMO = MF.getMachineMemOperand(
          PtrInfo: MachinePointerInfo::getFixedStack(MF, FI), F: Flags,
          Size: MF.getDataLayout().getPointerSize(), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
      MIB->addMemOperand(MF, MO: MMO);
    }
  }
  // Put the rebuilt instruction where the old one was, then drop the old one.
  MBB->insert(I: MachineBasicBlock::iterator(MI), MI: MIB);
  MI->eraseFromParent();
  return MBB;
}
1216 | |
1217 | /// findRepresentativeClass - Return the largest legal super-reg register class |
1218 | /// of the register class for the specified type and its associated "cost". |
1219 | // This function is in TargetLowering because it uses RegClassForVT which would |
1220 | // need to be moved to TargetRegisterInfo and would necessitate moving |
1221 | // isTypeLegal over as well - a massive change that would just require |
1222 | // TargetLowering having a TargetRegisterInfo class member that it would use. |
1223 | std::pair<const TargetRegisterClass *, uint8_t> |
1224 | TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI, |
1225 | MVT VT) const { |
1226 | const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; |
1227 | if (!RC) |
1228 | return std::make_pair(x&: RC, y: 0); |
1229 | |
1230 | // Compute the set of all super-register classes. |
1231 | BitVector SuperRegRC(TRI->getNumRegClasses()); |
1232 | for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) |
1233 | SuperRegRC.setBitsInMask(Mask: RCI.getMask()); |
1234 | |
1235 | // Find the first legal register class with the largest spill size. |
1236 | const TargetRegisterClass *BestRC = RC; |
1237 | for (unsigned i : SuperRegRC.set_bits()) { |
1238 | const TargetRegisterClass *SuperRC = TRI->getRegClass(i); |
1239 | // We want the largest possible spill size. |
1240 | if (TRI->getSpillSize(RC: *SuperRC) <= TRI->getSpillSize(RC: *BestRC)) |
1241 | continue; |
1242 | if (!isLegalRC(TRI: *TRI, RC: *SuperRC)) |
1243 | continue; |
1244 | BestRC = SuperRC; |
1245 | } |
1246 | return std::make_pair(x&: BestRC, y: 1); |
1247 | } |
1248 | |
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
///
/// For every simple value type this fills in NumRegistersForVT,
/// RegisterTypeForVT, TransformToType and the type action in
/// ValueTypeActions, and finally RepRegClassForVT / RepRegClassCostForVT.
/// The steps run in a fixed order because later steps read table entries
/// written by earlier ones (for example the float handling reads the integer
/// entries, and the vector handling queries isTypeLegal()).
///
/// \param TRI Register info used to find representative register classes.
void TargetLoweringBase::computeRegisterProperties(
    const TargetRegisterInfo *TRI) {
  // Everything defaults to needing one register.
  for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) {
    NumRegistersForVT[i] = 1;
    RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
  }
  // ...except isVoid, which doesn't need any registers.
  NumRegistersForVT[MVT::isVoid] = 0;

  // Find the largest integer register class.
  // Walk down from the last integer value type until one has a register class.
  unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
  for (; RegClassForVT[LargestIntReg] == nullptr; --LargestIntReg)
    assert(LargestIntReg != MVT::i1 && "No integer registers defined!");

  // Every integer value type larger than this largest register takes twice as
  // many registers to represent as the previous ValueType.
  for (unsigned ExpandedReg = LargestIntReg + 1;
       ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
    NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
    RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
    // Expansion proceeds one step at a time, to the next smaller value type.
    TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
    ValueTypeActions.setTypeAction(VT: (MVT::SimpleValueType)ExpandedReg,
                                   Action: TypeExpandInteger);
  }

  // Inspect all of the ValueType's smaller than the largest integer
  // register to see which ones need promotion.
  // LegalIntReg tracks the smallest legal integer type seen so far while
  // walking downwards; illegal types are promoted to it.
  unsigned LegalIntReg = LargestIntReg;
  for (unsigned IntReg = LargestIntReg - 1;
       IntReg >= (unsigned)MVT::i1; --IntReg) {
    MVT IVT = (MVT::SimpleValueType)IntReg;
    if (isTypeLegal(VT: IVT)) {
      LegalIntReg = IntReg;
    } else {
      RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
        (MVT::SimpleValueType)LegalIntReg;
      ValueTypeActions.setTypeAction(VT: IVT, Action: TypePromoteInteger);
    }
  }

  // ppcf128 type is really two f64's.
  // If f64 is not legal either, ppcf128 is softened using the i128 entries.
  if (!isTypeLegal(VT: MVT::ppcf128)) {
    if (isTypeLegal(VT: MVT::f64)) {
      NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
      RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
      TransformToType[MVT::ppcf128] = MVT::f64;
      ValueTypeActions.setTypeAction(VT: MVT::ppcf128, Action: TypeExpandFloat);
    } else {
      NumRegistersForVT[MVT::ppcf128] = NumRegistersForVT[MVT::i128];
      RegisterTypeForVT[MVT::ppcf128] = RegisterTypeForVT[MVT::i128];
      TransformToType[MVT::ppcf128] = MVT::i128;
      ValueTypeActions.setTypeAction(VT: MVT::ppcf128, Action: TypeSoftenFloat);
    }
  }

  // Decide how to handle f128. If the target does not have native f128 support,
  // expand it to i128 and we will be generating soft float library calls.
  if (!isTypeLegal(VT: MVT::f128)) {
    NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
    RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
    TransformToType[MVT::f128] = MVT::i128;
    ValueTypeActions.setTypeAction(VT: MVT::f128, Action: TypeSoftenFloat);
  }

  // Decide how to handle f80. If the target does not have native f80 support,
  // it is softened and we will be generating soft float library calls. It is
  // given three times the registers of an i32, and TransformToType is set to
  // i32.
  if (!isTypeLegal(VT: MVT::f80)) {
    NumRegistersForVT[MVT::f80] = 3*NumRegistersForVT[MVT::i32];
    RegisterTypeForVT[MVT::f80] = RegisterTypeForVT[MVT::i32];
    TransformToType[MVT::f80] = MVT::i32;
    ValueTypeActions.setTypeAction(VT: MVT::f80, Action: TypeSoftenFloat);
  }

  // Decide how to handle f64. If the target does not have native f64 support,
  // expand it to i64 and we will be generating soft float library calls.
  if (!isTypeLegal(VT: MVT::f64)) {
    NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
    RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
    TransformToType[MVT::f64] = MVT::i64;
    ValueTypeActions.setTypeAction(VT: MVT::f64, Action: TypeSoftenFloat);
  }

  // Decide how to handle f32. If the target does not have native f32 support,
  // expand it to i32 and we will be generating soft float library calls.
  if (!isTypeLegal(VT: MVT::f32)) {
    NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
    RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
    TransformToType[MVT::f32] = MVT::i32;
    ValueTypeActions.setTypeAction(VT: MVT::f32, Action: TypeSoftenFloat);
  }

  // Decide how to handle f16. If the target does not have native f16 support,
  // promote it to f32, because there are no f16 library calls (except for
  // conversions).
  if (!isTypeLegal(VT: MVT::f16)) {
    // Allow targets to control how we legalize half.
    bool SoftPromoteHalfType = softPromoteHalfType();
    bool UseFPRegsForHalfType = !SoftPromoteHalfType || useFPRegsForHalfType();

    // The register entries come from i16 or from f32 depending on whether
    // FP registers are used for half values.
    if (!UseFPRegsForHalfType) {
      NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
      RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
    } else {
      NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
      RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
    }
    TransformToType[MVT::f16] = MVT::f32;
    if (SoftPromoteHalfType) {
      ValueTypeActions.setTypeAction(VT: MVT::f16, Action: TypeSoftPromoteHalf);
    } else {
      ValueTypeActions.setTypeAction(VT: MVT::f16, Action: TypePromoteFloat);
    }
  }

  // Decide how to handle bf16. If the target does not have native bf16 support,
  // promote it to f32, because there are no bf16 library calls (except for
  // converting from f32 to bf16).
  // Unlike f16, the action here is always TypeSoftPromoteHalf.
  if (!isTypeLegal(VT: MVT::bf16)) {
    NumRegistersForVT[MVT::bf16] = NumRegistersForVT[MVT::f32];
    RegisterTypeForVT[MVT::bf16] = RegisterTypeForVT[MVT::f32];
    TransformToType[MVT::bf16] = MVT::f32;
    ValueTypeActions.setTypeAction(VT: MVT::bf16, Action: TypeSoftPromoteHalf);
  }

  // Loop over all of the vector value types to see which need transformations.
  for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType) i;
    if (isTypeLegal(VT))
      continue;

    MVT EltVT = VT.getVectorElementType();
    ElementCount EC = VT.getVectorElementCount();
    bool IsLegalWiderType = false;
    bool IsScalable = VT.isScalableVector();
    // The preferred action selects the entry point into the switch below;
    // each case falls through to the next strategy when it finds no legal
    // type.
    LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
    switch (PreferredAction) {
    case TypePromoteInteger: {
      // Only scan up to the last integer vector type of the same kind
      // (scalable or fixed-length).
      MVT::SimpleValueType EndVT = IsScalable ?
                                   MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE :
                                   MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE;
      // Try to promote the elements of integer vectors. If no legal
      // promotion was found, fall through to the widen-vector method.
      for (unsigned nVT = i + 1;
           (MVT::SimpleValueType)nVT <= EndVT; ++nVT) {
        MVT SVT = (MVT::SimpleValueType) nVT;
        // Promote vectors of integers to vectors with the same number
        // of elements, with a wider element type.
        if (SVT.getScalarSizeInBits() > EltVT.getFixedSizeInBits() &&
            SVT.getVectorElementCount() == EC && isTypeLegal(VT: SVT)) {
          TransformToType[i] = SVT;
          RegisterTypeForVT[i] = SVT;
          NumRegistersForVT[i] = 1;
          ValueTypeActions.setTypeAction(VT, Action: TypePromoteInteger);
          IsLegalWiderType = true;
          break;
        }
      }
      if (IsLegalWiderType)
        break;
      [[fallthrough]];
    }

    case TypeWidenVector:
      if (isPowerOf2_32(Value: EC.getKnownMinValue())) {
        // Try to widen the vector.
        // Look for a later legal vector type with the same element type and
        // scalability but more elements.
        for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
          MVT SVT = (MVT::SimpleValueType) nVT;
          if (SVT.getVectorElementType() == EltVT &&
              SVT.isScalableVector() == IsScalable &&
              SVT.getVectorElementCount().getKnownMinValue() >
                  EC.getKnownMinValue() &&
              isTypeLegal(VT: SVT)) {
            TransformToType[i] = SVT;
            RegisterTypeForVT[i] = SVT;
            NumRegistersForVT[i] = 1;
            ValueTypeActions.setTypeAction(VT, Action: TypeWidenVector);
            IsLegalWiderType = true;
            break;
          }
        }
        if (IsLegalWiderType)
          break;
      } else {
        // Only widen to the next power of 2 to keep consistency with EVT.
        MVT NVT = VT.getPow2VectorType();
        if (isTypeLegal(VT: NVT)) {
          TransformToType[i] = NVT;
          ValueTypeActions.setTypeAction(VT, Action: TypeWidenVector);
          RegisterTypeForVT[i] = NVT;
          NumRegistersForVT[i] = 1;
          break;
        }
      }
      [[fallthrough]];

    case TypeSplitVector:
    case TypeScalarizeVector: {
      // Compute the register breakdown of the vector.
      MVT IntermediateVT;
      MVT RegisterVT;
      unsigned NumIntermediates;
      unsigned NumRegisters = getVectorTypeBreakdownMVT(VT, IntermediateVT,
          NumIntermediates, RegisterVT, TLI: this);
      NumRegistersForVT[i] = NumRegisters;
      // Reading the value back detects truncation by the table's element type.
      assert(NumRegistersForVT[i] == NumRegisters &&
             "NumRegistersForVT size cannot represent NumRegisters!");
      RegisterTypeForVT[i] = RegisterVT;

      MVT NVT = VT.getPow2VectorType();
      if (NVT == VT) {
        // Type is already a power of 2. The default action is to split.
        TransformToType[i] = MVT::Other;
        if (PreferredAction == TypeScalarizeVector)
          ValueTypeActions.setTypeAction(VT, Action: TypeScalarizeVector);
        else if (PreferredAction == TypeSplitVector)
          ValueTypeActions.setTypeAction(VT, Action: TypeSplitVector);
        else if (EC.getKnownMinValue() > 1)
          ValueTypeActions.setTypeAction(VT, Action: TypeSplitVector);
        else
          // A single (known minimum) element cannot be split any further.
          ValueTypeActions.setTypeAction(VT, Action: EC.isScalable()
                                                 ? TypeScalarizeScalableVector
                                                 : TypeScalarizeVector);
      } else {
        // Not a power of 2: widen to the next power-of-2 vector type.
        TransformToType[i] = NVT;
        ValueTypeActions.setTypeAction(VT, Action: TypeWidenVector);
      }
      break;
    }
    default:
      llvm_unreachable("Unknown vector legalization action!");
    }
  }

  // Determine the 'representative' register class for each value type.
  // A representative register class is the largest (meaning one which is
  // not a sub-register class / subreg register class) legal register class for
  // a group of value types. For example, on i386, i8, i16, and i32
  // representative would be GR32; while on x86_64 it's GR64.
  for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) {
    const TargetRegisterClass* RRC;
    uint8_t Cost;
    std::tie(args&: RRC, args&: Cost) = findRepresentativeClass(TRI, VT: (MVT::SimpleValueType)i);
    RepRegClassForVT[i] = RRC;
    RepRegClassCostForVT[i] = Cost;
  }
}
1498 | |
1499 | EVT TargetLoweringBase::getSetCCResultType(const DataLayout &DL, LLVMContext &, |
1500 | EVT VT) const { |
1501 | assert(!VT.isVector() && "No default SetCC type for vectors!" ); |
1502 | return getPointerTy(DL).SimpleTy; |
1503 | } |
1504 | |
1505 | MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const { |
1506 | return MVT::i32; // return the default value |
1507 | } |
1508 | |
1509 | /// getVectorTypeBreakdown - Vector types are broken down into some number of |
1510 | /// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 |
1511 | /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. |
1512 | /// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. |
1513 | /// |
1514 | /// This method returns the number of registers needed, and the VT for each |
1515 | /// register. It also returns the VT and quantity of the intermediate values |
1516 | /// before they are promoted/expanded. |
1517 | unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, |
1518 | EVT VT, EVT &IntermediateVT, |
1519 | unsigned &NumIntermediates, |
1520 | MVT &RegisterVT) const { |
1521 | ElementCount EltCnt = VT.getVectorElementCount(); |
1522 | |
1523 | // If there is a wider vector type with the same element type as this one, |
1524 | // or a promoted vector type that has the same number of elements which |
1525 | // are wider, then we should convert to that legal vector type. |
1526 | // This handles things like <2 x float> -> <4 x float> and |
1527 | // <4 x i1> -> <4 x i32>. |
1528 | LegalizeTypeAction TA = getTypeAction(Context, VT); |
1529 | if (!EltCnt.isScalar() && |
1530 | (TA == TypeWidenVector || TA == TypePromoteInteger)) { |
1531 | EVT RegisterEVT = getTypeToTransformTo(Context, VT); |
1532 | if (isTypeLegal(VT: RegisterEVT)) { |
1533 | IntermediateVT = RegisterEVT; |
1534 | RegisterVT = RegisterEVT.getSimpleVT(); |
1535 | NumIntermediates = 1; |
1536 | return 1; |
1537 | } |
1538 | } |
1539 | |
1540 | // Figure out the right, legal destination reg to copy into. |
1541 | EVT EltTy = VT.getVectorElementType(); |
1542 | |
1543 | unsigned NumVectorRegs = 1; |
1544 | |
1545 | // Scalable vectors cannot be scalarized, so handle the legalisation of the |
1546 | // types like done elsewhere in SelectionDAG. |
1547 | if (EltCnt.isScalable()) { |
1548 | LegalizeKind LK; |
1549 | EVT PartVT = VT; |
1550 | do { |
1551 | // Iterate until we've found a legal (part) type to hold VT. |
1552 | LK = getTypeConversion(Context, VT: PartVT); |
1553 | PartVT = LK.second; |
1554 | } while (LK.first != TypeLegal); |
1555 | |
1556 | if (!PartVT.isVector()) { |
1557 | report_fatal_error( |
1558 | reason: "Don't know how to legalize this scalable vector type" ); |
1559 | } |
1560 | |
1561 | NumIntermediates = |
1562 | divideCeil(Numerator: VT.getVectorElementCount().getKnownMinValue(), |
1563 | Denominator: PartVT.getVectorElementCount().getKnownMinValue()); |
1564 | IntermediateVT = PartVT; |
1565 | RegisterVT = getRegisterType(Context, VT: IntermediateVT); |
1566 | return NumIntermediates; |
1567 | } |
1568 | |
1569 | // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally |
1570 | // we could break down into LHS/RHS like LegalizeDAG does. |
1571 | if (!isPowerOf2_32(Value: EltCnt.getKnownMinValue())) { |
1572 | NumVectorRegs = EltCnt.getKnownMinValue(); |
1573 | EltCnt = ElementCount::getFixed(MinVal: 1); |
1574 | } |
1575 | |
1576 | // Divide the input until we get to a supported size. This will always |
1577 | // end with a scalar if the target doesn't support vectors. |
1578 | while (EltCnt.getKnownMinValue() > 1 && |
1579 | !isTypeLegal(VT: EVT::getVectorVT(Context, VT: EltTy, EC: EltCnt))) { |
1580 | EltCnt = EltCnt.divideCoefficientBy(RHS: 2); |
1581 | NumVectorRegs <<= 1; |
1582 | } |
1583 | |
1584 | NumIntermediates = NumVectorRegs; |
1585 | |
1586 | EVT NewVT = EVT::getVectorVT(Context, VT: EltTy, EC: EltCnt); |
1587 | if (!isTypeLegal(VT: NewVT)) |
1588 | NewVT = EltTy; |
1589 | IntermediateVT = NewVT; |
1590 | |
1591 | MVT DestVT = getRegisterType(Context, VT: NewVT); |
1592 | RegisterVT = DestVT; |
1593 | |
1594 | if (EVT(DestVT).bitsLT(VT: NewVT)) { // Value is expanded, e.g. i64 -> i16. |
1595 | TypeSize NewVTSize = NewVT.getSizeInBits(); |
1596 | // Convert sizes such as i33 to i64. |
1597 | if (!llvm::has_single_bit<uint32_t>(Value: NewVTSize.getKnownMinValue())) |
1598 | NewVTSize = NewVTSize.coefficientNextPowerOf2(); |
1599 | return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); |
1600 | } |
1601 | |
1602 | // Otherwise, promotion or legal types use the same number of registers as |
1603 | // the vector decimated to the appropriate level. |
1604 | return NumVectorRegs; |
1605 | } |
1606 | |
1607 | bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI, |
1608 | uint64_t NumCases, |
1609 | uint64_t Range, |
1610 | ProfileSummaryInfo *PSI, |
1611 | BlockFrequencyInfo *BFI) const { |
1612 | // FIXME: This function check the maximum table size and density, but the |
1613 | // minimum size is not checked. It would be nice if the minimum size is |
1614 | // also combined within this function. Currently, the minimum size check is |
1615 | // performed in findJumpTable() in SelectionDAGBuiler and |
1616 | // getEstimatedNumberOfCaseClusters() in BasicTTIImpl. |
1617 | const bool OptForSize = |
1618 | SI->getParent()->getParent()->hasOptSize() || |
1619 | llvm::shouldOptimizeForSize(BB: SI->getParent(), PSI, BFI); |
1620 | const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize); |
1621 | const unsigned MaxJumpTableSize = getMaximumJumpTableSize(); |
1622 | |
1623 | // Check whether the number of cases is small enough and |
1624 | // the range is dense enough for a jump table. |
1625 | return (OptForSize || Range <= MaxJumpTableSize) && |
1626 | (NumCases * 100 >= Range * MinDensity); |
1627 | } |
1628 | |
1629 | MVT TargetLoweringBase::getPreferredSwitchConditionType(LLVMContext &Context, |
1630 | EVT ConditionVT) const { |
1631 | return getRegisterType(Context, VT: ConditionVT); |
1632 | } |
1633 | |
1634 | /// Get the EVTs and ArgFlags collections that represent the legalized return |
1635 | /// type of the given function. This does not require a DAG or a return value, |
1636 | /// and is suitable for use before any DAGs for the function are constructed. |
1637 | /// TODO: Move this out of TargetLowering.cpp. |
1638 | void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, |
1639 | AttributeList attr, |
1640 | SmallVectorImpl<ISD::OutputArg> &Outs, |
1641 | const TargetLowering &TLI, const DataLayout &DL) { |
1642 | SmallVector<EVT, 4> ValueVTs; |
1643 | ComputeValueVTs(TLI, DL, Ty: ReturnType, ValueVTs); |
1644 | unsigned NumValues = ValueVTs.size(); |
1645 | if (NumValues == 0) return; |
1646 | |
1647 | for (unsigned j = 0, f = NumValues; j != f; ++j) { |
1648 | EVT VT = ValueVTs[j]; |
1649 | ISD::NodeType ExtendKind = ISD::ANY_EXTEND; |
1650 | |
1651 | if (attr.hasRetAttr(Kind: Attribute::SExt)) |
1652 | ExtendKind = ISD::SIGN_EXTEND; |
1653 | else if (attr.hasRetAttr(Kind: Attribute::ZExt)) |
1654 | ExtendKind = ISD::ZERO_EXTEND; |
1655 | |
1656 | if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) |
1657 | VT = TLI.getTypeForExtReturn(Context&: ReturnType->getContext(), VT, ExtendKind); |
1658 | |
1659 | unsigned NumParts = |
1660 | TLI.getNumRegistersForCallingConv(Context&: ReturnType->getContext(), CC, VT); |
1661 | MVT PartVT = |
1662 | TLI.getRegisterTypeForCallingConv(Context&: ReturnType->getContext(), CC, VT); |
1663 | |
1664 | // 'inreg' on function refers to return value |
1665 | ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); |
1666 | if (attr.hasRetAttr(Kind: Attribute::InReg)) |
1667 | Flags.setInReg(); |
1668 | |
1669 | // Propagate extension type if any |
1670 | if (attr.hasRetAttr(Kind: Attribute::SExt)) |
1671 | Flags.setSExt(); |
1672 | else if (attr.hasRetAttr(Kind: Attribute::ZExt)) |
1673 | Flags.setZExt(); |
1674 | |
1675 | for (unsigned i = 0; i < NumParts; ++i) { |
1676 | ISD::ArgFlagsTy OutFlags = Flags; |
1677 | if (NumParts > 1 && i == 0) |
1678 | OutFlags.setSplit(); |
1679 | else if (i == NumParts - 1 && i != 0) |
1680 | OutFlags.setSplitEnd(); |
1681 | |
1682 | Outs.push_back( |
1683 | Elt: ISD::OutputArg(OutFlags, PartVT, VT, /*isfixed=*/true, 0, 0)); |
1684 | } |
1685 | } |
1686 | } |
1687 | |
1688 | /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate |
1689 | /// function arguments in the caller parameter area. This is the actual |
1690 | /// alignment, not its logarithm. |
1691 | uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty, |
1692 | const DataLayout &DL) const { |
1693 | return DL.getABITypeAlign(Ty).value(); |
1694 | } |
1695 | |
1696 | bool TargetLoweringBase::allowsMemoryAccessForAlignment( |
1697 | LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, |
1698 | Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const { |
1699 | // Check if the specified alignment is sufficient based on the data layout. |
1700 | // TODO: While using the data layout works in practice, a better solution |
1701 | // would be to implement this check directly (make this a virtual function). |
1702 | // For example, the ABI alignment may change based on software platform while |
1703 | // this function should only be affected by hardware implementation. |
1704 | Type *Ty = VT.getTypeForEVT(Context); |
1705 | if (VT.isZeroSized() || Alignment >= DL.getABITypeAlign(Ty)) { |
1706 | // Assume that an access that meets the ABI-specified alignment is fast. |
1707 | if (Fast != nullptr) |
1708 | *Fast = 1; |
1709 | return true; |
1710 | } |
1711 | |
1712 | // This is a misaligned access. |
1713 | return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast); |
1714 | } |
1715 | |
1716 | bool TargetLoweringBase::allowsMemoryAccessForAlignment( |
1717 | LLVMContext &Context, const DataLayout &DL, EVT VT, |
1718 | const MachineMemOperand &MMO, unsigned *Fast) const { |
1719 | return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace: MMO.getAddrSpace(), |
1720 | Alignment: MMO.getAlign(), Flags: MMO.getFlags(), Fast); |
1721 | } |
1722 | |
1723 | bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, |
1724 | const DataLayout &DL, EVT VT, |
1725 | unsigned AddrSpace, Align Alignment, |
1726 | MachineMemOperand::Flags Flags, |
1727 | unsigned *Fast) const { |
1728 | return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment, |
1729 | Flags, Fast); |
1730 | } |
1731 | |
1732 | bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, |
1733 | const DataLayout &DL, EVT VT, |
1734 | const MachineMemOperand &MMO, |
1735 | unsigned *Fast) const { |
1736 | return allowsMemoryAccess(Context, DL, VT, AddrSpace: MMO.getAddrSpace(), Alignment: MMO.getAlign(), |
1737 | Flags: MMO.getFlags(), Fast); |
1738 | } |
1739 | |
1740 | bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, |
1741 | const DataLayout &DL, LLT Ty, |
1742 | const MachineMemOperand &MMO, |
1743 | unsigned *Fast) const { |
1744 | EVT VT = getApproximateEVTForLLT(Ty, DL, Ctx&: Context); |
1745 | return allowsMemoryAccess(Context, DL, VT, AddrSpace: MMO.getAddrSpace(), Alignment: MMO.getAlign(), |
1746 | Flags: MMO.getFlags(), Fast); |
1747 | } |
1748 | |
1749 | //===----------------------------------------------------------------------===// |
1750 | // TargetTransformInfo Helpers |
1751 | //===----------------------------------------------------------------------===// |
1752 | |
/// Map an IR instruction opcode to an ISD opcode.
///
/// \param Opcode an IR instruction opcode number, as enumerated by
///               llvm/IR/Instruction.def.
/// \returns the ISD opcode listed for that instruction in the table below, or
///          0 for the instructions the table maps to 0 (terminators, Alloca,
///          GetElementPtr, Fence, the atomic read-modify-write operations,
///          PHI, Call, VAArg, LandingPad and the UserOp placeholders).
///
/// A value that matches none of the cases reaches llvm_unreachable.
int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
  // Rebuild the instruction opcodes as a local enum from Instruction.def so
  // the switch below can name each case by its instruction name.
  enum InstructionOpcodes {
#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
#include "llvm/IR/Instruction.def"
  };
  switch (static_cast<InstructionOpcodes>(Opcode)) {
  // Terminators and exception-handling pads: mapped to 0.
  case Ret: return 0;
  case Br: return 0;
  case Switch: return 0;
  case IndirectBr: return 0;
  case Invoke: return 0;
  case CallBr: return 0;
  case Resume: return 0;
  case Unreachable: return 0;
  case CleanupRet: return 0;
  case CatchRet: return 0;
  case CatchPad: return 0;
  case CatchSwitch: return 0;
  case CleanupPad: return 0;
  // Unary and binary arithmetic / logic.
  case FNeg: return ISD::FNEG;
  case Add: return ISD::ADD;
  case FAdd: return ISD::FADD;
  case Sub: return ISD::SUB;
  case FSub: return ISD::FSUB;
  case Mul: return ISD::MUL;
  case FMul: return ISD::FMUL;
  case UDiv: return ISD::UDIV;
  case SDiv: return ISD::SDIV;
  case FDiv: return ISD::FDIV;
  case URem: return ISD::UREM;
  case SRem: return ISD::SREM;
  case FRem: return ISD::FREM;
  case Shl: return ISD::SHL;
  case LShr: return ISD::SRL;
  case AShr: return ISD::SRA;
  case And: return ISD::AND;
  case Or: return ISD::OR;
  case Xor: return ISD::XOR;
  // Memory operations: only plain Load/Store have a mapping here.
  case Alloca: return 0;
  case Load: return ISD::LOAD;
  case Store: return ISD::STORE;
  case GetElementPtr: return 0;
  case Fence: return 0;
  case AtomicCmpXchg: return 0;
  case AtomicRMW: return 0;
  // Conversions. Note that PtrToInt and IntToPtr are reported as BITCAST.
  case Trunc: return ISD::TRUNCATE;
  case ZExt: return ISD::ZERO_EXTEND;
  case SExt: return ISD::SIGN_EXTEND;
  case FPToUI: return ISD::FP_TO_UINT;
  case FPToSI: return ISD::FP_TO_SINT;
  case UIToFP: return ISD::UINT_TO_FP;
  case SIToFP: return ISD::SINT_TO_FP;
  case FPTrunc: return ISD::FP_ROUND;
  case FPExt: return ISD::FP_EXTEND;
  case PtrToInt: return ISD::BITCAST;
  case IntToPtr: return ISD::BITCAST;
  case BitCast: return ISD::BITCAST;
  case AddrSpaceCast: return ISD::ADDRSPACECAST;
  // Integer and floating-point compares both map to SETCC.
  case ICmp: return ISD::SETCC;
  case FCmp: return ISD::SETCC;
  case PHI: return 0;
  case Call: return 0;
  case Select: return ISD::SELECT;
  case UserOp1: return 0;
  case UserOp2: return 0;
  case VAArg: return 0;
  // Vector and aggregate element operations. ExtractValue and InsertValue are
  // both reported as MERGE_VALUES.
  case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
  case InsertElement: return ISD::INSERT_VECTOR_ELT;
  case ShuffleVector: return ISD::VECTOR_SHUFFLE;
  case ExtractValue: return ISD::MERGE_VALUES;
  case InsertValue: return ISD::MERGE_VALUES;
  case LandingPad: return 0;
  case Freeze: return ISD::FREEZE;
  }

  // Every enumerator returns above; reaching this point means Opcode was not
  // one of the listed instruction opcodes.
  llvm_unreachable("Unknown instruction type encountered!");
}
1831 | |
1832 | Value * |
1833 | TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB, |
1834 | bool UseTLS) const { |
1835 | // compiler-rt provides a variable with a magic name. Targets that do not |
1836 | // link with compiler-rt may also provide such a variable. |
1837 | Module *M = IRB.GetInsertBlock()->getParent()->getParent(); |
1838 | const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr" ; |
1839 | auto UnsafeStackPtr = |
1840 | dyn_cast_or_null<GlobalVariable>(Val: M->getNamedValue(Name: UnsafeStackPtrVar)); |
1841 | |
1842 | Type *StackPtrTy = PointerType::getUnqual(C&: M->getContext()); |
1843 | |
1844 | if (!UnsafeStackPtr) { |
1845 | auto TLSModel = UseTLS ? |
1846 | GlobalValue::InitialExecTLSModel : |
1847 | GlobalValue::NotThreadLocal; |
1848 | // The global variable is not defined yet, define it ourselves. |
1849 | // We use the initial-exec TLS model because we do not support the |
1850 | // variable living anywhere other than in the main executable. |
1851 | UnsafeStackPtr = new GlobalVariable( |
1852 | *M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr, |
1853 | UnsafeStackPtrVar, nullptr, TLSModel); |
1854 | } else { |
1855 | // The variable exists, check its type and attributes. |
1856 | if (UnsafeStackPtr->getValueType() != StackPtrTy) |
1857 | report_fatal_error(reason: Twine(UnsafeStackPtrVar) + " must have void* type" ); |
1858 | if (UseTLS != UnsafeStackPtr->isThreadLocal()) |
1859 | report_fatal_error(reason: Twine(UnsafeStackPtrVar) + " must " + |
1860 | (UseTLS ? "" : "not " ) + "be thread-local" ); |
1861 | } |
1862 | return UnsafeStackPtr; |
1863 | } |
1864 | |
1865 | Value * |
1866 | TargetLoweringBase::getSafeStackPointerLocation(IRBuilderBase &IRB) const { |
1867 | if (!TM.getTargetTriple().isAndroid()) |
1868 | return getDefaultSafeStackPointerLocation(IRB, UseTLS: true); |
1869 | |
1870 | // Android provides a libc function to retrieve the address of the current |
1871 | // thread's unsafe stack pointer. |
1872 | Module *M = IRB.GetInsertBlock()->getParent()->getParent(); |
1873 | auto *PtrTy = PointerType::getUnqual(C&: M->getContext()); |
1874 | FunctionCallee Fn = |
1875 | M->getOrInsertFunction(Name: "__safestack_pointer_address" , RetTy: PtrTy); |
1876 | return IRB.CreateCall(Callee: Fn); |
1877 | } |
1878 | |
1879 | //===----------------------------------------------------------------------===// |
1880 | // Loop Strength Reduction hooks |
1881 | //===----------------------------------------------------------------------===// |
1882 | |
1883 | /// isLegalAddressingMode - Return true if the addressing mode represented |
1884 | /// by AM is legal for this target, for a load/store of the specified type. |
1885 | bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL, |
1886 | const AddrMode &AM, Type *Ty, |
1887 | unsigned AS, Instruction *I) const { |
1888 | // The default implementation of this implements a conservative RISCy, r+r and |
1889 | // r+i addr mode. |
1890 | |
1891 | // Scalable offsets not supported |
1892 | if (AM.ScalableOffset) |
1893 | return false; |
1894 | |
1895 | // Allows a sign-extended 16-bit immediate field. |
1896 | if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) |
1897 | return false; |
1898 | |
1899 | // No global is ever allowed as a base. |
1900 | if (AM.BaseGV) |
1901 | return false; |
1902 | |
1903 | // Only support r+r, |
1904 | switch (AM.Scale) { |
1905 | case 0: // "r+i" or just "i", depending on HasBaseReg. |
1906 | break; |
1907 | case 1: |
1908 | if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. |
1909 | return false; |
1910 | // Otherwise we have r+r or r+i. |
1911 | break; |
1912 | case 2: |
1913 | if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. |
1914 | return false; |
1915 | // Allow 2*r as r+r. |
1916 | break; |
1917 | default: // Don't allow n * r |
1918 | return false; |
1919 | } |
1920 | |
1921 | return true; |
1922 | } |
1923 | |
1924 | //===----------------------------------------------------------------------===// |
1925 | // Stack Protector |
1926 | //===----------------------------------------------------------------------===// |
1927 | |
1928 | // For OpenBSD return its special guard variable. Otherwise return nullptr, |
1929 | // so that SelectionDAG handle SSP. |
1930 | Value *TargetLoweringBase::getIRStackGuard(IRBuilderBase &IRB) const { |
1931 | if (getTargetMachine().getTargetTriple().isOSOpenBSD()) { |
1932 | Module &M = *IRB.GetInsertBlock()->getParent()->getParent(); |
1933 | PointerType *PtrTy = PointerType::getUnqual(C&: M.getContext()); |
1934 | Constant *C = M.getOrInsertGlobal(Name: "__guard_local" , Ty: PtrTy); |
1935 | if (GlobalVariable *G = dyn_cast_or_null<GlobalVariable>(Val: C)) |
1936 | G->setVisibility(GlobalValue::HiddenVisibility); |
1937 | return C; |
1938 | } |
1939 | return nullptr; |
1940 | } |
1941 | |
1942 | // Currently only support "standard" __stack_chk_guard. |
1943 | // TODO: add LOAD_STACK_GUARD support. |
1944 | void TargetLoweringBase::insertSSPDeclarations(Module &M) const { |
1945 | if (!M.getNamedValue(Name: "__stack_chk_guard" )) { |
1946 | auto *GV = new GlobalVariable(M, PointerType::getUnqual(C&: M.getContext()), |
1947 | false, GlobalVariable::ExternalLinkage, |
1948 | nullptr, "__stack_chk_guard" ); |
1949 | |
1950 | // FreeBSD has "__stack_chk_guard" defined externally on libc.so |
1951 | if (M.getDirectAccessExternalData() && |
1952 | !TM.getTargetTriple().isWindowsGNUEnvironment() && |
1953 | !(TM.getTargetTriple().isPPC64() && |
1954 | TM.getTargetTriple().isOSFreeBSD()) && |
1955 | (!TM.getTargetTriple().isOSDarwin() || |
1956 | TM.getRelocationModel() == Reloc::Static)) |
1957 | GV->setDSOLocal(true); |
1958 | } |
1959 | } |
1960 | |
1961 | // Currently only support "standard" __stack_chk_guard. |
1962 | // TODO: add LOAD_STACK_GUARD support. |
1963 | Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const { |
1964 | return M.getNamedValue(Name: "__stack_chk_guard" ); |
1965 | } |
1966 | |
1967 | Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { |
1968 | return nullptr; |
1969 | } |
1970 | |
1971 | unsigned TargetLoweringBase::getMinimumJumpTableEntries() const { |
1972 | return MinimumJumpTableEntries; |
1973 | } |
1974 | |
1975 | void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) { |
1976 | MinimumJumpTableEntries = Val; |
1977 | } |
1978 | |
1979 | unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const { |
1980 | return OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; |
1981 | } |
1982 | |
1983 | unsigned TargetLoweringBase::getMaximumJumpTableSize() const { |
1984 | return MaximumJumpTableSize; |
1985 | } |
1986 | |
1987 | void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) { |
1988 | MaximumJumpTableSize = Val; |
1989 | } |
1990 | |
1991 | bool TargetLoweringBase::isJumpTableRelative() const { |
1992 | return getTargetMachine().isPositionIndependent(); |
1993 | } |
1994 | |
1995 | Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const { |
1996 | if (TM.Options.LoopAlignment) |
1997 | return Align(TM.Options.LoopAlignment); |
1998 | return PrefLoopAlignment; |
1999 | } |
2000 | |
2001 | unsigned TargetLoweringBase::getMaxPermittedBytesForAlignment( |
2002 | MachineBasicBlock *MBB) const { |
2003 | return MaxBytesForAlignment; |
2004 | } |
2005 | |
2006 | //===----------------------------------------------------------------------===// |
2007 | // Reciprocal Estimates |
2008 | //===----------------------------------------------------------------------===// |
2009 | |
2010 | /// Get the reciprocal estimate attribute string for a function that will |
2011 | /// override the target defaults. |
2012 | static StringRef getRecipEstimateForFunc(MachineFunction &MF) { |
2013 | const Function &F = MF.getFunction(); |
2014 | return F.getFnAttribute(Kind: "reciprocal-estimates" ).getValueAsString(); |
2015 | } |
2016 | |
2017 | /// Construct a string for the given reciprocal operation of the given type. |
2018 | /// This string should match the corresponding option to the front-end's |
2019 | /// "-mrecip" flag assuming those strings have been passed through in an |
2020 | /// attribute string. For example, "vec-divf" for a division of a vXf32. |
2021 | static std::string getReciprocalOpName(bool IsSqrt, EVT VT) { |
2022 | std::string Name = VT.isVector() ? "vec-" : "" ; |
2023 | |
2024 | Name += IsSqrt ? "sqrt" : "div" ; |
2025 | |
2026 | // TODO: Handle other float types? |
2027 | if (VT.getScalarType() == MVT::f64) { |
2028 | Name += "d" ; |
2029 | } else if (VT.getScalarType() == MVT::f16) { |
2030 | Name += "h" ; |
2031 | } else { |
2032 | assert(VT.getScalarType() == MVT::f32 && |
2033 | "Unexpected FP type for reciprocal estimate" ); |
2034 | Name += "f" ; |
2035 | } |
2036 | |
2037 | return Name; |
2038 | } |
2039 | |
2040 | /// Return the character position and value (a single numeric character) of a |
2041 | /// customized refinement operation in the input string if it exists. Return |
2042 | /// false if there is no customized refinement step count. |
2043 | static bool parseRefinementStep(StringRef In, size_t &Position, |
2044 | uint8_t &Value) { |
2045 | const char RefStepToken = ':'; |
2046 | Position = In.find(C: RefStepToken); |
2047 | if (Position == StringRef::npos) |
2048 | return false; |
2049 | |
2050 | StringRef RefStepString = In.substr(Start: Position + 1); |
2051 | // Allow exactly one numeric character for the additional refinement |
2052 | // step parameter. |
2053 | if (RefStepString.size() == 1) { |
2054 | char RefStepChar = RefStepString[0]; |
2055 | if (isDigit(C: RefStepChar)) { |
2056 | Value = RefStepChar - '0'; |
2057 | return true; |
2058 | } |
2059 | } |
2060 | report_fatal_error(reason: "Invalid refinement step for -recip." ); |
2061 | } |
2062 | |
2063 | /// For the input attribute string, return one of the ReciprocalEstimate enum |
2064 | /// status values (enabled, disabled, or not specified) for this operation on |
2065 | /// the specified data type. |
2066 | static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) { |
2067 | if (Override.empty()) |
2068 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2069 | |
2070 | SmallVector<StringRef, 4> OverrideVector; |
2071 | Override.split(A&: OverrideVector, Separator: ','); |
2072 | unsigned NumArgs = OverrideVector.size(); |
2073 | |
2074 | // Check if "all", "none", or "default" was specified. |
2075 | if (NumArgs == 1) { |
2076 | // Look for an optional setting of the number of refinement steps needed |
2077 | // for this type of reciprocal operation. |
2078 | size_t RefPos; |
2079 | uint8_t RefSteps; |
2080 | if (parseRefinementStep(In: Override, Position&: RefPos, Value&: RefSteps)) { |
2081 | // Split the string for further processing. |
2082 | Override = Override.substr(Start: 0, N: RefPos); |
2083 | } |
2084 | |
2085 | // All reciprocal types are enabled. |
2086 | if (Override == "all" ) |
2087 | return TargetLoweringBase::ReciprocalEstimate::Enabled; |
2088 | |
2089 | // All reciprocal types are disabled. |
2090 | if (Override == "none" ) |
2091 | return TargetLoweringBase::ReciprocalEstimate::Disabled; |
2092 | |
2093 | // Target defaults for enablement are used. |
2094 | if (Override == "default" ) |
2095 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2096 | } |
2097 | |
2098 | // The attribute string may omit the size suffix ('f'/'d'). |
2099 | std::string VTName = getReciprocalOpName(IsSqrt, VT); |
2100 | std::string VTNameNoSize = VTName; |
2101 | VTNameNoSize.pop_back(); |
2102 | static const char DisabledPrefix = '!'; |
2103 | |
2104 | for (StringRef RecipType : OverrideVector) { |
2105 | size_t RefPos; |
2106 | uint8_t RefSteps; |
2107 | if (parseRefinementStep(In: RecipType, Position&: RefPos, Value&: RefSteps)) |
2108 | RecipType = RecipType.substr(Start: 0, N: RefPos); |
2109 | |
2110 | // Ignore the disablement token for string matching. |
2111 | bool IsDisabled = RecipType[0] == DisabledPrefix; |
2112 | if (IsDisabled) |
2113 | RecipType = RecipType.substr(Start: 1); |
2114 | |
2115 | if (RecipType == VTName || RecipType == VTNameNoSize) |
2116 | return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled |
2117 | : TargetLoweringBase::ReciprocalEstimate::Enabled; |
2118 | } |
2119 | |
2120 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2121 | } |
2122 | |
2123 | /// For the input attribute string, return the customized refinement step count |
2124 | /// for this operation on the specified data type. If the step count does not |
2125 | /// exist, return the ReciprocalEstimate enum value for unspecified. |
2126 | static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) { |
2127 | if (Override.empty()) |
2128 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2129 | |
2130 | SmallVector<StringRef, 4> OverrideVector; |
2131 | Override.split(A&: OverrideVector, Separator: ','); |
2132 | unsigned NumArgs = OverrideVector.size(); |
2133 | |
2134 | // Check if "all", "default", or "none" was specified. |
2135 | if (NumArgs == 1) { |
2136 | // Look for an optional setting of the number of refinement steps needed |
2137 | // for this type of reciprocal operation. |
2138 | size_t RefPos; |
2139 | uint8_t RefSteps; |
2140 | if (!parseRefinementStep(In: Override, Position&: RefPos, Value&: RefSteps)) |
2141 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2142 | |
2143 | // Split the string for further processing. |
2144 | Override = Override.substr(Start: 0, N: RefPos); |
2145 | assert(Override != "none" && |
2146 | "Disabled reciprocals, but specifed refinement steps?" ); |
2147 | |
2148 | // If this is a general override, return the specified number of steps. |
2149 | if (Override == "all" || Override == "default" ) |
2150 | return RefSteps; |
2151 | } |
2152 | |
2153 | // The attribute string may omit the size suffix ('f'/'d'). |
2154 | std::string VTName = getReciprocalOpName(IsSqrt, VT); |
2155 | std::string VTNameNoSize = VTName; |
2156 | VTNameNoSize.pop_back(); |
2157 | |
2158 | for (StringRef RecipType : OverrideVector) { |
2159 | size_t RefPos; |
2160 | uint8_t RefSteps; |
2161 | if (!parseRefinementStep(In: RecipType, Position&: RefPos, Value&: RefSteps)) |
2162 | continue; |
2163 | |
2164 | RecipType = RecipType.substr(Start: 0, N: RefPos); |
2165 | if (RecipType == VTName || RecipType == VTNameNoSize) |
2166 | return RefSteps; |
2167 | } |
2168 | |
2169 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2170 | } |
2171 | |
2172 | int TargetLoweringBase::getRecipEstimateSqrtEnabled(EVT VT, |
2173 | MachineFunction &MF) const { |
2174 | return getOpEnabled(IsSqrt: true, VT, Override: getRecipEstimateForFunc(MF)); |
2175 | } |
2176 | |
2177 | int TargetLoweringBase::getRecipEstimateDivEnabled(EVT VT, |
2178 | MachineFunction &MF) const { |
2179 | return getOpEnabled(IsSqrt: false, VT, Override: getRecipEstimateForFunc(MF)); |
2180 | } |
2181 | |
2182 | int TargetLoweringBase::getSqrtRefinementSteps(EVT VT, |
2183 | MachineFunction &MF) const { |
2184 | return getOpRefinementSteps(IsSqrt: true, VT, Override: getRecipEstimateForFunc(MF)); |
2185 | } |
2186 | |
2187 | int TargetLoweringBase::getDivRefinementSteps(EVT VT, |
2188 | MachineFunction &MF) const { |
2189 | return getOpRefinementSteps(IsSqrt: false, VT, Override: getRecipEstimateForFunc(MF)); |
2190 | } |
2191 | |
2192 | bool TargetLoweringBase::isLoadBitCastBeneficial( |
2193 | EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, |
2194 | const MachineMemOperand &MMO) const { |
2195 | // Single-element vectors are scalarized, so we should generally avoid having |
2196 | // any memory operations on such types, as they would get scalarized too. |
2197 | if (LoadVT.isFixedLengthVector() && BitcastVT.isFixedLengthVector() && |
2198 | BitcastVT.getVectorNumElements() == 1) |
2199 | return false; |
2200 | |
2201 | // Don't do if we could do an indexed load on the original type, but not on |
2202 | // the new one. |
2203 | if (!LoadVT.isSimple() || !BitcastVT.isSimple()) |
2204 | return true; |
2205 | |
2206 | MVT LoadMVT = LoadVT.getSimpleVT(); |
2207 | |
2208 | // Don't bother doing this if it's just going to be promoted again later, as |
2209 | // doing so might interfere with other combines. |
2210 | if (getOperationAction(Op: ISD::LOAD, VT: LoadMVT) == Promote && |
2211 | getTypeToPromoteTo(Op: ISD::LOAD, VT: LoadMVT) == BitcastVT.getSimpleVT()) |
2212 | return false; |
2213 | |
2214 | unsigned Fast = 0; |
2215 | return allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT: BitcastVT, |
2216 | MMO, Fast: &Fast) && |
2217 | Fast; |
2218 | } |
2219 | |
2220 | void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const { |
2221 | MF.getRegInfo().freezeReservedRegs(); |
2222 | } |
2223 | |
2224 | MachineMemOperand::Flags TargetLoweringBase::getLoadMemOperandFlags( |
2225 | const LoadInst &LI, const DataLayout &DL, AssumptionCache *AC, |
2226 | const TargetLibraryInfo *LibInfo) const { |
2227 | MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad; |
2228 | if (LI.isVolatile()) |
2229 | Flags |= MachineMemOperand::MOVolatile; |
2230 | |
2231 | if (LI.hasMetadata(KindID: LLVMContext::MD_nontemporal)) |
2232 | Flags |= MachineMemOperand::MONonTemporal; |
2233 | |
2234 | if (LI.hasMetadata(KindID: LLVMContext::MD_invariant_load)) |
2235 | Flags |= MachineMemOperand::MOInvariant; |
2236 | |
2237 | if (isDereferenceableAndAlignedPointer(V: LI.getPointerOperand(), Ty: LI.getType(), |
2238 | Alignment: LI.getAlign(), DL, CtxI: &LI, AC, |
2239 | /*DT=*/nullptr, TLI: LibInfo)) |
2240 | Flags |= MachineMemOperand::MODereferenceable; |
2241 | |
2242 | Flags |= getTargetMMOFlags(I: LI); |
2243 | return Flags; |
2244 | } |
2245 | |
2246 | MachineMemOperand::Flags |
2247 | TargetLoweringBase::getStoreMemOperandFlags(const StoreInst &SI, |
2248 | const DataLayout &DL) const { |
2249 | MachineMemOperand::Flags Flags = MachineMemOperand::MOStore; |
2250 | |
2251 | if (SI.isVolatile()) |
2252 | Flags |= MachineMemOperand::MOVolatile; |
2253 | |
2254 | if (SI.hasMetadata(KindID: LLVMContext::MD_nontemporal)) |
2255 | Flags |= MachineMemOperand::MONonTemporal; |
2256 | |
2257 | // FIXME: Not preserving dereferenceable |
2258 | Flags |= getTargetMMOFlags(I: SI); |
2259 | return Flags; |
2260 | } |
2261 | |
2262 | MachineMemOperand::Flags |
2263 | TargetLoweringBase::getAtomicMemOperandFlags(const Instruction &AI, |
2264 | const DataLayout &DL) const { |
2265 | auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; |
2266 | |
2267 | if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Val: &AI)) { |
2268 | if (RMW->isVolatile()) |
2269 | Flags |= MachineMemOperand::MOVolatile; |
2270 | } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: &AI)) { |
2271 | if (CmpX->isVolatile()) |
2272 | Flags |= MachineMemOperand::MOVolatile; |
2273 | } else |
2274 | llvm_unreachable("not an atomic instruction" ); |
2275 | |
2276 | // FIXME: Not preserving dereferenceable |
2277 | Flags |= getTargetMMOFlags(I: AI); |
2278 | return Flags; |
2279 | } |
2280 | |
2281 | Instruction *TargetLoweringBase::emitLeadingFence(IRBuilderBase &Builder, |
2282 | Instruction *Inst, |
2283 | AtomicOrdering Ord) const { |
2284 | if (isReleaseOrStronger(AO: Ord) && Inst->hasAtomicStore()) |
2285 | return Builder.CreateFence(Ordering: Ord); |
2286 | else |
2287 | return nullptr; |
2288 | } |
2289 | |
2290 | Instruction *TargetLoweringBase::emitTrailingFence(IRBuilderBase &Builder, |
2291 | Instruction *Inst, |
2292 | AtomicOrdering Ord) const { |
2293 | if (isAcquireOrStronger(AO: Ord)) |
2294 | return Builder.CreateFence(Ordering: Ord); |
2295 | else |
2296 | return nullptr; |
2297 | } |
2298 | |
2299 | //===----------------------------------------------------------------------===// |
2300 | // GlobalISel Hooks |
2301 | //===----------------------------------------------------------------------===// |
2302 | |
2303 | bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI, |
2304 | const TargetTransformInfo *TTI) const { |
2305 | auto &MF = *MI.getMF(); |
2306 | auto &MRI = MF.getRegInfo(); |
2307 | // Assuming a spill and reload of a value has a cost of 1 instruction each, |
2308 | // this helper function computes the maximum number of uses we should consider |
2309 | // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We |
2310 | // break even in terms of code size when the original MI has 2 users vs |
2311 | // choosing to potentially spill. Any more than 2 users we we have a net code |
2312 | // size increase. This doesn't take into account register pressure though. |
2313 | auto maxUses = [](unsigned RematCost) { |
2314 | // A cost of 1 means remats are basically free. |
2315 | if (RematCost == 1) |
2316 | return std::numeric_limits<unsigned>::max(); |
2317 | if (RematCost == 2) |
2318 | return 2U; |
2319 | |
2320 | // Remat is too expensive, only sink if there's one user. |
2321 | if (RematCost > 2) |
2322 | return 1U; |
2323 | llvm_unreachable("Unexpected remat cost" ); |
2324 | }; |
2325 | |
2326 | switch (MI.getOpcode()) { |
2327 | default: |
2328 | return false; |
2329 | // Constants-like instructions should be close to their users. |
2330 | // We don't want long live-ranges for them. |
2331 | case TargetOpcode::G_CONSTANT: |
2332 | case TargetOpcode::G_FCONSTANT: |
2333 | case TargetOpcode::G_FRAME_INDEX: |
2334 | case TargetOpcode::G_INTTOPTR: |
2335 | return true; |
2336 | case TargetOpcode::G_GLOBAL_VALUE: { |
2337 | unsigned RematCost = TTI->getGISelRematGlobalCost(); |
2338 | Register Reg = MI.getOperand(i: 0).getReg(); |
2339 | unsigned MaxUses = maxUses(RematCost); |
2340 | if (MaxUses == UINT_MAX) |
2341 | return true; // Remats are "free" so always localize. |
2342 | return MRI.hasAtMostUserInstrs(Reg, MaxUsers: MaxUses); |
2343 | } |
2344 | } |
2345 | } |
2346 | |