1 | //===- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This implements the TargetLoweringBase class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/ADT/BitVector.h" |
14 | #include "llvm/ADT/STLExtras.h" |
15 | #include "llvm/ADT/SmallVector.h" |
16 | #include "llvm/ADT/StringExtras.h" |
17 | #include "llvm/ADT/StringRef.h" |
18 | #include "llvm/ADT/Twine.h" |
19 | #include "llvm/Analysis/Loads.h" |
20 | #include "llvm/Analysis/TargetTransformInfo.h" |
21 | #include "llvm/CodeGen/Analysis.h" |
22 | #include "llvm/CodeGen/ISDOpcodes.h" |
23 | #include "llvm/CodeGen/MachineBasicBlock.h" |
24 | #include "llvm/CodeGen/MachineFrameInfo.h" |
25 | #include "llvm/CodeGen/MachineFunction.h" |
26 | #include "llvm/CodeGen/MachineInstr.h" |
27 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
28 | #include "llvm/CodeGen/MachineMemOperand.h" |
29 | #include "llvm/CodeGen/MachineOperand.h" |
30 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
31 | #include "llvm/CodeGen/RuntimeLibcallUtil.h" |
32 | #include "llvm/CodeGen/StackMaps.h" |
33 | #include "llvm/CodeGen/TargetLowering.h" |
34 | #include "llvm/CodeGen/TargetOpcodes.h" |
35 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
36 | #include "llvm/CodeGen/ValueTypes.h" |
37 | #include "llvm/CodeGenTypes/MachineValueType.h" |
38 | #include "llvm/IR/Attributes.h" |
39 | #include "llvm/IR/CallingConv.h" |
40 | #include "llvm/IR/DataLayout.h" |
41 | #include "llvm/IR/DerivedTypes.h" |
42 | #include "llvm/IR/Function.h" |
43 | #include "llvm/IR/GlobalValue.h" |
44 | #include "llvm/IR/GlobalVariable.h" |
45 | #include "llvm/IR/IRBuilder.h" |
46 | #include "llvm/IR/Module.h" |
47 | #include "llvm/IR/Type.h" |
48 | #include "llvm/Support/Casting.h" |
49 | #include "llvm/Support/CommandLine.h" |
50 | #include "llvm/Support/Compiler.h" |
51 | #include "llvm/Support/ErrorHandling.h" |
52 | #include "llvm/Support/MathExtras.h" |
53 | #include "llvm/Target/TargetMachine.h" |
54 | #include "llvm/Target/TargetOptions.h" |
55 | #include "llvm/TargetParser/Triple.h" |
56 | #include "llvm/Transforms/Utils/SizeOpts.h" |
57 | #include <algorithm> |
58 | #include <cassert> |
59 | #include <cstdint> |
60 | #include <cstring> |
61 | #include <iterator> |
62 | #include <string> |
63 | #include <tuple> |
64 | #include <utility> |
65 | |
66 | using namespace llvm; |
67 | |
68 | static cl::opt<bool> JumpIsExpensiveOverride( |
69 | "jump-is-expensive" , cl::init(Val: false), |
70 | cl::desc("Do not create extra branches to split comparison logic." ), |
71 | cl::Hidden); |
72 | |
73 | static cl::opt<unsigned> MinimumJumpTableEntries |
74 | ("min-jump-table-entries" , cl::init(Val: 4), cl::Hidden, |
75 | cl::desc("Set minimum number of entries to use a jump table." )); |
76 | |
77 | static cl::opt<unsigned> MaximumJumpTableSize |
78 | ("max-jump-table-size" , cl::init(UINT_MAX), cl::Hidden, |
79 | cl::desc("Set maximum size of jump tables." )); |
80 | |
81 | /// Minimum jump table density for normal functions. |
82 | static cl::opt<unsigned> |
83 | JumpTableDensity("jump-table-density" , cl::init(Val: 10), cl::Hidden, |
84 | cl::desc("Minimum density for building a jump table in " |
85 | "a normal function" )); |
86 | |
87 | /// Minimum jump table density for -Os or -Oz functions. |
88 | static cl::opt<unsigned> OptsizeJumpTableDensity( |
89 | "optsize-jump-table-density" , cl::init(Val: 40), cl::Hidden, |
90 | cl::desc("Minimum density for building a jump table in " |
91 | "an optsize function" )); |
92 | |
// FIXME: This option only exists to verify that strict FP operations are
// processed correctly, by preventing strict FP operations from being mutated
// into normal FP operations during development. Once the backend supports
// strict FP operations, this option will be meaningless.
static cl::opt<bool> DisableStrictNodeMutation("disable-strictnode-mutation",
       cl::desc("Don't mutate strict-float node to a legalize node"),
       cl::init(false), cl::Hidden);
100 | |
101 | /// GetFPLibCall - Helper to return the right libcall for the given floating |
102 | /// point type, or UNKNOWN_LIBCALL if there is none. |
103 | RTLIB::Libcall RTLIB::getFPLibCall(EVT VT, |
104 | RTLIB::Libcall Call_F32, |
105 | RTLIB::Libcall Call_F64, |
106 | RTLIB::Libcall Call_F80, |
107 | RTLIB::Libcall Call_F128, |
108 | RTLIB::Libcall Call_PPCF128) { |
109 | return |
110 | VT == MVT::f32 ? Call_F32 : |
111 | VT == MVT::f64 ? Call_F64 : |
112 | VT == MVT::f80 ? Call_F80 : |
113 | VT == MVT::f128 ? Call_F128 : |
114 | VT == MVT::ppcf128 ? Call_PPCF128 : |
115 | RTLIB::UNKNOWN_LIBCALL; |
116 | } |
117 | |
118 | /// getFPEXT - Return the FPEXT_*_* value for the given types, or |
119 | /// UNKNOWN_LIBCALL if there is none. |
120 | RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { |
121 | if (OpVT == MVT::f16) { |
122 | if (RetVT == MVT::f32) |
123 | return FPEXT_F16_F32; |
124 | if (RetVT == MVT::f64) |
125 | return FPEXT_F16_F64; |
126 | if (RetVT == MVT::f80) |
127 | return FPEXT_F16_F80; |
128 | if (RetVT == MVT::f128) |
129 | return FPEXT_F16_F128; |
130 | } else if (OpVT == MVT::f32) { |
131 | if (RetVT == MVT::f64) |
132 | return FPEXT_F32_F64; |
133 | if (RetVT == MVT::f128) |
134 | return FPEXT_F32_F128; |
135 | if (RetVT == MVT::ppcf128) |
136 | return FPEXT_F32_PPCF128; |
137 | } else if (OpVT == MVT::f64) { |
138 | if (RetVT == MVT::f128) |
139 | return FPEXT_F64_F128; |
140 | else if (RetVT == MVT::ppcf128) |
141 | return FPEXT_F64_PPCF128; |
142 | } else if (OpVT == MVT::f80) { |
143 | if (RetVT == MVT::f128) |
144 | return FPEXT_F80_F128; |
145 | } else if (OpVT == MVT::bf16) { |
146 | if (RetVT == MVT::f32) |
147 | return FPEXT_BF16_F32; |
148 | } |
149 | |
150 | return UNKNOWN_LIBCALL; |
151 | } |
152 | |
153 | /// getFPROUND - Return the FPROUND_*_* value for the given types, or |
154 | /// UNKNOWN_LIBCALL if there is none. |
155 | RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { |
156 | if (RetVT == MVT::f16) { |
157 | if (OpVT == MVT::f32) |
158 | return FPROUND_F32_F16; |
159 | if (OpVT == MVT::f64) |
160 | return FPROUND_F64_F16; |
161 | if (OpVT == MVT::f80) |
162 | return FPROUND_F80_F16; |
163 | if (OpVT == MVT::f128) |
164 | return FPROUND_F128_F16; |
165 | if (OpVT == MVT::ppcf128) |
166 | return FPROUND_PPCF128_F16; |
167 | } else if (RetVT == MVT::bf16) { |
168 | if (OpVT == MVT::f32) |
169 | return FPROUND_F32_BF16; |
170 | if (OpVT == MVT::f64) |
171 | return FPROUND_F64_BF16; |
172 | if (OpVT == MVT::f80) |
173 | return FPROUND_F80_BF16; |
174 | if (OpVT == MVT::f128) |
175 | return FPROUND_F128_BF16; |
176 | } else if (RetVT == MVT::f32) { |
177 | if (OpVT == MVT::f64) |
178 | return FPROUND_F64_F32; |
179 | if (OpVT == MVT::f80) |
180 | return FPROUND_F80_F32; |
181 | if (OpVT == MVT::f128) |
182 | return FPROUND_F128_F32; |
183 | if (OpVT == MVT::ppcf128) |
184 | return FPROUND_PPCF128_F32; |
185 | } else if (RetVT == MVT::f64) { |
186 | if (OpVT == MVT::f80) |
187 | return FPROUND_F80_F64; |
188 | if (OpVT == MVT::f128) |
189 | return FPROUND_F128_F64; |
190 | if (OpVT == MVT::ppcf128) |
191 | return FPROUND_PPCF128_F64; |
192 | } else if (RetVT == MVT::f80) { |
193 | if (OpVT == MVT::f128) |
194 | return FPROUND_F128_F80; |
195 | } |
196 | |
197 | return UNKNOWN_LIBCALL; |
198 | } |
199 | |
200 | /// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or |
201 | /// UNKNOWN_LIBCALL if there is none. |
202 | RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { |
203 | if (OpVT == MVT::f16) { |
204 | if (RetVT == MVT::i32) |
205 | return FPTOSINT_F16_I32; |
206 | if (RetVT == MVT::i64) |
207 | return FPTOSINT_F16_I64; |
208 | if (RetVT == MVT::i128) |
209 | return FPTOSINT_F16_I128; |
210 | } else if (OpVT == MVT::f32) { |
211 | if (RetVT == MVT::i32) |
212 | return FPTOSINT_F32_I32; |
213 | if (RetVT == MVT::i64) |
214 | return FPTOSINT_F32_I64; |
215 | if (RetVT == MVT::i128) |
216 | return FPTOSINT_F32_I128; |
217 | } else if (OpVT == MVT::f64) { |
218 | if (RetVT == MVT::i32) |
219 | return FPTOSINT_F64_I32; |
220 | if (RetVT == MVT::i64) |
221 | return FPTOSINT_F64_I64; |
222 | if (RetVT == MVT::i128) |
223 | return FPTOSINT_F64_I128; |
224 | } else if (OpVT == MVT::f80) { |
225 | if (RetVT == MVT::i32) |
226 | return FPTOSINT_F80_I32; |
227 | if (RetVT == MVT::i64) |
228 | return FPTOSINT_F80_I64; |
229 | if (RetVT == MVT::i128) |
230 | return FPTOSINT_F80_I128; |
231 | } else if (OpVT == MVT::f128) { |
232 | if (RetVT == MVT::i32) |
233 | return FPTOSINT_F128_I32; |
234 | if (RetVT == MVT::i64) |
235 | return FPTOSINT_F128_I64; |
236 | if (RetVT == MVT::i128) |
237 | return FPTOSINT_F128_I128; |
238 | } else if (OpVT == MVT::ppcf128) { |
239 | if (RetVT == MVT::i32) |
240 | return FPTOSINT_PPCF128_I32; |
241 | if (RetVT == MVT::i64) |
242 | return FPTOSINT_PPCF128_I64; |
243 | if (RetVT == MVT::i128) |
244 | return FPTOSINT_PPCF128_I128; |
245 | } |
246 | return UNKNOWN_LIBCALL; |
247 | } |
248 | |
249 | /// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or |
250 | /// UNKNOWN_LIBCALL if there is none. |
251 | RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { |
252 | if (OpVT == MVT::f16) { |
253 | if (RetVT == MVT::i32) |
254 | return FPTOUINT_F16_I32; |
255 | if (RetVT == MVT::i64) |
256 | return FPTOUINT_F16_I64; |
257 | if (RetVT == MVT::i128) |
258 | return FPTOUINT_F16_I128; |
259 | } else if (OpVT == MVT::f32) { |
260 | if (RetVT == MVT::i32) |
261 | return FPTOUINT_F32_I32; |
262 | if (RetVT == MVT::i64) |
263 | return FPTOUINT_F32_I64; |
264 | if (RetVT == MVT::i128) |
265 | return FPTOUINT_F32_I128; |
266 | } else if (OpVT == MVT::f64) { |
267 | if (RetVT == MVT::i32) |
268 | return FPTOUINT_F64_I32; |
269 | if (RetVT == MVT::i64) |
270 | return FPTOUINT_F64_I64; |
271 | if (RetVT == MVT::i128) |
272 | return FPTOUINT_F64_I128; |
273 | } else if (OpVT == MVT::f80) { |
274 | if (RetVT == MVT::i32) |
275 | return FPTOUINT_F80_I32; |
276 | if (RetVT == MVT::i64) |
277 | return FPTOUINT_F80_I64; |
278 | if (RetVT == MVT::i128) |
279 | return FPTOUINT_F80_I128; |
280 | } else if (OpVT == MVT::f128) { |
281 | if (RetVT == MVT::i32) |
282 | return FPTOUINT_F128_I32; |
283 | if (RetVT == MVT::i64) |
284 | return FPTOUINT_F128_I64; |
285 | if (RetVT == MVT::i128) |
286 | return FPTOUINT_F128_I128; |
287 | } else if (OpVT == MVT::ppcf128) { |
288 | if (RetVT == MVT::i32) |
289 | return FPTOUINT_PPCF128_I32; |
290 | if (RetVT == MVT::i64) |
291 | return FPTOUINT_PPCF128_I64; |
292 | if (RetVT == MVT::i128) |
293 | return FPTOUINT_PPCF128_I128; |
294 | } |
295 | return UNKNOWN_LIBCALL; |
296 | } |
297 | |
298 | /// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or |
299 | /// UNKNOWN_LIBCALL if there is none. |
300 | RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { |
301 | if (OpVT == MVT::i32) { |
302 | if (RetVT == MVT::f16) |
303 | return SINTTOFP_I32_F16; |
304 | if (RetVT == MVT::f32) |
305 | return SINTTOFP_I32_F32; |
306 | if (RetVT == MVT::f64) |
307 | return SINTTOFP_I32_F64; |
308 | if (RetVT == MVT::f80) |
309 | return SINTTOFP_I32_F80; |
310 | if (RetVT == MVT::f128) |
311 | return SINTTOFP_I32_F128; |
312 | if (RetVT == MVT::ppcf128) |
313 | return SINTTOFP_I32_PPCF128; |
314 | } else if (OpVT == MVT::i64) { |
315 | if (RetVT == MVT::bf16) |
316 | return SINTTOFP_I64_BF16; |
317 | if (RetVT == MVT::f16) |
318 | return SINTTOFP_I64_F16; |
319 | if (RetVT == MVT::f32) |
320 | return SINTTOFP_I64_F32; |
321 | if (RetVT == MVT::f64) |
322 | return SINTTOFP_I64_F64; |
323 | if (RetVT == MVT::f80) |
324 | return SINTTOFP_I64_F80; |
325 | if (RetVT == MVT::f128) |
326 | return SINTTOFP_I64_F128; |
327 | if (RetVT == MVT::ppcf128) |
328 | return SINTTOFP_I64_PPCF128; |
329 | } else if (OpVT == MVT::i128) { |
330 | if (RetVT == MVT::f16) |
331 | return SINTTOFP_I128_F16; |
332 | if (RetVT == MVT::f32) |
333 | return SINTTOFP_I128_F32; |
334 | if (RetVT == MVT::f64) |
335 | return SINTTOFP_I128_F64; |
336 | if (RetVT == MVT::f80) |
337 | return SINTTOFP_I128_F80; |
338 | if (RetVT == MVT::f128) |
339 | return SINTTOFP_I128_F128; |
340 | if (RetVT == MVT::ppcf128) |
341 | return SINTTOFP_I128_PPCF128; |
342 | } |
343 | return UNKNOWN_LIBCALL; |
344 | } |
345 | |
346 | /// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or |
347 | /// UNKNOWN_LIBCALL if there is none. |
348 | RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { |
349 | if (OpVT == MVT::i32) { |
350 | if (RetVT == MVT::f16) |
351 | return UINTTOFP_I32_F16; |
352 | if (RetVT == MVT::f32) |
353 | return UINTTOFP_I32_F32; |
354 | if (RetVT == MVT::f64) |
355 | return UINTTOFP_I32_F64; |
356 | if (RetVT == MVT::f80) |
357 | return UINTTOFP_I32_F80; |
358 | if (RetVT == MVT::f128) |
359 | return UINTTOFP_I32_F128; |
360 | if (RetVT == MVT::ppcf128) |
361 | return UINTTOFP_I32_PPCF128; |
362 | } else if (OpVT == MVT::i64) { |
363 | if (RetVT == MVT::bf16) |
364 | return UINTTOFP_I64_BF16; |
365 | if (RetVT == MVT::f16) |
366 | return UINTTOFP_I64_F16; |
367 | if (RetVT == MVT::f32) |
368 | return UINTTOFP_I64_F32; |
369 | if (RetVT == MVT::f64) |
370 | return UINTTOFP_I64_F64; |
371 | if (RetVT == MVT::f80) |
372 | return UINTTOFP_I64_F80; |
373 | if (RetVT == MVT::f128) |
374 | return UINTTOFP_I64_F128; |
375 | if (RetVT == MVT::ppcf128) |
376 | return UINTTOFP_I64_PPCF128; |
377 | } else if (OpVT == MVT::i128) { |
378 | if (RetVT == MVT::f16) |
379 | return UINTTOFP_I128_F16; |
380 | if (RetVT == MVT::f32) |
381 | return UINTTOFP_I128_F32; |
382 | if (RetVT == MVT::f64) |
383 | return UINTTOFP_I128_F64; |
384 | if (RetVT == MVT::f80) |
385 | return UINTTOFP_I128_F80; |
386 | if (RetVT == MVT::f128) |
387 | return UINTTOFP_I128_F128; |
388 | if (RetVT == MVT::ppcf128) |
389 | return UINTTOFP_I128_PPCF128; |
390 | } |
391 | return UNKNOWN_LIBCALL; |
392 | } |
393 | |
394 | RTLIB::Libcall RTLIB::getPOWI(EVT RetVT) { |
  return getFPLibCall(RetVT, POWI_F32, POWI_F64, POWI_F80, POWI_F128,
                      POWI_PPCF128);
}

RTLIB::Libcall RTLIB::getLDEXP(EVT RetVT) {
  return getFPLibCall(RetVT, LDEXP_F32, LDEXP_F64, LDEXP_F80, LDEXP_F128,
                      LDEXP_PPCF128);
}

RTLIB::Libcall RTLIB::getFREXP(EVT RetVT) {
  return getFPLibCall(RetVT, FREXP_F32, FREXP_F64, FREXP_F80, FREXP_F128,
                      FREXP_PPCF128);
}

RTLIB::Libcall RTLIB::getSINCOS(EVT RetVT) {
  return getFPLibCall(RetVT, SINCOS_F32, SINCOS_F64, SINCOS_F80, SINCOS_F128,
                      SINCOS_PPCF128);
}

RTLIB::Libcall RTLIB::getSINCOSPI(EVT RetVT) {
  return getFPLibCall(RetVT, SINCOSPI_F32, SINCOSPI_F64, SINCOSPI_F80,
                      SINCOSPI_F128, SINCOSPI_PPCF128);
}

RTLIB::Libcall RTLIB::getMODF(EVT RetVT) {
  return getFPLibCall(RetVT, MODF_F32, MODF_F64, MODF_F80, MODF_F128,
                      MODF_PPCF128);
422 | } |
423 | |
424 | RTLIB::Libcall RTLIB::getOutlineAtomicHelper(const Libcall (&LC)[5][4], |
425 | AtomicOrdering Order, |
426 | uint64_t MemSize) { |
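  // Index the libcall table by operand size (rows ModeN: 1, 2, 4, 8, 16 bytes)
  // and by memory ordering (columns ModelN: relaxed, acquire, release,
  // acq_rel/seq_cst).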
427 | unsigned ModeN, ModelN; |
428 | switch (MemSize) { |
429 | case 1: |
430 | ModeN = 0; |
431 | break; |
432 | case 2: |
433 | ModeN = 1; |
434 | break; |
435 | case 4: |
436 | ModeN = 2; |
437 | break; |
438 | case 8: |
439 | ModeN = 3; |
440 | break; |
441 | case 16: |
442 | ModeN = 4; |
443 | break; |
444 | default: |
445 | return RTLIB::UNKNOWN_LIBCALL; |
446 | } |
447 | |
448 | switch (Order) { |
449 | case AtomicOrdering::Monotonic: |
450 | ModelN = 0; |
451 | break; |
452 | case AtomicOrdering::Acquire: |
453 | ModelN = 1; |
454 | break; |
455 | case AtomicOrdering::Release: |
456 | ModelN = 2; |
457 | break; |
458 | case AtomicOrdering::AcquireRelease: |
459 | case AtomicOrdering::SequentiallyConsistent: |
460 | ModelN = 3; |
461 | break; |
462 | default: |
463 | return UNKNOWN_LIBCALL; |
464 | } |
465 | |
466 | return LC[ModeN][ModelN]; |
467 | } |
468 | |
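/// Return the outline-atomics libcall for the given atomic opcode, memory
/// ordering, and scalar integer width, or UNKNOWN_LIBCALL if there is none.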
469 | RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, |
470 | MVT VT) { |
471 | if (!VT.isScalarInteger()) |
472 | return UNKNOWN_LIBCALL; |
473 | uint64_t MemSize = VT.getScalarSizeInBits() / 8; |
474 | |
475 | #define LCALLS(A, B) \ |
476 | { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL } |
477 | #define LCALL5(A) \ |
478 | LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16) |
479 | switch (Opc) { |
480 | case ISD::ATOMIC_CMP_SWAP: { |
481 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_CAS)}; |
482 | return getOutlineAtomicHelper(LC, Order, MemSize); |
483 | } |
484 | case ISD::ATOMIC_SWAP: { |
485 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_SWP)}; |
486 | return getOutlineAtomicHelper(LC, Order, MemSize); |
487 | } |
488 | case ISD::ATOMIC_LOAD_ADD: { |
489 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDADD)}; |
490 | return getOutlineAtomicHelper(LC, Order, MemSize); |
491 | } |
492 | case ISD::ATOMIC_LOAD_OR: { |
493 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDSET)}; |
494 | return getOutlineAtomicHelper(LC, Order, MemSize); |
495 | } |
496 | case ISD::ATOMIC_LOAD_CLR: { |
497 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDCLR)}; |
498 | return getOutlineAtomicHelper(LC, Order, MemSize); |
499 | } |
500 | case ISD::ATOMIC_LOAD_XOR: { |
501 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDEOR)}; |
502 | return getOutlineAtomicHelper(LC, Order, MemSize); |
503 | } |
504 | default: |
505 | return UNKNOWN_LIBCALL; |
506 | } |
507 | #undef LCALLS |
508 | #undef LCALL5 |
509 | } |
510 | |
511 | RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) { |
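  // Map an atomic opcode and operand width to the matching __sync_* libcall,
  // picking the _1/_2/_4/_8/_16 variant from the integer type.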
512 | #define OP_TO_LIBCALL(Name, Enum) \ |
513 | case Name: \ |
514 | switch (VT.SimpleTy) { \ |
515 | default: \ |
516 | return UNKNOWN_LIBCALL; \ |
517 | case MVT::i8: \ |
518 | return Enum##_1; \ |
519 | case MVT::i16: \ |
520 | return Enum##_2; \ |
521 | case MVT::i32: \ |
522 | return Enum##_4; \ |
523 | case MVT::i64: \ |
524 | return Enum##_8; \ |
525 | case MVT::i128: \ |
526 | return Enum##_16; \ |
527 | } |
528 | |
529 | switch (Opc) { |
530 | OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET) |
531 | OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP) |
532 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD) |
533 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB) |
534 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND) |
535 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR) |
536 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR) |
537 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND) |
538 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX) |
539 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX) |
540 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN) |
541 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN) |
542 | } |
543 | |
544 | #undef OP_TO_LIBCALL |
545 | |
546 | return UNKNOWN_LIBCALL; |
547 | } |
548 | |
549 | RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { |
550 | switch (ElementSize) { |
551 | case 1: |
552 | return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1; |
553 | case 2: |
554 | return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2; |
555 | case 4: |
556 | return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4; |
557 | case 8: |
558 | return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8; |
559 | case 16: |
560 | return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16; |
561 | default: |
562 | return UNKNOWN_LIBCALL; |
563 | } |
564 | } |
565 | |
566 | RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { |
567 | switch (ElementSize) { |
568 | case 1: |
569 | return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1; |
570 | case 2: |
571 | return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2; |
572 | case 4: |
573 | return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4; |
574 | case 8: |
575 | return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8; |
576 | case 16: |
577 | return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16; |
578 | default: |
579 | return UNKNOWN_LIBCALL; |
580 | } |
581 | } |
582 | |
583 | RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { |
584 | switch (ElementSize) { |
585 | case 1: |
586 | return MEMSET_ELEMENT_UNORDERED_ATOMIC_1; |
587 | case 2: |
588 | return MEMSET_ELEMENT_UNORDERED_ATOMIC_2; |
589 | case 4: |
590 | return MEMSET_ELEMENT_UNORDERED_ATOMIC_4; |
591 | case 8: |
592 | return MEMSET_ELEMENT_UNORDERED_ATOMIC_8; |
593 | case 16: |
594 | return MEMSET_ELEMENT_UNORDERED_ATOMIC_16; |
595 | default: |
596 | return UNKNOWN_LIBCALL; |
597 | } |
598 | } |
599 | |
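/// Initialize the default condition code used to interpret the result of each
/// floating-point comparison libcall: the libcall's integer return value is
/// compared against zero using the condition code recorded here.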
600 | void RTLIB::initCmpLibcallCCs(ISD::CondCode *CmpLibcallCCs) { |
  std::fill(CmpLibcallCCs, CmpLibcallCCs + RTLIB::UNKNOWN_LIBCALL,
            ISD::SETCC_INVALID);
603 | CmpLibcallCCs[RTLIB::OEQ_F32] = ISD::SETEQ; |
604 | CmpLibcallCCs[RTLIB::OEQ_F64] = ISD::SETEQ; |
605 | CmpLibcallCCs[RTLIB::OEQ_F128] = ISD::SETEQ; |
606 | CmpLibcallCCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ; |
607 | CmpLibcallCCs[RTLIB::UNE_F32] = ISD::SETNE; |
608 | CmpLibcallCCs[RTLIB::UNE_F64] = ISD::SETNE; |
609 | CmpLibcallCCs[RTLIB::UNE_F128] = ISD::SETNE; |
610 | CmpLibcallCCs[RTLIB::UNE_PPCF128] = ISD::SETNE; |
611 | CmpLibcallCCs[RTLIB::OGE_F32] = ISD::SETGE; |
612 | CmpLibcallCCs[RTLIB::OGE_F64] = ISD::SETGE; |
613 | CmpLibcallCCs[RTLIB::OGE_F128] = ISD::SETGE; |
614 | CmpLibcallCCs[RTLIB::OGE_PPCF128] = ISD::SETGE; |
615 | CmpLibcallCCs[RTLIB::OLT_F32] = ISD::SETLT; |
616 | CmpLibcallCCs[RTLIB::OLT_F64] = ISD::SETLT; |
617 | CmpLibcallCCs[RTLIB::OLT_F128] = ISD::SETLT; |
618 | CmpLibcallCCs[RTLIB::OLT_PPCF128] = ISD::SETLT; |
619 | CmpLibcallCCs[RTLIB::OLE_F32] = ISD::SETLE; |
620 | CmpLibcallCCs[RTLIB::OLE_F64] = ISD::SETLE; |
621 | CmpLibcallCCs[RTLIB::OLE_F128] = ISD::SETLE; |
622 | CmpLibcallCCs[RTLIB::OLE_PPCF128] = ISD::SETLE; |
623 | CmpLibcallCCs[RTLIB::OGT_F32] = ISD::SETGT; |
624 | CmpLibcallCCs[RTLIB::OGT_F64] = ISD::SETGT; |
625 | CmpLibcallCCs[RTLIB::OGT_F128] = ISD::SETGT; |
626 | CmpLibcallCCs[RTLIB::OGT_PPCF128] = ISD::SETGT; |
627 | CmpLibcallCCs[RTLIB::UO_F32] = ISD::SETNE; |
628 | CmpLibcallCCs[RTLIB::UO_F64] = ISD::SETNE; |
629 | CmpLibcallCCs[RTLIB::UO_F128] = ISD::SETNE; |
630 | CmpLibcallCCs[RTLIB::UO_PPCF128] = ISD::SETNE; |
631 | } |
632 | |
633 | /// NOTE: The TargetMachine owns TLOF. |
634 | TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) |
635 | : TM(tm), Libcalls(TM.getTargetTriple(), TM.Options.ExceptionModel, |
636 | TM.Options.FloatABIType, TM.Options.EABIVersion, |
637 | TM.Options.MCOptions.getABIName()) { |
638 | initActions(); |
639 | |
640 | // Perform these initializations only once. |
641 | MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = |
642 | MaxLoadsPerMemcmp = 8; |
643 | MaxGluedStoresPerMemcpy = 0; |
644 | MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = |
645 | MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4; |
646 | HasMultipleConditionRegisters = false; |
647 | HasExtractBitsInsn = false; |
648 | JumpIsExpensive = JumpIsExpensiveOverride; |
649 | PredictableSelectIsExpensive = false; |
650 | EnableExtLdPromotion = false; |
651 | StackPointerRegisterToSaveRestore = 0; |
652 | BooleanContents = UndefinedBooleanContent; |
653 | BooleanFloatContents = UndefinedBooleanContent; |
654 | BooleanVectorContents = UndefinedBooleanContent; |
655 | SchedPreferenceInfo = Sched::ILP; |
656 | GatherAllAliasesMaxDepth = 18; |
657 | IsStrictFPEnabled = DisableStrictNodeMutation; |
658 | MaxBytesForAlignment = 0; |
659 | MaxAtomicSizeInBitsSupported = 0; |
660 | |
661 | // Assume that even with libcalls, no target supports wider than 128 bit |
662 | // division. |
663 | MaxDivRemBitWidthSupported = 128; |
664 | |
665 | MaxLargeFPConvertBitWidthSupported = llvm::IntegerType::MAX_INT_BITS; |
666 | |
667 | MinCmpXchgSizeInBits = 0; |
668 | SupportsUnalignedAtomics = false; |
669 | |
670 | RTLIB::initCmpLibcallCCs(CmpLibcallCCs); |
671 | } |
672 | |
673 | // Define the virtual destructor out-of-line to act as a key method to anchor |
674 | // debug info (see coding standards). |
675 | TargetLoweringBase::~TargetLoweringBase() = default; |
676 | |
677 | void TargetLoweringBase::initActions() { |
678 | // All operations default to being supported. |
  memset(OpActions, 0, sizeof(OpActions));
  memset(LoadExtActions, 0, sizeof(LoadExtActions));
  memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
  memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
  memset(CondCodeActions, 0, sizeof(CondCodeActions));
  llvm::fill(RegClassForVT, nullptr);
  llvm::fill(TargetDAGCombineArray, 0);

  // Let extending atomic loads be unsupported by default.
  for (MVT ValVT : MVT::all_valuetypes())
    for (MVT MemVT : MVT::all_valuetypes())
      setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT, MemVT,
                             Expand);
692 | |
693 | // We're somewhat special casing MVT::i2 and MVT::i4. Ideally we want to |
694 | // remove this and targets should individually set these types if not legal. |
  for (ISD::NodeType NT : enum_seq(ISD::DELETED_NODE, ISD::BUILTIN_OP_END,
696 | force_iteration_on_noniterable_enum)) { |
697 | for (MVT VT : {MVT::i2, MVT::i4}) |
698 | OpActions[(unsigned)VT.SimpleTy][NT] = Expand; |
699 | } |
700 | for (MVT AVT : MVT::all_valuetypes()) { |
701 | for (MVT VT : {MVT::i2, MVT::i4, MVT::v128i2, MVT::v64i4}) { |
      setTruncStoreAction(AVT, VT, Expand);
      setLoadExtAction(ISD::EXTLOAD, AVT, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, AVT, VT, Expand);
705 | } |
706 | } |
707 | for (unsigned IM = (unsigned)ISD::PRE_INC; |
708 | IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { |
709 | for (MVT VT : {MVT::i2, MVT::i4}) { |
      setIndexedLoadAction(IM, VT, Expand);
      setIndexedStoreAction(IM, VT, Expand);
      setIndexedMaskedLoadAction(IM, VT, Expand);
      setIndexedMaskedStoreAction(IM, VT, Expand);
714 | } |
715 | } |
716 | |
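  // Promote ATOMIC_SWAP on floating-point types to the integer type of the
  // same width, when such a type exists.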
717 | for (MVT VT : MVT::fp_valuetypes()) { |
    MVT IntVT = MVT::getIntegerVT(VT.getFixedSizeInBits());
    if (IntVT.isValid()) {
      setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
      AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
722 | } |
723 | } |
724 | |
725 | // Set default actions for various operations. |
726 | for (MVT VT : MVT::all_valuetypes()) { |
727 | // Default all indexed load / store to expand. |
728 | for (unsigned IM = (unsigned)ISD::PRE_INC; |
729 | IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { |
      setIndexedLoadAction(IM, VT, Expand);
      setIndexedStoreAction(IM, VT, Expand);
      setIndexedMaskedLoadAction(IM, VT, Expand);
      setIndexedMaskedStoreAction(IM, VT, Expand);
734 | } |
735 | |
736 | // Most backends expect to see the node which just returns the value loaded. |
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
738 | |
739 | // These operations default to expand. |
    setOperationAction({ISD::FGETSIGN, ISD::CONCAT_VECTORS,
                        ISD::FMINNUM, ISD::FMAXNUM,
                        ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE,
                        ISD::FMINIMUM, ISD::FMAXIMUM,
                        ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
                        ISD::FMAD, ISD::SMIN,
                        ISD::SMAX, ISD::UMIN,
                        ISD::UMAX, ISD::ABS,
                        ISD::FSHL, ISD::FSHR,
                        ISD::SADDSAT, ISD::UADDSAT,
                        ISD::SSUBSAT, ISD::USUBSAT,
                        ISD::SSHLSAT, ISD::USHLSAT,
                        ISD::SMULFIX, ISD::SMULFIXSAT,
                        ISD::UMULFIX, ISD::UMULFIXSAT,
                        ISD::SDIVFIX, ISD::SDIVFIXSAT,
                        ISD::UDIVFIX, ISD::UDIVFIXSAT,
                        ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
                        ISD::IS_FPCLASS},
                       VT, Expand);
759 | |
760 | // Overflow operations default to expand |
    setOperationAction({ISD::SADDO, ISD::SSUBO, ISD::UADDO, ISD::USUBO,
                        ISD::SMULO, ISD::UMULO},
                       VT, Expand);
764 | |
765 | // Carry-using overflow operations default to expand. |
    setOperationAction({ISD::UADDO_CARRY, ISD::USUBO_CARRY, ISD::SETCCCARRY,
                        ISD::SADDO_CARRY, ISD::SSUBO_CARRY},
                       VT, Expand);
769 | |
770 | // ADDC/ADDE/SUBC/SUBE default to expand. |
    setOperationAction({ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}, VT,
                       Expand);
773 | |
774 | // [US]CMP default to expand |
    setOperationAction({ISD::UCMP, ISD::SCMP}, VT, Expand);
776 | |
777 | // Halving adds |
778 | setOperationAction( |
        {ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, ISD::AVGCEILU}, VT,
        Expand);
781 | |
782 | // Absolute difference |
    setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand);
784 | |
785 | // Saturated trunc |
    setOperationAction(ISD::TRUNCATE_SSAT_S, VT, Expand);
    setOperationAction(ISD::TRUNCATE_SSAT_U, VT, Expand);
    setOperationAction(ISD::TRUNCATE_USAT_U, VT, Expand);
789 | |
790 | // These default to Expand so they will be expanded to CTLZ/CTTZ by default. |
    setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
                       Expand);
793 | |
    setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand);
795 | |
796 | // These library functions default to expand. |
    setOperationAction({ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP,
                        ISD::FSINCOS, ISD::FSINCOSPI, ISD::FMODF},
                       VT, Expand);
800 | |
801 | // These operations default to expand for vector types. |
802 | if (VT.isVector()) |
803 | setOperationAction( |
          {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
           ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
           ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::LROUND,
           ISD::LLROUND, ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN,
           ISD::FCOSH, ISD::FSINH, ISD::FTANH, ISD::FATAN2},
          VT, Expand);
810 | |
811 | // Constrained floating-point operations default to expand. |
812 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
813 | setOperationAction(ISD::STRICT_##DAGN, VT, Expand); |
814 | #include "llvm/IR/ConstrainedOps.def" |
815 | |
816 | // For most targets @llvm.get.dynamic.area.offset just returns 0. |
    setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
818 | |
    // Vector reductions default to expand.
    setOperationAction(
        {ISD::VECREDUCE_FADD, ISD::VECREDUCE_FMUL, ISD::VECREDUCE_ADD,
         ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
         ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
         ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_FMAX,
         ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAXIMUM, ISD::VECREDUCE_FMINIMUM,
         ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL},
        VT, Expand);
828 | |
829 | // Named vector shuffles default to expand. |
    setOperationAction(ISD::VECTOR_SPLICE, VT, Expand);
831 | |
    // Only some targets support this vector operation; most need to expand it.
    setOperationAction(ISD::VECTOR_COMPRESS, VT, Expand);
834 | |
835 | // VP operations default to expand. |
836 | #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \ |
837 | setOperationAction(ISD::SDOPC, VT, Expand); |
838 | #include "llvm/IR/VPIntrinsics.def" |
839 | |
840 | // Masked vector extracts default to expand. |
    setOperationAction(ISD::VECTOR_FIND_LAST_ACTIVE, VT, Expand);
842 | |
843 | // FP environment operations default to expand. |
    setOperationAction(ISD::GET_FPENV, VT, Expand);
    setOperationAction(ISD::SET_FPENV, VT, Expand);
    setOperationAction(ISD::RESET_FPENV, VT, Expand);
847 | } |
848 | |
849 | // Most targets ignore the @llvm.prefetch intrinsic. |
  setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
851 | |
852 | // Most targets also ignore the @llvm.readcyclecounter intrinsic. |
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand);
854 | |
855 | // Most targets also ignore the @llvm.readsteadycounter intrinsic. |
  setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Expand);
857 | |
858 | // ConstantFP nodes default to expand. Targets can either change this to |
859 | // Legal, in which case all fp constants are legal, or use isFPImmLegal() |
860 | // to optimize expansions for certain constants. |
  setOperationAction(ISD::ConstantFP,
                     {MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128},
                     Expand);
864 | |
865 | // These library functions default to expand. |
  setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
                      ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR,
                      ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
                      ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN,
                      ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH,
                      ISD::FATAN2},
                     {MVT::f32, MVT::f64, MVT::f128}, Expand);
873 | |
874 | // FIXME: Query RuntimeLibCalls to make the decision. |
  setOperationAction({ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND},
                     {MVT::f32, MVT::f64, MVT::f128}, LibCall);
877 | |
  setOperationAction({ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH,
                      ISD::FSINH, ISD::FTANH, ISD::FATAN2},
                     MVT::f16, Promote);
881 | // Default ISD::TRAP to expand (which turns it into abort). |
  setOperationAction(ISD::TRAP, MVT::Other, Expand);
883 | |
884 | // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand" |
885 | // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. |
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
887 | |
  setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
889 | |
  setOperationAction(ISD::GET_FPENV_MEM, MVT::Other, Expand);
  setOperationAction(ISD::SET_FPENV_MEM, MVT::Other, Expand);
892 | |
893 | for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { |
    setOperationAction(ISD::GET_FPMODE, VT, Expand);
    setOperationAction(ISD::SET_FPMODE, VT, Expand);
896 | } |
  setOperationAction(ISD::RESET_FPMODE, MVT::Other, Expand);
898 | |
899 | // This one by default will call __clear_cache unless the target |
900 | // wants something different. |
  setOperationAction(ISD::CLEAR_CACHE, MVT::Other, LibCall);
902 | } |
903 | |
904 | MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL, |
905 | EVT) const { |
  return MVT::getIntegerVT(DL.getPointerSizeInBits(0));
907 | } |
908 | |
909 | EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, |
910 | const DataLayout &DL) const { |
  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
912 | if (LHSTy.isVector()) |
913 | return LHSTy; |
914 | MVT ShiftVT = getScalarShiftAmountTy(DL, LHSTy); |
  // If any possible shift value won't fit in the preferred type, just use
  // something safe. Assume it will be legalized when the shift is expanded.
  if (ShiftVT.getSizeInBits() < Log2_32_Ceil(LHSTy.getSizeInBits()))
    ShiftVT = MVT::i32;
  assert(ShiftVT.getSizeInBits() >= Log2_32_Ceil(LHSTy.getSizeInBits()) &&
         "ShiftVT is still too small!");
921 | return ShiftVT; |
922 | } |
923 | |
924 | bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { |
925 | assert(isTypeLegal(VT)); |
926 | switch (Op) { |
927 | default: |
928 | return false; |
929 | case ISD::SDIV: |
930 | case ISD::UDIV: |
931 | case ISD::SREM: |
932 | case ISD::UREM: |
933 | return true; |
934 | } |
935 | } |
936 | |
937 | bool TargetLoweringBase::isFreeAddrSpaceCast(unsigned SrcAS, |
938 | unsigned DestAS) const { |
939 | return TM.isNoopAddrSpaceCast(SrcAS, DestAS); |
940 | } |
941 | |
942 | unsigned TargetLoweringBase::getBitWidthForCttzElements( |
943 | Type *RetTy, ElementCount EC, bool ZeroIsPoison, |
944 | const ConstantRange *VScaleRange) const { |
945 | // Find the smallest "sensible" element type to use for the expansion. |
946 | ConstantRange CR(APInt(64, EC.getKnownMinValue())); |
947 | if (EC.isScalable()) |
    CR = CR.umul_sat(*VScaleRange);
949 | |
950 | if (ZeroIsPoison) |
    CR = CR.subtract(APInt(64, 1));
952 | |
953 | unsigned EltWidth = RetTy->getScalarSizeInBits(); |
  EltWidth = std::min(EltWidth, (unsigned)CR.getActiveBits());
  EltWidth = std::max(llvm::bit_ceil(EltWidth), (unsigned)8);
956 | |
957 | return EltWidth; |
958 | } |
959 | |
960 | void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) { |
961 | // If the command-line option was specified, ignore this request. |
962 | if (!JumpIsExpensiveOverride.getNumOccurrences()) |
963 | JumpIsExpensive = isExpensive; |
964 | } |
965 | |
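/// Return a pair describing how VT should be legalized: the action to take
/// and the type to convert the value to (for example a wider integer, a
/// split vector, or the scalar element type).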
966 | TargetLoweringBase::LegalizeKind |
967 | TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { |
968 | // If this is a simple type, use the ComputeRegisterProp mechanism. |
969 | if (VT.isSimple()) { |
970 | MVT SVT = VT.getSimpleVT(); |
971 | assert((unsigned)SVT.SimpleTy < std::size(TransformToType)); |
972 | MVT NVT = TransformToType[SVT.SimpleTy]; |
    LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
974 | |
975 | assert((LA == TypeLegal || LA == TypeSoftenFloat || |
976 | LA == TypeSoftPromoteHalf || |
977 | (NVT.isVector() || |
978 | ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) && |
979 | "Promote may not follow Expand or Promote" ); |
980 | |
981 | if (LA == TypeSplitVector) |
982 | return LegalizeKind(LA, EVT(SVT).getHalfNumVectorElementsVT(Context)); |
983 | if (LA == TypeScalarizeVector) |
984 | return LegalizeKind(LA, SVT.getVectorElementType()); |
985 | return LegalizeKind(LA, NVT); |
986 | } |
987 | |
988 | // Handle Extended Scalar Types. |
989 | if (!VT.isVector()) { |
    assert(VT.isInteger() && "Float types must be simple");
991 | unsigned BitSize = VT.getSizeInBits(); |
992 | // First promote to a power-of-two size, then expand if necessary. |
    if (BitSize < 8 || !isPowerOf2_32(BitSize)) {
      EVT NVT = VT.getRoundIntegerType(Context);
      assert(NVT != VT && "Unable to round integer VT");
      LegalizeKind NextStep = getTypeConversion(Context, NVT);
997 | // Avoid multi-step promotion. |
998 | if (NextStep.first == TypePromoteInteger) |
999 | return NextStep; |
1000 | // Return rounded integer type. |
1001 | return LegalizeKind(TypePromoteInteger, NVT); |
1002 | } |
1003 | |
1004 | return LegalizeKind(TypeExpandInteger, |
                        EVT::getIntegerVT(Context, VT.getSizeInBits() / 2));
1006 | } |
1007 | |
1008 | // Handle vector types. |
1009 | ElementCount NumElts = VT.getVectorElementCount(); |
1010 | EVT EltVT = VT.getVectorElementType(); |
1011 | |
1012 | // Vectors with only one element are always scalarized. |
1013 | if (NumElts.isScalar()) |
1014 | return LegalizeKind(TypeScalarizeVector, EltVT); |
1015 | |
1016 | // Try to widen vector elements until the element type is a power of two and |
1017 | // promote it to a legal type later on, for example: |
1018 | // <3 x i8> -> <4 x i8> -> <4 x i32> |
1019 | if (EltVT.isInteger()) { |
1020 | // Vectors with a number of elements that is not a power of two are always |
1021 | // widened, for example <3 x i8> -> <4 x i8>. |
1022 | if (!VT.isPow2VectorType()) { |
1023 | NumElts = NumElts.coefficientNextPowerOf2(); |
      EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts);
1025 | return LegalizeKind(TypeWidenVector, NVT); |
1026 | } |
1027 | |
1028 | // Examine the element type. |
    LegalizeKind LK = getTypeConversion(Context, EltVT);
1030 | |
1031 | // If type is to be expanded, split the vector. |
1032 | // <4 x i140> -> <2 x i140> |
1033 | if (LK.first == TypeExpandInteger) { |
1034 | if (NumElts.isScalable() && NumElts.getKnownMinValue() == 1) |
1035 | return LegalizeKind(TypeScalarizeScalableVector, EltVT); |
1036 | return LegalizeKind(TypeSplitVector, |
1037 | VT.getHalfNumVectorElementsVT(Context)); |
1038 | } |
1039 | |
1040 | // Promote the integer element types until a legal vector type is found |
1041 | // or until the element integer type is too big. If a legal type was not |
1042 | // found, fallback to the usual mechanism of widening/splitting the |
1043 | // vector. |
1044 | EVT OldEltVT = EltVT; |
1045 | while (true) { |
1046 | // Increase the bitwidth of the element to the next pow-of-two |
1047 | // (which is greater than 8 bits). |
      EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits())
1049 | .getRoundIntegerType(Context); |
1050 | |
1051 | // Stop trying when getting a non-simple element type. |
1052 | // Note that vector elements may be greater than legal vector element |
1053 | // types. Example: X86 XMM registers hold 64bit element on 32bit |
1054 | // systems. |
1055 | if (!EltVT.isSimple()) |
1056 | break; |
1057 | |
1058 | // Build a new vector type and check if it is legal. |
      MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
      // Found a legal promoted vector type.
      if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal)
        return LegalizeKind(TypePromoteInteger,
                            EVT::getVectorVT(Context, EltVT, NumElts));
1064 | } |
1065 | |
1066 | // Reset the type to the unexpanded type if we did not find a legal vector |
1067 | // type with a promoted vector element type. |
1068 | EltVT = OldEltVT; |
1069 | } |
1070 | |
1071 | // Try to widen the vector until a legal type is found. |
1072 | // If there is no wider legal type, split the vector. |
1073 | while (true) { |
1074 | // Round up to the next power of 2. |
1075 | NumElts = NumElts.coefficientNextPowerOf2(); |
1076 | |
1077 | // If there is no simple vector type with this many elements then there |
1078 | // cannot be a larger legal vector type. Note that this assumes that |
1079 | // there are no skipped intermediate vector types in the simple types. |
1080 | if (!EltVT.isSimple()) |
1081 | break; |
    MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
1083 | if (LargerVector == MVT()) |
1084 | break; |
1085 | |
1086 | // If this type is legal then widen the vector. |
    if (ValueTypeActions.getTypeAction(LargerVector) == TypeLegal)
1088 | return LegalizeKind(TypeWidenVector, LargerVector); |
1089 | } |
1090 | |
1091 | // Widen odd vectors to next power of two. |
1092 | if (!VT.isPow2VectorType()) { |
1093 | EVT NVT = VT.getPow2VectorType(Context); |
1094 | return LegalizeKind(TypeWidenVector, NVT); |
1095 | } |
1096 | |
  if (VT.getVectorElementCount() == ElementCount::getScalable(1))
1098 | return LegalizeKind(TypeScalarizeScalableVector, EltVT); |
1099 | |
1100 | // Vectors with illegal element types are expanded. |
  EVT NVT = EVT::getVectorVT(Context, EltVT,
                             VT.getVectorElementCount().divideCoefficientBy(2));
1103 | return LegalizeKind(TypeSplitVector, NVT); |
1104 | } |
1105 | |
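/// Break VT into some number of legal intermediate parts, reporting the
/// intermediate type, the register type each part is ultimately carried in,
/// and (as the return value) the total number of registers required.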
1106 | static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, |
1107 | unsigned &NumIntermediates, |
1108 | MVT &RegisterVT, |
1109 | TargetLoweringBase *TLI) { |
1110 | // Figure out the right, legal destination reg to copy into. |
1111 | ElementCount EC = VT.getVectorElementCount(); |
1112 | MVT EltTy = VT.getVectorElementType(); |
1113 | |
1114 | unsigned NumVectorRegs = 1; |
1115 | |
1116 | // Scalable vectors cannot be scalarized, so splitting or widening is |
1117 | // required. |
  if (VT.isScalableVector() && !isPowerOf2_32(EC.getKnownMinValue()))
    llvm_unreachable(
        "Splitting or widening of non-power-of-2 MVTs is not implemented.");
1121 | |
1122 | // FIXME: We don't support non-power-of-2-sized vectors for now. |
1123 | // Ideally we could break down into LHS/RHS like LegalizeDAG does. |
  if (!isPowerOf2_32(EC.getKnownMinValue())) {
1125 | // Split EC to unit size (scalable property is preserved). |
1126 | NumVectorRegs = EC.getKnownMinValue(); |
    EC = ElementCount::getFixed(1);
1128 | } |
1129 | |
1130 | // Divide the input until we get to a supported size. This will |
1131 | // always end up with an EC that represent a scalar or a scalable |
1132 | // scalar. |
1133 | while (EC.getKnownMinValue() > 1 && |
         !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) {
    EC = EC.divideCoefficientBy(2);
1136 | NumVectorRegs <<= 1; |
1137 | } |
1138 | |
1139 | NumIntermediates = NumVectorRegs; |
1140 | |
  MVT NewVT = MVT::getVectorVT(EltTy, EC);
  if (!TLI->isTypeLegal(NewVT))
    NewVT = EltTy;
1144 | IntermediateVT = NewVT; |
1145 | |
1146 | unsigned LaneSizeInBits = NewVT.getScalarSizeInBits(); |
1147 | |
1148 | // Convert sizes such as i33 to i64. |
  LaneSizeInBits = llvm::bit_ceil(LaneSizeInBits);
1150 | |
  MVT DestVT = TLI->getRegisterType(NewVT);
  RegisterVT = DestVT;
  if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
1154 | return NumVectorRegs * (LaneSizeInBits / DestVT.getScalarSizeInBits()); |
1155 | |
1156 | // Otherwise, promotion or legal types use the same number of registers as |
1157 | // the vector decimated to the appropriate level. |
1158 | return NumVectorRegs; |
1159 | } |
1160 | |
1161 | /// isLegalRC - Return true if the value types that can be represented by the |
1162 | /// specified register class are all legal. |
1163 | bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI, |
1164 | const TargetRegisterClass &RC) const { |
1165 | for (const auto *I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I) |
    if (isTypeLegal(*I))
1167 | return true; |
1168 | return false; |
1169 | } |
1170 | |
/// Replace/modify any TargetFrameIndex operands with a target-dependent
1172 | /// sequence of memory operands that is recognized by PrologEpilogInserter. |
1173 | MachineBasicBlock * |
1174 | TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, |
1175 | MachineBasicBlock *MBB) const { |
1176 | MachineInstr *MI = &InitialMI; |
1177 | MachineFunction &MF = *MI->getMF(); |
1178 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1179 | |
1180 | // We're handling multiple types of operands here: |
1181 | // PATCHPOINT MetaArgs - live-in, read only, direct |
1182 | // STATEPOINT Deopt Spill - live-through, read only, indirect |
1183 | // STATEPOINT Deopt Alloca - live-through, read only, direct |
1184 | // (We're currently conservative and mark the deopt slots read/write in |
1185 | // practice.) |
1186 | // STATEPOINT GC Spill - live-through, read/write, indirect |
1187 | // STATEPOINT GC Alloca - live-through, read/write, direct |
1188 | // The live-in vs live-through is handled already (the live through ones are |
1189 | // all stack slots), but we need to handle the different type of stackmap |
1190 | // operands and memory effects here. |
1191 | |
  if (llvm::none_of(MI->operands(),
                    [](MachineOperand &Operand) { return Operand.isFI(); }))
1194 | return MBB; |
1195 | |
  MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc());
1197 | |
1198 | // Inherit previous memory operands. |
  MIB.cloneMemRefs(*MI);
1200 | |
1201 | for (unsigned i = 0; i < MI->getNumOperands(); ++i) { |
1202 | MachineOperand &MO = MI->getOperand(i); |
1203 | if (!MO.isFI()) { |
      // Index of the Def operand this Use is tied to.
      // Since Defs come before Uses, if a Use is tied, then the index of its
      // Def must be smaller than the index of that Use.
      // Also, Defs preserve their position in the new MI.
      unsigned TiedTo = i;
      if (MO.isReg() && MO.isTied())
        TiedTo = MI->findTiedOperandIdx(i);
1211 | MIB.add(MO); |
1212 | if (TiedTo < i) |
        MIB->tieOperands(TiedTo, MIB->getNumOperands() - 1);
1214 | continue; |
1215 | } |
1216 | |
1217 | // foldMemoryOperand builds a new MI after replacing a single FI operand |
1218 | // with the canonical set of five x86 addressing-mode operands. |
1219 | int FI = MO.getIndex(); |
1220 | |
1221 | // Add frame index operands recognized by stackmaps.cpp |
    if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
1223 | // indirect-mem-ref tag, size, #FI, offset. |
1224 | // Used for spills inserted by StatepointLowering. This codepath is not |
1225 | // used for patchpoints/stackmaps at all, for these spilling is done via |
1226 | // foldMemoryOperand callback only. |
      assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
      MIB.addImm(StackMaps::IndirectMemRefOp);
      MIB.addImm(MFI.getObjectSize(FI));
      MIB.add(MO);
      MIB.addImm(0);
1232 | } else { |
1233 | // direct-mem-ref tag, #FI, offset. |
1234 | // Used by patchpoint, and direct alloca arguments to statepoints |
      MIB.addImm(StackMaps::DirectMemRefOp);
      MIB.add(MO);
      MIB.addImm(0);
1238 | } |
1239 | |
    assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");
1241 | |
1242 | // Add a new memory operand for this FI. |
1243 | assert(MFI.getObjectOffset(FI) != -1); |
1244 | |
1245 | // Note: STATEPOINT MMOs are added during SelectionDAG. STACKMAP, and |
1246 | // PATCHPOINT should be updated to do the same. (TODO) |
1247 | if (MI->getOpcode() != TargetOpcode::STATEPOINT) { |
1248 | auto Flags = MachineMemOperand::MOLoad; |
1249 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
          MachinePointerInfo::getFixedStack(MF, FI), Flags,
          MF.getDataLayout().getPointerSize(), MFI.getObjectAlign(FI));
      MIB->addMemOperand(MF, MMO);
1253 | } |
1254 | } |
  MBB->insert(MachineBasicBlock::iterator(MI), MIB);
1256 | MI->eraseFromParent(); |
1257 | return MBB; |
1258 | } |
1259 | |
1260 | /// findRepresentativeClass - Return the largest legal super-reg register class |
1261 | /// of the register class for the specified type and its associated "cost". |
1262 | // This function is in TargetLowering because it uses RegClassForVT which would |
1263 | // need to be moved to TargetRegisterInfo and would necessitate moving |
1264 | // isTypeLegal over as well - a massive change that would just require |
1265 | // TargetLowering having a TargetRegisterInfo class member that it would use. |
1266 | std::pair<const TargetRegisterClass *, uint8_t> |
1267 | TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI, |
1268 | MVT VT) const { |
1269 | const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; |
1270 | if (!RC) |
    return std::make_pair(RC, 0);
1272 | |
1273 | // Compute the set of all super-register classes. |
1274 | BitVector SuperRegRC(TRI->getNumRegClasses()); |
1275 | for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) |
    SuperRegRC.setBitsInMask(RCI.getMask());
1277 | |
1278 | // Find the first legal register class with the largest spill size. |
1279 | const TargetRegisterClass *BestRC = RC; |
1280 | for (unsigned i : SuperRegRC.set_bits()) { |
1281 | const TargetRegisterClass *SuperRC = TRI->getRegClass(i); |
1282 | // We want the largest possible spill size. |
    if (TRI->getSpillSize(*SuperRC) <= TRI->getSpillSize(*BestRC))
      continue;
    if (!isLegalRC(*TRI, *SuperRC))
1286 | continue; |
1287 | BestRC = SuperRC; |
1288 | } |
  return std::make_pair(BestRC, 1);
1290 | } |
1291 | |
1292 | /// computeRegisterProperties - Once all of the register classes are added, |
1293 | /// this allows us to compute derived properties we expose. |
1294 | void TargetLoweringBase::computeRegisterProperties( |
1295 | const TargetRegisterInfo *TRI) { |
1296 | // Everything defaults to needing one register. |
1297 | for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) { |
1298 | NumRegistersForVT[i] = 1; |
1299 | RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; |
1300 | } |
1301 | // ...except isVoid, which doesn't need any registers. |
1302 | NumRegistersForVT[MVT::isVoid] = 0; |
1303 | |
1304 | // Find the largest integer register class. |
1305 | unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; |
1306 | for (; RegClassForVT[LargestIntReg] == nullptr; --LargestIntReg) |
    assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
1308 | |
1309 | // Every integer value type larger than this largest register takes twice as |
1310 | // many registers to represent as the previous ValueType. |
1311 | for (unsigned ExpandedReg = LargestIntReg + 1; |
1312 | ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) { |
1313 | NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; |
1314 | RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; |
1315 | TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); |
    ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
                                   TypeExpandInteger);
1318 | } |
1319 | |
1320 | // Inspect all of the ValueType's smaller than the largest integer |
1321 | // register to see which ones need promotion. |
1322 | unsigned LegalIntReg = LargestIntReg; |
1323 | for (unsigned IntReg = LargestIntReg - 1; |
1324 | IntReg >= (unsigned)MVT::i1; --IntReg) { |
1325 | MVT IVT = (MVT::SimpleValueType)IntReg; |
    if (isTypeLegal(IVT)) {
1327 | LegalIntReg = IntReg; |
1328 | } else { |
1329 | RegisterTypeForVT[IntReg] = TransformToType[IntReg] = |
1330 | (MVT::SimpleValueType)LegalIntReg; |
      ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
1332 | } |
1333 | } |
1334 | |
1335 | // ppcf128 type is really two f64's. |
  if (!isTypeLegal(MVT::ppcf128)) {
    if (isTypeLegal(MVT::f64)) {
      NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
      RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
      TransformToType[MVT::ppcf128] = MVT::f64;
      ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
    } else {
      NumRegistersForVT[MVT::ppcf128] = NumRegistersForVT[MVT::i128];
      RegisterTypeForVT[MVT::ppcf128] = RegisterTypeForVT[MVT::i128];
      TransformToType[MVT::ppcf128] = MVT::i128;
      ValueTypeActions.setTypeAction(MVT::ppcf128, TypeSoftenFloat);
1347 | } |
1348 | } |
1349 | |
1350 | // Decide how to handle f128. If the target does not have native f128 support, |
1351 | // expand it to i128 and we will be generating soft float library calls. |
  if (!isTypeLegal(MVT::f128)) {
    NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
    RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
    TransformToType[MVT::f128] = MVT::i128;
    ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
1357 | } |
1358 | |
1359 | // Decide how to handle f80. If the target does not have native f80 support, |
1360 | // expand it to i96 and we will be generating soft float library calls. |
  if (!isTypeLegal(MVT::f80)) {
    NumRegistersForVT[MVT::f80] = 3*NumRegistersForVT[MVT::i32];
    RegisterTypeForVT[MVT::f80] = RegisterTypeForVT[MVT::i32];
    TransformToType[MVT::f80] = MVT::i32;
    ValueTypeActions.setTypeAction(MVT::f80, TypeSoftenFloat);
1366 | } |
1367 | |
1368 | // Decide how to handle f64. If the target does not have native f64 support, |
1369 | // expand it to i64 and we will be generating soft float library calls. |
1370 | if (!isTypeLegal(VT: MVT::f64)) { |
1371 | NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; |
1372 | RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; |
1373 | TransformToType[MVT::f64] = MVT::i64; |
1374 | ValueTypeActions.setTypeAction(VT: MVT::f64, Action: TypeSoftenFloat); |
1375 | } |
1376 | |
1377 | // Decide how to handle f32. If the target does not have native f32 support, |
1378 | // expand it to i32 and we will be generating soft float library calls. |
1379 | if (!isTypeLegal(VT: MVT::f32)) { |
1380 | NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; |
1381 | RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; |
1382 | TransformToType[MVT::f32] = MVT::i32; |
1383 | ValueTypeActions.setTypeAction(VT: MVT::f32, Action: TypeSoftenFloat); |
1384 | } |
1385 | |
1386 | // Decide how to handle f16. If the target does not have native f16 support, |
1387 | // promote it to f32, because there are no f16 library calls (except for |
1388 | // conversions). |
1389 | if (!isTypeLegal(VT: MVT::f16)) { |
1390 | // Allow targets to control how we legalize half. |
1391 | bool SoftPromoteHalfType = softPromoteHalfType(); |
1392 | bool UseFPRegsForHalfType = !SoftPromoteHalfType || useFPRegsForHalfType(); |
1393 | |
1394 | if (!UseFPRegsForHalfType) { |
1395 | NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; |
1396 | RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; |
1397 | } else { |
1398 | NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; |
1399 | RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; |
1400 | } |
1401 | TransformToType[MVT::f16] = MVT::f32; |
1402 | if (SoftPromoteHalfType) { |
1403 | ValueTypeActions.setTypeAction(VT: MVT::f16, Action: TypeSoftPromoteHalf); |
1404 | } else { |
1405 | ValueTypeActions.setTypeAction(VT: MVT::f16, Action: TypePromoteFloat); |
1406 | } |
1407 | } |
1408 | |
1409 | // Decide how to handle bf16. If the target does not have native bf16 support, |
1410 | // promote it to f32, because there are no bf16 library calls (except for |
1411 | // converting from f32 to bf16). |
1412 | if (!isTypeLegal(VT: MVT::bf16)) { |
1413 | NumRegistersForVT[MVT::bf16] = NumRegistersForVT[MVT::f32]; |
1414 | RegisterTypeForVT[MVT::bf16] = RegisterTypeForVT[MVT::f32]; |
1415 | TransformToType[MVT::bf16] = MVT::f32; |
1416 | ValueTypeActions.setTypeAction(VT: MVT::bf16, Action: TypeSoftPromoteHalf); |
1417 | } |
1418 | |
1419 | // Loop over all of the vector value types to see which need transformations. |
1420 | for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; |
1421 | i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { |
1422 | MVT VT = (MVT::SimpleValueType) i; |
1423 | if (isTypeLegal(VT)) |
1424 | continue; |
1425 | |
1426 | MVT EltVT = VT.getVectorElementType(); |
1427 | ElementCount EC = VT.getVectorElementCount(); |
1428 | bool IsLegalWiderType = false; |
1429 | bool IsScalable = VT.isScalableVector(); |
1430 | LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT); |
1431 | switch (PreferredAction) { |
1432 | case TypePromoteInteger: { |
1433 | MVT::SimpleValueType EndVT = IsScalable ? |
1434 | MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE : |
1435 | MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE; |
1436 | // Try to promote the elements of integer vectors. If no legal |
1437 | // promotion was found, fall through to the widen-vector method. |
1438 | for (unsigned nVT = i + 1; |
1439 | (MVT::SimpleValueType)nVT <= EndVT; ++nVT) { |
1440 | MVT SVT = (MVT::SimpleValueType) nVT; |
1441 | // Promote vectors of integers to vectors with the same number |
1442 | // of elements, with a wider element type. |
1443 | if (SVT.getScalarSizeInBits() > EltVT.getFixedSizeInBits() && |
1444 | SVT.getVectorElementCount() == EC && isTypeLegal(VT: SVT)) { |
1445 | TransformToType[i] = SVT; |
1446 | RegisterTypeForVT[i] = SVT; |
1447 | NumRegistersForVT[i] = 1; |
1448 | ValueTypeActions.setTypeAction(VT, Action: TypePromoteInteger); |
1449 | IsLegalWiderType = true; |
1450 | break; |
1451 | } |
1452 | } |
1453 | if (IsLegalWiderType) |
1454 | break; |
1455 | [[fallthrough]]; |
1456 | } |
1457 | |
1458 | case TypeWidenVector: |
1459 | if (isPowerOf2_32(Value: EC.getKnownMinValue())) { |
1460 | // Try to widen the vector. |
1461 | for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { |
1462 | MVT SVT = (MVT::SimpleValueType) nVT; |
1463 | if (SVT.getVectorElementType() == EltVT && |
1464 | SVT.isScalableVector() == IsScalable && |
1465 | SVT.getVectorElementCount().getKnownMinValue() > |
1466 | EC.getKnownMinValue() && |
1467 | isTypeLegal(VT: SVT)) { |
1468 | TransformToType[i] = SVT; |
1469 | RegisterTypeForVT[i] = SVT; |
1470 | NumRegistersForVT[i] = 1; |
1471 | ValueTypeActions.setTypeAction(VT, Action: TypeWidenVector); |
1472 | IsLegalWiderType = true; |
1473 | break; |
1474 | } |
1475 | } |
1476 | if (IsLegalWiderType) |
1477 | break; |
1478 | } else { |
1479 | // Only widen to the next power of 2 to keep consistency with EVT. |
1480 | MVT NVT = VT.getPow2VectorType(); |
1481 | if (isTypeLegal(VT: NVT)) { |
1482 | TransformToType[i] = NVT; |
1483 | ValueTypeActions.setTypeAction(VT, Action: TypeWidenVector); |
1484 | RegisterTypeForVT[i] = NVT; |
1485 | NumRegistersForVT[i] = 1; |
1486 | break; |
1487 | } |
1488 | } |
1489 | [[fallthrough]]; |
1490 | |
1491 | case TypeSplitVector: |
1492 | case TypeScalarizeVector: { |
1493 | MVT IntermediateVT; |
1494 | MVT RegisterVT; |
1495 | unsigned NumIntermediates; |
1496 | unsigned NumRegisters = getVectorTypeBreakdownMVT(VT, IntermediateVT, |
1497 | NumIntermediates, RegisterVT, TLI: this); |
1498 | NumRegistersForVT[i] = NumRegisters; |
1499 | assert(NumRegistersForVT[i] == NumRegisters && |
1500 | "NumRegistersForVT size cannot represent NumRegisters!" ); |
1501 | RegisterTypeForVT[i] = RegisterVT; |
1502 | |
1503 | MVT NVT = VT.getPow2VectorType(); |
1504 | if (NVT == VT) { |
1505 | // Type is already a power of 2. The default action is to split. |
1506 | TransformToType[i] = MVT::Other; |
1507 | if (PreferredAction == TypeScalarizeVector) |
1508 | ValueTypeActions.setTypeAction(VT, Action: TypeScalarizeVector); |
1509 | else if (PreferredAction == TypeSplitVector) |
1510 | ValueTypeActions.setTypeAction(VT, Action: TypeSplitVector); |
1511 | else if (EC.getKnownMinValue() > 1) |
1512 | ValueTypeActions.setTypeAction(VT, Action: TypeSplitVector); |
1513 | else |
1514 | ValueTypeActions.setTypeAction(VT, Action: EC.isScalable() |
1515 | ? TypeScalarizeScalableVector |
1516 | : TypeScalarizeVector); |
1517 | } else { |
1518 | TransformToType[i] = NVT; |
1519 | ValueTypeActions.setTypeAction(VT, Action: TypeWidenVector); |
1520 | } |
1521 | break; |
1522 | } |
1523 | default: |
1524 | llvm_unreachable("Unknown vector legalization action!" ); |
1525 | } |
1526 | } |
1527 | |
// Determine the 'representative' register class for each value type.
// A representative register class is the largest legal register class (i.e.
// one that is not a sub-register class of any other legal class) for a group
// of value types. For example, on i386 the representative class for i8, i16,
// and i32 would be GR32; on x86_64 it is GR64.
1533 | for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) { |
1534 | const TargetRegisterClass* RRC; |
1535 | uint8_t Cost; |
1536 | std::tie(args&: RRC, args&: Cost) = findRepresentativeClass(TRI, VT: (MVT::SimpleValueType)i); |
1537 | RepRegClassForVT[i] = RRC; |
1538 | RepRegClassCostForVT[i] = Cost; |
1539 | } |
1540 | } |
1541 | |
1542 | EVT TargetLoweringBase::getSetCCResultType(const DataLayout &DL, LLVMContext &, |
1543 | EVT VT) const { |
1544 | assert(!VT.isVector() && "No default SetCC type for vectors!" ); |
1545 | return getPointerTy(DL).SimpleTy; |
1546 | } |
1547 | |
1548 | MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const { |
1549 | return MVT::i32; // return the default value |
1550 | } |
1551 | |
1552 | /// getVectorTypeBreakdown - Vector types are broken down into some number of |
1553 | /// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 |
1554 | /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. |
1555 | /// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. |
1556 | /// |
1557 | /// This method returns the number of registers needed, and the VT for each |
1558 | /// register. It also returns the VT and quantity of the intermediate values |
1559 | /// before they are promoted/expanded. |
1560 | unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, |
1561 | EVT VT, EVT &IntermediateVT, |
1562 | unsigned &NumIntermediates, |
1563 | MVT &RegisterVT) const { |
1564 | ElementCount EltCnt = VT.getVectorElementCount(); |
1565 | |
1566 | // If there is a wider vector type with the same element type as this one, |
1567 | // or a promoted vector type that has the same number of elements which |
1568 | // are wider, then we should convert to that legal vector type. |
1569 | // This handles things like <2 x float> -> <4 x float> and |
1570 | // <4 x i1> -> <4 x i32>. |
1571 | LegalizeTypeAction TA = getTypeAction(Context, VT); |
1572 | if (!EltCnt.isScalar() && |
1573 | (TA == TypeWidenVector || TA == TypePromoteInteger)) { |
1574 | EVT RegisterEVT = getTypeToTransformTo(Context, VT); |
1575 | if (isTypeLegal(VT: RegisterEVT)) { |
1576 | IntermediateVT = RegisterEVT; |
1577 | RegisterVT = RegisterEVT.getSimpleVT(); |
1578 | NumIntermediates = 1; |
1579 | return 1; |
1580 | } |
1581 | } |
1582 | |
1583 | // Figure out the right, legal destination reg to copy into. |
1584 | EVT EltTy = VT.getVectorElementType(); |
1585 | |
1586 | unsigned NumVectorRegs = 1; |
1587 | |
// Scalable vectors cannot be scalarized, so handle the legalization of these
// types as it is done elsewhere in SelectionDAG.
1590 | if (EltCnt.isScalable()) { |
1591 | LegalizeKind LK; |
1592 | EVT PartVT = VT; |
1593 | do { |
1594 | // Iterate until we've found a legal (part) type to hold VT. |
1595 | LK = getTypeConversion(Context, VT: PartVT); |
1596 | PartVT = LK.second; |
1597 | } while (LK.first != TypeLegal); |
1598 | |
1599 | if (!PartVT.isVector()) { |
1600 | report_fatal_error( |
1601 | reason: "Don't know how to legalize this scalable vector type" ); |
1602 | } |
1603 | |
1604 | NumIntermediates = |
1605 | divideCeil(Numerator: VT.getVectorElementCount().getKnownMinValue(), |
1606 | Denominator: PartVT.getVectorElementCount().getKnownMinValue()); |
1607 | IntermediateVT = PartVT; |
1608 | RegisterVT = getRegisterType(Context, VT: IntermediateVT); |
1609 | return NumIntermediates; |
1610 | } |
1611 | |
1612 | // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally |
1613 | // we could break down into LHS/RHS like LegalizeDAG does. |
1614 | if (!isPowerOf2_32(Value: EltCnt.getKnownMinValue())) { |
1615 | NumVectorRegs = EltCnt.getKnownMinValue(); |
1616 | EltCnt = ElementCount::getFixed(MinVal: 1); |
1617 | } |
1618 | |
1619 | // Divide the input until we get to a supported size. This will always |
1620 | // end with a scalar if the target doesn't support vectors. |
1621 | while (EltCnt.getKnownMinValue() > 1 && |
1622 | !isTypeLegal(VT: EVT::getVectorVT(Context, VT: EltTy, EC: EltCnt))) { |
1623 | EltCnt = EltCnt.divideCoefficientBy(RHS: 2); |
1624 | NumVectorRegs <<= 1; |
1625 | } |
1626 | |
1627 | NumIntermediates = NumVectorRegs; |
1628 | |
1629 | EVT NewVT = EVT::getVectorVT(Context, VT: EltTy, EC: EltCnt); |
1630 | if (!isTypeLegal(VT: NewVT)) |
1631 | NewVT = EltTy; |
1632 | IntermediateVT = NewVT; |
1633 | |
1634 | MVT DestVT = getRegisterType(Context, VT: NewVT); |
1635 | RegisterVT = DestVT; |
1636 | |
1637 | if (EVT(DestVT).bitsLT(VT: NewVT)) { // Value is expanded, e.g. i64 -> i16. |
1638 | TypeSize NewVTSize = NewVT.getSizeInBits(); |
1639 | // Convert sizes such as i33 to i64. |
1640 | if (!llvm::has_single_bit<uint32_t>(Value: NewVTSize.getKnownMinValue())) |
1641 | NewVTSize = NewVTSize.coefficientNextPowerOf2(); |
1642 | return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); |
1643 | } |
1644 | |
// Otherwise, promoted or legal types use the same number of registers as
// the vector decimated to the appropriate level.
1647 | return NumVectorRegs; |
1648 | } |
1649 | |
1650 | bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI, |
1651 | uint64_t NumCases, |
1652 | uint64_t Range, |
1653 | ProfileSummaryInfo *PSI, |
1654 | BlockFrequencyInfo *BFI) const { |
// FIXME: This function checks the maximum table size and density, but not
// the minimum size. It would be nice if the minimum size check were also
// combined into this function. Currently, the minimum size check is
// performed in findJumpTable() in SelectionDAGBuilder and
// getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
1660 | const bool OptForSize = |
1661 | llvm::shouldOptimizeForSize(BB: SI->getParent(), PSI, BFI); |
1662 | const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize); |
1663 | const unsigned MaxJumpTableSize = getMaximumJumpTableSize(); |
1664 | |
1665 | // Check whether the number of cases is small enough and |
1666 | // the range is dense enough for a jump table. |
1667 | return (OptForSize || Range <= MaxJumpTableSize) && |
1668 | (NumCases * 100 >= Range * MinDensity); |
1669 | } |
1670 | |
1671 | MVT TargetLoweringBase::getPreferredSwitchConditionType(LLVMContext &Context, |
1672 | EVT ConditionVT) const { |
1673 | return getRegisterType(Context, VT: ConditionVT); |
1674 | } |
1675 | |
1676 | /// Get the EVTs and ArgFlags collections that represent the legalized return |
1677 | /// type of the given function. This does not require a DAG or a return value, |
1678 | /// and is suitable for use before any DAGs for the function are constructed. |
1679 | /// TODO: Move this out of TargetLowering.cpp. |
1680 | void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, |
1681 | AttributeList attr, |
1682 | SmallVectorImpl<ISD::OutputArg> &Outs, |
1683 | const TargetLowering &TLI, const DataLayout &DL) { |
1684 | SmallVector<EVT, 4> ValueVTs; |
1685 | ComputeValueVTs(TLI, DL, Ty: ReturnType, ValueVTs); |
1686 | unsigned NumValues = ValueVTs.size(); |
1687 | if (NumValues == 0) return; |
1688 | |
1689 | for (unsigned j = 0, f = NumValues; j != f; ++j) { |
1690 | EVT VT = ValueVTs[j]; |
1691 | ISD::NodeType ExtendKind = ISD::ANY_EXTEND; |
1692 | |
1693 | if (attr.hasRetAttr(Kind: Attribute::SExt)) |
1694 | ExtendKind = ISD::SIGN_EXTEND; |
1695 | else if (attr.hasRetAttr(Kind: Attribute::ZExt)) |
1696 | ExtendKind = ISD::ZERO_EXTEND; |
1697 | |
1698 | if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) |
1699 | VT = TLI.getTypeForExtReturn(Context&: ReturnType->getContext(), VT, ExtendKind); |
1700 | |
1701 | unsigned NumParts = |
1702 | TLI.getNumRegistersForCallingConv(Context&: ReturnType->getContext(), CC, VT); |
1703 | MVT PartVT = |
1704 | TLI.getRegisterTypeForCallingConv(Context&: ReturnType->getContext(), CC, VT); |
1705 | |
1706 | // 'inreg' on function refers to return value |
1707 | ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); |
1708 | if (attr.hasRetAttr(Kind: Attribute::InReg)) |
1709 | Flags.setInReg(); |
1710 | |
1711 | // Propagate extension type if any |
1712 | if (attr.hasRetAttr(Kind: Attribute::SExt)) |
1713 | Flags.setSExt(); |
1714 | else if (attr.hasRetAttr(Kind: Attribute::ZExt)) |
1715 | Flags.setZExt(); |
1716 | |
1717 | for (unsigned i = 0; i < NumParts; ++i) |
1718 | Outs.push_back(Elt: ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0)); |
1719 | } |
1720 | } |
1721 | |
1722 | Align TargetLoweringBase::getByValTypeAlignment(Type *Ty, |
1723 | const DataLayout &DL) const { |
1724 | return DL.getABITypeAlign(Ty); |
1725 | } |
1726 | |
1727 | bool TargetLoweringBase::allowsMemoryAccessForAlignment( |
1728 | LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, |
1729 | Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const { |
1730 | // Check if the specified alignment is sufficient based on the data layout. |
1731 | // TODO: While using the data layout works in practice, a better solution |
1732 | // would be to implement this check directly (make this a virtual function). |
1733 | // For example, the ABI alignment may change based on software platform while |
1734 | // this function should only be affected by hardware implementation. |
1735 | Type *Ty = VT.getTypeForEVT(Context); |
1736 | if (VT.isZeroSized() || Alignment >= DL.getABITypeAlign(Ty)) { |
1737 | // Assume that an access that meets the ABI-specified alignment is fast. |
1738 | if (Fast != nullptr) |
1739 | *Fast = 1; |
1740 | return true; |
1741 | } |
1742 | |
1743 | // This is a misaligned access. |
1744 | return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast); |
1745 | } |
1746 | |
1747 | bool TargetLoweringBase::allowsMemoryAccessForAlignment( |
1748 | LLVMContext &Context, const DataLayout &DL, EVT VT, |
1749 | const MachineMemOperand &MMO, unsigned *Fast) const { |
1750 | return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace: MMO.getAddrSpace(), |
1751 | Alignment: MMO.getAlign(), Flags: MMO.getFlags(), Fast); |
1752 | } |
1753 | |
1754 | bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, |
1755 | const DataLayout &DL, EVT VT, |
1756 | unsigned AddrSpace, Align Alignment, |
1757 | MachineMemOperand::Flags Flags, |
1758 | unsigned *Fast) const { |
1759 | return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment, |
1760 | Flags, Fast); |
1761 | } |
1762 | |
1763 | bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, |
1764 | const DataLayout &DL, EVT VT, |
1765 | const MachineMemOperand &MMO, |
1766 | unsigned *Fast) const { |
1767 | return allowsMemoryAccess(Context, DL, VT, AddrSpace: MMO.getAddrSpace(), Alignment: MMO.getAlign(), |
1768 | Flags: MMO.getFlags(), Fast); |
1769 | } |
1770 | |
1771 | bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, |
1772 | const DataLayout &DL, LLT Ty, |
1773 | const MachineMemOperand &MMO, |
1774 | unsigned *Fast) const { |
1775 | EVT VT = getApproximateEVTForLLT(Ty, Ctx&: Context); |
1776 | return allowsMemoryAccess(Context, DL, VT, AddrSpace: MMO.getAddrSpace(), Alignment: MMO.getAlign(), |
1777 | Flags: MMO.getFlags(), Fast); |
1778 | } |
1779 | |
1780 | //===----------------------------------------------------------------------===// |
1781 | // TargetTransformInfo Helpers |
1782 | //===----------------------------------------------------------------------===// |
1783 | |
1784 | int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { |
1785 | enum InstructionOpcodes { |
1786 | #define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM, |
1787 | #define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM |
1788 | #include "llvm/IR/Instruction.def" |
1789 | }; |
1790 | switch (static_cast<InstructionOpcodes>(Opcode)) { |
1791 | case Ret: return 0; |
1792 | case Br: return 0; |
1793 | case Switch: return 0; |
1794 | case IndirectBr: return 0; |
1795 | case Invoke: return 0; |
1796 | case CallBr: return 0; |
1797 | case Resume: return 0; |
1798 | case Unreachable: return 0; |
1799 | case CleanupRet: return 0; |
1800 | case CatchRet: return 0; |
1801 | case CatchPad: return 0; |
1802 | case CatchSwitch: return 0; |
1803 | case CleanupPad: return 0; |
1804 | case FNeg: return ISD::FNEG; |
1805 | case Add: return ISD::ADD; |
1806 | case FAdd: return ISD::FADD; |
1807 | case Sub: return ISD::SUB; |
1808 | case FSub: return ISD::FSUB; |
1809 | case Mul: return ISD::MUL; |
1810 | case FMul: return ISD::FMUL; |
1811 | case UDiv: return ISD::UDIV; |
1812 | case SDiv: return ISD::SDIV; |
1813 | case FDiv: return ISD::FDIV; |
1814 | case URem: return ISD::UREM; |
1815 | case SRem: return ISD::SREM; |
1816 | case FRem: return ISD::FREM; |
1817 | case Shl: return ISD::SHL; |
1818 | case LShr: return ISD::SRL; |
1819 | case AShr: return ISD::SRA; |
1820 | case And: return ISD::AND; |
1821 | case Or: return ISD::OR; |
1822 | case Xor: return ISD::XOR; |
1823 | case Alloca: return 0; |
1824 | case Load: return ISD::LOAD; |
1825 | case Store: return ISD::STORE; |
1826 | case GetElementPtr: return 0; |
1827 | case Fence: return 0; |
1828 | case AtomicCmpXchg: return 0; |
1829 | case AtomicRMW: return 0; |
1830 | case Trunc: return ISD::TRUNCATE; |
1831 | case ZExt: return ISD::ZERO_EXTEND; |
1832 | case SExt: return ISD::SIGN_EXTEND; |
1833 | case FPToUI: return ISD::FP_TO_UINT; |
1834 | case FPToSI: return ISD::FP_TO_SINT; |
1835 | case UIToFP: return ISD::UINT_TO_FP; |
1836 | case SIToFP: return ISD::SINT_TO_FP; |
1837 | case FPTrunc: return ISD::FP_ROUND; |
1838 | case FPExt: return ISD::FP_EXTEND; |
1839 | case PtrToInt: return ISD::BITCAST; |
1840 | case IntToPtr: return ISD::BITCAST; |
1841 | case BitCast: return ISD::BITCAST; |
1842 | case AddrSpaceCast: return ISD::ADDRSPACECAST; |
1843 | case ICmp: return ISD::SETCC; |
1844 | case FCmp: return ISD::SETCC; |
1845 | case PHI: return 0; |
1846 | case Call: return 0; |
1847 | case Select: return ISD::SELECT; |
1848 | case UserOp1: return 0; |
1849 | case UserOp2: return 0; |
1850 | case VAArg: return 0; |
1851 | case ExtractElement: return ISD::EXTRACT_VECTOR_ELT; |
1852 | case InsertElement: return ISD::INSERT_VECTOR_ELT; |
1853 | case ShuffleVector: return ISD::VECTOR_SHUFFLE; |
1854 | case ExtractValue: return ISD::MERGE_VALUES; |
1855 | case InsertValue: return ISD::MERGE_VALUES; |
1856 | case LandingPad: return 0; |
1857 | case Freeze: return ISD::FREEZE; |
1858 | } |
1859 | |
1860 | llvm_unreachable("Unknown instruction type encountered!" ); |
1861 | } |
1862 | |
1863 | int TargetLoweringBase::IntrinsicIDToISD(Intrinsic::ID ID) const { |
1864 | switch (ID) { |
1865 | case Intrinsic::exp: |
1866 | return ISD::FEXP; |
1867 | case Intrinsic::exp2: |
1868 | return ISD::FEXP2; |
1869 | default: |
1870 | return ISD::DELETED_NODE; |
1871 | } |
1872 | } |
1873 | |
1874 | Value * |
1875 | TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB, |
1876 | bool UseTLS) const { |
1877 | // compiler-rt provides a variable with a magic name. Targets that do not |
1878 | // link with compiler-rt may also provide such a variable. |
1879 | Module *M = IRB.GetInsertBlock()->getParent()->getParent(); |
1880 | const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr" ; |
1881 | auto UnsafeStackPtr = |
1882 | dyn_cast_or_null<GlobalVariable>(Val: M->getNamedValue(Name: UnsafeStackPtrVar)); |
1883 | |
1884 | const DataLayout &DL = M->getDataLayout(); |
1885 | PointerType *StackPtrTy = DL.getAllocaPtrType(Ctx&: M->getContext()); |
1886 | |
1887 | if (!UnsafeStackPtr) { |
1888 | auto TLSModel = UseTLS ? |
1889 | GlobalValue::InitialExecTLSModel : |
1890 | GlobalValue::NotThreadLocal; |
1891 | // The global variable is not defined yet, define it ourselves. |
1892 | // We use the initial-exec TLS model because we do not support the |
1893 | // variable living anywhere other than in the main executable. |
1894 | UnsafeStackPtr = new GlobalVariable( |
1895 | *M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr, |
1896 | UnsafeStackPtrVar, nullptr, TLSModel); |
1897 | } else { |
1898 | // The variable exists, check its type and attributes. |
1899 | // |
1900 | // FIXME: Move to IR verifier. |
1901 | if (UnsafeStackPtr->getValueType() != StackPtrTy) |
1902 | report_fatal_error(reason: Twine(UnsafeStackPtrVar) + " must have void* type" ); |
1903 | if (UseTLS != UnsafeStackPtr->isThreadLocal()) |
1904 | report_fatal_error(reason: Twine(UnsafeStackPtrVar) + " must " + |
1905 | (UseTLS ? "" : "not " ) + "be thread-local" ); |
1906 | } |
1907 | return UnsafeStackPtr; |
1908 | } |
1909 | |
1910 | Value * |
1911 | TargetLoweringBase::getSafeStackPointerLocation(IRBuilderBase &IRB) const { |
1912 | if (!TM.getTargetTriple().isAndroid()) |
1913 | return getDefaultSafeStackPointerLocation(IRB, UseTLS: true); |
1914 | |
1915 | // Android provides a libc function to retrieve the address of the current |
1916 | // thread's unsafe stack pointer. |
1917 | Module *M = IRB.GetInsertBlock()->getParent()->getParent(); |
1918 | auto *PtrTy = PointerType::getUnqual(C&: M->getContext()); |
1919 | FunctionCallee Fn = |
1920 | M->getOrInsertFunction(Name: "__safestack_pointer_address" , RetTy: PtrTy); |
1921 | return IRB.CreateCall(Callee: Fn); |
1922 | } |
1923 | |
1924 | //===----------------------------------------------------------------------===// |
1925 | // Loop Strength Reduction hooks |
1926 | //===----------------------------------------------------------------------===// |
1927 | |
1928 | /// isLegalAddressingMode - Return true if the addressing mode represented |
1929 | /// by AM is legal for this target, for a load/store of the specified type. |
1930 | bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL, |
1931 | const AddrMode &AM, Type *Ty, |
1932 | unsigned AS, Instruction *I) const { |
// The default implementation of this supports a conservative RISC-style
// r+r and r+i addressing mode.
1935 | |
1936 | // Scalable offsets not supported |
1937 | if (AM.ScalableOffset) |
1938 | return false; |
1939 | |
1940 | // Allows a sign-extended 16-bit immediate field. |
1941 | if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) |
1942 | return false; |
1943 | |
1944 | // No global is ever allowed as a base. |
1945 | if (AM.BaseGV) |
1946 | return false; |
1947 | |
1948 | // Only support r+r, |
1949 | switch (AM.Scale) { |
1950 | case 0: // "r+i" or just "i", depending on HasBaseReg. |
1951 | break; |
1952 | case 1: |
1953 | if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. |
1954 | return false; |
1955 | // Otherwise we have r+r or r+i. |
1956 | break; |
1957 | case 2: |
1958 | if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. |
1959 | return false; |
1960 | // Allow 2*r as r+r. |
1961 | break; |
1962 | default: // Don't allow n * r |
1963 | return false; |
1964 | } |
1965 | |
1966 | return true; |
1967 | } |
1968 | |
1969 | //===----------------------------------------------------------------------===// |
1970 | // Stack Protector |
1971 | //===----------------------------------------------------------------------===// |
1972 | |
// For OpenBSD return its special guard variable. Otherwise return nullptr,
// so that SelectionDAG handles SSP.
1975 | Value *TargetLoweringBase::getIRStackGuard(IRBuilderBase &IRB) const { |
1976 | if (getTargetMachine().getTargetTriple().isOSOpenBSD()) { |
1977 | Module &M = *IRB.GetInsertBlock()->getParent()->getParent(); |
1978 | PointerType *PtrTy = PointerType::getUnqual(C&: M.getContext()); |
1979 | GlobalVariable *G = M.getOrInsertGlobal(Name: "__guard_local" , Ty: PtrTy); |
1980 | G->setVisibility(GlobalValue::HiddenVisibility); |
1981 | return G; |
1982 | } |
1983 | return nullptr; |
1984 | } |
1985 | |
1986 | // Currently only support "standard" __stack_chk_guard. |
1987 | // TODO: add LOAD_STACK_GUARD support. |
1988 | void TargetLoweringBase::insertSSPDeclarations(Module &M) const { |
1989 | if (!M.getNamedValue(Name: "__stack_chk_guard" )) { |
1990 | auto *GV = new GlobalVariable(M, PointerType::getUnqual(C&: M.getContext()), |
1991 | false, GlobalVariable::ExternalLinkage, |
1992 | nullptr, "__stack_chk_guard" ); |
1993 | |
// FreeBSD has "__stack_chk_guard" defined externally in libc.so
1995 | if (M.getDirectAccessExternalData() && |
1996 | !TM.getTargetTriple().isWindowsGNUEnvironment() && |
1997 | !(TM.getTargetTriple().isPPC64() && |
1998 | TM.getTargetTriple().isOSFreeBSD()) && |
1999 | (!TM.getTargetTriple().isOSDarwin() || |
2000 | TM.getRelocationModel() == Reloc::Static)) |
2001 | GV->setDSOLocal(true); |
2002 | } |
2003 | } |
2004 | |
2005 | // Currently only support "standard" __stack_chk_guard. |
2006 | // TODO: add LOAD_STACK_GUARD support. |
2007 | Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const { |
2008 | if (getTargetMachine().getTargetTriple().isOSOpenBSD()) { |
2009 | return M.getNamedValue(Name: "__guard_local" ); |
2010 | } |
2011 | return M.getNamedValue(Name: "__stack_chk_guard" ); |
2012 | } |
2013 | |
2014 | Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { |
2015 | return nullptr; |
2016 | } |
2017 | |
2018 | unsigned TargetLoweringBase::getMinimumJumpTableEntries() const { |
2019 | return MinimumJumpTableEntries; |
2020 | } |
2021 | |
2022 | void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) { |
2023 | MinimumJumpTableEntries = Val; |
2024 | } |
2025 | |
2026 | unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const { |
2027 | return OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; |
2028 | } |
2029 | |
2030 | unsigned TargetLoweringBase::getMaximumJumpTableSize() const { |
2031 | return MaximumJumpTableSize; |
2032 | } |
2033 | |
2034 | void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) { |
2035 | MaximumJumpTableSize = Val; |
2036 | } |
2037 | |
2038 | bool TargetLoweringBase::isJumpTableRelative() const { |
2039 | return getTargetMachine().isPositionIndependent(); |
2040 | } |
2041 | |
2042 | Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const { |
2043 | if (TM.Options.LoopAlignment) |
2044 | return Align(TM.Options.LoopAlignment); |
2045 | return PrefLoopAlignment; |
2046 | } |
2047 | |
2048 | unsigned TargetLoweringBase::getMaxPermittedBytesForAlignment( |
2049 | MachineBasicBlock *MBB) const { |
2050 | return MaxBytesForAlignment; |
2051 | } |
2052 | |
2053 | //===----------------------------------------------------------------------===// |
2054 | // Reciprocal Estimates |
2055 | //===----------------------------------------------------------------------===// |
2056 | |
2057 | /// Get the reciprocal estimate attribute string for a function that will |
2058 | /// override the target defaults. |
2059 | static StringRef getRecipEstimateForFunc(MachineFunction &MF) { |
2060 | const Function &F = MF.getFunction(); |
2061 | return F.getFnAttribute(Kind: "reciprocal-estimates" ).getValueAsString(); |
2062 | } |
2063 | |
2064 | /// Construct a string for the given reciprocal operation of the given type. |
2065 | /// This string should match the corresponding option to the front-end's |
2066 | /// "-mrecip" flag assuming those strings have been passed through in an |
2067 | /// attribute string. For example, "vec-divf" for a division of a vXf32. |
2068 | static std::string getReciprocalOpName(bool IsSqrt, EVT VT) { |
2069 | std::string Name = VT.isVector() ? "vec-" : "" ; |
2070 | |
2071 | Name += IsSqrt ? "sqrt" : "div" ; |
2072 | |
2073 | // TODO: Handle other float types? |
2074 | if (VT.getScalarType() == MVT::f64) { |
2075 | Name += "d" ; |
2076 | } else if (VT.getScalarType() == MVT::f16) { |
2077 | Name += "h" ; |
2078 | } else { |
2079 | assert(VT.getScalarType() == MVT::f32 && |
2080 | "Unexpected FP type for reciprocal estimate" ); |
2081 | Name += "f" ; |
2082 | } |
2083 | |
2084 | return Name; |
2085 | } |
2086 | |
2087 | /// Return the character position and value (a single numeric character) of a |
2088 | /// customized refinement operation in the input string if it exists. Return |
2089 | /// false if there is no customized refinement step count. |
2090 | static bool parseRefinementStep(StringRef In, size_t &Position, |
2091 | uint8_t &Value) { |
2092 | const char RefStepToken = ':'; |
2093 | Position = In.find(C: RefStepToken); |
2094 | if (Position == StringRef::npos) |
2095 | return false; |
2096 | |
2097 | StringRef RefStepString = In.substr(Start: Position + 1); |
2098 | // Allow exactly one numeric character for the additional refinement |
2099 | // step parameter. |
2100 | if (RefStepString.size() == 1) { |
2101 | char RefStepChar = RefStepString[0]; |
2102 | if (isDigit(C: RefStepChar)) { |
2103 | Value = RefStepChar - '0'; |
2104 | return true; |
2105 | } |
2106 | } |
2107 | report_fatal_error(reason: "Invalid refinement step for -recip." ); |
2108 | } |
2109 | |
2110 | /// For the input attribute string, return one of the ReciprocalEstimate enum |
2111 | /// status values (enabled, disabled, or not specified) for this operation on |
2112 | /// the specified data type. |
2113 | static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) { |
2114 | if (Override.empty()) |
2115 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2116 | |
2117 | SmallVector<StringRef, 4> OverrideVector; |
2118 | Override.split(A&: OverrideVector, Separator: ','); |
2119 | unsigned NumArgs = OverrideVector.size(); |
2120 | |
2121 | // Check if "all", "none", or "default" was specified. |
2122 | if (NumArgs == 1) { |
2123 | // Look for an optional setting of the number of refinement steps needed |
2124 | // for this type of reciprocal operation. |
2125 | size_t RefPos; |
2126 | uint8_t RefSteps; |
2127 | if (parseRefinementStep(In: Override, Position&: RefPos, Value&: RefSteps)) { |
2128 | // Split the string for further processing. |
2129 | Override = Override.substr(Start: 0, N: RefPos); |
2130 | } |
2131 | |
2132 | // All reciprocal types are enabled. |
2133 | if (Override == "all" ) |
2134 | return TargetLoweringBase::ReciprocalEstimate::Enabled; |
2135 | |
2136 | // All reciprocal types are disabled. |
2137 | if (Override == "none" ) |
2138 | return TargetLoweringBase::ReciprocalEstimate::Disabled; |
2139 | |
2140 | // Target defaults for enablement are used. |
2141 | if (Override == "default" ) |
2142 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2143 | } |
2144 | |
2145 | // The attribute string may omit the size suffix ('f'/'d'). |
2146 | std::string VTName = getReciprocalOpName(IsSqrt, VT); |
2147 | std::string VTNameNoSize = VTName; |
2148 | VTNameNoSize.pop_back(); |
2149 | static const char DisabledPrefix = '!'; |
2150 | |
2151 | for (StringRef RecipType : OverrideVector) { |
2152 | size_t RefPos; |
2153 | uint8_t RefSteps; |
2154 | if (parseRefinementStep(In: RecipType, Position&: RefPos, Value&: RefSteps)) |
2155 | RecipType = RecipType.substr(Start: 0, N: RefPos); |
2156 | |
2157 | // Ignore the disablement token for string matching. |
2158 | bool IsDisabled = RecipType[0] == DisabledPrefix; |
2159 | if (IsDisabled) |
2160 | RecipType = RecipType.substr(Start: 1); |
2161 | |
2162 | if (RecipType == VTName || RecipType == VTNameNoSize) |
2163 | return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled |
2164 | : TargetLoweringBase::ReciprocalEstimate::Enabled; |
2165 | } |
2166 | |
2167 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2168 | } |
2169 | |
2170 | /// For the input attribute string, return the customized refinement step count |
2171 | /// for this operation on the specified data type. If the step count does not |
2172 | /// exist, return the ReciprocalEstimate enum value for unspecified. |
2173 | static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) { |
2174 | if (Override.empty()) |
2175 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2176 | |
2177 | SmallVector<StringRef, 4> OverrideVector; |
2178 | Override.split(A&: OverrideVector, Separator: ','); |
2179 | unsigned NumArgs = OverrideVector.size(); |
2180 | |
2181 | // Check if "all", "default", or "none" was specified. |
2182 | if (NumArgs == 1) { |
2183 | // Look for an optional setting of the number of refinement steps needed |
2184 | // for this type of reciprocal operation. |
2185 | size_t RefPos; |
2186 | uint8_t RefSteps; |
2187 | if (!parseRefinementStep(In: Override, Position&: RefPos, Value&: RefSteps)) |
2188 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2189 | |
2190 | // Split the string for further processing. |
2191 | Override = Override.substr(Start: 0, N: RefPos); |
assert(Override != "none" &&
       "Disabled reciprocals, but specified refinement steps?" );
2194 | |
2195 | // If this is a general override, return the specified number of steps. |
2196 | if (Override == "all" || Override == "default" ) |
2197 | return RefSteps; |
2198 | } |
2199 | |
2200 | // The attribute string may omit the size suffix ('f'/'d'). |
2201 | std::string VTName = getReciprocalOpName(IsSqrt, VT); |
2202 | std::string VTNameNoSize = VTName; |
2203 | VTNameNoSize.pop_back(); |
2204 | |
2205 | for (StringRef RecipType : OverrideVector) { |
2206 | size_t RefPos; |
2207 | uint8_t RefSteps; |
2208 | if (!parseRefinementStep(In: RecipType, Position&: RefPos, Value&: RefSteps)) |
2209 | continue; |
2210 | |
2211 | RecipType = RecipType.substr(Start: 0, N: RefPos); |
2212 | if (RecipType == VTName || RecipType == VTNameNoSize) |
2213 | return RefSteps; |
2214 | } |
2215 | |
2216 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2217 | } |
2218 | |
2219 | int TargetLoweringBase::getRecipEstimateSqrtEnabled(EVT VT, |
2220 | MachineFunction &MF) const { |
2221 | return getOpEnabled(IsSqrt: true, VT, Override: getRecipEstimateForFunc(MF)); |
2222 | } |
2223 | |
2224 | int TargetLoweringBase::getRecipEstimateDivEnabled(EVT VT, |
2225 | MachineFunction &MF) const { |
2226 | return getOpEnabled(IsSqrt: false, VT, Override: getRecipEstimateForFunc(MF)); |
2227 | } |
2228 | |
2229 | int TargetLoweringBase::getSqrtRefinementSteps(EVT VT, |
2230 | MachineFunction &MF) const { |
2231 | return getOpRefinementSteps(IsSqrt: true, VT, Override: getRecipEstimateForFunc(MF)); |
2232 | } |
2233 | |
2234 | int TargetLoweringBase::getDivRefinementSteps(EVT VT, |
2235 | MachineFunction &MF) const { |
2236 | return getOpRefinementSteps(IsSqrt: false, VT, Override: getRecipEstimateForFunc(MF)); |
2237 | } |
2238 | |
2239 | bool TargetLoweringBase::isLoadBitCastBeneficial( |
2240 | EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, |
2241 | const MachineMemOperand &MMO) const { |
2242 | // Single-element vectors are scalarized, so we should generally avoid having |
2243 | // any memory operations on such types, as they would get scalarized too. |
2244 | if (LoadVT.isFixedLengthVector() && BitcastVT.isFixedLengthVector() && |
2245 | BitcastVT.getVectorNumElements() == 1) |
2246 | return false; |
2247 | |
// Don't do this if we could do an indexed load on the original type, but not
// on the new one.
2250 | if (!LoadVT.isSimple() || !BitcastVT.isSimple()) |
2251 | return true; |
2252 | |
2253 | MVT LoadMVT = LoadVT.getSimpleVT(); |
2254 | |
2255 | // Don't bother doing this if it's just going to be promoted again later, as |
2256 | // doing so might interfere with other combines. |
2257 | if (getOperationAction(Op: ISD::LOAD, VT: LoadMVT) == Promote && |
2258 | getTypeToPromoteTo(Op: ISD::LOAD, VT: LoadMVT) == BitcastVT.getSimpleVT()) |
2259 | return false; |
2260 | |
2261 | unsigned Fast = 0; |
2262 | return allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT: BitcastVT, |
2263 | MMO, Fast: &Fast) && |
2264 | Fast; |
2265 | } |
2266 | |
2267 | void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const { |
2268 | MF.getRegInfo().freezeReservedRegs(); |
2269 | } |
2270 | |
2271 | MachineMemOperand::Flags TargetLoweringBase::getLoadMemOperandFlags( |
2272 | const LoadInst &LI, const DataLayout &DL, AssumptionCache *AC, |
2273 | const TargetLibraryInfo *LibInfo) const { |
2274 | MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad; |
2275 | if (LI.isVolatile()) |
2276 | Flags |= MachineMemOperand::MOVolatile; |
2277 | |
2278 | if (LI.hasMetadata(KindID: LLVMContext::MD_nontemporal)) |
2279 | Flags |= MachineMemOperand::MONonTemporal; |
2280 | |
2281 | if (LI.hasMetadata(KindID: LLVMContext::MD_invariant_load)) |
2282 | Flags |= MachineMemOperand::MOInvariant; |
2283 | |
2284 | if (isDereferenceableAndAlignedPointer(V: LI.getPointerOperand(), Ty: LI.getType(), |
2285 | Alignment: LI.getAlign(), DL, CtxI: &LI, AC, |
2286 | /*DT=*/nullptr, TLI: LibInfo)) |
2287 | Flags |= MachineMemOperand::MODereferenceable; |
2288 | |
2289 | Flags |= getTargetMMOFlags(I: LI); |
2290 | return Flags; |
2291 | } |
2292 | |
2293 | MachineMemOperand::Flags |
2294 | TargetLoweringBase::getStoreMemOperandFlags(const StoreInst &SI, |
2295 | const DataLayout &DL) const { |
2296 | MachineMemOperand::Flags Flags = MachineMemOperand::MOStore; |
2297 | |
2298 | if (SI.isVolatile()) |
2299 | Flags |= MachineMemOperand::MOVolatile; |
2300 | |
2301 | if (SI.hasMetadata(KindID: LLVMContext::MD_nontemporal)) |
2302 | Flags |= MachineMemOperand::MONonTemporal; |
2303 | |
2304 | // FIXME: Not preserving dereferenceable |
2305 | Flags |= getTargetMMOFlags(I: SI); |
2306 | return Flags; |
2307 | } |
2308 | |
2309 | MachineMemOperand::Flags |
2310 | TargetLoweringBase::getAtomicMemOperandFlags(const Instruction &AI, |
2311 | const DataLayout &DL) const { |
2312 | auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; |
2313 | |
2314 | if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Val: &AI)) { |
2315 | if (RMW->isVolatile()) |
2316 | Flags |= MachineMemOperand::MOVolatile; |
2317 | } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: &AI)) { |
2318 | if (CmpX->isVolatile()) |
2319 | Flags |= MachineMemOperand::MOVolatile; |
2320 | } else |
2321 | llvm_unreachable("not an atomic instruction" ); |
2322 | |
2323 | // FIXME: Not preserving dereferenceable |
2324 | Flags |= getTargetMMOFlags(I: AI); |
2325 | return Flags; |
2326 | } |
2327 | |
2328 | Instruction *TargetLoweringBase::emitLeadingFence(IRBuilderBase &Builder, |
2329 | Instruction *Inst, |
2330 | AtomicOrdering Ord) const { |
2331 | if (isReleaseOrStronger(AO: Ord) && Inst->hasAtomicStore()) |
2332 | return Builder.CreateFence(Ordering: Ord); |
2333 | else |
2334 | return nullptr; |
2335 | } |
2336 | |
2337 | Instruction *TargetLoweringBase::emitTrailingFence(IRBuilderBase &Builder, |
2338 | Instruction *Inst, |
2339 | AtomicOrdering Ord) const { |
2340 | if (isAcquireOrStronger(AO: Ord)) |
2341 | return Builder.CreateFence(Ordering: Ord); |
2342 | else |
2343 | return nullptr; |
2344 | } |
2345 | |
2346 | //===----------------------------------------------------------------------===// |
2347 | // GlobalISel Hooks |
2348 | //===----------------------------------------------------------------------===// |
2349 | |
2350 | bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI, |
2351 | const TargetTransformInfo *TTI) const { |
2352 | auto &MF = *MI.getMF(); |
2353 | auto &MRI = MF.getRegInfo(); |
// Assuming a spill and reload of a value has a cost of 1 instruction each,
// this helper function computes the maximum number of uses we should consider
// for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
// break even in terms of code size when the original MI has 2 users vs.
// choosing to potentially spill. With any more than 2 users we have a net
// code size increase. This doesn't take register pressure into account,
// though.
2360 | auto maxUses = [](unsigned RematCost) { |
2361 | // A cost of 1 means remats are basically free. |
2362 | if (RematCost == 1) |
2363 | return std::numeric_limits<unsigned>::max(); |
2364 | if (RematCost == 2) |
2365 | return 2U; |
2366 | |
2367 | // Remat is too expensive, only sink if there's one user. |
2368 | if (RematCost > 2) |
2369 | return 1U; |
2370 | llvm_unreachable("Unexpected remat cost" ); |
2371 | }; |
2372 | |
2373 | switch (MI.getOpcode()) { |
2374 | default: |
2375 | return false; |
// Constant-like instructions should be close to their users.
// We don't want long live-ranges for them.
2378 | case TargetOpcode::G_CONSTANT: |
2379 | case TargetOpcode::G_FCONSTANT: |
2380 | case TargetOpcode::G_FRAME_INDEX: |
2381 | case TargetOpcode::G_INTTOPTR: |
2382 | return true; |
2383 | case TargetOpcode::G_GLOBAL_VALUE: { |
2384 | unsigned RematCost = TTI->getGISelRematGlobalCost(); |
2385 | Register Reg = MI.getOperand(i: 0).getReg(); |
2386 | unsigned MaxUses = maxUses(RematCost); |
2387 | if (MaxUses == UINT_MAX) |
2388 | return true; // Remats are "free" so always localize. |
2389 | return MRI.hasAtMostUserInstrs(Reg, MaxUsers: MaxUses); |
2390 | } |
2391 | } |
2392 | } |
2393 | |