//===- X86LegalizerInfo.cpp --------------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for X86.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "X86LegalizerInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

using namespace llvm;
using namespace TargetOpcode;
using namespace LegalizeActions;
using namespace LegalityPredicates;

X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
                                   const X86TargetMachine &TM)
    : Subtarget(STI) {

  bool Is64Bit = Subtarget.is64Bit();
  bool HasCMOV = Subtarget.canUseCMOV();
  bool HasSSE1 = Subtarget.hasSSE1();
  bool HasSSE2 = Subtarget.hasSSE2();
  bool HasSSE41 = Subtarget.hasSSE41();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX2 = Subtarget.hasAVX2();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool HasVLX = Subtarget.hasVLX();
  bool HasDQI = Subtarget.hasAVX512() && Subtarget.hasDQI();
  bool HasBWI = Subtarget.hasAVX512() && Subtarget.hasBWI();
  bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
  bool HasPOPCNT = Subtarget.hasPOPCNT();
  bool HasLZCNT = Subtarget.hasLZCNT();
  bool HasBMI = Subtarget.hasBMI();

  const LLT p0 = LLT::pointer(0, TM.getPointerSizeInBits(0));
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s80 = LLT::scalar(80);
  const LLT s128 = LLT::scalar(128);
  const LLT sMaxScalar = Subtarget.is64Bit() ? s64 : s32;
  const LLT v2s32 = LLT::fixed_vector(2, 32);
  const LLT v4s8 = LLT::fixed_vector(4, 8);

  const LLT v16s8 = LLT::fixed_vector(16, 8);
  const LLT v8s16 = LLT::fixed_vector(8, 16);
  const LLT v4s32 = LLT::fixed_vector(4, 32);
  const LLT v2s64 = LLT::fixed_vector(2, 64);
  const LLT v2p0 = LLT::fixed_vector(2, p0);

  const LLT v32s8 = LLT::fixed_vector(32, 8);
  const LLT v16s16 = LLT::fixed_vector(16, 16);
  const LLT v8s32 = LLT::fixed_vector(8, 32);
  const LLT v4s64 = LLT::fixed_vector(4, 64);
  const LLT v4p0 = LLT::fixed_vector(4, p0);

  const LLT v64s8 = LLT::fixed_vector(64, 8);
  const LLT v32s16 = LLT::fixed_vector(32, 16);
  const LLT v16s32 = LLT::fixed_vector(16, 32);
  const LLT v8s64 = LLT::fixed_vector(8, 64);

  const LLT s8MaxVector = HasAVX512 ? v64s8 : HasAVX ? v32s8 : v16s8;
  const LLT s16MaxVector = HasAVX512 ? v32s16 : HasAVX ? v16s16 : v8s16;
  const LLT s32MaxVector = HasAVX512 ? v16s32 : HasAVX ? v8s32 : v4s32;
  const LLT s64MaxVector = HasAVX512 ? v8s64 : HasAVX ? v4s64 : v2s64;

  // todo: AVX512 bool vector predicate types

  // implicit/constants
  // 32/64-bits needs support for s64/s128 to handle cases:
  // s64 = EXTEND (G_IMPLICIT_DEF s32) -> s64 = G_IMPLICIT_DEF
  // s128 = EXTEND (G_IMPLICIT_DEF s32/s64) -> s128 = G_IMPLICIT_DEF
  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({p0, s1, s8, s16, s32, s64})
      .legalFor(Is64Bit, {s128});

  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, s8, sMaxScalar);

  getActionDefinitionsBuilder(
      {G_LROUND, G_LLROUND, G_FCOS, G_FCOSH, G_FACOS, G_FSIN, G_FSINH,
       G_FASIN, G_FTAN, G_FTANH, G_FATAN, G_FATAN2, G_FPOW, G_FEXP,
       G_FEXP2, G_FEXP10, G_FLOG, G_FLOG2, G_FLOG10, G_FPOWI, G_FSINCOS})
      .libcall();

  getActionDefinitionsBuilder(G_FSQRT)
      .legalFor(HasSSE1 || UseX87, {s32})
      .legalFor(HasSSE2 || UseX87, {s64})
      .legalFor(UseX87, {s80});

  // merge/unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
    getActionDefinitionsBuilder(Op)
        .widenScalarToNextPow2(LitTyIdx, /*Min=*/8)
        .widenScalarToNextPow2(BigTyIdx, /*Min=*/16)
        .minScalar(LitTyIdx, s8)
        .minScalar(BigTyIdx, s32)
        .legalIf([=](const LegalityQuery &Q) {
          switch (Q.Types[BigTyIdx].getSizeInBits()) {
          case 16:
          case 32:
          case 64:
          case 128:
          case 256:
          case 512:
            break;
          default:
            return false;
          }
          switch (Q.Types[LitTyIdx].getSizeInBits()) {
          case 8:
          case 16:
          case 32:
          case 64:
          case 128:
          case 256:
            return true;
          default:
            return false;
          }
        });
  }

  // integer addition/subtraction
  getActionDefinitionsBuilder({G_ADD, G_SUB})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .legalFor(HasSSE2, {v16s8, v8s16, v4s32, v2s64})
      .legalFor(HasAVX2, {v32s8, v16s16, v8s32, v4s64})
      .legalFor(HasAVX512, {v16s32, v8s64})
      .legalFor(HasBWI, {v64s8, v32s16})
      .clampMinNumElements(0, s8, 16)
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, 2)
      .clampMaxNumElements(0, s8, HasBWI ? 64 : (HasAVX2 ? 32 : 16))
      .clampMaxNumElements(0, s16, HasBWI ? 32 : (HasAVX2 ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
      .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX2 ? 4 : 2))
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder({G_UADDE, G_UADDO, G_USUBE, G_USUBO})
      .legalFor({{s8, s1}, {s16, s1}, {s32, s1}})
      .legalFor(Is64Bit, {{s64, s1}})
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .clampScalar(1, s1, s1)
      .scalarize(0);

  // integer multiply
  getActionDefinitionsBuilder(G_MUL)
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .legalFor(HasSSE2, {v8s16})
      .legalFor(HasSSE41, {v4s32})
      .legalFor(HasAVX2, {v16s16, v8s32})
      .legalFor(HasAVX512, {v16s32})
      .legalFor(HasDQI, {v8s64})
      .legalFor(HasDQI && HasVLX, {v2s64, v4s64})
      .legalFor(HasBWI, {v32s16})
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, HasVLX ? 2 : 8)
      .clampMaxNumElements(0, s16, HasBWI ? 32 : (HasAVX2 ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
      .clampMaxNumElements(0, s64, 8)
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder({G_SMULH, G_UMULH})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  // integer divisions
  getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UDIV, G_UREM})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .libcallFor({s64})
      .clampScalar(0, s8, sMaxScalar);

  // integer shifts
  getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
      .legalFor({{s8, s8}, {s16, s8}, {s32, s8}})
      .legalFor(Is64Bit, {{s64, s8}})
      .clampScalar(0, s8, sMaxScalar)
      .clampScalar(1, s8, s8);

  // integer logic
  getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .legalFor(HasSSE2, {v16s8, v8s16, v4s32, v2s64})
      .legalFor(HasAVX, {v32s8, v16s16, v8s32, v4s64})
      .legalFor(HasAVX512, {v64s8, v32s16, v16s32, v8s64})
      .clampMinNumElements(0, s8, 16)
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, 2)
      .clampMaxNumElements(0, s8, HasAVX512 ? 64 : (HasAVX ? 32 : 16))
      .clampMaxNumElements(0, s16, HasAVX512 ? 32 : (HasAVX ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4))
      .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2))
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  // integer comparison
  const std::initializer_list<LLT> IntTypes32 = {s8, s16, s32, p0};
  const std::initializer_list<LLT> IntTypes64 = {s8, s16, s32, s64, p0};

  getActionDefinitionsBuilder(G_ICMP)
      .legalForCartesianProduct({s8}, Is64Bit ? IntTypes64 : IntTypes32)
      .clampScalar(0, s8, s8)
      .clampScalar(1, s8, sMaxScalar);

  // bswap
  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s32, sMaxScalar);

  // popcount
  getActionDefinitionsBuilder(G_CTPOP)
      .legalFor(HasPOPCNT, {{s16, s16}, {s32, s32}})
      .legalFor(HasPOPCNT && Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  // count leading zeros (LZCNT)
  getActionDefinitionsBuilder(G_CTLZ)
      .legalFor(HasLZCNT, {{s16, s16}, {s32, s32}})
      .legalFor(HasLZCNT && Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  // count trailing zeros
  getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF)
      .legalFor({{s16, s16}, {s32, s32}})
      .legalFor(Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  getActionDefinitionsBuilder(G_CTTZ)
      .legalFor(HasBMI, {{s16, s16}, {s32, s32}})
      .legalFor(HasBMI && Is64Bit, {{s64, s64}})
      .widenScalarToNextPow2(1, /*Min=*/16)
      .clampScalar(1, s16, sMaxScalar)
      .scalarSameSizeAs(0, 1);

  // control flow
  getActionDefinitionsBuilder(G_PHI)
      .legalFor({s8, s16, s32, p0})
      .legalFor(UseX87, {s80})
      .legalFor(Is64Bit, {s64})
      .legalFor(HasSSE1, {v16s8, v8s16, v4s32, v2s64})
      .legalFor(HasAVX, {v32s8, v16s16, v8s32, v4s64})
      .legalFor(HasAVX512, {v64s8, v32s16, v16s32, v8s64})
      .clampMinNumElements(0, s8, 16)
      .clampMinNumElements(0, s16, 8)
      .clampMinNumElements(0, s32, 4)
      .clampMinNumElements(0, s64, 2)
      .clampMaxNumElements(0, s8, HasAVX512 ? 64 : (HasAVX ? 32 : 16))
      .clampMaxNumElements(0, s16, HasAVX512 ? 32 : (HasAVX ? 16 : 8))
      .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4))
      .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2))
      .widenScalarToNextPow2(0, /*Min=*/32)
      .clampScalar(0, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});

  // pointer handling
  const std::initializer_list<LLT> PtrTypes32 = {s1, s8, s16, s32};
  const std::initializer_list<LLT> PtrTypes64 = {s1, s8, s16, s32, s64};

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct(Is64Bit ? PtrTypes64 : PtrTypes32, {p0})
      .maxScalar(0, sMaxScalar)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, sMaxScalar}});

  getActionDefinitionsBuilder(G_CONSTANT_POOL).legalFor({p0});

  getActionDefinitionsBuilder(G_PTR_ADD)
      .legalFor({{p0, s32}})
      .legalFor(Is64Bit, {{p0, s64}})
      .widenScalarToNextPow2(1, /*Min*/ 32)
      .clampScalar(1, s32, sMaxScalar);

  getActionDefinitionsBuilder({G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor({p0});

  // load/store: add more corner cases
  for (unsigned Op : {G_LOAD, G_STORE}) {
    auto &Action = getActionDefinitionsBuilder(Op);
    Action.legalForTypesWithMemDesc({{s8, p0, s8, 1},
                                     {s16, p0, s16, 1},
                                     {s32, p0, s32, 1},
                                     {s80, p0, s80, 1},
                                     {p0, p0, p0, 1},
                                     {v4s8, p0, v4s8, 1}});
    if (Is64Bit)
      Action.legalForTypesWithMemDesc(
          {{s64, p0, s64, 1}, {v2s32, p0, v2s32, 1}});

    if (HasSSE1)
      Action.legalForTypesWithMemDesc({{v4s32, p0, v4s32, 1}});
    if (HasSSE2)
      Action.legalForTypesWithMemDesc({{v16s8, p0, v16s8, 1},
                                       {v8s16, p0, v8s16, 1},
                                       {v2s64, p0, v2s64, 1},
                                       {v2p0, p0, v2p0, 1}});
    if (HasAVX)
      Action.legalForTypesWithMemDesc({{v32s8, p0, v32s8, 1},
                                       {v16s16, p0, v16s16, 1},
                                       {v8s32, p0, v8s32, 1},
                                       {v4s64, p0, v4s64, 1},
                                       {v4p0, p0, v4p0, 1}});
    if (HasAVX512)
      Action.legalForTypesWithMemDesc({{v64s8, p0, v64s8, 1},
                                       {v32s16, p0, v32s16, 1},
                                       {v16s32, p0, v16s32, 1},
                                       {v8s64, p0, v8s64, 1}});

    // X86 supports extending loads but not stores for GPRs
    if (Op == G_LOAD) {
      Action.legalForTypesWithMemDesc({{s8, p0, s1, 1},
                                       {s16, p0, s8, 1},
                                       {s32, p0, s8, 1},
                                       {s32, p0, s16, 1}});
      if (Is64Bit)
        Action.legalForTypesWithMemDesc(
            {{s64, p0, s8, 1}, {s64, p0, s16, 1}, {s64, p0, s32, 1}});
    } else {
      Action.customIf([=](const LegalityQuery &Query) {
        return Query.Types[0] != Query.MMODescrs[0].MemoryTy;
      });
    }
    Action.widenScalarToNextPow2(0, /*Min=*/8)
        .clampScalar(0, s8, sMaxScalar)
        .scalarize(0);
  }

  for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
    auto &Action = getActionDefinitionsBuilder(Op);
    Action.legalForTypesWithMemDesc(
        {{s16, p0, s8, 1}, {s32, p0, s8, 1}, {s32, p0, s16, 1}});
    if (Is64Bit)
      Action.legalForTypesWithMemDesc(
          {{s64, p0, s8, 1}, {s64, p0, s16, 1}, {s64, p0, s32, 1}});
    // TODO - SSE41/AVX2/AVX512F/AVX512BW vector extensions
  }

  // sext, zext, and anyext
  getActionDefinitionsBuilder(G_ANYEXT)
      .legalFor({s8, s16, s32, s128})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, s8, sMaxScalar)
      .widenScalarToNextPow2(1, /*Min=*/8)
      .clampScalar(1, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder({G_SEXT, G_ZEXT})
      .legalFor({s8, s16, s32})
      .legalFor(Is64Bit, {s64})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, s8, sMaxScalar)
      .widenScalarToNextPow2(1, /*Min=*/8)
      .clampScalar(1, s8, sMaxScalar)
      .scalarize(0);

  getActionDefinitionsBuilder(G_SEXT_INREG).lower();

  // fp constants
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .legalFor(UseX87, {s80});

  // fp arithmetic
  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV})
      .legalFor({s32, s64})
      .legalFor(HasSSE1, {v4s32})
      .legalFor(HasSSE2, {v2s64})
      .legalFor(HasAVX, {v8s32, v4s64})
      .legalFor(HasAVX512, {v16s32, v8s64})
      .legalFor(UseX87, {s80});

  getActionDefinitionsBuilder(G_FABS)
      .legalFor(UseX87, {s80})
      .legalFor(UseX87 && !Is64Bit, {s64})
      .lower();

  // fp comparison
  getActionDefinitionsBuilder(G_FCMP)
      .legalFor(HasSSE1 || UseX87, {s8, s32})
      .legalFor(HasSSE2 || UseX87, {s8, s64})
      .legalFor(UseX87, {s8, s80})
      .clampScalar(0, s8, s8)
      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(1);

  // fp conversions
  getActionDefinitionsBuilder(G_FPEXT)
      .legalFor(HasSSE2, {{s64, s32}})
      .legalFor(HasAVX, {{v4s64, v4s32}})
      .legalFor(HasAVX512, {{v8s64, v8s32}});

  getActionDefinitionsBuilder(G_FPTRUNC)
      .legalFor(HasSSE2, {{s32, s64}})
      .legalFor(HasAVX, {{v4s32, v4s64}})
      .legalFor(HasAVX512, {{v8s32, v8s64}});

  getActionDefinitionsBuilder(G_SITOFP)
      .legalFor(HasSSE1, {{s32, s32}})
      .legalFor(HasSSE1 && Is64Bit, {{s32, s64}})
      .legalFor(HasSSE2, {{s64, s32}})
      .legalFor(HasSSE2 && Is64Bit, {{s64, s64}})
      .clampScalar(1, (UseX87 && !HasSSE1) ? s16 : s32, sMaxScalar)
      .widenScalarToNextPow2(1)
      .customForCartesianProduct(UseX87, {s32, s64, s80}, {s16, s32, s64})
      .clampScalar(0, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_FPTOSI)
      .legalFor(HasSSE1, {{s32, s32}})
      .legalFor(HasSSE1 && Is64Bit, {{s64, s32}})
      .legalFor(HasSSE2, {{s32, s64}})
      .legalFor(HasSSE2 && Is64Bit, {{s64, s64}})
      .clampScalar(0, (UseX87 && !HasSSE1) ? s16 : s32, sMaxScalar)
      .widenScalarToNextPow2(0)
      .customForCartesianProduct(UseX87, {s16, s32, s64}, {s32, s64, s80})
      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(1);

  // For G_UITOFP and G_FPTOUI without AVX512, we have to custom legalize
  // types <= s32 manually. Otherwise the custom handler has no way to tell
  // whether an s32 is an original type that needs promoting to s64, or was
  // produced by widening and should not be widened to s64 again.
  //
  // For AVX512 we simply widen types, as there is a direct mapping from the
  // opcodes to asm instructions.
  getActionDefinitionsBuilder(G_UITOFP)
      .legalFor(HasAVX512, {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
      .customIf([=](const LegalityQuery &Query) {
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(0, s32)(Query)) ||
                (HasSSE2 && typeIs(0, s64)(Query))) &&
               scalarNarrowerThan(1, Is64Bit ? 64 : 32)(Query);
      })
      .lowerIf([=](const LegalityQuery &Query) {
        // Lower conversions from s64
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(0, s32)(Query)) ||
                (HasSSE2 && typeIs(0, s64)(Query))) &&
               (Is64Bit && typeIs(1, s64)(Query));
      })
      .clampScalar(0, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, sMaxScalar)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_FPTOUI)
      .legalFor(HasAVX512, {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
      .customIf([=](const LegalityQuery &Query) {
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(1, s32)(Query)) ||
                (HasSSE2 && typeIs(1, s64)(Query))) &&
               scalarNarrowerThan(0, Is64Bit ? 64 : 32)(Query);
      })
      // TODO: replace with customized legalization using
      // specifics of cvttsd2si. The selection of this node requires
      // a vector type. Either G_SCALAR_TO_VECTOR is needed or more advanced
      // support of G_BUILD_VECTOR/G_INSERT_VECTOR_ELT is required beforehand.
      .lowerIf([=](const LegalityQuery &Query) {
        return !HasAVX512 &&
               ((HasSSE1 && typeIs(1, s32)(Query)) ||
                (HasSSE2 && typeIs(1, s64)(Query))) &&
               (Is64Bit && typeIs(0, s64)(Query));
      })
      .clampScalar(0, s32, sMaxScalar)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
      .widenScalarToNextPow2(1);

  // vector ops
  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .customIf([=](const LegalityQuery &Query) {
        return (HasSSE1 && typeInSet(0, {v4s32})(Query)) ||
               (HasSSE2 && typeInSet(0, {v2s64, v8s16, v16s8})(Query)) ||
               (HasAVX && typeInSet(0, {v4s64, v8s32, v16s16, v32s8})(Query)) ||
               (HasAVX512 &&
                typeInSet(0, {v8s64, v16s32, v32s16, v64s8})(Query));
      })
      .clampNumElements(0, v16s8, s8MaxVector)
      .clampNumElements(0, v8s16, s16MaxVector)
      .clampNumElements(0, v4s32, s32MaxVector)
      .clampNumElements(0, v2s64, s64MaxVector)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder({G_EXTRACT, G_INSERT})
      .legalIf([=](const LegalityQuery &Query) {
        unsigned SubIdx = Query.Opcode == G_EXTRACT ? 0 : 1;
        unsigned FullIdx = Query.Opcode == G_EXTRACT ? 1 : 0;
        return (HasAVX && typePairInSet(SubIdx, FullIdx,
                                        {{v16s8, v32s8},
                                         {v8s16, v16s16},
                                         {v4s32, v8s32},
                                         {v2s64, v4s64}})(Query)) ||
               (HasAVX512 && typePairInSet(SubIdx, FullIdx,
                                           {{v16s8, v64s8},
                                            {v32s8, v64s8},
                                            {v8s16, v32s16},
                                            {v16s16, v32s16},
                                            {v4s32, v16s32},
                                            {v8s32, v16s32},
                                            {v2s64, v8s64},
                                            {v4s64, v8s64}})(Query));
      });

  // todo: only permit dst types up to max legal vector register size?
  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor(
          HasSSE1,
          {{v32s8, v16s8}, {v16s16, v8s16}, {v8s32, v4s32}, {v4s64, v2s64}})
      .legalFor(HasAVX, {{v64s8, v16s8},
                         {v64s8, v32s8},
                         {v32s16, v8s16},
                         {v32s16, v16s16},
                         {v16s32, v4s32},
                         {v16s32, v8s32},
                         {v8s64, v2s64},
                         {v8s64, v4s64}});

  // todo: vectors and address spaces
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32}, {p0, s32}})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar)
      .clampScalar(1, s32, s32);

  // memory intrinsics
  getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();

  getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
      .lower();

  // fp intrinsics
  getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN)
      .scalarize(0)
      .minScalar(0, LLT::scalar(32))
      .libcall();

  getActionDefinitionsBuilder({G_FREEZE, G_CONSTANT_FOLD_BARRIER})
      .legalFor({s8, s16, s32, s64, p0})
      .widenScalarToNextPow2(0, /*Min=*/8)
      .clampScalar(0, s8, sMaxScalar);

  getLegacyLegalizerInfo().computeTables();
  verify(*STI.getInstrInfo());
}

bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
                                      LostDebugLocObserver &LocObserver) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_BUILD_VECTOR:
    return legalizeBuildVector(MI, MRI, Helper);
  case TargetOpcode::G_FPTOUI:
    return legalizeFPTOUI(MI, MRI, Helper);
  case TargetOpcode::G_UITOFP:
    return legalizeUITOFP(MI, MRI, Helper);
  case TargetOpcode::G_STORE:
    return legalizeNarrowingStore(MI, MRI, Helper);
  case TargetOpcode::G_SITOFP:
    return legalizeSITOFP(MI, MRI, Helper);
  case TargetOpcode::G_FPTOSI:
    return legalizeFPTOSI(MI, MRI, Helper);
  }
  llvm_unreachable("expected switch to return");
}

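// Lower G_SITOFP via the x87 FPU: spill the integer to a stack slot, then
// reload it with G_FILD, which converts the integer to floating point as it
// loads.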
bool X86LegalizerInfo::legalizeSITOFP(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineFunction &MF = *MI.getMF();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  assert((SrcTy.getSizeInBits() == 16 || SrcTy.getSizeInBits() == 32 ||
          SrcTy.getSizeInBits() == 64) &&
         "Unexpected source type for SITOFP in X87 mode.");

  TypeSize MemSize = SrcTy.getSizeInBytes();
  MachinePointerInfo PtrInfo;
  Align Alignment = Helper.getStackTemporaryAlignment(SrcTy);
  auto SlotPointer = Helper.createStackTemporary(MemSize, Alignment, PtrInfo);
  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));

  // Store the integer value to the stack slot.
  MIRBuilder.buildStore(Src, SlotPointer, *StoreMMO);

  MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, MemSize, Align(MemSize));
  MIRBuilder.buildInstr(X86::G_FILD)
      .addDef(Dst)
      .addUse(SlotPointer.getReg(0))
      .addMemOperand(LoadMMO);

  MI.eraseFromParent();
  return true;
}

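// Lower G_FPTOSI via the x87 FPU: G_FIST stores the floating-point value to a
// stack slot as an integer, from which the result is then reloaded.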
bool X86LegalizerInfo::legalizeFPTOSI(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineFunction &MF = *MI.getMF();
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  TypeSize MemSize = DstTy.getSizeInBytes();
  MachinePointerInfo PtrInfo;
  Align Alignment = Helper.getStackTemporaryAlignment(DstTy);
  auto SlotPointer = Helper.createStackTemporary(MemSize, Alignment, PtrInfo);
  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, MemSize, Align(MemSize));

  MIRBuilder.buildInstr(X86::G_FIST)
      .addUse(Src)
      .addUse(SlotPointer.getReg(0))
      .addMemOperand(StoreMMO);

  MIRBuilder.buildLoad(Dst, SlotPointer, PtrInfo, Align(MemSize));
  MI.eraseFromParent();
  return true;
}

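// Lower a G_BUILD_VECTOR whose sources are all constants (or undefs) into a
// load from an equivalent constant-pool entry.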
bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  const auto &BuildVector = cast<GBuildVector>(MI);
  Register Dst = BuildVector.getReg(0);
  LLT DstTy = MRI.getType(Dst);
  MachineFunction &MF = MIRBuilder.getMF();
  LLVMContext &Ctx = MF.getFunction().getContext();
  uint64_t DstTySize = DstTy.getScalarSizeInBits();

  SmallVector<Constant *, 4> CstIdxs;
  for (unsigned i = 0; i < BuildVector.getNumSources(); ++i) {
    Register Source = BuildVector.getSourceReg(i);

    auto ValueAndReg = getIConstantVRegValWithLookThrough(Source, MRI);
    if (ValueAndReg) {
      CstIdxs.emplace_back(ConstantInt::get(Ctx, ValueAndReg->Value));
      continue;
    }

    auto FPValueAndReg = getFConstantVRegValWithLookThrough(Source, MRI);
    if (FPValueAndReg) {
      CstIdxs.emplace_back(ConstantFP::get(Ctx, FPValueAndReg->Value));
      continue;
    }

    if (getOpcodeDef<GImplicitDef>(Source, MRI)) {
      CstIdxs.emplace_back(UndefValue::get(Type::getIntNTy(Ctx, DstTySize)));
      continue;
    }
    return false;
  }

  Constant *ConstVal = ConstantVector::get(CstIdxs);

  const DataLayout &DL = MIRBuilder.getDataLayout();
  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
  Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
  auto Addr = MIRBuilder.buildConstantPool(
      LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)),
      MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                              MachineMemOperand::MOLoad, DstTy, Alignment);

  MIRBuilder.buildLoad(Dst, Addr, *MMO);
  MI.eraseFromParent();
  return true;
}

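// Without native unsigned conversions, a G_FPTOUI result of up to 32 bits is
// produced by a wider G_FPTOSI whose result is then truncated: every such
// unsigned value is representable in the wider signed type.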
bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);

  // Simply reuse FPTOSI when it is possible to widen the type
  if (DstSizeInBits <= 32) {
    auto Casted = MIRBuilder.buildFPTOSI(DstTy == s32 ? s64 : s32, Src);
    MIRBuilder.buildTrunc(Dst, Casted);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

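// Likewise, a G_UITOFP source of up to 32 bits is zero-extended into a wider
// type, where it is guaranteed non-negative, and handed to G_SITOFP.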
bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
                                      MachineRegisterInfo &MRI,
                                      LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);

  // Simply reuse SITOFP when it is possible to widen the type
  if (SrcTy.getSizeInBits() <= 32) {
    auto Ext = MIRBuilder.buildZExt(SrcTy == s32 ? s64 : s32, Src);
    MIRBuilder.buildSITOFP(Dst, Ext);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

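// A custom-legalized narrowing G_STORE (value type wider than memory type) is
// rewritten to store the full value type by widening its memory operand.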
bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              LegalizerHelper &Helper) const {
  auto &Store = cast<GStore>(MI);
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineMemOperand &MMO = **Store.memoperands_begin();
  MachineFunction &MF = MIRBuilder.getMF();
  LLT ValTy = MRI.getType(Store.getValueReg());
  auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), ValTy);

  Helper.Observer.changingInstr(Store);
  Store.setMemRefs(MF, {NewMMO});
  Helper.Observer.changedInstr(Store);
  return true;
}

bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                         MachineInstr &MI) const {
  return true;
}