1 | //===- X86LegalizerInfo.cpp --------------------------------------*- C++ -*-==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// This file implements the targeting of the MachineLegalizer class for X86. |
10 | /// \todo This should be generated by TableGen. |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "X86LegalizerInfo.h" |
14 | #include "X86Subtarget.h" |
15 | #include "X86TargetMachine.h" |
16 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
17 | #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" |
18 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
19 | #include "llvm/CodeGen/MachineConstantPool.h" |
20 | #include "llvm/CodeGen/TargetOpcodes.h" |
21 | #include "llvm/CodeGen/ValueTypes.h" |
22 | #include "llvm/IR/DerivedTypes.h" |
23 | #include "llvm/IR/Type.h" |
24 | |
25 | using namespace llvm; |
26 | using namespace TargetOpcode; |
27 | using namespace LegalizeActions; |
28 | using namespace LegalityPredicates; |
29 | |
30 | X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, |
31 | const X86TargetMachine &TM) |
32 | : Subtarget(STI) { |
33 | |
34 | bool Is64Bit = Subtarget.is64Bit(); |
35 | bool HasCMOV = Subtarget.canUseCMOV(); |
36 | bool HasSSE1 = Subtarget.hasSSE1(); |
37 | bool HasSSE2 = Subtarget.hasSSE2(); |
38 | bool HasSSE41 = Subtarget.hasSSE41(); |
39 | bool HasAVX = Subtarget.hasAVX(); |
40 | bool HasAVX2 = Subtarget.hasAVX2(); |
41 | bool HasAVX512 = Subtarget.hasAVX512(); |
42 | bool HasVLX = Subtarget.hasVLX(); |
43 | bool HasDQI = Subtarget.hasAVX512() && Subtarget.hasDQI(); |
44 | bool HasBWI = Subtarget.hasAVX512() && Subtarget.hasBWI(); |
45 | bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87(); |
46 | |
47 | const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: TM.getPointerSizeInBits(AS: 0)); |
48 | const LLT s1 = LLT::scalar(SizeInBits: 1); |
49 | const LLT s8 = LLT::scalar(SizeInBits: 8); |
50 | const LLT s16 = LLT::scalar(SizeInBits: 16); |
51 | const LLT s32 = LLT::scalar(SizeInBits: 32); |
52 | const LLT s64 = LLT::scalar(SizeInBits: 64); |
53 | const LLT s80 = LLT::scalar(SizeInBits: 80); |
54 | const LLT s128 = LLT::scalar(SizeInBits: 128); |
55 | const LLT sMaxScalar = Subtarget.is64Bit() ? s64 : s32; |
56 | const LLT v2s32 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32); |
57 | const LLT v4s8 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 8); |
58 | |
59 | |
60 | const LLT v16s8 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8); |
61 | const LLT v8s16 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16); |
62 | const LLT v4s32 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32); |
63 | const LLT v2s64 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64); |
64 | const LLT v2p0 = LLT::fixed_vector(NumElements: 2, ScalarTy: p0); |
65 | |
66 | const LLT v32s8 = LLT::fixed_vector(NumElements: 32, ScalarSizeInBits: 8); |
67 | const LLT v16s16 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 16); |
68 | const LLT v8s32 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 32); |
69 | const LLT v4s64 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 64); |
70 | const LLT v4p0 = LLT::fixed_vector(NumElements: 4, ScalarTy: p0); |
71 | |
72 | const LLT v64s8 = LLT::fixed_vector(NumElements: 64, ScalarSizeInBits: 8); |
73 | const LLT v32s16 = LLT::fixed_vector(NumElements: 32, ScalarSizeInBits: 16); |
74 | const LLT v16s32 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 32); |
75 | const LLT v8s64 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 64); |
76 | |
77 | const LLT s8MaxVector = HasAVX512 ? v64s8 : HasAVX ? v32s8 : v16s8; |
78 | const LLT s16MaxVector = HasAVX512 ? v32s16 : HasAVX ? v16s16 : v8s16; |
79 | const LLT s32MaxVector = HasAVX512 ? v16s32 : HasAVX ? v8s32 : v4s32; |
80 | const LLT s64MaxVector = HasAVX512 ? v8s64 : HasAVX ? v4s64 : v2s64; |
81 | |
82 | // todo: AVX512 bool vector predicate types |
83 | |
84 | // implicit/constants |
85 | getActionDefinitionsBuilder(Opcode: G_IMPLICIT_DEF) |
86 | .legalIf(Predicate: [=](const LegalityQuery &Query) -> bool { |
87 | // 32/64-bits needs support for s64/s128 to handle cases: |
88 | // s64 = EXTEND (G_IMPLICIT_DEF s32) -> s64 = G_IMPLICIT_DEF |
89 | // s128 = EXTEND (G_IMPLICIT_DEF s32/s64) -> s128 = G_IMPLICIT_DEF |
90 | return typeInSet(TypeIdx: 0, TypesInit: {p0, s1, s8, s16, s32, s64})(Query) || |
91 | (Is64Bit && typeInSet(TypeIdx: 0, TypesInit: {s128})(Query)); |
92 | }); |
93 | |
94 | getActionDefinitionsBuilder(Opcode: G_CONSTANT) |
95 | .legalIf(Predicate: [=](const LegalityQuery &Query) -> bool { |
96 | return typeInSet(TypeIdx: 0, TypesInit: {p0, s8, s16, s32})(Query) || |
97 | (Is64Bit && typeInSet(TypeIdx: 0, TypesInit: {s64})(Query)); |
98 | }) |
99 | .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8) |
100 | .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar); |
101 | |
102 | // merge/unmerge |
103 | for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { |
104 | unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1; |
105 | unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0; |
106 | getActionDefinitionsBuilder(Opcode: Op) |
107 | .widenScalarToNextPow2(TypeIdx: LitTyIdx, /*Min=*/MinSize: 8) |
108 | .widenScalarToNextPow2(TypeIdx: BigTyIdx, /*Min=*/MinSize: 16) |
109 | .minScalar(TypeIdx: LitTyIdx, Ty: s8) |
110 | .minScalar(TypeIdx: BigTyIdx, Ty: s32) |
111 | .legalIf(Predicate: [=](const LegalityQuery &Q) { |
112 | switch (Q.Types[BigTyIdx].getSizeInBits()) { |
113 | case 16: |
114 | case 32: |
115 | case 64: |
116 | case 128: |
117 | case 256: |
118 | case 512: |
119 | break; |
120 | default: |
121 | return false; |
122 | } |
123 | switch (Q.Types[LitTyIdx].getSizeInBits()) { |
124 | case 8: |
125 | case 16: |
126 | case 32: |
127 | case 64: |
128 | case 128: |
129 | case 256: |
130 | return true; |
131 | default: |
132 | return false; |
133 | } |
134 | }); |
135 | } |
136 | |
137 | // integer addition/subtraction |
138 | getActionDefinitionsBuilder(Opcodes: {G_ADD, G_SUB}) |
139 | .legalIf(Predicate: [=](const LegalityQuery &Query) -> bool { |
140 | if (typeInSet(TypeIdx: 0, TypesInit: {s8, s16, s32})(Query)) |
141 | return true; |
142 | if (Is64Bit && typeInSet(TypeIdx: 0, TypesInit: {s64})(Query)) |
143 | return true; |
144 | if (HasSSE2 && typeInSet(TypeIdx: 0, TypesInit: {v16s8, v8s16, v4s32, v2s64})(Query)) |
145 | return true; |
146 | if (HasAVX2 && typeInSet(TypeIdx: 0, TypesInit: {v32s8, v16s16, v8s32, v4s64})(Query)) |
147 | return true; |
148 | if (HasAVX512 && typeInSet(TypeIdx: 0, TypesInit: {v16s32, v8s64})(Query)) |
149 | return true; |
150 | if (HasBWI && typeInSet(TypeIdx: 0, TypesInit: {v64s8, v32s16})(Query)) |
151 | return true; |
152 | return false; |
153 | }) |
154 | .clampMinNumElements(TypeIdx: 0, EltTy: s8, MinElements: 16) |
155 | .clampMinNumElements(TypeIdx: 0, EltTy: s16, MinElements: 8) |
156 | .clampMinNumElements(TypeIdx: 0, EltTy: s32, MinElements: 4) |
157 | .clampMinNumElements(TypeIdx: 0, EltTy: s64, MinElements: 2) |
158 | .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: HasBWI ? 64 : (HasAVX2 ? 32 : 16)) |
159 | .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: HasBWI ? 32 : (HasAVX2 ? 16 : 8)) |
160 | .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: HasAVX512 ? 16 : (HasAVX2 ? 8 : 4)) |
161 | .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: HasAVX512 ? 8 : (HasAVX2 ? 4 : 2)) |
162 | .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32) |
163 | .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar) |
164 | .scalarize(TypeIdx: 0); |
165 | |
166 | getActionDefinitionsBuilder(Opcodes: {G_UADDE, G_UADDO, G_USUBE, G_USUBO}) |
167 | .legalIf(Predicate: [=](const LegalityQuery &Query) -> bool { |
168 | return typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s8, s1}, {s16, s1}, {s32, s1}})(Query) || |
169 | (Is64Bit && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s64, s1}})(Query)); |
170 | }) |
171 | .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32) |
172 | .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar) |
173 | .clampScalar(TypeIdx: 1, MinTy: s1, MaxTy: s1) |
174 | .scalarize(TypeIdx: 0); |
175 | |
176 | // integer multiply |
177 | getActionDefinitionsBuilder(Opcode: G_MUL) |
178 | .legalIf(Predicate: [=](const LegalityQuery &Query) -> bool { |
179 | if (typeInSet(TypeIdx: 0, TypesInit: {s8, s16, s32})(Query)) |
180 | return true; |
181 | if (Is64Bit && typeInSet(TypeIdx: 0, TypesInit: {s64})(Query)) |
182 | return true; |
183 | if (HasSSE2 && typeInSet(TypeIdx: 0, TypesInit: {v8s16})(Query)) |
184 | return true; |
185 | if (HasSSE41 && typeInSet(TypeIdx: 0, TypesInit: {v4s32})(Query)) |
186 | return true; |
187 | if (HasAVX2 && typeInSet(TypeIdx: 0, TypesInit: {v16s16, v8s32})(Query)) |
188 | return true; |
189 | if (HasAVX512 && typeInSet(TypeIdx: 0, TypesInit: {v16s32})(Query)) |
190 | return true; |
191 | if (HasDQI && typeInSet(TypeIdx: 0, TypesInit: {v8s64})(Query)) |
192 | return true; |
193 | if (HasDQI && HasVLX && typeInSet(TypeIdx: 0, TypesInit: {v2s64, v4s64})(Query)) |
194 | return true; |
195 | if (HasBWI && typeInSet(TypeIdx: 0, TypesInit: {v32s16})(Query)) |
196 | return true; |
197 | return false; |
198 | }) |
199 | .clampMinNumElements(TypeIdx: 0, EltTy: s16, MinElements: 8) |
200 | .clampMinNumElements(TypeIdx: 0, EltTy: s32, MinElements: 4) |
201 | .clampMinNumElements(TypeIdx: 0, EltTy: s64, MinElements: HasVLX ? 2 : 8) |
202 | .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: HasBWI ? 32 : (HasAVX2 ? 16 : 8)) |
203 | .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: HasAVX512 ? 16 : (HasAVX2 ? 8 : 4)) |
204 | .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 8) |
205 | .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32) |
206 | .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar) |
207 | .scalarize(TypeIdx: 0); |
208 | |
209 | getActionDefinitionsBuilder(Opcodes: {G_SMULH, G_UMULH}) |
210 | .legalIf(Predicate: [=](const LegalityQuery &Query) -> bool { |
211 | return typeInSet(TypeIdx: 0, TypesInit: {s8, s16, s32})(Query) || |
212 | (Is64Bit && typeInSet(TypeIdx: 0, TypesInit: {s64})(Query)); |
213 | }) |
214 | .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32) |
215 | .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar) |
216 | .scalarize(TypeIdx: 0); |
217 | |
218 | // integer divisions |
219 | getActionDefinitionsBuilder(Opcodes: {G_SDIV, G_SREM, G_UDIV, G_UREM}) |
220 | .legalIf(Predicate: [=](const LegalityQuery &Query) -> bool { |
221 | return typeInSet(TypeIdx: 0, TypesInit: {s8, s16, s32})(Query) || |
222 | (Is64Bit && typeInSet(TypeIdx: 0, TypesInit: {s64})(Query)); |
223 | }) |
224 | .libcallFor(Types: {s64}) |
225 | .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar); |
226 | |
227 | // integer shifts |
228 | getActionDefinitionsBuilder(Opcodes: {G_SHL, G_LSHR, G_ASHR}) |
229 | .legalIf(Predicate: [=](const LegalityQuery &Query) -> bool { |
230 | return typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s8, s8}, {s16, s8}, {s32, s8}})(Query) || |
231 | (Is64Bit && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s64, s8}})(Query)); |
232 | }) |
233 | .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar) |
234 | .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: s8); |
235 | |
236 | // integer logic |
237 | getActionDefinitionsBuilder(Opcodes: {G_AND, G_OR, G_XOR}) |
238 | .legalIf(Predicate: [=](const LegalityQuery &Query) -> bool { |
239 | if (typeInSet(TypeIdx: 0, TypesInit: {s8, s16, s32})(Query)) |
240 | return true; |
241 | if (Is64Bit && typeInSet(TypeIdx: 0, TypesInit: {s64})(Query)) |
242 | return true; |
243 | if (HasSSE2 && typeInSet(TypeIdx: 0, TypesInit: {v16s8, v8s16, v4s32, v2s64})(Query)) |
244 | return true; |
245 | if (HasAVX && typeInSet(TypeIdx: 0, TypesInit: {v32s8, v16s16, v8s32, v4s64})(Query)) |
246 | return true; |
247 | if (HasAVX512 && typeInSet(TypeIdx: 0, TypesInit: {v64s8, v32s16, v16s32, v8s64})(Query)) |
248 | return true; |
249 | return false; |
250 | }) |
251 | .clampMinNumElements(TypeIdx: 0, EltTy: s8, MinElements: 16) |
252 | .clampMinNumElements(TypeIdx: 0, EltTy: s16, MinElements: 8) |
253 | .clampMinNumElements(TypeIdx: 0, EltTy: s32, MinElements: 4) |
254 | .clampMinNumElements(TypeIdx: 0, EltTy: s64, MinElements: 2) |
255 | .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: HasAVX512 ? 64 : (HasAVX ? 32 : 16)) |
256 | .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: HasAVX512 ? 32 : (HasAVX ? 16 : 8)) |
257 | .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: HasAVX512 ? 16 : (HasAVX ? 8 : 4)) |
258 | .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: HasAVX512 ? 8 : (HasAVX ? 4 : 2)) |
259 | .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32) |
260 | .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar) |
261 | .scalarize(TypeIdx: 0); |
262 | |
263 | // integer comparison |
264 | const std::initializer_list<LLT> IntTypes32 = {s8, s16, s32, p0}; |
265 | const std::initializer_list<LLT> IntTypes64 = {s8, s16, s32, s64, p0}; |
266 | |
267 | getActionDefinitionsBuilder(Opcode: G_ICMP) |
268 | .legalForCartesianProduct(Types0: {s8}, Types1: Is64Bit ? IntTypes64 : IntTypes32) |
269 | .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s8) |
270 | .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: sMaxScalar); |
271 | |
272 | // bswap |
273 | getActionDefinitionsBuilder(Opcode: G_BSWAP) |
274 | .legalIf(Predicate: [=](const LegalityQuery &Query) { |
275 | return Query.Types[0] == s32 || |
276 | (Subtarget.is64Bit() && Query.Types[0] == s64); |
277 | }) |
278 | .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32) |
279 | .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sMaxScalar); |
280 | |
281 | // popcount |
282 | getActionDefinitionsBuilder(Opcode: G_CTPOP) |
283 | .legalIf(Predicate: [=](const LegalityQuery &Query) -> bool { |
284 | return Subtarget.hasPOPCNT() && |
285 | (typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s16, s16}, {s32, s32}})(Query) || |
286 | (Is64Bit && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s64, s64}})(Query))); |
287 | }) |
288 | .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 16) |
289 | .clampScalar(TypeIdx: 1, MinTy: s16, MaxTy: sMaxScalar) |
290 | .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1); |
291 | |
292 | // count leading zeros (LZCNT) |
293 | getActionDefinitionsBuilder(Opcode: G_CTLZ) |
294 | .legalIf(Predicate: [=](const LegalityQuery &Query) -> bool { |
295 | return Subtarget.hasLZCNT() && |
296 | (typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s16, s16}, {s32, s32}})(Query) || |
297 | (Is64Bit && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s64, s64}})(Query))); |
298 | }) |
299 | .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 16) |
300 | .clampScalar(TypeIdx: 1, MinTy: s16, MaxTy: sMaxScalar) |
301 | .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1); |
302 | |
303 | // count trailing zeros |
304 | getActionDefinitionsBuilder(Opcodes: {G_CTTZ_ZERO_UNDEF, G_CTTZ}) |
305 | .legalIf(Predicate: [=](const LegalityQuery &Query) -> bool { |
306 | return (Query.Opcode == G_CTTZ_ZERO_UNDEF || Subtarget.hasBMI()) && |
307 | (typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s16, s16}, {s32, s32}})(Query) || |
308 | (Is64Bit && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s64, s64}})(Query))); |
309 | }) |
310 | .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 16) |
311 | .clampScalar(TypeIdx: 1, MinTy: s16, MaxTy: sMaxScalar) |
312 | .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1); |
313 | |
314 | // control flow |
315 | getActionDefinitionsBuilder(Opcode: G_PHI) |
316 | .legalIf(Predicate: [=](const LegalityQuery &Query) -> bool { |
317 | return typeInSet(TypeIdx: 0, TypesInit: {s8, s16, s32, p0})(Query) || |
318 | (Is64Bit && typeInSet(TypeIdx: 0, TypesInit: {s64})(Query)) || |
319 | (HasSSE1 && typeInSet(TypeIdx: 0, TypesInit: {v16s8, v8s16, v4s32, v2s64})(Query)) || |
320 | (HasAVX && typeInSet(TypeIdx: 0, TypesInit: {v32s8, v16s16, v8s32, v4s64})(Query)) || |
321 | (HasAVX512 && |
322 | typeInSet(TypeIdx: 0, TypesInit: {v64s8, v32s16, v16s32, v8s64})(Query)); |
323 | }) |
324 | .clampMinNumElements(TypeIdx: 0, EltTy: s8, MinElements: 16) |
325 | .clampMinNumElements(TypeIdx: 0, EltTy: s16, MinElements: 8) |
326 | .clampMinNumElements(TypeIdx: 0, EltTy: s32, MinElements: 4) |
327 | .clampMinNumElements(TypeIdx: 0, EltTy: s64, MinElements: 2) |
328 | .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: HasAVX512 ? 64 : (HasAVX ? 32 : 16)) |
329 | .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: HasAVX512 ? 32 : (HasAVX ? 16 : 8)) |
330 | .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: HasAVX512 ? 16 : (HasAVX ? 8 : 4)) |
331 | .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: HasAVX512 ? 8 : (HasAVX ? 4 : 2)) |
332 | .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32) |
333 | .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar) |
334 | .scalarize(TypeIdx: 0); |
335 | |
336 | getActionDefinitionsBuilder(Opcode: G_BRCOND).legalFor(Types: {s1}); |
337 | |
338 | // pointer handling |
339 | const std::initializer_list<LLT> PtrTypes32 = {s1, s8, s16, s32}; |
340 | const std::initializer_list<LLT> PtrTypes64 = {s1, s8, s16, s32, s64}; |
341 | |
342 | getActionDefinitionsBuilder(Opcode: G_PTRTOINT) |
343 | .legalForCartesianProduct(Types0: Is64Bit ? PtrTypes64 : PtrTypes32, Types1: {p0}) |
344 | .maxScalar(TypeIdx: 0, Ty: sMaxScalar) |
345 | .widenScalarToNextPow2(TypeIdx: 0, /*Min*/ MinSize: 8); |
346 | |
347 | getActionDefinitionsBuilder(Opcode: G_INTTOPTR).legalFor(Types: {{p0, sMaxScalar}}); |
348 | |
349 | getActionDefinitionsBuilder(Opcode: G_CONSTANT_POOL).legalFor(Types: {p0}); |
350 | |
351 | getActionDefinitionsBuilder(Opcode: G_PTR_ADD) |
352 | .legalIf(Predicate: [=](const LegalityQuery &Query) -> bool { |
353 | return typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{p0, s32}})(Query) || |
354 | (Is64Bit && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{p0, s64}})(Query)); |
355 | }) |
356 | .widenScalarToNextPow2(TypeIdx: 1, /*Min*/ MinSize: 32) |
357 | .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: sMaxScalar); |
358 | |
359 | getActionDefinitionsBuilder(Opcodes: {G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor(Types: {p0}); |
360 | |
361 | // load/store: add more corner cases |
362 | for (unsigned Op : {G_LOAD, G_STORE}) { |
363 | auto &Action = getActionDefinitionsBuilder(Opcode: Op); |
364 | Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s8, .Type1: p0, .MemTy: s1, .Align: 1}, |
365 | {.Type0: s8, .Type1: p0, .MemTy: s8, .Align: 1}, |
366 | {.Type0: s16, .Type1: p0, .MemTy: s8, .Align: 1}, |
367 | {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: 1}, |
368 | {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 1}, |
369 | {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 1}, |
370 | {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: 1}, |
371 | {.Type0: s80, .Type1: p0, .MemTy: s80, .Align: 1}, |
372 | {.Type0: p0, .Type1: p0, .MemTy: p0, .Align: 1}, |
373 | {.Type0: v4s8, .Type1: p0, .MemTy: v4s8, .Align: 1}}); |
374 | if (Is64Bit) |
375 | Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: 1}, |
376 | {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: 1}, |
377 | {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: 1}, |
378 | {.Type0: s64, .Type1: p0, .MemTy: s64, .Align: 1}, |
379 | {.Type0: v2s32, .Type1: p0, .MemTy: v2s32, .Align: 1}}); |
380 | if (HasSSE1) |
381 | Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v4s32, .Type1: p0, .MemTy: v4s32, .Align: 1}}); |
382 | if (HasSSE2) |
383 | Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v16s8, .Type1: p0, .MemTy: v16s8, .Align: 1}, |
384 | {.Type0: v8s16, .Type1: p0, .MemTy: v8s16, .Align: 1}, |
385 | {.Type0: v2s64, .Type1: p0, .MemTy: v2s64, .Align: 1}, |
386 | {.Type0: v2p0, .Type1: p0, .MemTy: v2p0, .Align: 1}}); |
387 | if (HasAVX) |
388 | Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v32s8, .Type1: p0, .MemTy: v32s8, .Align: 1}, |
389 | {.Type0: v16s16, .Type1: p0, .MemTy: v16s16, .Align: 1}, |
390 | {.Type0: v8s32, .Type1: p0, .MemTy: v8s32, .Align: 1}, |
391 | {.Type0: v4s64, .Type1: p0, .MemTy: v4s64, .Align: 1}, |
392 | {.Type0: v4p0, .Type1: p0, .MemTy: v4p0, .Align: 1}}); |
393 | if (HasAVX512) |
394 | Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v64s8, .Type1: p0, .MemTy: v64s8, .Align: 1}, |
395 | {.Type0: v32s16, .Type1: p0, .MemTy: v32s16, .Align: 1}, |
396 | {.Type0: v16s32, .Type1: p0, .MemTy: v16s32, .Align: 1}, |
397 | {.Type0: v8s64, .Type1: p0, .MemTy: v8s64, .Align: 1}}); |
398 | Action.widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8) |
399 | .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar) |
400 | .scalarize(TypeIdx: 0); |
401 | } |
402 | |
403 | for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) { |
404 | auto &Action = getActionDefinitionsBuilder(Opcode: Op); |
405 | Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s16, .Type1: p0, .MemTy: s8, .Align: 1}, |
406 | {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 1}, |
407 | {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 1}}); |
408 | if (Is64Bit) |
409 | Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: 1}, |
410 | {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: 1}, |
411 | {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: 1}}); |
412 | // TODO - SSE41/AVX2/AVX512F/AVX512BW vector extensions |
413 | } |
414 | |
415 | // sext, zext, and anyext |
416 | getActionDefinitionsBuilder(Opcodes: {G_SEXT, G_ZEXT, G_ANYEXT}) |
417 | .legalIf(Predicate: [=](const LegalityQuery &Query) { |
418 | return typeInSet(TypeIdx: 0, TypesInit: {s8, s16, s32})(Query) || |
419 | (Query.Opcode == G_ANYEXT && Query.Types[0] == s128) || |
420 | (Is64Bit && Query.Types[0] == s64); |
421 | }) |
422 | .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8) |
423 | .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar) |
424 | .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 8) |
425 | .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: sMaxScalar) |
426 | .scalarize(TypeIdx: 0); |
427 | |
428 | getActionDefinitionsBuilder(Opcode: G_SEXT_INREG).lower(); |
429 | |
430 | // fp constants |
431 | getActionDefinitionsBuilder(Opcode: G_FCONSTANT) |
432 | .legalIf(Predicate: [=](const LegalityQuery &Query) -> bool { |
433 | return (typeInSet(TypeIdx: 0, TypesInit: {s32, s64})(Query)) || |
434 | (UseX87 && typeInSet(TypeIdx: 0, TypesInit: {s80})(Query)); |
435 | }); |
436 | |
437 | // fp arithmetic |
438 | getActionDefinitionsBuilder(Opcodes: {G_FADD, G_FSUB, G_FMUL, G_FDIV}) |
439 | .legalIf(Predicate: [=](const LegalityQuery &Query) { |
440 | return (typeInSet(TypeIdx: 0, TypesInit: {s32, s64})(Query)) || |
441 | (HasSSE1 && typeInSet(TypeIdx: 0, TypesInit: {v4s32})(Query)) || |
442 | (HasSSE2 && typeInSet(TypeIdx: 0, TypesInit: {v2s64})(Query)) || |
443 | (HasAVX && typeInSet(TypeIdx: 0, TypesInit: {v8s32, v4s64})(Query)) || |
444 | (HasAVX512 && typeInSet(TypeIdx: 0, TypesInit: {v16s32, v8s64})(Query)) || |
445 | (UseX87 && typeInSet(TypeIdx: 0, TypesInit: {s80})(Query)); |
446 | }); |
447 | |
448 | // fp comparison |
449 | getActionDefinitionsBuilder(Opcode: G_FCMP) |
450 | .legalIf(Predicate: [=](const LegalityQuery &Query) { |
451 | return (HasSSE1 && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s8, s32}})(Query)) || |
452 | (HasSSE2 && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s8, s64}})(Query)); |
453 | }) |
454 | .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s8) |
455 | .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32) |
456 | .widenScalarToNextPow2(TypeIdx: 1); |
457 | |
458 | // fp conversions |
459 | getActionDefinitionsBuilder(Opcode: G_FPEXT).legalIf(Predicate: [=](const LegalityQuery &Query) { |
460 | return (HasSSE2 && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s64, s32}})(Query)) || |
461 | (HasAVX && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{v4s64, v4s32}})(Query)) || |
462 | (HasAVX512 && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{v8s64, v8s32}})(Query)); |
463 | }); |
464 | |
465 | getActionDefinitionsBuilder(Opcode: G_FPTRUNC).legalIf( |
466 | Predicate: [=](const LegalityQuery &Query) { |
467 | return (HasSSE2 && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s32, s64}})(Query)) || |
468 | (HasAVX && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{v4s32, v4s64}})(Query)) || |
469 | (HasAVX512 && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{v8s32, v8s64}})(Query)); |
470 | }); |
471 | |
472 | getActionDefinitionsBuilder(Opcode: G_SITOFP) |
473 | .legalIf(Predicate: [=](const LegalityQuery &Query) { |
474 | return (HasSSE1 && |
475 | (typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s32, s32}})(Query) || |
476 | (Is64Bit && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s32, s64}})(Query)))) || |
477 | (HasSSE2 && |
478 | (typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s64, s32}})(Query) || |
479 | (Is64Bit && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s64, s64}})(Query)))); |
480 | }) |
481 | .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: sMaxScalar) |
482 | .widenScalarToNextPow2(TypeIdx: 1) |
483 | .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32) |
484 | .widenScalarToNextPow2(TypeIdx: 0); |
485 | |
486 | getActionDefinitionsBuilder(Opcode: G_FPTOSI) |
487 | .legalIf(Predicate: [=](const LegalityQuery &Query) { |
488 | return (HasSSE1 && |
489 | (typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s32, s32}})(Query) || |
490 | (Is64Bit && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s64, s32}})(Query)))) || |
491 | (HasSSE2 && |
492 | (typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s32, s64}})(Query) || |
493 | (Is64Bit && typePairInSet(TypeIdx0: 0, TypeIdx1: 1, TypesInit: {{s64, s64}})(Query)))); |
494 | }) |
495 | .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32) |
496 | .widenScalarToNextPow2(TypeIdx: 0) |
497 | .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sMaxScalar) |
498 | .widenScalarToNextPow2(TypeIdx: 1); |
499 | |
500 | // vector ops |
501 | getActionDefinitionsBuilder(Opcode: G_BUILD_VECTOR) |
502 | .customIf(Predicate: [=](const LegalityQuery &Query) { |
503 | return (HasSSE1 && typeInSet(TypeIdx: 0, TypesInit: {v4s32})(Query)) || |
504 | (HasSSE2 && typeInSet(TypeIdx: 0, TypesInit: {v2s64, v8s16, v16s8})(Query)) || |
505 | (HasAVX && typeInSet(TypeIdx: 0, TypesInit: {v4s64, v8s32, v16s16, v32s8})(Query)) || |
506 | (HasAVX512 && typeInSet(TypeIdx: 0, TypesInit: {v8s64, v16s32, v32s16, v64s8})); |
507 | }) |
508 | .clampNumElements(TypeIdx: 0, MinTy: v16s8, MaxTy: s8MaxVector) |
509 | .clampNumElements(TypeIdx: 0, MinTy: v8s16, MaxTy: s16MaxVector) |
510 | .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: s32MaxVector) |
511 | .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: s64MaxVector) |
512 | .moreElementsToNextPow2(TypeIdx: 0); |
513 | |
514 | getActionDefinitionsBuilder(Opcodes: {G_EXTRACT, G_INSERT}) |
515 | .legalIf(Predicate: [=](const LegalityQuery &Query) { |
516 | unsigned SubIdx = Query.Opcode == G_EXTRACT ? 0 : 1; |
517 | unsigned FullIdx = Query.Opcode == G_EXTRACT ? 1 : 0; |
518 | return (HasAVX && typePairInSet(TypeIdx0: SubIdx, TypeIdx1: FullIdx, |
519 | TypesInit: {{v16s8, v32s8}, |
520 | {v8s16, v16s16}, |
521 | {v4s32, v8s32}, |
522 | {v2s64, v4s64}})(Query)) || |
523 | (HasAVX512 && typePairInSet(TypeIdx0: SubIdx, TypeIdx1: FullIdx, |
524 | TypesInit: {{v16s8, v64s8}, |
525 | {v32s8, v64s8}, |
526 | {v8s16, v32s16}, |
527 | {v16s16, v32s16}, |
528 | {v4s32, v16s32}, |
529 | {v8s32, v16s32}, |
530 | {v2s64, v8s64}, |
531 | {v4s64, v8s64}})(Query)); |
532 | }); |
533 | |
534 | // todo: only permit dst types up to max legal vector register size? |
535 | getActionDefinitionsBuilder(Opcode: G_CONCAT_VECTORS) |
536 | .legalIf(Predicate: [=](const LegalityQuery &Query) { |
537 | return (HasSSE1 && typePairInSet(TypeIdx0: 1, TypeIdx1: 0, |
538 | TypesInit: {{v16s8, v32s8}, |
539 | {v8s16, v16s16}, |
540 | {v4s32, v8s32}, |
541 | {v2s64, v4s64}})(Query)) || |
542 | (HasAVX && typePairInSet(TypeIdx0: 1, TypeIdx1: 0, |
543 | TypesInit: {{v16s8, v64s8}, |
544 | {v32s8, v64s8}, |
545 | {v8s16, v32s16}, |
546 | {v16s16, v32s16}, |
547 | {v4s32, v16s32}, |
548 | {v8s32, v16s32}, |
549 | {v2s64, v8s64}, |
550 | {v4s64, v8s64}})(Query)); |
551 | }); |
552 | |
553 | // todo: vectors and address spaces |
554 | getActionDefinitionsBuilder(Opcode: G_SELECT) |
555 | .legalFor(Types: {{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32}, {p0, s32}}) |
556 | .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8) |
557 | .clampScalar(TypeIdx: 0, MinTy: HasCMOV ? s16 : s8, MaxTy: sMaxScalar) |
558 | .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s32); |
559 | |
560 | // memory intrinsics |
561 | getActionDefinitionsBuilder(Opcodes: {G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall(); |
562 | |
563 | getActionDefinitionsBuilder(Opcodes: {G_DYN_STACKALLOC, |
564 | G_STACKSAVE, |
565 | G_STACKRESTORE}).lower(); |
566 | |
567 | // fp intrinsics |
568 | getActionDefinitionsBuilder(Opcode: G_INTRINSIC_ROUNDEVEN) |
569 | .scalarize(TypeIdx: 0) |
570 | .minScalar(TypeIdx: 0, Ty: LLT::scalar(SizeInBits: 32)) |
571 | .libcall(); |
572 | |
573 | getActionDefinitionsBuilder(Opcodes: {G_FREEZE, G_CONSTANT_FOLD_BARRIER}) |
574 | .legalFor(Types: {s8, s16, s32, s64, p0}) |
575 | .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8) |
576 | .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar); |
577 | |
578 | getLegacyLegalizerInfo().computeTables(); |
579 | verify(MII: *STI.getInstrInfo()); |
580 | } |
581 | |
582 | bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, |
583 | LostDebugLocObserver &LocObserver) const { |
584 | MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; |
585 | MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); |
586 | switch (MI.getOpcode()) { |
587 | default: |
588 | // No idea what to do. |
589 | return false; |
590 | case TargetOpcode::G_BUILD_VECTOR: |
591 | return legalizeBuildVector(MI, MRI, Helper); |
592 | } |
593 | llvm_unreachable("expected switch to return" ); |
594 | } |
595 | |
596 | bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI, |
597 | MachineRegisterInfo &MRI, |
598 | LegalizerHelper &Helper) const { |
599 | MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; |
600 | const auto &BuildVector = cast<GBuildVector>(Val&: MI); |
601 | Register Dst = BuildVector.getReg(Idx: 0); |
602 | LLT DstTy = MRI.getType(Reg: Dst); |
603 | MachineFunction &MF = MIRBuilder.getMF(); |
604 | LLVMContext &Ctx = MF.getFunction().getContext(); |
605 | uint64_t DstTySize = DstTy.getScalarSizeInBits(); |
606 | |
607 | SmallVector<Constant *, 4> CstIdxs; |
608 | for (unsigned i = 0; i < BuildVector.getNumSources(); ++i) { |
609 | Register Source = BuildVector.getSourceReg(I: i); |
610 | |
611 | auto ValueAndReg = getIConstantVRegValWithLookThrough(VReg: Source, MRI); |
612 | if (ValueAndReg) { |
613 | CstIdxs.emplace_back(Args: ConstantInt::get(Context&: Ctx, V: ValueAndReg->Value)); |
614 | continue; |
615 | } |
616 | |
617 | auto FPValueAndReg = getFConstantVRegValWithLookThrough(VReg: Source, MRI); |
618 | if (FPValueAndReg) { |
619 | CstIdxs.emplace_back(Args: ConstantFP::get(Context&: Ctx, V: FPValueAndReg->Value)); |
620 | continue; |
621 | } |
622 | |
623 | if (getOpcodeDef<GImplicitDef>(Reg: Source, MRI)) { |
624 | CstIdxs.emplace_back(Args: UndefValue::get(T: Type::getIntNTy(C&: Ctx, N: DstTySize))); |
625 | continue; |
626 | } |
627 | return false; |
628 | } |
629 | |
630 | Constant *ConstVal = ConstantVector::get(V: CstIdxs); |
631 | |
632 | const DataLayout &DL = MIRBuilder.getDataLayout(); |
633 | unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace(); |
634 | Align Alignment(DL.getABITypeAlign(Ty: ConstVal->getType())); |
635 | auto Addr = MIRBuilder.buildConstantPool( |
636 | Res: LLT::pointer(AddressSpace: AddrSpace, SizeInBits: DL.getPointerSizeInBits(AS: AddrSpace)), |
637 | Idx: MF.getConstantPool()->getConstantPoolIndex(C: ConstVal, Alignment)); |
638 | MachineMemOperand *MMO = |
639 | MF.getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF), |
640 | f: MachineMemOperand::MOLoad, MemTy: DstTy, base_alignment: Alignment); |
641 | |
642 | MIRBuilder.buildLoad(Res: Dst, Addr, MMO&: *MMO); |
643 | MI.eraseFromParent(); |
644 | return true; |
645 | } |
646 | |
647 | bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, |
648 | MachineInstr &MI) const { |
649 | return true; |
650 | } |
651 | |