1//===- X86LegalizerInfo.cpp --------------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the Machinelegalizer class for X86.
10/// \todo This should be generated by TableGen.
11//===----------------------------------------------------------------------===//
12
13#include "X86LegalizerInfo.h"
14#include "X86Subtarget.h"
15#include "X86TargetMachine.h"
16#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
17#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
18#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
19#include "llvm/CodeGen/MachineConstantPool.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/TargetOpcodes.h"
22#include "llvm/CodeGen/ValueTypes.h"
23#include "llvm/IR/DerivedTypes.h"
24#include "llvm/IR/IntrinsicsX86.h"
25#include "llvm/IR/Type.h"
26
27using namespace llvm;
28using namespace TargetOpcode;
29using namespace LegalizeActions;
30using namespace LegalityPredicates;
31
32X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
33 const X86TargetMachine &TM)
34 : Subtarget(STI) {
35
36 bool Is64Bit = Subtarget.is64Bit();
37 bool HasCMOV = Subtarget.canUseCMOV();
38 bool HasSSE1 = Subtarget.hasSSE1();
39 bool HasSSE2 = Subtarget.hasSSE2();
40 bool HasSSE41 = Subtarget.hasSSE41();
41 bool HasAVX = Subtarget.hasAVX();
42 bool HasAVX2 = Subtarget.hasAVX2();
43 bool HasAVX512 = Subtarget.hasAVX512();
44 bool HasVLX = Subtarget.hasVLX();
45 bool HasDQI = Subtarget.hasAVX512() && Subtarget.hasDQI();
46 bool HasBWI = Subtarget.hasAVX512() && Subtarget.hasBWI();
47 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
48 bool HasPOPCNT = Subtarget.hasPOPCNT();
49 bool HasLZCNT = Subtarget.hasLZCNT();
50 bool HasBMI = Subtarget.hasBMI();
51
52 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: TM.getPointerSizeInBits(AS: 0));
53 const LLT s1 = LLT::scalar(SizeInBits: 1);
54 const LLT s8 = LLT::scalar(SizeInBits: 8);
55 const LLT s16 = LLT::scalar(SizeInBits: 16);
56 const LLT s32 = LLT::scalar(SizeInBits: 32);
57 const LLT s64 = LLT::scalar(SizeInBits: 64);
58 const LLT s80 = LLT::scalar(SizeInBits: 80);
59 const LLT s128 = LLT::scalar(SizeInBits: 128);
60 const LLT sMaxScalar = Subtarget.is64Bit() ? s64 : s32;
61 const LLT v2s32 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32);
62 const LLT v4s8 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 8);
63
64 const LLT v16s8 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8);
65 const LLT v8s16 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16);
66 const LLT v4s32 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
67 const LLT v2s64 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64);
68 const LLT v2p0 = LLT::fixed_vector(NumElements: 2, ScalarTy: p0);
69
70 const LLT v32s8 = LLT::fixed_vector(NumElements: 32, ScalarSizeInBits: 8);
71 const LLT v16s16 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 16);
72 const LLT v8s32 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 32);
73 const LLT v4s64 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 64);
74 const LLT v4p0 = LLT::fixed_vector(NumElements: 4, ScalarTy: p0);
75
76 const LLT v64s8 = LLT::fixed_vector(NumElements: 64, ScalarSizeInBits: 8);
77 const LLT v32s16 = LLT::fixed_vector(NumElements: 32, ScalarSizeInBits: 16);
78 const LLT v16s32 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 32);
79 const LLT v8s64 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 64);
80
81 const LLT s8MaxVector = HasAVX512 ? v64s8 : HasAVX ? v32s8 : v16s8;
82 const LLT s16MaxVector = HasAVX512 ? v32s16 : HasAVX ? v16s16 : v8s16;
83 const LLT s32MaxVector = HasAVX512 ? v16s32 : HasAVX ? v8s32 : v4s32;
84 const LLT s64MaxVector = HasAVX512 ? v8s64 : HasAVX ? v4s64 : v2s64;
85
86 // todo: AVX512 bool vector predicate types
87
88 // implicit/constants
89 // 32/64-bits needs support for s64/s128 to handle cases:
90 // s64 = EXTEND (G_IMPLICIT_DEF s32) -> s64 = G_IMPLICIT_DEF
91 // s128 = EXTEND (G_IMPLICIT_DEF s32/s64) -> s128 = G_IMPLICIT_DEF
92 getActionDefinitionsBuilder(
93 Opcodes: {G_IMPLICIT_DEF, G_PHI, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
94 .legalFor(Types: {p0, s1, s8, s16, s32, s64})
95 .legalFor(Pred: UseX87, Types: {s80})
96 .legalFor(Pred: Is64Bit, Types: {s128})
97 .legalFor(Pred: HasSSE2, Types: {v16s8, v8s16, v4s32, v2s64})
98 .legalFor(Pred: HasAVX, Types: {v32s8, v16s16, v8s32, v4s64})
99 .legalFor(Pred: HasAVX512, Types: {v64s8, v32s16, v16s32, v8s64})
100 .widenScalarOrEltToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
101 .clampScalarOrElt(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
102 .moreElementsToNextPow2(TypeIdx: 0)
103 .clampNumElements(TypeIdx: 0, MinTy: v16s8, MaxTy: s8MaxVector)
104 .clampNumElements(TypeIdx: 0, MinTy: v8s16, MaxTy: s16MaxVector)
105 .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: s32MaxVector)
106 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: s64MaxVector)
107 .clampMaxNumElements(TypeIdx: 0, EltTy: p0,
108 MaxElements: Is64Bit ? s64MaxVector.getNumElements()
109 : s32MaxVector.getNumElements())
110 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0);
111
112 getActionDefinitionsBuilder(Opcode: G_CONSTANT)
113 .legalFor(Types: {p0, s8, s16, s32})
114 .legalFor(Pred: Is64Bit, Types: {s64})
115 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
116 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar);
117
118 getActionDefinitionsBuilder(Opcodes: {G_LROUND, G_LLROUND})
119 .widenScalarIf(Predicate: typeIs(TypeIdx: 1, TypesInit: s16),
120 Mutation: [=](const LegalityQuery &) {
121 return std::pair<unsigned, LLT>(1, s32);
122 })
123 .libcall();
124
125 getActionDefinitionsBuilder(
126 Opcodes: {G_FCOS, G_FCOSH, G_FACOS, G_FSIN, G_FSINH, G_FASIN, G_FTAN,
127 G_FTANH, G_FATAN, G_FATAN2, G_FPOW, G_FEXP, G_FEXP2, G_FEXP10,
128 G_FLOG, G_FLOG2, G_FLOG10, G_FPOWI, G_FSINCOS, G_FCEIL, G_FFLOOR})
129 .libcall();
130
131 getActionDefinitionsBuilder(Opcode: G_FSQRT)
132 .legalFor(Pred: HasSSE1 || UseX87, Types: {s32})
133 .legalFor(Pred: HasSSE2 || UseX87, Types: {s64})
134 .legalFor(Pred: UseX87, Types: {s80});
135
136 getActionDefinitionsBuilder(Opcodes: {G_GET_ROUNDING, G_SET_ROUNDING})
137 .customFor(Types: {s32});
138
139 // merge/unmerge
140 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
141 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
142 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
143 getActionDefinitionsBuilder(Opcode: Op)
144 .widenScalarToNextPow2(TypeIdx: LitTyIdx, /*Min=*/MinSize: 8)
145 .widenScalarToNextPow2(TypeIdx: BigTyIdx, /*Min=*/MinSize: 16)
146 .minScalar(TypeIdx: LitTyIdx, Ty: s8)
147 .minScalar(TypeIdx: BigTyIdx, Ty: s32)
148 .legalIf(Predicate: [=](const LegalityQuery &Q) {
149 switch (Q.Types[BigTyIdx].getSizeInBits()) {
150 case 16:
151 case 32:
152 case 64:
153 case 128:
154 case 256:
155 case 512:
156 break;
157 default:
158 return false;
159 }
160 switch (Q.Types[LitTyIdx].getSizeInBits()) {
161 case 8:
162 case 16:
163 case 32:
164 case 64:
165 case 128:
166 case 256:
167 return true;
168 default:
169 return false;
170 }
171 });
172 }
173
174 getActionDefinitionsBuilder(Opcodes: {G_UMIN, G_UMAX, G_SMIN, G_SMAX})
175 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
176 .lower();
177
178 // integer addition/subtraction
179 getActionDefinitionsBuilder(Opcodes: {G_ADD, G_SUB})
180 .legalFor(Types: {s8, s16, s32})
181 .legalFor(Pred: Is64Bit, Types: {s64})
182 .legalFor(Pred: HasSSE2, Types: {v16s8, v8s16, v4s32, v2s64})
183 .legalFor(Pred: HasAVX2, Types: {v32s8, v16s16, v8s32, v4s64})
184 .legalFor(Pred: HasAVX512, Types: {v16s32, v8s64})
185 .legalFor(Pred: HasBWI, Types: {v64s8, v32s16})
186 .clampMinNumElements(TypeIdx: 0, EltTy: s8, MinElements: 16)
187 .clampMinNumElements(TypeIdx: 0, EltTy: s16, MinElements: 8)
188 .clampMinNumElements(TypeIdx: 0, EltTy: s32, MinElements: 4)
189 .clampMinNumElements(TypeIdx: 0, EltTy: s64, MinElements: 2)
190 .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: HasBWI ? 64 : (HasAVX2 ? 32 : 16))
191 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: HasBWI ? 32 : (HasAVX2 ? 16 : 8))
192 .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
193 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: HasAVX512 ? 8 : (HasAVX2 ? 4 : 2))
194 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
195 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
196 .scalarize(TypeIdx: 0);
197
198 getActionDefinitionsBuilder(Opcodes: {G_UADDE, G_UADDO, G_USUBE, G_USUBO})
199 .legalFor(Types: {{s8, s8}, {s16, s8}, {s32, s8}})
200 .legalFor(Pred: Is64Bit, Types: {{s64, s8}})
201 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
202 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
203 .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: s8)
204 .scalarize(TypeIdx: 0);
205
206 // integer multiply
207 getActionDefinitionsBuilder(Opcode: G_MUL)
208 .legalFor(Types: {s8, s16, s32})
209 .legalFor(Pred: Is64Bit, Types: {s64})
210 .legalFor(Pred: HasSSE2, Types: {v8s16})
211 .legalFor(Pred: HasSSE41, Types: {v4s32})
212 .legalFor(Pred: HasAVX2, Types: {v16s16, v8s32})
213 .legalFor(Pred: HasAVX512, Types: {v16s32})
214 .legalFor(Pred: HasDQI, Types: {v8s64})
215 .legalFor(Pred: HasDQI && HasVLX, Types: {v2s64, v4s64})
216 .legalFor(Pred: HasBWI, Types: {v32s16})
217 .clampMinNumElements(TypeIdx: 0, EltTy: s16, MinElements: 8)
218 .clampMinNumElements(TypeIdx: 0, EltTy: s32, MinElements: 4)
219 .clampMinNumElements(TypeIdx: 0, EltTy: s64, MinElements: HasVLX ? 2 : 8)
220 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: HasBWI ? 32 : (HasAVX2 ? 16 : 8))
221 .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
222 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 8)
223 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
224 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
225 .scalarize(TypeIdx: 0);
226
227 getActionDefinitionsBuilder(Opcodes: {G_SMULH, G_UMULH})
228 .legalFor(Types: {s8, s16, s32})
229 .legalFor(Pred: Is64Bit, Types: {s64})
230 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
231 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
232 .scalarize(TypeIdx: 0);
233
234 // integer divisions
235 getActionDefinitionsBuilder(Opcodes: {G_SDIV, G_SREM, G_UDIV, G_UREM})
236 .legalFor(Types: {s8, s16, s32})
237 .legalFor(Pred: Is64Bit, Types: {s64})
238 .libcallFor(Types: {s64})
239 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar);
240
241 // integer shifts
242 getActionDefinitionsBuilder(Opcodes: {G_SHL, G_LSHR, G_ASHR})
243 .legalFor(Types: {{s8, s8}, {s16, s8}, {s32, s8}})
244 .legalFor(Pred: Is64Bit, Types: {{s64, s8}})
245 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
246 .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: s8);
247
248 // integer logic
249 getActionDefinitionsBuilder(Opcodes: {G_AND, G_OR, G_XOR})
250 .legalFor(Types: {s8, s16, s32})
251 .legalFor(Pred: Is64Bit, Types: {s64})
252 .legalFor(Pred: HasSSE2, Types: {v16s8, v8s16, v4s32, v2s64})
253 .legalFor(Pred: HasAVX, Types: {v32s8, v16s16, v8s32, v4s64})
254 .legalFor(Pred: HasAVX512, Types: {v64s8, v32s16, v16s32, v8s64})
255 .clampNumElements(TypeIdx: 0, MinTy: v16s8, MaxTy: s8MaxVector)
256 .clampNumElements(TypeIdx: 0, MinTy: v8s16, MaxTy: s16MaxVector)
257 .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: s32MaxVector)
258 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: s64MaxVector)
259 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
260 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
261 .scalarize(TypeIdx: 0);
262
263 // integer comparison
264 const std::initializer_list<LLT> IntTypes32 = {s8, s16, s32, p0};
265 const std::initializer_list<LLT> IntTypes64 = {s8, s16, s32, s64, p0};
266
267 getActionDefinitionsBuilder(Opcode: G_ICMP)
268 .legalForCartesianProduct(Types0: {s8}, Types1: Is64Bit ? IntTypes64 : IntTypes32)
269 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s8)
270 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 8)
271 .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: sMaxScalar);
272
273 // bswap
274 getActionDefinitionsBuilder(Opcode: G_BSWAP)
275 .legalFor(Types: {s32})
276 .legalFor(Pred: Is64Bit, Types: {s64})
277 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
278 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sMaxScalar);
279
280 // popcount
281 getActionDefinitionsBuilder(Opcode: G_CTPOP)
282 .legalFor(Pred: HasPOPCNT, Types: {{s16, s16}, {s32, s32}})
283 .legalFor(Pred: HasPOPCNT && Is64Bit, Types: {{s64, s64}})
284 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 16)
285 .clampScalar(TypeIdx: 1, MinTy: s16, MaxTy: sMaxScalar)
286 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1);
287
288 // count leading zeros (LZCNT)
289 getActionDefinitionsBuilder(Opcode: G_CTLZ)
290 .legalFor(Pred: HasLZCNT, Types: {{s16, s16}, {s32, s32}})
291 .legalFor(Pred: HasLZCNT && Is64Bit, Types: {{s64, s64}})
292 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 16)
293 .clampScalar(TypeIdx: 1, MinTy: s16, MaxTy: sMaxScalar)
294 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1);
295
296 // count trailing zeros
297 getActionDefinitionsBuilder(Opcode: G_CTTZ_ZERO_UNDEF)
298 .legalFor(Types: {{s16, s16}, {s32, s32}})
299 .legalFor(Pred: Is64Bit, Types: {{s64, s64}})
300 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 16)
301 .clampScalar(TypeIdx: 1, MinTy: s16, MaxTy: sMaxScalar)
302 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1);
303
304 getActionDefinitionsBuilder(Opcode: G_CTTZ)
305 .legalFor(Pred: HasBMI, Types: {{s16, s16}, {s32, s32}})
306 .legalFor(Pred: HasBMI && Is64Bit, Types: {{s64, s64}})
307 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 16)
308 .clampScalar(TypeIdx: 1, MinTy: s16, MaxTy: sMaxScalar)
309 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1);
310
311 getActionDefinitionsBuilder(Opcode: G_BRCOND).legalFor(Types: {s1});
312
313 // pointer handling
314 const std::initializer_list<LLT> PtrTypes32 = {s1, s8, s16, s32};
315 const std::initializer_list<LLT> PtrTypes64 = {s1, s8, s16, s32, s64};
316
317 getActionDefinitionsBuilder(Opcode: G_PTRTOINT)
318 .legalForCartesianProduct(Types0: Is64Bit ? PtrTypes64 : PtrTypes32, Types1: {p0})
319 .maxScalar(TypeIdx: 0, Ty: sMaxScalar)
320 .widenScalarToNextPow2(TypeIdx: 0, /*Min*/ MinSize: 8);
321
322 getActionDefinitionsBuilder(Opcode: G_INTTOPTR).legalFor(Types: {{p0, sMaxScalar}});
323
324 getActionDefinitionsBuilder(Opcode: G_CONSTANT_POOL).legalFor(Types: {p0});
325
326 getActionDefinitionsBuilder(Opcode: G_PTR_ADD)
327 .legalFor(Types: {{p0, s32}})
328 .legalFor(Pred: Is64Bit, Types: {{p0, s64}})
329 .widenScalarToNextPow2(TypeIdx: 1, /*Min*/ MinSize: 32)
330 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: sMaxScalar);
331
332 getActionDefinitionsBuilder(Opcodes: {G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor(Types: {p0});
333
334 // load/store: add more corner cases
335 for (unsigned Op : {G_LOAD, G_STORE}) {
336 auto &Action = getActionDefinitionsBuilder(Opcode: Op);
337 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s8, .Type1: p0, .MemTy: s8, .Align: 1},
338 {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: 1},
339 {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: 1},
340 {.Type0: s80, .Type1: p0, .MemTy: s80, .Align: 1},
341 {.Type0: p0, .Type1: p0, .MemTy: p0, .Align: 1},
342 {.Type0: v4s8, .Type1: p0, .MemTy: v4s8, .Align: 1}});
343 if (Is64Bit)
344 Action.legalForTypesWithMemDesc(
345 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s64, .Align: 1}, {.Type0: v2s32, .Type1: p0, .MemTy: v2s32, .Align: 1}});
346
347 if (HasSSE1)
348 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v4s32, .Type1: p0, .MemTy: v4s32, .Align: 1}});
349 if (HasSSE2)
350 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v16s8, .Type1: p0, .MemTy: v16s8, .Align: 1},
351 {.Type0: v8s16, .Type1: p0, .MemTy: v8s16, .Align: 1},
352 {.Type0: v2s64, .Type1: p0, .MemTy: v2s64, .Align: 1},
353 {.Type0: v2p0, .Type1: p0, .MemTy: v2p0, .Align: 1}});
354 if (HasAVX)
355 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v32s8, .Type1: p0, .MemTy: v32s8, .Align: 1},
356 {.Type0: v16s16, .Type1: p0, .MemTy: v16s16, .Align: 1},
357 {.Type0: v8s32, .Type1: p0, .MemTy: v8s32, .Align: 1},
358 {.Type0: v4s64, .Type1: p0, .MemTy: v4s64, .Align: 1},
359 {.Type0: v4p0, .Type1: p0, .MemTy: v4p0, .Align: 1}});
360 if (HasAVX512)
361 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v64s8, .Type1: p0, .MemTy: v64s8, .Align: 1},
362 {.Type0: v32s16, .Type1: p0, .MemTy: v32s16, .Align: 1},
363 {.Type0: v16s32, .Type1: p0, .MemTy: v16s32, .Align: 1},
364 {.Type0: v8s64, .Type1: p0, .MemTy: v8s64, .Align: 1}});
365
366 // X86 supports extending loads but not stores for GPRs
367 if (Op == G_LOAD) {
368 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s8, .Type1: p0, .MemTy: s1, .Align: 1},
369 {.Type0: s16, .Type1: p0, .MemTy: s8, .Align: 1},
370 {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 1},
371 {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 1}});
372 if (Is64Bit)
373 Action.legalForTypesWithMemDesc(
374 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: 1}, {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: 1}, {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: 1}});
375 } else {
376 Action.customIf(Predicate: [=](const LegalityQuery &Query) {
377 return Query.Types[0] != Query.MMODescrs[0].MemoryTy;
378 });
379 }
380 Action.widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
381 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
382 .scalarize(TypeIdx: 0);
383 }
384
385 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
386 auto &Action = getActionDefinitionsBuilder(Opcode: Op);
387 Action.legalForTypesWithMemDesc(
388 TypesAndMemDesc: {{.Type0: s16, .Type1: p0, .MemTy: s8, .Align: 1}, {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 1}, {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 1}});
389 if (Is64Bit)
390 Action.legalForTypesWithMemDesc(
391 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: 1}, {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: 1}, {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: 1}});
392 // TODO - SSE41/AVX2/AVX512F/AVX512BW vector extensions
393 }
394
395 // sext, zext, and anyext
396 getActionDefinitionsBuilder(Opcode: G_ANYEXT)
397 .legalFor(Types: {s8, s16, s32, s128})
398 .legalFor(Pred: Is64Bit, Types: {s64})
399 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
400 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
401 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 8)
402 .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: sMaxScalar)
403 .scalarize(TypeIdx: 0);
404
405 getActionDefinitionsBuilder(Opcodes: {G_SEXT, G_ZEXT})
406 .legalFor(Types: {s8, s16, s32})
407 .legalFor(Pred: Is64Bit, Types: {s64})
408 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
409 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
410 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 8)
411 .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: sMaxScalar)
412 .scalarize(TypeIdx: 0);
413
414 getActionDefinitionsBuilder(Opcode: G_SEXT_INREG).lower();
415
416 // fp constants
417 getActionDefinitionsBuilder(Opcode: G_FCONSTANT)
418 .legalFor(Types: {s32, s64})
419 .legalFor(Pred: UseX87, Types: {s80});
420
421 // fp arithmetic
422 getActionDefinitionsBuilder(Opcodes: {G_FADD, G_FSUB, G_FMUL, G_FDIV})
423 .legalFor(Types: {s32, s64})
424 .legalFor(Pred: HasSSE1, Types: {v4s32})
425 .legalFor(Pred: HasSSE2, Types: {v2s64})
426 .legalFor(Pred: HasAVX, Types: {v8s32, v4s64})
427 .legalFor(Pred: HasAVX512, Types: {v16s32, v8s64})
428 .legalFor(Pred: UseX87, Types: {s80});
429
430 getActionDefinitionsBuilder(Opcode: G_FABS)
431 .legalFor(Pred: UseX87, Types: {s80})
432 .legalFor(Pred: UseX87 && !Is64Bit, Types: {s64})
433 .lower();
434
435 // fp comparison
436 getActionDefinitionsBuilder(Opcode: G_FCMP)
437 .legalFor(Pred: HasSSE1 || UseX87, Types: {s8, s32})
438 .legalFor(Pred: HasSSE2 || UseX87, Types: {s8, s64})
439 .legalFor(Pred: UseX87, Types: {s8, s80})
440 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s8)
441 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32)
442 .widenScalarToNextPow2(TypeIdx: 1);
443
444 // fp conversions
445 getActionDefinitionsBuilder(Opcode: G_FPEXT)
446 .legalFor(Pred: HasSSE2, Types: {{s64, s32}})
447 .legalFor(Pred: HasAVX, Types: {{v4s64, v4s32}})
448 .legalFor(Pred: HasAVX512, Types: {{v8s64, v8s32}})
449 .libcall();
450
451 getActionDefinitionsBuilder(Opcode: G_FPTRUNC)
452 .legalFor(Pred: HasSSE2, Types: {{s32, s64}})
453 .legalFor(Pred: HasAVX, Types: {{v4s32, v4s64}})
454 .legalFor(Pred: HasAVX512, Types: {{v8s32, v8s64}});
455
456 getActionDefinitionsBuilder(Opcode: G_SITOFP)
457 .legalFor(Pred: HasSSE1, Types: {{s32, s32}})
458 .legalFor(Pred: HasSSE1 && Is64Bit, Types: {{s32, s64}})
459 .legalFor(Pred: HasSSE2, Types: {{s64, s32}})
460 .legalFor(Pred: HasSSE2 && Is64Bit, Types: {{s64, s64}})
461 .clampScalar(TypeIdx: 1, MinTy: (UseX87 && !HasSSE1) ? s16 : s32, MaxTy: sMaxScalar)
462 .widenScalarToNextPow2(TypeIdx: 1)
463 .customForCartesianProduct(Pred: UseX87, Types0: {s32, s64, s80}, Types1: {s16, s32, s64})
464 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32)
465 .widenScalarToNextPow2(TypeIdx: 0);
466
467 getActionDefinitionsBuilder(Opcode: G_FPTOSI)
468 .legalFor(Pred: HasSSE1, Types: {{s32, s32}})
469 .legalFor(Pred: HasSSE1 && Is64Bit, Types: {{s64, s32}})
470 .legalFor(Pred: HasSSE2, Types: {{s32, s64}})
471 .legalFor(Pred: HasSSE2 && Is64Bit, Types: {{s64, s64}})
472 .clampScalar(TypeIdx: 0, MinTy: (UseX87 && !HasSSE1) ? s16 : s32, MaxTy: sMaxScalar)
473 .widenScalarToNextPow2(TypeIdx: 0)
474 .customForCartesianProduct(Pred: UseX87, Types0: {s16, s32, s64}, Types1: {s32, s64, s80})
475 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32)
476 .widenScalarToNextPow2(TypeIdx: 1);
477
478 // For G_UITOFP and G_FPTOUI without AVX512, we have to custom legalize types
479 // <= s32 manually. Otherwise, in custom handler there is no way to
480 // understand whether s32 is an original type and we need to promote it to
481 // s64 or s32 is obtained after widening and we shouldn't widen it to s64.
482 //
483 // For AVX512 we simply widen types as there is direct mapping from opcodes
484 // to asm instructions.
485 getActionDefinitionsBuilder(Opcode: G_UITOFP)
486 .legalFor(Pred: HasAVX512, Types: {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
487 .customIf(Predicate: [=](const LegalityQuery &Query) {
488 return !HasAVX512 &&
489 ((HasSSE1 && typeIs(TypeIdx: 0, TypesInit: s32)(Query)) ||
490 (HasSSE2 && typeIs(TypeIdx: 0, TypesInit: s64)(Query))) &&
491 scalarNarrowerThan(TypeIdx: 1, Size: Is64Bit ? 64 : 32)(Query);
492 })
493 .lowerIf(Predicate: [=](const LegalityQuery &Query) {
494 // Lower conversions from s64
495 return !HasAVX512 &&
496 ((HasSSE1 && typeIs(TypeIdx: 0, TypesInit: s32)(Query)) ||
497 (HasSSE2 && typeIs(TypeIdx: 0, TypesInit: s64)(Query))) &&
498 (Is64Bit && typeIs(TypeIdx: 1, TypesInit: s64)(Query));
499 })
500 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32)
501 .widenScalarToNextPow2(TypeIdx: 0)
502 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: sMaxScalar)
503 .widenScalarToNextPow2(TypeIdx: 1);
504
505 getActionDefinitionsBuilder(Opcode: G_FPTOUI)
506 .legalFor(Pred: HasAVX512, Types: {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
507 .customIf(Predicate: [=](const LegalityQuery &Query) {
508 return !HasAVX512 &&
509 ((HasSSE1 && typeIs(TypeIdx: 1, TypesInit: s32)(Query)) ||
510 (HasSSE2 && typeIs(TypeIdx: 1, TypesInit: s64)(Query))) &&
511 scalarNarrowerThan(TypeIdx: 0, Size: Is64Bit ? 64 : 32)(Query);
512 })
513 // TODO: replace with customized legalization using
514 // specifics of cvttsd2si. The selection of this node requires
515 // a vector type. Either G_SCALAR_TO_VECTOR is needed or more advanced
516 // support of G_BUILD_VECTOR/G_INSERT_VECTOR_ELT is required beforehand.
517 .lowerIf(Predicate: [=](const LegalityQuery &Query) {
518 return !HasAVX512 &&
519 ((HasSSE1 && typeIs(TypeIdx: 1, TypesInit: s32)(Query)) ||
520 (HasSSE2 && typeIs(TypeIdx: 1, TypesInit: s64)(Query))) &&
521 (Is64Bit && typeIs(TypeIdx: 0, TypesInit: s64)(Query));
522 })
523 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sMaxScalar)
524 .widenScalarToNextPow2(TypeIdx: 0)
525 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32)
526 .widenScalarToNextPow2(TypeIdx: 1);
527
528 // vector ops
529 getActionDefinitionsBuilder(Opcode: G_BUILD_VECTOR)
530 .customIf(Predicate: [=](const LegalityQuery &Query) {
531 return (HasSSE1 && typeInSet(TypeIdx: 0, TypesInit: {v4s32})(Query)) ||
532 (HasSSE2 && typeInSet(TypeIdx: 0, TypesInit: {v2s64, v8s16, v16s8})(Query)) ||
533 (HasAVX && typeInSet(TypeIdx: 0, TypesInit: {v4s64, v8s32, v16s16, v32s8})(Query)) ||
534 (HasAVX512 && typeInSet(TypeIdx: 0, TypesInit: {v8s64, v16s32, v32s16, v64s8}));
535 })
536 .clampNumElements(TypeIdx: 0, MinTy: v16s8, MaxTy: s8MaxVector)
537 .clampNumElements(TypeIdx: 0, MinTy: v8s16, MaxTy: s16MaxVector)
538 .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: s32MaxVector)
539 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: s64MaxVector)
540 .moreElementsToNextPow2(TypeIdx: 0);
541
542 getActionDefinitionsBuilder(Opcodes: {G_EXTRACT, G_INSERT})
543 .legalIf(Predicate: [=](const LegalityQuery &Query) {
544 unsigned SubIdx = Query.Opcode == G_EXTRACT ? 0 : 1;
545 unsigned FullIdx = Query.Opcode == G_EXTRACT ? 1 : 0;
546 return (HasAVX && typePairInSet(TypeIdx0: SubIdx, TypeIdx1: FullIdx,
547 TypesInit: {{v16s8, v32s8},
548 {v8s16, v16s16},
549 {v4s32, v8s32},
550 {v2s64, v4s64}})(Query)) ||
551 (HasAVX512 && typePairInSet(TypeIdx0: SubIdx, TypeIdx1: FullIdx,
552 TypesInit: {{v16s8, v64s8},
553 {v32s8, v64s8},
554 {v8s16, v32s16},
555 {v16s16, v32s16},
556 {v4s32, v16s32},
557 {v8s32, v16s32},
558 {v2s64, v8s64},
559 {v4s64, v8s64}})(Query));
560 });
561
562 // todo: only permit dst types up to max legal vector register size?
563 getActionDefinitionsBuilder(Opcode: G_CONCAT_VECTORS)
564 .legalFor(
565 Pred: HasSSE1,
566 Types: {{v32s8, v16s8}, {v16s16, v8s16}, {v8s32, v4s32}, {v4s64, v2s64}})
567 .legalFor(Pred: HasAVX, Types: {{v64s8, v16s8},
568 {v64s8, v32s8},
569 {v32s16, v8s16},
570 {v32s16, v16s16},
571 {v16s32, v4s32},
572 {v16s32, v8s32},
573 {v8s64, v2s64},
574 {v8s64, v4s64}});
575
576 // todo: vectors and address spaces
577 getActionDefinitionsBuilder(Opcode: G_SELECT)
578 .legalFor(Types: {{s16, s32}, {s32, s32}, {p0, s32}})
579 .legalFor(Pred: !HasCMOV, Types: {{s8, s32}})
580 .legalFor(Pred: Is64Bit, Types: {{s64, s32}})
581 .legalFor(Pred: UseX87, Types: {{s80, s32}})
582 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s32)
583 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
584 .clampScalar(TypeIdx: 0, MinTy: HasCMOV ? s16 : s8, MaxTy: sMaxScalar);
585
586 // memory intrinsics
587 getActionDefinitionsBuilder(Opcodes: {G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
588
589 getActionDefinitionsBuilder(Opcodes: {G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
590 .lower();
591
592 // fp intrinsics
593 // fpclass for i686 is disabled for llvm issue #171992
594 getActionDefinitionsBuilder(Opcode: G_IS_FPCLASS)
595 .lowerFor(Pred: Is64Bit, Types: {{s1, s32}, {s1, s64}, {s1, s80}});
596
597 getActionDefinitionsBuilder(Opcodes: {G_INTRINSIC_ROUNDEVEN, G_INTRINSIC_TRUNC})
598 .scalarize(TypeIdx: 0)
599 .minScalar(TypeIdx: 0, Ty: LLT::scalar(SizeInBits: 32))
600 .libcall();
601
602 getLegacyLegalizerInfo().computeTables();
603 verify(MII: *STI.getInstrInfo());
604}
605
606bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
607 LostDebugLocObserver &LocObserver) const {
608 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
609 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
610 switch (MI.getOpcode()) {
611 default:
612 // No idea what to do.
613 return false;
614 case TargetOpcode::G_BUILD_VECTOR:
615 return legalizeBuildVector(MI, MRI, Helper);
616 case TargetOpcode::G_FPTOUI:
617 return legalizeFPTOUI(MI, MRI, Helper);
618 case TargetOpcode::G_UITOFP:
619 return legalizeUITOFP(MI, MRI, Helper);
620 case TargetOpcode::G_STORE:
621 return legalizeNarrowingStore(MI, MRI, Helper);
622 case TargetOpcode::G_SITOFP:
623 return legalizeSITOFP(MI, MRI, Helper);
624 case TargetOpcode::G_FPTOSI:
625 return legalizeFPTOSI(MI, MRI, Helper);
626 case TargetOpcode::G_GET_ROUNDING:
627 return legalizeGETROUNDING(MI, MRI, Helper);
628 case TargetOpcode::G_SET_ROUNDING:
629 return legalizeSETROUNDING(MI, MRI, Helper);
630 }
631 llvm_unreachable("expected switch to return");
632}
633
634bool X86LegalizerInfo::legalizeSITOFP(MachineInstr &MI,
635 MachineRegisterInfo &MRI,
636 LegalizerHelper &Helper) const {
637 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
638 MachineFunction &MF = *MI.getMF();
639 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
640
641 assert((SrcTy.getSizeInBits() == 16 || SrcTy.getSizeInBits() == 32 ||
642 SrcTy.getSizeInBits() == 64) &&
643 "Unexpected source type for SITOFP in X87 mode.");
644
645 TypeSize MemSize = SrcTy.getSizeInBytes();
646 MachinePointerInfo PtrInfo;
647 Align Alignmt = Helper.getStackTemporaryAlignment(Type: SrcTy);
648 auto SlotPointer = Helper.createStackTemporary(Bytes: MemSize, Alignment: Alignmt, PtrInfo);
649 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
650 PtrInfo, F: MachineMemOperand::MOStore, Size: MemSize, BaseAlignment: Align(MemSize));
651
652 // Store the integer value on the FPU stack.
653 MIRBuilder.buildStore(Val: Src, Addr: SlotPointer, MMO&: *StoreMMO);
654
655 MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
656 PtrInfo, F: MachineMemOperand::MOLoad, Size: MemSize, BaseAlignment: Align(MemSize));
657 MIRBuilder.buildInstr(Opcode: X86::G_FILD)
658 .addDef(RegNo: Dst)
659 .addUse(RegNo: SlotPointer.getReg(Idx: 0))
660 .addMemOperand(MMO: LoadMMO);
661
662 MI.eraseFromParent();
663 return true;
664}
665
666bool X86LegalizerInfo::legalizeFPTOSI(MachineInstr &MI,
667 MachineRegisterInfo &MRI,
668 LegalizerHelper &Helper) const {
669 MachineFunction &MF = *MI.getMF();
670 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
671 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
672
673 TypeSize MemSize = DstTy.getSizeInBytes();
674 MachinePointerInfo PtrInfo;
675 Align Alignmt = Helper.getStackTemporaryAlignment(Type: DstTy);
676 auto SlotPointer = Helper.createStackTemporary(Bytes: MemSize, Alignment: Alignmt, PtrInfo);
677 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
678 PtrInfo, F: MachineMemOperand::MOStore, Size: MemSize, BaseAlignment: Align(MemSize));
679
680 MIRBuilder.buildInstr(Opcode: X86::G_FIST)
681 .addUse(RegNo: Src)
682 .addUse(RegNo: SlotPointer.getReg(Idx: 0))
683 .addMemOperand(MMO: StoreMMO);
684
685 MIRBuilder.buildLoad(Res: Dst, Addr: SlotPointer, PtrInfo, Alignment: Align(MemSize));
686 MI.eraseFromParent();
687 return true;
688}
689
690bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
691 MachineRegisterInfo &MRI,
692 LegalizerHelper &Helper) const {
693 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
694 const auto &BuildVector = cast<GBuildVector>(Val&: MI);
695 Register Dst = BuildVector.getReg(Idx: 0);
696 LLT DstTy = MRI.getType(Reg: Dst);
697 MachineFunction &MF = MIRBuilder.getMF();
698 LLVMContext &Ctx = MF.getFunction().getContext();
699 uint64_t DstTySize = DstTy.getScalarSizeInBits();
700
701 SmallVector<Constant *, 4> CstIdxs;
702 for (unsigned i = 0; i < BuildVector.getNumSources(); ++i) {
703 Register Source = BuildVector.getSourceReg(I: i);
704
705 auto ValueAndReg = getIConstantVRegValWithLookThrough(VReg: Source, MRI);
706 if (ValueAndReg) {
707 CstIdxs.emplace_back(Args: ConstantInt::get(Context&: Ctx, V: ValueAndReg->Value));
708 continue;
709 }
710
711 auto FPValueAndReg = getFConstantVRegValWithLookThrough(VReg: Source, MRI);
712 if (FPValueAndReg) {
713 CstIdxs.emplace_back(Args: ConstantFP::get(Context&: Ctx, V: FPValueAndReg->Value));
714 continue;
715 }
716
717 if (getOpcodeDef<GImplicitDef>(Reg: Source, MRI)) {
718 CstIdxs.emplace_back(Args: UndefValue::get(T: Type::getIntNTy(C&: Ctx, N: DstTySize)));
719 continue;
720 }
721 return false;
722 }
723
724 Constant *ConstVal = ConstantVector::get(V: CstIdxs);
725
726 const DataLayout &DL = MIRBuilder.getDataLayout();
727 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
728 Align Alignment(DL.getABITypeAlign(Ty: ConstVal->getType()));
729 auto Addr = MIRBuilder.buildConstantPool(
730 Res: LLT::pointer(AddressSpace: AddrSpace, SizeInBits: DL.getPointerSizeInBits(AS: AddrSpace)),
731 Idx: MF.getConstantPool()->getConstantPoolIndex(C: ConstVal, Alignment));
732 MachineMemOperand *MMO =
733 MF.getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF),
734 f: MachineMemOperand::MOLoad, MemTy: DstTy, base_alignment: Alignment);
735
736 MIRBuilder.buildLoad(Res: Dst, Addr, MMO&: *MMO);
737 MI.eraseFromParent();
738 return true;
739}
740
741bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
742 MachineRegisterInfo &MRI,
743 LegalizerHelper &Helper) const {
744 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
745 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
746 unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
747 const LLT s32 = LLT::scalar(SizeInBits: 32);
748 const LLT s64 = LLT::scalar(SizeInBits: 64);
749
750 // Simply reuse FPTOSI when it is possible to widen the type
751 if (DstSizeInBits <= 32) {
752 auto Casted = MIRBuilder.buildFPTOSI(Dst: DstTy == s32 ? s64 : s32, Src0: Src);
753 MIRBuilder.buildTrunc(Res: Dst, Op: Casted);
754 MI.eraseFromParent();
755 return true;
756 }
757
758 return false;
759}
760
761bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
762 MachineRegisterInfo &MRI,
763 LegalizerHelper &Helper) const {
764 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
765 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
766 const LLT s32 = LLT::scalar(SizeInBits: 32);
767 const LLT s64 = LLT::scalar(SizeInBits: 64);
768
769 // Simply reuse SITOFP when it is possible to widen the type
770 if (SrcTy.getSizeInBits() <= 32) {
771 auto Ext = MIRBuilder.buildZExt(Res: SrcTy == s32 ? s64 : s32, Op: Src);
772 MIRBuilder.buildSITOFP(Dst, Src0: Ext);
773 MI.eraseFromParent();
774 return true;
775 }
776
777 return false;
778}
779
780bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
781 MachineRegisterInfo &MRI,
782 LegalizerHelper &Helper) const {
783 auto &Store = cast<GStore>(Val&: MI);
784 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
785 MachineMemOperand &MMO = **Store.memoperands_begin();
786 MachineFunction &MF = MIRBuilder.getMF();
787 LLT ValTy = MRI.getType(Reg: Store.getValueReg());
788 auto *NewMMO = MF.getMachineMemOperand(MMO: &MMO, PtrInfo: MMO.getPointerInfo(), Ty: ValTy);
789
790 Helper.Observer.changingInstr(MI&: Store);
791 Store.setMemRefs(MF, MemRefs: {NewMMO});
792 Helper.Observer.changedInstr(MI&: Store);
793 return true;
794}
795
bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           LegalizerHelper &Helper) const {
  /*
    The rounding mode is in bits 11:10 of the x87 FP control word (stored to
    memory below via FNSTCW), and has the following settings:
      00 Round to nearest
      01 Round to -inf
      10 Round to +inf
      11 Round to 0

    GET_ROUNDING, on the other hand, expects the following:
      -1 Undefined
       0 Round to 0
       1 Round to nearest
       2 Round to +inf
       3 Round to -inf

    To perform the conversion, we use a packed lookup table of the four 2-bit
    values that we can index by FPCW[11:10]:
      0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPCW[11:10]

    i.e. the result is (0x2d >> ((FPCW >> 9) & 6)) & 3.
  */

  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineFunction &MF = MIRBuilder.getMF();
  Register Dst = MI.getOperand(i: 0).getReg();
  LLT DstTy = MRI.getType(Reg: Dst);
  const LLT s8 = LLT::scalar(SizeInBits: 8);
  const LLT s16 = LLT::scalar(SizeInBits: 16);
  const LLT s32 = LLT::scalar(SizeInBits: 32);

  // Reserve a 2-byte stack slot for the FP control word.
  int MemSize = 2;
  Align Alignment = Align(2);
  MachinePointerInfo PtrInfo;
  auto StackTemp = Helper.createStackTemporary(Bytes: TypeSize::getFixed(ExactSize: MemSize),
                                               Alignment, PtrInfo);
  Register StackPtr = StackTemp.getReg(Idx: 0);

  auto StoreMMO = MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOStore,
                                          Size: MemSize, BaseAlignment: Alignment);

  // Store FP Control Word to stack slot using G_FNSTCW16.
  MIRBuilder.buildInstr(Opcode: X86::G_FNSTCW16)
      .addUse(RegNo: StackPtr)
      .addMemOperand(MMO: StoreMMO);

  // Load FP Control Word back from the stack slot.
  auto LoadMMO = MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOLoad,
                                         Size: MemSize, BaseAlignment: Alignment);

  // Zero-extend the 16-bit control word to s32 for the bit arithmetic below.
  auto CWD32 =
      MIRBuilder.buildZExt(Res: s32, Op: MIRBuilder.buildLoad(Res: s16, Addr: StackPtr, MMO&: *LoadMMO));
  // (CWD >> 9) & 6 extracts bits 11:10 pre-scaled by 2, so the value can be
  // used directly as a shift amount into the packed lookup table.
  auto Shifted8 = MIRBuilder.buildTrunc(
      Res: s8, Op: MIRBuilder.buildLShr(Dst: s32, Src0: CWD32, Src1: MIRBuilder.buildConstant(Res: s8, Val: 9)));
  auto Masked32 = MIRBuilder.buildZExt(
      Res: s32, Op: MIRBuilder.buildAnd(Dst: s8, Src0: Shifted8, Src1: MIRBuilder.buildConstant(Res: s8, Val: 6)));

  // LUT is a packed lookup table (0x2d) used to map the 2-bit x87 FPU rounding
  // mode (from bits 11:10 of the control word) to the values expected by
  // GET_ROUNDING. The mapping is performed by shifting LUT right by the
  // extracted rounding mode and masking the result with 3 to obtain the final
  // 2-bit answer.
  auto LUT = MIRBuilder.buildConstant(Res: s32, Val: 0x2d);
  auto LUTShifted = MIRBuilder.buildLShr(Dst: s32, Src0: LUT, Src1: Masked32);
  auto RetVal =
      MIRBuilder.buildAnd(Dst: s32, Src0: LUTShifted, Src1: MIRBuilder.buildConstant(Res: s32, Val: 3));
  // Narrow or widen the s32 result to the instruction's destination type.
  auto RetValTrunc = MIRBuilder.buildZExtOrTrunc(Res: DstTy, Op: RetVal);

  MIRBuilder.buildCopy(Res: Dst, Op: RetValTrunc);

  MI.eraseFromParent();
  return true;
}
871
872bool X86LegalizerInfo::legalizeSETROUNDING(MachineInstr &MI,
873 MachineRegisterInfo &MRI,
874 LegalizerHelper &Helper) const {
875 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
876 MachineFunction &MF = MIRBuilder.getMF();
877 Register Src = MI.getOperand(i: 0).getReg();
878 const LLT s8 = LLT::scalar(SizeInBits: 8);
879 const LLT s16 = LLT::scalar(SizeInBits: 16);
880 const LLT s32 = LLT::scalar(SizeInBits: 32);
881
882 // Allocate stack slot for control word and MXCSR (4 bytes).
883 int MemSize = 4;
884 Align Alignment = Align(4);
885 MachinePointerInfo PtrInfo;
886 auto StackTemp = Helper.createStackTemporary(Bytes: TypeSize::getFixed(ExactSize: MemSize),
887 Alignment, PtrInfo);
888 Register StackPtr = StackTemp.getReg(Idx: 0);
889
890 auto StoreMMO =
891 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOStore, Size: 2, BaseAlignment: Align(2));
892 MIRBuilder.buildInstr(Opcode: X86::G_FNSTCW16)
893 .addUse(RegNo: StackPtr)
894 .addMemOperand(MMO: StoreMMO);
895
896 auto LoadMMO =
897 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOLoad, Size: 2, BaseAlignment: Align(2));
898 auto CWD16 = MIRBuilder.buildLoad(Res: s16, Addr: StackPtr, MMO&: *LoadMMO);
899
900 // Clear RM field (bits 11:10)
901 auto ClearedCWD =
902 MIRBuilder.buildAnd(Dst: s16, Src0: CWD16, Src1: MIRBuilder.buildConstant(Res: s16, Val: 0xf3ff));
903
904 // Check if Src is a constant
905 auto *SrcDef = MRI.getVRegDef(Reg: Src);
906 Register RMBits;
907 Register MXCSRRMBits;
908
909 if (SrcDef && SrcDef->getOpcode() == TargetOpcode::G_CONSTANT) {
910 uint64_t RM = getIConstantFromReg(VReg: Src, MRI).getZExtValue();
911 int FieldVal = X86::getRoundingModeX86(RM);
912
913 if (FieldVal == X86::rmInvalid) {
914 FieldVal = X86::rmToNearest;
915 LLVMContext &C = MF.getFunction().getContext();
916 C.diagnose(DI: DiagnosticInfoUnsupported(
917 MF.getFunction(), "rounding mode is not supported by X86 hardware",
918 DiagnosticLocation(MI.getDebugLoc()), DS_Error));
919 return false;
920 }
921
922 FieldVal = FieldVal << 3;
923 RMBits = MIRBuilder.buildConstant(Res: s16, Val: FieldVal).getReg(Idx: 0);
924 MXCSRRMBits = MIRBuilder.buildConstant(Res: s32, Val: FieldVal).getReg(Idx: 0);
925 } else {
926 // Convert Src (rounding mode) to bits for control word
927 // (0xc9 << (2 * Src + 4)) & 0xc00
928 auto Src32 = MIRBuilder.buildZExtOrTrunc(Res: s32, Op: Src);
929 auto ShiftAmt = MIRBuilder.buildAdd(
930 Dst: s32, Src0: MIRBuilder.buildShl(Dst: s32, Src0: Src32, Src1: MIRBuilder.buildConstant(Res: s32, Val: 1)),
931 Src1: MIRBuilder.buildConstant(Res: s32, Val: 4));
932 auto ShiftAmt8 = MIRBuilder.buildTrunc(Res: s8, Op: ShiftAmt);
933 auto Shifted = MIRBuilder.buildShl(Dst: s16, Src0: MIRBuilder.buildConstant(Res: s16, Val: 0xc9),
934 Src1: ShiftAmt8);
935 RMBits =
936 MIRBuilder.buildAnd(Dst: s16, Src0: Shifted, Src1: MIRBuilder.buildConstant(Res: s16, Val: 0xc00))
937 .getReg(Idx: 0);
938
939 // For non-constant case, we still need to compute MXCSR bits dynamically
940 auto RMBits32 = MIRBuilder.buildZExt(Res: s32, Op: RMBits);
941 MXCSRRMBits =
942 MIRBuilder.buildShl(Dst: s32, Src0: RMBits32, Src1: MIRBuilder.buildConstant(Res: s32, Val: 3))
943 .getReg(Idx: 0);
944 }
945 // Update rounding mode bits
946 auto NewCWD =
947 MIRBuilder.buildOr(Dst: s16, Src0: ClearedCWD, Src1: RMBits, Flags: MachineInstr::Disjoint);
948
949 // Store new FP Control Word to stack
950 auto StoreNewMMO =
951 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOStore, Size: 2, BaseAlignment: Align(2));
952 MIRBuilder.buildStore(Val: NewCWD, Addr: StackPtr, MMO&: *StoreNewMMO);
953
954 // Load FP control word from the slot using G_FLDCW16
955 auto LoadNewMMO =
956 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOLoad, Size: 2, BaseAlignment: Align(2));
957 MIRBuilder.buildInstr(Opcode: X86::G_FLDCW16)
958 .addUse(RegNo: StackPtr)
959 .addMemOperand(MMO: LoadNewMMO);
960
961 if (Subtarget.hasSSE1()) {
962 // Store MXCSR to stack (use STMXCSR)
963 auto StoreMXCSRMMO = MF.getMachineMemOperand(
964 PtrInfo, F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(4));
965 MIRBuilder.buildInstr(Opcode: TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
966 .addIntrinsicID(ID: Intrinsic::x86_sse_stmxcsr)
967 .addUse(RegNo: StackPtr)
968 .addMemOperand(MMO: StoreMXCSRMMO);
969
970 // Load MXCSR from stack
971 auto LoadMXCSRMMO = MF.getMachineMemOperand(
972 PtrInfo, F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(4));
973 auto MXCSR = MIRBuilder.buildLoad(Res: s32, Addr: StackPtr, MMO&: *LoadMXCSRMMO);
974
975 // Clear RM field (bits 14:13)
976 auto ClearedMXCSR = MIRBuilder.buildAnd(
977 Dst: s32, Src0: MXCSR, Src1: MIRBuilder.buildConstant(Res: s32, Val: 0xffff9fff));
978
979 // Update rounding mode bits
980 auto NewMXCSR = MIRBuilder.buildOr(Dst: s32, Src0: ClearedMXCSR, Src1: MXCSRRMBits);
981
982 // Store new MXCSR to stack
983 auto StoreNewMXCSRMMO = MF.getMachineMemOperand(
984 PtrInfo, F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(4));
985 MIRBuilder.buildStore(Val: NewMXCSR, Addr: StackPtr, MMO&: *StoreNewMXCSRMMO);
986
987 // Load MXCSR from stack (use LDMXCSR)
988 auto LoadNewMXCSRMMO = MF.getMachineMemOperand(
989 PtrInfo, F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(4));
990 MIRBuilder.buildInstr(Opcode: TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
991 .addIntrinsicID(ID: Intrinsic::x86_sse_ldmxcsr)
992 .addUse(RegNo: StackPtr)
993 .addMemOperand(MMO: LoadNewMXCSRMMO);
994 }
995
996 MI.eraseFromParent();
997 return true;
998}
999
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                         MachineInstr &MI) const {
  // No target-specific intrinsic legalization is performed here; returning
  // true reports every intrinsic reaching this hook as already legal.
  return true;
}
1004