1//===- X86LegalizerInfo.cpp --------------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
/// This file implements the targeting of the MachineLegalizer class for X86.
10/// \todo This should be generated by TableGen.
11//===----------------------------------------------------------------------===//
12
13#include "X86LegalizerInfo.h"
14#include "X86Subtarget.h"
15#include "X86TargetMachine.h"
16#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
17#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
18#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
19#include "llvm/CodeGen/MachineConstantPool.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/TargetOpcodes.h"
22#include "llvm/CodeGen/ValueTypes.h"
23#include "llvm/IR/DerivedTypes.h"
24#include "llvm/IR/IntrinsicsX86.h"
25#include "llvm/IR/Type.h"
26
27using namespace llvm;
28using namespace TargetOpcode;
29using namespace LegalizeActions;
30using namespace LegalityPredicates;
31
32X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
33 const X86TargetMachine &TM)
34 : Subtarget(STI) {
35
36 bool Is64Bit = Subtarget.is64Bit();
37 bool HasCMOV = Subtarget.canUseCMOV();
38 bool HasSSE1 = Subtarget.hasSSE1();
39 bool HasSSE2 = Subtarget.hasSSE2();
40 bool HasSSE41 = Subtarget.hasSSE41();
41 bool HasAVX = Subtarget.hasAVX();
42 bool HasAVX2 = Subtarget.hasAVX2();
43 bool HasAVX512 = Subtarget.hasAVX512();
44 bool HasVLX = Subtarget.hasVLX();
45 bool HasDQI = Subtarget.hasAVX512() && Subtarget.hasDQI();
46 bool HasBWI = Subtarget.hasAVX512() && Subtarget.hasBWI();
47 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
48 bool HasPOPCNT = Subtarget.hasPOPCNT();
49 bool HasLZCNT = Subtarget.hasLZCNT();
50 bool HasBMI = Subtarget.hasBMI();
51
52 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: TM.getPointerSizeInBits(AS: 0));
53 const LLT s1 = LLT::scalar(SizeInBits: 1);
54 const LLT s8 = LLT::scalar(SizeInBits: 8);
55 const LLT s16 = LLT::scalar(SizeInBits: 16);
56 const LLT s32 = LLT::scalar(SizeInBits: 32);
57 const LLT s64 = LLT::scalar(SizeInBits: 64);
58 const LLT s80 = LLT::scalar(SizeInBits: 80);
59 const LLT s128 = LLT::scalar(SizeInBits: 128);
60 const LLT sMaxScalar = Subtarget.is64Bit() ? s64 : s32;
61 const LLT v2s32 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32);
62 const LLT v4s8 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 8);
63
64 const LLT v16s8 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8);
65 const LLT v8s16 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16);
66 const LLT v4s32 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
67 const LLT v2s64 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64);
68 const LLT v2p0 = LLT::fixed_vector(NumElements: 2, ScalarTy: p0);
69
70 const LLT v32s8 = LLT::fixed_vector(NumElements: 32, ScalarSizeInBits: 8);
71 const LLT v16s16 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 16);
72 const LLT v8s32 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 32);
73 const LLT v4s64 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 64);
74 const LLT v4p0 = LLT::fixed_vector(NumElements: 4, ScalarTy: p0);
75
76 const LLT v64s8 = LLT::fixed_vector(NumElements: 64, ScalarSizeInBits: 8);
77 const LLT v32s16 = LLT::fixed_vector(NumElements: 32, ScalarSizeInBits: 16);
78 const LLT v16s32 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 32);
79 const LLT v8s64 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 64);
80
81 const LLT s8MaxVector = HasAVX512 ? v64s8 : HasAVX ? v32s8 : v16s8;
82 const LLT s16MaxVector = HasAVX512 ? v32s16 : HasAVX ? v16s16 : v8s16;
83 const LLT s32MaxVector = HasAVX512 ? v16s32 : HasAVX ? v8s32 : v4s32;
84 const LLT s64MaxVector = HasAVX512 ? v8s64 : HasAVX ? v4s64 : v2s64;
85
86 // todo: AVX512 bool vector predicate types
87
88 // implicit/constants
// 32/64-bit mode needs support for s64/s128 to handle cases such as:
90 // s64 = EXTEND (G_IMPLICIT_DEF s32) -> s64 = G_IMPLICIT_DEF
91 // s128 = EXTEND (G_IMPLICIT_DEF s32/s64) -> s128 = G_IMPLICIT_DEF
92 getActionDefinitionsBuilder(
93 Opcodes: {G_IMPLICIT_DEF, G_PHI, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
94 .legalFor(Types: {p0, s1, s8, s16, s32, s64})
95 .legalFor(Pred: UseX87, Types: {s80})
96 .legalFor(Pred: Is64Bit, Types: {s128})
97 .legalFor(Pred: HasSSE2, Types: {v16s8, v8s16, v4s32, v2s64})
98 .legalFor(Pred: HasAVX, Types: {v32s8, v16s16, v8s32, v4s64})
99 .legalFor(Pred: HasAVX512, Types: {v64s8, v32s16, v16s32, v8s64})
100 .widenScalarOrEltToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
101 .clampScalarOrElt(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
102 .moreElementsToNextPow2(TypeIdx: 0)
103 .clampMinNumElements(TypeIdx: 0, EltTy: s8, MinElements: 16)
104 .clampMinNumElements(TypeIdx: 0, EltTy: s16, MinElements: 8)
105 .clampMinNumElements(TypeIdx: 0, EltTy: s32, MinElements: 4)
106 .clampMinNumElements(TypeIdx: 0, EltTy: s64, MinElements: 2)
107 .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: HasAVX512 ? 64 : (HasAVX ? 32 : 16))
108 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: HasAVX512 ? 32 : (HasAVX ? 16 : 8))
109 .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: HasAVX512 ? 16 : (HasAVX ? 8 : 4))
110 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: HasAVX512 ? 8 : (HasAVX ? 4 : 2))
111 .clampMaxNumElements(TypeIdx: 0, EltTy: p0,
112 MaxElements: Is64Bit ? s64MaxVector.getNumElements()
113 : s32MaxVector.getNumElements())
114 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0);
115
116 getActionDefinitionsBuilder(Opcode: G_CONSTANT)
117 .legalFor(Types: {p0, s8, s16, s32})
118 .legalFor(Pred: Is64Bit, Types: {s64})
119 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
120 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar);
121
122 getActionDefinitionsBuilder(Opcodes: {G_LROUND, G_LLROUND})
123 .widenScalarIf(Predicate: typeIs(TypeIdx: 1, TypesInit: s16),
124 Mutation: [=](const LegalityQuery &) {
125 return std::pair<unsigned, LLT>(1, s32);
126 })
127 .libcall();
128
129 getActionDefinitionsBuilder(
130 Opcodes: {G_FCOS, G_FCOSH, G_FACOS, G_FSIN, G_FSINH, G_FASIN, G_FTAN,
131 G_FTANH, G_FATAN, G_FATAN2, G_FPOW, G_FEXP, G_FEXP2, G_FEXP10,
132 G_FLOG, G_FLOG2, G_FLOG10, G_FPOWI, G_FSINCOS, G_FCEIL, G_FFLOOR})
133 .libcall();
134
135 getActionDefinitionsBuilder(Opcode: G_FSQRT)
136 .legalFor(Pred: HasSSE1 || UseX87, Types: {s32})
137 .legalFor(Pred: HasSSE2 || UseX87, Types: {s64})
138 .legalFor(Pred: UseX87, Types: {s80});
139
140 getActionDefinitionsBuilder(Opcodes: {G_GET_ROUNDING, G_SET_ROUNDING})
141 .customFor(Types: {s32});
142
143 // merge/unmerge
144 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
145 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
146 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
147 getActionDefinitionsBuilder(Opcode: Op)
148 .widenScalarToNextPow2(TypeIdx: LitTyIdx, /*Min=*/MinSize: 8)
149 .widenScalarToNextPow2(TypeIdx: BigTyIdx, /*Min=*/MinSize: 16)
150 .minScalar(TypeIdx: LitTyIdx, Ty: s8)
151 .minScalar(TypeIdx: BigTyIdx, Ty: s32)
152 .legalIf(Predicate: [=](const LegalityQuery &Q) {
153 switch (Q.Types[BigTyIdx].getSizeInBits()) {
154 case 16:
155 case 32:
156 case 64:
157 case 128:
158 case 256:
159 case 512:
160 break;
161 default:
162 return false;
163 }
164 switch (Q.Types[LitTyIdx].getSizeInBits()) {
165 case 8:
166 case 16:
167 case 32:
168 case 64:
169 case 128:
170 case 256:
171 return true;
172 default:
173 return false;
174 }
175 });
176 }
177
178 getActionDefinitionsBuilder(Opcodes: {G_UMIN, G_UMAX, G_SMIN, G_SMAX})
179 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
180 .lower();
181
182 // integer addition/subtraction
183 getActionDefinitionsBuilder(Opcodes: {G_ADD, G_SUB})
184 .legalFor(Types: {s8, s16, s32})
185 .legalFor(Pred: Is64Bit, Types: {s64})
186 .legalFor(Pred: HasSSE2, Types: {v16s8, v8s16, v4s32, v2s64})
187 .legalFor(Pred: HasAVX2, Types: {v32s8, v16s16, v8s32, v4s64})
188 .legalFor(Pred: HasAVX512, Types: {v16s32, v8s64})
189 .legalFor(Pred: HasBWI, Types: {v64s8, v32s16})
190 .clampMinNumElements(TypeIdx: 0, EltTy: s8, MinElements: 16)
191 .clampMinNumElements(TypeIdx: 0, EltTy: s16, MinElements: 8)
192 .clampMinNumElements(TypeIdx: 0, EltTy: s32, MinElements: 4)
193 .clampMinNumElements(TypeIdx: 0, EltTy: s64, MinElements: 2)
194 .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: HasBWI ? 64 : (HasAVX2 ? 32 : 16))
195 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: HasBWI ? 32 : (HasAVX2 ? 16 : 8))
196 .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
197 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: HasAVX512 ? 8 : (HasAVX2 ? 4 : 2))
198 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
199 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
200 .scalarize(TypeIdx: 0);
201
202 getActionDefinitionsBuilder(Opcodes: {G_UADDE, G_UADDO, G_USUBE, G_USUBO})
203 .legalFor(Types: {{s8, s8}, {s16, s8}, {s32, s8}})
204 .legalFor(Pred: Is64Bit, Types: {{s64, s8}})
205 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
206 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
207 .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: s8)
208 .scalarize(TypeIdx: 0);
209
210 // integer multiply
211 getActionDefinitionsBuilder(Opcode: G_MUL)
212 .legalFor(Types: {s8, s16, s32})
213 .legalFor(Pred: Is64Bit, Types: {s64})
214 .legalFor(Pred: HasSSE2, Types: {v8s16})
215 .legalFor(Pred: HasSSE41, Types: {v4s32})
216 .legalFor(Pred: HasAVX2, Types: {v16s16, v8s32})
217 .legalFor(Pred: HasAVX512, Types: {v16s32})
218 .legalFor(Pred: HasDQI, Types: {v8s64})
219 .legalFor(Pred: HasDQI && HasVLX, Types: {v2s64, v4s64})
220 .legalFor(Pred: HasBWI, Types: {v32s16})
221 .clampMinNumElements(TypeIdx: 0, EltTy: s16, MinElements: 8)
222 .clampMinNumElements(TypeIdx: 0, EltTy: s32, MinElements: 4)
223 .clampMinNumElements(TypeIdx: 0, EltTy: s64, MinElements: HasVLX ? 2 : 8)
224 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: HasBWI ? 32 : (HasAVX2 ? 16 : 8))
225 .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
226 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 8)
227 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
228 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
229 .scalarize(TypeIdx: 0);
230
231 getActionDefinitionsBuilder(Opcodes: {G_SMULH, G_UMULH})
232 .legalFor(Types: {s8, s16, s32})
233 .legalFor(Pred: Is64Bit, Types: {s64})
234 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
235 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
236 .scalarize(TypeIdx: 0);
237
238 // integer divisions
239 getActionDefinitionsBuilder(Opcodes: {G_SDIV, G_SREM, G_UDIV, G_UREM})
240 .legalFor(Types: {s8, s16, s32})
241 .legalFor(Pred: Is64Bit, Types: {s64})
242 .libcallFor(Types: {s64})
243 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar);
244
245 // integer shifts
246 getActionDefinitionsBuilder(Opcodes: {G_SHL, G_LSHR, G_ASHR})
247 .legalFor(Types: {{s8, s8}, {s16, s8}, {s32, s8}})
248 .legalFor(Pred: Is64Bit, Types: {{s64, s8}})
249 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
250 .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: s8);
251
252 // integer logic
253 getActionDefinitionsBuilder(Opcodes: {G_AND, G_OR, G_XOR})
254 .legalFor(Types: {s8, s16, s32})
255 .legalFor(Pred: Is64Bit, Types: {s64})
256 .legalFor(Pred: HasSSE2, Types: {v16s8, v8s16, v4s32, v2s64})
257 .legalFor(Pred: HasAVX, Types: {v32s8, v16s16, v8s32, v4s64})
258 .legalFor(Pred: HasAVX512, Types: {v64s8, v32s16, v16s32, v8s64})
259 .clampMinNumElements(TypeIdx: 0, EltTy: s8, MinElements: 16)
260 .clampMinNumElements(TypeIdx: 0, EltTy: s16, MinElements: 8)
261 .clampMinNumElements(TypeIdx: 0, EltTy: s32, MinElements: 4)
262 .clampMinNumElements(TypeIdx: 0, EltTy: s64, MinElements: 2)
263 .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: HasAVX512 ? 64 : (HasAVX ? 32 : 16))
264 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: HasAVX512 ? 32 : (HasAVX ? 16 : 8))
265 .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: HasAVX512 ? 16 : (HasAVX ? 8 : 4))
266 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: HasAVX512 ? 8 : (HasAVX ? 4 : 2))
267 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
268 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
269 .scalarize(TypeIdx: 0);
270
271 // integer comparison
272 const std::initializer_list<LLT> IntTypes32 = {s8, s16, s32, p0};
273 const std::initializer_list<LLT> IntTypes64 = {s8, s16, s32, s64, p0};
274
275 getActionDefinitionsBuilder(Opcode: G_ICMP)
276 .legalForCartesianProduct(Types0: {s8}, Types1: Is64Bit ? IntTypes64 : IntTypes32)
277 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s8)
278 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 8)
279 .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: sMaxScalar);
280
281 // bswap
282 getActionDefinitionsBuilder(Opcode: G_BSWAP)
283 .legalFor(Types: {s32})
284 .legalFor(Pred: Is64Bit, Types: {s64})
285 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
286 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sMaxScalar);
287
288 // popcount
289 getActionDefinitionsBuilder(Opcode: G_CTPOP)
290 .legalFor(Pred: HasPOPCNT, Types: {{s16, s16}, {s32, s32}})
291 .legalFor(Pred: HasPOPCNT && Is64Bit, Types: {{s64, s64}})
292 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 16)
293 .clampScalar(TypeIdx: 1, MinTy: s16, MaxTy: sMaxScalar)
294 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1);
295
296 // count leading zeros (LZCNT)
297 getActionDefinitionsBuilder(Opcode: G_CTLZ)
298 .legalFor(Pred: HasLZCNT, Types: {{s16, s16}, {s32, s32}})
299 .legalFor(Pred: HasLZCNT && Is64Bit, Types: {{s64, s64}})
300 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 16)
301 .clampScalar(TypeIdx: 1, MinTy: s16, MaxTy: sMaxScalar)
302 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1);
303
304 // count trailing zeros
305 getActionDefinitionsBuilder(Opcode: G_CTTZ_ZERO_UNDEF)
306 .legalFor(Types: {{s16, s16}, {s32, s32}})
307 .legalFor(Pred: Is64Bit, Types: {{s64, s64}})
308 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 16)
309 .clampScalar(TypeIdx: 1, MinTy: s16, MaxTy: sMaxScalar)
310 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1);
311
312 getActionDefinitionsBuilder(Opcode: G_CTTZ)
313 .legalFor(Pred: HasBMI, Types: {{s16, s16}, {s32, s32}})
314 .legalFor(Pred: HasBMI && Is64Bit, Types: {{s64, s64}})
315 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 16)
316 .clampScalar(TypeIdx: 1, MinTy: s16, MaxTy: sMaxScalar)
317 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1);
318
319 getActionDefinitionsBuilder(Opcode: G_BRCOND).legalFor(Types: {s1});
320
321 // pointer handling
322 const std::initializer_list<LLT> PtrTypes32 = {s1, s8, s16, s32};
323 const std::initializer_list<LLT> PtrTypes64 = {s1, s8, s16, s32, s64};
324
325 getActionDefinitionsBuilder(Opcode: G_PTRTOINT)
326 .legalForCartesianProduct(Types0: Is64Bit ? PtrTypes64 : PtrTypes32, Types1: {p0})
327 .maxScalar(TypeIdx: 0, Ty: sMaxScalar)
328 .widenScalarToNextPow2(TypeIdx: 0, /*Min*/ MinSize: 8);
329
330 getActionDefinitionsBuilder(Opcode: G_INTTOPTR).legalFor(Types: {{p0, sMaxScalar}});
331
332 getActionDefinitionsBuilder(Opcode: G_CONSTANT_POOL).legalFor(Types: {p0});
333
334 getActionDefinitionsBuilder(Opcode: G_PTR_ADD)
335 .legalFor(Types: {{p0, s32}})
336 .legalFor(Pred: Is64Bit, Types: {{p0, s64}})
337 .widenScalarToNextPow2(TypeIdx: 1, /*Min*/ MinSize: 32)
338 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: sMaxScalar);
339
340 getActionDefinitionsBuilder(Opcodes: {G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor(Types: {p0});
341
342 // load/store: add more corner cases
343 for (unsigned Op : {G_LOAD, G_STORE}) {
344 auto &Action = getActionDefinitionsBuilder(Opcode: Op);
345 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s8, .Type1: p0, .MemTy: s8, .Align: 1},
346 {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: 1},
347 {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: 1},
348 {.Type0: s80, .Type1: p0, .MemTy: s80, .Align: 1},
349 {.Type0: p0, .Type1: p0, .MemTy: p0, .Align: 1},
350 {.Type0: v4s8, .Type1: p0, .MemTy: v4s8, .Align: 1}});
351 if (Is64Bit)
352 Action.legalForTypesWithMemDesc(
353 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s64, .Align: 1}, {.Type0: v2s32, .Type1: p0, .MemTy: v2s32, .Align: 1}});
354
355 if (HasSSE1)
356 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v4s32, .Type1: p0, .MemTy: v4s32, .Align: 1}});
357 if (HasSSE2)
358 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v16s8, .Type1: p0, .MemTy: v16s8, .Align: 1},
359 {.Type0: v8s16, .Type1: p0, .MemTy: v8s16, .Align: 1},
360 {.Type0: v2s64, .Type1: p0, .MemTy: v2s64, .Align: 1},
361 {.Type0: v2p0, .Type1: p0, .MemTy: v2p0, .Align: 1}});
362 if (HasAVX)
363 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v32s8, .Type1: p0, .MemTy: v32s8, .Align: 1},
364 {.Type0: v16s16, .Type1: p0, .MemTy: v16s16, .Align: 1},
365 {.Type0: v8s32, .Type1: p0, .MemTy: v8s32, .Align: 1},
366 {.Type0: v4s64, .Type1: p0, .MemTy: v4s64, .Align: 1},
367 {.Type0: v4p0, .Type1: p0, .MemTy: v4p0, .Align: 1}});
368 if (HasAVX512)
369 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v64s8, .Type1: p0, .MemTy: v64s8, .Align: 1},
370 {.Type0: v32s16, .Type1: p0, .MemTy: v32s16, .Align: 1},
371 {.Type0: v16s32, .Type1: p0, .MemTy: v16s32, .Align: 1},
372 {.Type0: v8s64, .Type1: p0, .MemTy: v8s64, .Align: 1}});
373
// X86 supports extending loads, but not truncating stores, for GPRs
375 if (Op == G_LOAD) {
376 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s8, .Type1: p0, .MemTy: s1, .Align: 1},
377 {.Type0: s16, .Type1: p0, .MemTy: s8, .Align: 1},
378 {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 1},
379 {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 1}});
380 if (Is64Bit)
381 Action.legalForTypesWithMemDesc(
382 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: 1}, {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: 1}, {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: 1}});
383 } else {
384 Action.customIf(Predicate: [=](const LegalityQuery &Query) {
385 return Query.Types[0] != Query.MMODescrs[0].MemoryTy;
386 });
387 }
388 Action.widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
389 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
390 .scalarize(TypeIdx: 0);
391 }
392
393 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
394 auto &Action = getActionDefinitionsBuilder(Opcode: Op);
395 Action.legalForTypesWithMemDesc(
396 TypesAndMemDesc: {{.Type0: s16, .Type1: p0, .MemTy: s8, .Align: 1}, {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 1}, {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 1}});
397 if (Is64Bit)
398 Action.legalForTypesWithMemDesc(
399 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: 1}, {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: 1}, {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: 1}});
400 // TODO - SSE41/AVX2/AVX512F/AVX512BW vector extensions
401 }
402
403 // sext, zext, and anyext
404 getActionDefinitionsBuilder(Opcode: G_ANYEXT)
405 .legalFor(Types: {s8, s16, s32, s128})
406 .legalFor(Pred: Is64Bit, Types: {s64})
407 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
408 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
409 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 8)
410 .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: sMaxScalar)
411 .scalarize(TypeIdx: 0);
412
413 getActionDefinitionsBuilder(Opcodes: {G_SEXT, G_ZEXT})
414 .legalFor(Types: {s8, s16, s32})
415 .legalFor(Pred: Is64Bit, Types: {s64})
416 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
417 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
418 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 8)
419 .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: sMaxScalar)
420 .scalarize(TypeIdx: 0);
421
422 getActionDefinitionsBuilder(Opcode: G_SEXT_INREG).lower();
423
424 // fp constants
425 getActionDefinitionsBuilder(Opcode: G_FCONSTANT)
426 .legalFor(Types: {s32, s64})
427 .legalFor(Pred: UseX87, Types: {s80});
428
429 // fp arithmetic
430 getActionDefinitionsBuilder(Opcodes: {G_FADD, G_FSUB, G_FMUL, G_FDIV})
431 .legalFor(Types: {s32, s64})
432 .legalFor(Pred: HasSSE1, Types: {v4s32})
433 .legalFor(Pred: HasSSE2, Types: {v2s64})
434 .legalFor(Pred: HasAVX, Types: {v8s32, v4s64})
435 .legalFor(Pred: HasAVX512, Types: {v16s32, v8s64})
436 .legalFor(Pred: UseX87, Types: {s80});
437
438 getActionDefinitionsBuilder(Opcode: G_FABS)
439 .legalFor(Pred: UseX87, Types: {s80})
440 .legalFor(Pred: UseX87 && !Is64Bit, Types: {s64})
441 .lower();
442
443 // fp comparison
444 getActionDefinitionsBuilder(Opcode: G_FCMP)
445 .legalFor(Pred: HasSSE1 || UseX87, Types: {s8, s32})
446 .legalFor(Pred: HasSSE2 || UseX87, Types: {s8, s64})
447 .legalFor(Pred: UseX87, Types: {s8, s80})
448 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s8)
449 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32)
450 .widenScalarToNextPow2(TypeIdx: 1);
451
452 // fp conversions
453 getActionDefinitionsBuilder(Opcode: G_FPEXT)
454 .legalFor(Pred: HasSSE2, Types: {{s64, s32}})
455 .legalFor(Pred: HasAVX, Types: {{v4s64, v4s32}})
456 .legalFor(Pred: HasAVX512, Types: {{v8s64, v8s32}})
457 .libcall();
458
459 getActionDefinitionsBuilder(Opcode: G_FPTRUNC)
460 .legalFor(Pred: HasSSE2, Types: {{s32, s64}})
461 .legalFor(Pred: HasAVX, Types: {{v4s32, v4s64}})
462 .legalFor(Pred: HasAVX512, Types: {{v8s32, v8s64}});
463
464 getActionDefinitionsBuilder(Opcode: G_SITOFP)
465 .legalFor(Pred: HasSSE1, Types: {{s32, s32}})
466 .legalFor(Pred: HasSSE1 && Is64Bit, Types: {{s32, s64}})
467 .legalFor(Pred: HasSSE2, Types: {{s64, s32}})
468 .legalFor(Pred: HasSSE2 && Is64Bit, Types: {{s64, s64}})
469 .clampScalar(TypeIdx: 1, MinTy: (UseX87 && !HasSSE1) ? s16 : s32, MaxTy: sMaxScalar)
470 .widenScalarToNextPow2(TypeIdx: 1)
471 .customForCartesianProduct(Pred: UseX87, Types0: {s32, s64, s80}, Types1: {s16, s32, s64})
472 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32)
473 .widenScalarToNextPow2(TypeIdx: 0);
474
475 getActionDefinitionsBuilder(Opcode: G_FPTOSI)
476 .legalFor(Pred: HasSSE1, Types: {{s32, s32}})
477 .legalFor(Pred: HasSSE1 && Is64Bit, Types: {{s64, s32}})
478 .legalFor(Pred: HasSSE2, Types: {{s32, s64}})
479 .legalFor(Pred: HasSSE2 && Is64Bit, Types: {{s64, s64}})
480 .clampScalar(TypeIdx: 0, MinTy: (UseX87 && !HasSSE1) ? s16 : s32, MaxTy: sMaxScalar)
481 .widenScalarToNextPow2(TypeIdx: 0)
482 .customForCartesianProduct(Pred: UseX87, Types0: {s16, s32, s64}, Types1: {s32, s64, s80})
483 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32)
484 .widenScalarToNextPow2(TypeIdx: 1);
485
// For G_UITOFP and G_FPTOUI without AVX512, types <= s32 have to be custom
// legalized before any widening. Otherwise the custom handler has no way to
// tell whether an s32 is the original type (which must be promoted to s64)
// or the result of widening (which must not be widened to s64).
//
// For AVX512 we simply widen types, as there is a direct mapping from the
// opcodes to asm instructions.
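// For example, without AVX512 an s16 G_FPTOUI would first be widened to s32,
// and from then on it would be indistinguishable from a G_FPTOUI that started
// out as s32 and needs the s64-based lowering.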
493 getActionDefinitionsBuilder(Opcode: G_UITOFP)
494 .legalFor(Pred: HasAVX512, Types: {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
495 .customIf(Predicate: [=](const LegalityQuery &Query) {
496 return !HasAVX512 &&
497 ((HasSSE1 && typeIs(TypeIdx: 0, TypesInit: s32)(Query)) ||
498 (HasSSE2 && typeIs(TypeIdx: 0, TypesInit: s64)(Query))) &&
499 scalarNarrowerThan(TypeIdx: 1, Size: Is64Bit ? 64 : 32)(Query);
500 })
501 .lowerIf(Predicate: [=](const LegalityQuery &Query) {
502 // Lower conversions from s64
503 return !HasAVX512 &&
504 ((HasSSE1 && typeIs(TypeIdx: 0, TypesInit: s32)(Query)) ||
505 (HasSSE2 && typeIs(TypeIdx: 0, TypesInit: s64)(Query))) &&
506 (Is64Bit && typeIs(TypeIdx: 1, TypesInit: s64)(Query));
507 })
508 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32)
509 .widenScalarToNextPow2(TypeIdx: 0)
510 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: sMaxScalar)
511 .widenScalarToNextPow2(TypeIdx: 1);
512
513 getActionDefinitionsBuilder(Opcode: G_FPTOUI)
514 .legalFor(Pred: HasAVX512, Types: {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
515 .customIf(Predicate: [=](const LegalityQuery &Query) {
516 return !HasAVX512 &&
517 ((HasSSE1 && typeIs(TypeIdx: 1, TypesInit: s32)(Query)) ||
518 (HasSSE2 && typeIs(TypeIdx: 1, TypesInit: s64)(Query))) &&
519 scalarNarrowerThan(TypeIdx: 0, Size: Is64Bit ? 64 : 32)(Query);
520 })
521 // TODO: replace with customized legalization using
522 // specifics of cvttsd2si. The selection of this node requires
523 // a vector type. Either G_SCALAR_TO_VECTOR is needed or more advanced
524 // support of G_BUILD_VECTOR/G_INSERT_VECTOR_ELT is required beforehand.
525 .lowerIf(Predicate: [=](const LegalityQuery &Query) {
526 return !HasAVX512 &&
527 ((HasSSE1 && typeIs(TypeIdx: 1, TypesInit: s32)(Query)) ||
528 (HasSSE2 && typeIs(TypeIdx: 1, TypesInit: s64)(Query))) &&
529 (Is64Bit && typeIs(TypeIdx: 0, TypesInit: s64)(Query));
530 })
531 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sMaxScalar)
532 .widenScalarToNextPow2(TypeIdx: 0)
533 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32)
534 .widenScalarToNextPow2(TypeIdx: 1);
535
536 // vector ops
537 getActionDefinitionsBuilder(Opcode: G_BUILD_VECTOR)
538 .customIf(Predicate: [=](const LegalityQuery &Query) {
539 return (HasSSE1 && typeInSet(TypeIdx: 0, TypesInit: {v4s32})(Query)) ||
540 (HasSSE2 && typeInSet(TypeIdx: 0, TypesInit: {v2s64, v8s16, v16s8})(Query)) ||
541 (HasAVX && typeInSet(TypeIdx: 0, TypesInit: {v4s64, v8s32, v16s16, v32s8})(Query)) ||
(HasAVX512 && typeInSet(TypeIdx: 0, TypesInit: {v8s64, v16s32, v32s16, v64s8})(Query));
543 })
544 .clampNumElements(TypeIdx: 0, MinTy: v16s8, MaxTy: s8MaxVector)
545 .clampNumElements(TypeIdx: 0, MinTy: v8s16, MaxTy: s16MaxVector)
546 .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: s32MaxVector)
547 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: s64MaxVector)
548 .moreElementsToNextPow2(TypeIdx: 0);
549
550 getActionDefinitionsBuilder(Opcodes: {G_EXTRACT, G_INSERT})
551 .legalIf(Predicate: [=](const LegalityQuery &Query) {
552 unsigned SubIdx = Query.Opcode == G_EXTRACT ? 0 : 1;
553 unsigned FullIdx = Query.Opcode == G_EXTRACT ? 1 : 0;
554 return (HasAVX && typePairInSet(TypeIdx0: SubIdx, TypeIdx1: FullIdx,
555 TypesInit: {{v16s8, v32s8},
556 {v8s16, v16s16},
557 {v4s32, v8s32},
558 {v2s64, v4s64}})(Query)) ||
559 (HasAVX512 && typePairInSet(TypeIdx0: SubIdx, TypeIdx1: FullIdx,
560 TypesInit: {{v16s8, v64s8},
561 {v32s8, v64s8},
562 {v8s16, v32s16},
563 {v16s16, v32s16},
564 {v4s32, v16s32},
565 {v8s32, v16s32},
566 {v2s64, v8s64},
567 {v4s64, v8s64}})(Query));
568 });
569
570 // todo: only permit dst types up to max legal vector register size?
571 getActionDefinitionsBuilder(Opcode: G_CONCAT_VECTORS)
572 .legalFor(
573 Pred: HasSSE1,
574 Types: {{v32s8, v16s8}, {v16s16, v8s16}, {v8s32, v4s32}, {v4s64, v2s64}})
575 .legalFor(Pred: HasAVX, Types: {{v64s8, v16s8},
576 {v64s8, v32s8},
577 {v32s16, v8s16},
578 {v32s16, v16s16},
579 {v16s32, v4s32},
580 {v16s32, v8s32},
581 {v8s64, v2s64},
582 {v8s64, v4s64}});
583
584 // todo: vectors and address spaces
585 getActionDefinitionsBuilder(Opcode: G_SELECT)
586 .legalFor(Types: {{s16, s32}, {s32, s32}, {p0, s32}})
587 .legalFor(Pred: !HasCMOV, Types: {{s8, s32}})
588 .legalFor(Pred: Is64Bit, Types: {{s64, s32}})
589 .legalFor(Pred: UseX87, Types: {{s80, s32}})
590 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s32)
591 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
592 .clampScalar(TypeIdx: 0, MinTy: HasCMOV ? s16 : s8, MaxTy: sMaxScalar);
593
594 // memory intrinsics
595 getActionDefinitionsBuilder(Opcodes: {G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
596
597 getActionDefinitionsBuilder(Opcodes: {G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
598 .lower();
599
600 // fp intrinsics
// fpclass is disabled for i686 due to llvm issue #171992
602 getActionDefinitionsBuilder(Opcode: G_IS_FPCLASS)
603 .lowerFor(Pred: Is64Bit, Types: {{s1, s32}, {s1, s64}, {s1, s80}});
604
605 getActionDefinitionsBuilder(Opcodes: {G_INTRINSIC_ROUNDEVEN, G_INTRINSIC_TRUNC})
606 .scalarize(TypeIdx: 0)
607 .minScalar(TypeIdx: 0, Ty: LLT::scalar(SizeInBits: 32))
608 .libcall();
609
610 getLegacyLegalizerInfo().computeTables();
611 verify(MII: *STI.getInstrInfo());
612}
613
614bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
615 LostDebugLocObserver &LocObserver) const {
616 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
617 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
618 switch (MI.getOpcode()) {
619 default:
620 // No idea what to do.
621 return false;
622 case TargetOpcode::G_BUILD_VECTOR:
623 return legalizeBuildVector(MI, MRI, Helper);
624 case TargetOpcode::G_FPTOUI:
625 return legalizeFPTOUI(MI, MRI, Helper);
626 case TargetOpcode::G_UITOFP:
627 return legalizeUITOFP(MI, MRI, Helper);
628 case TargetOpcode::G_STORE:
629 return legalizeNarrowingStore(MI, MRI, Helper);
630 case TargetOpcode::G_SITOFP:
631 return legalizeSITOFP(MI, MRI, Helper);
632 case TargetOpcode::G_FPTOSI:
633 return legalizeFPTOSI(MI, MRI, Helper);
634 case TargetOpcode::G_GET_ROUNDING:
635 return legalizeGETROUNDING(MI, MRI, Helper);
636 case TargetOpcode::G_SET_ROUNDING:
637 return legalizeSETROUNDING(MI, MRI, Helper);
638 }
639 llvm_unreachable("expected switch to return");
640}
641
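// x87 lowering of G_SITOFP: the integer source is spilled to a stack slot and
// reloaded onto the FPU stack with X86::G_FILD, which produces the FP result
// in an x87 register.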
642bool X86LegalizerInfo::legalizeSITOFP(MachineInstr &MI,
643 MachineRegisterInfo &MRI,
644 LegalizerHelper &Helper) const {
645 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
646 MachineFunction &MF = *MI.getMF();
647 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
648
649 assert((SrcTy.getSizeInBits() == 16 || SrcTy.getSizeInBits() == 32 ||
650 SrcTy.getSizeInBits() == 64) &&
651 "Unexpected source type for SITOFP in X87 mode.");
652
653 TypeSize MemSize = SrcTy.getSizeInBytes();
654 MachinePointerInfo PtrInfo;
655 Align Alignmt = Helper.getStackTemporaryAlignment(Type: SrcTy);
656 auto SlotPointer = Helper.createStackTemporary(Bytes: MemSize, Alignment: Alignmt, PtrInfo);
657 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
658 PtrInfo, F: MachineMemOperand::MOStore, Size: MemSize, BaseAlignment: Align(MemSize));
659
// Store the integer value to a stack slot so it can be loaded onto the x87
// FPU stack with G_FILD below.
661 MIRBuilder.buildStore(Val: Src, Addr: SlotPointer, MMO&: *StoreMMO);
662
663 MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
664 PtrInfo, F: MachineMemOperand::MOLoad, Size: MemSize, BaseAlignment: Align(MemSize));
665 MIRBuilder.buildInstr(Opcode: X86::G_FILD)
666 .addDef(RegNo: Dst)
667 .addUse(RegNo: SlotPointer.getReg(Idx: 0))
668 .addMemOperand(MMO: LoadMMO);
669
670 MI.eraseFromParent();
671 return true;
672}
673
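// x87 lowering of G_FPTOSI: X86::G_FIST stores the FP value to a stack slot
// as an integer, and a plain integer load then produces the result.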
674bool X86LegalizerInfo::legalizeFPTOSI(MachineInstr &MI,
675 MachineRegisterInfo &MRI,
676 LegalizerHelper &Helper) const {
677 MachineFunction &MF = *MI.getMF();
678 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
679 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
680
681 TypeSize MemSize = DstTy.getSizeInBytes();
682 MachinePointerInfo PtrInfo;
683 Align Alignmt = Helper.getStackTemporaryAlignment(Type: DstTy);
684 auto SlotPointer = Helper.createStackTemporary(Bytes: MemSize, Alignment: Alignmt, PtrInfo);
685 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
686 PtrInfo, F: MachineMemOperand::MOStore, Size: MemSize, BaseAlignment: Align(MemSize));
687
688 MIRBuilder.buildInstr(Opcode: X86::G_FIST)
689 .addUse(RegNo: Src)
690 .addUse(RegNo: SlotPointer.getReg(Idx: 0))
691 .addMemOperand(MMO: StoreMMO);
692
693 MIRBuilder.buildLoad(Res: Dst, Addr: SlotPointer, PtrInfo, Alignment: Align(MemSize));
694 MI.eraseFromParent();
695 return true;
696}
697
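// Lower a G_BUILD_VECTOR whose sources are all constants (or undef) into a
// load from a constant-pool entry holding the equivalent vector constant.
// Non-constant sources are left to other legalization paths (return false).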
698bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
699 MachineRegisterInfo &MRI,
700 LegalizerHelper &Helper) const {
701 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
702 const auto &BuildVector = cast<GBuildVector>(Val&: MI);
703 Register Dst = BuildVector.getReg(Idx: 0);
704 LLT DstTy = MRI.getType(Reg: Dst);
705 MachineFunction &MF = MIRBuilder.getMF();
706 LLVMContext &Ctx = MF.getFunction().getContext();
707 uint64_t DstTySize = DstTy.getScalarSizeInBits();
708
709 SmallVector<Constant *, 4> CstIdxs;
710 for (unsigned i = 0; i < BuildVector.getNumSources(); ++i) {
711 Register Source = BuildVector.getSourceReg(I: i);
712
713 auto ValueAndReg = getIConstantVRegValWithLookThrough(VReg: Source, MRI);
714 if (ValueAndReg) {
715 CstIdxs.emplace_back(Args: ConstantInt::get(Context&: Ctx, V: ValueAndReg->Value));
716 continue;
717 }
718
719 auto FPValueAndReg = getFConstantVRegValWithLookThrough(VReg: Source, MRI);
720 if (FPValueAndReg) {
721 CstIdxs.emplace_back(Args: ConstantFP::get(Context&: Ctx, V: FPValueAndReg->Value));
722 continue;
723 }
724
725 if (getOpcodeDef<GImplicitDef>(Reg: Source, MRI)) {
726 CstIdxs.emplace_back(Args: UndefValue::get(T: Type::getIntNTy(C&: Ctx, N: DstTySize)));
727 continue;
728 }
729 return false;
730 }
731
732 Constant *ConstVal = ConstantVector::get(V: CstIdxs);
733
734 const DataLayout &DL = MIRBuilder.getDataLayout();
735 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
736 Align Alignment(DL.getABITypeAlign(Ty: ConstVal->getType()));
737 auto Addr = MIRBuilder.buildConstantPool(
738 Res: LLT::pointer(AddressSpace: AddrSpace, SizeInBits: DL.getPointerSizeInBits(AS: AddrSpace)),
739 Idx: MF.getConstantPool()->getConstantPoolIndex(C: ConstVal, Alignment));
740 MachineMemOperand *MMO =
741 MF.getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF),
742 f: MachineMemOperand::MOLoad, MemTy: DstTy, base_alignment: Alignment);
743
744 MIRBuilder.buildLoad(Res: Dst, Addr, MMO&: *MMO);
745 MI.eraseFromParent();
746 return true;
747}
748
749bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
750 MachineRegisterInfo &MRI,
751 LegalizerHelper &Helper) const {
752 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
753 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
754 unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
755 const LLT s32 = LLT::scalar(SizeInBits: 32);
756 const LLT s64 = LLT::scalar(SizeInBits: 64);
757
758 // Simply reuse FPTOSI when it is possible to widen the type
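  // An unsigned result of at most 32 bits always fits in the non-negative
  // range of the next wider signed type, so a signed conversion to that wider
  // type followed by truncation yields the same bits.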
759 if (DstSizeInBits <= 32) {
760 auto Casted = MIRBuilder.buildFPTOSI(Dst: DstTy == s32 ? s64 : s32, Src0: Src);
761 MIRBuilder.buildTrunc(Res: Dst, Op: Casted);
762 MI.eraseFromParent();
763 return true;
764 }
765
766 return false;
767}
768
769bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
770 MachineRegisterInfo &MRI,
771 LegalizerHelper &Helper) const {
772 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
773 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
774 const LLT s32 = LLT::scalar(SizeInBits: 32);
775 const LLT s64 = LLT::scalar(SizeInBits: 64);
776
777 // Simply reuse SITOFP when it is possible to widen the type
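  // Zero-extending an unsigned source of at most 32 bits into a wider type
  // keeps its value non-negative, so the signed conversion produces the same
  // result as the unsigned one.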
778 if (SrcTy.getSizeInBits() <= 32) {
779 auto Ext = MIRBuilder.buildZExt(Res: SrcTy == s32 ? s64 : s32, Op: Src);
780 MIRBuilder.buildSITOFP(Dst, Src0: Ext);
781 MI.eraseFromParent();
782 return true;
783 }
784
785 return false;
786}
787
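// Custom handling for a store whose register type is wider than its memory
// type: rebuild the memory operand with the register type so the store is no
// longer narrowing.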
788bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
789 MachineRegisterInfo &MRI,
790 LegalizerHelper &Helper) const {
791 auto &Store = cast<GStore>(Val&: MI);
792 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
793 MachineMemOperand &MMO = **Store.memoperands_begin();
794 MachineFunction &MF = MIRBuilder.getMF();
795 LLT ValTy = MRI.getType(Reg: Store.getValueReg());
796 auto *NewMMO = MF.getMachineMemOperand(MMO: &MMO, PtrInfo: MMO.getPointerInfo(), Ty: ValTy);
797
798 Helper.Observer.changingInstr(MI&: Store);
799 Store.setMemRefs(MF, MemRefs: {NewMMO});
800 Helper.Observer.changedInstr(MI&: Store);
801 return true;
802}
803
804bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI,
805 MachineRegisterInfo &MRI,
806 LegalizerHelper &Helper) const {
807 /*
808 The rounding mode is in bits 11:10 of FPSR, and has the following
809 settings:
810 00 Round to nearest
811 01 Round to -inf
812 10 Round to +inf
813 11 Round to 0
814
815 GET_ROUNDING, on the other hand, expects the following:
816 -1 Undefined
817 0 Round to 0
818 1 Round to nearest
819 2 Round to +inf
820 3 Round to -inf
821
822 To perform the conversion, we use a packed lookup table of the four 2-bit
values that we can index by FPSR[11:10]
824 0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10]
825
826 (0x2d >> ((FPSR >> 9) & 6)) & 3
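
 For example, with FPSR[11:10] = 0b10 (round to +inf):
 (FPSR >> 9) & 6 = 4, and (0x2d >> 4) & 3 = 2, which is the GET_ROUNDING
 encoding for round to +inf.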
827 */
828
829 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
830 MachineFunction &MF = MIRBuilder.getMF();
831 Register Dst = MI.getOperand(i: 0).getReg();
832 LLT DstTy = MRI.getType(Reg: Dst);
833 const LLT s8 = LLT::scalar(SizeInBits: 8);
834 const LLT s16 = LLT::scalar(SizeInBits: 16);
835 const LLT s32 = LLT::scalar(SizeInBits: 32);
836
837 // Save FP Control Word to stack slot
838 int MemSize = 2;
839 Align Alignment = Align(2);
840 MachinePointerInfo PtrInfo;
841 auto StackTemp = Helper.createStackTemporary(Bytes: TypeSize::getFixed(ExactSize: MemSize),
842 Alignment, PtrInfo);
843 Register StackPtr = StackTemp.getReg(Idx: 0);
844
845 auto StoreMMO = MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOStore,
846 Size: MemSize, BaseAlignment: Alignment);
847
848 // Store FP Control Word to stack slot using G_FNSTCW16
849 MIRBuilder.buildInstr(Opcode: X86::G_FNSTCW16)
850 .addUse(RegNo: StackPtr)
851 .addMemOperand(MMO: StoreMMO);
852
853 // Load FP Control Word from stack slot
854 auto LoadMMO = MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOLoad,
855 Size: MemSize, BaseAlignment: Alignment);
856
857 auto CWD32 =
858 MIRBuilder.buildZExt(Res: s32, Op: MIRBuilder.buildLoad(Res: s16, Addr: StackPtr, MMO&: *LoadMMO));
859 auto Shifted8 = MIRBuilder.buildTrunc(
860 Res: s8, Op: MIRBuilder.buildLShr(Dst: s32, Src0: CWD32, Src1: MIRBuilder.buildConstant(Res: s8, Val: 9)));
861 auto Masked32 = MIRBuilder.buildZExt(
862 Res: s32, Op: MIRBuilder.buildAnd(Dst: s8, Src0: Shifted8, Src1: MIRBuilder.buildConstant(Res: s8, Val: 6)));
863
// LUT is a packed lookup table (0x2d) used to map the 2-bit x87 FPU rounding
// mode (from bits 11:10 of the control word) to the values expected by
// GET_ROUNDING. The mapping is performed by shifting LUT right by twice the
// extracted rounding mode and masking the result with 3 to obtain the final
// value.
868 auto LUT = MIRBuilder.buildConstant(Res: s32, Val: 0x2d);
869 auto LUTShifted = MIRBuilder.buildLShr(Dst: s32, Src0: LUT, Src1: Masked32);
870 auto RetVal =
871 MIRBuilder.buildAnd(Dst: s32, Src0: LUTShifted, Src1: MIRBuilder.buildConstant(Res: s32, Val: 3));
872 auto RetValTrunc = MIRBuilder.buildZExtOrTrunc(Res: DstTy, Op: RetVal);
873
874 MIRBuilder.buildCopy(Res: Dst, Op: RetValTrunc);
875
876 MI.eraseFromParent();
877 return true;
878}
879
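// Legalize G_SET_ROUNDING by rewriting the rounding-control bits of the x87
// control word through a stack temporary (G_FNSTCW16 / G_FLDCW16) and, when
// SSE is available, updating the matching bits of MXCSR the same way.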
880bool X86LegalizerInfo::legalizeSETROUNDING(MachineInstr &MI,
881 MachineRegisterInfo &MRI,
882 LegalizerHelper &Helper) const {
883 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
884 MachineFunction &MF = MIRBuilder.getMF();
885 Register Src = MI.getOperand(i: 0).getReg();
886 const LLT s8 = LLT::scalar(SizeInBits: 8);
887 const LLT s16 = LLT::scalar(SizeInBits: 16);
888 const LLT s32 = LLT::scalar(SizeInBits: 32);
889
890 // Allocate stack slot for control word and MXCSR (4 bytes).
891 int MemSize = 4;
892 Align Alignment = Align(4);
893 MachinePointerInfo PtrInfo;
894 auto StackTemp = Helper.createStackTemporary(Bytes: TypeSize::getFixed(ExactSize: MemSize),
895 Alignment, PtrInfo);
896 Register StackPtr = StackTemp.getReg(Idx: 0);
897
898 auto StoreMMO =
899 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOStore, Size: 2, BaseAlignment: Align(2));
900 MIRBuilder.buildInstr(Opcode: X86::G_FNSTCW16)
901 .addUse(RegNo: StackPtr)
902 .addMemOperand(MMO: StoreMMO);
903
904 auto LoadMMO =
905 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOLoad, Size: 2, BaseAlignment: Align(2));
906 auto CWD16 = MIRBuilder.buildLoad(Res: s16, Addr: StackPtr, MMO&: *LoadMMO);
907
908 // Clear RM field (bits 11:10)
909 auto ClearedCWD =
910 MIRBuilder.buildAnd(Dst: s16, Src0: CWD16, Src1: MIRBuilder.buildConstant(Res: s16, Val: 0xf3ff));
911
912 // Check if Src is a constant
913 auto *SrcDef = MRI.getVRegDef(Reg: Src);
914 Register RMBits;
915 Register MXCSRRMBits;
916
917 if (SrcDef && SrcDef->getOpcode() == TargetOpcode::G_CONSTANT) {
918 uint64_t RM = getIConstantFromReg(VReg: Src, MRI).getZExtValue();
919 int FieldVal = X86::getRoundingModeX86(RM);
920
921 if (FieldVal == X86::rmInvalid) {
922 FieldVal = X86::rmToNearest;
923 LLVMContext &C = MF.getFunction().getContext();
924 C.diagnose(DI: DiagnosticInfoUnsupported(
925 MF.getFunction(), "rounding mode is not supported by X86 hardware",
926 DiagnosticLocation(MI.getDebugLoc()), DS_Error));
927 return false;
928 }
929
930 FieldVal = FieldVal << 3;
931 RMBits = MIRBuilder.buildConstant(Res: s16, Val: FieldVal).getReg(Idx: 0);
932 MXCSRRMBits = MIRBuilder.buildConstant(Res: s32, Val: FieldVal).getReg(Idx: 0);
933 } else {
934 // Convert Src (rounding mode) to bits for control word
935 // (0xc9 << (2 * Src + 4)) & 0xc00
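    // For example, Src = 2 (round to +inf) gives a shift of 8:
    // (0xc9 << 8) & 0xc00 = 0x800, i.e. control-word RM bits 11:10 = 0b10.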
936 auto Src32 = MIRBuilder.buildZExtOrTrunc(Res: s32, Op: Src);
937 auto ShiftAmt = MIRBuilder.buildAdd(
938 Dst: s32, Src0: MIRBuilder.buildShl(Dst: s32, Src0: Src32, Src1: MIRBuilder.buildConstant(Res: s32, Val: 1)),
939 Src1: MIRBuilder.buildConstant(Res: s32, Val: 4));
940 auto ShiftAmt8 = MIRBuilder.buildTrunc(Res: s8, Op: ShiftAmt);
941 auto Shifted = MIRBuilder.buildShl(Dst: s16, Src0: MIRBuilder.buildConstant(Res: s16, Val: 0xc9),
942 Src1: ShiftAmt8);
943 RMBits =
944 MIRBuilder.buildAnd(Dst: s16, Src0: Shifted, Src1: MIRBuilder.buildConstant(Res: s16, Val: 0xc00))
945 .getReg(Idx: 0);
946
947 // For non-constant case, we still need to compute MXCSR bits dynamically
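    // MXCSR keeps its rounding-control field in bits 14:13, three bit
    // positions above the control-word field (bits 11:10), hence the shift
    // left by 3.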
948 auto RMBits32 = MIRBuilder.buildZExt(Res: s32, Op: RMBits);
949 MXCSRRMBits =
950 MIRBuilder.buildShl(Dst: s32, Src0: RMBits32, Src1: MIRBuilder.buildConstant(Res: s32, Val: 3))
951 .getReg(Idx: 0);
952 }
953 // Update rounding mode bits
954 auto NewCWD =
955 MIRBuilder.buildOr(Dst: s16, Src0: ClearedCWD, Src1: RMBits, Flags: MachineInstr::Disjoint);
956
957 // Store new FP Control Word to stack
958 auto StoreNewMMO =
959 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOStore, Size: 2, BaseAlignment: Align(2));
960 MIRBuilder.buildStore(Val: NewCWD, Addr: StackPtr, MMO&: *StoreNewMMO);
961
962 // Load FP control word from the slot using G_FLDCW16
963 auto LoadNewMMO =
964 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOLoad, Size: 2, BaseAlignment: Align(2));
965 MIRBuilder.buildInstr(Opcode: X86::G_FLDCW16)
966 .addUse(RegNo: StackPtr)
967 .addMemOperand(MMO: LoadNewMMO);
968
969 if (Subtarget.hasSSE1()) {
970 // Store MXCSR to stack (use STMXCSR)
971 auto StoreMXCSRMMO = MF.getMachineMemOperand(
972 PtrInfo, F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(4));
973 MIRBuilder.buildInstr(Opcode: TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
974 .addIntrinsicID(ID: Intrinsic::x86_sse_stmxcsr)
975 .addUse(RegNo: StackPtr)
976 .addMemOperand(MMO: StoreMXCSRMMO);
977
978 // Load MXCSR from stack
979 auto LoadMXCSRMMO = MF.getMachineMemOperand(
980 PtrInfo, F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(4));
981 auto MXCSR = MIRBuilder.buildLoad(Res: s32, Addr: StackPtr, MMO&: *LoadMXCSRMMO);
982
983 // Clear RM field (bits 14:13)
984 auto ClearedMXCSR = MIRBuilder.buildAnd(
985 Dst: s32, Src0: MXCSR, Src1: MIRBuilder.buildConstant(Res: s32, Val: 0xffff9fff));
986
987 // Update rounding mode bits
988 auto NewMXCSR = MIRBuilder.buildOr(Dst: s32, Src0: ClearedMXCSR, Src1: MXCSRRMBits);
989
990 // Store new MXCSR to stack
991 auto StoreNewMXCSRMMO = MF.getMachineMemOperand(
992 PtrInfo, F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(4));
993 MIRBuilder.buildStore(Val: NewMXCSR, Addr: StackPtr, MMO&: *StoreNewMXCSRMMO);
994
995 // Load MXCSR from stack (use LDMXCSR)
996 auto LoadNewMXCSRMMO = MF.getMachineMemOperand(
997 PtrInfo, F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(4));
998 MIRBuilder.buildInstr(Opcode: TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
999 .addIntrinsicID(ID: Intrinsic::x86_sse_ldmxcsr)
1000 .addUse(RegNo: StackPtr)
1001 .addMemOperand(MMO: LoadNewMXCSRMMO);
1002 }
1003
1004 MI.eraseFromParent();
1005 return true;
1006}
1007
1008bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1009 MachineInstr &MI) const {
1010 return true;
1011}
1012