//===- X86LegalizerInfo.cpp --------------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for X86.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
12
#include "X86LegalizerInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Type.h"
#include <cassert>
#include <cstdint>
#include <initializer_list>
#include <utility>

using namespace llvm;
using namespace TargetOpcode;
using namespace LegalizeActions;
using namespace LegalityPredicates;
31
32X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
33 const X86TargetMachine &TM)
34 : Subtarget(STI) {
35
36 bool Is64Bit = Subtarget.is64Bit();
37 bool HasCMOV = Subtarget.canUseCMOV();
38 bool HasSSE1 = Subtarget.hasSSE1();
39 bool HasSSE2 = Subtarget.hasSSE2();
40 bool HasSSE41 = Subtarget.hasSSE41();
41 bool HasAVX = Subtarget.hasAVX();
42 bool HasAVX2 = Subtarget.hasAVX2();
43 bool HasAVX512 = Subtarget.hasAVX512();
44 bool HasVLX = Subtarget.hasVLX();
45 bool HasDQI = Subtarget.hasAVX512() && Subtarget.hasDQI();
46 bool HasBWI = Subtarget.hasAVX512() && Subtarget.hasBWI();
47 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
48 bool HasPOPCNT = Subtarget.hasPOPCNT();
49 bool HasLZCNT = Subtarget.hasLZCNT();
50 bool HasBMI = Subtarget.hasBMI();
51
52 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: TM.getPointerSizeInBits(AS: 0));
53 const LLT s1 = LLT::scalar(SizeInBits: 1);
54 const LLT s8 = LLT::scalar(SizeInBits: 8);
55 const LLT s16 = LLT::scalar(SizeInBits: 16);
56 const LLT s32 = LLT::scalar(SizeInBits: 32);
57 const LLT s64 = LLT::scalar(SizeInBits: 64);
58 const LLT s80 = LLT::scalar(SizeInBits: 80);
59 const LLT s128 = LLT::scalar(SizeInBits: 128);
60 const LLT sMaxScalar = Subtarget.is64Bit() ? s64 : s32;
61 const LLT v2s32 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32);
62 const LLT v4s8 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 8);
63
64 const LLT v16s8 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8);
65 const LLT v8s16 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16);
66 const LLT v4s32 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
67 const LLT v2s64 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64);
68 const LLT v2p0 = LLT::fixed_vector(NumElements: 2, ScalarTy: p0);
69
70 const LLT v32s8 = LLT::fixed_vector(NumElements: 32, ScalarSizeInBits: 8);
71 const LLT v16s16 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 16);
72 const LLT v8s32 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 32);
73 const LLT v4s64 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 64);
74 const LLT v4p0 = LLT::fixed_vector(NumElements: 4, ScalarTy: p0);
75
76 const LLT v64s8 = LLT::fixed_vector(NumElements: 64, ScalarSizeInBits: 8);
77 const LLT v32s16 = LLT::fixed_vector(NumElements: 32, ScalarSizeInBits: 16);
78 const LLT v16s32 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 32);
79 const LLT v8s64 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 64);
80
81 const LLT s8MaxVector = HasAVX512 ? v64s8 : HasAVX ? v32s8 : v16s8;
82 const LLT s16MaxVector = HasAVX512 ? v32s16 : HasAVX ? v16s16 : v8s16;
83 const LLT s32MaxVector = HasAVX512 ? v16s32 : HasAVX ? v8s32 : v4s32;
84 const LLT s64MaxVector = HasAVX512 ? v8s64 : HasAVX ? v4s64 : v2s64;
85
86 // todo: AVX512 bool vector predicate types
87
88 // implicit/constants
89 // 32/64-bits needs support for s64/s128 to handle cases:
90 // s64 = EXTEND (G_IMPLICIT_DEF s32) -> s64 = G_IMPLICIT_DEF
91 // s128 = EXTEND (G_IMPLICIT_DEF s32/s64) -> s128 = G_IMPLICIT_DEF
92 getActionDefinitionsBuilder(
93 Opcodes: {G_IMPLICIT_DEF, G_PHI, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
94 .legalFor(Types: {p0, s1, s8, s16, s32, s64})
95 .legalFor(Pred: UseX87, Types: {s80})
96 .legalFor(Pred: Is64Bit, Types: {s128})
97 .legalFor(Pred: HasSSE2, Types: {v16s8, v8s16, v4s32, v2s64})
98 .legalFor(Pred: HasAVX, Types: {v32s8, v16s16, v8s32, v4s64})
99 .legalFor(Pred: HasAVX512, Types: {v64s8, v32s16, v16s32, v8s64})
100 .widenScalarOrEltToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
101 .clampScalarOrElt(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
102 .moreElementsToNextPow2(TypeIdx: 0)
103 .clampNumElements(TypeIdx: 0, MinTy: v16s8, MaxTy: s8MaxVector)
104 .clampNumElements(TypeIdx: 0, MinTy: v8s16, MaxTy: s16MaxVector)
105 .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: s32MaxVector)
106 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: s64MaxVector)
107 .clampMaxNumElements(TypeIdx: 0, EltTy: p0,
108 MaxElements: Is64Bit ? s64MaxVector.getNumElements()
109 : s32MaxVector.getNumElements())
110 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0);
111
112 getActionDefinitionsBuilder(Opcode: G_CONSTANT)
113 .legalFor(Types: {p0, s8, s16, s32})
114 .legalFor(Pred: Is64Bit, Types: {s64})
115 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
116 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar);
117
118 getActionDefinitionsBuilder(Opcodes: {G_LROUND, G_LLROUND})
119 .widenScalarIf(Predicate: typeIs(TypeIdx: 1, TypesInit: s16),
120 Mutation: [=](const LegalityQuery &) {
121 return std::pair<unsigned, LLT>(1, s32);
122 })
123 .libcall();
124
125 getActionDefinitionsBuilder(
126 Opcodes: {G_FCOS, G_FCOSH, G_FACOS, G_FSIN, G_FSINH, G_FASIN, G_FTAN,
127 G_FTANH, G_FATAN, G_FATAN2, G_FPOW, G_FEXP, G_FEXP2, G_FEXP10,
128 G_FLOG, G_FLOG2, G_FLOG10, G_FPOWI, G_FSINCOS, G_FCEIL, G_FFLOOR})
129 .libcall();
130
131 getActionDefinitionsBuilder(Opcode: G_FNEG)
132 .legalFor(Pred: UseX87 && !HasSSE1, Types: {s32})
133 .legalFor(Pred: UseX87 && !HasSSE2, Types: {s64})
134 .legalFor(Pred: UseX87, Types: {s80})
135 .lower();
136
137 getActionDefinitionsBuilder(Opcode: G_FSQRT)
138 .legalFor(Pred: HasSSE1 || UseX87, Types: {s32})
139 .legalFor(Pred: HasSSE2 || UseX87, Types: {s64})
140 .legalFor(Pred: UseX87, Types: {s80});
141
142 getActionDefinitionsBuilder(Opcodes: {G_GET_ROUNDING, G_SET_ROUNDING})
143 .customFor(Types: {s32});
144
145 // merge/unmerge
146 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
147 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
148 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
149 getActionDefinitionsBuilder(Opcode: Op)
150 .widenScalarToNextPow2(TypeIdx: LitTyIdx, /*Min=*/MinSize: 8)
151 .widenScalarToNextPow2(TypeIdx: BigTyIdx, /*Min=*/MinSize: 16)
152 .minScalar(TypeIdx: LitTyIdx, Ty: s8)
153 .minScalar(TypeIdx: BigTyIdx, Ty: s32)
154 .legalIf(Predicate: [=](const LegalityQuery &Q) {
155 switch (Q.Types[BigTyIdx].getSizeInBits()) {
156 case 16:
157 case 32:
158 case 64:
159 case 128:
160 case 256:
161 case 512:
162 break;
163 default:
164 return false;
165 }
166 switch (Q.Types[LitTyIdx].getSizeInBits()) {
167 case 8:
168 case 16:
169 case 32:
170 case 64:
171 case 128:
172 case 256:
173 return true;
174 default:
175 return false;
176 }
177 });
178 }
179
180 getActionDefinitionsBuilder(Opcodes: {G_UMIN, G_UMAX, G_SMIN, G_SMAX})
181 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
182 .lower();
183
184 // integer addition/subtraction
185 getActionDefinitionsBuilder(Opcodes: {G_ADD, G_SUB})
186 .legalFor(Types: {s8, s16, s32})
187 .legalFor(Pred: Is64Bit, Types: {s64})
188 .legalFor(Pred: HasSSE2, Types: {v16s8, v8s16, v4s32, v2s64})
189 .legalFor(Pred: HasAVX2, Types: {v32s8, v16s16, v8s32, v4s64})
190 .legalFor(Pred: HasAVX512, Types: {v16s32, v8s64})
191 .legalFor(Pred: HasBWI, Types: {v64s8, v32s16})
192 .clampMinNumElements(TypeIdx: 0, EltTy: s8, MinElements: 16)
193 .clampMinNumElements(TypeIdx: 0, EltTy: s16, MinElements: 8)
194 .clampMinNumElements(TypeIdx: 0, EltTy: s32, MinElements: 4)
195 .clampMinNumElements(TypeIdx: 0, EltTy: s64, MinElements: 2)
196 .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: HasBWI ? 64 : (HasAVX2 ? 32 : 16))
197 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: HasBWI ? 32 : (HasAVX2 ? 16 : 8))
198 .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
199 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: HasAVX512 ? 8 : (HasAVX2 ? 4 : 2))
200 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
201 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
202 .scalarize(TypeIdx: 0);
203
204 getActionDefinitionsBuilder(Opcodes: {G_UADDE, G_UADDO, G_USUBE, G_USUBO})
205 .legalFor(Types: {{s8, s8}, {s16, s8}, {s32, s8}})
206 .legalFor(Pred: Is64Bit, Types: {{s64, s8}})
207 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
208 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
209 .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: s8)
210 .scalarize(TypeIdx: 0);
211
212 // integer multiply
213 getActionDefinitionsBuilder(Opcode: G_MUL)
214 .legalFor(Types: {s8, s16, s32})
215 .legalFor(Pred: Is64Bit, Types: {s64})
216 .legalFor(Pred: HasSSE2, Types: {v8s16})
217 .legalFor(Pred: HasSSE41, Types: {v4s32})
218 .legalFor(Pred: HasAVX2, Types: {v16s16, v8s32})
219 .legalFor(Pred: HasAVX512, Types: {v16s32})
220 .legalFor(Pred: HasDQI, Types: {v8s64})
221 .legalFor(Pred: HasDQI && HasVLX, Types: {v2s64, v4s64})
222 .legalFor(Pred: HasBWI, Types: {v32s16})
223 .clampMinNumElements(TypeIdx: 0, EltTy: s16, MinElements: 8)
224 .clampMinNumElements(TypeIdx: 0, EltTy: s32, MinElements: 4)
225 .clampMinNumElements(TypeIdx: 0, EltTy: s64, MinElements: HasVLX ? 2 : 8)
226 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: HasBWI ? 32 : (HasAVX2 ? 16 : 8))
227 .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
228 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 8)
229 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
230 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
231 .scalarize(TypeIdx: 0);
232
233 getActionDefinitionsBuilder(Opcodes: {G_SMULH, G_UMULH})
234 .legalFor(Types: {s8, s16, s32})
235 .legalFor(Pred: Is64Bit, Types: {s64})
236 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
237 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
238 .scalarize(TypeIdx: 0);
239
240 // integer divisions
241 getActionDefinitionsBuilder(Opcodes: {G_SDIV, G_SREM, G_UDIV, G_UREM})
242 .legalFor(Types: {s8, s16, s32})
243 .legalFor(Pred: Is64Bit, Types: {s64})
244 .libcallFor(Types: {s64})
245 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar);
246
247 // integer shifts
248 getActionDefinitionsBuilder(Opcodes: {G_SHL, G_LSHR, G_ASHR})
249 .legalFor(Types: {{s8, s8}, {s16, s8}, {s32, s8}})
250 .legalFor(Pred: Is64Bit, Types: {{s64, s8}})
251 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
252 .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: s8);
253
254 // integer logic
255 getActionDefinitionsBuilder(Opcodes: {G_AND, G_OR, G_XOR})
256 .legalFor(Types: {s8, s16, s32})
257 .legalFor(Pred: Is64Bit, Types: {s64})
258 .legalFor(Pred: HasSSE2, Types: {v16s8, v8s16, v4s32, v2s64})
259 .legalFor(Pred: HasAVX, Types: {v32s8, v16s16, v8s32, v4s64})
260 .legalFor(Pred: HasAVX512, Types: {v64s8, v32s16, v16s32, v8s64})
261 .clampNumElements(TypeIdx: 0, MinTy: v16s8, MaxTy: s8MaxVector)
262 .clampNumElements(TypeIdx: 0, MinTy: v8s16, MaxTy: s16MaxVector)
263 .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: s32MaxVector)
264 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: s64MaxVector)
265 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
266 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
267 .scalarize(TypeIdx: 0);
268
269 // integer comparison
270 const std::initializer_list<LLT> IntTypes32 = {s8, s16, s32, p0};
271 const std::initializer_list<LLT> IntTypes64 = {s8, s16, s32, s64, p0};
272
273 getActionDefinitionsBuilder(Opcode: G_ICMP)
274 .legalForCartesianProduct(Types0: {s8}, Types1: Is64Bit ? IntTypes64 : IntTypes32)
275 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s8)
276 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 8)
277 .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: sMaxScalar);
278
279 // bswap
280 getActionDefinitionsBuilder(Opcode: G_BSWAP)
281 .legalFor(Types: {s32})
282 .legalFor(Pred: Is64Bit, Types: {s64})
283 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 32)
284 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sMaxScalar);
285
286 // popcount
287 getActionDefinitionsBuilder(Opcode: G_CTPOP)
288 .legalFor(Pred: HasPOPCNT, Types: {{s16, s16}, {s32, s32}})
289 .legalFor(Pred: HasPOPCNT && Is64Bit, Types: {{s64, s64}})
290 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 16)
291 .clampScalar(TypeIdx: 1, MinTy: s16, MaxTy: sMaxScalar)
292 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1);
293
294 // count leading zeros (LZCNT)
295 getActionDefinitionsBuilder(Opcode: G_CTLZ)
296 .legalFor(Pred: HasLZCNT, Types: {{s16, s16}, {s32, s32}})
297 .legalFor(Pred: HasLZCNT && Is64Bit, Types: {{s64, s64}})
298 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 16)
299 .clampScalar(TypeIdx: 1, MinTy: s16, MaxTy: sMaxScalar)
300 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1);
301
302 // count trailing zeros
303 getActionDefinitionsBuilder(Opcode: G_CTTZ_ZERO_POISON)
304 .legalFor(Types: {{s16, s16}, {s32, s32}})
305 .legalFor(Pred: Is64Bit, Types: {{s64, s64}})
306 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 16)
307 .clampScalar(TypeIdx: 1, MinTy: s16, MaxTy: sMaxScalar)
308 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1);
309
310 getActionDefinitionsBuilder(Opcode: G_CTTZ)
311 .legalFor(Pred: HasBMI, Types: {{s16, s16}, {s32, s32}})
312 .legalFor(Pred: HasBMI && Is64Bit, Types: {{s64, s64}})
313 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 16)
314 .clampScalar(TypeIdx: 1, MinTy: s16, MaxTy: sMaxScalar)
315 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1);
316
317 getActionDefinitionsBuilder(Opcode: G_BR).alwaysLegal();
318 getActionDefinitionsBuilder(Opcode: G_BRCOND).legalFor(Types: {s1});
319
320 // pointer handling
321 const std::initializer_list<LLT> PtrTypes32 = {s1, s8, s16, s32};
322 const std::initializer_list<LLT> PtrTypes64 = {s1, s8, s16, s32, s64};
323
324 getActionDefinitionsBuilder(Opcode: G_PTRTOINT)
325 .legalForCartesianProduct(Types0: Is64Bit ? PtrTypes64 : PtrTypes32, Types1: {p0})
326 .maxScalar(TypeIdx: 0, Ty: sMaxScalar)
327 .widenScalarToNextPow2(TypeIdx: 0, /*Min*/ MinSize: 8);
328
329 getActionDefinitionsBuilder(Opcode: G_INTTOPTR).legalFor(Types: {{p0, sMaxScalar}});
330
331 getActionDefinitionsBuilder(Opcode: G_CONSTANT_POOL).legalFor(Types: {p0});
332
333 getActionDefinitionsBuilder(Opcode: G_PTR_ADD)
334 .legalFor(Types: {{p0, s32}})
335 .legalFor(Pred: Is64Bit, Types: {{p0, s64}})
336 .widenScalarToNextPow2(TypeIdx: 1, /*Min*/ MinSize: 32)
337 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: sMaxScalar);
338
339 getActionDefinitionsBuilder(Opcode: G_FRAME_INDEX).legalFor(Types: {p0});
340
341 getActionDefinitionsBuilder(Opcode: G_GLOBAL_VALUE).customFor(Types: {p0});
342
343 // load/store: add more corner cases
344 for (unsigned Op : {G_LOAD, G_STORE}) {
345 auto &Action = getActionDefinitionsBuilder(Opcode: Op);
346 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s8, .Type1: p0, .MemTy: s8, .Align: 1},
347 {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: 1},
348 {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: 1},
349 {.Type0: s80, .Type1: p0, .MemTy: s80, .Align: 1},
350 {.Type0: p0, .Type1: p0, .MemTy: p0, .Align: 1},
351 {.Type0: v4s8, .Type1: p0, .MemTy: v4s8, .Align: 1}});
352 if (Is64Bit)
353 Action.legalForTypesWithMemDesc(
354 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s64, .Align: 1}, {.Type0: v2s32, .Type1: p0, .MemTy: v2s32, .Align: 1}});
355
356 if (HasSSE1)
357 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v4s32, .Type1: p0, .MemTy: v4s32, .Align: 1}});
358 if (HasSSE2)
359 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v16s8, .Type1: p0, .MemTy: v16s8, .Align: 1},
360 {.Type0: v8s16, .Type1: p0, .MemTy: v8s16, .Align: 1},
361 {.Type0: v2s64, .Type1: p0, .MemTy: v2s64, .Align: 1},
362 {.Type0: v2p0, .Type1: p0, .MemTy: v2p0, .Align: 1}});
363 if (HasAVX)
364 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v32s8, .Type1: p0, .MemTy: v32s8, .Align: 1},
365 {.Type0: v16s16, .Type1: p0, .MemTy: v16s16, .Align: 1},
366 {.Type0: v8s32, .Type1: p0, .MemTy: v8s32, .Align: 1},
367 {.Type0: v4s64, .Type1: p0, .MemTy: v4s64, .Align: 1},
368 {.Type0: v4p0, .Type1: p0, .MemTy: v4p0, .Align: 1}});
369 if (HasAVX512)
370 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: v64s8, .Type1: p0, .MemTy: v64s8, .Align: 1},
371 {.Type0: v32s16, .Type1: p0, .MemTy: v32s16, .Align: 1},
372 {.Type0: v16s32, .Type1: p0, .MemTy: v16s32, .Align: 1},
373 {.Type0: v8s64, .Type1: p0, .MemTy: v8s64, .Align: 1}});
374
375 // X86 supports extending loads but not stores for GPRs
376 if (Op == G_LOAD) {
377 Action.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s8, .Type1: p0, .MemTy: s1, .Align: 1},
378 {.Type0: s16, .Type1: p0, .MemTy: s8, .Align: 1},
379 {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 1},
380 {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 1}});
381 if (Is64Bit)
382 Action.legalForTypesWithMemDesc(
383 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: 1}, {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: 1}, {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: 1}});
384 } else {
385 Action.customIf(Predicate: [=](const LegalityQuery &Query) {
386 return Query.Types[0] != Query.MMODescrs[0].MemoryTy;
387 });
388 }
389 Action.widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
390 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
391 .scalarize(TypeIdx: 0);
392 }
393
394 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
395 auto &Action = getActionDefinitionsBuilder(Opcode: Op);
396 Action.legalForTypesWithMemDesc(
397 TypesAndMemDesc: {{.Type0: s16, .Type1: p0, .MemTy: s8, .Align: 1}, {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 1}, {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 1}});
398 if (Is64Bit)
399 Action.legalForTypesWithMemDesc(
400 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: 1}, {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: 1}, {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: 1}});
401 // TODO - SSE41/AVX2/AVX512F/AVX512BW vector extensions
402 }
403
404 for (unsigned Op : {G_FPEXTLOAD, G_FPTRUNCSTORE}) {
405 auto &Action = getActionDefinitionsBuilder(Opcode: Op);
406 Action.legalForTypesWithMemDesc(
407 Pred: UseX87, TypesAndMemDesc: {{.Type0: s80, .Type1: p0, .MemTy: s32, .Align: 1}, {.Type0: s80, .Type1: p0, .MemTy: s64, .Align: 1}, {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: 1}});
408 }
409
410 // sext, zext, and anyext
411 getActionDefinitionsBuilder(Opcode: G_ANYEXT)
412 .legalFor(Types: {s8, s16, s32, s128})
413 .legalFor(Pred: Is64Bit, Types: {s64})
414 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
415 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
416 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 8)
417 .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: sMaxScalar)
418 .scalarize(TypeIdx: 0);
419
420 getActionDefinitionsBuilder(Opcodes: {G_SEXT, G_ZEXT})
421 .legalFor(Types: {s8, s16, s32})
422 .legalFor(Pred: Is64Bit, Types: {s64})
423 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
424 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sMaxScalar)
425 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 8)
426 .clampScalar(TypeIdx: 1, MinTy: s8, MaxTy: sMaxScalar)
427 .scalarize(TypeIdx: 0);
428
429 getActionDefinitionsBuilder(Opcode: G_TRUNC).legalForCartesianProduct(
430 Types0: {s1, s8, s16, s32, s64}, Types1: {s8, s16, s32, s64, s128});
431
432 getActionDefinitionsBuilder(Opcode: G_SEXT_INREG).lower();
433
434 // fp constants
435 getActionDefinitionsBuilder(Opcode: G_FCONSTANT)
436 .legalFor(Types: {s32, s64})
437 .legalFor(Pred: UseX87, Types: {s80});
438
439 // fp arithmetic
440 getActionDefinitionsBuilder(Opcodes: {G_FADD, G_FSUB, G_FMUL, G_FDIV})
441 .legalFor(Types: {s32, s64})
442 .legalFor(Pred: HasSSE1, Types: {v4s32})
443 .legalFor(Pred: HasSSE2, Types: {v2s64})
444 .legalFor(Pred: HasAVX, Types: {v8s32, v4s64})
445 .legalFor(Pred: HasAVX512, Types: {v16s32, v8s64})
446 .legalFor(Pred: UseX87, Types: {s80});
447
448 getActionDefinitionsBuilder(Opcode: G_FABS)
449 .legalFor(Pred: UseX87, Types: {s80})
450 .legalFor(Pred: UseX87 && !Is64Bit, Types: {s64})
451 .lower();
452
453 // fp comparison
454 getActionDefinitionsBuilder(Opcode: G_FCMP)
455 .legalFor(Pred: HasSSE1 || UseX87, Types: {s8, s32})
456 .legalFor(Pred: HasSSE2 || UseX87, Types: {s8, s64})
457 .legalFor(Pred: UseX87, Types: {s8, s80})
458 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s8)
459 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32)
460 .widenScalarToNextPow2(TypeIdx: 1);
461
462 // fp conversions
463 getActionDefinitionsBuilder(Opcode: G_FPEXT)
464 .legalFor(Pred: HasSSE2, Types: {{s64, s32}})
465 .legalFor(Pred: HasAVX, Types: {{v4s64, v4s32}})
466 .legalFor(Pred: HasAVX512, Types: {{v8s64, v8s32}})
467 .lowerFor(Pred: UseX87, Types: {{s64, s32}, {s80, s32}, {s80, s64}})
468 .libcall();
469
470 getActionDefinitionsBuilder(Opcode: G_FPTRUNC)
471 .legalFor(Pred: HasSSE2, Types: {{s32, s64}})
472 .legalFor(Pred: HasAVX, Types: {{v4s32, v4s64}})
473 .legalFor(Pred: HasAVX512, Types: {{v8s32, v8s64}})
474 .lowerFor(Pred: UseX87, Types: {{s32, s64}, {s32, s80}, {s64, s80}});
475
476 getActionDefinitionsBuilder(Opcode: G_SITOFP)
477 .legalFor(Pred: HasSSE1, Types: {{s32, s32}})
478 .legalFor(Pred: HasSSE1 && Is64Bit, Types: {{s32, s64}})
479 .legalFor(Pred: HasSSE2, Types: {{s64, s32}})
480 .legalFor(Pred: HasSSE2 && Is64Bit, Types: {{s64, s64}})
481 .clampScalar(TypeIdx: 1, MinTy: (UseX87 && !HasSSE1) ? s16 : s32, MaxTy: sMaxScalar)
482 .widenScalarToNextPow2(TypeIdx: 1)
483 .customForCartesianProduct(Pred: UseX87, Types0: {s32, s64, s80}, Types1: {s16, s32, s64})
484 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32)
485 .widenScalarToNextPow2(TypeIdx: 0);
486
487 getActionDefinitionsBuilder(Opcode: G_FPTOSI)
488 .legalFor(Pred: HasSSE1, Types: {{s32, s32}})
489 .legalFor(Pred: HasSSE1 && Is64Bit, Types: {{s64, s32}})
490 .legalFor(Pred: HasSSE2, Types: {{s32, s64}})
491 .legalFor(Pred: HasSSE2 && Is64Bit, Types: {{s64, s64}})
492 .clampScalar(TypeIdx: 0, MinTy: (UseX87 && !HasSSE1) ? s16 : s32, MaxTy: sMaxScalar)
493 .widenScalarToNextPow2(TypeIdx: 0)
494 .customForCartesianProduct(Pred: UseX87, Types0: {s16, s32, s64}, Types1: {s32, s64, s80})
495 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32)
496 .widenScalarToNextPow2(TypeIdx: 1);
497
498 // For G_UITOFP and G_FPTOUI without AVX512, we have to custom legalize types
499 // <= s32 manually. Otherwise, in custom handler there is no way to
500 // understand whether s32 is an original type and we need to promote it to
501 // s64 or s32 is obtained after widening and we shouldn't widen it to s64.
502 //
503 // For AVX512 we simply widen types as there is direct mapping from opcodes
504 // to asm instructions.
505 getActionDefinitionsBuilder(Opcode: G_UITOFP)
506 .legalFor(Pred: HasAVX512, Types: {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
507 .customIf(Predicate: [=](const LegalityQuery &Query) {
508 return !HasAVX512 &&
509 ((HasSSE1 && typeIs(TypeIdx: 0, TypesInit: s32)(Query)) ||
510 (HasSSE2 && typeIs(TypeIdx: 0, TypesInit: s64)(Query))) &&
511 scalarNarrowerThan(TypeIdx: 1, Size: Is64Bit ? 64 : 32)(Query);
512 })
513 .lowerIf(Predicate: [=](const LegalityQuery &Query) {
514 // Lower conversions from s64
515 return !HasAVX512 &&
516 ((HasSSE1 && typeIs(TypeIdx: 0, TypesInit: s32)(Query)) ||
517 (HasSSE2 && typeIs(TypeIdx: 0, TypesInit: s64)(Query))) &&
518 (Is64Bit && typeIs(TypeIdx: 1, TypesInit: s64)(Query));
519 })
520 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32)
521 .widenScalarToNextPow2(TypeIdx: 0)
522 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: sMaxScalar)
523 .widenScalarToNextPow2(TypeIdx: 1);
524
525 getActionDefinitionsBuilder(Opcode: G_FPTOUI)
526 .legalFor(Pred: HasAVX512, Types: {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
527 .customIf(Predicate: [=](const LegalityQuery &Query) {
528 return !HasAVX512 &&
529 ((HasSSE1 && typeIs(TypeIdx: 1, TypesInit: s32)(Query)) ||
530 (HasSSE2 && typeIs(TypeIdx: 1, TypesInit: s64)(Query))) &&
531 scalarNarrowerThan(TypeIdx: 0, Size: Is64Bit ? 64 : 32)(Query);
532 })
533 // TODO: replace with customized legalization using
534 // specifics of cvttsd2si. The selection of this node requires
535 // a vector type. Either G_SCALAR_TO_VECTOR is needed or more advanced
536 // support of G_BUILD_VECTOR/G_INSERT_VECTOR_ELT is required beforehand.
537 .lowerIf(Predicate: [=](const LegalityQuery &Query) {
538 return !HasAVX512 &&
539 ((HasSSE1 && typeIs(TypeIdx: 1, TypesInit: s32)(Query)) ||
540 (HasSSE2 && typeIs(TypeIdx: 1, TypesInit: s64)(Query))) &&
541 (Is64Bit && typeIs(TypeIdx: 0, TypesInit: s64)(Query));
542 })
543 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sMaxScalar)
544 .widenScalarToNextPow2(TypeIdx: 0)
545 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: HasSSE2 ? s64 : s32)
546 .widenScalarToNextPow2(TypeIdx: 1);
547
548 // vector ops
549 getActionDefinitionsBuilder(Opcode: G_BUILD_VECTOR)
550 .customIf(Predicate: [=](const LegalityQuery &Query) {
551 return (HasSSE1 && typeInSet(TypeIdx: 0, TypesInit: {v4s32})(Query)) ||
552 (HasSSE2 && typeInSet(TypeIdx: 0, TypesInit: {v2s64, v8s16, v16s8})(Query)) ||
553 (HasAVX && typeInSet(TypeIdx: 0, TypesInit: {v4s64, v8s32, v16s16, v32s8})(Query)) ||
554 (HasAVX512 &&
555 typeInSet(TypeIdx: 0, TypesInit: {v8s64, v16s32, v32s16, v64s8})(Query));
556 })
557 .clampNumElements(TypeIdx: 0, MinTy: v16s8, MaxTy: s8MaxVector)
558 .clampNumElements(TypeIdx: 0, MinTy: v8s16, MaxTy: s16MaxVector)
559 .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: s32MaxVector)
560 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: s64MaxVector)
561 .moreElementsToNextPow2(TypeIdx: 0);
562
563 getActionDefinitionsBuilder(Opcodes: {G_EXTRACT, G_INSERT})
564 .legalIf(Predicate: [=](const LegalityQuery &Query) {
565 unsigned SubIdx = Query.Opcode == G_EXTRACT ? 0 : 1;
566 unsigned FullIdx = Query.Opcode == G_EXTRACT ? 1 : 0;
567 return (HasAVX && typePairInSet(TypeIdx0: SubIdx, TypeIdx1: FullIdx,
568 TypesInit: {{v16s8, v32s8},
569 {v8s16, v16s16},
570 {v4s32, v8s32},
571 {v2s64, v4s64}})(Query)) ||
572 (HasAVX512 && typePairInSet(TypeIdx0: SubIdx, TypeIdx1: FullIdx,
573 TypesInit: {{v16s8, v64s8},
574 {v32s8, v64s8},
575 {v8s16, v32s16},
576 {v16s16, v32s16},
577 {v4s32, v16s32},
578 {v8s32, v16s32},
579 {v2s64, v8s64},
580 {v4s64, v8s64}})(Query));
581 });
582
583 // todo: only permit dst types up to max legal vector register size?
584 getActionDefinitionsBuilder(Opcode: G_CONCAT_VECTORS)
585 .legalFor(
586 Pred: HasSSE1,
587 Types: {{v32s8, v16s8}, {v16s16, v8s16}, {v8s32, v4s32}, {v4s64, v2s64}})
588 .legalFor(Pred: HasAVX, Types: {{v64s8, v16s8},
589 {v64s8, v32s8},
590 {v32s16, v8s16},
591 {v32s16, v16s16},
592 {v16s32, v4s32},
593 {v16s32, v8s32},
594 {v8s64, v2s64},
595 {v8s64, v4s64}});
596
597 // todo: vectors and address spaces
598 getActionDefinitionsBuilder(Opcode: G_SELECT)
599 .legalFor(Types: {{s16, s32}, {s32, s32}, {p0, s32}})
600 .legalFor(Pred: !HasCMOV, Types: {{s8, s32}})
601 .legalFor(Pred: Is64Bit, Types: {{s64, s32}})
602 .legalFor(Pred: UseX87, Types: {{s80, s32}})
603 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s32)
604 .widenScalarToNextPow2(TypeIdx: 0, /*Min=*/MinSize: 8)
605 .clampScalar(TypeIdx: 0, MinTy: HasCMOV ? s16 : s8, MaxTy: sMaxScalar);
606
607 // memory intrinsics
608 getActionDefinitionsBuilder(Opcodes: {G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
609
610 getActionDefinitionsBuilder(Opcodes: {G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
611 .lower();
612
613 // fp intrinsics
614 // fpclass for i686 is disabled for llvm issue #171992
615 getActionDefinitionsBuilder(Opcode: G_IS_FPCLASS)
616 .lowerFor(Pred: Is64Bit, Types: {{s1, s32}, {s1, s64}, {s1, s80}});
617
618 getActionDefinitionsBuilder(Opcodes: {G_INTRINSIC_ROUNDEVEN, G_INTRINSIC_TRUNC})
619 .scalarize(TypeIdx: 0)
620 .minScalar(TypeIdx: 0, Ty: LLT::scalar(SizeInBits: 32))
621 .libcall();
622
623 getActionDefinitionsBuilder(Opcodes: {G_INTRINSIC, G_INTRINSIC_W_SIDE_EFFECTS})
624 .alwaysLegal();
625 getActionDefinitionsBuilder(Opcodes: {G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
626 getActionDefinitionsBuilder(Opcode: G_INVOKE_REGION_START).alwaysLegal();
627
628 getLegacyLegalizerInfo().computeTables();
629 verify(MII: *STI.getInstrInfo());
630}
631
632bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
633 LostDebugLocObserver &LocObserver) const {
634 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
635 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
636 switch (MI.getOpcode()) {
637 default:
638 // No idea what to do.
639 return false;
640 case TargetOpcode::G_BUILD_VECTOR:
641 return legalizeBuildVector(MI, MRI, Helper);
642 case TargetOpcode::G_FPTOUI:
643 return legalizeFPTOUI(MI, MRI, Helper);
644 case TargetOpcode::G_UITOFP:
645 return legalizeUITOFP(MI, MRI, Helper);
646 case TargetOpcode::G_STORE:
647 return legalizeNarrowingStore(MI, MRI, Helper);
648 case TargetOpcode::G_SITOFP:
649 return legalizeSITOFP(MI, MRI, Helper);
650 case TargetOpcode::G_FPTOSI:
651 return legalizeFPTOSI(MI, MRI, Helper);
652 case TargetOpcode::G_GET_ROUNDING:
653 return legalizeGETROUNDING(MI, MRI, Helper);
654 case TargetOpcode::G_SET_ROUNDING:
655 return legalizeSETROUNDING(MI, MRI, Helper);
656 case TargetOpcode::G_GLOBAL_VALUE:
657 return legalizeGLOBAL_VALUE(MI, MRI, Helper);
658 }
659 llvm_unreachable("expected switch to return");
660}
661
662bool X86LegalizerInfo::legalizeSITOFP(MachineInstr &MI,
663 MachineRegisterInfo &MRI,
664 LegalizerHelper &Helper) const {
665 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
666 MachineFunction &MF = *MI.getMF();
667 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
668
669 assert((SrcTy.getSizeInBits() == 16 || SrcTy.getSizeInBits() == 32 ||
670 SrcTy.getSizeInBits() == 64) &&
671 "Unexpected source type for SITOFP in X87 mode.");
672
673 TypeSize MemSize = SrcTy.getSizeInBytes();
674 MachinePointerInfo PtrInfo;
675 Align Alignmt = Helper.getStackTemporaryAlignment(Type: SrcTy);
676 auto SlotPointer = Helper.createStackTemporary(Bytes: MemSize, Alignment: Alignmt, PtrInfo);
677 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
678 PtrInfo, F: MachineMemOperand::MOStore, Size: MemSize, BaseAlignment: Align(MemSize));
679
680 // Store the integer value on the FPU stack.
681 MIRBuilder.buildStore(Val: Src, Addr: SlotPointer, MMO&: *StoreMMO);
682
683 MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
684 PtrInfo, F: MachineMemOperand::MOLoad, Size: MemSize, BaseAlignment: Align(MemSize));
685 MIRBuilder.buildInstr(Opcode: X86::G_FILD)
686 .addDef(RegNo: Dst)
687 .addUse(RegNo: SlotPointer.getReg(Idx: 0))
688 .addMemOperand(MMO: LoadMMO);
689
690 MI.eraseFromParent();
691 return true;
692}
693
694bool X86LegalizerInfo::legalizeFPTOSI(MachineInstr &MI,
695 MachineRegisterInfo &MRI,
696 LegalizerHelper &Helper) const {
697 MachineFunction &MF = *MI.getMF();
698 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
699 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
700
701 TypeSize MemSize = DstTy.getSizeInBytes();
702 MachinePointerInfo PtrInfo;
703 Align Alignmt = Helper.getStackTemporaryAlignment(Type: DstTy);
704 auto SlotPointer = Helper.createStackTemporary(Bytes: MemSize, Alignment: Alignmt, PtrInfo);
705 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
706 PtrInfo, F: MachineMemOperand::MOStore, Size: MemSize, BaseAlignment: Align(MemSize));
707
708 MIRBuilder.buildInstr(Opcode: X86::G_FIST)
709 .addUse(RegNo: Src)
710 .addUse(RegNo: SlotPointer.getReg(Idx: 0))
711 .addMemOperand(MMO: StoreMMO);
712
713 MIRBuilder.buildLoad(Res: Dst, Addr: SlotPointer, PtrInfo, Alignment: Align(MemSize));
714 MI.eraseFromParent();
715 return true;
716}
717
718bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
719 MachineRegisterInfo &MRI,
720 LegalizerHelper &Helper) const {
721 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
722 const auto &BuildVector = cast<GBuildVector>(Val&: MI);
723 Register Dst = BuildVector.getReg(Idx: 0);
724 LLT DstTy = MRI.getType(Reg: Dst);
725 MachineFunction &MF = MIRBuilder.getMF();
726 LLVMContext &Ctx = MF.getFunction().getContext();
727 uint64_t DstTySize = DstTy.getScalarSizeInBits();
728
729 SmallVector<Constant *, 4> CstIdxs;
730 for (unsigned i = 0; i < BuildVector.getNumSources(); ++i) {
731 Register Source = BuildVector.getSourceReg(I: i);
732
733 auto ValueAndReg = getIConstantVRegValWithLookThrough(VReg: Source, MRI);
734 if (ValueAndReg) {
735 CstIdxs.emplace_back(Args: ConstantInt::get(Context&: Ctx, V: ValueAndReg->Value));
736 continue;
737 }
738
739 auto FPValueAndReg = getFConstantVRegValWithLookThrough(VReg: Source, MRI);
740 if (FPValueAndReg) {
741 CstIdxs.emplace_back(Args: ConstantFP::get(Context&: Ctx, V: FPValueAndReg->Value));
742 continue;
743 }
744
745 if (getOpcodeDef<GImplicitDef>(Reg: Source, MRI)) {
746 CstIdxs.emplace_back(Args: UndefValue::get(T: Type::getIntNTy(C&: Ctx, N: DstTySize)));
747 continue;
748 }
749 return false;
750 }
751
752 Constant *ConstVal = ConstantVector::get(V: CstIdxs);
753
754 const DataLayout &DL = MIRBuilder.getDataLayout();
755 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
756 Align Alignment(DL.getABITypeAlign(Ty: ConstVal->getType()));
757 auto Addr = MIRBuilder.buildConstantPool(
758 Res: LLT::pointer(AddressSpace: AddrSpace, SizeInBits: DL.getPointerSizeInBits(AS: AddrSpace)),
759 Idx: MF.getConstantPool()->getConstantPoolIndex(C: ConstVal, Alignment));
760 MachineMemOperand *MMO =
761 MF.getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF),
762 f: MachineMemOperand::MOLoad, MemTy: DstTy, base_alignment: Alignment);
763
764 MIRBuilder.buildLoad(Res: Dst, Addr, MMO&: *MMO);
765 MI.eraseFromParent();
766 return true;
767}
768
769bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
770 MachineRegisterInfo &MRI,
771 LegalizerHelper &Helper) const {
772 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
773 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
774 unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
775 const LLT s32 = LLT::scalar(SizeInBits: 32);
776 const LLT s64 = LLT::scalar(SizeInBits: 64);
777
778 // Simply reuse FPTOSI when it is possible to widen the type
779 if (DstSizeInBits <= 32) {
780 auto Casted = MIRBuilder.buildFPTOSI(Dst: DstTy == s32 ? s64 : s32, Src0: Src);
781 MIRBuilder.buildTrunc(Res: Dst, Op: Casted);
782 MI.eraseFromParent();
783 return true;
784 }
785
786 return false;
787}
788
789bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
790 MachineRegisterInfo &MRI,
791 LegalizerHelper &Helper) const {
792 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
793 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
794 const LLT s32 = LLT::scalar(SizeInBits: 32);
795 const LLT s64 = LLT::scalar(SizeInBits: 64);
796
797 // Simply reuse SITOFP when it is possible to widen the type
798 if (SrcTy.getSizeInBits() <= 32) {
799 auto Ext = MIRBuilder.buildZExt(Res: SrcTy == s32 ? s64 : s32, Op: Src);
800 MIRBuilder.buildSITOFP(Dst, Src0: Ext);
801 MI.eraseFromParent();
802 return true;
803 }
804
805 return false;
806}
807
808bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
809 MachineRegisterInfo &MRI,
810 LegalizerHelper &Helper) const {
811 auto &Store = cast<GStore>(Val&: MI);
812 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
813 MachineMemOperand &MMO = **Store.memoperands_begin();
814 MachineFunction &MF = MIRBuilder.getMF();
815 LLT ValTy = MRI.getType(Reg: Store.getValueReg());
816 auto *NewMMO = MF.getMachineMemOperand(MMO: &MMO, PtrInfo: MMO.getPointerInfo(), Ty: ValTy);
817
818 Helper.Observer.changingInstr(MI&: Store);
819 Store.setMemRefs(MF, MemRefs: {NewMMO});
820 Helper.Observer.changedInstr(MI&: Store);
821 return true;
822}
823
824bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI,
825 MachineRegisterInfo &MRI,
826 LegalizerHelper &Helper) const {
827 /*
828 The rounding mode is in bits 11:10 of FPSR, and has the following
829 settings:
830 00 Round to nearest
831 01 Round to -inf
832 10 Round to +inf
833 11 Round to 0
834
835 GET_ROUNDING, on the other hand, expects the following:
836 -1 Undefined
837 0 Round to 0
838 1 Round to nearest
839 2 Round to +inf
840 3 Round to -inf
841
842 To perform the conversion, we use a packed lookup table of the four 2-bit
843 values that we can index by FPSP[11:10]
844 0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10]
845
846 (0x2d >> ((FPSR >> 9) & 6)) & 3
847 */
848
849 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
850 MachineFunction &MF = MIRBuilder.getMF();
851 Register Dst = MI.getOperand(i: 0).getReg();
852 LLT DstTy = MRI.getType(Reg: Dst);
853 const LLT s8 = LLT::scalar(SizeInBits: 8);
854 const LLT s16 = LLT::scalar(SizeInBits: 16);
855 const LLT s32 = LLT::scalar(SizeInBits: 32);
856
857 // Save FP Control Word to stack slot
858 int MemSize = 2;
859 Align Alignment = Align(2);
860 MachinePointerInfo PtrInfo;
861 auto StackTemp = Helper.createStackTemporary(Bytes: TypeSize::getFixed(ExactSize: MemSize),
862 Alignment, PtrInfo);
863 Register StackPtr = StackTemp.getReg(Idx: 0);
864
865 auto StoreMMO = MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOStore,
866 Size: MemSize, BaseAlignment: Alignment);
867
868 // Store FP Control Word to stack slot using G_FNSTCW16
869 MIRBuilder.buildInstr(Opcode: X86::G_FNSTCW16)
870 .addUse(RegNo: StackPtr)
871 .addMemOperand(MMO: StoreMMO);
872
873 // Load FP Control Word from stack slot
874 auto LoadMMO = MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOLoad,
875 Size: MemSize, BaseAlignment: Alignment);
876
877 auto CWD32 =
878 MIRBuilder.buildZExt(Res: s32, Op: MIRBuilder.buildLoad(Res: s16, Addr: StackPtr, MMO&: *LoadMMO));
879 auto Shifted8 = MIRBuilder.buildTrunc(
880 Res: s8, Op: MIRBuilder.buildLShr(Dst: s32, Src0: CWD32, Src1: MIRBuilder.buildConstant(Res: s8, Val: 9)));
881 auto Masked32 = MIRBuilder.buildZExt(
882 Res: s32, Op: MIRBuilder.buildAnd(Dst: s8, Src0: Shifted8, Src1: MIRBuilder.buildConstant(Res: s8, Val: 6)));
883
884 // LUT is a packed lookup table (0x2d) used to map the 2-bit x87 FPU rounding
885 // mode (from bits 11:10 of the control word) to the values expected by
886 // GET_ROUNDING. The mapping is performed by shifting LUT right by the
887 // extracted rounding mode and masking the result with 3 to obtain the final
888 auto LUT = MIRBuilder.buildConstant(Res: s32, Val: 0x2d);
889 auto LUTShifted = MIRBuilder.buildLShr(Dst: s32, Src0: LUT, Src1: Masked32);
890 auto RetVal =
891 MIRBuilder.buildAnd(Dst: s32, Src0: LUTShifted, Src1: MIRBuilder.buildConstant(Res: s32, Val: 3));
892 auto RetValTrunc = MIRBuilder.buildZExtOrTrunc(Res: DstTy, Op: RetVal);
893
894 MIRBuilder.buildCopy(Res: Dst, Op: RetValTrunc);
895
896 MI.eraseFromParent();
897 return true;
898}
899
900bool X86LegalizerInfo::legalizeSETROUNDING(MachineInstr &MI,
901 MachineRegisterInfo &MRI,
902 LegalizerHelper &Helper) const {
903 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
904 MachineFunction &MF = MIRBuilder.getMF();
905 Register Src = MI.getOperand(i: 0).getReg();
906 const LLT s8 = LLT::scalar(SizeInBits: 8);
907 const LLT s16 = LLT::scalar(SizeInBits: 16);
908 const LLT s32 = LLT::scalar(SizeInBits: 32);
909
910 // Allocate stack slot for control word and MXCSR (4 bytes).
911 int MemSize = 4;
912 Align Alignment = Align(4);
913 MachinePointerInfo PtrInfo;
914 auto StackTemp = Helper.createStackTemporary(Bytes: TypeSize::getFixed(ExactSize: MemSize),
915 Alignment, PtrInfo);
916 Register StackPtr = StackTemp.getReg(Idx: 0);
917
918 auto StoreMMO =
919 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOStore, Size: 2, BaseAlignment: Align(2));
920 MIRBuilder.buildInstr(Opcode: X86::G_FNSTCW16)
921 .addUse(RegNo: StackPtr)
922 .addMemOperand(MMO: StoreMMO);
923
924 auto LoadMMO =
925 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOLoad, Size: 2, BaseAlignment: Align(2));
926 auto CWD16 = MIRBuilder.buildLoad(Res: s16, Addr: StackPtr, MMO&: *LoadMMO);
927
928 // Clear RM field (bits 11:10)
929 auto ClearedCWD =
930 MIRBuilder.buildAnd(Dst: s16, Src0: CWD16, Src1: MIRBuilder.buildConstant(Res: s16, Val: 0xf3ff));
931
932 // Check if Src is a constant
933 auto *SrcDef = MRI.getVRegDef(Reg: Src);
934 Register RMBits;
935 Register MXCSRRMBits;
936
937 if (SrcDef && SrcDef->getOpcode() == TargetOpcode::G_CONSTANT) {
938 uint64_t RM = getIConstantFromReg(VReg: Src, MRI).getZExtValue();
939 int FieldVal = X86::getRoundingModeX86(RM);
940
941 if (FieldVal == X86::rmInvalid) {
942 FieldVal = X86::rmToNearest;
943 LLVMContext &C = MF.getFunction().getContext();
944 C.diagnose(DI: DiagnosticInfoUnsupported(
945 MF.getFunction(), "rounding mode is not supported by X86 hardware",
946 DiagnosticLocation(MI.getDebugLoc()), DS_Error));
947 return false;
948 }
949
950 FieldVal = FieldVal << 3;
951 RMBits = MIRBuilder.buildConstant(Res: s16, Val: FieldVal).getReg(Idx: 0);
952 MXCSRRMBits = MIRBuilder.buildConstant(Res: s32, Val: FieldVal).getReg(Idx: 0);
953 } else {
954 // Convert Src (rounding mode) to bits for control word
955 // (0xc9 << (2 * Src + 4)) & 0xc00
956 auto Src32 = MIRBuilder.buildZExtOrTrunc(Res: s32, Op: Src);
957 auto ShiftAmt = MIRBuilder.buildAdd(
958 Dst: s32, Src0: MIRBuilder.buildShl(Dst: s32, Src0: Src32, Src1: MIRBuilder.buildConstant(Res: s32, Val: 1)),
959 Src1: MIRBuilder.buildConstant(Res: s32, Val: 4));
960 auto ShiftAmt8 = MIRBuilder.buildTrunc(Res: s8, Op: ShiftAmt);
961 auto Shifted = MIRBuilder.buildShl(Dst: s16, Src0: MIRBuilder.buildConstant(Res: s16, Val: 0xc9),
962 Src1: ShiftAmt8);
963 RMBits =
964 MIRBuilder.buildAnd(Dst: s16, Src0: Shifted, Src1: MIRBuilder.buildConstant(Res: s16, Val: 0xc00))
965 .getReg(Idx: 0);
966
967 // For non-constant case, we still need to compute MXCSR bits dynamically
968 auto RMBits32 = MIRBuilder.buildZExt(Res: s32, Op: RMBits);
969 MXCSRRMBits =
970 MIRBuilder.buildShl(Dst: s32, Src0: RMBits32, Src1: MIRBuilder.buildConstant(Res: s32, Val: 3))
971 .getReg(Idx: 0);
972 }
973 // Update rounding mode bits
974 auto NewCWD =
975 MIRBuilder.buildOr(Dst: s16, Src0: ClearedCWD, Src1: RMBits, Flags: MachineInstr::Disjoint);
976
977 // Store new FP Control Word to stack
978 auto StoreNewMMO =
979 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOStore, Size: 2, BaseAlignment: Align(2));
980 MIRBuilder.buildStore(Val: NewCWD, Addr: StackPtr, MMO&: *StoreNewMMO);
981
982 // Load FP control word from the slot using G_FLDCW16
983 auto LoadNewMMO =
984 MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOLoad, Size: 2, BaseAlignment: Align(2));
985 MIRBuilder.buildInstr(Opcode: X86::G_FLDCW16)
986 .addUse(RegNo: StackPtr)
987 .addMemOperand(MMO: LoadNewMMO);
988
989 if (Subtarget.hasSSE1()) {
990 // Store MXCSR to stack (use STMXCSR)
991 auto StoreMXCSRMMO = MF.getMachineMemOperand(
992 PtrInfo, F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(4));
993 MIRBuilder.buildInstr(Opcode: TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
994 .addIntrinsicID(ID: Intrinsic::x86_sse_stmxcsr)
995 .addUse(RegNo: StackPtr)
996 .addMemOperand(MMO: StoreMXCSRMMO);
997
998 // Load MXCSR from stack
999 auto LoadMXCSRMMO = MF.getMachineMemOperand(
1000 PtrInfo, F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(4));
1001 auto MXCSR = MIRBuilder.buildLoad(Res: s32, Addr: StackPtr, MMO&: *LoadMXCSRMMO);
1002
1003 // Clear RM field (bits 14:13)
1004 auto ClearedMXCSR = MIRBuilder.buildAnd(
1005 Dst: s32, Src0: MXCSR, Src1: MIRBuilder.buildConstant(Res: s32, Val: 0xffff9fff));
1006
1007 // Update rounding mode bits
1008 auto NewMXCSR = MIRBuilder.buildOr(Dst: s32, Src0: ClearedMXCSR, Src1: MXCSRRMBits);
1009
1010 // Store new MXCSR to stack
1011 auto StoreNewMXCSRMMO = MF.getMachineMemOperand(
1012 PtrInfo, F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Align(4));
1013 MIRBuilder.buildStore(Val: NewMXCSR, Addr: StackPtr, MMO&: *StoreNewMXCSRMMO);
1014
1015 // Load MXCSR from stack (use LDMXCSR)
1016 auto LoadNewMXCSRMMO = MF.getMachineMemOperand(
1017 PtrInfo, F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(4));
1018 MIRBuilder.buildInstr(Opcode: TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
1019 .addIntrinsicID(ID: Intrinsic::x86_sse_ldmxcsr)
1020 .addUse(RegNo: StackPtr)
1021 .addMemOperand(MMO: LoadNewMXCSRMMO);
1022 }
1023
1024 MI.eraseFromParent();
1025 return true;
1026}
1027
1028bool X86LegalizerInfo::legalizeGLOBAL_VALUE(MachineInstr &MI,
1029 MachineRegisterInfo &MRI,
1030 LegalizerHelper &Helper) const {
1031 const GlobalValue *GV = MI.getOperand(i: 1).getGlobal();
1032 Register Dst = MI.getOperand(i: 0).getReg();
1033 LLT DstTy = MRI.getType(Reg: Dst);
1034 unsigned GVOpFlags = Subtarget.classifyGlobalReference(GV);
1035
1036 // For stub references (GOT/PLT), we need G_WRAPPER_RIP + load
1037 if (isGlobalStubReference(TargetFlag: GVOpFlags)) {
1038 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1039 MachineFunction &MF = MIRBuilder.getMF();
1040
1041 Register StubAddr = MRI.createGenericVirtualRegister(Ty: DstTy);
1042 MIRBuilder.buildInstr(Opcode: X86::G_WRAPPER_RIP)
1043 .addDef(RegNo: StubAddr)
1044 .addGlobalAddress(GV);
1045
1046 MachineMemOperand *MMO = MF.getMachineMemOperand(
1047 PtrInfo: MachinePointerInfo::getGOT(MF), f: MachineMemOperand::MOLoad, MemTy: DstTy,
1048 base_alignment: Align(DstTy.getSizeInBytes()));
1049 MIRBuilder.buildLoad(Res: Dst, Addr: StubAddr, MMO&: *MMO);
1050 MI.eraseFromParent();
1051 }
1052 return true;
1053}
1054
1055bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1056 MachineInstr &MI) const {
1057 return true;
1058}
1059