//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/MathExtras.h"
#include <initializer_list>

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;
using namespace MIPatternMatch;

AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
    : ST(&ST) {
  using namespace TargetOpcode;
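  // Type shorthands used below: sN is an N-bit scalar, vMsN is a fixed-width
  // vector of M elements of N bits, nxvMsN is the equivalent SVE scalable
  // vector, and p0 is a 64-bit pointer in address space 0.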
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT v16s8 = LLT::fixed_vector(16, 8);
  const LLT v8s8 = LLT::fixed_vector(8, 8);
  const LLT v4s8 = LLT::fixed_vector(4, 8);
  const LLT v2s8 = LLT::fixed_vector(2, 8);
  const LLT v8s16 = LLT::fixed_vector(8, 16);
  const LLT v4s16 = LLT::fixed_vector(4, 16);
  const LLT v2s16 = LLT::fixed_vector(2, 16);
  const LLT v2s32 = LLT::fixed_vector(2, 32);
  const LLT v4s32 = LLT::fixed_vector(4, 32);
  const LLT v2s64 = LLT::fixed_vector(2, 64);
  const LLT v2p0 = LLT::fixed_vector(2, p0);

  const LLT nxv16s8 = LLT::scalable_vector(16, s8);
  const LLT nxv8s16 = LLT::scalable_vector(8, s16);
  const LLT nxv4s32 = LLT::scalable_vector(4, s32);
  const LLT nxv2s64 = LLT::scalable_vector(2, s64);

  std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
                                                        v16s8, v8s16, v4s32,
                                                        v2s64, v2p0,
                                                        /* End 128bit types */
                                                        /* Begin 64bit types */
                                                        v8s8, v4s16, v2s32};
  std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
  SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
  SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);

  const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();

  // FIXME: support subtargets which have neon/fp-armv8 disabled.
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    getLegacyLegalizerInfo().computeTables();
    return;
  }

  // Some instructions only support s16 if the subtarget has full 16-bit FP
  // support.
  const bool HasFP16 = ST.hasFullFP16();
  const LLT &MinFPScalar = HasFP16 ? s16 : s32;

  const bool HasCSSC = ST.hasCSSC();
  const bool HasRCPC3 = ST.hasRCPC3();
  const bool HasSVE = ST.hasSVE();

  getActionDefinitionsBuilder(
      {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
      .legalFor({p0, s8, s16, s32, s64})
      .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
                 v2s64, v2p0})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s8, s64)
      .moreElementsToNextPow2(0)
      .widenVectorEltsToVectorMinSize(0, 64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64})
      .legalFor(PackedVectorAllTypeList)
      .widenScalarToNextPow2(0)
      .moreElementsToNextPow2(0)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .clampScalar(0, s16, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2);

  getActionDefinitionsBuilder(G_INSERT)
      .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
                   smallerThan(1, 0)))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(1)
      .minScalar(1, s8)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);

  getActionDefinitionsBuilder(G_EXTRACT)
      .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
                   typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
      .widenScalarToNextPow2(1)
      .clampScalar(1, s32, s128)
      .widenScalarToNextPow2(0)
      .minScalar(0, s16)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
      .maxScalarIf(typeInSet(1, {s128}), 0, s64);

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
      .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, s64)
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 2;
          },
          0, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 4;
          },
          0, s16)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 16;
          },
          0, s8)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder(G_MUL)
      .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, s64)
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 2;
          },
          0, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 4;
          },
          0, s16)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 16;
          },
          0, s8)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .moreElementsToNextPow2(0);

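  // Shifts: a 32-bit scalar shift whose amount is also 32 bits goes through
  // custom legalization; vector shifts are legal for the standard 64- and
  // 128-bit NEON types listed below.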
  getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor({
          {s32, s32},
          {s32, s64},
          {s64, s64},
          {v8s8, v8s8},
          {v16s8, v16s8},
          {v4s16, v4s16},
          {v8s16, v8s16},
          {v2s32, v2s32},
          {v4s32, v4s32},
          {v2s64, v2s64},
      })
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);

  getActionDefinitionsBuilder(G_PTR_ADD)
      .legalFor({{p0, s64}, {v2p0, v2s64}})
      .clampScalarOrElt(1, s64, s64)
      .clampNumElements(0, v2p0, v2p0);

  getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});

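  // Integer division: s32 and s64 divides are legal, s128 becomes a libcall,
  // and the remainder / combined div-rem opcodes are lowered (ultimately in
  // terms of the corresponding division).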
  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .libcallFor({s128})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
      .lowerFor({s8, s16, s32, s64, v2s32, v4s32, v2s64})
      .libcallFor({s128})
      .widenScalarOrEltToNextPow2(0)
      .minScalarOrElt(0, s32)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .scalarize(0);

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .widenScalarToNextPow2(0, /*Min = */ 32)
      .clampScalar(0, s32, s64)
      .lower();

  getActionDefinitionsBuilder({G_SMULH, G_UMULH})
      .legalFor({s64, v16s8, v8s16, v4s32})
      .lower();

  getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
      .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .legalFor(HasCSSC, {s32, s64})
      .minScalar(HasCSSC, 0, s32)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .lower();

  // FIXME: Legal vector types are only legal with NEON.
  getActionDefinitionsBuilder(G_ABS)
      .legalFor(HasCSSC, {s32, s64})
      .legalFor(PackedVectorAllTypeList)
      .customIf([=](const LegalityQuery &Q) {
        // TODO: Fix suboptimal codegen for 128+ bit types.
        LLT SrcTy = Q.Types[0];
        return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
      })
      .widenScalarIf(
          [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
          [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
      .widenScalarIf(
          [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
          [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .lower();

  getActionDefinitionsBuilder(
      {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
      .legalFor({{s32, s32}, {s64, s32}})
      .clampScalar(0, s32, s64)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_FSHL, G_FSHR})
      .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
      .lower();

  getActionDefinitionsBuilder(G_ROTR)
      .legalFor({{s32, s64}, {s64, s64}})
      .customIf([=](const LegalityQuery &Q) {
        return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
      })
      .lower();
  getActionDefinitionsBuilder(G_ROTL).lower();

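  // Bitfield extracts are custom-legalized for the s32 and s64 forms.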
  getActionDefinitionsBuilder({G_SBFX, G_UBFX})
      .customFor({{s32, s32}, {s64, s64}});

  auto always = [=](const LegalityQuery &Q) { return true; };
  getActionDefinitionsBuilder(G_CTPOP)
      .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
      .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
      .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
      .customFor({{s128, s128},
                  {v4s16, v4s16},
                  {v8s16, v8s16},
                  {v2s32, v2s32},
                  {v4s32, v4s32},
                  {v2s64, v2s64}})
      .clampScalar(0, s32, s128)
      .widenScalarToNextPow2(0)
      .minScalarEltSameAsIf(always, 1, 0)
      .maxScalarEltSameAsIf(always, 1, 0)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);

  getActionDefinitionsBuilder(G_CTLZ)
      .legalFor({{s32, s32},
                 {s64, s64},
                 {v8s8, v8s8},
                 {v16s8, v16s8},
                 {v4s16, v4s16},
                 {v8s16, v8s16},
                 {v2s32, v2s32},
                 {v4s32, v4s32}})
      .widenScalarToNextPow2(1, /*Min=*/32)
      .clampScalar(1, s32, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .moreElementsToNextPow2(0)
      .scalarizeIf(scalarOrEltWiderThan(0, 32), 0)
      .scalarSameSizeAs(0, 1);

  getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();

  getActionDefinitionsBuilder(G_CTTZ)
      .lowerIf(isVector(0))
      .widenScalarToNextPow2(1, /*Min=*/32)
      .clampScalar(1, s32, s64)
      .scalarSameSizeAs(0, 1)
      .legalFor(HasCSSC, {s32, s64})
      .customFor(!HasCSSC, {s32, s64});

  getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();

  getActionDefinitionsBuilder(G_BITREVERSE)
      .legalFor({s32, s64, v8s8, v16s8})
      .widenScalarToNextPow2(0, /*Min = */ 32)
      .widenScalarOrEltToNextPow2OrMinSize(0, 8)
      .clampScalar(0, s32, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .moreElementsToNextPow2(0)
      .lower();

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
      .widenScalarOrEltToNextPow2(0, 16)
      .clampScalar(0, s32, s64)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
      .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
      .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .moreElementsToNextPow2(0)
      .lower();

  getActionDefinitionsBuilder(
      {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
       G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
       G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
      .legalFor({s32, s64, v2s32, v4s32, v2s64})
      .legalFor(HasFP16, {s16, v4s16, v8s16})
      .libcallFor({s128})
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .minScalarOrElt(0, MinFPScalar)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder({G_FABS, G_FNEG})
      .legalFor({s32, s64, v2s32, v4s32, v2s64})
      .legalFor(HasFP16, {s16, v4s16, v8s16})
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .lowerIf(scalarOrEltWiderThan(0, 64))
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .lowerFor({s16, v4s16, v8s16});

  getActionDefinitionsBuilder(G_FREM)
      .libcallFor({s32, s64, s128})
      .minScalar(0, s32)
      .scalarize(0);

  getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
      .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
      .libcallFor({{s64, s128}})
      .minScalarOrElt(1, MinFPScalar);

  getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
                               G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
                               G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
                               G_FSINH, G_FTANH})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, s128});
  getActionDefinitionsBuilder(G_FPOWI)
      .scalarize(0)
      .minScalar(0, s32)
      .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});

  // TODO: Libcall support for s128.
  // TODO: s16 should be legal with full FP16 support.
  getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
      .legalFor({{s64, s32}, {s64, s64}});

  // TODO: Custom legalization for mismatched types.
  getActionDefinitionsBuilder(G_FCOPYSIGN)
      .moreElementsIf(
          [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
          [=](const LegalityQuery &Query) {
            const LLT Ty = Query.Types[0];
            return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
          })
      .lower();

  getActionDefinitionsBuilder(G_FMAD).lower();

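  // Extending loads: anything that does not match one of the listed
  // (result, pointer, memory) combinations below is lowered into a plain
  // G_LOAD followed by the appropriate G_*EXT.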
  for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
    auto &Actions = getActionDefinitionsBuilder(Op);

    if (Op == G_SEXTLOAD)
      Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(
          0, AtomicOrdering::Unordered));

    // Atomics have zero extending behavior.
    Actions
        .legalForTypesWithMemDesc({{s32, p0, s8, 8},
                                   {s32, p0, s16, 8},
                                   {s32, p0, s32, 8},
                                   {s64, p0, s8, 2},
                                   {s64, p0, s16, 2},
                                   {s64, p0, s32, 4},
                                   {s64, p0, s64, 8},
                                   {p0, p0, s64, 8},
                                   {v2s32, p0, s64, 8}})
        .widenScalarToNextPow2(0)
        .clampScalar(0, s32, s64)
        // TODO: We could support sum-of-pow2's but the lowering code doesn't
        // know how to do that yet.
        .unsupportedIfMemSizeNotPow2()
        // Lower anything left over into G_*EXT and G_LOAD
        .lower();
  }

  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
  };

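  // Loads: 128-bit atomic loads and loads of pointer vectors take the custom
  // path; scalar, NEON-vector, extending and SVE-vector loads are legal for
  // the combinations listed below.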
  getActionDefinitionsBuilder(G_LOAD)
      .customIf([=](const LegalityQuery &Query) {
        return HasRCPC3 && Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
      })
      .customIf([=](const LegalityQuery &Query) {
        return Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
      })
      .legalForTypesWithMemDesc({{s8, p0, s8, 8},
                                 {s16, p0, s16, 8},
                                 {s32, p0, s32, 8},
                                 {s64, p0, s64, 8},
                                 {p0, p0, s64, 8},
                                 {s128, p0, s128, 8},
                                 {v8s8, p0, s64, 8},
                                 {v16s8, p0, s128, 8},
                                 {v4s16, p0, s64, 8},
                                 {v8s16, p0, s128, 8},
                                 {v2s32, p0, s64, 8},
                                 {v4s32, p0, s128, 8},
                                 {v2s64, p0, s128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc(
          {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
      .legalForTypesWithMemDesc({
          // SVE vscale x 128 bit base sizes
          {nxv16s8, p0, nxv16s8, 8},
          {nxv8s16, p0, nxv8s16, 8},
          {nxv4s32, p0, nxv4s32, 8},
          {nxv2s64, p0, nxv2s64, 8},
      })
      .widenScalarToNextPow2(0, /* MinSize = */ 8)
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampMaxNumElements(0, s32, 4)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2)
      .lowerIfMemSizeNotByteSizePow2()
      .clampScalar(0, s8, s64)
      .narrowScalarIf(
          [=](const LegalityQuery &Query) {
            // Clamp extending load results to 32-bits.
            return Query.Types[0].isScalar() &&
                   Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
                   Query.Types[0].getSizeInBits() > 32;
          },
          changeTo(0, s32))
      // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
      .bitcastIf(typeInSet(0, {v4s8}),
                 [=](const LegalityQuery &Query) {
                   const LLT VecTy = Query.Types[0];
                   return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
                 })
      .customIf(IsPtrVecPred)
      .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);

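  // Stores mirror the load rules: 128-bit atomic stores and stores of pointer
  // vectors are custom, and truncating stores are legal for the combinations
  // listed below.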
  getActionDefinitionsBuilder(G_STORE)
      .customIf([=](const LegalityQuery &Query) {
        return HasRCPC3 && Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
      })
      .customIf([=](const LegalityQuery &Query) {
        return Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
      })
      .legalForTypesWithMemDesc(
          {{s8, p0, s8, 8},
           {s16, p0, s8, 8}, // truncstorei8 from s16
           {s32, p0, s8, 8}, // truncstorei8 from s32
           {s64, p0, s8, 8}, // truncstorei8 from s64
           {s16, p0, s16, 8},
           {s32, p0, s16, 8}, // truncstorei16 from s32
           {s64, p0, s16, 8}, // truncstorei16 from s64
           {s32, p0, s8, 8},
           {s32, p0, s16, 8},
           {s32, p0, s32, 8},
           {s64, p0, s64, 8},
           {s64, p0, s32, 8}, // truncstorei32 from s64
           {p0, p0, s64, 8},
           {s128, p0, s128, 8},
           {v16s8, p0, s128, 8},
           {v8s8, p0, s64, 8},
           {v4s16, p0, s64, 8},
           {v8s16, p0, s128, 8},
           {v2s32, p0, s64, 8},
           {v4s32, p0, s128, 8},
           {v2s64, p0, s128, 8}})
      .legalForTypesWithMemDesc({
          // SVE vscale x 128 bit base sizes
          // TODO: Add nxv2p0. Consider bitcastIf.
          // See #92130
          // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
          {nxv16s8, p0, nxv16s8, 8},
          {nxv8s16, p0, nxv8s16, 8},
          {nxv4s32, p0, nxv4s32, 8},
          {nxv2s64, p0, nxv2s64, 8},
      })
      .clampScalar(0, s8, s64)
      .minScalarOrElt(0, s8)
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0] != Query.MMODescrs[0].MemoryTy;
      })
      // Maximum: sN * k = 128
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampMaxNumElements(0, s32, 4)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2)
      .lowerIfMemSizeNotPow2()
      // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
      .bitcastIf(all(typeInSet(0, {v4s8}),
                     LegalityPredicate([=](const LegalityQuery &Query) {
                       return Query.Types[0].getSizeInBits() ==
                              Query.MMODescrs[0].MemoryTy.getSizeInBits();
                     })),
                 [=](const LegalityQuery &Query) {
                   const LLT VecTy = Query.Types[0];
                   return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
                 })
      .customIf(IsPtrVecPred)
      .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .lower();

  getActionDefinitionsBuilder(G_INDEXED_STORE)
      // Idx 0 == Ptr, Idx 1 == Val
      // TODO: we can implement legalizations but as of now these are
      // generated in a very specific way.
      .legalForTypesWithMemDesc({
          {p0, s8, s8, 8},
          {p0, s16, s16, 8},
          {p0, s32, s8, 8},
          {p0, s32, s16, 8},
          {p0, s32, s32, 8},
          {p0, s64, s64, 8},
          {p0, p0, p0, 8},
          {p0, v8s8, v8s8, 8},
          {p0, v16s8, v16s8, 8},
          {p0, v4s16, v4s16, 8},
          {p0, v8s16, v8s16, 8},
          {p0, v2s32, v2s32, 8},
          {p0, v4s32, v4s32, 8},
          {p0, v2s64, v2s64, 8},
          {p0, v2p0, v2p0, 8},
          {p0, s128, s128, 8},
      })
      .unsupported();

  auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
    LLT LdTy = Query.Types[0];
    LLT PtrTy = Query.Types[1];
    if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
        !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
      return false;
    if (PtrTy != p0)
      return false;
    return true;
  };
  getActionDefinitionsBuilder(G_INDEXED_LOAD)
      .unsupportedIf(
          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
      .legalIf(IndexedLoadBasicPred)
      .unsupported();
  getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
      .unsupportedIf(
          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
      .legalIf(all(typeInSet(0, {s16, s32, s64}),
                   LegalityPredicate([=](const LegalityQuery &Q) {
                     LLT LdTy = Q.Types[0];
                     LLT PtrTy = Q.Types[1];
                     LLT MemTy = Q.MMODescrs[0].MemoryTy;
                     if (PtrTy != p0)
                       return false;
                     if (LdTy == s16)
                       return MemTy == s8;
                     if (LdTy == s32)
                       return MemTy == s8 || MemTy == s16;
                     if (LdTy == s64)
                       return MemTy == s8 || MemTy == s16 || MemTy == s32;
                     return false;
                   })))
      .unsupported();

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s8, s64);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64, s128})
      .legalFor(HasFP16, {s16})
      .clampScalar(0, MinFPScalar, s128);

  // FIXME: fix moreElementsToNextPow2
  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
      .widenScalarOrEltToNextPow2(1)
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.isPointerVector() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].isPointerVector();
          },
          0, s64)
      .moreElementsToNextPow2(1)
      .clampNumElements(1, v8s8, v16s8)
      .clampNumElements(1, v4s16, v8s16)
      .clampNumElements(1, v2s32, v4s32)
      .clampNumElements(1, v2s64, v2s64)
      .clampNumElements(1, v2p0, v2p0)
      .customIf(isVector(0));

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64}})
      .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
      .widenScalarOrEltToNextPow2(1)
      .clampScalar(0, s32, s32)
      .minScalarOrElt(1, MinFPScalar)
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.isPointerVector() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .clampNumElements(1, v4s16, v8s16)
      .clampNumElements(1, v2s32, v4s32)
      .clampMaxNumElements(1, s64, 2)
      .moreElementsToNextPow2(1)
      .libcallFor({{s32, s128}});

  // Extensions
  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
    unsigned DstSize = Query.Types[0].getSizeInBits();

    // Handle legal vectors using legalFor
    if (Query.Types[0].isVector())
      return false;

    if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
      return false; // Extending to a scalar s128 needs narrowing.

    const LLT &SrcTy = Query.Types[1];

    // Make sure we fit in a register otherwise. Don't bother checking that
    // the source type is below 128 bits. We shouldn't be allowing anything
    // through which is wider than the destination in the first place.
    unsigned SrcSize = SrcTy.getSizeInBits();
    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
      return false;

    return true;
  };
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf(ExtLegalFunc)
      .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
      .clampScalar(0, s64, s64) // Just for s128, others are handled above.
      .moreElementsToNextPow2(0)
      .clampMaxNumElements(1, s8, 8)
      .clampMaxNumElements(1, s16, 4)
      .clampMaxNumElements(1, s32, 2)
      // Tries to convert a large EXTEND into two smaller EXTENDs
      .lowerIf([=](const LegalityQuery &Query) {
        return (Query.Types[0].getScalarSizeInBits() >
                Query.Types[1].getScalarSizeInBits() * 2) &&
               Query.Types[0].isVector() &&
               (Query.Types[1].getScalarSizeInBits() == 8 ||
                Query.Types[1].getScalarSizeInBits() == 16);
      })
      .clampMinNumElements(1, s8, 8)
      .clampMinNumElements(1, s16, 4)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);

  getActionDefinitionsBuilder(G_TRUNC)
      .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
      .moreElementsToNextPow2(0)
      .clampMaxNumElements(0, s8, 8)
      .clampMaxNumElements(0, s16, 4)
      .clampMaxNumElements(0, s32, 2)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
          0, s8)
      .lowerIf([=](const LegalityQuery &Query) {
        LLT DstTy = Query.Types[0];
        LLT SrcTy = Query.Types[1];
        return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
               DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
      })
      .clampMinNumElements(0, s8, 8)
      .clampMinNumElements(0, s16, 4)
      .alwaysLegal();

  getActionDefinitionsBuilder(G_SEXT_INREG)
      .legalFor({s32, s64})
      .legalFor(PackedVectorAllTypeList)
      .maxScalar(0, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .lower();

  // FP conversions
  getActionDefinitionsBuilder(G_FPTRUNC)
      .legalFor(
          {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
      .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
      .clampNumElements(0, v4s16, v4s16)
      .clampNumElements(0, v2s32, v2s32)
      .scalarize(0);

  getActionDefinitionsBuilder(G_FPEXT)
      .legalFor(
          {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
      .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .scalarize(0);

  // Conversions
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalFor({{s32, s32},
                 {s64, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s32, v2s32},
                 {v4s32, v4s32},
                 {v2s64, v2s64}})
      .legalFor(HasFP16,
                {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      // The range of a fp16 value fits into an i17, so we can lower the width
      // to i64.
      .narrowScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
          },
          changeTo(0, s64))
      .moreElementsToNextPow2(0)
      .widenScalarOrEltToNextPow2OrMinSize(0)
      .minScalar(0, s32)
      .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() >
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(1, 0))
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() <
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(0, 1))
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .libcallFor(
          {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});

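  // The saturating FP-to-integer conversions follow the same shape as
  // G_FPTOSI/G_FPTOUI above, with anything wider than 64 bits scalarized or
  // lowered.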
  getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
      .legalFor({{s32, s32},
                 {s64, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s32, v2s32},
                 {v4s32, v4s32},
                 {v2s64, v2s64}})
      .legalFor(HasFP16,
                {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
      // Handle types larger than i64 by scalarizing/lowering.
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      // The range of a fp16 value fits into an i17, so we can lower the width
      // to i64.
      .narrowScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
          },
          changeTo(0, s64))
      .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
      .moreElementsToNextPow2(0)
      .widenScalarToNextPow2(0, /*MinSize=*/32)
      .minScalar(0, s32)
      .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            unsigned ITySize = Query.Types[0].getScalarSizeInBits();
            return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
                   ITySize > Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(1, 0))
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            unsigned FTySize = Query.Types[1].getScalarSizeInBits();
            return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
                   Query.Types[0].getScalarSizeInBits() < FTySize;
          },
          LegalizeMutations::changeElementSizeTo(0, 1))
      .widenScalarOrEltToNextPow2(0)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalFor({{s32, s32},
                 {s64, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s32, v2s32},
                 {v4s32, v4s32},
                 {v2s64, v2s64}})
      .legalFor(HasFP16,
                {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .moreElementsToNextPow2(1)
      .widenScalarOrEltToNextPow2OrMinSize(1)
      .minScalar(1, s32)
      .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() <
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(0, 1))
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() >
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(1, 0))
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .libcallFor({{s16, s128},
                   {s32, s128},
                   {s64, s128},
                   {s128, s128},
                   {s128, s32},
                   {s128, s64}});

  // Control-flow
  getActionDefinitionsBuilder(G_BRCOND)
      .legalFor({s32})
      .clampScalar(0, s32, s32);
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, s64)
      .clampScalar(1, s32, s32)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
      .lowerIf(isVector(0));

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});

  if (TM.getCodeModel() == CodeModel::Small)
    getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
  else
    getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
      .legalIf(all(typeIs(0, p0), typeIs(1, p0)));

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalFor({{s64, p0}, {v2s64, v2p0}})
      .widenScalarToNextPow2(0, 64)
      .clampScalar(0, s64, s64)
      .clampMaxNumElements(0, s64, 2);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}, {v2p0, v2s64}})
      .clampMaxNumElements(1, s64, 2);

  // Casts for 32 and 64-bit width types are just copies.
  // Same for 128-bit width types, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // Keeping 32-bit instructions legal to prevent regression in some tests
      .legalForCartesianProduct({s32, v2s16, v4s8})
      .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
      .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
      .customIf([=](const LegalityQuery &Query) {
        // Handle casts from i1 vectors to scalars.
        LLT DstTy = Query.Types[0];
        LLT SrcTy = Query.Types[1];
        return DstTy.isScalar() && SrcTy.isVector() &&
               SrcTy.getScalarSizeInBits() == 1;
      })
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isVector() != Query.Types[1].isVector();
      })
      .moreElementsToNextPow2(0)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .lower();

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
      .lowerIf(
          all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));

  bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();

  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
      .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
      .customFor(!UseOutlineAtomics, {{s128, p0}})
      .libcallFor(UseOutlineAtomics,
                  {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
                               G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
                               G_ATOMICRMW_XOR})
      .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
      .libcallFor(UseOutlineAtomics,
                  {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
      .clampScalar(0, s32, s64);

  // Do not outline these atomic operations, as per the comment in
  // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
  getActionDefinitionsBuilder(
      {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
      .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
    getActionDefinitionsBuilder(Op)
        .widenScalarToNextPow2(LitTyIdx, 8)
        .widenScalarToNextPow2(BigTyIdx, 32)
        .clampScalar(LitTyIdx, s8, s64)
        .clampScalar(BigTyIdx, s32, s128)
        .legalIf([=](const LegalityQuery &Q) {
          switch (Q.Types[BigTyIdx].getSizeInBits()) {
          case 32:
          case 64:
          case 128:
            break;
          default:
            return false;
          }
          switch (Q.Types[LitTyIdx].getSizeInBits()) {
          case 8:
          case 16:
          case 32:
          case 64:
            return true;
          default:
            return false;
          }
        });
  }

  // TODO : nxv4s16, nxv2s16, nxv2s32
  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .legalFor(HasSVE, {{s16, nxv16s8, s64},
                         {s16, nxv8s16, s64},
                         {s32, nxv4s32, s64},
                         {s64, nxv2s64, s64}})
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        if (Query.Types[1].isScalableVector())
          return false;
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .customIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
               VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
      })
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            // We want to promote <M x s1> to <M x s64> if that wouldn't
            // cause the total vec size to be > 128b.
            return Query.Types[1].isFixedVector() &&
                   Query.Types[1].getNumElements() <= 2;
          },
          0, s64)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].isFixedVector() &&
                   Query.Types[1].getNumElements() <= 4;
          },
          0, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].isFixedVector() &&
                   Query.Types[1].getNumElements() <= 8;
          },
          0, s16)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].isFixedVector() &&
                   Query.Types[1].getNumElements() <= 16;
          },
          0, s8)
      .minScalarOrElt(0, s8) // Worst case, we need at least s8.
      .moreElementsToNextPow2(1)
      .clampMaxNumElements(1, s64, 2)
      .clampMaxNumElements(1, s32, 4)
      .clampMaxNumElements(1, s16, 8)
      .clampMaxNumElements(1, s8, 16)
      .clampMaxNumElements(1, p0, 2);

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf(
          typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
      .legalFor(HasSVE, {{nxv16s8, s32, s64},
                         {nxv8s16, s32, s64},
                         {nxv4s32, s32, s64},
                         {nxv2s64, s64, s64}})
      .moreElementsToNextPow2(0)
      .widenVectorEltsToVectorMinSize(0, 64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2);

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v8s8, s8},
                 {v16s8, s8},
                 {v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2s64, s64},
                 {v2p0, p0}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .minScalarOrElt(0, s8)
      .widenVectorEltsToVectorMinSize(0, 64)
      .widenScalarOrEltToNextPow2(0)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        return llvm::is_contained(
            {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors) or scalar
      // destinations; we just want those lowered into G_BUILD_VECTOR or
      // G_EXTRACT_ELEMENT.
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[0].isVector() || !Query.Types[1].isVector();
      })
      .moreElementsIf(
          [](const LegalityQuery &Query) {
            return Query.Types[0].isVector() && Query.Types[1].isVector() &&
                   Query.Types[0].getNumElements() >
                       Query.Types[1].getNumElements();
          },
          changeTo(1, 0))
      .moreElementsToNextPow2(0)
      .moreElementsIf(
          [](const LegalityQuery &Query) {
            return Query.Types[0].isVector() && Query.Types[1].isVector() &&
                   Query.Types[0].getNumElements() <
                       Query.Types[1].getNumElements();
          },
          changeTo(0, 1))
      .widenScalarOrEltToNextPow2OrMinSize(0, 8)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
        // Bitcast pointer vectors to i64 vectors.
        const LLT DstTy = Query.Types[0];
        return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
      });

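  // Concatenating two 64-bit vectors into a 128-bit result is directly legal;
  // other small cases are bitcast to a vector whose elements each cover one
  // whole source vector.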
  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
      .bitcastIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getSizeInBits() <= 128 &&
                   Query.Types[1].getSizeInBits() <= 64;
          },
          [=](const LegalityQuery &Query) {
            const LLT DstTy = Query.Types[0];
            const LLT SrcTy = Query.Types[1];
            return std::pair(
                0, DstTy.changeElementSize(SrcTy.getSizeInBits())
                       .changeElementCount(
                           DstTy.getElementCount().divideCoefficientBy(
                               SrcTy.getNumElements())));
          });

  getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
      .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
      .widenScalarOrEltToNextPow2(0)
      .immIdx(0); // Inform verifier imm idx 0 is handled.

  // TODO: {nxv16s8, s8}, {nxv8s16, s16}
  getActionDefinitionsBuilder(G_SPLAT_VECTOR)
      .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});

  getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});

  getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});

  getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();

  getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();

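  // Memory intrinsics: with FEAT_MOPS, memcpy/memmove/memset stay legal
  // (memset of an s8 value is custom) so they can be selected to the MOPS
  // instructions; without it they are turned into libcalls.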
1246 if (ST.hasMOPS()) {
1247 // G_BZERO is not supported. Currently it is only emitted by
1248 // PreLegalizerCombiner for G_MEMSET with zero constant.
1249 getActionDefinitionsBuilder(Opcode: G_BZERO).unsupported();
1250
1251 getActionDefinitionsBuilder(Opcode: G_MEMSET)
1252 .legalForCartesianProduct(Types0: {p0}, Types1: {s64}, Types2: {s64})
1253 .customForCartesianProduct(Types0: {p0}, Types1: {s8}, Types2: {s64})
1254 .immIdx(ImmIdx: 0); // Inform verifier imm idx 0 is handled.
1255
1256 getActionDefinitionsBuilder(Opcodes: {G_MEMCPY, G_MEMMOVE})
1257 .legalForCartesianProduct(Types0: {p0}, Types1: {p0}, Types2: {s64})
1258 .immIdx(ImmIdx: 0); // Inform verifier imm idx 0 is handled.
1259
1260 // G_MEMCPY_INLINE does not have a tailcall immediate operand.
1261 getActionDefinitionsBuilder(Opcode: G_MEMCPY_INLINE)
1262 .legalForCartesianProduct(Types0: {p0}, Types1: {p0}, Types2: {s64});
1263
1264 } else {
1265 getActionDefinitionsBuilder(Opcodes: {G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1266 .libcall();
1267 }
1268
1269 // For fadd reductions we have pairwise operations available. We treat the
1270 // usual legal types as legal and handle the lowering to pairwise instructions
1271 // later.
1272 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_FADD)
1273 .legalFor(Types: {{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1274 .legalFor(Pred: HasFP16, Types: {{s16, v4s16}, {s16, v8s16}})
1275 .minScalarOrElt(TypeIdx: 0, Ty: MinFPScalar)
1276 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1277 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1278 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1279 .lower();
1280
1281 // For fmul reductions we need to split up into individual operations. We
1282 // clamp to 128-bit vectors and then to 64-bit vectors to produce a cascade of
1283 // smaller types, followed by scalarizing what remains.
1284 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_FMUL)
1285 .minScalarOrElt(TypeIdx: 0, Ty: MinFPScalar)
1286 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1287 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1288 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1289 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 2)
1290 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 4)
1291 .scalarize(TypeIdx: 1)
1292 .lower();
1293
1294 getActionDefinitionsBuilder(Opcodes: {G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1295 .scalarize(TypeIdx: 2)
1296 .lower();
1297
1298 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_ADD)
1299 .legalFor(Types: {{s8, v8s8},
1300 {s8, v16s8},
1301 {s16, v4s16},
1302 {s16, v8s16},
1303 {s32, v2s32},
1304 {s32, v4s32},
1305 {s64, v2s64}})
1306 .moreElementsToNextPow2(TypeIdx: 1)
1307 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1308 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1309 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1310 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 16)
1311 .widenVectorEltsToVectorMinSize(TypeIdx: 1, VectorSize: 64)
1312 .scalarize(TypeIdx: 1);
1313
1314 getActionDefinitionsBuilder(Opcodes: {G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1315 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1316 .legalFor(Types: {{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1317 .legalFor(Pred: HasFP16, Types: {{s16, v4s16}, {s16, v8s16}})
1318 .minScalarOrElt(TypeIdx: 0, Ty: MinFPScalar)
1319 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1320 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1321 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1322 .lower();
1323
1324 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_MUL)
1325 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 2)
1326 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 4)
1327 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 8)
1328 .scalarize(TypeIdx: 1)
1329 .lower();
1330
1331 getActionDefinitionsBuilder(
1332 Opcodes: {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1333 .legalFor(Types: {{s8, v8s8},
1334 {s8, v16s8},
1335 {s16, v4s16},
1336 {s16, v8s16},
1337 {s32, v2s32},
1338 {s32, v4s32}})
1339 .moreElementsIf(
1340 Predicate: [=](const LegalityQuery &Query) {
1341 return Query.Types[1].isVector() &&
1342 Query.Types[1].getElementType() != s8 &&
1343 Query.Types[1].getNumElements() & 1;
1344 },
1345 Mutation: LegalizeMutations::moreElementsToNextPow2(TypeIdx: 1))
1346 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1347 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1348 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1349 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 16)
1350 .scalarize(TypeIdx: 1)
1351 .lower();
1352
1353 getActionDefinitionsBuilder(
1354 Opcodes: {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1355 // Try to break down into smaller vectors as long as they're at least 64
1356 // bits. This lets us use vector operations for some parts of the
1357 // reduction.
1358 .fewerElementsIf(
1359 Predicate: [=](const LegalityQuery &Q) {
1360 LLT SrcTy = Q.Types[1];
1361 if (SrcTy.isScalar())
1362 return false;
1363 if (!isPowerOf2_32(Value: SrcTy.getNumElements()))
1364 return false;
1365 // We can usually perform 64b vector operations.
1366 return SrcTy.getSizeInBits() > 64;
1367 },
1368 Mutation: [=](const LegalityQuery &Q) {
1369 LLT SrcTy = Q.Types[1];
1370 return std::make_pair(x: 1, y: SrcTy.divide(Factor: 2));
1371 })
1372 .scalarize(TypeIdx: 1)
1373 .lower();
1374
1375 // TODO: Update this to correct handling when adding AArch64/SVE support.
1376 getActionDefinitionsBuilder(Opcode: G_VECTOR_COMPRESS).lower();
1377
1378 // Access to floating-point environment.
1379 getActionDefinitionsBuilder(Opcodes: {G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1380 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1381 .libcall();
1382
1383 getActionDefinitionsBuilder(Opcode: G_IS_FPCLASS).lower();
1384
1385 getActionDefinitionsBuilder(Opcode: G_PREFETCH).custom();
1386
1387 getActionDefinitionsBuilder(Opcodes: {G_SCMP, G_UCMP}).lower();
1388
1389 getLegacyLegalizerInfo().computeTables();
1390 verify(MII: *ST.getInstrInfo());
1391}
1392
1393bool AArch64LegalizerInfo::legalizeCustom(
1394 LegalizerHelper &Helper, MachineInstr &MI,
1395 LostDebugLocObserver &LocObserver) const {
1396 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1397 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1398 GISelChangeObserver &Observer = Helper.Observer;
1399 switch (MI.getOpcode()) {
1400 default:
1401 // No idea what to do.
1402 return false;
1403 case TargetOpcode::G_VAARG:
1404 return legalizeVaArg(MI, MRI, MIRBuilder);
1405 case TargetOpcode::G_LOAD:
1406 case TargetOpcode::G_STORE:
1407 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1408 case TargetOpcode::G_SHL:
1409 case TargetOpcode::G_ASHR:
1410 case TargetOpcode::G_LSHR:
1411 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1412 case TargetOpcode::G_GLOBAL_VALUE:
1413 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1414 case TargetOpcode::G_SBFX:
1415 case TargetOpcode::G_UBFX:
1416 return legalizeBitfieldExtract(MI, MRI, Helper);
1417 case TargetOpcode::G_FSHL:
1418 case TargetOpcode::G_FSHR:
1419 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1420 case TargetOpcode::G_ROTR:
1421 return legalizeRotate(MI, MRI, Helper);
1422 case TargetOpcode::G_CTPOP:
1423 return legalizeCTPOP(MI, MRI, Helper);
1424 case TargetOpcode::G_ATOMIC_CMPXCHG:
1425 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1426 case TargetOpcode::G_CTTZ:
1427 return legalizeCTTZ(MI, Helper);
1428 case TargetOpcode::G_BZERO:
1429 case TargetOpcode::G_MEMCPY:
1430 case TargetOpcode::G_MEMMOVE:
1431 case TargetOpcode::G_MEMSET:
1432 return legalizeMemOps(MI, Helper);
1433 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1434 return legalizeExtractVectorElt(MI, MRI, Helper);
1435 case TargetOpcode::G_DYN_STACKALLOC:
1436 return legalizeDynStackAlloc(MI, Helper);
1437 case TargetOpcode::G_PREFETCH:
1438 return legalizePrefetch(MI, Helper);
1439 case TargetOpcode::G_ABS:
1440 return Helper.lowerAbsToCNeg(MI);
1441 case TargetOpcode::G_ICMP:
1442 return legalizeICMP(MI, MRI, MIRBuilder);
1443 case TargetOpcode::G_BITCAST:
1444 return legalizeBitcast(MI, Helper);
1445 }
1446
1447 llvm_unreachable("expected switch to return");
1448}
1449
1450bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1451 LegalizerHelper &Helper) const {
1452 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1453 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1454 // We handle casts from i1 vectors to scalars by storing the vector to the
1455 // stack and reloading it as the scalar type.
1456 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1457 SrcTy.getElementType() != LLT::scalar(SizeInBits: 1))
1458 return false;
1459
1460 Helper.createStackStoreLoad(Res: DstReg, Val: SrcReg);
1461 MI.eraseFromParent();
1462 return true;
1463}
1464
1465bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1466 MachineRegisterInfo &MRI,
1467 MachineIRBuilder &MIRBuilder,
1468 GISelChangeObserver &Observer,
1469 LegalizerHelper &Helper) const {
1470 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1471 MI.getOpcode() == TargetOpcode::G_FSHR);
1472
1473 // Keep this as a G_FSHR if the shift amount is a G_CONSTANT; otherwise fall
1474 // back to the generic lowering.
1475 Register ShiftNo = MI.getOperand(i: 3).getReg();
1476 LLT ShiftTy = MRI.getType(Reg: ShiftNo);
1477 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ShiftNo, MRI);
1478
1479 // Adjust shift amount according to Opcode (FSHL/FSHR)
1480 // Convert FSHL to FSHR
1481 LLT OperationTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1482 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
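// BitWidth holds the size of the value being shifted, expressed in the
// shift-amount type so it can be compared against the constant amount.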
1483
1484 // Lower non-constant shifts and leave zero shifts to the optimizer.
1485 if (!VRegAndVal || VRegAndVal->Value.urem(RHS: BitWidth) == 0)
1486 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1487 LegalizerHelper::LegalizeResult::Legalized);
1488
1489 APInt Amount = VRegAndVal->Value.urem(RHS: BitWidth);
1490
1491 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1492
1493 // If the instruction is a G_FSHR with a 64-bit G_CONSTANT shift amount in
1494 // the range [0, BitWidth), it is legal as-is.
1495 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1496 VRegAndVal->Value.ult(RHS: BitWidth))
1497 return true;
1498
1499 // Materialize the adjusted shift amount as a 64-bit constant.
1500 auto Cast64 = MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: Amount.zext(width: 64));
1501
1502 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1503 Observer.changingInstr(MI);
1504 MI.getOperand(i: 3).setReg(Cast64.getReg(Idx: 0));
1505 Observer.changedInstr(MI);
1506 }
1507 // If the opcode is FSHL, remove the FSHL instruction and create an FSHR
1508 // instruction instead.
1509 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1510 MIRBuilder.buildInstr(Opc: TargetOpcode::G_FSHR, DstOps: {MI.getOperand(i: 0).getReg()},
1511 SrcOps: {MI.getOperand(i: 1).getReg(), MI.getOperand(i: 2).getReg(),
1512 Cast64.getReg(Idx: 0)});
1513 MI.eraseFromParent();
1514 }
1515 return true;
1516}
1517
1518bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1519 MachineRegisterInfo &MRI,
1520 MachineIRBuilder &MIRBuilder) const {
1521 Register DstReg = MI.getOperand(i: 0).getReg();
1522 Register SrcReg1 = MI.getOperand(i: 2).getReg();
1523 Register SrcReg2 = MI.getOperand(i: 3).getReg();
1524 LLT DstTy = MRI.getType(Reg: DstReg);
1525 LLT SrcTy = MRI.getType(Reg: SrcReg1);
1526
1527 // Check the vector types are legal
1528 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1529 DstTy.getNumElements() != SrcTy.getNumElements() ||
1530 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1531 return false;
1532
1533 // Lower G_ICMP NE to G_ICMP EQ followed by a NOT to allow better pattern
1534 // matching in later passes.
1535 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(i: 1).getPredicate();
1536 if (Pred != CmpInst::ICMP_NE)
1537 return true;
1538 Register CmpReg =
1539 MIRBuilder
1540 .buildICmp(Pred: CmpInst::ICMP_EQ, Res: MRI.getType(Reg: DstReg), Op0: SrcReg1, Op1: SrcReg2)
1541 .getReg(Idx: 0);
1542 MIRBuilder.buildNot(Dst: DstReg, Src0: CmpReg);
1543
1544 MI.eraseFromParent();
1545 return true;
1546}
1547
1548bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1549 MachineRegisterInfo &MRI,
1550 LegalizerHelper &Helper) const {
1551 // To allow the imported patterns to match, ensure that the rotate amount is
1552 // a 64-bit value by zero-extending it.
1553 Register AmtReg = MI.getOperand(i: 2).getReg();
1554 LLT AmtTy = MRI.getType(Reg: AmtReg);
1555 (void)AmtTy;
1556 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1557 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1558 auto NewAmt = Helper.MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 64), Op: AmtReg);
1559 Helper.Observer.changingInstr(MI);
1560 MI.getOperand(i: 2).setReg(NewAmt.getReg(Idx: 0));
1561 Helper.Observer.changedInstr(MI);
1562 return true;
1563}
1564
1565bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1566 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1567 GISelChangeObserver &Observer) const {
1568 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1569 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1570 // G_ADD_LOW instructions.
1571 // By splitting this here, we can optimize accesses in the small code model by
1572 // folding the G_ADD_LOW into the load/store offset.
1573 auto &GlobalOp = MI.getOperand(i: 1);
1574 // Don't modify an intrinsic call.
1575 if (GlobalOp.isSymbol())
1576 return true;
1577 const auto *GV = GlobalOp.getGlobal();
1578 if (GV->isThreadLocal())
1579 return true; // Don't want to modify TLS vars.
1580
1581 auto &TM = ST->getTargetLowering()->getTargetMachine();
1582 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1583
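// Globals accessed through the GOT are not split here; only direct
// ADRP + G_ADD_LOW addressing benefits from this transform.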
1584 if (OpFlags & AArch64II::MO_GOT)
1585 return true;
1586
1587 auto Offset = GlobalOp.getOffset();
1588 Register DstReg = MI.getOperand(i: 0).getReg();
1589 auto ADRP = MIRBuilder.buildInstr(Opc: AArch64::ADRP, DstOps: {LLT::pointer(AddressSpace: 0, SizeInBits: 64)}, SrcOps: {})
1590 .addGlobalAddress(GV, Offset, TargetFlags: OpFlags | AArch64II::MO_PAGE);
1591 // Set the regclass on the dest reg too.
1592 MRI.setRegClass(Reg: ADRP.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
1593
1594 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1595 // by creating a MOVK that sets bits 48-63 of the register to (global address
1596 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1597 // prevent an incorrect tag being generated during relocation when the
1598 // global appears before the code section. Without the offset, a global at
1599 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1600 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1601 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1602 // instead of `0xf`.
1603 // This assumes that we're in the small code model so we can assume a binary
1604 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1605 // binary must also be loaded into address range [0, 2^48). Both of these
1606 // properties need to be ensured at runtime when using tagged addresses.
1607 if (OpFlags & AArch64II::MO_TAGGED) {
1608 assert(!Offset &&
1609 "Should not have folded in an offset for a tagged global!");
1610 ADRP = MIRBuilder.buildInstr(Opc: AArch64::MOVKXi, DstOps: {LLT::pointer(AddressSpace: 0, SizeInBits: 64)}, SrcOps: {ADRP})
1611 .addGlobalAddress(GV, Offset: 0x100000000,
1612 TargetFlags: AArch64II::MO_PREL | AArch64II::MO_G3)
1613 .addImm(Val: 48);
1614 MRI.setRegClass(Reg: ADRP.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
1615 }
1616
1617 MIRBuilder.buildInstr(Opc: AArch64::G_ADD_LOW, DstOps: {DstReg}, SrcOps: {ADRP})
1618 .addGlobalAddress(GV, Offset,
1619 TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1620 MI.eraseFromParent();
1621 return true;
1622}
1623
1624bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1625 MachineInstr &MI) const {
1626 MachineIRBuilder &MIB = Helper.MIRBuilder;
1627 MachineRegisterInfo &MRI = *MIB.getMRI();
1628
1629 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1630 MIB.buildInstr(Opc: Opcode, DstOps: {MI.getOperand(i: 0)},
1631 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3)});
1632 MI.eraseFromParent();
1633 return true;
1634 };
1635
1636 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(Val&: MI).getIntrinsicID();
1637 switch (IntrinsicID) {
1638 case Intrinsic::vacopy: {
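// Darwin and Windows use a plain pointer for va_list, while the AAPCS
// va_list is a 32-byte struct (20 bytes on ILP32). Copy it as one opaque
// block of the appropriate size.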
1639 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1640 unsigned VaListSize =
1641 (ST->isTargetDarwin() || ST->isTargetWindows())
1642 ? PtrSize
1643 : ST->isTargetILP32() ? 20 : 32;
1644
1645 MachineFunction &MF = *MI.getMF();
1646 auto Val = MF.getRegInfo().createGenericVirtualRegister(
1647 Ty: LLT::scalar(SizeInBits: VaListSize * 8));
1648 MIB.buildLoad(Res: Val, Addr: MI.getOperand(i: 2),
1649 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
1650 F: MachineMemOperand::MOLoad,
1651 Size: VaListSize, BaseAlignment: Align(PtrSize)));
1652 MIB.buildStore(Val, Addr: MI.getOperand(i: 1),
1653 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
1654 F: MachineMemOperand::MOStore,
1655 Size: VaListSize, BaseAlignment: Align(PtrSize)));
1656 MI.eraseFromParent();
1657 return true;
1658 }
1659 case Intrinsic::get_dynamic_area_offset: {
1660 MIB.buildConstant(Res: MI.getOperand(i: 0).getReg(), Val: 0);
1661 MI.eraseFromParent();
1662 return true;
1663 }
1664 case Intrinsic::aarch64_mops_memset_tag: {
1665 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1666 // Anyext the value being set to 64 bits (only the bottom 8 bits are read by
1667 // the instruction).
1668 auto &Value = MI.getOperand(i: 3);
1669 Register ExtValueReg = MIB.buildAnyExt(Res: LLT::scalar(SizeInBits: 64), Op: Value).getReg(Idx: 0);
1670 Value.setReg(ExtValueReg);
1671 return true;
1672 }
1673 case Intrinsic::aarch64_prefetch: {
1674 auto &AddrVal = MI.getOperand(i: 1);
1675
1676 int64_t IsWrite = MI.getOperand(i: 2).getImm();
1677 int64_t Target = MI.getOperand(i: 3).getImm();
1678 int64_t IsStream = MI.getOperand(i: 4).getImm();
1679 int64_t IsData = MI.getOperand(i: 5).getImm();
1680
1681 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1682 (!IsData << 3) | // IsDataCache bit
1683 (Target << 1) | // Cache level bits
1684 (unsigned)IsStream; // Stream bit
1685
1686 MIB.buildInstr(Opcode: AArch64::G_AARCH64_PREFETCH).addImm(Val: PrfOp).add(MO: AddrVal);
1687 MI.eraseFromParent();
1688 return true;
1689 }
1690 case Intrinsic::aarch64_neon_uaddv:
1691 case Intrinsic::aarch64_neon_saddv:
1692 case Intrinsic::aarch64_neon_umaxv:
1693 case Intrinsic::aarch64_neon_smaxv:
1694 case Intrinsic::aarch64_neon_uminv:
1695 case Intrinsic::aarch64_neon_sminv: {
1696 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1697 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1698 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1699
1700 auto OldDst = MI.getOperand(i: 0).getReg();
1701 auto OldDstTy = MRI.getType(Reg: OldDst);
1702 LLT NewDstTy = MRI.getType(Reg: MI.getOperand(i: 2).getReg()).getElementType();
1703 if (OldDstTy == NewDstTy)
1704 return true;
1705
1706 auto NewDst = MRI.createGenericVirtualRegister(Ty: NewDstTy);
1707
1708 Helper.Observer.changingInstr(MI);
1709 MI.getOperand(i: 0).setReg(NewDst);
1710 Helper.Observer.changedInstr(MI);
1711
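// Move the insertion point past the intrinsic so the extension of the
// narrowed result is emitted after it.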
1712 MIB.setInsertPt(MBB&: MIB.getMBB(), II: ++MIB.getInsertPt());
1713 MIB.buildExtOrTrunc(ExtOpc: IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1714 Res: OldDst, Op: NewDst);
1715
1716 return true;
1717 }
1718 case Intrinsic::aarch64_neon_uaddlp:
1719 case Intrinsic::aarch64_neon_saddlp: {
1720 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1721 ? AArch64::G_UADDLP
1722 : AArch64::G_SADDLP;
1723 MIB.buildInstr(Opc, DstOps: {MI.getOperand(i: 0)}, SrcOps: {MI.getOperand(i: 2)});
1724 MI.eraseFromParent();
1725
1726 return true;
1727 }
1728 case Intrinsic::aarch64_neon_uaddlv:
1729 case Intrinsic::aarch64_neon_saddlv: {
1730 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1731 ? AArch64::G_UADDLV
1732 : AArch64::G_SADDLV;
1733 Register DstReg = MI.getOperand(i: 0).getReg();
1734 Register SrcReg = MI.getOperand(i: 2).getReg();
1735 LLT DstTy = MRI.getType(Reg: DstReg);
1736
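// G_UADDLV/G_SADDLV are modelled as producing their widened sum in lane 0
// of a vector register, so build the reduction with a vector result,
// extract element 0, then truncate or copy it to the requested type.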
1737 LLT MidTy, ExtTy;
1738 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1739 MidTy = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
1740 ExtTy = LLT::scalar(SizeInBits: 32);
1741 } else {
1742 MidTy = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64);
1743 ExtTy = LLT::scalar(SizeInBits: 64);
1744 }
1745
1746 Register MidReg =
1747 MIB.buildInstr(Opc, DstOps: {MidTy}, SrcOps: {SrcReg})->getOperand(i: 0).getReg();
1748 Register ZeroReg =
1749 MIB.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: 0)->getOperand(i: 0).getReg();
1750 Register ExtReg = MIB.buildInstr(Opc: AArch64::G_EXTRACT_VECTOR_ELT, DstOps: {ExtTy},
1751 SrcOps: {MidReg, ZeroReg})
1752 .getReg(Idx: 0);
1753
1754 if (DstTy.getScalarSizeInBits() < 32)
1755 MIB.buildTrunc(Res: DstReg, Op: ExtReg);
1756 else
1757 MIB.buildCopy(Res: DstReg, Op: ExtReg);
1758
1759 MI.eraseFromParent();
1760
1761 return true;
1762 }
1763 case Intrinsic::aarch64_neon_smax:
1764 return LowerBinOp(TargetOpcode::G_SMAX);
1765 case Intrinsic::aarch64_neon_smin:
1766 return LowerBinOp(TargetOpcode::G_SMIN);
1767 case Intrinsic::aarch64_neon_umax:
1768 return LowerBinOp(TargetOpcode::G_UMAX);
1769 case Intrinsic::aarch64_neon_umin:
1770 return LowerBinOp(TargetOpcode::G_UMIN);
1771 case Intrinsic::aarch64_neon_fmax:
1772 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1773 case Intrinsic::aarch64_neon_fmin:
1774 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1775 case Intrinsic::aarch64_neon_fmaxnm:
1776 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1777 case Intrinsic::aarch64_neon_fminnm:
1778 return LowerBinOp(TargetOpcode::G_FMINNUM);
1779 case Intrinsic::aarch64_neon_smull:
1780 return LowerBinOp(AArch64::G_SMULL);
1781 case Intrinsic::aarch64_neon_umull:
1782 return LowerBinOp(AArch64::G_UMULL);
1783 case Intrinsic::aarch64_neon_abs: {
1784 // Lower the intrinsic to G_ABS.
1785 MIB.buildInstr(Opc: TargetOpcode::G_ABS, DstOps: {MI.getOperand(i: 0)}, SrcOps: {MI.getOperand(i: 2)});
1786 MI.eraseFromParent();
1787 return true;
1788 }
1789 case Intrinsic::aarch64_neon_sqadd: {
1790 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1791 return LowerBinOp(TargetOpcode::G_SADDSAT);
1792 break;
1793 }
1794 case Intrinsic::aarch64_neon_sqsub: {
1795 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1796 return LowerBinOp(TargetOpcode::G_SSUBSAT);
1797 break;
1798 }
1799 case Intrinsic::aarch64_neon_uqadd: {
1800 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1801 return LowerBinOp(TargetOpcode::G_UADDSAT);
1802 break;
1803 }
1804 case Intrinsic::aarch64_neon_uqsub: {
1805 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1806 return LowerBinOp(TargetOpcode::G_USUBSAT);
1807 break;
1808 }
1809
1810 case Intrinsic::vector_reverse:
1811 // TODO: Add support for vector_reverse
1812 return false;
1813 }
1814
1815 return true;
1816}
1817
1818bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1819 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1820 GISelChangeObserver &Observer) const {
1821 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1822 MI.getOpcode() == TargetOpcode::G_LSHR ||
1823 MI.getOpcode() == TargetOpcode::G_SHL);
1824 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1825 // imported patterns can select it later. Either way, it will be legal.
1826 Register AmtReg = MI.getOperand(i: 2).getReg();
1827 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: AmtReg, MRI);
1828 if (!VRegAndVal)
1829 return true;
1830 // Check the shift amount is in range for an immediate form.
1831 int64_t Amount = VRegAndVal->Value.getSExtValue();
1832 if (Amount > 31)
1833 return true; // This will have to remain a register variant.
1834 auto ExtCst = MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: Amount);
1835 Observer.changingInstr(MI);
1836 MI.getOperand(i: 2).setReg(ExtCst.getReg(Idx: 0));
1837 Observer.changedInstr(MI);
1838 return true;
1839}
1840
1841static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1842 MachineRegisterInfo &MRI) {
1843 Base = Root;
1844 Offset = 0;
1845
1846 Register NewBase;
1847 int64_t NewOffset;
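// LDP/STP of X registers take a signed 7-bit immediate scaled by 8 bytes,
// which is exactly what isShiftedInt<7, 3> checks; fold a matching
// G_PTR_ADD into the addressing mode.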
1848 if (mi_match(R: Root, MRI, P: m_GPtrAdd(L: m_Reg(R&: NewBase), R: m_ICst(Cst&: NewOffset))) &&
1849 isShiftedInt<7, 3>(x: NewOffset)) {
1850 Base = NewBase;
1851 Offset = NewOffset;
1852 }
1853}
1854
1855// FIXME: This should be removed and replaced with the generic bitcast legalize
1856// action.
1857bool AArch64LegalizerInfo::legalizeLoadStore(
1858 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1859 GISelChangeObserver &Observer) const {
1860 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1861 MI.getOpcode() == TargetOpcode::G_LOAD);
1862 // Here we just try to handle vector loads/stores where our value type might
1863 // have pointer elements, which the SelectionDAG importer can't handle. To
1864 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1865 // the value to use s64 types.
1866
1867 // Custom legalization requires that the instruction, if not deleted, be
1868 // fully legalized. To allow further legalization of the instruction, we
1869 // create a new instruction and erase the existing one.
1870
1871 Register ValReg = MI.getOperand(i: 0).getReg();
1872 const LLT ValTy = MRI.getType(Reg: ValReg);
1873
1874 if (ValTy == LLT::scalar(SizeInBits: 128)) {
1875
1876 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1877 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1878 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1879 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1880 bool IsRcpC3 =
1881 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1882
1883 LLT s64 = LLT::scalar(SizeInBits: 64);
1884
1885 unsigned Opcode;
1886 if (IsRcpC3) {
1887 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1888 } else {
1889 // For LSE2, loads/stores should have been converted to monotonic and had
1890 // a fence inserted after them.
1891 assert(Ordering == AtomicOrdering::Monotonic ||
1892 Ordering == AtomicOrdering::Unordered);
1893 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1894
1895 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1896 }
1897
1898 MachineInstrBuilder NewI;
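// A 128-bit load becomes a pair load into two s64 halves that are merged
// back into the original value; a 128-bit store unmerges the value and
// stores the two halves.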
1899 if (IsLoad) {
1900 NewI = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {s64, s64}, SrcOps: {});
1901 MIRBuilder.buildMergeLikeInstr(
1902 Res: ValReg, Ops: {NewI->getOperand(i: 0), NewI->getOperand(i: 1)});
1903 } else {
1904 auto Split = MIRBuilder.buildUnmerge(Res: s64, Op: MI.getOperand(i: 0));
1905 NewI = MIRBuilder.buildInstr(
1906 Opc: Opcode, DstOps: {}, SrcOps: {Split->getOperand(i: 0), Split->getOperand(i: 1)});
1907 }
1908
1909 if (IsRcpC3) {
1910 NewI.addUse(RegNo: MI.getOperand(i: 1).getReg());
1911 } else {
1912 Register Base;
1913 int Offset;
1914 matchLDPSTPAddrMode(Root: MI.getOperand(i: 1).getReg(), Base, Offset, MRI);
1915 NewI.addUse(RegNo: Base);
1916 NewI.addImm(Val: Offset / 8);
1917 }
1918
1919 NewI.cloneMemRefs(OtherMI: MI);
1920 constrainSelectedInstRegOperands(I&: *NewI, TII: *ST->getInstrInfo(),
1921 TRI: *MRI.getTargetRegisterInfo(),
1922 RBI: *ST->getRegBankInfo());
1923 MI.eraseFromParent();
1924 return true;
1925 }
1926
1927 if (!ValTy.isPointerVector() ||
1928 ValTy.getElementType().getAddressSpace() != 0) {
1929 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1930 return false;
1931 }
1932
1933 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1934 const LLT NewTy = LLT::vector(EC: ValTy.getElementCount(), ScalarSizeInBits: PtrSize);
1935 auto &MMO = **MI.memoperands_begin();
1936 MMO.setType(NewTy);
1937
1938 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1939 auto Bitcast = MIRBuilder.buildBitcast(Dst: NewTy, Src: ValReg);
1940 MIRBuilder.buildStore(Val: Bitcast.getReg(Idx: 0), Addr: MI.getOperand(i: 1), MMO);
1941 } else {
1942 auto NewLoad = MIRBuilder.buildLoad(Res: NewTy, Addr: MI.getOperand(i: 1), MMO);
1943 MIRBuilder.buildBitcast(Dst: ValReg, Src: NewLoad);
1944 }
1945 MI.eraseFromParent();
1946 return true;
1947}
1948
1949bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1950 MachineRegisterInfo &MRI,
1951 MachineIRBuilder &MIRBuilder) const {
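// Expand G_VAARG assuming a simple pointer-bump va_list: load the current
// pointer, realign it if the type needs more than pointer alignment, load
// the value, then store back the pointer advanced by the pointer-aligned
// value size.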
1952 MachineFunction &MF = MIRBuilder.getMF();
1953 Align Alignment(MI.getOperand(i: 2).getImm());
1954 Register Dst = MI.getOperand(i: 0).getReg();
1955 Register ListPtr = MI.getOperand(i: 1).getReg();
1956
1957 LLT PtrTy = MRI.getType(Reg: ListPtr);
1958 LLT IntPtrTy = LLT::scalar(SizeInBits: PtrTy.getSizeInBits());
1959
1960 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1961 const Align PtrAlign = Align(PtrSize);
1962 auto List = MIRBuilder.buildLoad(
1963 Res: PtrTy, Addr: ListPtr,
1964 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad,
1965 MemTy: PtrTy, base_alignment: PtrAlign));
1966
1967 MachineInstrBuilder DstPtr;
1968 if (Alignment > PtrAlign) {
1969 // Realign the list to the actual required alignment.
1970 auto AlignMinus1 =
1971 MIRBuilder.buildConstant(Res: IntPtrTy, Val: Alignment.value() - 1);
1972 auto ListTmp = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: List, Op1: AlignMinus1.getReg(Idx: 0));
1973 DstPtr = MIRBuilder.buildMaskLowPtrBits(Res: PtrTy, Op0: ListTmp, NumBits: Log2(A: Alignment));
1974 } else
1975 DstPtr = List;
1976
1977 LLT ValTy = MRI.getType(Reg: Dst);
1978 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1979 MIRBuilder.buildLoad(
1980 Res: Dst, Addr: DstPtr,
1981 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad,
1982 MemTy: ValTy, base_alignment: std::max(a: Alignment, b: PtrAlign)));
1983
1984 auto Size = MIRBuilder.buildConstant(Res: IntPtrTy, Val: alignTo(Size: ValSize, A: PtrAlign));
1985
1986 auto NewList = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: DstPtr, Op1: Size.getReg(Idx: 0));
1987
1988 MIRBuilder.buildStore(Val: NewList, Addr: ListPtr,
1989 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
1990 f: MachineMemOperand::MOStore,
1991 MemTy: PtrTy, base_alignment: PtrAlign));
1992
1993 MI.eraseFromParent();
1994 return true;
1995}
1996
1997bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1998 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1999 // Only legal if we can select immediate forms.
2000 // TODO: Lower this otherwise.
2001 return getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 2).getReg(), MRI) &&
2002 getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 3).getReg(), MRI);
2003}
2004
2005bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2006 MachineRegisterInfo &MRI,
2007 LegalizerHelper &Helper) const {
2008 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2009 // it can be more efficiently lowered to the following sequence that uses
2010 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2011 // registers are cheap.
2012 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2013 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2014 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2015 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2016 //
2017 // For 128 bit vector popcounts, we lower to the following sequence:
2018 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2019 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2020 // uaddlp.4s v0, v0 // v4s32, v2s64
2021 // uaddlp.2d v0, v0 // v2s64
2022 //
2023 // For 64 bit vector popcounts, we lower to the following sequence:
2024 // cnt.8b v0, v0 // v4s16, v2s32
2025 // uaddlp.4h v0, v0 // v4s16, v2s32
2026 // uaddlp.2s v0, v0 // v2s32
2027
2028 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2029 Register Dst = MI.getOperand(i: 0).getReg();
2030 Register Val = MI.getOperand(i: 1).getReg();
2031 LLT Ty = MRI.getType(Reg: Val);
2032 unsigned Size = Ty.getSizeInBits();
2033
2034 assert(Ty == MRI.getType(Dst) &&
2035 "Expected src and dst to have the same type!");
2036
2037 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
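// With CSSC, scalar popcounts are handled directly, so compute a 128-bit
// popcount as the sum of the popcounts of the two 64-bit halves and
// zero-extend the result back to 128 bits.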
2038 LLT s64 = LLT::scalar(SizeInBits: 64);
2039
2040 auto Split = MIRBuilder.buildUnmerge(Res: s64, Op: Val);
2041 auto CTPOP1 = MIRBuilder.buildCTPOP(Dst: s64, Src0: Split->getOperand(i: 0));
2042 auto CTPOP2 = MIRBuilder.buildCTPOP(Dst: s64, Src0: Split->getOperand(i: 1));
2043 auto Add = MIRBuilder.buildAdd(Dst: s64, Src0: CTPOP1, Src1: CTPOP2);
2044
2045 MIRBuilder.buildZExt(Res: Dst, Op: Add);
2046 MI.eraseFromParent();
2047 return true;
2048 }
2049
2050 if (!ST->hasNEON() ||
2051 MI.getMF()->getFunction().hasFnAttribute(Kind: Attribute::NoImplicitFloat)) {
2052 // Use generic lowering when custom lowering is not possible.
2053 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2054 Helper.lowerBitCount(MI) ==
2055 LegalizerHelper::LegalizeResult::Legalized;
2056 }
2057
2058 // Pre-conditioning: widen Val up to the nearest vector type.
2059 // s32, s64, v4s16, v2s32 -> v8s8
2060 // v8s16, v4s32, v2s64 -> v16s8
2061 LLT VTy = Size == 128 ? LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8) : LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8);
2062 if (Ty.isScalar()) {
2063 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2064 if (Size == 32) {
2065 Val = MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 64), Op: Val).getReg(Idx: 0);
2066 }
2067 }
2068 Val = MIRBuilder.buildBitcast(Dst: VTy, Src: Val).getReg(Idx: 0);
2069
2070 // Count bits in each byte-sized lane.
2071 auto CTPOP = MIRBuilder.buildCTPOP(Dst: VTy, Src0: Val);
2072
2073 // Sum across lanes.
2074
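// If the dot-product extension is available, accumulate the per-byte
// counts against a splat of ones with UDOT, which sums each group of four
// bytes into a 32-bit lane in a single instruction.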
2075 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2076 Ty.getScalarSizeInBits() != 16) {
2077 LLT Dt = Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) ? LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32) : Ty;
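// UDOT only accumulates into 32-bit lanes, so the v2s64 case accumulates
// into v4s32 and widens pairwise with UADDLP afterwards.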
2078 auto Zeros = MIRBuilder.buildConstant(Res: Dt, Val: 0);
2079 auto Ones = MIRBuilder.buildConstant(Res: VTy, Val: 1);
2080 MachineInstrBuilder Sum;
2081
2082 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
2083 auto UDOT =
2084 MIRBuilder.buildInstr(Opc: AArch64::G_UDOT, DstOps: {Dt}, SrcOps: {Zeros, Ones, CTPOP});
2085 Sum = MIRBuilder.buildInstr(Opc: AArch64::G_UADDLP, DstOps: {Ty}, SrcOps: {UDOT});
2086 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
2087 Sum = MIRBuilder.buildInstr(Opc: AArch64::G_UDOT, DstOps: {Dt}, SrcOps: {Zeros, Ones, CTPOP});
2088 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
2089 Sum = MIRBuilder.buildInstr(Opc: AArch64::G_UDOT, DstOps: {Dt}, SrcOps: {Zeros, Ones, CTPOP});
2090 } else {
2091 llvm_unreachable("unexpected vector shape");
2092 }
2093
2094 Sum->getOperand(i: 0).setReg(Dst);
2095 MI.eraseFromParent();
2096 return true;
2097 }
2098
2099 Register HSum = CTPOP.getReg(Idx: 0);
2100 unsigned Opc;
2101 SmallVector<LLT> HAddTys;
2102 if (Ty.isScalar()) {
2103 Opc = Intrinsic::aarch64_neon_uaddlv;
2104 HAddTys.push_back(Elt: LLT::scalar(SizeInBits: 32));
2105 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) {
2106 Opc = Intrinsic::aarch64_neon_uaddlp;
2107 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16));
2108 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
2109 Opc = Intrinsic::aarch64_neon_uaddlp;
2110 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16));
2111 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32));
2112 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
2113 Opc = Intrinsic::aarch64_neon_uaddlp;
2114 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16));
2115 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32));
2116 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64));
2117 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) {
2118 Opc = Intrinsic::aarch64_neon_uaddlp;
2119 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16));
2120 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
2121 Opc = Intrinsic::aarch64_neon_uaddlp;
2122 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16));
2123 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32));
2124 } else
2125 llvm_unreachable("unexpected vector shape");
2126 MachineInstrBuilder UADD;
2127 for (LLT HTy : HAddTys) {
2128 UADD = MIRBuilder.buildIntrinsic(ID: Opc, Res: {HTy}).addUse(RegNo: HSum);
2129 HSum = UADD.getReg(Idx: 0);
2130 }
2131
2132 // Post-conditioning.
2133 if (Ty.isScalar() && (Size == 64 || Size == 128))
2134 MIRBuilder.buildZExt(Res: Dst, Op: UADD);
2135 else
2136 UADD->getOperand(i: 0).setReg(Dst);
2137 MI.eraseFromParent();
2138 return true;
2139}
2140
2141bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2142 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2143 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2144 LLT s64 = LLT::scalar(SizeInBits: 64);
2145 auto Addr = MI.getOperand(i: 1).getReg();
2146 auto DesiredI = MIRBuilder.buildUnmerge(Res: {s64, s64}, Op: MI.getOperand(i: 2));
2147 auto NewI = MIRBuilder.buildUnmerge(Res: {s64, s64}, Op: MI.getOperand(i: 3));
2148 auto DstLo = MRI.createGenericVirtualRegister(Ty: s64);
2149 auto DstHi = MRI.createGenericVirtualRegister(Ty: s64);
2150
2151 MachineInstrBuilder CAS;
2152 if (ST->hasLSE()) {
2153 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2154 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2155 // the rest of the MIR, so we must reassemble the extracted registers into a
2156 // 128-bit known-regclass one with code like this:
2157 //
2158 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2159 // %out = CASP %in1, ...
2160 // %OldLo = G_EXTRACT %out, 0
2161 // %OldHi = G_EXTRACT %out, 64
2162 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2163 unsigned Opcode;
2164 switch (Ordering) {
2165 case AtomicOrdering::Acquire:
2166 Opcode = AArch64::CASPAX;
2167 break;
2168 case AtomicOrdering::Release:
2169 Opcode = AArch64::CASPLX;
2170 break;
2171 case AtomicOrdering::AcquireRelease:
2172 case AtomicOrdering::SequentiallyConsistent:
2173 Opcode = AArch64::CASPALX;
2174 break;
2175 default:
2176 Opcode = AArch64::CASPX;
2177 break;
2178 }
2179
2180 LLT s128 = LLT::scalar(SizeInBits: 128);
2181 auto CASDst = MRI.createGenericVirtualRegister(Ty: s128);
2182 auto CASDesired = MRI.createGenericVirtualRegister(Ty: s128);
2183 auto CASNew = MRI.createGenericVirtualRegister(Ty: s128);
2184 MIRBuilder.buildInstr(Opc: TargetOpcode::REG_SEQUENCE, DstOps: {CASDesired}, SrcOps: {})
2185 .addUse(RegNo: DesiredI->getOperand(i: 0).getReg())
2186 .addImm(Val: AArch64::sube64)
2187 .addUse(RegNo: DesiredI->getOperand(i: 1).getReg())
2188 .addImm(Val: AArch64::subo64);
2189 MIRBuilder.buildInstr(Opc: TargetOpcode::REG_SEQUENCE, DstOps: {CASNew}, SrcOps: {})
2190 .addUse(RegNo: NewI->getOperand(i: 0).getReg())
2191 .addImm(Val: AArch64::sube64)
2192 .addUse(RegNo: NewI->getOperand(i: 1).getReg())
2193 .addImm(Val: AArch64::subo64);
2194
2195 CAS = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {CASDst}, SrcOps: {CASDesired, CASNew, Addr});
2196
2197 MIRBuilder.buildExtract(Res: {DstLo}, Src: {CASDst}, Index: 0);
2198 MIRBuilder.buildExtract(Res: {DstHi}, Src: {CASDst}, Index: 64);
2199 } else {
2200 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2201 // can take arbitrary registers, so it just has the normal GPR64 operands
2202 // that the rest of AArch64 is expecting.
2203 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2204 unsigned Opcode;
2205 switch (Ordering) {
2206 case AtomicOrdering::Acquire:
2207 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2208 break;
2209 case AtomicOrdering::Release:
2210 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2211 break;
2212 case AtomicOrdering::AcquireRelease:
2213 case AtomicOrdering::SequentiallyConsistent:
2214 Opcode = AArch64::CMP_SWAP_128;
2215 break;
2216 default:
2217 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2218 break;
2219 }
2220
2221 auto Scratch = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
2222 CAS = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {DstLo, DstHi, Scratch},
2223 SrcOps: {Addr, DesiredI->getOperand(i: 0),
2224 DesiredI->getOperand(i: 1), NewI->getOperand(i: 0),
2225 NewI->getOperand(i: 1)});
2226 }
2227
2228 CAS.cloneMemRefs(OtherMI: MI);
2229 constrainSelectedInstRegOperands(I&: *CAS, TII: *ST->getInstrInfo(),
2230 TRI: *MRI.getTargetRegisterInfo(),
2231 RBI: *ST->getRegBankInfo());
2232
2233 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0), Ops: {DstLo, DstHi});
2234 MI.eraseFromParent();
2235 return true;
2236}
2237
2238bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2239 LegalizerHelper &Helper) const {
2240 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2241 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2242 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
2243 auto BitReverse = MIRBuilder.buildBitReverse(Dst: Ty, Src: MI.getOperand(i: 1));
2244 MIRBuilder.buildCTLZ(Dst: MI.getOperand(i: 0).getReg(), Src0: BitReverse);
2245 MI.eraseFromParent();
2246 return true;
2247}
2248
2249bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2250 LegalizerHelper &Helper) const {
2251 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2252
2253 // The tagged version, MOPSMemorySetTagged, is legalized in legalizeIntrinsic.
2254 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2255 // Anyext the value being set to 64 bits (only the bottom 8 bits are read by
2256 // the instruction).
2257 auto &Value = MI.getOperand(i: 1);
2258 Register ExtValueReg =
2259 MIRBuilder.buildAnyExt(Res: LLT::scalar(SizeInBits: 64), Op: Value).getReg(Idx: 0);
2260 Value.setReg(ExtValueReg);
2261 return true;
2262 }
2263
2264 return false;
2265}
2266
2267bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2268 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2269 const GExtractVectorElement *Element = cast<GExtractVectorElement>(Val: &MI);
2270 auto VRegAndVal =
2271 getIConstantVRegValWithLookThrough(VReg: Element->getIndexReg(), MRI);
2272 if (VRegAndVal)
2273 return true;
2274 LLT VecTy = MRI.getType(Reg: Element->getVectorReg());
2275 if (VecTy.isScalableVector())
2276 return true;
2277 return Helper.lowerExtractInsertVectorElt(MI) !=
2278 LegalizerHelper::LegalizeResult::UnableToLegalize;
2279}
2280
2281bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2282 MachineInstr &MI, LegalizerHelper &Helper) const {
2283 MachineFunction &MF = *MI.getParent()->getParent();
2284 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2285 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2286
2287 // If stack probing is not enabled for this function, use the default
2288 // lowering.
2289 if (!MF.getFunction().hasFnAttribute(Kind: "probe-stack") ||
2290 MF.getFunction().getFnAttribute(Kind: "probe-stack").getValueAsString() !=
2291 "inline-asm") {
2292 Helper.lowerDynStackAlloc(MI);
2293 return true;
2294 }
2295
2296 Register Dst = MI.getOperand(i: 0).getReg();
2297 Register AllocSize = MI.getOperand(i: 1).getReg();
2298 Align Alignment = assumeAligned(Value: MI.getOperand(i: 2).getImm());
2299
2300 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2301 "Unexpected type for dynamic alloca");
2302 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2303 "Unexpected type for dynamic alloca");
2304
2305 LLT PtrTy = MRI.getType(Reg: Dst);
2306 Register SPReg =
2307 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2308 Register SPTmp =
2309 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
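// Probe the newly allocated region with the target pseudo; the resulting
// pointer is simply a copy of the adjusted stack pointer.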
2310 auto NewMI =
2311 MIRBuilder.buildInstr(Opc: AArch64::PROBED_STACKALLOC_DYN, DstOps: {}, SrcOps: {SPTmp});
2312 MRI.setRegClass(Reg: NewMI.getReg(Idx: 0), RC: &AArch64::GPR64commonRegClass);
2313 MIRBuilder.setInsertPt(MBB&: *NewMI->getParent(), II: NewMI);
2314 MIRBuilder.buildCopy(Res: Dst, Op: SPTmp);
2315
2316 MI.eraseFromParent();
2317 return true;
2318}
2319
2320bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2321 LegalizerHelper &Helper) const {
2322 MachineIRBuilder &MIB = Helper.MIRBuilder;
2323 auto &AddrVal = MI.getOperand(i: 0);
2324
2325 int64_t IsWrite = MI.getOperand(i: 1).getImm();
2326 int64_t Locality = MI.getOperand(i: 2).getImm();
2327 int64_t IsData = MI.getOperand(i: 3).getImm();
2328
2329 bool IsStream = Locality == 0;
2330 if (Locality != 0) {
2331 assert(Locality <= 3 && "Prefetch locality out-of-range");
2332 // The locality hint counts in the opposite direction to the target cache
2333 // level, so invert the number.
2334 // The encoding starts at 0 for the L1 cache.
2335 Locality = 3 - Locality;
2336 }
2337
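// PRFM operand encoding: bit 4 selects store prefetches, bit 3 selects the
// instruction cache, bits 2:1 give the target cache level, and bit 0 marks
// a streaming (non-temporal) prefetch.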
2338 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2339
2340 MIB.buildInstr(Opcode: AArch64::G_AARCH64_PREFETCH).addImm(Val: PrfOp).add(MO: AddrVal);
2341 MI.eraseFromParent();
2342 return true;
2343}
2344