//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/MathExtras.h"
#include <initializer_list>

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;
using namespace MIPatternMatch;

AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
    : ST(&ST) {
  using namespace TargetOpcode;
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT v16s8 = LLT::fixed_vector(16, 8);
  const LLT v8s8 = LLT::fixed_vector(8, 8);
  const LLT v4s8 = LLT::fixed_vector(4, 8);
  const LLT v2s8 = LLT::fixed_vector(2, 8);
  const LLT v8s16 = LLT::fixed_vector(8, 16);
  const LLT v4s16 = LLT::fixed_vector(4, 16);
  const LLT v2s16 = LLT::fixed_vector(2, 16);
  const LLT v2s32 = LLT::fixed_vector(2, 32);
  const LLT v4s32 = LLT::fixed_vector(4, 32);
  const LLT v2s64 = LLT::fixed_vector(2, 64);
  const LLT v2p0 = LLT::fixed_vector(2, p0);

  const LLT nxv16s8 = LLT::scalable_vector(16, s8);
  const LLT nxv8s16 = LLT::scalable_vector(8, s16);
  const LLT nxv4s32 = LLT::scalable_vector(4, s32);
  const LLT nxv2s64 = LLT::scalable_vector(2, s64);
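  // Naming convention for the LLTs above: sN is an N-bit scalar, vKsN is a
  // fixed vector of K N-bit scalars, p0 is a pointer in address space 0, and
  // the nxv* types are SVE scalable vectors with the given minimum element
  // count.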

  std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
                                                        v16s8, v8s16, v4s32,
                                                        v2s64, v2p0,
                                                        /* End 128bit types */
                                                        /* Begin 64bit types */
                                                        v8s8, v4s16, v2s32};
  std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
  SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
  SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);

  const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();

  // FIXME: support subtargets which have neon/fp-armv8 disabled.
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    getLegacyLegalizerInfo().computeTables();
    return;
  }

  // Some instructions only support s16 if the subtarget has full 16-bit FP
  // support.
  const bool HasFP16 = ST.hasFullFP16();
  const LLT &MinFPScalar = HasFP16 ? s16 : s32;

  const bool HasCSSC = ST.hasCSSC();
  const bool HasRCPC3 = ST.hasRCPC3();

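  // Note: within each getActionDefinitionsBuilder() rule set below, rules are
  // (roughly speaking) tried in the order they are listed and the first
  // matching rule decides the action, so the ordering of the chained calls is
  // significant.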
  getActionDefinitionsBuilder(
      {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
      .legalFor({p0, s8, s16, s32, s64})
      .legalFor(PackedVectorAllTypeList)
      .widenScalarToNextPow2(0)
      .clampScalar(0, s8, s64)
      .moreElementsToNextPow2(0)
      .widenVectorEltsToVectorMinSize(0, 64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64})
      .legalFor(PackedVectorAllTypeList)
      .widenScalarToNextPow2(0)
      .clampScalar(0, s16, s64)
      // Maximum: sN * k = 128
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampMaxNumElements(0, s32, 4)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2);

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
      .widenScalarOrEltToNextPow2(0, 16)
      .clampScalar(0, s32, s64)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

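  // For the basic binary integer ops handled next, narrow vectors are widened
  // element-wise so that the whole vector roughly fills a 64-bit D register:
  // e.g. a v2s8 operand is promoted up to v2s32 by the minScalarOrEltIf rules
  // below.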
  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, s64)
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 2;
          },
          0, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 4;
          },
          0, s16)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 16;
          },
          0, s8)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor({
          {s32, s32},
          {s32, s64},
          {s64, s64},
          {v8s8, v8s8},
          {v16s8, v16s8},
          {v4s16, v4s16},
          {v8s16, v8s16},
          {v2s32, v2s32},
          {v4s32, v4s32},
          {v2s64, v2s64},
      })
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_PTR_ADD)
      .legalFor({{p0, s64}, {v2p0, v2s64}})
      .clampScalarOrElt(1, s64, s64)
      .clampNumElements(0, v2p0, v2p0);

  getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .libcallFor({s128})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
      .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
      .widenScalarOrEltToNextPow2(0)
      .clampScalarOrElt(0, s32, s64)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .widenScalarToNextPow2(0, /*Min = */ 32)
      .clampScalar(0, s32, s64)
      .lower();

  getActionDefinitionsBuilder({G_SMULH, G_UMULH})
      .legalFor({s64, v8s16, v16s8, v4s32})
      .lower();

  auto &MinMaxActions = getActionDefinitionsBuilder(
      {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
  if (HasCSSC)
    MinMaxActions
        .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
        // Clamping is made conditional on the CSSC extension: without legal
        // scalar types we lower to a CMP, which can fold one of the two sxtb's
        // we'd otherwise need when the type is smaller than 32 bits.
        .minScalar(0, s32);
  else
    MinMaxActions
        .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
  MinMaxActions
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      // FIXME: This shouldn't be needed as v2s64 types are going to
      // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
      .clampNumElements(0, v2s64, v2s64)
      .lower();

  getActionDefinitionsBuilder(
      {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
      .legalFor({{s32, s32}, {s64, s32}})
      .clampScalar(0, s32, s64)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
                               G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
                               G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
                               G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
                               G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
      .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
      .legalIf([=](const LegalityQuery &Query) {
        const auto &Ty = Query.Types[0];
        return (Ty == v8s16 || Ty == v4s16) && HasFP16;
      })
      .libcallFor({s128})
      .minScalarOrElt(0, MinFPScalar)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder(G_FREM)
      .libcallFor({s32, s64})
      .minScalar(0, s32)
      .scalarize(0);

  getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
      .legalFor({{s64, MinFPScalar}, {s64, s32}, {s64, s64}})
      .libcallFor({{s64, s128}})
      .minScalarOrElt(1, MinFPScalar);

  getActionDefinitionsBuilder(
      {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, G_FTAN, G_FEXP,
       G_FEXP2, G_FEXP10, G_FACOS, G_FASIN, G_FATAN, G_FCOSH, G_FSINH,
       G_FTANH})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64});
  getActionDefinitionsBuilder(G_FPOWI)
      .scalarize(0)
      .minScalar(0, s32)
      .libcallFor({{s32, s32}, {s64, s32}});

  getActionDefinitionsBuilder(G_INSERT)
      .legalIf(all(typeInSet(0, {s32, s64, p0}),
                   typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(1)
      .minScalar(1, s8)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);

  getActionDefinitionsBuilder(G_EXTRACT)
      .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
                   typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
      .widenScalarToNextPow2(1)
      .clampScalar(1, s32, s128)
      .widenScalarToNextPow2(0)
      .minScalar(0, s16)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
      .maxScalarIf(typeInSet(1, {s128}), 0, s64);

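  // For the load/store rules below, each legalForTypesWithMemDesc() entry is a
  // {Type0, Type1, MemTy, Align} tuple: the value type, the pointer type, the
  // in-memory type, and a minimum alignment for which the operation is legal.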
  for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
    auto &Actions = getActionDefinitionsBuilder(Op);

    if (Op == G_SEXTLOAD)
      Actions.lowerIf(
          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));

    // Atomics have zero extending behavior.
    Actions
        .legalForTypesWithMemDesc({{s32, p0, s8, 8},
                                   {s32, p0, s16, 8},
                                   {s32, p0, s32, 8},
                                   {s64, p0, s8, 2},
                                   {s64, p0, s16, 2},
                                   {s64, p0, s32, 4},
                                   {s64, p0, s64, 8},
                                   {p0, p0, s64, 8},
                                   {v2s32, p0, s64, 8}})
        .widenScalarToNextPow2(0)
        .clampScalar(0, s32, s64)
        // TODO: We could support sum-of-pow2's but the lowering code doesn't
        // know how to do that yet.
        .unsupportedIfMemSizeNotPow2()
        // Lower anything left over into G_*EXT and G_LOAD
        .lower();
  }

  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
  };

  auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
  auto &StoreActions = getActionDefinitionsBuilder(G_STORE);

  if (ST.hasSVE()) {
    LoadActions.legalForTypesWithMemDesc({
        // 128 bit base sizes
        {nxv16s8, p0, nxv16s8, 8},
        {nxv8s16, p0, nxv8s16, 8},
        {nxv4s32, p0, nxv4s32, 8},
        {nxv2s64, p0, nxv2s64, 8},
    });

    // TODO: Add nxv2p0. Consider bitcastIf.
    // See #92130
    // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
    StoreActions.legalForTypesWithMemDesc({
        // 128 bit base sizes
        {nxv16s8, p0, nxv16s8, 8},
        {nxv8s16, p0, nxv8s16, 8},
        {nxv4s32, p0, nxv4s32, 8},
        {nxv2s64, p0, nxv2s64, 8},
    });
  }

  LoadActions
      .customIf([=](const LegalityQuery &Query) {
        return HasRCPC3 && Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
      })
      .customIf([=](const LegalityQuery &Query) {
        return Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
      })
      .legalForTypesWithMemDesc({{s8, p0, s8, 8},
                                 {s16, p0, s16, 8},
                                 {s32, p0, s32, 8},
                                 {s64, p0, s64, 8},
                                 {p0, p0, s64, 8},
                                 {s128, p0, s128, 8},
                                 {v8s8, p0, s64, 8},
                                 {v16s8, p0, s128, 8},
                                 {v4s16, p0, s64, 8},
                                 {v8s16, p0, s128, 8},
                                 {v2s32, p0, s64, 8},
                                 {v4s32, p0, s128, 8},
                                 {v2s64, p0, s128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc(
          {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
      .widenScalarToNextPow2(0, /* MinSize = */ 8)
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampMaxNumElements(0, s32, 4)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2)
      .lowerIfMemSizeNotByteSizePow2()
      .clampScalar(0, s8, s64)
      .narrowScalarIf(
          [=](const LegalityQuery &Query) {
            // Clamp extending load results to 32-bits.
            return Query.Types[0].isScalar() &&
                   Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
                   Query.Types[0].getSizeInBits() > 32;
          },
          changeTo(0, s32))
      // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
      .bitcastIf(typeInSet(0, {v4s8}),
                 [=](const LegalityQuery &Query) {
                   const LLT VecTy = Query.Types[0];
                   return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
                 })
      .customIf(IsPtrVecPred)
      .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);

  StoreActions
      .customIf([=](const LegalityQuery &Query) {
        return HasRCPC3 && Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
      })
      .customIf([=](const LegalityQuery &Query) {
        return Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
      })
      .legalForTypesWithMemDesc(
          {{s8, p0, s8, 8},     {s16, p0, s8, 8},  // truncstorei8 from s16
           {s32, p0, s8, 8},                       // truncstorei8 from s32
           {s64, p0, s8, 8},                       // truncstorei8 from s64
           {s16, p0, s16, 8},   {s32, p0, s16, 8}, // truncstorei16 from s32
           {s64, p0, s16, 8},                      // truncstorei16 from s64
           {s32, p0, s8, 8},    {s32, p0, s16, 8},    {s32, p0, s32, 8},
           {s64, p0, s64, 8},   {s64, p0, s32, 8}, // truncstorei32 from s64
           {p0, p0, s64, 8},    {s128, p0, s128, 8},  {v16s8, p0, s128, 8},
           {v8s8, p0, s64, 8},  {v4s16, p0, s64, 8},  {v8s16, p0, s128, 8},
           {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
      .clampScalar(0, s8, s64)
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0] != Query.MMODescrs[0].MemoryTy;
      })
      // Maximum: sN * k = 128
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampMaxNumElements(0, s32, 4)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2)
      .lowerIfMemSizeNotPow2()
      // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
      .bitcastIf(typeInSet(0, {v4s8}),
                 [=](const LegalityQuery &Query) {
                   const LLT VecTy = Query.Types[0];
                   return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
                 })
      .customIf(IsPtrVecPred)
      .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);

  getActionDefinitionsBuilder(G_INDEXED_STORE)
      // Idx 0 == Ptr, Idx 1 == Val
      // TODO: we can implement legalizations but as of now these are
      // generated in a very specific way.
      .legalForTypesWithMemDesc({
          {p0, s8, s8, 8},
          {p0, s16, s16, 8},
          {p0, s32, s8, 8},
          {p0, s32, s16, 8},
          {p0, s32, s32, 8},
          {p0, s64, s64, 8},
          {p0, p0, p0, 8},
          {p0, v8s8, v8s8, 8},
          {p0, v16s8, v16s8, 8},
          {p0, v4s16, v4s16, 8},
          {p0, v8s16, v8s16, 8},
          {p0, v2s32, v2s32, 8},
          {p0, v4s32, v4s32, 8},
          {p0, v2s64, v2s64, 8},
          {p0, v2p0, v2p0, 8},
          {p0, s128, s128, 8},
      })
      .unsupported();

  auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
    LLT LdTy = Query.Types[0];
    LLT PtrTy = Query.Types[1];
    if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
        !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
      return false;
    if (PtrTy != p0)
      return false;
    return true;
  };
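  // In other words: pre/post-indexed loads are kept only for the usual scalar,
  // pointer and 64/128-bit vector types with a plain p0 base pointer; anything
  // else is rejected by the rules below.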
  getActionDefinitionsBuilder(G_INDEXED_LOAD)
      .unsupportedIf(
          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
      .legalIf(IndexedLoadBasicPred)
      .unsupported();
  getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
      .unsupportedIf(
          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
      .legalIf(all(typeInSet(0, {s16, s32, s64}),
                   LegalityPredicate([=](const LegalityQuery &Q) {
                     LLT LdTy = Q.Types[0];
                     LLT PtrTy = Q.Types[1];
                     LLT MemTy = Q.MMODescrs[0].MemoryTy;
                     if (PtrTy != p0)
                       return false;
                     if (LdTy == s16)
                       return MemTy == s8;
                     if (LdTy == s32)
                       return MemTy == s8 || MemTy == s16;
                     if (LdTy == s64)
                       return MemTy == s8 || MemTy == s16 || MemTy == s32;
                     return false;
                   })))
      .unsupported();

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s8, s64);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalIf([=](const LegalityQuery &Query) {
        const auto &Ty = Query.Types[0];
        if (HasFP16 && Ty == s16)
          return true;
        return Ty == s32 || Ty == s64 || Ty == s128;
      })
      .clampScalar(0, MinFPScalar, s128);

  // FIXME: fix moreElementsToNextPow2
  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
      .widenScalarOrEltToNextPow2(1)
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.isPointerVector() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
          s64)
      .moreElementsToNextPow2(1)
      .clampNumElements(1, v8s8, v16s8)
      .clampNumElements(1, v4s16, v8s16)
      .clampNumElements(1, v2s32, v4s32)
      .clampNumElements(1, v2s64, v2s64)
      .customIf(isVector(0));

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, MinFPScalar},
                 {s32, s32},
                 {s32, s64},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64}})
      .legalIf([=](const LegalityQuery &Query) {
        const auto &Ty = Query.Types[1];
        return (Ty == v8s16 || Ty == v4s16) && Ty == Query.Types[0] && HasFP16;
      })
      .widenScalarOrEltToNextPow2(1)
      .clampScalar(0, s32, s32)
      .clampScalarOrElt(1, MinFPScalar, s64)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.isPointerVector() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .clampNumElements(1, v4s16, v8s16)
      .clampNumElements(1, v2s32, v4s32)
      .clampMaxNumElements(1, s64, 2)
      .moreElementsToNextPow2(1);

  // Extensions
  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
    unsigned DstSize = Query.Types[0].getSizeInBits();

    // Handle legal vectors using legalFor
    if (Query.Types[0].isVector())
      return false;

    if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
      return false; // Extending to a scalar s128 needs narrowing.

    const LLT &SrcTy = Query.Types[1];

    // Make sure we fit in a register otherwise. Don't bother checking that
    // the source type is below 128 bits. We shouldn't be allowing anything
    // through which is wider than the destination in the first place.
    unsigned SrcSize = SrcTy.getSizeInBits();
    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
      return false;

    return true;
  };
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf(ExtLegalFunc)
      .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
      .clampScalar(0, s64, s64) // Just for s128, others are handled above.
      .moreElementsToNextPow2(0)
      .clampMaxNumElements(1, s8, 8)
      .clampMaxNumElements(1, s16, 4)
      .clampMaxNumElements(1, s32, 2)
      // Tries to convert a large EXTEND into two smaller EXTENDs
      .lowerIf([=](const LegalityQuery &Query) {
        return (Query.Types[0].getScalarSizeInBits() >
                Query.Types[1].getScalarSizeInBits() * 2) &&
               Query.Types[0].isVector() &&
               (Query.Types[1].getScalarSizeInBits() == 8 ||
                Query.Types[1].getScalarSizeInBits() == 16);
      })
      .clampMinNumElements(1, s8, 8)
      .clampMinNumElements(1, s16, 4);

  getActionDefinitionsBuilder(G_TRUNC)
      .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
      .moreElementsToNextPow2(0)
      .clampMaxNumElements(0, s8, 8)
      .clampMaxNumElements(0, s16, 4)
      .clampMaxNumElements(0, s32, 2)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
          0, s8)
      .lowerIf([=](const LegalityQuery &Query) {
        LLT DstTy = Query.Types[0];
        LLT SrcTy = Query.Types[1];
        return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
               DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
      })
      .clampMinNumElements(0, s8, 8)
      .clampMinNumElements(0, s16, 4)
      .alwaysLegal();

  getActionDefinitionsBuilder(G_SEXT_INREG)
      .legalFor({s32, s64})
      .legalFor(PackedVectorAllTypeList)
      .maxScalar(0, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .lower();

  // FP conversions
  getActionDefinitionsBuilder(G_FPTRUNC)
      .legalFor(
          {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
      .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
      .clampNumElements(0, v4s16, v4s16)
      .clampNumElements(0, v2s32, v2s32)
      .scalarize(0);

  getActionDefinitionsBuilder(G_FPEXT)
      .legalFor(
          {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
      .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .scalarize(0);

  // Conversions
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalFor({{s32, s32},
                 {s64, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s64, v2s64},
                 {v4s32, v4s32},
                 {v2s32, v2s32}})
      .legalIf([=](const LegalityQuery &Query) {
        return HasFP16 &&
               (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
                Query.Types[1] == v8s16) &&
               (Query.Types[0] == s32 || Query.Types[0] == s64 ||
                Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
      })
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      // The range of a fp16 value fits into an i17, so we can lower the width
      // to i64.
      .narrowScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
          },
          changeTo(0, s64))
      .moreElementsToNextPow2(0)
      .widenScalarOrEltToNextPow2OrMinSize(0)
      .minScalar(0, s32)
      .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() >
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(1, 0))
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() <
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(0, 1))
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .libcallFor(
          {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalFor({{s32, s32},
                 {s64, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s64, v2s64},
                 {v4s32, v4s32},
                 {v2s32, v2s32}})
      .legalIf([=](const LegalityQuery &Query) {
        return HasFP16 &&
               (Query.Types[0] == s16 || Query.Types[0] == v4s16 ||
                Query.Types[0] == v8s16) &&
               (Query.Types[1] == s32 || Query.Types[1] == s64 ||
                Query.Types[1] == v4s16 || Query.Types[1] == v8s16);
      })
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .moreElementsToNextPow2(1)
      .widenScalarOrEltToNextPow2OrMinSize(1)
      .minScalar(1, s32)
      .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() <
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(0, 1))
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() >
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(1, 0))
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .libcallFor({{s16, s128},
                   {s32, s128},
                   {s64, s128},
                   {s128, s128},
                   {s128, s32},
                   {s128, s64}});

  // Control-flow
  getActionDefinitionsBuilder(G_BRCOND)
      .legalFor({s32})
      .clampScalar(0, s32, s32);
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, s64)
      .clampScalar(1, s32, s32)
      .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
      .lowerIf(isVector(0));

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});

  if (TM.getCodeModel() == CodeModel::Small)
    getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
  else
    getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
      .legalIf(all(typeIs(0, p0), typeIs(1, p0)));

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalFor({{s64, p0}, {v2s64, v2p0}})
      .widenScalarToNextPow2(0, 64)
      .clampScalar(0, s64, s64);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}, {v2p0, v2s64}});

  // Casts for 32 and 64-bit width type are just copies.
  // Same for 128-bit width type, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // Keeping 32-bit instructions legal to prevent regression in some tests
      .legalForCartesianProduct({s32, v2s16, v4s8})
      .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
      .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isVector() != Query.Types[1].isVector();
      })
      .moreElementsToNextPow2(0)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .lower();

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
      .lowerIf(
          all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));

  LegalityPredicate UseOutlineAtomics = [&ST](const LegalityQuery &Query) {
    return ST.outlineAtomics() && !ST.hasLSE();
  };
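  // With outline atomics enabled and no LSE, the cmpxchg/RMW rules below fall
  // back to library calls (the __aarch64_* outline-atomics helpers) instead of
  // inline expansion.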

  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
      .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
                   predNot(UseOutlineAtomics)))
      .customIf(all(typeIs(0, s128), predNot(UseOutlineAtomics)))
      .customIf([UseOutlineAtomics](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() == 128 &&
               !UseOutlineAtomics(Query);
      })
      .libcallIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, p0),
                     UseOutlineAtomics))
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
                               G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
                               G_ATOMICRMW_XOR})
      .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0),
                   predNot(UseOutlineAtomics)))
      .libcallIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
                     UseOutlineAtomics))
      .clampScalar(0, s32, s64);

  // Do not outline these atomic operations, as per the comment in
  // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
  getActionDefinitionsBuilder(
      {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
      .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
    getActionDefinitionsBuilder(Op)
        .widenScalarToNextPow2(LitTyIdx, 8)
        .widenScalarToNextPow2(BigTyIdx, 32)
        .clampScalar(LitTyIdx, s8, s64)
        .clampScalar(BigTyIdx, s32, s128)
        .legalIf([=](const LegalityQuery &Q) {
          switch (Q.Types[BigTyIdx].getSizeInBits()) {
          case 32:
          case 64:
          case 128:
            break;
          default:
            return false;
          }
          switch (Q.Types[LitTyIdx].getSizeInBits()) {
          case 8:
          case 16:
          case 32:
          case 64:
            return true;
          default:
            return false;
          }
        });
  }
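  // For example, merging two s32 values into an s64, or unmerging an s128 into
  // two s64 pieces, satisfies the legalIf check above; odd-sized pieces are
  // first widened/clamped by the preceding rules.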

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .customIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
               VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
      })
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            // We want to promote <M x s1> to <M x s64> if that wouldn't
            // cause the total vec size to be > 128b.
            return Query.Types[1].getNumElements() <= 2;
          },
          0, s64)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].getNumElements() <= 4;
          },
          0, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].getNumElements() <= 8;
          },
          0, s16)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].getNumElements() <= 16;
          },
          0, s8)
      .minScalarOrElt(0, s8) // Worst case, we need at least s8.
      .moreElementsToNextPow2(1)
      .clampMaxNumElements(1, s64, 2)
      .clampMaxNumElements(1, s32, 4)
      .clampMaxNumElements(1, s16, 8)
      .clampMaxNumElements(1, s8, 16)
      .clampMaxNumElements(1, p0, 2);

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf(
          typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0}))
      .moreElementsToNextPow2(0)
      .widenVectorEltsToVectorMinSize(0, 64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2);

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v8s8, s8},
                 {v16s8, s8},
                 {v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2p0, p0},
                 {v2s64, s64}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .minScalarOrElt(0, s8)
      .widenVectorEltsToVectorMinSize(0, 64)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();

  getActionDefinitionsBuilder(G_CTLZ)
      .legalForCartesianProduct(
          {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .scalarize(1)
      .widenScalarToNextPow2(1, /*Min=*/32)
      .clampScalar(1, s32, s64)
      .scalarSameSizeAs(0, 1);
  getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();

  // TODO: Custom lowering for v2s32, v4s32, v2s64.
  getActionDefinitionsBuilder(G_BITREVERSE)
      .legalFor({s32, s64, v8s8, v16s8})
      .widenScalarToNextPow2(0, /*Min = */ 32)
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();

  getActionDefinitionsBuilder(G_CTTZ)
      .lowerIf(isVector(0))
      .widenScalarToNextPow2(1, /*Min=*/32)
      .clampScalar(1, s32, s64)
      .scalarSameSizeAs(0, 1)
      .legalIf([=](const LegalityQuery &Query) {
        return (HasCSSC && typeInSet(0, {s32, s64})(Query));
      })
      .customIf([=](const LegalityQuery &Query) {
        return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
      });

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        return llvm::is_contained(
            {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
      // just want those lowered into G_BUILD_VECTOR
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[1].isVector();
      })
      .moreElementsIf(
          [](const LegalityQuery &Query) {
            return Query.Types[0].isVector() && Query.Types[1].isVector() &&
                   Query.Types[0].getNumElements() >
                       Query.Types[1].getNumElements();
          },
          changeTo(1, 0))
      .moreElementsToNextPow2(0)
      .moreElementsIf(
          [](const LegalityQuery &Query) {
            return Query.Types[0].isVector() && Query.Types[1].isVector() &&
                   Query.Types[0].getNumElements() <
                       Query.Types[1].getNumElements();
          },
          changeTo(0, 1))
      .widenScalarOrEltToNextPow2OrMinSize(0, 8)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})
      .bitcastIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getSizeInBits() <= 128 &&
                   Query.Types[1].getSizeInBits() <= 64;
          },
          [=](const LegalityQuery &Query) {
            const LLT DstTy = Query.Types[0];
            const LLT SrcTy = Query.Types[1];
            return std::pair(
                0, DstTy.changeElementSize(SrcTy.getSizeInBits())
                       .changeElementCount(
                           DstTy.getElementCount().divideCoefficientBy(
                               SrcTy.getNumElements())));
          });
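  // Roughly speaking, the bitcastIf above recasts the destination as a vector
  // whose element size equals a whole source vector (e.g. a v8s16 result of
  // concatenating two v4s16 values is treated as v2s64), so the concat maps
  // onto simple register-sized pieces.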

  getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});

  getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});

  getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();

  getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();

  if (ST.hasMOPS()) {
    // G_BZERO is not supported. Currently it is only emitted by
    // PreLegalizerCombiner for G_MEMSET with zero constant.
    getActionDefinitionsBuilder(G_BZERO).unsupported();

    getActionDefinitionsBuilder(G_MEMSET)
        .legalForCartesianProduct({p0}, {s64}, {s64})
        .customForCartesianProduct({p0}, {s8}, {s64})
        .immIdx(0); // Inform verifier imm idx 0 is handled.

    getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
        .legalForCartesianProduct({p0}, {p0}, {s64})
        .immIdx(0); // Inform verifier imm idx 0 is handled.

    // G_MEMCPY_INLINE does not have a tailcall immediate
    getActionDefinitionsBuilder(G_MEMCPY_INLINE)
        .legalForCartesianProduct({p0}, {p0}, {s64});

  } else {
    getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
        .libcall();
  }
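  // Note: MOPS (FEAT_MOPS) provides dedicated memory copy/set instructions, so
  // the memory intrinsics above can stay as instructions; without it they are
  // lowered to libcalls (memcpy/memset and friends).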

  // FIXME: Legal vector types are only legal with NEON.
  auto &ABSActions = getActionDefinitionsBuilder(G_ABS);
  if (HasCSSC)
    ABSActions
        .legalFor({s32, s64});
  ABSActions.legalFor(PackedVectorAllTypeList)
      .customIf([=](const LegalityQuery &Q) {
        // TODO: Fix suboptimal codegen for 128+ bit types.
        LLT SrcTy = Q.Types[0];
        return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
      })
      .widenScalarIf(
          [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
          [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
      .widenScalarIf(
          [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
          [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .lower();

  // For fadd reductions we have pairwise operations available. We treat the
  // usual legal types as legal and handle the lowering to pairwise instructions
  // later.
  getActionDefinitionsBuilder(G_VECREDUCE_FADD)
      .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
      .legalIf([=](const LegalityQuery &Query) {
        const auto &Ty = Query.Types[1];
        return (Ty == v4s16 || Ty == v8s16) && HasFP16;
      })
      .minScalarOrElt(0, MinFPScalar)
      .clampMaxNumElements(1, s64, 2)
      .clampMaxNumElements(1, s32, 4)
      .clampMaxNumElements(1, s16, 8)
      .lower();

  // For fmul reductions we need to split up into individual operations. We
  // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
  // smaller types, followed by scalarizing what remains.
  getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
      .minScalarOrElt(0, MinFPScalar)
      .clampMaxNumElements(1, s64, 2)
      .clampMaxNumElements(1, s32, 4)
      .clampMaxNumElements(1, s16, 8)
      .clampMaxNumElements(1, s32, 2)
      .clampMaxNumElements(1, s16, 4)
      .scalarize(1)
      .lower();

  getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
      .scalarize(2)
      .lower();

  getActionDefinitionsBuilder(G_VECREDUCE_ADD)
      .legalFor({{s8, v16s8},
                 {s8, v8s8},
                 {s16, v8s16},
                 {s16, v4s16},
                 {s32, v4s32},
                 {s32, v2s32},
                 {s64, v2s64}})
      .clampMaxNumElements(1, s64, 2)
      .clampMaxNumElements(1, s32, 4)
      .clampMaxNumElements(1, s16, 8)
      .clampMaxNumElements(1, s8, 16)
      .lower();

  getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
                               G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
      .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
      .legalIf([=](const LegalityQuery &Query) {
        const auto &Ty = Query.Types[1];
        return Query.Types[0] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16;
      })
      .minScalarOrElt(0, MinFPScalar)
      .clampMaxNumElements(1, s64, 2)
      .clampMaxNumElements(1, s32, 4)
      .clampMaxNumElements(1, s16, 8)
      .lower();

  getActionDefinitionsBuilder(G_VECREDUCE_MUL)
      .clampMaxNumElements(1, s32, 2)
      .clampMaxNumElements(1, s16, 4)
      .clampMaxNumElements(1, s8, 8)
      .scalarize(1)
      .lower();

  getActionDefinitionsBuilder(
      {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
      .legalFor({{s8, v8s8},
                 {s8, v16s8},
                 {s16, v4s16},
                 {s16, v8s16},
                 {s32, v2s32},
                 {s32, v4s32}})
      .moreElementsIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].isVector() &&
                   Query.Types[1].getElementType() != s8 &&
                   Query.Types[1].getNumElements() & 1;
          },
          LegalizeMutations::moreElementsToNextPow2(1))
      .clampMaxNumElements(1, s64, 2)
      .clampMaxNumElements(1, s32, 4)
      .clampMaxNumElements(1, s16, 8)
      .clampMaxNumElements(1, s8, 16)
      .scalarize(1)
      .lower();

  getActionDefinitionsBuilder(
      {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
      // Try to break down into smaller vectors as long as they're at least 64
      // bits. This lets us use vector operations for some parts of the
      // reduction.
      .fewerElementsIf(
          [=](const LegalityQuery &Q) {
            LLT SrcTy = Q.Types[1];
            if (SrcTy.isScalar())
              return false;
            if (!isPowerOf2_32(SrcTy.getNumElements()))
              return false;
            // We can usually perform 64b vector operations.
            return SrcTy.getSizeInBits() > 64;
          },
          [=](const LegalityQuery &Q) {
            LLT SrcTy = Q.Types[1];
            return std::make_pair(1, SrcTy.divide(2));
          })
      .scalarize(1)
      .lower();

  // TODO: Update this to correct handling when adding AArch64/SVE support.
  getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();

  getActionDefinitionsBuilder({G_FSHL, G_FSHR})
      .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
      .lower();

  getActionDefinitionsBuilder(G_ROTR)
      .legalFor({{s32, s64}, {s64, s64}})
      .customIf([=](const LegalityQuery &Q) {
        return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
      })
      .lower();
  getActionDefinitionsBuilder(G_ROTL).lower();

  getActionDefinitionsBuilder({G_SBFX, G_UBFX})
      .customFor({{s32, s32}, {s64, s64}});

  auto always = [=](const LegalityQuery &Q) { return true; };
  auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
  if (HasCSSC)
    CTPOPActions
        .legalFor({{s32, s32},
                   {s64, s64},
                   {v8s8, v8s8},
                   {v16s8, v16s8}})
        .customFor({{s128, s128},
                    {v2s64, v2s64},
                    {v2s32, v2s32},
                    {v4s32, v4s32},
                    {v4s16, v4s16},
                    {v8s16, v8s16}});
  else
    CTPOPActions
        .legalFor({{v8s8, v8s8},
                   {v16s8, v16s8}})
        .customFor({{s32, s32},
                    {s64, s64},
                    {s128, s128},
                    {v2s64, v2s64},
                    {v2s32, v2s32},
                    {v4s32, v4s32},
                    {v4s16, v4s16},
                    {v8s16, v8s16}});
  CTPOPActions
      .clampScalar(0, s32, s128)
      .widenScalarToNextPow2(0)
      .minScalarEltSameAsIf(always, 1, 0)
      .maxScalarEltSameAsIf(always, 1, 0);
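  // Without CSSC, the custom CTPOP handling (see legalizeCTPOP) generally
  // counts bits byte-wise with the NEON CNT instruction and then sums the
  // per-byte results.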
1255
1256 getActionDefinitionsBuilder(Opcodes: {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
1257 .legalFor(Types: {v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8})
1258 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
1259 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
1260 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
1261 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
1262 .moreElementsToNextPow2(TypeIdx: 0)
1263 .lower();
1264
1265 // TODO: Libcall support for s128.
1266 // TODO: s16 should be legal with full FP16 support.
1267 getActionDefinitionsBuilder(Opcodes: {G_LROUND, G_LLROUND})
1268 .legalFor(Types: {{s64, s32}, {s64, s64}});
1269
1270 // TODO: Custom legalization for mismatched types.
1271 getActionDefinitionsBuilder(Opcode: G_FCOPYSIGN)
1272 .moreElementsIf(
1273 Predicate: [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
1274 Mutation: [=](const LegalityQuery &Query) {
1275 const LLT Ty = Query.Types[0];
1276 return std::pair(0, LLT::fixed_vector(NumElements: Ty == s16 ? 4 : 2, ScalarTy: Ty));
1277 })
1278 .lower();
1279
1280 getActionDefinitionsBuilder(Opcode: G_FMAD).lower();
1281
1282 // Access to floating-point environment.
1283 getActionDefinitionsBuilder(Opcodes: {G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1284 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1285 .libcall();
1286
1287 getActionDefinitionsBuilder(Opcode: G_IS_FPCLASS).lower();
1288
1289 getActionDefinitionsBuilder(Opcode: G_PREFETCH).custom();
1290
1291 getActionDefinitionsBuilder(Opcodes: {G_SCMP, G_UCMP}).lower();
1292
1293 getLegacyLegalizerInfo().computeTables();
1294 verify(MII: *ST.getInstrInfo());
1295}
1296
1297bool AArch64LegalizerInfo::legalizeCustom(
1298 LegalizerHelper &Helper, MachineInstr &MI,
1299 LostDebugLocObserver &LocObserver) const {
1300 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1301 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1302 GISelChangeObserver &Observer = Helper.Observer;
1303 switch (MI.getOpcode()) {
1304 default:
1305 // No idea what to do.
1306 return false;
1307 case TargetOpcode::G_VAARG:
1308 return legalizeVaArg(MI, MRI, MIRBuilder);
1309 case TargetOpcode::G_LOAD:
1310 case TargetOpcode::G_STORE:
1311 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1312 case TargetOpcode::G_SHL:
1313 case TargetOpcode::G_ASHR:
1314 case TargetOpcode::G_LSHR:
1315 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1316 case TargetOpcode::G_GLOBAL_VALUE:
1317 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1318 case TargetOpcode::G_SBFX:
1319 case TargetOpcode::G_UBFX:
1320 return legalizeBitfieldExtract(MI, MRI, Helper);
1321 case TargetOpcode::G_FSHL:
1322 case TargetOpcode::G_FSHR:
1323 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1324 case TargetOpcode::G_ROTR:
1325 return legalizeRotate(MI, MRI, Helper);
1326 case TargetOpcode::G_CTPOP:
1327 return legalizeCTPOP(MI, MRI, Helper);
1328 case TargetOpcode::G_ATOMIC_CMPXCHG:
1329 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1330 case TargetOpcode::G_CTTZ:
1331 return legalizeCTTZ(MI, Helper);
1332 case TargetOpcode::G_BZERO:
1333 case TargetOpcode::G_MEMCPY:
1334 case TargetOpcode::G_MEMMOVE:
1335 case TargetOpcode::G_MEMSET:
1336 return legalizeMemOps(MI, Helper);
1337 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1338 return legalizeExtractVectorElt(MI, MRI, Helper);
1339 case TargetOpcode::G_DYN_STACKALLOC:
1340 return legalizeDynStackAlloc(MI, Helper);
1341 case TargetOpcode::G_PREFETCH:
1342 return legalizePrefetch(MI, Helper);
1343 case TargetOpcode::G_ABS:
1344 return Helper.lowerAbsToCNeg(MI);
1345 case TargetOpcode::G_ICMP:
1346 return legalizeICMP(MI, MRI, MIRBuilder);
1347 }
1348
1349 llvm_unreachable("expected switch to return");
1350}
1351
1352bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1353 MachineRegisterInfo &MRI,
1354 MachineIRBuilder &MIRBuilder,
1355 GISelChangeObserver &Observer,
1356 LegalizerHelper &Helper) const {
1357 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1358 MI.getOpcode() == TargetOpcode::G_FSHR);
1359
1360   // Keep as G_FSHR if the shift amount is a G_CONSTANT; otherwise use the
1361   // generic lowering.
1362 Register ShiftNo = MI.getOperand(i: 3).getReg();
1363 LLT ShiftTy = MRI.getType(Reg: ShiftNo);
1364 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ShiftNo, MRI);
1365
1366 // Adjust shift amount according to Opcode (FSHL/FSHR)
1367 // Convert FSHL to FSHR
1368 LLT OperationTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1369 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1370
1371 // Lower non-constant shifts and leave zero shifts to the optimizer.
1372 if (!VRegAndVal || VRegAndVal->Value.urem(RHS: BitWidth) == 0)
1373 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1374 LegalizerHelper::LegalizeResult::Legalized);
1375
1376 APInt Amount = VRegAndVal->Value.urem(RHS: BitWidth);
1377
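  // For constant amounts, fshl(a, b, c) == fshr(a, b, BitWidth - c) when
  // 0 < c < BitWidth, so a G_FSHL can be rewritten as a G_FSHR with the
  // complemented shift amount.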
1378 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1379
1380   // If the instruction is a G_FSHR with a 64-bit G_CONSTANT shift amount in
1381   // the range [0, BitWidth), it is already legal.
1382 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1383 VRegAndVal->Value.ult(RHS: BitWidth))
1384 return true;
1385
1386   // Materialize the adjusted shift amount as a 64-bit constant.
1387 auto Cast64 = MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: Amount.zext(width: 64));
1388
1389 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1390 Observer.changingInstr(MI);
1391 MI.getOperand(i: 3).setReg(Cast64.getReg(Idx: 0));
1392 Observer.changedInstr(MI);
1393 }
1394 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1395 // instruction
1396 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1397 MIRBuilder.buildInstr(Opc: TargetOpcode::G_FSHR, DstOps: {MI.getOperand(i: 0).getReg()},
1398 SrcOps: {MI.getOperand(i: 1).getReg(), MI.getOperand(i: 2).getReg(),
1399 Cast64.getReg(Idx: 0)});
1400 MI.eraseFromParent();
1401 }
1402 return true;
1403}
1404
1405bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1406 MachineRegisterInfo &MRI,
1407 MachineIRBuilder &MIRBuilder) const {
1408 Register DstReg = MI.getOperand(i: 0).getReg();
1409 Register SrcReg1 = MI.getOperand(i: 2).getReg();
1410 Register SrcReg2 = MI.getOperand(i: 3).getReg();
1411 LLT DstTy = MRI.getType(Reg: DstReg);
1412 LLT SrcTy = MRI.getType(Reg: SrcReg1);
1413
1414   // Check that the vector types are legal.
1415 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1416 DstTy.getNumElements() != SrcTy.getNumElements() ||
1417 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1418 return false;
1419
1420   // Lower a G_ICMP NE to a G_ICMP EQ followed by a NOT, which allows better
1421   // pattern matching in later passes.
1422 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(i: 1).getPredicate();
1423 if (Pred != CmpInst::ICMP_NE)
1424 return true;
1425 Register CmpReg =
1426 MIRBuilder
1427 .buildICmp(Pred: CmpInst::ICMP_EQ, Res: MRI.getType(Reg: DstReg), Op0: SrcReg1, Op1: SrcReg2)
1428 .getReg(Idx: 0);
1429 MIRBuilder.buildNot(Dst: DstReg, Src0: CmpReg);
1430
1431 MI.eraseFromParent();
1432 return true;
1433}
1434
1435bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1436 MachineRegisterInfo &MRI,
1437 LegalizerHelper &Helper) const {
1438   // To allow the imported patterns to match, we ensure that the rotate amount
1439   // is extended to 64 bits.
1440 Register AmtReg = MI.getOperand(i: 2).getReg();
1441 LLT AmtTy = MRI.getType(Reg: AmtReg);
1442 (void)AmtTy;
1443 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1444 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1445 auto NewAmt = Helper.MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 64), Op: AmtReg);
1446 Helper.Observer.changingInstr(MI);
1447 MI.getOperand(i: 2).setReg(NewAmt.getReg(Idx: 0));
1448 Helper.Observer.changedInstr(MI);
1449 return true;
1450}
1451
1452bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1453 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1454 GISelChangeObserver &Observer) const {
1455 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1456 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1457 // G_ADD_LOW instructions.
1458 // By splitting this here, we can optimize accesses in the small code model by
1459   // folding the G_ADD_LOW into the load/store offset.
1460 auto &GlobalOp = MI.getOperand(i: 1);
1461 // Don't modify an intrinsic call.
1462 if (GlobalOp.isSymbol())
1463 return true;
1464   const auto *GV = GlobalOp.getGlobal();
1465 if (GV->isThreadLocal())
1466 return true; // Don't want to modify TLS vars.
1467
1468 auto &TM = ST->getTargetLowering()->getTargetMachine();
1469 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1470
1471 if (OpFlags & AArch64II::MO_GOT)
1472 return true;
1473
1474 auto Offset = GlobalOp.getOffset();
1475 Register DstReg = MI.getOperand(i: 0).getReg();
1476 auto ADRP = MIRBuilder.buildInstr(Opc: AArch64::ADRP, DstOps: {LLT::pointer(AddressSpace: 0, SizeInBits: 64)}, SrcOps: {})
1477 .addGlobalAddress(GV, Offset, TargetFlags: OpFlags | AArch64II::MO_PAGE);
1478 // Set the regclass on the dest reg too.
1479 MRI.setRegClass(Reg: ADRP.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
1480
1481 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1482 // by creating a MOVK that sets bits 48-63 of the register to (global address
1483 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1484 // prevent an incorrect tag being generated during relocation when the
1485 // global appears before the code section. Without the offset, a global at
1486 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1487 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1488 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1489 // instead of `0xf`.
1490 // This assumes that we're in the small code model so we can assume a binary
1491 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1492 // binary must also be loaded into address range [0, 2^48). Both of these
1493 // properties need to be ensured at runtime when using tagged addresses.
1494 if (OpFlags & AArch64II::MO_TAGGED) {
1495 assert(!Offset &&
1496 "Should not have folded in an offset for a tagged global!");
1497 ADRP = MIRBuilder.buildInstr(Opc: AArch64::MOVKXi, DstOps: {LLT::pointer(AddressSpace: 0, SizeInBits: 64)}, SrcOps: {ADRP})
1498 .addGlobalAddress(GV, Offset: 0x100000000,
1499 TargetFlags: AArch64II::MO_PREL | AArch64II::MO_G3)
1500 .addImm(Val: 48);
1501 MRI.setRegClass(Reg: ADRP.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
1502 }
1503
1504 MIRBuilder.buildInstr(Opc: AArch64::G_ADD_LOW, DstOps: {DstReg}, SrcOps: {ADRP})
1505 .addGlobalAddress(GV, Offset,
1506 TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1507 MI.eraseFromParent();
1508 return true;
1509}
1510
1511bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1512 MachineInstr &MI) const {
1513 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(Val&: MI).getIntrinsicID();
1514 switch (IntrinsicID) {
1515 case Intrinsic::vacopy: {
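    // On Darwin and Windows va_list is a single pointer; on AAPCS targets it
    // is a 32-byte struct (20 bytes for ILP32). Either way, vacopy is just a
    // plain memory copy of that object.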
1516 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1517 unsigned VaListSize =
1518 (ST->isTargetDarwin() || ST->isTargetWindows())
1519 ? PtrSize
1520 : ST->isTargetILP32() ? 20 : 32;
1521
1522 MachineFunction &MF = *MI.getMF();
1523 auto Val = MF.getRegInfo().createGenericVirtualRegister(
1524 Ty: LLT::scalar(SizeInBits: VaListSize * 8));
1525 MachineIRBuilder MIB(MI);
1526 MIB.buildLoad(Res: Val, Addr: MI.getOperand(i: 2),
1527 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
1528 F: MachineMemOperand::MOLoad,
1529 Size: VaListSize, BaseAlignment: Align(PtrSize)));
1530 MIB.buildStore(Val, Addr: MI.getOperand(i: 1),
1531 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
1532 F: MachineMemOperand::MOStore,
1533 Size: VaListSize, BaseAlignment: Align(PtrSize)));
1534 MI.eraseFromParent();
1535 return true;
1536 }
1537 case Intrinsic::get_dynamic_area_offset: {
1538 MachineIRBuilder &MIB = Helper.MIRBuilder;
1539 MIB.buildConstant(Res: MI.getOperand(i: 0).getReg(), Val: 0);
1540 MI.eraseFromParent();
1541 return true;
1542 }
1543 case Intrinsic::aarch64_mops_memset_tag: {
1544 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1545 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1546 // the instruction).
1547 MachineIRBuilder MIB(MI);
1548 auto &Value = MI.getOperand(i: 3);
1549 Register ExtValueReg = MIB.buildAnyExt(Res: LLT::scalar(SizeInBits: 64), Op: Value).getReg(Idx: 0);
1550 Value.setReg(ExtValueReg);
1551 return true;
1552 }
1553 case Intrinsic::aarch64_prefetch: {
1554 MachineIRBuilder MIB(MI);
1555 auto &AddrVal = MI.getOperand(i: 1);
1556
1557 int64_t IsWrite = MI.getOperand(i: 2).getImm();
1558 int64_t Target = MI.getOperand(i: 3).getImm();
1559 int64_t IsStream = MI.getOperand(i: 4).getImm();
1560 int64_t IsData = MI.getOperand(i: 5).getImm();
1561
1562 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1563 (!IsData << 3) | // IsDataCache bit
1564 (Target << 1) | // Cache level bits
1565 (unsigned)IsStream; // Stream bit
1566
1567 MIB.buildInstr(Opcode: AArch64::G_AARCH64_PREFETCH).addImm(Val: PrfOp).add(MO: AddrVal);
1568 MI.eraseFromParent();
1569 return true;
1570 }
1571 case Intrinsic::aarch64_neon_uaddv:
1572 case Intrinsic::aarch64_neon_saddv:
1573 case Intrinsic::aarch64_neon_umaxv:
1574 case Intrinsic::aarch64_neon_smaxv:
1575 case Intrinsic::aarch64_neon_uminv:
1576 case Intrinsic::aarch64_neon_sminv: {
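    // These reductions produce a scalar of the source element type, but the
    // intrinsic may be declared with a wider result (e.g. i32 for a byte or
    // halfword reduction). Narrow the instruction's result to the element type
    // and extend it back to the original destination afterwards.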
1577 MachineIRBuilder MIB(MI);
1578 MachineRegisterInfo &MRI = *MIB.getMRI();
1579 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1580 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1581 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1582
1583 auto OldDst = MI.getOperand(i: 0).getReg();
1584 auto OldDstTy = MRI.getType(Reg: OldDst);
1585 LLT NewDstTy = MRI.getType(Reg: MI.getOperand(i: 2).getReg()).getElementType();
1586 if (OldDstTy == NewDstTy)
1587 return true;
1588
1589 auto NewDst = MRI.createGenericVirtualRegister(Ty: NewDstTy);
1590
1591 Helper.Observer.changingInstr(MI);
1592 MI.getOperand(i: 0).setReg(NewDst);
1593 Helper.Observer.changedInstr(MI);
1594
1595 MIB.setInsertPt(MBB&: MIB.getMBB(), II: ++MIB.getInsertPt());
1596 MIB.buildExtOrTrunc(ExtOpc: IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1597 Res: OldDst, Op: NewDst);
1598
1599 return true;
1600 }
1601 case Intrinsic::aarch64_neon_uaddlp:
1602 case Intrinsic::aarch64_neon_saddlp: {
1603 MachineIRBuilder MIB(MI);
1604
1605 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1606 ? AArch64::G_UADDLP
1607 : AArch64::G_SADDLP;
1608 MIB.buildInstr(Opc, DstOps: {MI.getOperand(i: 0)}, SrcOps: {MI.getOperand(i: 2)});
1609 MI.eraseFromParent();
1610
1611 return true;
1612 }
1613 case Intrinsic::aarch64_neon_uaddlv:
1614 case Intrinsic::aarch64_neon_saddlv: {
1615 MachineIRBuilder MIB(MI);
1616 MachineRegisterInfo &MRI = *MIB.getMRI();
1617
1618 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1619 ? AArch64::G_UADDLV
1620 : AArch64::G_SADDLV;
1621 Register DstReg = MI.getOperand(i: 0).getReg();
1622 Register SrcReg = MI.getOperand(i: 2).getReg();
1623 LLT DstTy = MRI.getType(Reg: DstReg);
1624
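    // G_UADDLV/G_SADDLV leave their result in a vector register. Pick a vector
    // container wide enough for the accumulated sum, extract lane 0, and
    // truncate if the intrinsic result is narrower than the container lane.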
1625 LLT MidTy, ExtTy;
1626 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1627 MidTy = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
1628 ExtTy = LLT::scalar(SizeInBits: 32);
1629 } else {
1630 MidTy = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64);
1631 ExtTy = LLT::scalar(SizeInBits: 64);
1632 }
1633
1634 Register MidReg =
1635 MIB.buildInstr(Opc, DstOps: {MidTy}, SrcOps: {SrcReg})->getOperand(i: 0).getReg();
1636 Register ZeroReg =
1637 MIB.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: 0)->getOperand(i: 0).getReg();
1638 Register ExtReg = MIB.buildInstr(Opc: AArch64::G_EXTRACT_VECTOR_ELT, DstOps: {ExtTy},
1639 SrcOps: {MidReg, ZeroReg})
1640 .getReg(Idx: 0);
1641
1642 if (DstTy.getScalarSizeInBits() < 32)
1643 MIB.buildTrunc(Res: DstReg, Op: ExtReg);
1644 else
1645 MIB.buildCopy(Res: DstReg, Op: ExtReg);
1646
1647 MI.eraseFromParent();
1648
1649 return true;
1650 }
1651 case Intrinsic::aarch64_neon_smax:
1652 case Intrinsic::aarch64_neon_smin:
1653 case Intrinsic::aarch64_neon_umax:
1654 case Intrinsic::aarch64_neon_umin:
1655 case Intrinsic::aarch64_neon_fmax:
1656 case Intrinsic::aarch64_neon_fmin:
1657 case Intrinsic::aarch64_neon_fmaxnm:
1658 case Intrinsic::aarch64_neon_fminnm: {
1659 MachineIRBuilder MIB(MI);
1660 if (IntrinsicID == Intrinsic::aarch64_neon_smax)
1661 MIB.buildSMax(Dst: MI.getOperand(i: 0), Src0: MI.getOperand(i: 2), Src1: MI.getOperand(i: 3));
1662 else if (IntrinsicID == Intrinsic::aarch64_neon_smin)
1663 MIB.buildSMin(Dst: MI.getOperand(i: 0), Src0: MI.getOperand(i: 2), Src1: MI.getOperand(i: 3));
1664 else if (IntrinsicID == Intrinsic::aarch64_neon_umax)
1665 MIB.buildUMax(Dst: MI.getOperand(i: 0), Src0: MI.getOperand(i: 2), Src1: MI.getOperand(i: 3));
1666 else if (IntrinsicID == Intrinsic::aarch64_neon_umin)
1667 MIB.buildUMin(Dst: MI.getOperand(i: 0), Src0: MI.getOperand(i: 2), Src1: MI.getOperand(i: 3));
1668 else if (IntrinsicID == Intrinsic::aarch64_neon_fmax)
1669 MIB.buildInstr(Opc: TargetOpcode::G_FMAXIMUM, DstOps: {MI.getOperand(i: 0)},
1670 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3)});
1671 else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
1672 MIB.buildInstr(Opc: TargetOpcode::G_FMINIMUM, DstOps: {MI.getOperand(i: 0)},
1673 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3)});
1674 else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm)
1675 MIB.buildInstr(Opc: TargetOpcode::G_FMAXNUM, DstOps: {MI.getOperand(i: 0)},
1676 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3)});
1677 else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm)
1678 MIB.buildInstr(Opc: TargetOpcode::G_FMINNUM, DstOps: {MI.getOperand(i: 0)},
1679 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3)});
1680 MI.eraseFromParent();
1681 return true;
1682 }
1683 case Intrinsic::vector_reverse:
1684 // TODO: Add support for vector_reverse
1685 return false;
1686 }
1687
1688 return true;
1689}
1690
1691bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1692 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1693 GISelChangeObserver &Observer) const {
1694 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1695 MI.getOpcode() == TargetOpcode::G_LSHR ||
1696 MI.getOpcode() == TargetOpcode::G_SHL);
1697 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1698 // imported patterns can select it later. Either way, it will be legal.
1699 Register AmtReg = MI.getOperand(i: 2).getReg();
1700 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: AmtReg, MRI);
1701 if (!VRegAndVal)
1702 return true;
1703   // Check that the shift amount is in range for an immediate form.
1704 int64_t Amount = VRegAndVal->Value.getSExtValue();
1705 if (Amount > 31)
1706 return true; // This will have to remain a register variant.
1707 auto ExtCst = MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: Amount);
1708 Observer.changingInstr(MI);
1709 MI.getOperand(i: 2).setReg(ExtCst.getReg(Idx: 0));
1710 Observer.changedInstr(MI);
1711 return true;
1712}
1713
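// Try to fold a constant-offset G_PTR_ADD into an LDP/STP addressing mode. The
// offset must be a signed 7-bit immediate scaled by 8 (i.e. a multiple of 8 in
// [-512, 504]); otherwise fall back to the root pointer with a zero offset.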
1714static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1715 MachineRegisterInfo &MRI) {
1716 Base = Root;
1717 Offset = 0;
1718
1719 Register NewBase;
1720 int64_t NewOffset;
1721 if (mi_match(R: Root, MRI, P: m_GPtrAdd(L: m_Reg(R&: NewBase), R: m_ICst(Cst&: NewOffset))) &&
1722 isShiftedInt<7, 3>(x: NewOffset)) {
1723 Base = NewBase;
1724 Offset = NewOffset;
1725 }
1726}
1727
1728// FIXME: This should be removed and replaced with the generic bitcast legalize
1729// action.
1730bool AArch64LegalizerInfo::legalizeLoadStore(
1731 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1732 GISelChangeObserver &Observer) const {
1733 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1734 MI.getOpcode() == TargetOpcode::G_LOAD);
1735 // Here we just try to handle vector loads/stores where our value type might
1736 // have pointer elements, which the SelectionDAG importer can't handle. To
1737 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1738 // the value to use s64 types.
1739
1740   // Custom legalization requires that the instruction, if not deleted, be
1741   // fully legalized. To allow further legalization of the instruction, we
1742   // create a new instruction and erase the existing one.
1743
1744 Register ValReg = MI.getOperand(i: 0).getReg();
1745 const LLT ValTy = MRI.getType(Reg: ValReg);
1746
1747 if (ValTy == LLT::scalar(SizeInBits: 128)) {
1748
1749 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1750 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1751 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1752 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1753 bool IsRcpC3 =
1754 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1755
1756 LLT s64 = LLT::scalar(SizeInBits: 64);
1757
1758 unsigned Opcode;
1759 if (IsRcpC3) {
1760 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1761 } else {
1762 // For LSE2, loads/stores should have been converted to monotonic and had
1763 // a fence inserted after them.
1764 assert(Ordering == AtomicOrdering::Monotonic ||
1765 Ordering == AtomicOrdering::Unordered);
1766 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1767
1768 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1769 }
1770
1771 MachineInstrBuilder NewI;
1772 if (IsLoad) {
1773 NewI = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {s64, s64}, SrcOps: {});
1774 MIRBuilder.buildMergeLikeInstr(
1775 Res: ValReg, Ops: {NewI->getOperand(i: 0), NewI->getOperand(i: 1)});
1776 } else {
1777 auto Split = MIRBuilder.buildUnmerge(Res: s64, Op: MI.getOperand(i: 0));
1778 NewI = MIRBuilder.buildInstr(
1779 Opc: Opcode, DstOps: {}, SrcOps: {Split->getOperand(i: 0), Split->getOperand(i: 1)});
1780 }
1781
1782 if (IsRcpC3) {
1783 NewI.addUse(RegNo: MI.getOperand(i: 1).getReg());
1784 } else {
1785 Register Base;
1786 int Offset;
1787 matchLDPSTPAddrMode(Root: MI.getOperand(i: 1).getReg(), Base, Offset, MRI);
1788 NewI.addUse(RegNo: Base);
1789 NewI.addImm(Val: Offset / 8);
1790 }
1791
1792 NewI.cloneMemRefs(OtherMI: MI);
1793 constrainSelectedInstRegOperands(I&: *NewI, TII: *ST->getInstrInfo(),
1794 TRI: *MRI.getTargetRegisterInfo(),
1795 RBI: *ST->getRegBankInfo());
1796 MI.eraseFromParent();
1797 return true;
1798 }
1799
1800 if (!ValTy.isPointerVector() ||
1801 ValTy.getElementType().getAddressSpace() != 0) {
1802 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1803 return false;
1804 }
1805
1806 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1807 const LLT NewTy = LLT::vector(EC: ValTy.getElementCount(), ScalarSizeInBits: PtrSize);
1808 auto &MMO = **MI.memoperands_begin();
1809 MMO.setType(NewTy);
1810
1811 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1812 auto Bitcast = MIRBuilder.buildBitcast(Dst: NewTy, Src: ValReg);
1813 MIRBuilder.buildStore(Val: Bitcast.getReg(Idx: 0), Addr: MI.getOperand(i: 1), MMO);
1814 } else {
1815 auto NewLoad = MIRBuilder.buildLoad(Res: NewTy, Addr: MI.getOperand(i: 1), MMO);
1816 MIRBuilder.buildBitcast(Dst: ValReg, Src: NewLoad);
1817 }
1818 MI.eraseFromParent();
1819 return true;
1820}
1821
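// Lower G_VAARG against a pointer-style va_list: load the current argument
// pointer, align it up if the requested alignment exceeds the slot alignment,
// load the value, then advance the pointer by the slot-aligned size and store
// it back.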
1822bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1823 MachineRegisterInfo &MRI,
1824 MachineIRBuilder &MIRBuilder) const {
1825 MachineFunction &MF = MIRBuilder.getMF();
1826 Align Alignment(MI.getOperand(i: 2).getImm());
1827 Register Dst = MI.getOperand(i: 0).getReg();
1828 Register ListPtr = MI.getOperand(i: 1).getReg();
1829
1830 LLT PtrTy = MRI.getType(Reg: ListPtr);
1831 LLT IntPtrTy = LLT::scalar(SizeInBits: PtrTy.getSizeInBits());
1832
1833 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1834 const Align PtrAlign = Align(PtrSize);
1835 auto List = MIRBuilder.buildLoad(
1836 Res: PtrTy, Addr: ListPtr,
1837 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad,
1838 MemTy: PtrTy, base_alignment: PtrAlign));
1839
1840 MachineInstrBuilder DstPtr;
1841 if (Alignment > PtrAlign) {
1842 // Realign the list to the actual required alignment.
1843 auto AlignMinus1 =
1844 MIRBuilder.buildConstant(Res: IntPtrTy, Val: Alignment.value() - 1);
1845 auto ListTmp = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: List, Op1: AlignMinus1.getReg(Idx: 0));
1846 DstPtr = MIRBuilder.buildMaskLowPtrBits(Res: PtrTy, Op0: ListTmp, NumBits: Log2(A: Alignment));
1847 } else
1848 DstPtr = List;
1849
1850 LLT ValTy = MRI.getType(Reg: Dst);
1851 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1852 MIRBuilder.buildLoad(
1853 Res: Dst, Addr: DstPtr,
1854 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad,
1855 MemTy: ValTy, base_alignment: std::max(a: Alignment, b: PtrAlign)));
1856
1857 auto Size = MIRBuilder.buildConstant(Res: IntPtrTy, Val: alignTo(Size: ValSize, A: PtrAlign));
1858
1859 auto NewList = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: DstPtr, Op1: Size.getReg(Idx: 0));
1860
1861 MIRBuilder.buildStore(Val: NewList, Addr: ListPtr,
1862 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
1863 f: MachineMemOperand::MOStore,
1864 MemTy: PtrTy, base_alignment: PtrAlign));
1865
1866 MI.eraseFromParent();
1867 return true;
1868}
1869
1870bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1871 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1872 // Only legal if we can select immediate forms.
1873 // TODO: Lower this otherwise.
1874 return getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 2).getReg(), MRI) &&
1875 getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 3).getReg(), MRI);
1876}
1877
1878bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1879 MachineRegisterInfo &MRI,
1880 LegalizerHelper &Helper) const {
1881 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1882 // it can be more efficiently lowered to the following sequence that uses
1883 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1884 // registers are cheap.
1885 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1886 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1887 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1888 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1889 //
1890 // For 128 bit vector popcounts, we lower to the following sequence:
1891 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1892 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1893 // uaddlp.4s v0, v0 // v4s32, v2s64
1894 // uaddlp.2d v0, v0 // v2s64
1895 //
1896 // For 64 bit vector popcounts, we lower to the following sequence:
1897 // cnt.8b v0, v0 // v4s16, v2s32
1898 // uaddlp.4h v0, v0 // v4s16, v2s32
1899 // uaddlp.2s v0, v0 // v2s32
1900
1901 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1902 Register Dst = MI.getOperand(i: 0).getReg();
1903 Register Val = MI.getOperand(i: 1).getReg();
1904 LLT Ty = MRI.getType(Reg: Val);
1905 unsigned Size = Ty.getSizeInBits();
1906
1907 assert(Ty == MRI.getType(Dst) &&
1908 "Expected src and dst to have the same type!");
1909
1910 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1911 LLT s64 = LLT::scalar(SizeInBits: 64);
1912
1913 auto Split = MIRBuilder.buildUnmerge(Res: s64, Op: Val);
1914 auto CTPOP1 = MIRBuilder.buildCTPOP(Dst: s64, Src0: Split->getOperand(i: 0));
1915 auto CTPOP2 = MIRBuilder.buildCTPOP(Dst: s64, Src0: Split->getOperand(i: 1));
1916 auto Add = MIRBuilder.buildAdd(Dst: s64, Src0: CTPOP1, Src1: CTPOP2);
1917
1918 MIRBuilder.buildZExt(Res: Dst, Op: Add);
1919 MI.eraseFromParent();
1920 return true;
1921 }
1922
1923 if (!ST->hasNEON() ||
1924 MI.getMF()->getFunction().hasFnAttribute(Kind: Attribute::NoImplicitFloat)) {
1925 // Use generic lowering when custom lowering is not possible.
1926 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1927 Helper.lowerBitCount(MI) ==
1928 LegalizerHelper::LegalizeResult::Legalized;
1929 }
1930
1931 // Pre-conditioning: widen Val up to the nearest vector type.
1932   // s32,s64,v4s16,v2s32 -> v8s8
1933   // v8s16,v4s32,v2s64 -> v16s8
1934 LLT VTy = Size == 128 ? LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8) : LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8);
1935 if (Ty.isScalar()) {
1936 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
1937 if (Size == 32) {
1938 Val = MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 64), Op: Val).getReg(Idx: 0);
1939 }
1940 }
1941 Val = MIRBuilder.buildBitcast(Dst: VTy, Src: Val).getReg(Idx: 0);
1942
1943 // Count bits in each byte-sized lane.
1944 auto CTPOP = MIRBuilder.buildCTPOP(Dst: VTy, Src0: Val);
1945
1946 // Sum across lanes.
1947
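  // With FEAT_DotProd, a UDOT against an all-ones vector accumulates each
  // group of four byte counts into a 32-bit lane, giving the popcount for
  // 32-bit elements directly; a final UADDLP then pairs those sums into
  // 64-bit lanes for v2s64.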
1948 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
1949 Ty.getScalarSizeInBits() != 16) {
1950 LLT Dt = Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) ? LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32) : Ty;
1951 auto Zeros = MIRBuilder.buildConstant(Res: Dt, Val: 0);
1952 auto Ones = MIRBuilder.buildConstant(Res: VTy, Val: 1);
1953 MachineInstrBuilder Sum;
1954
1955 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
1956 auto UDOT =
1957 MIRBuilder.buildInstr(Opc: AArch64::G_UDOT, DstOps: {Dt}, SrcOps: {Zeros, Ones, CTPOP});
1958 Sum = MIRBuilder.buildInstr(Opc: AArch64::G_UADDLP, DstOps: {Ty}, SrcOps: {UDOT});
1959 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
1960 Sum = MIRBuilder.buildInstr(Opc: AArch64::G_UDOT, DstOps: {Dt}, SrcOps: {Zeros, Ones, CTPOP});
1961 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
1962 Sum = MIRBuilder.buildInstr(Opc: AArch64::G_UDOT, DstOps: {Dt}, SrcOps: {Zeros, Ones, CTPOP});
1963 } else {
1964 llvm_unreachable("unexpected vector shape");
1965 }
1966
1967 Sum->getOperand(i: 0).setReg(Dst);
1968 MI.eraseFromParent();
1969 return true;
1970 }
1971
1972 Register HSum = CTPOP.getReg(Idx: 0);
1973 unsigned Opc;
1974 SmallVector<LLT> HAddTys;
1975 if (Ty.isScalar()) {
1976 Opc = Intrinsic::aarch64_neon_uaddlv;
1977 HAddTys.push_back(Elt: LLT::scalar(SizeInBits: 32));
1978 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) {
1979 Opc = Intrinsic::aarch64_neon_uaddlp;
1980 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16));
1981 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
1982 Opc = Intrinsic::aarch64_neon_uaddlp;
1983 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16));
1984 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32));
1985 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
1986 Opc = Intrinsic::aarch64_neon_uaddlp;
1987 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16));
1988 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32));
1989 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64));
1990 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) {
1991 Opc = Intrinsic::aarch64_neon_uaddlp;
1992 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16));
1993 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
1994 Opc = Intrinsic::aarch64_neon_uaddlp;
1995 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16));
1996 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32));
1997 } else
1998 llvm_unreachable("unexpected vector shape");
1999 MachineInstrBuilder UADD;
2000 for (LLT HTy : HAddTys) {
2001 UADD = MIRBuilder.buildIntrinsic(ID: Opc, Res: {HTy}).addUse(RegNo: HSum);
2002 HSum = UADD.getReg(Idx: 0);
2003 }
2004
2005 // Post-conditioning.
2006 if (Ty.isScalar() && (Size == 64 || Size == 128))
2007 MIRBuilder.buildZExt(Res: Dst, Op: UADD);
2008 else
2009 UADD->getOperand(i: 0).setReg(Dst);
2010 MI.eraseFromParent();
2011 return true;
2012}
2013
2014bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2015 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2016 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2017 LLT s64 = LLT::scalar(SizeInBits: 64);
2018 auto Addr = MI.getOperand(i: 1).getReg();
2019 auto DesiredI = MIRBuilder.buildUnmerge(Res: {s64, s64}, Op: MI.getOperand(i: 2));
2020 auto NewI = MIRBuilder.buildUnmerge(Res: {s64, s64}, Op: MI.getOperand(i: 3));
2021 auto DstLo = MRI.createGenericVirtualRegister(Ty: s64);
2022 auto DstHi = MRI.createGenericVirtualRegister(Ty: s64);
2023
2024 MachineInstrBuilder CAS;
2025 if (ST->hasLSE()) {
2026 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2027 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2028 // the rest of the MIR so we must reassemble the extracted registers into a
2029 // 128-bit known-regclass one with code like this:
2030 //
2031 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2032 // %out = CASP %in1, ...
2033 // %OldLo = G_EXTRACT %out, 0
2034 // %OldHi = G_EXTRACT %out, 64
2035 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2036 unsigned Opcode;
2037 switch (Ordering) {
2038 case AtomicOrdering::Acquire:
2039 Opcode = AArch64::CASPAX;
2040 break;
2041 case AtomicOrdering::Release:
2042 Opcode = AArch64::CASPLX;
2043 break;
2044 case AtomicOrdering::AcquireRelease:
2045 case AtomicOrdering::SequentiallyConsistent:
2046 Opcode = AArch64::CASPALX;
2047 break;
2048 default:
2049 Opcode = AArch64::CASPX;
2050 break;
2051 }
2052
2053 LLT s128 = LLT::scalar(SizeInBits: 128);
2054 auto CASDst = MRI.createGenericVirtualRegister(Ty: s128);
2055 auto CASDesired = MRI.createGenericVirtualRegister(Ty: s128);
2056 auto CASNew = MRI.createGenericVirtualRegister(Ty: s128);
2057 MIRBuilder.buildInstr(Opc: TargetOpcode::REG_SEQUENCE, DstOps: {CASDesired}, SrcOps: {})
2058 .addUse(RegNo: DesiredI->getOperand(i: 0).getReg())
2059 .addImm(Val: AArch64::sube64)
2060 .addUse(RegNo: DesiredI->getOperand(i: 1).getReg())
2061 .addImm(Val: AArch64::subo64);
2062 MIRBuilder.buildInstr(Opc: TargetOpcode::REG_SEQUENCE, DstOps: {CASNew}, SrcOps: {})
2063 .addUse(RegNo: NewI->getOperand(i: 0).getReg())
2064 .addImm(Val: AArch64::sube64)
2065 .addUse(RegNo: NewI->getOperand(i: 1).getReg())
2066 .addImm(Val: AArch64::subo64);
2067
2068 CAS = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {CASDst}, SrcOps: {CASDesired, CASNew, Addr});
2069
2070 MIRBuilder.buildExtract(Res: {DstLo}, Src: {CASDst}, Index: 0);
2071 MIRBuilder.buildExtract(Res: {DstHi}, Src: {CASDst}, Index: 64);
2072 } else {
2073     // The -O0 CMP_SWAP_128 is friendlier to generate code for because
2074     // LDXP/STXP can take arbitrary registers, so it just has the normal
2075     // GPR64 operands that the rest of AArch64 expects.
2076 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2077 unsigned Opcode;
2078 switch (Ordering) {
2079 case AtomicOrdering::Acquire:
2080 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2081 break;
2082 case AtomicOrdering::Release:
2083 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2084 break;
2085 case AtomicOrdering::AcquireRelease:
2086 case AtomicOrdering::SequentiallyConsistent:
2087 Opcode = AArch64::CMP_SWAP_128;
2088 break;
2089 default:
2090 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2091 break;
2092 }
2093
2094 auto Scratch = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
2095 CAS = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {DstLo, DstHi, Scratch},
2096 SrcOps: {Addr, DesiredI->getOperand(i: 0),
2097 DesiredI->getOperand(i: 1), NewI->getOperand(i: 0),
2098 NewI->getOperand(i: 1)});
2099 }
2100
2101 CAS.cloneMemRefs(OtherMI: MI);
2102 constrainSelectedInstRegOperands(I&: *CAS, TII: *ST->getInstrInfo(),
2103 TRI: *MRI.getTargetRegisterInfo(),
2104 RBI: *ST->getRegBankInfo());
2105
2106 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0), Ops: {DstLo, DstHi});
2107 MI.eraseFromParent();
2108 return true;
2109}
2110
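// AArch64 has no count-trailing-zeros instruction, so lower G_CTTZ to a G_CTLZ
// of the bit-reversed input, which can then be selected as RBIT followed by
// CLZ.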
2111bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2112 LegalizerHelper &Helper) const {
2113 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2114 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2115 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
2116 auto BitReverse = MIRBuilder.buildBitReverse(Dst: Ty, Src: MI.getOperand(i: 1));
2117 MIRBuilder.buildCTLZ(Dst: MI.getOperand(i: 0).getReg(), Src0: BitReverse);
2118 MI.eraseFromParent();
2119 return true;
2120}
2121
2122bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2123 LegalizerHelper &Helper) const {
2124 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2125
2126   // The tagged version (aarch64_mops_memset_tag) is legalized in
2127   // legalizeIntrinsic.
2127 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2128 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2129 // the instruction).
2130 auto &Value = MI.getOperand(i: 1);
2131 Register ExtValueReg =
2132 MIRBuilder.buildAnyExt(Res: LLT::scalar(SizeInBits: 64), Op: Value).getReg(Idx: 0);
2133 Value.setReg(ExtValueReg);
2134 return true;
2135 }
2136
2137 return false;
2138}
2139
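// G_EXTRACT_VECTOR_ELT with a constant index can be selected directly via the
// lane-indexed instructions, so it is left as is; variable indices go through
// the generic lowering (via a stack temporary).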
2140bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2141 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2142 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
2143 auto VRegAndVal =
2144 getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 2).getReg(), MRI);
2145 if (VRegAndVal)
2146 return true;
2147 return Helper.lowerExtractInsertVectorElt(MI) !=
2148 LegalizerHelper::LegalizeResult::UnableToLegalize;
2149}
2150
2151bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2152 MachineInstr &MI, LegalizerHelper &Helper) const {
2153 MachineFunction &MF = *MI.getParent()->getParent();
2154 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2155 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2156
2157 // If stack probing is not enabled for this function, use the default
2158 // lowering.
2159 if (!MF.getFunction().hasFnAttribute(Kind: "probe-stack") ||
2160 MF.getFunction().getFnAttribute(Kind: "probe-stack").getValueAsString() !=
2161 "inline-asm") {
2162 Helper.lowerDynStackAlloc(MI);
2163 return true;
2164 }
2165
2166 Register Dst = MI.getOperand(i: 0).getReg();
2167 Register AllocSize = MI.getOperand(i: 1).getReg();
2168 Align Alignment = assumeAligned(Value: MI.getOperand(i: 2).getImm());
2169
2170 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2171 "Unexpected type for dynamic alloca");
2172 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2173 "Unexpected type for dynamic alloca");
2174
2175 LLT PtrTy = MRI.getType(Reg: Dst);
2176 Register SPReg =
2177 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2178 Register SPTmp =
2179 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
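  // Emit the PROBED_STACKALLOC_DYN pseudo, which probes the stack page by page
  // while moving SP down to the computed target, then copy the probed SP into
  // the destination.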
2180 auto NewMI =
2181 MIRBuilder.buildInstr(Opc: AArch64::PROBED_STACKALLOC_DYN, DstOps: {}, SrcOps: {SPTmp});
2182 MRI.setRegClass(Reg: NewMI.getReg(Idx: 0), RC: &AArch64::GPR64commonRegClass);
2183 MIRBuilder.setInsertPt(MBB&: *NewMI->getParent(), II: NewMI);
2184 MIRBuilder.buildCopy(Res: Dst, Op: SPTmp);
2185
2186 MI.eraseFromParent();
2187 return true;
2188}
2189
2190bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2191 LegalizerHelper &Helper) const {
2192 MachineIRBuilder &MIB = Helper.MIRBuilder;
2193 auto &AddrVal = MI.getOperand(i: 0);
2194
2195 int64_t IsWrite = MI.getOperand(i: 1).getImm();
2196 int64_t Locality = MI.getOperand(i: 2).getImm();
2197 int64_t IsData = MI.getOperand(i: 3).getImm();
2198
2199 bool IsStream = Locality == 0;
2200 if (Locality != 0) {
2201 assert(Locality <= 3 && "Prefetch locality out-of-range");
2202     // The locality argument and the prfop target field count in opposite
2203     // directions: higher locality means a closer cache, and the encoding
2204     // starts at 0 for L1, so invert the value.
2205 Locality = 3 - Locality;
2206 }
2207
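  // Assemble the PRFM prfop immediate: bit 4 = write, bit 3 = instruction
  // (rather than data) cache, bits 2:1 = target cache level, bit 0 = streaming
  // (non-temporal) hint.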
2208 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2209
2210 MIB.buildInstr(Opcode: AArch64::G_AARCH64_PREFETCH).addImm(Val: PrfOp).add(MO: AddrVal);
2211 MI.eraseFromParent();
2212 return true;
2213}
2214