1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the LegalizerInfo class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64LegalizerInfo.h"
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
18#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
19#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
20#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
21#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
22#include "llvm/CodeGen/GlobalISel/Utils.h"
23#include "llvm/CodeGen/MachineInstr.h"
24#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/TargetOpcodes.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/IR/IntrinsicsAArch64.h"
30#include "llvm/IR/Type.h"
31#include "llvm/Support/MathExtras.h"
32#include <initializer_list>
33
34#define DEBUG_TYPE "aarch64-legalinfo"
35
36using namespace llvm;
37using namespace LegalizeActions;
38using namespace LegalizeMutations;
39using namespace LegalityPredicates;
40using namespace MIPatternMatch;
41
42AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
43 : ST(&ST) {
44 using namespace TargetOpcode;
45 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
46 const LLT s8 = LLT::scalar(SizeInBits: 8);
47 const LLT s16 = LLT::scalar(SizeInBits: 16);
48 const LLT s32 = LLT::scalar(SizeInBits: 32);
49 const LLT s64 = LLT::scalar(SizeInBits: 64);
50 const LLT s128 = LLT::scalar(SizeInBits: 128);
51 const LLT v16s8 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8);
52 const LLT v8s8 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8);
53 const LLT v4s8 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 8);
54 const LLT v2s8 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 8);
55 const LLT v8s16 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16);
56 const LLT v4s16 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16);
57 const LLT v2s16 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16);
58 const LLT v2s32 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32);
59 const LLT v4s32 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
60 const LLT v2s64 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64);
61 const LLT v2p0 = LLT::fixed_vector(NumElements: 2, ScalarTy: p0);
62
63 const LLT nxv16s8 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s8);
64 const LLT nxv8s16 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s16);
65 const LLT nxv4s32 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s32);
66 const LLT nxv2s64 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s64);
67
68 const LLT bf16 = LLT::bfloat16();
69 const LLT v4bf16 = LLT::fixed_vector(NumElements: 4, ScalarTy: bf16);
70 const LLT v8bf16 = LLT::fixed_vector(NumElements: 8, ScalarTy: bf16);
71
72 const LLT f16 = LLT::float16();
73 const LLT v4f16 = LLT::fixed_vector(NumElements: 4, ScalarTy: f16);
74 const LLT v8f16 = LLT::fixed_vector(NumElements: 8, ScalarTy: f16);
75
76 const LLT f32 = LLT::float32();
77 const LLT v2f32 = LLT::fixed_vector(NumElements: 2, ScalarTy: f32);
78 const LLT v4f32 = LLT::fixed_vector(NumElements: 4, ScalarTy: f32);
79
80 const LLT f64 = LLT::float64();
81 const LLT v2f64 = LLT::fixed_vector(NumElements: 2, ScalarTy: f64);
82
83 const LLT f128 = LLT::float128();
84
85 const LLT i8 = LLT::integer(SizeInBits: 8);
86 const LLT v8i8 = LLT::fixed_vector(NumElements: 8, ScalarTy: i8);
87 const LLT v16i8 = LLT::fixed_vector(NumElements: 16, ScalarTy: i8);
88
89 const LLT i16 = LLT::integer(SizeInBits: 16);
90 const LLT v8i16 = LLT::fixed_vector(NumElements: 8, ScalarTy: i16);
91 const LLT v4i16 = LLT::fixed_vector(NumElements: 4, ScalarTy: i16);
92
93 const LLT i32 = LLT::integer(SizeInBits: 32);
94 const LLT v2i32 = LLT::fixed_vector(NumElements: 2, ScalarTy: i32);
95 const LLT v4i32 = LLT::fixed_vector(NumElements: 4, ScalarTy: i32);
96
97 const LLT i64 = LLT::integer(SizeInBits: 64);
98 const LLT v2i64 = LLT::fixed_vector(NumElements: 2, ScalarTy: i64);
99
100 const LLT i128 = LLT::integer(SizeInBits: 128);
101
102 const LLT nxv16i8 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: i8);
103 const LLT nxv8i16 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: i16);
104 const LLT nxv4i32 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: i32);
105 const LLT nxv2i64 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: i64);
106
107 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
108 v16s8, v8s16, v4s32,
109 v2s64, v2p0,
110 /* End 128bit types */
111 /* Begin 64bit types */
112 v8s8, v4s16, v2s32};
113 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
114 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
115 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
116
117 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
118
119 // FIXME: support subtargets which have neon/fp-armv8 disabled.
120 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
121 getLegacyLegalizerInfo().computeTables();
122 return;
123 }
124
125 // Some instructions only support s16 if the subtarget has full 16-bit FP
126 // support.
127 const bool HasFP16 = ST.hasFullFP16();
128 const LLT &MinFPScalar = HasFP16 ? f16 : f32;
129
130 const bool HasCSSC = ST.hasCSSC();
131 const bool HasRCPC3 = ST.hasRCPC3();
132 const bool HasSVE = ST.hasSVE();
133
134 getActionDefinitionsBuilder(
135 Opcodes: {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
136 .legalFor(Types: {p0, s8, s16, s32, s64, s128})
137 .legalFor(Types: {v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
138 v2s64, v2p0})
139 .widenScalarToNextPow2(TypeIdx: 0)
140 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s64)
141 .moreElementsToNextPow2(TypeIdx: 0)
142 .widenVectorEltsToVectorMinSize(TypeIdx: 0, VectorSize: 64)
143 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
144 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
145 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
146 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
147 .clampMaxNumElements(TypeIdx: 0, EltTy: p0, MaxElements: 2)
148 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0);
149
150 getActionDefinitionsBuilder(Opcode: G_PHI)
151 .legalFor(Types: {p0, s16, s32, s64})
152 .legalFor(Types: PackedVectorAllTypeList)
153 .widenScalarToNextPow2(TypeIdx: 0)
154 .moreElementsToNextPow2(TypeIdx: 0)
155 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
156 .clampScalar(TypeIdx: 0, MinTy: s16, MaxTy: s64)
157 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
158 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
159 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
160 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
161 .clampMaxNumElements(TypeIdx: 0, EltTy: p0, MaxElements: 2);
162
163 getActionDefinitionsBuilder(Opcode: G_INSERT)
164 .legalIf(Predicate: all(P0: typeInSet(TypeIdx: 0, TypesInit: {s32, s64, p0}), P1: typeInSet(TypeIdx: 1, TypesInit: {s8, s16, s32}),
165 args: smallerThan(TypeIdx0: 1, TypeIdx1: 0)))
166 .widenScalarToNextPow2(TypeIdx: 0)
167 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
168 .widenScalarToNextPow2(TypeIdx: 1)
169 .minScalar(TypeIdx: 1, Ty: s8)
170 .maxScalarIf(Predicate: typeInSet(TypeIdx: 0, TypesInit: {s32}), TypeIdx: 1, Ty: s16)
171 .maxScalarIf(Predicate: typeInSet(TypeIdx: 0, TypesInit: {s64, p0}), TypeIdx: 1, Ty: s32);
172
173 getActionDefinitionsBuilder(Opcode: G_EXTRACT)
174 .legalIf(Predicate: all(P0: typeInSet(TypeIdx: 0, TypesInit: {s16, s32, s64, p0}),
175 P1: typeInSet(TypeIdx: 1, TypesInit: {s32, s64, s128, p0}), args: smallerThan(TypeIdx0: 0, TypeIdx1: 1)))
176 .widenScalarToNextPow2(TypeIdx: 1)
177 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s128)
178 .widenScalarToNextPow2(TypeIdx: 0)
179 .minScalar(TypeIdx: 0, Ty: s16)
180 .maxScalarIf(Predicate: typeInSet(TypeIdx: 1, TypesInit: {s32}), TypeIdx: 0, Ty: s16)
181 .maxScalarIf(Predicate: typeInSet(TypeIdx: 1, TypesInit: {s64, p0}), TypeIdx: 0, Ty: s32)
182 .maxScalarIf(Predicate: typeInSet(TypeIdx: 1, TypesInit: {s128}), TypeIdx: 0, Ty: s64);
183
184 getActionDefinitionsBuilder(Opcodes: {G_ADD, G_SUB, G_AND, G_OR, G_XOR})
185 .legalFor(Types: {i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
186 .legalFor(Pred: HasSVE, Types: {nxv16i8, nxv8i16, nxv4i32, nxv2i64})
187 .widenScalarToNextPow2(TypeIdx: 0)
188 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
189 .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: 16)
190 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: 8)
191 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
192 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
193 .minScalarOrEltIf(
194 Predicate: [=](const LegalityQuery &Query) {
195 return Query.Types[0].getNumElements() <= 2;
196 },
197 TypeIdx: 0, Ty: s32)
198 .minScalarOrEltIf(
199 Predicate: [=](const LegalityQuery &Query) {
200 return Query.Types[0].getNumElements() <= 4;
201 },
202 TypeIdx: 0, Ty: s16)
203 .minScalarOrEltIf(
204 Predicate: [=](const LegalityQuery &Query) {
205 return Query.Types[0].getNumElements() <= 16;
206 },
207 TypeIdx: 0, Ty: s8)
208 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
209 .moreElementsToNextPow2(TypeIdx: 0);
210
211 getActionDefinitionsBuilder(Opcode: G_MUL)
212 .legalFor(Types: {i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
213 .widenScalarToNextPow2(TypeIdx: 0)
214 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
215 .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: 16)
216 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: 8)
217 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
218 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
219 .minScalarOrEltIf(
220 Predicate: [=](const LegalityQuery &Query) {
221 return Query.Types[0].getNumElements() <= 2;
222 },
223 TypeIdx: 0, Ty: s32)
224 .minScalarOrEltIf(
225 Predicate: [=](const LegalityQuery &Query) {
226 return Query.Types[0].getNumElements() <= 4;
227 },
228 TypeIdx: 0, Ty: s16)
229 .minScalarOrEltIf(
230 Predicate: [=](const LegalityQuery &Query) {
231 return Query.Types[0].getNumElements() <= 16;
232 },
233 TypeIdx: 0, Ty: s8)
234 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
235 .moreElementsToNextPow2(TypeIdx: 0);
236
237 getActionDefinitionsBuilder(Opcodes: {G_SHL, G_ASHR, G_LSHR})
238 .customIf(Predicate: [=](const LegalityQuery &Query) {
239 const auto &SrcTy = Query.Types[0];
240 const auto &AmtTy = Query.Types[1];
241 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
242 AmtTy.getSizeInBits() == 32;
243 })
244 .legalFor(Types: {
245 {i32, i32},
246 {i32, i64},
247 {i64, i64},
248 {v8i8, v8i8},
249 {v16i8, v16i8},
250 {v4i16, v4i16},
251 {v8i16, v8i16},
252 {v2i32, v2i32},
253 {v4i32, v4i32},
254 {v2i64, v2i64},
255 })
256 .widenScalarToNextPow2(TypeIdx: 0)
257 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s64)
258 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
259 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
260 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
261 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
262 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
263 .moreElementsToNextPow2(TypeIdx: 0)
264 .minScalarSameAs(TypeIdx: 1, LargeTypeIdx: 0)
265 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
266 .minScalarEltSameAsIf(Predicate: isVector(TypeIdx: 0), TypeIdx: 1, LargeTypeIdx: 0)
267 .maxScalarEltSameAsIf(Predicate: isVector(TypeIdx: 0), TypeIdx: 1, SmallTypeIdx: 0);
268
269 getActionDefinitionsBuilder(Opcode: G_PTR_ADD)
270 .legalFor(Types: {{p0, i64}, {v2p0, v2i64}})
271 .clampScalarOrElt(TypeIdx: 1, MinTy: s64, MaxTy: s64)
272 .clampNumElements(TypeIdx: 0, MinTy: v2p0, MaxTy: v2p0);
273
274 getActionDefinitionsBuilder(Opcode: G_PTRMASK).legalFor(Types: {{p0, s64}});
275
276 getActionDefinitionsBuilder(Opcodes: {G_SDIV, G_UDIV})
277 .legalFor(Types: {i32, i64})
278 .libcallFor(Types: {i128})
279 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
280 .widenScalarToNextPow2(TypeIdx: 0)
281 .scalarize(TypeIdx: 0);
282
283 getActionDefinitionsBuilder(Opcodes: {G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
284 .lowerFor(Types: {i8, i16, i32, i64, v2i32, v4i32, v2i64})
285 .libcallFor(Types: {i128})
286 .widenScalarOrEltToNextPow2(TypeIdx: 0)
287 .minScalarOrElt(TypeIdx: 0, Ty: s32)
288 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
289 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
290 .scalarize(TypeIdx: 0);
291
292 getActionDefinitionsBuilder(Opcodes: {G_SMULO, G_UMULO})
293 .widenScalarToNextPow2(TypeIdx: 0, /*Min = */ MinSize: 32)
294 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
295 .lower();
296
297 getActionDefinitionsBuilder(Opcodes: {G_SMULH, G_UMULH})
298 .legalFor(Types: {i64, v16i8, v8i16, v4i32})
299 .lower();
300
301 getActionDefinitionsBuilder(Opcodes: {G_SMULFIX, G_UMULFIX}).lower();
302
303 getActionDefinitionsBuilder(Opcodes: {G_SMIN, G_SMAX, G_UMIN, G_UMAX})
304 .legalFor(Types: {v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
305 .legalFor(Pred: HasCSSC, Types: {i32, i64})
306 .minScalar(Pred: HasCSSC, TypeIdx: 0, Ty: s32)
307 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
308 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
309 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
310 .lower();
311
312 // FIXME: Legal vector types are only legal with NEON.
313 getActionDefinitionsBuilder(Opcode: G_ABS)
314 .legalFor(Pred: HasCSSC, Types: {i32, i64})
315 .legalFor(Types: {v16i8, v8i16, v4i32, v2i64, v2p0, v8i8, v4i16, v2i32})
316 .customIf(Predicate: [=](const LegalityQuery &Q) {
317 // TODO: Fix suboptimal codegen for 128+ bit types.
318 LLT SrcTy = Q.Types[0];
319 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
320 })
321 .widenScalarIf(
322 Predicate: [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
323 Mutation: [=](const LegalityQuery &Query) { return std::make_pair(x: 0, y: v4i16); })
324 .widenScalarIf(
325 Predicate: [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
326 Mutation: [=](const LegalityQuery &Query) { return std::make_pair(x: 0, y: v2i32); })
327 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
328 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
329 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
330 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
331 .moreElementsToNextPow2(TypeIdx: 0)
332 .lower();
333
334 getActionDefinitionsBuilder(
335 Opcodes: {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
336 .legalFor(Types: {v8i8, v16i8, v4i16, v8i16, v2i32, v4i32})
337 .lower();
338
339 getActionDefinitionsBuilder(
340 Opcodes: {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
341 .legalFor(Types: {{i32, i32}, {i64, i32}})
342 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
343 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s64)
344 .widenScalarToNextPow2(TypeIdx: 0);
345
346 getActionDefinitionsBuilder(Opcodes: {G_FSHL, G_FSHR})
347 .customFor(Types: {{i32, i32}, {i32, i64}, {i64, i64}})
348 .lower();
349
350 getActionDefinitionsBuilder(Opcode: G_ROTR)
351 .legalFor(Types: {{i32, i64}, {i64, i64}})
352 .customIf(Predicate: [=](const LegalityQuery &Q) {
353 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
354 })
355 .lower();
356 getActionDefinitionsBuilder(Opcode: G_ROTL).lower();
357
358 getActionDefinitionsBuilder(Opcodes: {G_SBFX, G_UBFX})
359 .customFor(Types: {{s32, s32}, {s64, s64}});
360
361 auto always = [=](const LegalityQuery &Q) { return true; };
362 getActionDefinitionsBuilder(Opcode: G_CTPOP)
363 .legalFor(Pred: HasCSSC, Types: {{i32, i32}, {i64, i64}})
364 .legalFor(Types: {{v8i8, v8i8}, {v16i8, v16i8}})
365 .customFor(Pred: !HasCSSC, Types: {{s32, s32}, {s64, s64}})
366 .customFor(Types: {{s128, s128},
367 {v4s16, v4s16},
368 {v8s16, v8s16},
369 {v2s32, v2s32},
370 {v4s32, v4s32},
371 {v2s64, v2s64}})
372 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s128)
373 .widenScalarToNextPow2(TypeIdx: 0)
374 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 0, MinSize: 8)
375 .minScalarEltSameAsIf(Predicate: always, TypeIdx: 1, LargeTypeIdx: 0)
376 .maxScalarEltSameAsIf(Predicate: always, TypeIdx: 1, SmallTypeIdx: 0)
377 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
378 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
379 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
380 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
381 .moreElementsToNextPow2(TypeIdx: 0)
382 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0);
383
384 getActionDefinitionsBuilder(Opcodes: {G_CTLZ, G_CTLS})
385 .legalFor(Types: {{i32, i32},
386 {i64, i64},
387 {v8i8, v8i8},
388 {v16i8, v16i8},
389 {v4i16, v4i16},
390 {v8i16, v8i16},
391 {v2i32, v2i32},
392 {v4i32, v4i32}})
393 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 32)
394 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s64)
395 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 1, /*Min=*/MinSize: 8)
396 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
397 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
398 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
399 .moreElementsToNextPow2(TypeIdx: 0)
400 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 32), TypeIdx: 0)
401 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1);
402
403 getActionDefinitionsBuilder(Opcode: G_CTLZ_ZERO_POISON).lower();
404
405 getActionDefinitionsBuilder(Opcode: G_CTTZ)
406 .lowerIf(Predicate: isVector(TypeIdx: 0))
407 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 32)
408 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s64)
409 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1)
410 .legalFor(Pred: HasCSSC, Types: {s32, s64})
411 .customFor(Pred: !HasCSSC, Types: {s32, s64});
412
413 getActionDefinitionsBuilder(Opcode: G_CTTZ_ZERO_POISON).lower();
414
415 getActionDefinitionsBuilder(Opcode: G_BITREVERSE)
416 .legalFor(Types: {i32, i64, v8i8, v16i8})
417 .widenScalarToNextPow2(TypeIdx: 0, /*Min = */ MinSize: 32)
418 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 0, MinSize: 8)
419 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
420 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
421 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
422 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
423 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
424 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
425 .moreElementsToNextPow2(TypeIdx: 0)
426 .lower();
427
428 getActionDefinitionsBuilder(Opcode: G_CLMUL).legalFor(Types: {v8i8, v16i8});
429
430 getActionDefinitionsBuilder(Opcode: G_BSWAP)
431 .legalFor(Types: {i32, i64, v4i16, v8i16, v2i32, v4i32, v2i64})
432 .widenScalarOrEltToNextPow2(TypeIdx: 0, MinSize: 16)
433 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
434 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
435 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
436 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
437 .moreElementsToNextPow2(TypeIdx: 0);
438
439 getActionDefinitionsBuilder(Opcodes: {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
440 .legalFor(Types: {v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
441 .legalFor(Pred: HasSVE, Types: {nxv16i8, nxv8i16, nxv4i32, nxv2i64})
442 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
443 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
444 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
445 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
446 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
447 .moreElementsToNextPow2(TypeIdx: 0)
448 .lower();
449
450 getActionDefinitionsBuilder(
451 Opcodes: {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
452 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
453 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
454 .legalFor(Types: {f32, f64, v2f32, v4f32, v2f64})
455 .legalFor(Pred: HasFP16, Types: {f16, v4f16, v8f16})
456 .libcallFor(Types: {f128})
457 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
458 .widenScalarIf(
459 Predicate: [=](const LegalityQuery &Q) {
460 return (!HasFP16 && Q.Types[0].getScalarType().isFloat16()) ||
461 Q.Types[0].getScalarType().isBFloat16();
462 },
463 Mutation: changeElementTo(TypeIdx: 0, Ty: f32))
464 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
465 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
466 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
467 .moreElementsToNextPow2(TypeIdx: 0);
468
469 getActionDefinitionsBuilder(Opcodes: {G_FABS, G_FNEG})
470 .legalFor(Types: {f32, f64, v2f32, v4f32, v2f64})
471 .legalFor(Pred: HasFP16, Types: {f16, bf16, v4f16, v4bf16, v8f16, v8bf16})
472 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
473 .lowerIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64))
474 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
475 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
476 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
477 .moreElementsToNextPow2(TypeIdx: 0)
478 .lowerFor(Types: {f16, bf16, v4f16, v4bf16, v8f16, v8bf16});
479
480 getActionDefinitionsBuilder(Opcodes: {G_FREM, G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
481 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
482 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
483 G_FSINH, G_FTANH, G_FMODF})
484 .libcallFor(Types: {f32, f64, f128})
485 .widenScalarFor(Types: {f16, bf16}, Mutation: changeElementTo(TypeIdx: 0, Ty: f32))
486 .scalarize(TypeIdx: 0);
487 getActionDefinitionsBuilder(Opcodes: {G_FPOWI, G_FLDEXP})
488 .libcallFor(Types: {{f32, i32}, {f64, i32}, {f128, i32}})
489 .widenScalarFor(Types: {f16, bf16}, Mutation: changeElementTo(TypeIdx: 0, Ty: f32))
490 .scalarize(TypeIdx: 0);
491
492 getActionDefinitionsBuilder(Opcodes: {G_LROUND, G_INTRINSIC_LRINT})
493 .legalFor(Types: {{i32, f32}, {i32, f64}, {i64, f32}, {i64, f64}})
494 .legalFor(Pred: HasFP16, Types: {{i32, f16}, {i64, f16}})
495 .minScalar(TypeIdx: 1, Ty: s32)
496 .libcallFor(Types: {{s64, s128}})
497 .lower();
498 getActionDefinitionsBuilder(Opcodes: {G_LLROUND, G_INTRINSIC_LLRINT})
499 .legalFor(Types: {{i64, f32}, {i64, f64}})
500 .legalFor(Pred: HasFP16, Types: {{i64, f16}})
501 .minScalar(TypeIdx: 0, Ty: s64)
502 .minScalar(TypeIdx: 1, Ty: s32)
503 .libcallFor(Types: {{s64, s128}})
504 .lower();
505
506 // TODO: Custom legalization for mismatched types.
507 getActionDefinitionsBuilder(Opcode: G_FCOPYSIGN)
508 .moreElementsIf(
509 Predicate: [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
510 Mutation: [=](const LegalityQuery &Query) {
511 const LLT Ty = Query.Types[0];
512 return std::pair(0, LLT::fixed_vector(NumElements: Ty == s16 ? 4 : 2, ScalarTy: Ty));
513 })
514 .lower();
515
516 getActionDefinitionsBuilder(Opcode: G_FMAD).lower();
517
518 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
519 auto &Actions = getActionDefinitionsBuilder(Opcode: Op);
520
521 if (Op == G_SEXTLOAD)
522 Actions.lowerIf(Predicate: atomicOrderingAtLeastOrStrongerThan(MMOIdx: 0, Ordering: AtomicOrdering::Unordered));
523
524 // Atomics have zero extending behavior.
525 Actions
526 .legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 8},
527 {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 8},
528 {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: 8},
529 {.Type0: s64, .Type1: p0, .MemTy: s8, .Align: 2},
530 {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: 2},
531 {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: 4},
532 {.Type0: s64, .Type1: p0, .MemTy: s64, .Align: 8},
533 {.Type0: p0, .Type1: p0, .MemTy: s64, .Align: 8},
534 {.Type0: v2s32, .Type1: p0, .MemTy: s64, .Align: 8}})
535 .widenScalarToNextPow2(TypeIdx: 0)
536 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
537 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
538 // how to do that yet.
539 .unsupportedIfMemSizeNotPow2()
540 // Lower anything left over into G_*EXT and G_LOAD
541 .lower();
542 }
543
544 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
545 const LLT &ValTy = Query.Types[0];
546 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
547 };
548
549 getActionDefinitionsBuilder(Opcode: G_LOAD)
550 .customIf(Predicate: [=](const LegalityQuery &Query) {
551 return HasRCPC3 && Query.Types[0] == s128 &&
552 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
553 })
554 .customIf(Predicate: [=](const LegalityQuery &Query) {
555 return Query.Types[0] == s128 &&
556 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
557 })
558 .legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s8, .Type1: p0, .MemTy: s8, .Align: 8},
559 {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: 8},
560 {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: 8},
561 {.Type0: s64, .Type1: p0, .MemTy: s64, .Align: 8},
562 {.Type0: p0, .Type1: p0, .MemTy: s64, .Align: 8},
563 {.Type0: s128, .Type1: p0, .MemTy: s128, .Align: 8},
564 {.Type0: v8s8, .Type1: p0, .MemTy: s64, .Align: 8},
565 {.Type0: v16s8, .Type1: p0, .MemTy: s128, .Align: 8},
566 {.Type0: v4s16, .Type1: p0, .MemTy: s64, .Align: 8},
567 {.Type0: v8s16, .Type1: p0, .MemTy: s128, .Align: 8},
568 {.Type0: v2s32, .Type1: p0, .MemTy: s64, .Align: 8},
569 {.Type0: v4s32, .Type1: p0, .MemTy: s128, .Align: 8},
570 {.Type0: v2s64, .Type1: p0, .MemTy: s128, .Align: 8}})
571 // These extends are also legal
572 .legalForTypesWithMemDesc(
573 TypesAndMemDesc: {{.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 8}, {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 8}, {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: 8}})
574 .legalForTypesWithMemDesc(TypesAndMemDesc: {
575 // SVE vscale x 128 bit base sizes
576 {.Type0: nxv16s8, .Type1: p0, .MemTy: nxv16s8, .Align: 8},
577 {.Type0: nxv8s16, .Type1: p0, .MemTy: nxv8s16, .Align: 8},
578 {.Type0: nxv4s32, .Type1: p0, .MemTy: nxv4s32, .Align: 8},
579 {.Type0: nxv2s64, .Type1: p0, .MemTy: nxv2s64, .Align: 8},
580 })
581 .widenScalarToNextPow2(TypeIdx: 0, /* MinSize = */ 8)
582 .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: 16)
583 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: 8)
584 .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: 4)
585 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
586 .clampMaxNumElements(TypeIdx: 0, EltTy: p0, MaxElements: 2)
587 .lowerIfMemSizeNotByteSizePow2()
588 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s64)
589 .narrowScalarIf(
590 Predicate: [=](const LegalityQuery &Query) {
591 // Clamp extending load results to 32-bits.
592 return Query.Types[0].isScalar() &&
593 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
594 Query.Types[0].getSizeInBits() > 32;
595 },
596 Mutation: changeTo(TypeIdx: 0, Ty: s32))
597 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
598 .bitcastIf(Predicate: typeInSet(TypeIdx: 0, TypesInit: {v4s8}),
599 Mutation: [=](const LegalityQuery &Query) {
600 const LLT VecTy = Query.Types[0];
601 return std::pair(0, LLT::integer(SizeInBits: VecTy.getSizeInBits()));
602 })
603 .customIf(Predicate: IsPtrVecPred)
604 .scalarizeIf(Predicate: typeInSet(TypeIdx: 0, TypesInit: {v2s16, v2s8}), TypeIdx: 0)
605 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0);
606
607 getActionDefinitionsBuilder(Opcode: G_STORE)
608 .customIf(Predicate: [=](const LegalityQuery &Query) {
609 return HasRCPC3 && Query.Types[0] == s128 &&
610 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
611 })
612 .customIf(Predicate: [=](const LegalityQuery &Query) {
613 return Query.Types[0] == s128 &&
614 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
615 })
616 .widenScalarIf(
617 Predicate: all(P0: scalarNarrowerThan(TypeIdx: 0, Size: 32),
618 P1: atomicOrderingAtLeastOrStrongerThan(MMOIdx: 0, Ordering: AtomicOrdering::Release)),
619 Mutation: changeTo(TypeIdx: 0, Ty: s32))
620 .legalForTypesWithMemDesc(
621 TypesAndMemDesc: {{.Type0: s8, .Type1: p0, .MemTy: s8, .Align: 8}, {.Type0: s16, .Type1: p0, .MemTy: s8, .Align: 8}, // truncstorei8 from s16
622 {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 8}, // truncstorei8 from s32
623 {.Type0: s64, .Type1: p0, .MemTy: s8, .Align: 8}, // truncstorei8 from s64
624 {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: 8}, {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 8}, // truncstorei16 from s32
625 {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: 8}, // truncstorei16 from s64
626 {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 8}, {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 8}, {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: 8},
627 {.Type0: s64, .Type1: p0, .MemTy: s64, .Align: 8}, {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: 8}, // truncstorei32 from s64
628 {.Type0: p0, .Type1: p0, .MemTy: s64, .Align: 8}, {.Type0: s128, .Type1: p0, .MemTy: s128, .Align: 8}, {.Type0: v16s8, .Type1: p0, .MemTy: s128, .Align: 8},
629 {.Type0: v8s8, .Type1: p0, .MemTy: s64, .Align: 8}, {.Type0: v4s16, .Type1: p0, .MemTy: s64, .Align: 8}, {.Type0: v8s16, .Type1: p0, .MemTy: s128, .Align: 8},
630 {.Type0: v2s32, .Type1: p0, .MemTy: s64, .Align: 8}, {.Type0: v4s32, .Type1: p0, .MemTy: s128, .Align: 8}, {.Type0: v2s64, .Type1: p0, .MemTy: s128, .Align: 8}})
631 .legalForTypesWithMemDesc(TypesAndMemDesc: {
632 // SVE vscale x 128 bit base sizes
633 // TODO: Add nxv2p0. Consider bitcastIf.
634 // See #92130
635 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
636 {.Type0: nxv16s8, .Type1: p0, .MemTy: nxv16s8, .Align: 8},
637 {.Type0: nxv8s16, .Type1: p0, .MemTy: nxv8s16, .Align: 8},
638 {.Type0: nxv4s32, .Type1: p0, .MemTy: nxv4s32, .Align: 8},
639 {.Type0: nxv2s64, .Type1: p0, .MemTy: nxv2s64, .Align: 8},
640 })
641 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s64)
642 .minScalarOrElt(TypeIdx: 0, Ty: s8)
643 .lowerIf(Predicate: [=](const LegalityQuery &Query) {
644 return Query.Types[0].isScalar() &&
645 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
646 })
647 // Maximum: sN * k = 128
648 .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: 16)
649 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: 8)
650 .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: 4)
651 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
652 .clampMaxNumElements(TypeIdx: 0, EltTy: p0, MaxElements: 2)
653 .lowerIfMemSizeNotPow2()
654 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
655 .bitcastIf(Predicate: all(P0: typeInSet(TypeIdx: 0, TypesInit: {v4s8}),
656 P1: LegalityPredicate([=](const LegalityQuery &Query) {
657 return Query.Types[0].getSizeInBits() ==
658 Query.MMODescrs[0].MemoryTy.getSizeInBits();
659 })),
660 Mutation: [=](const LegalityQuery &Query) {
661 const LLT VecTy = Query.Types[0];
662 return std::pair(0, LLT::integer(SizeInBits: VecTy.getSizeInBits()));
663 })
664 .customIf(Predicate: IsPtrVecPred)
665 .scalarizeIf(Predicate: typeInSet(TypeIdx: 0, TypesInit: {v2s16, v2s8}), TypeIdx: 0)
666 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
667 .lower();
668
669 getActionDefinitionsBuilder(Opcode: G_INDEXED_STORE)
670 // Idx 0 == Ptr, Idx 1 == Val
671 // TODO: we can implement legalizations but as of now these are
672 // generated in a very specific way.
673 .legalForTypesWithMemDesc(TypesAndMemDesc: {
674 {.Type0: p0, .Type1: s8, .MemTy: s8, .Align: 8},
675 {.Type0: p0, .Type1: s16, .MemTy: s16, .Align: 8},
676 {.Type0: p0, .Type1: s32, .MemTy: s8, .Align: 8},
677 {.Type0: p0, .Type1: s32, .MemTy: s16, .Align: 8},
678 {.Type0: p0, .Type1: s32, .MemTy: s32, .Align: 8},
679 {.Type0: p0, .Type1: s64, .MemTy: s64, .Align: 8},
680 {.Type0: p0, .Type1: p0, .MemTy: p0, .Align: 8},
681 {.Type0: p0, .Type1: v8s8, .MemTy: v8s8, .Align: 8},
682 {.Type0: p0, .Type1: v16s8, .MemTy: v16s8, .Align: 8},
683 {.Type0: p0, .Type1: v4s16, .MemTy: v4s16, .Align: 8},
684 {.Type0: p0, .Type1: v8s16, .MemTy: v8s16, .Align: 8},
685 {.Type0: p0, .Type1: v2s32, .MemTy: v2s32, .Align: 8},
686 {.Type0: p0, .Type1: v4s32, .MemTy: v4s32, .Align: 8},
687 {.Type0: p0, .Type1: v2s64, .MemTy: v2s64, .Align: 8},
688 {.Type0: p0, .Type1: v2p0, .MemTy: v2p0, .Align: 8},
689 {.Type0: p0, .Type1: s128, .MemTy: s128, .Align: 8},
690 })
691 .unsupported();
692
// Legality predicate for G_INDEXED_LOAD. A query passes only when the loaded
// type (type index 0) is s128 or is listed in PackedVectorAllTypesVec or
// ScalarAndPtrTypesVec, and the pointer type (type index 1) is exactly p0.
693 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
694 LLT LdTy = Query.Types[0];
695 LLT PtrTy = Query.Types[1];
// Reject loaded types that are in neither list and are not s128.
696 if (!llvm::is_contained(Range: PackedVectorAllTypesVec, Element: LdTy) &&
697 !llvm::is_contained(Range: ScalarAndPtrTypesVec, Element: LdTy) && LdTy != s128)
698 return false;
// The address operand must be a p0 pointer.
699 if (PtrTy != p0)
700 return false;
701 return true;
702 };
// Indexed loads: a query whose memory operand 0 has an atomic ordering of
// Unordered or stronger is unsupported; otherwise it is legal when
// IndexedLoadBasicPred accepts it and unsupported when it does not (assuming
// the rules are consulted in the order written).
703 getActionDefinitionsBuilder(Opcode: G_INDEXED_LOAD)
704 .unsupportedIf(
705 Predicate: atomicOrderingAtLeastOrStrongerThan(MMOIdx: 0, Ordering: AtomicOrdering::Unordered))
706 .legalIf(Predicate: IndexedLoadBasicPred)
707 .unsupported();
// Indexed extending loads. A query whose memory operand 0 has an atomic
// ordering of Unordered or stronger is unsupported. Otherwise the query is
// legal only for a p0 address and one of these loaded-type <- memory-type
// combinations:
//   s16 <- s8;  s32 <- s8, s16;  s64 <- s8, s16, s32.
// Every other query is unsupported (assuming the rules are consulted in the
// order written).
708 getActionDefinitionsBuilder(Opcodes: {G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
709 .unsupportedIf(
710 Predicate: atomicOrderingAtLeastOrStrongerThan(MMOIdx: 0, Ordering: AtomicOrdering::Unordered))
711 .legalIf(Predicate: all(P0: typeInSet(TypeIdx: 0, TypesInit: {s16, s32, s64}),
712 P1: LegalityPredicate([=](const LegalityQuery &Q) {
713 LLT LdTy = Q.Types[0];
714 LLT PtrTy = Q.Types[1];
715 LLT MemTy = Q.MMODescrs[0].MemoryTy;
// The address operand must be a p0 pointer.
716 if (PtrTy != p0)
717 return false;
// For each loaded type, accept only the narrower scalar memory types listed.
718 if (LdTy == s16)
719 return MemTy == s8;
720 if (LdTy == s32)
721 return MemTy == s8 || MemTy == s16;
722 if (LdTy == s64)
723 return MemTy == s8 || MemTy == s16 || MemTy == s32;
724 return false;
725 })))
726 .unsupported();
727
728 // Constants
729 getActionDefinitionsBuilder(Opcode: G_CONSTANT)
730 .legalFor(Types: {p0, s8, s16, s32, s64})
731 .widenScalarToNextPow2(TypeIdx: 0)
732 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s64);
733 getActionDefinitionsBuilder(Opcode: G_FCONSTANT)
734 .legalFor(Types: {s16, s32, s64, s128});
735
736 // FIXME: fix moreElementsToNextPow2
737 getActionDefinitionsBuilder(Opcode: G_ICMP)
738 .legalFor(Types: {{i32, i32}, {i32, i64}, {i32, p0}})
739 .widenScalarOrEltToNextPow2(TypeIdx: 1)
740 .minScalarOrElt(TypeIdx: 1, Ty: s8)
741 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s64)
742 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32)
743 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 1, Size: 64), TypeIdx: 1)
744 .minScalarEltSameAsIf(
745 Predicate: [=](const LegalityQuery &Query) {
746 const LLT &Ty = Query.Types[0];
747 const LLT &SrcTy = Query.Types[1];
748 return Ty.isVector() && !SrcTy.isPointerVector() &&
749 Ty.getElementType() != SrcTy.getElementType();
750 },
751 TypeIdx: 0, LargeTypeIdx: 1)
752 .minScalarOrEltIf(
753 Predicate: [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
754 TypeIdx: 1, Ty: s32)
755 .minScalarOrEltIf(
756 Predicate: [=](const LegalityQuery &Query) {
757 return Query.Types[1].isPointerVector();
758 },
759 TypeIdx: 0, Ty: s64)
760 .moreElementsToNextPow2(TypeIdx: 1)
761 .clampNumElements(TypeIdx: 1, MinTy: v8s8, MaxTy: v16s8)
762 .clampNumElements(TypeIdx: 1, MinTy: v4s16, MaxTy: v8s16)
763 .clampNumElements(TypeIdx: 1, MinTy: v2s32, MaxTy: v4s32)
764 .clampNumElements(TypeIdx: 1, MinTy: v2s64, MaxTy: v2s64)
765 .clampNumElements(TypeIdx: 1, MinTy: v2p0, MaxTy: v2p0)
766 .customIf(Predicate: isVector(TypeIdx: 0));
767
768 getActionDefinitionsBuilder(Opcode: G_FCMP)
769 .legalFor(Types: {{i32, f32},
770 {i32, f64},
771 {v4i32, v4f32},
772 {v2i32, v2f32},
773 {v2i64, v2f64}})
774 .legalFor(Pred: HasFP16, Types: {{i32, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
775 .widenScalarOrEltToNextPow2(TypeIdx: 1)
776 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32)
777 .widenScalarIf(
778 Predicate: [=](const LegalityQuery &Q) {
779 return (!HasFP16 && Q.Types[1].getScalarType().isFloat16()) ||
780 Q.Types[1].getScalarType().isBFloat16();
781 },
782 Mutation: changeElementTo(TypeIdx: 1, Ty: f32))
783 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 1, Size: 64), TypeIdx: 1)
784 .minScalarEltSameAsIf(
785 Predicate: [=](const LegalityQuery &Query) {
786 const LLT &Ty = Query.Types[0];
787 const LLT &SrcTy = Query.Types[1];
788 return Ty.isVector() && !SrcTy.isPointerVector() &&
789 Ty.getElementType() != SrcTy.getElementType();
790 },
791 TypeIdx: 0, LargeTypeIdx: 1)
792 .clampNumElements(TypeIdx: 1, MinTy: v4s16, MaxTy: v8s16)
793 .clampNumElements(TypeIdx: 1, MinTy: v2s32, MaxTy: v4s32)
794 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
795 .moreElementsToNextPow2(TypeIdx: 1)
796 .libcallFor(Types: {{s32, s128}});
797
798 // Extensions
// Legality predicate used for the scalar forms of G_ZEXT / G_SEXT / G_ANYEXT.
// Returns true only when the destination (type index 0) is not a vector and
// has a power-of-two size of at least 8 and below 128 bits, and the source
// (type index 1) has a power-of-two size of at least 8 bits.
799 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
800 unsigned DstSize = Query.Types[0].getSizeInBits();
801
802 // Handle legal vectors using legalFor
803 if (Query.Types[0].isVector())
804 return false;
805
806 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(Value: DstSize))
807 return false; // Extending to a scalar s128 needs narrowing.
808
809 const LLT &SrcTy = Query.Types[1];
810
811 // Make sure we fit in a register otherwise. Don't bother checking that
812 // the source type is below 128 bits. We shouldn't be allowing anything
813 // through which is wider than the destination in the first place.
814 unsigned SrcSize = SrcTy.getSizeInBits();
815 if (SrcSize < 8 || !isPowerOf2_32(Value: SrcSize))
816 return false;
817
818 return true;
819 };
820 getActionDefinitionsBuilder(Opcodes: {G_ZEXT, G_SEXT, G_ANYEXT})
821 .legalIf(Predicate: ExtLegalFunc)
822 .legalFor(Types: {{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
823 .clampScalar(TypeIdx: 0, MinTy: s64, MaxTy: s64) // Just for s128, others are handled above.
824 .moreElementsToNextPow2(TypeIdx: 0)
825 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 8)
826 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 4)
827 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 2)
828 // Tries to convert a large EXTEND into two smaller EXTENDs
829 .lowerIf(Predicate: [=](const LegalityQuery &Query) {
830 return (Query.Types[0].getScalarSizeInBits() >
831 Query.Types[1].getScalarSizeInBits() * 2) &&
832 Query.Types[0].isVector() &&
833 (Query.Types[1].getScalarSizeInBits() == 8 ||
834 Query.Types[1].getScalarSizeInBits() == 16);
835 })
836 .clampMinNumElements(TypeIdx: 1, EltTy: s8, MinElements: 8)
837 .clampMinNumElements(TypeIdx: 1, EltTy: s16, MinElements: 4)
838 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0);
839
// Truncation. The explicitly legal vector pairs halve the element width while
// keeping the element count: v8s16 -> v8s8, v4s32 -> v4s16, v2s64 -> v2s32.
840 getActionDefinitionsBuilder(Opcode: G_TRUNC)
841 .legalFor(Types: {{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
842 .moreElementsToNextPow2(TypeIdx: 0)
// Cap the destination at 8 x s8, 4 x s16 and 2 x s32 elements.
843 .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: 8)
844 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: 4)
845 .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: 2)
// Vector destinations get elements of at least s8.
846 .minScalarOrEltIf(
847 Predicate: [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
848 TypeIdx: 0, Ty: s8)
// Lower when the destination is a vector, the source is wider than 128 bits
// in total, and the source element is at least twice as wide as the
// destination element.
849 .lowerIf(Predicate: [=](const LegalityQuery &Query) {
850 LLT DstTy = Query.Types[0];
851 LLT SrcTy = Query.Types[1];
852 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
853 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
854 })
// Raise s8 destinations to at least 8 elements and s16 ones to at least 4.
855 .clampMinNumElements(TypeIdx: 0, EltTy: s8, MinElements: 8)
856 .clampMinNumElements(TypeIdx: 0, EltTy: s16, MinElements: 4)
// Final rule: whatever was not matched above is accepted as legal (assuming
// the rules are consulted in the order written).
857 .alwaysLegal();
858
859 getActionDefinitionsBuilder(Opcodes: {G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
860 .legalFor(Types: {{v8i8, v8i16}, {v4i16, v4i32}, {v2i32, v2i64}})
861 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v2s32);
862
863 getActionDefinitionsBuilder(Opcode: G_SEXT_INREG)
864 .legalFor(Types: {i32, i64, v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64})
865 .maxScalar(TypeIdx: 0, Ty: s64)
866 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
867 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
868 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
869 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
870 .lower();
871
872 // FP conversions
873 getActionDefinitionsBuilder(Opcode: G_FPTRUNC)
874 .legalFor(
875 Types: {{f16, f32}, {f16, f64}, {f32, f64}, {v4f16, v4f32}, {v2f32, v2f64}})
876 .legalFor(Pred: ST.hasBF16(), Types: {{bf16, f32}, {v4bf16, v4f32}})
877 .libcallFor(Types: {{f16, f128}, {f32, f128}, {f64, f128}})
878 .moreElementsToNextPow2(TypeIdx: 1)
879 .customIf(Predicate: [](const LegalityQuery &Q) {
880 LLT DstTy = Q.Types[0];
881 LLT SrcTy = Q.Types[1];
882 return SrcTy.getScalarSizeInBits() == 64 &&
883 DstTy.getScalarSizeInBits() == 16;
884 })
885 .lowerFor(Types: {{bf16, f32}, {v4bf16, v4f32}})
886 // Clamp based on input
887 .clampNumElements(TypeIdx: 1, MinTy: v4s32, MaxTy: v4s32)
888 .clampNumElements(TypeIdx: 1, MinTy: v2s64, MaxTy: v2s64)
889 .scalarize(TypeIdx: 0);
890
891 getActionDefinitionsBuilder(Opcode: G_FPEXT)
892 .legalFor(Types: {{f32, f16},
893 {f64, f16},
894 {f32, bf16},
895 {f64, f32},
896 {v4f32, v4f16},
897 {v4f32, v4bf16},
898 {v2f64, v2f32}})
899 .libcallFor(Types: {{f128, f64}, {f128, f32}, {f128, f16}})
900 .moreElementsToNextPow2(TypeIdx: 0)
901 .widenScalarIf(
902 Predicate: [](const LegalityQuery &Q) {
903 LLT DstTy = Q.Types[0];
904 LLT SrcTy = Q.Types[1];
905 return SrcTy.isVector() && DstTy.isVector() &&
906 SrcTy.getScalarSizeInBits() == 16 &&
907 DstTy.getScalarSizeInBits() == 64;
908 },
909 Mutation: changeElementTo(TypeIdx: 1, Ty: f32))
910 .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: v4s32)
911 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
912 .scalarize(TypeIdx: 0);
913
914 // Conversions
915 getActionDefinitionsBuilder(Opcodes: {G_FPTOSI, G_FPTOUI})
916 .legalFor(Types: {{i32, f32},
917 {i64, f32},
918 {i32, f64},
919 {i64, f64},
920 {v2i32, v2f32},
921 {v4i32, v4f32},
922 {v2i64, v2f64}})
923 .legalFor(Pred: HasFP16,
924 Types: {{i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
925 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
926 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 1, Size: 64), TypeIdx: 1)
927 // The range of a fp16 value fits into an i17, so we can lower the width
928 // to i64.
929 .narrowScalarIf(
930 Predicate: [=](const LegalityQuery &Query) {
931 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
932 },
933 Mutation: changeTo(TypeIdx: 0, Ty: i64))
934 .moreElementsToNextPow2(TypeIdx: 0)
935 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 0)
936 .minScalar(TypeIdx: 0, Ty: s32)
937 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 1, /*MinSize=*/HasFP16 ? 16 : 32)
938 .widenScalarIf(
939 Predicate: [=](const LegalityQuery &Query) {
940 return Query.Types[0].getScalarSizeInBits() <= 64 &&
941 Query.Types[0].getScalarSizeInBits() >
942 Query.Types[1].getScalarSizeInBits();
943 },
944 Mutation: LegalizeMutations::changeElementSizeTo(TypeIdx: 1, FromTypeIdx: 0))
945 .widenScalarIf(
946 Predicate: [=](const LegalityQuery &Query) {
947 return Query.Types[1].getScalarSizeInBits() <= 64 &&
948 Query.Types[0].getScalarSizeInBits() <
949 Query.Types[1].getScalarSizeInBits();
950 },
951 Mutation: LegalizeMutations::changeElementSizeTo(TypeIdx: 0, FromTypeIdx: 1))
952 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
953 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
954 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
955 .libcallFor(
956 Types: {{i32, f128}, {i64, f128}, {i128, f128}, {i128, f32}, {i128, f64}});
957
958 getActionDefinitionsBuilder(Opcodes: {G_FPTOSI_SAT, G_FPTOUI_SAT})
959 .legalFor(Types: {{i32, f32},
960 {i64, f32},
961 {i32, f64},
962 {i64, f64},
963 {v2i32, v2f32},
964 {v4i32, v4f32},
965 {v2i64, v2f64}})
966 .legalFor(
967 Pred: HasFP16,
968 Types: {{i16, f16}, {i32, f16}, {i64, f16}, {v4i16, v4f16}, {v8i16, v8f16}})
969 // Handle types larger than i64 by scalarizing/lowering.
970 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
971 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 1, Size: 64), TypeIdx: 1)
972 // The range of a fp16 value fits into an i17, so we can lower the width
973 // to i64.
974 .narrowScalarIf(
975 Predicate: [=](const LegalityQuery &Query) {
976 return Query.Types[1] == f16 && Query.Types[0].getSizeInBits() > 64;
977 },
978 Mutation: changeTo(TypeIdx: 0, Ty: i64))
979 .lowerIf(Predicate: ::any(P0: scalarWiderThan(TypeIdx: 0, Size: 64), P1: scalarWiderThan(TypeIdx: 1, Size: 64)), Mutation: 0)
980 .moreElementsToNextPow2(TypeIdx: 0)
981 .widenScalarToNextPow2(TypeIdx: 0, /*MinSize=*/32)
982 .minScalar(TypeIdx: 0, Ty: s32)
983 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 1, /*MinSize=*/HasFP16 ? 16 : 32)
984 .widenScalarIf(
985 Predicate: [=](const LegalityQuery &Query) {
986 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
987 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
988 ITySize > Query.Types[1].getScalarSizeInBits();
989 },
990 Mutation: LegalizeMutations::changeElementSizeTo(TypeIdx: 1, FromTypeIdx: 0))
991 .widenScalarIf(
992 Predicate: [=](const LegalityQuery &Query) {
993 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
994 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
995 Query.Types[0].getScalarSizeInBits() < FTySize;
996 },
997 Mutation: LegalizeMutations::changeElementSizeTo(TypeIdx: 0, FromTypeIdx: 1))
998 .widenScalarOrEltToNextPow2(TypeIdx: 0)
999 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
1000 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
1001 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2);
1002
1003 getActionDefinitionsBuilder(Opcodes: {G_SITOFP, G_UITOFP})
1004 .legalFor(Types: {{f32, i32},
1005 {f64, i32},
1006 {f32, i64},
1007 {f64, i64},
1008 {v2f32, v2i32},
1009 {v4f32, v4i32},
1010 {v2f64, v2i64}})
1011 .legalFor(Pred: HasFP16,
1012 Types: {{f16, i32}, {f16, i64}, {v4f16, v4i16}, {v8f16, v8i16}})
1013 .unsupportedIf(Predicate: [&](const LegalityQuery &Query) {
1014 return Query.Types[0].getScalarType().isBFloat16();
1015 })
1016 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 1, Size: 64), TypeIdx: 1)
1017 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
1018 .moreElementsToNextPow2(TypeIdx: 1)
1019 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 1)
1020 .minScalar(TypeIdx: 1, Ty: f32)
1021 .lowerIf(Predicate: [](const LegalityQuery &Query) {
1022 return Query.Types[1].isVector() &&
1023 Query.Types[1].getScalarSizeInBits() == 64 &&
1024 Query.Types[0].getScalarSizeInBits() == 16;
1025 })
1026 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 0, /*MinSize=*/HasFP16 ? 16 : 32)
1027 .scalarizeIf(
1028 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
1029 Predicate: [](const LegalityQuery &Query) {
1030 return Query.Types[0].getScalarSizeInBits() == 32 &&
1031 Query.Types[1].getScalarSizeInBits() == 64;
1032 },
1033 TypeIdx: 0)
1034 .widenScalarIf(
1035 Predicate: [](const LegalityQuery &Query) {
1036 return Query.Types[1].getScalarSizeInBits() <= 64 &&
1037 Query.Types[0].getScalarSizeInBits() <
1038 Query.Types[1].getScalarSizeInBits();
1039 },
1040 Mutation: LegalizeMutations::changeElementSizeTo(TypeIdx: 0, FromTypeIdx: 1))
1041 .widenScalarIf(
1042 Predicate: [](const LegalityQuery &Query) {
1043 return Query.Types[0].getScalarSizeInBits() <= 64 &&
1044 Query.Types[0].getScalarSizeInBits() >
1045 Query.Types[1].getScalarSizeInBits();
1046 },
1047 Mutation: LegalizeMutations::changeElementSizeTo(TypeIdx: 1, FromTypeIdx: 0))
1048 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
1049 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
1050 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
1051 .libcallFor(Types: {{f16, i128},
1052 {f32, i128},
1053 {f64, i128},
1054 {f128, i128},
1055 {f128, i32},
1056 {f128, i64}});
1057
1058 // Control-flow
1059 getActionDefinitionsBuilder(Opcode: G_BR).alwaysLegal();
1060 getActionDefinitionsBuilder(Opcode: G_BRCOND)
1061 .legalFor(Types: {s32})
1062 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32);
1063 getActionDefinitionsBuilder(Opcode: G_BRINDIRECT).legalFor(Types: {p0});
1064
1065 getActionDefinitionsBuilder(Opcode: G_SELECT)
1066 .legalFor(Types: {{s32, s32}, {s64, s32}, {p0, s32}})
1067 .widenScalarToNextPow2(TypeIdx: 0)
1068 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
1069 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s32)
1070 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
1071 .minScalarEltSameAsIf(Predicate: all(P0: isVector(TypeIdx: 0), P1: isVector(TypeIdx: 1)), TypeIdx: 1, LargeTypeIdx: 0)
1072 .lowerIf(Predicate: isVector(TypeIdx: 0));
1073
1074 // Pointer-handling
1075 getActionDefinitionsBuilder(Opcode: G_FRAME_INDEX).legalFor(Types: {p0});
1076
1077 if (TM.getCodeModel() == CodeModel::Small)
1078 getActionDefinitionsBuilder(Opcode: G_GLOBAL_VALUE).custom();
1079 else
1080 getActionDefinitionsBuilder(Opcode: G_GLOBAL_VALUE).legalFor(Types: {p0});
1081
1082 getActionDefinitionsBuilder(Opcode: G_PTRAUTH_GLOBAL_VALUE)
1083 .legalIf(Predicate: all(P0: typeIs(TypeIdx: 0, TypesInit: p0), P1: typeIs(TypeIdx: 1, TypesInit: p0)));
1084
1085 getActionDefinitionsBuilder(Opcode: G_PTRTOINT)
1086 .legalFor(Types: {{i64, p0}, {v2i64, v2p0}})
1087 .widenScalarToNextPow2(TypeIdx: 0, MinSize: 64)
1088 .clampScalar(TypeIdx: 0, MinTy: s64, MaxTy: s64)
1089 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2);
1090
// Integer -> pointer casts. A query whose result and source differ in total
// size is unsupported; i64 -> p0 and v2i64 -> v2p0 are legal; source vectors
// of s64 elements are capped at two elements.
1091 getActionDefinitionsBuilder(Opcode: G_INTTOPTR)
// The predicate reads only its Query argument, so the capture list is empty.
1092 .unsupportedIf(Predicate: [](const LegalityQuery &Query) {
1093 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1094 })
1095 .legalFor(Types: {{p0, i64}, {v2p0, v2i64}})
1096 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2);
1097
1098 // Casts for 32 and 64-bit width type are just copies.
1099 // Same for 128-bit width type, except they are on the FPR bank.
1100 getActionDefinitionsBuilder(Opcode: G_BITCAST)
1101 .legalForCartesianProduct(Types: {s16})
1102 // Keeping 32-bit instructions legal to prevent regression in some tests
1103 .legalForCartesianProduct(Types: {s32, v2s16, v4s8})
1104 .legalForCartesianProduct(Types: {s64, v8s8, v4s16, v2s32})
1105 .legalForCartesianProduct(Types: {s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1106 .customIf(Predicate: [=](const LegalityQuery &Query) {
1107 // Handle casts from i1 vectors to scalars.
1108 LLT DstTy = Query.Types[0];
1109 LLT SrcTy = Query.Types[1];
1110 return DstTy.isScalar() && SrcTy.isVector() &&
1111 SrcTy.getScalarSizeInBits() == 1;
1112 })
1113 .lowerIf(Predicate: [=](const LegalityQuery &Query) {
1114 return Query.Types[0].isVector() != Query.Types[1].isVector();
1115 })
1116 .moreElementsToNextPow2(TypeIdx: 0)
1117 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
1118 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
1119 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
1120 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
1121 .lower();
1122
1123 getActionDefinitionsBuilder(Opcode: G_VASTART).legalFor(Types: {p0});
1124
1125 // va_list must be a pointer, but most sized types are pretty easy to handle
1126 // as the destination.
1127 getActionDefinitionsBuilder(Opcode: G_VAARG)
1128 .customForCartesianProduct(Types0: {s8, s16, s32, s64, p0}, Types1: {p0})
1129 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s64)
1130 .widenScalarToNextPow2(TypeIdx: 0, /*Min*/ MinSize: 8);
1131
1132 getActionDefinitionsBuilder(Opcode: G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1133 .lowerIf(
1134 Predicate: all(P0: typeInSet(TypeIdx: 0, TypesInit: {s8, s16, s32, s64, s128}), P1: typeIs(TypeIdx: 2, TypesInit: p0)));
1135
// True when the subtarget reports outlineAtomics() and does not report
// hasLSE(). It selects between the inline and libcall rules below.
1136 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
1137
// Compare-and-swap. Without outlined atomics, s32/s64 values through a p0
// pointer are legal and s128 is custom-legalized. With outlined atomics,
// s8..s128 become library calls. Scalars are clamped to the s32..s64 range.
1138 getActionDefinitionsBuilder(Opcode: G_ATOMIC_CMPXCHG)
1139 .legalFor(Pred: !UseOutlineAtomics, Types: {{s32, p0}, {s64, p0}})
1140 .customFor(Pred: !UseOutlineAtomics, Types: {{s128, p0}})
1141 .libcallFor(Pred: UseOutlineAtomics,
1142 Types: {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1143 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64);
1144
// Exchange / add / sub / and / or / xor read-modify-write operations follow
// the same split, except that no s128 form is listed.
1145 getActionDefinitionsBuilder(Opcodes: {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1146 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1147 G_ATOMICRMW_XOR})
1148 .legalFor(Pred: !UseOutlineAtomics, Types: {{s32, p0}, {s64, p0}})
1149 .libcallFor(Pred: UseOutlineAtomics,
1150 Types: {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1151 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64);
1152
1153 // Do not outline these atomics operations, as per comment in
1154 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1155 getActionDefinitionsBuilder(
1156 Opcodes: {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1157 .legalIf(Predicate: all(P0: typeInSet(TypeIdx: 0, TypesInit: {s32, s64}), P1: typeIs(TypeIdx: 1, TypesInit: p0)))
1158 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64);
1159
1160 getActionDefinitionsBuilder(Opcode: G_BLOCK_ADDR).legalFor(Types: {p0});
1161
1162 // Merge/Unmerge
// Registers the same rule chain for G_MERGE_VALUES and G_UNMERGE_VALUES. Only
// the type indices differ: for a merge the wide ("big") type is index 0 and
// the narrow ("little") type is index 1; for an unmerge they are swapped.
1163 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1164 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1165 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
1166 getActionDefinitionsBuilder(Opcode: Op)
// Widen to a power of two: at least 8 bits for the little type, at least
// 32 bits for the big type.
1167 .widenScalarToNextPow2(TypeIdx: LitTyIdx, MinSize: 8)
1168 .widenScalarToNextPow2(TypeIdx: BigTyIdx, MinSize: 32)
// Clamp the little type to s8..s64 and the big type to s32..s128.
1169 .clampScalar(TypeIdx: LitTyIdx, MinTy: s8, MaxTy: s64)
1170 .clampScalar(TypeIdx: BigTyIdx, MinTy: s32, MaxTy: s128)
// Legal only when the big type is 32, 64 or 128 bits wide and the little
// type is 8, 16, 32 or 64 bits wide.
1171 .legalIf(Predicate: [=](const LegalityQuery &Q) {
1172 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1173 case 32:
1174 case 64:
1175 case 128:
1176 break;
1177 default:
1178 return false;
1179 }
1180 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1181 case 8:
1182 case 16:
1183 case 32:
1184 case 64:
1185 return true;
1186 default:
1187 return false;
1188 }
1189 });
1190 }
1191
1192 // TODO : nxv4s16, nxv2s16, nxv2s32
1193 getActionDefinitionsBuilder(Opcode: G_EXTRACT_VECTOR_ELT)
1194 .legalFor(Pred: HasSVE, Types: {{s16, nxv16s8, s64},
1195 {s16, nxv8s16, s64},
1196 {s32, nxv4s32, s64},
1197 {s64, nxv2s64, s64}})
1198 .unsupportedIf(Predicate: [=](const LegalityQuery &Query) {
1199 const LLT &EltTy = Query.Types[1].getElementType();
1200 if (Query.Types[1].isScalableVector())
1201 return false;
1202 return Query.Types[0] != EltTy;
1203 })
1204 .minScalar(TypeIdx: 2, Ty: s64)
1205 .customIf(Predicate: [=](const LegalityQuery &Query) {
1206 const LLT &VecTy = Query.Types[1];
1207 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1208 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1209 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1210 })
1211 .minScalarOrEltIf(
1212 Predicate: [=](const LegalityQuery &Query) {
1213 // We want to promote to <M x s1> to <M x s64> if that wouldn't
1214 // cause the total vec size to be > 128b.
1215 return Query.Types[1].isFixedVector() &&
1216 Query.Types[1].getNumElements() <= 2;
1217 },
1218 TypeIdx: 0, Ty: s64)
1219 .minScalarOrEltIf(
1220 Predicate: [=](const LegalityQuery &Query) {
1221 return Query.Types[1].isFixedVector() &&
1222 Query.Types[1].getNumElements() <= 4;
1223 },
1224 TypeIdx: 0, Ty: s32)
1225 .minScalarOrEltIf(
1226 Predicate: [=](const LegalityQuery &Query) {
1227 return Query.Types[1].isFixedVector() &&
1228 Query.Types[1].getNumElements() <= 8;
1229 },
1230 TypeIdx: 0, Ty: s16)
1231 .minScalarOrEltIf(
1232 Predicate: [=](const LegalityQuery &Query) {
1233 return Query.Types[1].isFixedVector() &&
1234 Query.Types[1].getNumElements() <= 16;
1235 },
1236 TypeIdx: 0, Ty: s8)
1237 .minScalarOrElt(TypeIdx: 0, Ty: s8) // Worst case, we need at least s8.
1238 .moreElementsToNextPow2(TypeIdx: 1)
1239 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1240 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1241 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1242 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 16)
1243 .clampMaxNumElements(TypeIdx: 1, EltTy: p0, MaxElements: 2)
1244 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 1, Size: 64), TypeIdx: 1);
1245
1246 getActionDefinitionsBuilder(Opcode: G_INSERT_VECTOR_ELT)
1247 .legalIf(
1248 Predicate: typeInSet(TypeIdx: 0, TypesInit: {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1249 .legalFor(Pred: HasSVE, Types: {{nxv16s8, s32, s64},
1250 {nxv8s16, s32, s64},
1251 {nxv4s32, s32, s64},
1252 {nxv2s64, s64, s64}})
1253 .moreElementsToNextPow2(TypeIdx: 0)
1254 .widenVectorEltsToVectorMinSize(TypeIdx: 0, VectorSize: 64)
1255 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
1256 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
1257 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
1258 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
1259 .clampMaxNumElements(TypeIdx: 0, EltTy: p0, MaxElements: 2)
1260 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0);
1261
1262 getActionDefinitionsBuilder(Opcode: G_BUILD_VECTOR)
1263 .legalFor(Types: {{v8s8, s8},
1264 {v16s8, s8},
1265 {v4s16, s16},
1266 {v8s16, s16},
1267 {v2s32, s32},
1268 {v4s32, s32},
1269 {v2s64, s64},
1270 {v2p0, p0}})
1271 .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: v4s32)
1272 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
1273 .minScalarOrElt(TypeIdx: 0, Ty: s8)
1274 .widenVectorEltsToVectorMinSize(TypeIdx: 0, VectorSize: 64)
1275 .widenScalarOrEltToNextPow2(TypeIdx: 0)
1276 .minScalarSameAs(TypeIdx: 1, LargeTypeIdx: 0);
1277
1278 getActionDefinitionsBuilder(Opcode: G_BUILD_VECTOR_TRUNC).lower();
1279
// Vector shuffles. Type index 0 is the result and type index 1 is the source.
1280 getActionDefinitionsBuilder(Opcode: G_SHUFFLE_VECTOR)
// Legal only when result and source have the same type and that type is one
// of v8s8, v16s8, v4s16, v8s16, v2s32, v4s32 or v2s64.
1281 .legalIf(Predicate: [=](const LegalityQuery &Query) {
1282 const LLT &DstTy = Query.Types[0];
1283 const LLT &SrcTy = Query.Types[1];
1284 // For now just support the TBL2 variant which needs the source vectors
1285 // to be the same size as the dest.
1286 if (DstTy != SrcTy)
1287 return false;
1288 return llvm::is_contained(
1289 Set: {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, Element: DstTy);
1290 })
// Result has more elements than the source: change the source type (index 1)
// to the result type (index 0).
1291 .moreElementsIf(
1292 Predicate: [](const LegalityQuery &Query) {
1293 return Query.Types[0].getNumElements() >
1294 Query.Types[1].getNumElements();
1295 },
1296 Mutation: changeTo(TypeIdx: 1, FromTypeIdx: 0))
1297 .moreElementsToNextPow2(TypeIdx: 0)
// Result has fewer elements than the source: change the result type
// (index 0) to the source type (index 1).
1298 .moreElementsIf(
1299 Predicate: [](const LegalityQuery &Query) {
1300 return Query.Types[0].getNumElements() <
1301 Query.Types[1].getNumElements();
1302 },
1303 Mutation: changeTo(TypeIdx: 0, FromTypeIdx: 1))
1304 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 0, MinSize: 8)
// Clamp the result's element count for each element width.
1305 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
1306 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
1307 .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: v4s32)
1308 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
1309 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
// Pointer-vector results are bitcast to a vector of 64-bit integers with the
// same element count.
1310 .bitcastIf(Predicate: isPointerVector(TypeIdx: 0), Mutation: [=](const LegalityQuery &Query) {
1311 // Bitcast pointers vector to i64.
1312 const LLT DstTy = Query.Types[0];
1313 return std::pair(
1314 0, LLT::vector(EC: DstTy.getElementCount(), ScalarTy: LLT::integer(SizeInBits: 64)));
1315 });
1316
// Vector concatenation. Type index 0 is the result and type index 1 is the
// type of each source vector. Joining two 64-bit halves into v16s8, v8s16 or
// v4s32 is directly legal.
1317 getActionDefinitionsBuilder(Opcode: G_CONCAT_VECTORS)
1318 .legalFor(Types: {{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
// Fixed-width results with elements narrower than 8 bits are custom-legalized.
1319 .customIf(Predicate: [=](const LegalityQuery &Query) {
1320 return Query.Types[0].isFixedVector() &&
1321 Query.Types[0].getScalarSizeInBits() < 8;
1322 })
// Bitcast when both types are fixed vectors, the result element size is a
// power of two of at least 8 bits, the result is at most 128 bits and each
// source is at most 64 bits.
1323 .bitcastIf(
1324 Predicate: [=](const LegalityQuery &Query) {
1325 return Query.Types[0].isFixedVector() &&
1326 Query.Types[1].isFixedVector() &&
1327 Query.Types[0].getScalarSizeInBits() >= 8 &&
1328 isPowerOf2_64(Value: Query.Types[0].getScalarSizeInBits()) &&
1329 Query.Types[0].getSizeInBits() <= 128 &&
1330 Query.Types[1].getSizeInBits() <= 64;
1331 },
// New type for index 0: the element size becomes the total bit size of one
// source vector, and the element count is the result's element count divided
// by the source's element count. For example, a v4s16 result built from
// v2s16 sources gets element size 32 and element count 2.
1332 Mutation: [=](const LegalityQuery &Query) {
1333 const LLT DstTy = Query.Types[0];
1334 const LLT SrcTy = Query.Types[1];
1335 return std::pair(
1336 0, DstTy.changeElementSize(NewEltSize: SrcTy.getSizeInBits())
1337 .changeElementCount(
1338 EC: DstTy.getElementCount().divideCoefficientBy(
1339 RHS: SrcTy.getNumElements())));
1340 });
1341
1342 getActionDefinitionsBuilder(Opcode: G_EXTRACT_SUBVECTOR)
1343 .legalFor(Types: {{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1344 .widenScalarOrEltToNextPow2(TypeIdx: 0)
1345 .immIdx(ImmIdx: 0); // Inform verifier imm idx 0 is handled.
1346
1347 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1348 getActionDefinitionsBuilder(Opcode: G_SPLAT_VECTOR)
1349 .legalFor(Pred: HasSVE, Types: {{nxv4s32, s32}, {nxv2s64, s64}});
1350
1351 getActionDefinitionsBuilder(Opcode: G_JUMP_TABLE).legalFor(Types: {p0});
1352
1353 getActionDefinitionsBuilder(Opcode: G_BRJT).legalFor(Types: {{p0, s64}});
1354
1355 getActionDefinitionsBuilder(Opcodes: {G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1356
1357 getActionDefinitionsBuilder(Opcode: G_DYN_STACKALLOC).custom();
1358
1359 getActionDefinitionsBuilder(Opcodes: {G_STACKSAVE, G_STACKRESTORE}).lower();
1360
// Memory intrinsics. The rules depend on whether the subtarget reports
// hasMOPS(): with it, most forms are legal or custom-legalized; without it,
// G_BZERO, G_MEMCPY, G_MEMMOVE and G_MEMSET all become library calls.
1361 if (ST.hasMOPS()) {
1362 // G_BZERO is not supported. Currently it is only emitted by
1363 // PreLegalizerCombiner for G_MEMSET with zero constant.
1364 getActionDefinitionsBuilder(Opcode: G_BZERO).unsupported();
1365
// G_MEMSET on a p0 destination with an s64 size: legal with an s64 value
// operand, custom-legalized with an s8 value operand.
1366 getActionDefinitionsBuilder(Opcode: G_MEMSET)
1367 .legalForCartesianProduct(Types0: {p0}, Types1: {s64}, Types2: {s64})
1368 .customForCartesianProduct(Types0: {p0}, Types1: {s8}, Types2: {s64})
1369 .immIdx(ImmIdx: 0); // Inform verifier imm idx 0 is handled.
1370
// Copies and moves between p0 pointers with an s64 size are legal.
1371 getActionDefinitionsBuilder(Opcodes: {G_MEMCPY, G_MEMMOVE})
1372 .legalForCartesianProduct(Types0: {p0}, Types1: {p0}, Types2: {s64})
1373 .immIdx(ImmIdx: 0); // Inform verifier imm idx 0 is handled.
1374
1375 // G_MEMCPY_INLINE does not have a tailcall immediate
1376 getActionDefinitionsBuilder(Opcode: G_MEMCPY_INLINE)
1377 .legalForCartesianProduct(Types0: {p0}, Types1: {p0}, Types2: {s64});
1378
// Same type combinations as G_MEMSET above, without the immIdx() call.
1379 getActionDefinitionsBuilder(Opcode: G_MEMSET_INLINE)
1380 .legalForCartesianProduct(Types0: {p0}, Types1: {s64}, Types2: {s64})
1381 .customForCartesianProduct(Types0: {p0}, Types1: {s8}, Types2: {s64});
1382 } else {
// NOTE(review): this branch registers no rules for G_MEMCPY_INLINE or
// G_MEMSET_INLINE in the lines visible here - confirm they are handled
// elsewhere.
1383 getActionDefinitionsBuilder(Opcodes: {G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1384 .libcall();
1385 }
1386
1387 // For fadd reductions we have pairwise operations available. We treat the
1388 // usual legal types as legal and handle the lowering to pairwise instructions
1389 // later.
1390 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_FADD)
1391 .legalFor(Types: {{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1392 .legalFor(Pred: HasFP16, Types: {{f16, v4f16}, {f16, v8f16}})
1393 .minScalarOrElt(TypeIdx: 0, Ty: MinFPScalar)
1394 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1395 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1396 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1397 .moreElementsToNextPow2(TypeIdx: 1)
1398 .scalarize(TypeIdx: 1)
1399 .lower();
1400
1401 // For fmul reductions we need to split up into individual operations. We
1402 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1403 // smaller types, followed by scalarizing what remains.
1404 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_FMUL)
1405 .minScalarOrElt(TypeIdx: 0, Ty: MinFPScalar)
1406 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1407 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1408 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1409 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 2)
1410 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 4)
1411 .scalarize(TypeIdx: 1)
1412 .lower();
1413
1414 getActionDefinitionsBuilder(Opcodes: {G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1415 .scalarize(TypeIdx: 2)
1416 .lower();
1417
1418 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_ADD)
1419 .legalFor(Types: {{i8, v8i8},
1420 {i8, v16i8},
1421 {i16, v4i16},
1422 {i16, v8i16},
1423 {i32, v2i32},
1424 {i32, v4i32},
1425 {i64, v2i64}})
1426 .moreElementsToNextPow2(TypeIdx: 1)
1427 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1428 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1429 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1430 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 16)
1431 .widenVectorEltsToVectorMinSize(TypeIdx: 1, VectorSize: 64)
1432 .scalarize(TypeIdx: 1);
1433
1434 getActionDefinitionsBuilder(Opcodes: {G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1435 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1436 .legalFor(Types: {{f32, v2f32}, {f32, v4f32}, {f64, v2f64}})
1437 .legalFor(Pred: HasFP16, Types: {{f16, v4f16}, {f16, v8f16}})
1438 .minScalarOrElt(TypeIdx: 0, Ty: MinFPScalar)
1439 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1440 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1441 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1442 .scalarize(TypeIdx: 1)
1443 .lower();
1444
1445 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_MUL)
1446 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 2)
1447 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 4)
1448 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 8)
1449 .scalarize(TypeIdx: 1)
1450 .lower();
1451
1452 getActionDefinitionsBuilder(
1453 Opcodes: {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1454 .legalFor(Types: {{i8, v8i8},
1455 {i8, v16i8},
1456 {i16, v4i16},
1457 {i16, v8i16},
1458 {i32, v2i32},
1459 {i32, v4i32}})
1460 .moreElementsIf(
1461 Predicate: [=](const LegalityQuery &Query) {
1462 return Query.Types[1].isVector() &&
1463 Query.Types[1].getElementType() != s8 &&
1464 Query.Types[1].getNumElements() & 1;
1465 },
1466 Mutation: LegalizeMutations::moreElementsToNextPow2(TypeIdx: 1))
1467 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1468 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1469 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1470 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 16)
1471 .scalarize(TypeIdx: 1)
1472 .lower();
1473
1474 getActionDefinitionsBuilder(
1475 Opcodes: {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1476 // Try to break down into smaller vectors as long as they're at least 64
1477 // bits. This lets us use vector operations for some parts of the
1478 // reduction.
1479 .fewerElementsIf(
1480 Predicate: [=](const LegalityQuery &Q) {
1481 LLT SrcTy = Q.Types[1];
1482 if (SrcTy.isScalar())
1483 return false;
1484 if (!isPowerOf2_32(Value: SrcTy.getNumElements()))
1485 return false;
1486 // We can usually perform 64b vector operations.
1487 return SrcTy.getSizeInBits() > 64;
1488 },
1489 Mutation: [=](const LegalityQuery &Q) {
1490 LLT SrcTy = Q.Types[1];
1491 return std::make_pair(x: 1, y: SrcTy.divide(Factor: 2));
1492 })
1493 .scalarize(TypeIdx: 1)
1494 .lower();
1495
1496 // TODO: Update this to correct handling when adding AArch64/SVE support.
1497 getActionDefinitionsBuilder(Opcode: G_VECTOR_COMPRESS).lower();
1498
1499 // Access to floating-point environment.
1500 getActionDefinitionsBuilder(Opcodes: {G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1501 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1502 .libcall();
1503
1504 getActionDefinitionsBuilder(Opcode: G_IS_FPCLASS).lower();
1505
1506 getActionDefinitionsBuilder(Opcode: G_PREFETCH).custom();
1507
1508 getActionDefinitionsBuilder(Opcodes: {G_SCMP, G_UCMP}).lower();
1509
1510 getActionDefinitionsBuilder(Opcodes: {G_INTRINSIC, G_INTRINSIC_W_SIDE_EFFECTS})
1511 .alwaysLegal();
1512 getActionDefinitionsBuilder(Opcode: G_FENCE).alwaysLegal();
1513 getActionDefinitionsBuilder(Opcode: G_INVOKE_REGION_START).alwaysLegal();
1514
1515 getLegacyLegalizerInfo().computeTables();
1516 verify(MII: *ST.getInstrInfo());
1517}
1518
1519bool AArch64LegalizerInfo::legalizeCustom(
1520 LegalizerHelper &Helper, MachineInstr &MI,
1521 LostDebugLocObserver &LocObserver) const {
1522 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1523 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1524 GISelChangeObserver &Observer = Helper.Observer;
1525 switch (MI.getOpcode()) {
1526 default:
1527 // No idea what to do.
1528 return false;
1529 case TargetOpcode::G_VAARG:
1530 return legalizeVaArg(MI, MRI, MIRBuilder);
1531 case TargetOpcode::G_LOAD:
1532 case TargetOpcode::G_STORE:
1533 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1534 case TargetOpcode::G_SHL:
1535 case TargetOpcode::G_ASHR:
1536 case TargetOpcode::G_LSHR:
1537 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1538 case TargetOpcode::G_GLOBAL_VALUE:
1539 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1540 case TargetOpcode::G_SBFX:
1541 case TargetOpcode::G_UBFX:
1542 return legalizeBitfieldExtract(MI, MRI, Helper);
1543 case TargetOpcode::G_FSHL:
1544 case TargetOpcode::G_FSHR:
1545 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1546 case TargetOpcode::G_ROTR:
1547 return legalizeRotate(MI, MRI, Helper);
1548 case TargetOpcode::G_CTPOP:
1549 return legalizeCTPOP(MI, MRI, Helper);
1550 case TargetOpcode::G_ATOMIC_CMPXCHG:
1551 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1552 case TargetOpcode::G_CTTZ:
1553 return legalizeCTTZ(MI, Helper);
1554 case TargetOpcode::G_BZERO:
1555 case TargetOpcode::G_MEMCPY:
1556 case TargetOpcode::G_MEMMOVE:
1557 case TargetOpcode::G_MEMSET:
1558 case TargetOpcode::G_MEMSET_INLINE:
1559 return legalizeMemOps(MI, Helper);
1560 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1561 return legalizeExtractVectorElt(MI, MRI, Helper);
1562 case TargetOpcode::G_DYN_STACKALLOC:
1563 return legalizeDynStackAlloc(MI, Helper);
1564 case TargetOpcode::G_PREFETCH:
1565 return legalizePrefetch(MI, Helper);
1566 case TargetOpcode::G_ABS:
1567 return Helper.lowerAbsToCNeg(MI);
1568 case TargetOpcode::G_ICMP:
1569 return legalizeICMP(MI, MRI, MIRBuilder);
1570 case TargetOpcode::G_BITCAST:
1571 return legalizeBitcast(MI, Helper);
1572 case TargetOpcode::G_CONCAT_VECTORS:
1573 return legalizeConcatVectors(MI, MRI, MIRBuilder);
1574 case TargetOpcode::G_FPTRUNC:
1575 // In order to lower f16 to f64 properly, we need to use f32 as an
1576 // intermediary
1577 return legalizeFptrunc(MI, MIRBuilder, MRI);
1578 }
1579
1580 llvm_unreachable("expected switch to return");
1581}
1582
1583bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1584 LegalizerHelper &Helper) const {
1585 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1586 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1587 // We're trying to handle casts from i1 vectors to scalars but reloading from
1588 // stack.
1589 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1590 SrcTy.getElementType() != LLT::scalar(SizeInBits: 1))
1591 return false;
1592
1593 Helper.createStackStoreLoad(Res: DstReg, Val: SrcReg);
1594 MI.eraseFromParent();
1595 return true;
1596}
1597
1598bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1599 MachineRegisterInfo &MRI,
1600 MachineIRBuilder &MIRBuilder,
1601 GISelChangeObserver &Observer,
1602 LegalizerHelper &Helper) const {
1603 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1604 MI.getOpcode() == TargetOpcode::G_FSHR);
1605
1606 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1607 // lowering
1608 Register ShiftNo = MI.getOperand(i: 3).getReg();
1609 LLT ShiftTy = MRI.getType(Reg: ShiftNo);
1610 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ShiftNo, MRI);
1611
1612 // Adjust shift amount according to Opcode (FSHL/FSHR)
1613 // Convert FSHL to FSHR
1614 LLT OperationTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1615 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1616
1617 // Lower non-constant shifts and leave zero shifts to the optimizer.
1618 if (!VRegAndVal || VRegAndVal->Value.urem(RHS: BitWidth) == 0)
1619 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1620 LegalizerHelper::LegalizeResult::Legalized);
1621
1622 APInt Amount = VRegAndVal->Value.urem(RHS: BitWidth);
1623
1624 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1625
1626 // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount
1627 // in the range of 0 <-> BitWidth, it is legal
1628 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1629 VRegAndVal->Value.ult(RHS: BitWidth))
1630 return true;
1631
1632 // Cast the ShiftNumber to a 64-bit type
1633 auto Cast64 = MIRBuilder.buildConstant(Res: LLT::integer(SizeInBits: 64), Val: Amount.zext(width: 64));
1634
1635 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1636 Observer.changingInstr(MI);
1637 MI.getOperand(i: 3).setReg(Cast64.getReg(Idx: 0));
1638 Observer.changedInstr(MI);
1639 }
1640 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1641 // instruction
1642 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1643 MIRBuilder.buildInstr(Opc: TargetOpcode::G_FSHR, DstOps: {MI.getOperand(i: 0).getReg()},
1644 SrcOps: {MI.getOperand(i: 1).getReg(), MI.getOperand(i: 2).getReg(),
1645 Cast64.getReg(Idx: 0)});
1646 MI.eraseFromParent();
1647 }
1648 return true;
1649}
1650
1651bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1652 MachineRegisterInfo &MRI,
1653 MachineIRBuilder &MIRBuilder) const {
1654 Register DstReg = MI.getOperand(i: 0).getReg();
1655 Register SrcReg1 = MI.getOperand(i: 2).getReg();
1656 Register SrcReg2 = MI.getOperand(i: 3).getReg();
1657 LLT DstTy = MRI.getType(Reg: DstReg);
1658 LLT SrcTy = MRI.getType(Reg: SrcReg1);
1659
1660 // Check the vector types are legal
1661 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1662 DstTy.getNumElements() != SrcTy.getNumElements() ||
1663 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1664 return false;
1665
1666 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1667 // following passes
1668 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(i: 1).getPredicate();
1669 if (Pred != CmpInst::ICMP_NE)
1670 return true;
1671 Register CmpReg =
1672 MIRBuilder
1673 .buildICmp(Pred: CmpInst::ICMP_EQ, Res: MRI.getType(Reg: DstReg), Op0: SrcReg1, Op1: SrcReg2)
1674 .getReg(Idx: 0);
1675 MIRBuilder.buildNot(Dst: DstReg, Src0: CmpReg);
1676
1677 MI.eraseFromParent();
1678 return true;
1679}
1680
1681bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1682 MachineRegisterInfo &MRI,
1683 LegalizerHelper &Helper) const {
1684 // To allow for imported patterns to match, we ensure that the rotate amount
1685 // is 64b with an extension.
1686 Register AmtReg = MI.getOperand(i: 2).getReg();
1687 LLT AmtTy = MRI.getType(Reg: AmtReg);
1688 (void)AmtTy;
1689 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1690 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1691 auto NewAmt = Helper.MIRBuilder.buildZExt(Res: LLT::integer(SizeInBits: 64), Op: AmtReg);
1692 Helper.Observer.changingInstr(MI);
1693 MI.getOperand(i: 2).setReg(NewAmt.getReg(Idx: 0));
1694 Helper.Observer.changedInstr(MI);
1695 return true;
1696}
1697
1698bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1699 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1700 GISelChangeObserver &Observer) const {
1701 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1702 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1703 // G_ADD_LOW instructions.
1704 // By splitting this here, we can optimize accesses in the small code model by
1705 // folding in the G_ADD_LOW into the load/store offset.
1706 auto &GlobalOp = MI.getOperand(i: 1);
1707 // Don't modify an intrinsic call.
1708 if (GlobalOp.isSymbol())
1709 return true;
1710 const auto* GV = GlobalOp.getGlobal();
1711 if (GV->isThreadLocal())
1712 return true; // Don't want to modify TLS vars.
1713
1714 auto &TM = ST->getTargetLowering()->getTargetMachine();
1715 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1716
1717 if (OpFlags & AArch64II::MO_GOT)
1718 return true;
1719
1720 auto Offset = GlobalOp.getOffset();
1721 Register DstReg = MI.getOperand(i: 0).getReg();
1722 auto ADRP = MIRBuilder.buildInstr(Opc: AArch64::ADRP, DstOps: {LLT::pointer(AddressSpace: 0, SizeInBits: 64)}, SrcOps: {})
1723 .addGlobalAddress(GV, Offset, TargetFlags: OpFlags | AArch64II::MO_PAGE);
1724 // Set the regclass on the dest reg too.
1725 MRI.setRegClass(Reg: ADRP.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
1726
1727 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1728 // by creating a MOVK that sets bits 48-63 of the register to (global address
1729 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1730 // prevent an incorrect tag being generated during relocation when the
1731 // global appears before the code section. Without the offset, a global at
1732 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1733 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1734 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1735 // instead of `0xf`.
1736 // This assumes that we're in the small code model so we can assume a binary
1737 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1738 // binary must also be loaded into address range [0, 2^48). Both of these
1739 // properties need to be ensured at runtime when using tagged addresses.
1740 if (OpFlags & AArch64II::MO_TAGGED) {
1741 assert(!Offset &&
1742 "Should not have folded in an offset for a tagged global!");
1743 ADRP = MIRBuilder.buildInstr(Opc: AArch64::MOVKXi, DstOps: {LLT::pointer(AddressSpace: 0, SizeInBits: 64)}, SrcOps: {ADRP})
1744 .addGlobalAddress(GV, Offset: 0x100000000,
1745 TargetFlags: AArch64II::MO_PREL | AArch64II::MO_G3)
1746 .addImm(Val: 48);
1747 MRI.setRegClass(Reg: ADRP.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
1748 }
1749
1750 MIRBuilder.buildInstr(Opc: AArch64::G_ADD_LOW, DstOps: {DstReg}, SrcOps: {ADRP})
1751 .addGlobalAddress(GV, Offset,
1752 TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1753 MI.eraseFromParent();
1754 return true;
1755}
1756
1757bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1758 MachineInstr &MI) const {
1759 MachineIRBuilder &MIB = Helper.MIRBuilder;
1760 MachineRegisterInfo &MRI = *MIB.getMRI();
1761
1762 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1763 MIB.buildInstr(Opc: Opcode, DstOps: {MI.getOperand(i: 0)}, SrcOps: {MI.getOperand(i: 2)});
1764 MI.eraseFromParent();
1765 return true;
1766 };
1767 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1768 MIB.buildInstr(Opc: Opcode, DstOps: {MI.getOperand(i: 0)},
1769 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3)});
1770 MI.eraseFromParent();
1771 return true;
1772 };
1773 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1774 MIB.buildInstr(Opc: Opcode, DstOps: {MI.getOperand(i: 0)},
1775 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3), MI.getOperand(i: 4)});
1776 MI.eraseFromParent();
1777 return true;
1778 };
1779
1780 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(Val&: MI).getIntrinsicID();
1781 switch (IntrinsicID) {
1782 case Intrinsic::vacopy: {
1783 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1784 unsigned VaListSize =
1785 (ST->isTargetDarwin() || ST->isTargetWindows())
1786 ? PtrSize
1787 : ST->isTargetILP32() ? 20 : 32;
1788
1789 MachineFunction &MF = *MI.getMF();
1790 auto Val = MF.getRegInfo().createGenericVirtualRegister(
1791 Ty: LLT::scalar(SizeInBits: VaListSize * 8));
1792 MIB.buildLoad(Res: Val, Addr: MI.getOperand(i: 2),
1793 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
1794 F: MachineMemOperand::MOLoad,
1795 Size: VaListSize, BaseAlignment: Align(PtrSize)));
1796 MIB.buildStore(Val, Addr: MI.getOperand(i: 1),
1797 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
1798 F: MachineMemOperand::MOStore,
1799 Size: VaListSize, BaseAlignment: Align(PtrSize)));
1800 MI.eraseFromParent();
1801 return true;
1802 }
1803 case Intrinsic::get_dynamic_area_offset: {
1804 MIB.buildConstant(Res: MI.getOperand(i: 0).getReg(), Val: 0);
1805 MI.eraseFromParent();
1806 return true;
1807 }
1808 case Intrinsic::aarch64_mops_memset_tag: {
1809 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1810 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1811 // the instruction).
1812 auto &Value = MI.getOperand(i: 3);
1813 Register ExtValueReg = MIB.buildAnyExt(Res: LLT::integer(SizeInBits: 64), Op: Value).getReg(Idx: 0);
1814 Value.setReg(ExtValueReg);
1815 return true;
1816 }
1817 case Intrinsic::aarch64_prefetch: {
1818 auto &AddrVal = MI.getOperand(i: 1);
1819
1820 int64_t IsWrite = MI.getOperand(i: 2).getImm();
1821 int64_t Target = MI.getOperand(i: 3).getImm();
1822 int64_t IsStream = MI.getOperand(i: 4).getImm();
1823 int64_t IsData = MI.getOperand(i: 5).getImm();
1824
1825 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1826 (!IsData << 3) | // IsDataCache bit
1827 (Target << 1) | // Cache level bits
1828 (unsigned)IsStream; // Stream bit
1829
1830 MIB.buildInstr(Opcode: AArch64::G_AARCH64_PREFETCH).addImm(Val: PrfOp).add(MO: AddrVal);
1831 MI.eraseFromParent();
1832 return true;
1833 }
1834 case Intrinsic::aarch64_range_prefetch: {
1835 auto &AddrVal = MI.getOperand(i: 1);
1836
1837 int64_t IsWrite = MI.getOperand(i: 2).getImm();
1838 int64_t IsStream = MI.getOperand(i: 3).getImm();
1839 unsigned PrfOp = (IsStream << 2) | IsWrite;
1840
1841 MIB.buildInstr(Opcode: AArch64::G_AARCH64_RANGE_PREFETCH)
1842 .addImm(Val: PrfOp)
1843 .add(MO: AddrVal)
1844 .addUse(RegNo: MI.getOperand(i: 4).getReg()); // Metadata
1845 MI.eraseFromParent();
1846 return true;
1847 }
1848 case Intrinsic::aarch64_prefetch_ir: {
1849 auto &AddrVal = MI.getOperand(i: 1);
1850 MIB.buildInstr(Opcode: AArch64::G_AARCH64_PREFETCH).addImm(Val: 24).add(MO: AddrVal);
1851 MI.eraseFromParent();
1852 return true;
1853 }
1854 case Intrinsic::aarch64_neon_uaddv:
1855 case Intrinsic::aarch64_neon_saddv:
1856 case Intrinsic::aarch64_neon_umaxv:
1857 case Intrinsic::aarch64_neon_smaxv:
1858 case Intrinsic::aarch64_neon_uminv:
1859 case Intrinsic::aarch64_neon_sminv: {
1860 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1861 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1862 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1863
1864 auto OldDst = MI.getOperand(i: 0).getReg();
1865 auto OldDstTy = MRI.getType(Reg: OldDst);
1866 LLT NewDstTy = MRI.getType(Reg: MI.getOperand(i: 2).getReg()).getElementType();
1867 if (OldDstTy == NewDstTy)
1868 return true;
1869
1870 auto NewDst = MRI.createGenericVirtualRegister(Ty: NewDstTy);
1871
1872 Helper.Observer.changingInstr(MI);
1873 MI.getOperand(i: 0).setReg(NewDst);
1874 Helper.Observer.changedInstr(MI);
1875
1876 MIB.setInsertPt(MBB&: MIB.getMBB(), II: ++MIB.getInsertPt());
1877 MIB.buildExtOrTrunc(ExtOpc: IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1878 Res: OldDst, Op: NewDst);
1879
1880 return true;
1881 }
1882 case Intrinsic::aarch64_neon_uaddlp:
1883 case Intrinsic::aarch64_neon_saddlp: {
1884 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1885 ? AArch64::G_UADDLP
1886 : AArch64::G_SADDLP;
1887 MIB.buildInstr(Opc, DstOps: {MI.getOperand(i: 0)}, SrcOps: {MI.getOperand(i: 2)});
1888 MI.eraseFromParent();
1889
1890 return true;
1891 }
1892 case Intrinsic::aarch64_neon_uaddlv:
1893 case Intrinsic::aarch64_neon_saddlv: {
1894 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1895 ? AArch64::G_UADDLV
1896 : AArch64::G_SADDLV;
1897 Register DstReg = MI.getOperand(i: 0).getReg();
1898 Register SrcReg = MI.getOperand(i: 2).getReg();
1899 LLT DstTy = MRI.getType(Reg: DstReg);
1900
1901 LLT MidTy, ExtTy;
1902 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1903 ExtTy = LLT::integer(SizeInBits: 32);
1904 MidTy = LLT::fixed_vector(NumElements: 4, ScalarTy: ExtTy);
1905 } else {
1906 ExtTy = LLT::integer(SizeInBits: 64);
1907 MidTy = LLT::fixed_vector(NumElements: 2, ScalarTy: ExtTy);
1908 }
1909
1910 Register MidReg =
1911 MIB.buildInstr(Opc, DstOps: {MidTy}, SrcOps: {SrcReg})->getOperand(i: 0).getReg();
1912 Register ZeroReg =
1913 MIB.buildConstant(Res: LLT::integer(SizeInBits: 64), Val: 0)->getOperand(i: 0).getReg();
1914 Register ExtReg = MIB.buildInstr(Opc: AArch64::G_EXTRACT_VECTOR_ELT, DstOps: {ExtTy},
1915 SrcOps: {MidReg, ZeroReg})
1916 .getReg(Idx: 0);
1917
1918 if (DstTy.getScalarSizeInBits() < 32)
1919 MIB.buildTrunc(Res: DstReg, Op: ExtReg);
1920 else
1921 MIB.buildCopy(Res: DstReg, Op: ExtReg);
1922
1923 MI.eraseFromParent();
1924
1925 return true;
1926 }
1927 case Intrinsic::aarch64_neon_smax:
1928 return LowerBinOp(TargetOpcode::G_SMAX);
1929 case Intrinsic::aarch64_neon_smin:
1930 return LowerBinOp(TargetOpcode::G_SMIN);
1931 case Intrinsic::aarch64_neon_umax:
1932 return LowerBinOp(TargetOpcode::G_UMAX);
1933 case Intrinsic::aarch64_neon_umin:
1934 return LowerBinOp(TargetOpcode::G_UMIN);
1935 case Intrinsic::aarch64_neon_fmax:
1936 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1937 case Intrinsic::aarch64_neon_fmin:
1938 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1939 case Intrinsic::aarch64_neon_fmaxnm:
1940 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1941 case Intrinsic::aarch64_neon_fminnm:
1942 return LowerBinOp(TargetOpcode::G_FMINNUM);
1943 case Intrinsic::aarch64_neon_pmul:
1944 return LowerBinOp(TargetOpcode::G_CLMUL);
1945 case Intrinsic::aarch64_neon_pmull:
1946 case Intrinsic::aarch64_neon_pmull64:
1947 return LowerBinOp(AArch64::G_PMULL);
1948 case Intrinsic::aarch64_neon_smull:
1949 return LowerBinOp(AArch64::G_SMULL);
1950 case Intrinsic::aarch64_neon_umull:
1951 return LowerBinOp(AArch64::G_UMULL);
1952 case Intrinsic::aarch64_neon_sabd:
1953 return LowerBinOp(TargetOpcode::G_ABDS);
1954 case Intrinsic::aarch64_neon_uabd:
1955 return LowerBinOp(TargetOpcode::G_ABDU);
1956 case Intrinsic::aarch64_neon_uhadd:
1957 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1958 case Intrinsic::aarch64_neon_urhadd:
1959 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1960 case Intrinsic::aarch64_neon_shadd:
1961 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1962 case Intrinsic::aarch64_neon_srhadd:
1963 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1964 case Intrinsic::aarch64_neon_sqshrn: {
1965 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1966 return true;
1967 // Create right shift instruction. Store the output register in Shr.
1968 auto Shr = MIB.buildInstr(Opc: AArch64::G_VASHR,
1969 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
1970 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
1971 // Build the narrow intrinsic, taking in Shr.
1972 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_SSAT_S, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
1973 MI.eraseFromParent();
1974 return true;
1975 }
1976 case Intrinsic::aarch64_neon_sqshrun: {
1977 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1978 return true;
1979 // Create right shift instruction. Store the output register in Shr.
1980 auto Shr = MIB.buildInstr(Opc: AArch64::G_VASHR,
1981 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
1982 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
1983 // Build the narrow intrinsic, taking in Shr.
1984 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_SSAT_U, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
1985 MI.eraseFromParent();
1986 return true;
1987 }
1988 case Intrinsic::aarch64_neon_sqrshrn: {
1989 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1990 return true;
1991 // Create right shift instruction. Store the output register in Shr.
1992 auto Shr = MIB.buildInstr(Opc: AArch64::G_SRSHR_I,
1993 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
1994 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
1995 // Build the narrow intrinsic, taking in Shr.
1996 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_SSAT_S, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
1997 MI.eraseFromParent();
1998 return true;
1999 }
2000 case Intrinsic::aarch64_neon_sqrshrun: {
2001 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
2002 return true;
2003 // Create right shift instruction. Store the output register in Shr.
2004 auto Shr = MIB.buildInstr(Opc: AArch64::G_SRSHR_I,
2005 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
2006 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
2007 // Build the narrow intrinsic, taking in Shr.
2008 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_SSAT_U, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
2009 MI.eraseFromParent();
2010 return true;
2011 }
2012 case Intrinsic::aarch64_neon_uqrshrn: {
2013 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
2014 return true;
2015 // Create right shift instruction. Store the output register in Shr.
2016 auto Shr = MIB.buildInstr(Opc: AArch64::G_URSHR_I,
2017 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
2018 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
2019 // Build the narrow intrinsic, taking in Shr.
2020 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_USAT_U, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
2021 MI.eraseFromParent();
2022 return true;
2023 }
2024 case Intrinsic::aarch64_neon_uqshrn: {
2025 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
2026 return true;
2027 // Create right shift instruction. Store the output register in Shr.
2028 auto Shr = MIB.buildInstr(Opc: AArch64::G_VLSHR,
2029 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
2030 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
2031 // Build the narrow intrinsic, taking in Shr.
2032 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_USAT_U, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
2033 MI.eraseFromParent();
2034 return true;
2035 }
2036 case Intrinsic::aarch64_neon_sqshlu: {
2037 // Check if last operand is constant vector dup
2038 auto ShiftAmount = isConstantOrConstantSplatVector(
2039 MI&: *MRI.getVRegDef(Reg: MI.getOperand(i: 3).getReg()), MRI);
2040 if (ShiftAmount) {
2041 // If so, create a new intrinsic with the correct shift amount
2042 MIB.buildInstr(Opc: AArch64::G_SQSHLU_I, DstOps: {MI.getOperand(i: 0)},
2043 SrcOps: {MI.getOperand(i: 2)})
2044 .addImm(Val: ShiftAmount->getSExtValue());
2045 MI.eraseFromParent();
2046 return true;
2047 }
2048 return false;
2049 }
2050 case Intrinsic::aarch64_neon_vsli: {
2051 MIB.buildInstr(
2052 Opc: AArch64::G_SLI, DstOps: {MI.getOperand(i: 0)},
2053 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3), MI.getOperand(i: 4).getImm()});
2054 MI.eraseFromParent();
2055 break;
2056 }
2057 case Intrinsic::aarch64_neon_vsri: {
2058 MIB.buildInstr(
2059 Opc: AArch64::G_SRI, DstOps: {MI.getOperand(i: 0)},
2060 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3), MI.getOperand(i: 4).getImm()});
2061 MI.eraseFromParent();
2062 break;
2063 }
2064 case Intrinsic::aarch64_neon_abs: {
2065 // Lower the intrinsic to G_ABS.
2066 MIB.buildInstr(Opc: TargetOpcode::G_ABS, DstOps: {MI.getOperand(i: 0)}, SrcOps: {MI.getOperand(i: 2)});
2067 MI.eraseFromParent();
2068 return true;
2069 }
2070 case Intrinsic::aarch64_neon_sqadd: {
2071 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
2072 return LowerBinOp(TargetOpcode::G_SADDSAT);
2073 break;
2074 }
2075 case Intrinsic::aarch64_neon_sqsub: {
2076 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
2077 return LowerBinOp(TargetOpcode::G_SSUBSAT);
2078 break;
2079 }
2080 case Intrinsic::aarch64_neon_uqadd: {
2081 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
2082 return LowerBinOp(TargetOpcode::G_UADDSAT);
2083 break;
2084 }
2085 case Intrinsic::aarch64_neon_uqsub: {
2086 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
2087 return LowerBinOp(TargetOpcode::G_USUBSAT);
2088 break;
2089 }
2090 case Intrinsic::aarch64_neon_udot:
2091 return LowerTriOp(AArch64::G_UDOT);
2092 case Intrinsic::aarch64_neon_sdot:
2093 return LowerTriOp(AArch64::G_SDOT);
2094 case Intrinsic::aarch64_neon_usdot:
2095 return LowerTriOp(AArch64::G_USDOT);
2096 case Intrinsic::aarch64_neon_sqxtn:
2097 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
2098 case Intrinsic::aarch64_neon_sqxtun:
2099 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
2100 case Intrinsic::aarch64_neon_uqxtn:
2101 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
2102 case Intrinsic::aarch64_neon_fcvtzu:
2103 return LowerUnaryOp(TargetOpcode::G_FPTOUI_SAT);
2104 case Intrinsic::aarch64_neon_fcvtzs:
2105 return LowerUnaryOp(TargetOpcode::G_FPTOSI_SAT);
2106 case Intrinsic::aarch64_neon_cls:
2107 return LowerUnaryOp(TargetOpcode::G_CTLS);
2108
2109 case Intrinsic::vector_reverse:
2110 // TODO: Add support for vector_reverse
2111 return false;
2112 }
2113
2114 return true;
2115}
2116
2117bool AArch64LegalizerInfo::legalizeShlAshrLshr(
2118 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
2119 GISelChangeObserver &Observer) const {
2120 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
2121 MI.getOpcode() == TargetOpcode::G_LSHR ||
2122 MI.getOpcode() == TargetOpcode::G_SHL);
2123 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
2124 // imported patterns can select it later. Either way, it will be legal.
2125 Register AmtReg = MI.getOperand(i: 2).getReg();
2126 LLT AmtRegEltTy = MRI.getType(Reg: AmtReg).getScalarType();
2127 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: AmtReg, MRI);
2128 if (!VRegAndVal)
2129 return true;
2130 // Check the shift amount is in range for an immediate form.
2131 int64_t Amount = VRegAndVal->Value.getSExtValue();
2132 if (Amount > 31)
2133 return true; // This will have to remain a register variant.
2134 auto ExtCst =
2135 MIRBuilder.buildConstant(Res: AmtRegEltTy.changeElementSize(NewEltSize: 64), Val: Amount);
2136 Observer.changingInstr(MI);
2137 MI.getOperand(i: 2).setReg(ExtCst.getReg(Idx: 0));
2138 Observer.changedInstr(MI);
2139 return true;
2140}
2141
2142static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
2143 MachineRegisterInfo &MRI) {
2144 Base = Root;
2145 Offset = 0;
2146
2147 Register NewBase;
2148 int64_t NewOffset;
2149 if (mi_match(R: Root, MRI, P: m_GPtrAdd(L: m_Reg(R&: NewBase), R: m_ICst(Cst&: NewOffset))) &&
2150 isShiftedInt<7, 3>(x: NewOffset)) {
2151 Base = NewBase;
2152 Offset = NewOffset;
2153 }
2154}
2155
2156// FIXME: This should be removed and replaced with the generic bitcast legalize
2157// action.
2158bool AArch64LegalizerInfo::legalizeLoadStore(
2159 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
2160 GISelChangeObserver &Observer) const {
2161 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
2162 MI.getOpcode() == TargetOpcode::G_LOAD);
2163 // Here we just try to handle vector loads/stores where our value type might
2164 // have pointer elements, which the SelectionDAG importer can't handle. To
2165 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
2166 // the value to use s64 types.
2167
2168 // Custom legalization requires the instruction, if not deleted, must be fully
2169 // legalized. In order to allow further legalization of the inst, we create
2170 // a new instruction and erase the existing one.
2171
2172 Register ValReg = MI.getOperand(i: 0).getReg();
2173 const LLT ValTy = MRI.getType(Reg: ValReg);
2174
2175 if (ValTy == LLT::scalar(SizeInBits: 128)) {
2176
2177 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
2178 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2179 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
2180 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
2181 bool IsRcpC3 =
2182 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
2183
2184 LLT s64 = LLT::integer(SizeInBits: 64);
2185
2186 unsigned Opcode;
2187 if (IsRcpC3) {
2188 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
2189 } else {
2190 // For LSE2, loads/stores should have been converted to monotonic and had
2191 // a fence inserted after them.
2192 assert(Ordering == AtomicOrdering::Monotonic ||
2193 Ordering == AtomicOrdering::Unordered);
2194 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
2195
2196 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
2197 }
2198
2199 MachineInstrBuilder NewI;
2200 if (IsLoad) {
2201 NewI = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {s64, s64}, SrcOps: {});
2202 MIRBuilder.buildMergeLikeInstr(
2203 Res: ValReg, Ops: {NewI->getOperand(i: 0), NewI->getOperand(i: 1)});
2204 } else {
2205 auto Split = MIRBuilder.buildUnmerge(Res: s64, Op: MI.getOperand(i: 0));
2206 NewI = MIRBuilder.buildInstr(
2207 Opc: Opcode, DstOps: {}, SrcOps: {Split->getOperand(i: 0), Split->getOperand(i: 1)});
2208 }
2209
2210 if (IsRcpC3) {
2211 NewI.addUse(RegNo: MI.getOperand(i: 1).getReg());
2212 } else {
2213 Register Base;
2214 int Offset;
2215 matchLDPSTPAddrMode(Root: MI.getOperand(i: 1).getReg(), Base, Offset, MRI);
2216 NewI.addUse(RegNo: Base);
2217 NewI.addImm(Val: Offset / 8);
2218 }
2219
2220 NewI.cloneMemRefs(OtherMI: MI);
2221 constrainSelectedInstRegOperands(I&: *NewI, TII: *ST->getInstrInfo(),
2222 TRI: *MRI.getTargetRegisterInfo(),
2223 RBI: *ST->getRegBankInfo());
2224 MI.eraseFromParent();
2225 return true;
2226 }
2227
2228 if (!ValTy.isPointerVector() ||
2229 ValTy.getElementType().getAddressSpace() != 0) {
2230 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
2231 return false;
2232 }
2233
2234 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
2235 const LLT NewTy = LLT::vector(EC: ValTy.getElementCount(), ScalarTy: LLT::integer(SizeInBits: PtrSize));
2236 auto &MMO = **MI.memoperands_begin();
2237 MMO.setType(NewTy);
2238
2239 if (MI.getOpcode() == TargetOpcode::G_STORE) {
2240 auto Bitcast = MIRBuilder.buildBitcast(Dst: NewTy, Src: ValReg);
2241 MIRBuilder.buildStore(Val: Bitcast.getReg(Idx: 0), Addr: MI.getOperand(i: 1), MMO);
2242 } else {
2243 auto NewLoad = MIRBuilder.buildLoad(Res: NewTy, Addr: MI.getOperand(i: 1), MMO);
2244 MIRBuilder.buildBitcast(Dst: ValReg, Src: NewLoad);
2245 }
2246 MI.eraseFromParent();
2247 return true;
2248}
2249
2250bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
2251 MachineRegisterInfo &MRI,
2252 MachineIRBuilder &MIRBuilder) const {
2253 MachineFunction &MF = MIRBuilder.getMF();
2254 Align Alignment(MI.getOperand(i: 2).getImm());
2255 Register Dst = MI.getOperand(i: 0).getReg();
2256 Register ListPtr = MI.getOperand(i: 1).getReg();
2257
2258 LLT PtrTy = MRI.getType(Reg: ListPtr);
2259 LLT IntPtrTy = LLT::scalar(SizeInBits: PtrTy.getSizeInBits());
2260
2261 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2262 const Align PtrAlign = Align(PtrSize);
2263 auto List = MIRBuilder.buildLoad(
2264 Res: PtrTy, Addr: ListPtr,
2265 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad,
2266 MemTy: PtrTy, base_alignment: PtrAlign));
2267
2268 MachineInstrBuilder DstPtr;
2269 if (Alignment > PtrAlign) {
2270 // Realign the list to the actual required alignment.
2271 auto AlignMinus1 =
2272 MIRBuilder.buildConstant(Res: IntPtrTy, Val: Alignment.value() - 1);
2273 auto ListTmp = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: List, Op1: AlignMinus1.getReg(Idx: 0));
2274 DstPtr = MIRBuilder.buildMaskLowPtrBits(Res: PtrTy, Op0: ListTmp, NumBits: Log2(A: Alignment));
2275 } else
2276 DstPtr = List;
2277
2278 LLT ValTy = MRI.getType(Reg: Dst);
2279 uint64_t ValSize = ValTy.getSizeInBits() / 8;
2280 MIRBuilder.buildLoad(
2281 Res: Dst, Addr: DstPtr,
2282 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad,
2283 MemTy: ValTy, base_alignment: std::max(a: Alignment, b: PtrAlign)));
2284
2285 auto Size = MIRBuilder.buildConstant(Res: IntPtrTy, Val: alignTo(Size: ValSize, A: PtrAlign));
2286
2287 auto NewList = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: DstPtr, Op1: Size.getReg(Idx: 0));
2288
2289 MIRBuilder.buildStore(Val: NewList, Addr: ListPtr,
2290 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
2291 f: MachineMemOperand::MOStore,
2292 MemTy: PtrTy, base_alignment: PtrAlign));
2293
2294 MI.eraseFromParent();
2295 return true;
2296}
2297
2298bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2299 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2300 // Only legal if we can select immediate forms.
2301 // TODO: Lower this otherwise.
2302 return getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 2).getReg(), MRI) &&
2303 getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 3).getReg(), MRI);
2304}
2305
2306bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2307 MachineRegisterInfo &MRI,
2308 LegalizerHelper &Helper) const {
2309 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2310 // it can be more efficiently lowered to the following sequence that uses
2311 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2312 // registers are cheap.
2313 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2314 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2315 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2316 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2317 //
2318 // For 128 bit vector popcounts, we lower to the following sequence:
2319 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2320 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2321 // uaddlp.4s v0, v0 // v4s32, v2s64
2322 // uaddlp.2d v0, v0 // v2s64
2323 //
2324 // For 64 bit vector popcounts, we lower to the following sequence:
2325 // cnt.8b v0, v0 // v4s16, v2s32
2326 // uaddlp.4h v0, v0 // v4s16, v2s32
2327 // uaddlp.2s v0, v0 // v2s32
2328
2329 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2330 Register Dst = MI.getOperand(i: 0).getReg();
2331 Register Val = MI.getOperand(i: 1).getReg();
2332 LLT Ty = MRI.getType(Reg: Val);
2333
2334 LLT i64 = LLT::integer(SizeInBits: 64);
2335 LLT i32 = LLT::integer(SizeInBits: 32);
2336 LLT i16 = LLT::integer(SizeInBits: 16);
2337 LLT i8 = LLT::integer(SizeInBits: 8);
2338 unsigned Size = Ty.getSizeInBits();
2339
2340 assert(Ty == MRI.getType(Dst) &&
2341 "Expected src and dst to have the same type!");
2342
2343 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2344
2345 auto Split = MIRBuilder.buildUnmerge(Res: i64, Op: Val);
2346 auto CTPOP1 = MIRBuilder.buildCTPOP(Dst: i64, Src0: Split->getOperand(i: 0));
2347 auto CTPOP2 = MIRBuilder.buildCTPOP(Dst: i64, Src0: Split->getOperand(i: 1));
2348 auto Add = MIRBuilder.buildAdd(Dst: i64, Src0: CTPOP1, Src1: CTPOP2);
2349
2350 MIRBuilder.buildZExt(Res: Dst, Op: Add);
2351 MI.eraseFromParent();
2352 return true;
2353 }
2354
2355 if (!ST->hasNEON() ||
2356 MI.getMF()->getFunction().hasFnAttribute(Kind: Attribute::NoImplicitFloat)) {
2357 // Use generic lowering when custom lowering is not possible.
2358 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2359 Helper.lowerBitCount(MI) ==
2360 LegalizerHelper::LegalizeResult::Legalized;
2361 }
2362
2363 // Pre-conditioning: widen Val up to the nearest vector type.
2364 // s32,s64,v4s16,v2s32 -> v8i8
2365 // v8s16,v4s32,v2s64 -> v16i8
2366 LLT VTy = Size == 128 ? LLT::fixed_vector(NumElements: 16, ScalarTy: i8) : LLT::fixed_vector(NumElements: 8, ScalarTy: i8);
2367 if (Ty.isScalar()) {
2368 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2369 if (Size == 32) {
2370 Val = MIRBuilder.buildZExt(Res: i64, Op: Val).getReg(Idx: 0);
2371 }
2372 }
2373 Val = MIRBuilder.buildBitcast(Dst: VTy, Src: Val).getReg(Idx: 0);
2374
2375 // Count bits in each byte-sized lane.
2376 auto CTPOP = MIRBuilder.buildCTPOP(Dst: VTy, Src0: Val);
2377
2378 // Sum across lanes.
2379 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2380 Ty.getScalarSizeInBits() != 16) {
2381 LLT Dt = Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: i64) ? LLT::fixed_vector(NumElements: 4, ScalarTy: i32) : Ty;
2382 auto Zeros = MIRBuilder.buildConstant(Res: Dt, Val: 0);
2383 auto Ones = MIRBuilder.buildConstant(Res: VTy, Val: 1);
2384 MachineInstrBuilder Sum;
2385
2386 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: i64)) {
2387 auto UDOT =
2388 MIRBuilder.buildInstr(Opc: AArch64::G_UDOT, DstOps: {Dt}, SrcOps: {Zeros, Ones, CTPOP});
2389 Sum = MIRBuilder.buildInstr(Opc: AArch64::G_UADDLP, DstOps: {Ty}, SrcOps: {UDOT});
2390 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: i32)) {
2391 Sum = MIRBuilder.buildInstr(Opc: AArch64::G_UDOT, DstOps: {Dt}, SrcOps: {Zeros, Ones, CTPOP});
2392 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: i32)) {
2393 Sum = MIRBuilder.buildInstr(Opc: AArch64::G_UDOT, DstOps: {Dt}, SrcOps: {Zeros, Ones, CTPOP});
2394 } else {
2395 llvm_unreachable("unexpected vector shape");
2396 }
2397
2398 Sum->getOperand(i: 0).setReg(Dst);
2399 MI.eraseFromParent();
2400 return true;
2401 }
2402
2403 Register HSum = CTPOP.getReg(Idx: 0);
2404 unsigned Opc;
2405 SmallVector<LLT> HAddTys;
2406 if (Ty.isScalar()) {
2407 Opc = Intrinsic::aarch64_neon_uaddlv;
2408 HAddTys.push_back(Elt: i32);
2409 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: i16)) {
2410 Opc = Intrinsic::aarch64_neon_uaddlp;
2411 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 8, ScalarTy: i16));
2412 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: i32)) {
2413 Opc = Intrinsic::aarch64_neon_uaddlp;
2414 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 8, ScalarTy: i16));
2415 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarTy: i32));
2416 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: i64)) {
2417 Opc = Intrinsic::aarch64_neon_uaddlp;
2418 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 8, ScalarTy: i16));
2419 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarTy: i32));
2420 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 2, ScalarTy: i64));
2421 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: i16)) {
2422 Opc = Intrinsic::aarch64_neon_uaddlp;
2423 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarTy: i16));
2424 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: i32)) {
2425 Opc = Intrinsic::aarch64_neon_uaddlp;
2426 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarTy: i16));
2427 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 2, ScalarTy: i32));
2428 } else
2429 llvm_unreachable("unexpected vector shape");
2430 MachineInstrBuilder UADD;
2431 for (LLT HTy : HAddTys) {
2432 UADD = MIRBuilder.buildIntrinsic(ID: Opc, Res: {HTy}).addUse(RegNo: HSum);
2433 HSum = UADD.getReg(Idx: 0);
2434 }
2435
2436 // Post-conditioning.
2437 if (Ty.isScalar() && (Size == 64 || Size == 128))
2438 MIRBuilder.buildZExt(Res: Dst, Op: UADD);
2439 else
2440 UADD->getOperand(i: 0).setReg(Dst);
2441 MI.eraseFromParent();
2442 return true;
2443}
2444
2445bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2446 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2447 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2448 LLT i64 = LLT::integer(SizeInBits: 64);
2449 auto Addr = MI.getOperand(i: 1).getReg();
2450 auto DesiredI = MIRBuilder.buildUnmerge(Res: {i64, i64}, Op: MI.getOperand(i: 2));
2451 auto NewI = MIRBuilder.buildUnmerge(Res: {i64, i64}, Op: MI.getOperand(i: 3));
2452 auto DstLo = MRI.createGenericVirtualRegister(Ty: i64);
2453 auto DstHi = MRI.createGenericVirtualRegister(Ty: i64);
2454
2455 MachineInstrBuilder CAS;
2456 if (ST->hasLSE()) {
2457 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2458 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2459 // the rest of the MIR so we must reassemble the extracted registers into a
2460 // 128-bit known-regclass one with code like this:
2461 //
2462 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2463 // %out = CASP %in1, ...
2464 // %OldLo = G_EXTRACT %out, 0
2465 // %OldHi = G_EXTRACT %out, 64
2466 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2467 unsigned Opcode;
2468 switch (Ordering) {
2469 case AtomicOrdering::Acquire:
2470 Opcode = AArch64::CASPAX;
2471 break;
2472 case AtomicOrdering::Release:
2473 Opcode = AArch64::CASPLX;
2474 break;
2475 case AtomicOrdering::AcquireRelease:
2476 case AtomicOrdering::SequentiallyConsistent:
2477 Opcode = AArch64::CASPALX;
2478 break;
2479 default:
2480 Opcode = AArch64::CASPX;
2481 break;
2482 }
2483
2484 LLT s128 = LLT::scalar(SizeInBits: 128);
2485 auto CASDst = MRI.createGenericVirtualRegister(Ty: s128);
2486 auto CASDesired = MRI.createGenericVirtualRegister(Ty: s128);
2487 auto CASNew = MRI.createGenericVirtualRegister(Ty: s128);
2488 MIRBuilder.buildInstr(Opc: TargetOpcode::REG_SEQUENCE, DstOps: {CASDesired}, SrcOps: {})
2489 .addUse(RegNo: DesiredI->getOperand(i: 0).getReg())
2490 .addImm(Val: AArch64::sube64)
2491 .addUse(RegNo: DesiredI->getOperand(i: 1).getReg())
2492 .addImm(Val: AArch64::subo64);
2493 MIRBuilder.buildInstr(Opc: TargetOpcode::REG_SEQUENCE, DstOps: {CASNew}, SrcOps: {})
2494 .addUse(RegNo: NewI->getOperand(i: 0).getReg())
2495 .addImm(Val: AArch64::sube64)
2496 .addUse(RegNo: NewI->getOperand(i: 1).getReg())
2497 .addImm(Val: AArch64::subo64);
2498
2499 CAS = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {CASDst}, SrcOps: {CASDesired, CASNew, Addr});
2500
2501 MIRBuilder.buildExtract(Res: {DstLo}, Src: {CASDst}, Index: 0);
2502 MIRBuilder.buildExtract(Res: {DstHi}, Src: {CASDst}, Index: 64);
2503 } else {
2504 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2505 // can take arbitrary registers so it just has the normal GPR64 operands the
2506 // rest of AArch64 is expecting.
2507 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2508 unsigned Opcode;
2509 switch (Ordering) {
2510 case AtomicOrdering::Acquire:
2511 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2512 break;
2513 case AtomicOrdering::Release:
2514 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2515 break;
2516 case AtomicOrdering::AcquireRelease:
2517 case AtomicOrdering::SequentiallyConsistent:
2518 Opcode = AArch64::CMP_SWAP_128;
2519 break;
2520 default:
2521 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2522 break;
2523 }
2524
2525 auto Scratch = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
2526 CAS = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {DstLo, DstHi, Scratch},
2527 SrcOps: {Addr, DesiredI->getOperand(i: 0),
2528 DesiredI->getOperand(i: 1), NewI->getOperand(i: 0),
2529 NewI->getOperand(i: 1)});
2530 }
2531
2532 CAS.cloneMemRefs(OtherMI: MI);
2533 constrainSelectedInstRegOperands(I&: *CAS, TII: *ST->getInstrInfo(),
2534 TRI: *MRI.getTargetRegisterInfo(),
2535 RBI: *ST->getRegBankInfo());
2536
2537 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0), Ops: {DstLo, DstHi});
2538 MI.eraseFromParent();
2539 return true;
2540}
2541
2542bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2543 LegalizerHelper &Helper) const {
2544 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2545 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2546 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
2547 auto BitReverse = MIRBuilder.buildBitReverse(Dst: Ty, Src: MI.getOperand(i: 1));
2548 MIRBuilder.buildCTLZ(Dst: MI.getOperand(i: 0).getReg(), Src0: BitReverse);
2549 MI.eraseFromParent();
2550 return true;
2551}
2552
2553bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2554 LegalizerHelper &Helper) const {
2555 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2556
2557 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2558 if (MI.getOpcode() == TargetOpcode::G_MEMSET ||
2559 MI.getOpcode() == TargetOpcode::G_MEMSET_INLINE) {
2560 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2561 // the instruction).
2562 auto &Value = MI.getOperand(i: 1);
2563 Register ExtValueReg =
2564 MIRBuilder.buildAnyExt(Res: LLT::integer(SizeInBits: 64), Op: Value).getReg(Idx: 0);
2565 Value.setReg(ExtValueReg);
2566 return true;
2567 }
2568
2569 return false;
2570}
2571
2572bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2573 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2574 const GExtractVectorElement *Element = cast<GExtractVectorElement>(Val: &MI);
2575 auto VRegAndVal =
2576 getIConstantVRegValWithLookThrough(VReg: Element->getIndexReg(), MRI);
2577 if (VRegAndVal)
2578 return true;
2579 LLT VecTy = MRI.getType(Reg: Element->getVectorReg());
2580 if (VecTy.isScalableVector())
2581 return true;
2582 return Helper.lowerExtractInsertVectorElt(MI) !=
2583 LegalizerHelper::LegalizeResult::UnableToLegalize;
2584}
2585
2586bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2587 MachineInstr &MI, LegalizerHelper &Helper) const {
2588 MachineFunction &MF = *MI.getParent()->getParent();
2589 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2590 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2591
2592 // If stack probing is not enabled for this function, use the default
2593 // lowering.
2594 if (!MF.getFunction().hasFnAttribute(Kind: "probe-stack") ||
2595 MF.getFunction().getFnAttribute(Kind: "probe-stack").getValueAsString() !=
2596 "inline-asm") {
2597 Helper.lowerDynStackAlloc(MI);
2598 return true;
2599 }
2600
2601 Register Dst = MI.getOperand(i: 0).getReg();
2602 Register AllocSize = MI.getOperand(i: 1).getReg();
2603 Align Alignment = assumeAligned(Value: MI.getOperand(i: 2).getImm());
2604
2605 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2606 "Unexpected type for dynamic alloca");
2607 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2608 "Unexpected type for dynamic alloca");
2609
2610 LLT PtrTy = MRI.getType(Reg: Dst);
2611 Register SPReg =
2612 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2613 Register SPTmp =
2614 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2615 auto NewMI =
2616 MIRBuilder.buildInstr(Opc: AArch64::PROBED_STACKALLOC_DYN, DstOps: {}, SrcOps: {SPTmp});
2617 MRI.setRegClass(Reg: NewMI.getReg(Idx: 0), RC: &AArch64::GPR64commonRegClass);
2618 MIRBuilder.setInsertPt(MBB&: *NewMI->getParent(), II: NewMI);
2619 MIRBuilder.buildCopy(Res: Dst, Op: SPTmp);
2620
2621 MI.eraseFromParent();
2622 return true;
2623}
2624
2625bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2626 LegalizerHelper &Helper) const {
2627 MachineIRBuilder &MIB = Helper.MIRBuilder;
2628 auto &AddrVal = MI.getOperand(i: 0);
2629
2630 int64_t IsWrite = MI.getOperand(i: 1).getImm();
2631 int64_t Locality = MI.getOperand(i: 2).getImm();
2632 int64_t IsData = MI.getOperand(i: 3).getImm();
2633
2634 bool IsStream = Locality == 0;
2635 if (Locality != 0) {
2636 assert(Locality <= 3 && "Prefetch locality out-of-range");
2637 // The locality degree is the opposite of the cache speed.
2638 // Put the number the other way around.
2639 // The encoding starts at 0 for level 1
2640 Locality = 3 - Locality;
2641 }
2642
2643 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2644
2645 MIB.buildInstr(Opcode: AArch64::G_AARCH64_PREFETCH).addImm(Val: PrfOp).add(MO: AddrVal);
2646 MI.eraseFromParent();
2647 return true;
2648}
2649
2650bool AArch64LegalizerInfo::legalizeConcatVectors(
2651 MachineInstr &MI, MachineRegisterInfo &MRI,
2652 MachineIRBuilder &MIRBuilder) const {
2653 // Widen sub-byte element vectors to byte-sized elements before concatenating.
2654 // This is analogous to SDAG's integer type promotion for sub-byte types.
2655 auto &Concat = cast<GConcatVectors>(Val&: MI);
2656 Register DstReg = Concat.getReg(Idx: 0);
2657 LLT DstTy = MRI.getType(Reg: DstReg);
2658 assert(DstTy.getScalarSizeInBits() < 8 && "Expected dst ty to be < 8b");
2659
2660 unsigned WideEltSize =
2661 std::max(a: 8u, b: (unsigned)PowerOf2Ceil(A: DstTy.getScalarSizeInBits()));
2662 LLT SrcTy = MRI.getType(Reg: Concat.getSourceReg(I: 0));
2663 LLT WideSrcTy = SrcTy.changeElementSize(NewEltSize: WideEltSize);
2664 LLT WideDstTy = DstTy.changeElementSize(NewEltSize: WideEltSize);
2665
2666 SmallVector<Register> WideSrcs;
2667 for (unsigned I = 0; I < Concat.getNumSources(); ++I) {
2668 auto Wide = MIRBuilder.buildAnyExt(Res: WideSrcTy, Op: Concat.getSourceReg(I));
2669 WideSrcs.push_back(Elt: Wide.getReg(Idx: 0));
2670 }
2671
2672 auto WideConcat = MIRBuilder.buildConcatVectors(Res: WideDstTy, Ops: WideSrcs);
2673 MIRBuilder.buildTrunc(Res: DstReg, Op: WideConcat);
2674 MI.eraseFromParent();
2675 return true;
2676}
2677
2678bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
2679 MachineIRBuilder &MIRBuilder,
2680 MachineRegisterInfo &MRI) const {
2681 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
2682
2683 // This function legalizes f64 -> bf16 and f64 -> f16 truncations via f64 ->
2684 // f32 G_FPTRUNC_ODD and f32 -> [b]f16 G_FPTRUNC, which apparently avoids the
2685 // usual double-rounding issue that could be present from using twin
2686 // G_FPTRUNC.
2687
2688 if (DstTy.isBFloat16() && SrcTy.isFloat64()) {
2689 auto Mid =
2690 MIRBuilder.buildInstr(Opc: AArch64::G_FPTRUNC_ODD, DstOps: {LLT::float32()}, SrcOps: {Src});
2691 MIRBuilder.buildInstr(Opc: AArch64::G_FPTRUNC, DstOps: {Dst}, SrcOps: {Mid});
2692 MI.eraseFromParent();
2693 return true;
2694 }
2695
2696 assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
2697 "Expected a power of 2 elements");
2698
2699 // We must mutate types here as FPTrunc may be used on a IEEE floating point
2700 // or a brainfloat.
2701 LLT v2s16 = DstTy.changeElementCount(NumElements: 2);
2702 LLT v4s16 = DstTy.changeElementCount(NumElements: 4);
2703 LLT v2s32 = SrcTy.changeElementCount(NumElements: 2).changeElementSize(NewEltSize: 32);
2704 LLT v4s32 = SrcTy.changeElementCount(NumElements: 4).changeElementSize(NewEltSize: 32);
2705 LLT v2s64 = SrcTy.changeElementCount(NumElements: 2);
2706
2707 SmallVector<Register> RegsToUnmergeTo;
2708 SmallVector<Register> TruncOddDstRegs;
2709 SmallVector<Register> RegsToMerge;
2710
2711 unsigned ElemCount = SrcTy.getNumElements();
2712
2713 // Find the biggest size chunks we can work with
2714 int StepSize = ElemCount % 4 ? 2 : 4;
2715
2716 // If we have a power of 2 greater than 2, we need to first unmerge into
2717 // enough pieces
2718 if (ElemCount <= 2)
2719 RegsToUnmergeTo.push_back(Elt: Src);
2720 else {
2721 for (unsigned i = 0; i < ElemCount / 2; ++i)
2722 RegsToUnmergeTo.push_back(Elt: MRI.createGenericVirtualRegister(Ty: v2s64));
2723
2724 MIRBuilder.buildUnmerge(Res: RegsToUnmergeTo, Op: Src);
2725 }
2726
2727 // Create all of the round-to-odd instructions and store them
2728 for (auto SrcReg : RegsToUnmergeTo) {
2729 Register Mid =
2730 MIRBuilder.buildInstr(Opc: AArch64::G_FPTRUNC_ODD, DstOps: {v2s32}, SrcOps: {SrcReg})
2731 .getReg(Idx: 0);
2732 TruncOddDstRegs.push_back(Elt: Mid);
2733 }
2734
2735 // Truncate 4s32 to 4s16 if we can to reduce instruction count, otherwise
2736 // truncate 2s32 to 2s16.
2737 unsigned Index = 0;
2738 for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
2739 if (StepSize == 4) {
2740 Register ConcatDst =
2741 MIRBuilder
2742 .buildMergeLikeInstr(
2743 Res: {v4s32}, Ops: {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
2744 .getReg(Idx: 0);
2745
2746 RegsToMerge.push_back(
2747 Elt: MIRBuilder.buildFPTrunc(Res: v4s16, Op: ConcatDst).getReg(Idx: 0));
2748 } else {
2749 RegsToMerge.push_back(
2750 Elt: MIRBuilder.buildFPTrunc(Res: v2s16, Op: TruncOddDstRegs[Index++]).getReg(Idx: 0));
2751 }
2752 }
2753
2754 // If there is only one register, replace the destination
2755 if (RegsToMerge.size() == 1) {
2756 MRI.replaceRegWith(FromReg: Dst, ToReg: RegsToMerge.pop_back_val());
2757 MI.eraseFromParent();
2758 return true;
2759 }
2760
2761 // Merge the rest of the instructions & replace the register
2762 Register Fin = MIRBuilder.buildMergeLikeInstr(Res: DstTy, Ops: RegsToMerge).getReg(Idx: 0);
2763 MRI.replaceRegWith(FromReg: Dst, ToReg: Fin);
2764 MI.eraseFromParent();
2765 return true;
2766}
2767