//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/MathExtras.h"
#include <initializer_list>

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;
using namespace MIPatternMatch;

AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
    : ST(&ST) {
  using namespace TargetOpcode;
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT v16s8 = LLT::fixed_vector(16, 8);
  const LLT v8s8 = LLT::fixed_vector(8, 8);
  const LLT v4s8 = LLT::fixed_vector(4, 8);
  const LLT v2s8 = LLT::fixed_vector(2, 8);
  const LLT v8s16 = LLT::fixed_vector(8, 16);
  const LLT v4s16 = LLT::fixed_vector(4, 16);
  const LLT v2s16 = LLT::fixed_vector(2, 16);
  const LLT v2s32 = LLT::fixed_vector(2, 32);
  const LLT v4s32 = LLT::fixed_vector(4, 32);
  const LLT v2s64 = LLT::fixed_vector(2, 64);
  const LLT v2p0 = LLT::fixed_vector(2, p0);

  const LLT nxv16s8 = LLT::scalable_vector(16, s8);
  const LLT nxv8s16 = LLT::scalable_vector(8, s16);
  const LLT nxv4s32 = LLT::scalable_vector(4, s32);
  const LLT nxv2s64 = LLT::scalable_vector(2, s64);

  std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
                                                        v16s8, v8s16, v4s32,
                                                        v2s64, v2p0,
                                                        /* End 128bit types */
                                                        /* Begin 64bit types */
                                                        v8s8, v4s16, v2s32};
  std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
  SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
  SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);

  const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();

  // FIXME: support subtargets which have neon/fp-armv8 disabled.
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    getLegacyLegalizerInfo().computeTables();
    return;
  }

  // Some instructions only support s16 if the subtarget has full 16-bit FP
  // support.
  const bool HasFP16 = ST.hasFullFP16();
  const LLT &MinFPScalar = HasFP16 ? s16 : s32;

  const bool HasCSSC = ST.hasCSSC();
  const bool HasRCPC3 = ST.hasRCPC3();
  const bool HasSVE = ST.hasSVE();

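  // Note: within each ruleset below, rules are matched in the order they are
  // added, so the earlier legalFor/customIf entries take precedence over the
  // widening and clamping mutations that follow them.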
  getActionDefinitionsBuilder(
      {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
      .legalFor({p0, s8, s16, s32, s64})
      .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
                 v2s64, v2p0})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s8, s64)
      .moreElementsToNextPow2(0)
      .widenVectorEltsToVectorMinSize(0, 64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64})
      .legalFor(PackedVectorAllTypeList)
      .widenScalarToNextPow2(0)
      .moreElementsToNextPow2(0)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .clampScalar(0, s16, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2);

  getActionDefinitionsBuilder(G_INSERT)
      .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
                   smallerThan(1, 0)))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(1)
      .minScalar(1, s8)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);

  getActionDefinitionsBuilder(G_EXTRACT)
      .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
                   typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
      .widenScalarToNextPow2(1)
      .clampScalar(1, s32, s128)
      .widenScalarToNextPow2(0)
      .minScalar(0, s16)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
      .maxScalarIf(typeInSet(1, {s128}), 0, s64);

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
      .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, s64)
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 2;
          },
          0, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 4;
          },
          0, s16)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 16;
          },
          0, s8)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder(G_MUL)
      .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, s64)
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 2;
          },
          0, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 4;
          },
          0, s16)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getNumElements() <= 16;
          },
          0, s8)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor({
          {s32, s32},
          {s32, s64},
          {s64, s64},
          {v8s8, v8s8},
          {v16s8, v16s8},
          {v4s16, v4s16},
          {v8s16, v8s16},
          {v2s32, v2s32},
          {v4s32, v4s32},
          {v2s64, v2s64},
      })
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .minScalarEltSameAsIf(isVector(0), 1, 0)
      .maxScalarEltSameAsIf(isVector(0), 1, 0);

  getActionDefinitionsBuilder(G_PTR_ADD)
      .legalFor({{p0, s64}, {v2p0, v2s64}})
      .clampScalarOrElt(1, s64, s64)
      .clampNumElements(0, v2p0, v2p0);

  getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .libcallFor({s128})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
      .lowerFor({s8, s16, s32, s64, v2s32, v4s32, v2s64})
      .libcallFor({s128})
      .widenScalarOrEltToNextPow2(0)
      .minScalarOrElt(0, s32)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .scalarize(0);

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .widenScalarToNextPow2(0, /*Min = */ 32)
      .clampScalar(0, s32, s64)
      .lower();

  getActionDefinitionsBuilder({G_SMULH, G_UMULH})
      .legalFor({s64, v16s8, v8s16, v4s32})
      .lower();

  getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
      .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .legalFor(HasCSSC, {s32, s64})
      .minScalar(HasCSSC, 0, s32)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .lower();

  // FIXME: Legal vector types are only legal with NEON.
  getActionDefinitionsBuilder(G_ABS)
      .legalFor(HasCSSC, {s32, s64})
      .legalFor(PackedVectorAllTypeList)
      .customIf([=](const LegalityQuery &Q) {
        // TODO: Fix suboptimal codegen for 128+ bit types.
        LLT SrcTy = Q.Types[0];
        return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
      })
      .widenScalarIf(
          [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
          [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
      .widenScalarIf(
          [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
          [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .lower();

  getActionDefinitionsBuilder(
      {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
      .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .lower();

  getActionDefinitionsBuilder(
      {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
      .legalFor({{s32, s32}, {s64, s32}})
      .clampScalar(0, s32, s64)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_FSHL, G_FSHR})
      .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
      .lower();

  getActionDefinitionsBuilder(G_ROTR)
      .legalFor({{s32, s64}, {s64, s64}})
      .customIf([=](const LegalityQuery &Q) {
        return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
      })
      .lower();
  getActionDefinitionsBuilder(G_ROTL).lower();

  getActionDefinitionsBuilder({G_SBFX, G_UBFX})
      .customFor({{s32, s32}, {s64, s64}});

  auto always = [=](const LegalityQuery &Q) { return true; };
  getActionDefinitionsBuilder(G_CTPOP)
      .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
      .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
      .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
      .customFor({{s128, s128},
                  {v4s16, v4s16},
                  {v8s16, v8s16},
                  {v2s32, v2s32},
                  {v4s32, v4s32},
                  {v2s64, v2s64}})
      .clampScalar(0, s32, s128)
      .widenScalarToNextPow2(0)
      .minScalarEltSameAsIf(always, 1, 0)
      .maxScalarEltSameAsIf(always, 1, 0)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);

  getActionDefinitionsBuilder({G_CTLZ, G_CTLS})
      .legalFor({{s32, s32},
                 {s64, s64},
                 {v8s8, v8s8},
                 {v16s8, v16s8},
                 {v4s16, v4s16},
                 {v8s16, v8s16},
                 {v2s32, v2s32},
                 {v4s32, v4s32}})
      .widenScalarToNextPow2(1, /*Min=*/32)
      .clampScalar(1, s32, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .moreElementsToNextPow2(0)
      .scalarizeIf(scalarOrEltWiderThan(0, 32), 0)
      .scalarSameSizeAs(0, 1);

  getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();

  getActionDefinitionsBuilder(G_CTTZ)
      .lowerIf(isVector(0))
      .widenScalarToNextPow2(1, /*Min=*/32)
      .clampScalar(1, s32, s64)
      .scalarSameSizeAs(0, 1)
      .legalFor(HasCSSC, {s32, s64})
      .customFor(!HasCSSC, {s32, s64});

  getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();

  getActionDefinitionsBuilder(G_BITREVERSE)
      .legalFor({s32, s64, v8s8, v16s8})
      .widenScalarToNextPow2(0, /*Min = */ 32)
      .widenScalarOrEltToNextPow2OrMinSize(0, 8)
      .clampScalar(0, s32, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .moreElementsToNextPow2(0)
      .lower();

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
      .widenScalarOrEltToNextPow2(0, 16)
      .clampScalar(0, s32, s64)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
      .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
      .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .moreElementsToNextPow2(0)
      .lower();

  getActionDefinitionsBuilder(
      {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
       G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
       G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
      .legalFor({s32, s64, v2s32, v4s32, v2s64})
      .legalFor(HasFP16, {s16, v4s16, v8s16})
      .libcallFor({s128})
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .minScalarOrElt(0, MinFPScalar)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder({G_FABS, G_FNEG})
      .legalFor({s32, s64, v2s32, v4s32, v2s64})
      .legalFor(HasFP16, {s16, v4s16, v8s16})
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .lowerIf(scalarOrEltWiderThan(0, 64))
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .lowerFor({s16, v4s16, v8s16});

  getActionDefinitionsBuilder(G_FREM)
      .libcallFor({s32, s64, s128})
      .minScalar(0, s32)
      .scalarize(0);

  getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
                               G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
                               G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
                               G_FSINH, G_FTANH, G_FMODF})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, s128});
  getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
      .scalarize(0)
      .minScalar(0, s32)
      .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});

  getActionDefinitionsBuilder({G_LROUND, G_INTRINSIC_LRINT})
      .legalFor({{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
      .legalFor(HasFP16, {{s32, s16}, {s64, s16}})
      .minScalar(1, s32)
      .libcallFor({{s64, s128}})
      .lower();
  getActionDefinitionsBuilder({G_LLROUND, G_INTRINSIC_LLRINT})
      .legalFor({{s64, s32}, {s64, s64}})
      .legalFor(HasFP16, {{s64, s16}})
      .minScalar(0, s64)
      .minScalar(1, s32)
      .libcallFor({{s64, s128}})
      .lower();

  // TODO: Custom legalization for mismatched types.
  getActionDefinitionsBuilder(G_FCOPYSIGN)
      .moreElementsIf(
          [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
          [=](const LegalityQuery &Query) {
            const LLT Ty = Query.Types[0];
            return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
          })
      .lower();

  getActionDefinitionsBuilder(G_FMAD).lower();

  for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
    auto &Actions = getActionDefinitionsBuilder(Op);

    if (Op == G_SEXTLOAD)
      Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(
          0, AtomicOrdering::Unordered));

    // Atomics have zero extending behavior.
    Actions
        .legalForTypesWithMemDesc({{s32, p0, s8, 8},
                                   {s32, p0, s16, 8},
                                   {s32, p0, s32, 8},
                                   {s64, p0, s8, 2},
                                   {s64, p0, s16, 2},
                                   {s64, p0, s32, 4},
                                   {s64, p0, s64, 8},
                                   {p0, p0, s64, 8},
                                   {v2s32, p0, s64, 8}})
        .widenScalarToNextPow2(0)
        .clampScalar(0, s32, s64)
        // TODO: We could support sum-of-pow2's but the lowering code doesn't
        // know how to do that yet.
        .unsupportedIfMemSizeNotPow2()
        // Lower anything left over into G_*EXT and G_LOAD
        .lower();
  }

  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
  };

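  // Loads: 128-bit atomic loads (plus acquire loads when FEAT_LRCPC3 is
  // available) and loads of pointer vectors are routed to custom legalization
  // below.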
  getActionDefinitionsBuilder(G_LOAD)
      .customIf([=](const LegalityQuery &Query) {
        return HasRCPC3 && Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
      })
      .customIf([=](const LegalityQuery &Query) {
        return Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
      })
      .legalForTypesWithMemDesc({{s8, p0, s8, 8},
                                 {s16, p0, s16, 8},
                                 {s32, p0, s32, 8},
                                 {s64, p0, s64, 8},
                                 {p0, p0, s64, 8},
                                 {s128, p0, s128, 8},
                                 {v8s8, p0, s64, 8},
                                 {v16s8, p0, s128, 8},
                                 {v4s16, p0, s64, 8},
                                 {v8s16, p0, s128, 8},
                                 {v2s32, p0, s64, 8},
                                 {v4s32, p0, s128, 8},
                                 {v2s64, p0, s128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc(
          {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
      .legalForTypesWithMemDesc({
          // SVE vscale x 128 bit base sizes
          {nxv16s8, p0, nxv16s8, 8},
          {nxv8s16, p0, nxv8s16, 8},
          {nxv4s32, p0, nxv4s32, 8},
          {nxv2s64, p0, nxv2s64, 8},
      })
      .widenScalarToNextPow2(0, /* MinSize = */ 8)
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampMaxNumElements(0, s32, 4)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2)
      .lowerIfMemSizeNotByteSizePow2()
      .clampScalar(0, s8, s64)
      .narrowScalarIf(
          [=](const LegalityQuery &Query) {
            // Clamp extending load results to 32-bits.
            return Query.Types[0].isScalar() &&
                   Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
                   Query.Types[0].getSizeInBits() > 32;
          },
          changeTo(0, s32))
      // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
      .bitcastIf(typeInSet(0, {v4s8}),
                 [=](const LegalityQuery &Query) {
                   const LLT VecTy = Query.Types[0];
                   return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
                 })
      .customIf(IsPtrVecPred)
      .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);

  getActionDefinitionsBuilder(G_STORE)
      .customIf([=](const LegalityQuery &Query) {
        return HasRCPC3 && Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
      })
      .customIf([=](const LegalityQuery &Query) {
        return Query.Types[0] == s128 &&
               Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
      })
      .widenScalarIf(
          all(scalarNarrowerThan(0, 32),
              atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Release)),
          changeTo(0, s32))
      .legalForTypesWithMemDesc(
          {{s8, p0, s8, 8},
           {s16, p0, s8, 8}, // truncstorei8 from s16
           {s32, p0, s8, 8}, // truncstorei8 from s32
           {s64, p0, s8, 8}, // truncstorei8 from s64
           {s16, p0, s16, 8},
           {s32, p0, s16, 8}, // truncstorei16 from s32
           {s64, p0, s16, 8}, // truncstorei16 from s64
           {s32, p0, s8, 8},
           {s32, p0, s16, 8},
           {s32, p0, s32, 8},
           {s64, p0, s64, 8},
           {s64, p0, s32, 8}, // truncstorei32 from s64
           {p0, p0, s64, 8},
           {s128, p0, s128, 8},
           {v16s8, p0, s128, 8},
           {v8s8, p0, s64, 8},
           {v4s16, p0, s64, 8},
           {v8s16, p0, s128, 8},
           {v2s32, p0, s64, 8},
           {v4s32, p0, s128, 8},
           {v2s64, p0, s128, 8}})
      .legalForTypesWithMemDesc({
          // SVE vscale x 128 bit base sizes
          // TODO: Add nxv2p0. Consider bitcastIf.
          // See #92130
          // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
          {nxv16s8, p0, nxv16s8, 8},
          {nxv8s16, p0, nxv8s16, 8},
          {nxv4s32, p0, nxv4s32, 8},
          {nxv2s64, p0, nxv2s64, 8},
      })
      .clampScalar(0, s8, s64)
      .minScalarOrElt(0, s8)
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0] != Query.MMODescrs[0].MemoryTy;
      })
      // Maximum: sN * k = 128
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampMaxNumElements(0, s32, 4)
      .clampMaxNumElements(0, s64, 2)
      .clampMaxNumElements(0, p0, 2)
      .lowerIfMemSizeNotPow2()
      // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
      .bitcastIf(all(typeInSet(0, {v4s8}),
                     LegalityPredicate([=](const LegalityQuery &Query) {
                       return Query.Types[0].getSizeInBits() ==
                              Query.MMODescrs[0].MemoryTy.getSizeInBits();
                     })),
                 [=](const LegalityQuery &Query) {
                   const LLT VecTy = Query.Types[0];
                   return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
                 })
      .customIf(IsPtrVecPred)
      .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .lower();

  getActionDefinitionsBuilder(G_INDEXED_STORE)
      // Idx 0 == Ptr, Idx 1 == Val
      // TODO: we can implement legalizations but as of now these are
      // generated in a very specific way.
      .legalForTypesWithMemDesc({
          {p0, s8, s8, 8},
          {p0, s16, s16, 8},
          {p0, s32, s8, 8},
          {p0, s32, s16, 8},
          {p0, s32, s32, 8},
          {p0, s64, s64, 8},
          {p0, p0, p0, 8},
          {p0, v8s8, v8s8, 8},
          {p0, v16s8, v16s8, 8},
          {p0, v4s16, v4s16, 8},
          {p0, v8s16, v8s16, 8},
          {p0, v2s32, v2s32, 8},
          {p0, v4s32, v4s32, 8},
          {p0, v2s64, v2s64, 8},
          {p0, v2p0, v2p0, 8},
          {p0, s128, s128, 8},
      })
      .unsupported();

  auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
    LLT LdTy = Query.Types[0];
    LLT PtrTy = Query.Types[1];
    if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
        !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
      return false;
    if (PtrTy != p0)
      return false;
    return true;
  };
  getActionDefinitionsBuilder(G_INDEXED_LOAD)
      .unsupportedIf(
          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
      .legalIf(IndexedLoadBasicPred)
      .unsupported();
  getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
      .unsupportedIf(
          atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
      .legalIf(all(typeInSet(0, {s16, s32, s64}),
                   LegalityPredicate([=](const LegalityQuery &Q) {
                     LLT LdTy = Q.Types[0];
                     LLT PtrTy = Q.Types[1];
                     LLT MemTy = Q.MMODescrs[0].MemoryTy;
                     if (PtrTy != p0)
                       return false;
                     if (LdTy == s16)
                       return MemTy == s8;
                     if (LdTy == s32)
                       return MemTy == s8 || MemTy == s16;
                     if (LdTy == s64)
                       return MemTy == s8 || MemTy == s16 || MemTy == s32;
                     return false;
                   })))
      .unsupported();

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s8, s64);
  getActionDefinitionsBuilder(G_FCONSTANT)
      // Always legalize s16 to prevent G_FCONSTANT being widened to G_CONSTANT
      .legalFor({s16, s32, s64, s128})
      .clampScalar(0, MinFPScalar, s128);

  // FIXME: fix moreElementsToNextPow2
  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
      .widenScalarOrEltToNextPow2(1)
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.isPointerVector() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].isPointerVector();
          },
          0, s64)
      .moreElementsToNextPow2(1)
      .clampNumElements(1, v8s8, v16s8)
      .clampNumElements(1, v4s16, v8s16)
      .clampNumElements(1, v2s32, v4s32)
      .clampNumElements(1, v2s64, v2s64)
      .clampNumElements(1, v2p0, v2p0)
      .customIf(isVector(0));

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64}})
      .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
      .widenScalarOrEltToNextPow2(1)
      .clampScalar(0, s32, s32)
      .minScalarOrElt(1, MinFPScalar)
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.isPointerVector() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .clampNumElements(1, v4s16, v8s16)
      .clampNumElements(1, v2s32, v4s32)
      .clampMaxNumElements(1, s64, 2)
      .moreElementsToNextPow2(1)
      .libcallFor({{s32, s128}});

  // Extensions
  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
    unsigned DstSize = Query.Types[0].getSizeInBits();

    // Handle legal vectors using legalFor
    if (Query.Types[0].isVector())
      return false;

    if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
      return false; // Extending to a scalar s128 needs narrowing.

    const LLT &SrcTy = Query.Types[1];

    // Make sure we fit in a register otherwise. Don't bother checking that
    // the source type is below 128 bits. We shouldn't be allowing anything
    // through which is wider than the destination in the first place.
    unsigned SrcSize = SrcTy.getSizeInBits();
    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
      return false;

    return true;
  };
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf(ExtLegalFunc)
      .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
      .clampScalar(0, s64, s64) // Just for s128, others are handled above.
      .moreElementsToNextPow2(0)
      .clampMaxNumElements(1, s8, 8)
      .clampMaxNumElements(1, s16, 4)
      .clampMaxNumElements(1, s32, 2)
      // Tries to convert a large EXTEND into two smaller EXTENDs
      .lowerIf([=](const LegalityQuery &Query) {
        return (Query.Types[0].getScalarSizeInBits() >
                Query.Types[1].getScalarSizeInBits() * 2) &&
               Query.Types[0].isVector() &&
               (Query.Types[1].getScalarSizeInBits() == 8 ||
                Query.Types[1].getScalarSizeInBits() == 16);
      })
      .clampMinNumElements(1, s8, 8)
      .clampMinNumElements(1, s16, 4)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);

  getActionDefinitionsBuilder(G_TRUNC)
      .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
      .moreElementsToNextPow2(0)
      .clampMaxNumElements(0, s8, 8)
      .clampMaxNumElements(0, s16, 4)
      .clampMaxNumElements(0, s32, 2)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].isVector();
          },
          0, s8)
      .lowerIf([=](const LegalityQuery &Query) {
        LLT DstTy = Query.Types[0];
        LLT SrcTy = Query.Types[1];
        return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
               DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
      })
      .clampMinNumElements(0, s8, 8)
      .clampMinNumElements(0, s16, 4)
      .alwaysLegal();

  getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
      .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
      .clampNumElements(0, v2s32, v2s32);

  getActionDefinitionsBuilder(G_SEXT_INREG)
      .legalFor({s32, s64})
      .legalFor(PackedVectorAllTypeList)
      .maxScalar(0, s64)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .lower();

  // FP conversions
  getActionDefinitionsBuilder(G_FPTRUNC)
      .legalFor(
          {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
      .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
      .moreElementsToNextPow2(1)
      .customIf([](const LegalityQuery &Q) {
        LLT DstTy = Q.Types[0];
        LLT SrcTy = Q.Types[1];
        return SrcTy.isFixedVector() && DstTy.isFixedVector() &&
               SrcTy.getScalarSizeInBits() == 64 &&
               DstTy.getScalarSizeInBits() == 16;
      })
      // Clamp based on input
      .clampNumElements(1, v4s32, v4s32)
      .clampNumElements(1, v2s64, v2s64)
      .scalarize(0);

  getActionDefinitionsBuilder(G_FPEXT)
      .legalFor(
          {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
      .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
      .moreElementsToNextPow2(0)
      .widenScalarIf(
          [](const LegalityQuery &Q) {
            LLT DstTy = Q.Types[0];
            LLT SrcTy = Q.Types[1];
            return SrcTy.isVector() && DstTy.isVector() &&
                   SrcTy.getScalarSizeInBits() == 16 &&
                   DstTy.getScalarSizeInBits() == 64;
          },
          changeElementTo(1, s32))
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .scalarize(0);

  // Conversions
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalFor({{s32, s32},
                 {s64, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s32, v2s32},
                 {v4s32, v4s32},
                 {v2s64, v2s64}})
      .legalFor(HasFP16,
                {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      // The range of a fp16 value fits into an i17, so we can lower the width
      // to i64.
      .narrowScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
          },
          changeTo(0, s64))
      .moreElementsToNextPow2(0)
      .widenScalarOrEltToNextPow2OrMinSize(0)
      .minScalar(0, s32)
      .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() >
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(1, 0))
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() <
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(0, 1))
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .libcallFor(
          {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});

  getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
      .legalFor({{s32, s32},
                 {s64, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s32, v2s32},
                 {v4s32, v4s32},
                 {v2s64, v2s64}})
      .legalFor(
          HasFP16,
          {{s16, s16}, {s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
      // Handle types larger than i64 by scalarizing/lowering.
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      // The range of a fp16 value fits into an i17, so we can lower the width
      // to i64.
      .narrowScalarIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
          },
          changeTo(0, s64))
      .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
      .moreElementsToNextPow2(0)
      .widenScalarToNextPow2(0, /*MinSize=*/32)
      .minScalar(0, s32)
      .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            unsigned ITySize = Query.Types[0].getScalarSizeInBits();
            return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
                   ITySize > Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(1, 0))
      .widenScalarIf(
          [=](const LegalityQuery &Query) {
            unsigned FTySize = Query.Types[1].getScalarSizeInBits();
            return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
                   Query.Types[0].getScalarSizeInBits() < FTySize;
          },
          LegalizeMutations::changeElementSizeTo(0, 1))
      .widenScalarOrEltToNextPow2(0)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalFor({{s32, s32},
                 {s64, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s32, v2s32},
                 {v4s32, v4s32},
                 {v2s64, v2s64}})
      .legalFor(HasFP16,
                {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .moreElementsToNextPow2(1)
      .widenScalarOrEltToNextPow2OrMinSize(1)
      .minScalar(1, s32)
      .lowerIf([](const LegalityQuery &Query) {
        return Query.Types[1].isVector() &&
               Query.Types[1].getScalarSizeInBits() == 64 &&
               Query.Types[0].getScalarSizeInBits() == 16;
      })
      .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
      .scalarizeIf(
          // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
          [](const LegalityQuery &Query) {
            return Query.Types[0].getScalarSizeInBits() == 32 &&
                   Query.Types[1].getScalarSizeInBits() == 64;
          },
          0)
      .widenScalarIf(
          [](const LegalityQuery &Query) {
            return Query.Types[1].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() <
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(0, 1))
      .widenScalarIf(
          [](const LegalityQuery &Query) {
            return Query.Types[0].getScalarSizeInBits() <= 64 &&
                   Query.Types[0].getScalarSizeInBits() >
                       Query.Types[1].getScalarSizeInBits();
          },
          LegalizeMutations::changeElementSizeTo(1, 0))
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(0, s64, 2)
      .libcallFor({{s16, s128},
                   {s32, s128},
                   {s64, s128},
                   {s128, s128},
                   {s128, s32},
                   {s128, s64}});

  // Control-flow
  getActionDefinitionsBuilder(G_BR).alwaysLegal();
  getActionDefinitionsBuilder(G_BRCOND)
      .legalFor({s32})
      .clampScalar(0, s32, s32);
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, s64)
      .clampScalar(1, s32, s32)
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
      .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
      .lowerIf(isVector(0));

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});

  if (TM.getCodeModel() == CodeModel::Small)
    getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
  else
    getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
      .legalIf(all(typeIs(0, p0), typeIs(1, p0)));

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalFor({{s64, p0}, {v2s64, v2p0}})
      .widenScalarToNextPow2(0, 64)
      .clampScalar(0, s64, s64)
      .clampMaxNumElements(0, s64, 2);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}, {v2p0, v2s64}})
      .clampMaxNumElements(1, s64, 2);

  // Casts for 32 and 64-bit width types are just copies.
  // Same for 128-bit width types, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // Keeping 32-bit instructions legal to prevent regression in some tests
      .legalForCartesianProduct({s32, v2s16, v4s8})
      .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
      .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
      .customIf([=](const LegalityQuery &Query) {
        // Handle casts from i1 vectors to scalars.
        LLT DstTy = Query.Types[0];
        LLT SrcTy = Query.Types[1];
        return DstTy.isScalar() && SrcTy.isVector() &&
               SrcTy.getScalarSizeInBits() == 1;
      })
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isVector() != Query.Types[1].isVector();
      })
      .moreElementsToNextPow2(0)
      .clampNumElements(0, v8s8, v16s8)
      .clampNumElements(0, v4s16, v8s16)
      .clampNumElements(0, v2s32, v4s32)
      .lower();

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
      .lowerIf(
          all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));

  bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();

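  // When outline atomics are requested and LSE is unavailable, compare-and-swap
  // and the basic read-modify-write operations below are lowered to libcalls.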
  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
      .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
      .customFor(!UseOutlineAtomics, {{s128, p0}})
      .libcallFor(UseOutlineAtomics,
                  {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
                               G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
                               G_ATOMICRMW_XOR})
      .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
      .libcallFor(UseOutlineAtomics,
                  {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
      .clampScalar(0, s32, s64);

  // Do not outline these atomics operations, as per comment in
  // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
  getActionDefinitionsBuilder(
      {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
      .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
    getActionDefinitionsBuilder(Op)
        .widenScalarToNextPow2(LitTyIdx, 8)
        .widenScalarToNextPow2(BigTyIdx, 32)
        .clampScalar(LitTyIdx, s8, s64)
        .clampScalar(BigTyIdx, s32, s128)
        .legalIf([=](const LegalityQuery &Q) {
          switch (Q.Types[BigTyIdx].getSizeInBits()) {
          case 32:
          case 64:
          case 128:
            break;
          default:
            return false;
          }
          switch (Q.Types[LitTyIdx].getSizeInBits()) {
          case 8:
          case 16:
          case 32:
          case 64:
            return true;
          default:
            return false;
          }
        });
  }

  // TODO: nxv4s16, nxv2s16, nxv2s32
  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .legalFor(HasSVE, {{s16, nxv16s8, s64},
                         {s16, nxv8s16, s64},
                         {s32, nxv4s32, s64},
                         {s64, nxv2s64, s64}})
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        if (Query.Types[1].isScalableVector())
          return false;
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .customIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
               VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
      })
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) {
            // We want to promote <M x s1> to <M x s64> if that wouldn't
            // cause the total vector size to exceed 128 bits.
            return Query.Types[1].isFixedVector() &&
                   Query.Types[1].getNumElements() <= 2;
          },
          0, s64)
1162 .minScalarOrEltIf(
1163 Predicate: [=](const LegalityQuery &Query) {
1164 return Query.Types[1].isFixedVector() &&
1165 Query.Types[1].getNumElements() <= 4;
1166 },
1167 TypeIdx: 0, Ty: s32)
1168 .minScalarOrEltIf(
1169 Predicate: [=](const LegalityQuery &Query) {
1170 return Query.Types[1].isFixedVector() &&
1171 Query.Types[1].getNumElements() <= 8;
1172 },
1173 TypeIdx: 0, Ty: s16)
1174 .minScalarOrEltIf(
1175 Predicate: [=](const LegalityQuery &Query) {
1176 return Query.Types[1].isFixedVector() &&
1177 Query.Types[1].getNumElements() <= 16;
1178 },
1179 TypeIdx: 0, Ty: s8)
1180 .minScalarOrElt(TypeIdx: 0, Ty: s8) // Worst case, we need at least s8.
1181 .moreElementsToNextPow2(TypeIdx: 1)
1182 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1183 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1184 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1185 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 16)
1186 .clampMaxNumElements(TypeIdx: 1, EltTy: p0, MaxElements: 2)
1187 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 1, Size: 64), TypeIdx: 1);
1188
1189 getActionDefinitionsBuilder(Opcode: G_INSERT_VECTOR_ELT)
1190 .legalIf(
1191 Predicate: typeInSet(TypeIdx: 0, TypesInit: {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1192 .legalFor(Pred: HasSVE, Types: {{nxv16s8, s32, s64},
1193 {nxv8s16, s32, s64},
1194 {nxv4s32, s32, s64},
1195 {nxv2s64, s64, s64}})
1196 .moreElementsToNextPow2(TypeIdx: 0)
1197 .widenVectorEltsToVectorMinSize(TypeIdx: 0, VectorSize: 64)
1198 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
1199 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
1200 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
1201 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
1202 .clampMaxNumElements(TypeIdx: 0, EltTy: p0, MaxElements: 2)
1203 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0);
1204
1205 getActionDefinitionsBuilder(Opcode: G_BUILD_VECTOR)
1206 .legalFor(Types: {{v8s8, s8},
1207 {v16s8, s8},
1208 {v4s16, s16},
1209 {v8s16, s16},
1210 {v2s32, s32},
1211 {v4s32, s32},
1212 {v2s64, s64},
1213 {v2p0, p0}})
1214 .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: v4s32)
1215 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
1216 .minScalarOrElt(TypeIdx: 0, Ty: s8)
1217 .widenVectorEltsToVectorMinSize(TypeIdx: 0, VectorSize: 64)
1218 .widenScalarOrEltToNextPow2(TypeIdx: 0)
1219 .minScalarSameAs(TypeIdx: 1, LargeTypeIdx: 0);
1220
1221 getActionDefinitionsBuilder(Opcode: G_BUILD_VECTOR_TRUNC).lower();
1222
1223 getActionDefinitionsBuilder(Opcode: G_SHUFFLE_VECTOR)
1224 .legalIf(Predicate: [=](const LegalityQuery &Query) {
1225 const LLT &DstTy = Query.Types[0];
1226 const LLT &SrcTy = Query.Types[1];
1227 // For now just support the TBL2 variant which needs the source vectors
1228 // to be the same size as the dest.
1229 if (DstTy != SrcTy)
1230 return false;
1231 return llvm::is_contained(
1232 Set: {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, Element: DstTy);
1233 })
1234 .moreElementsIf(
1235 Predicate: [](const LegalityQuery &Query) {
1236 return Query.Types[0].getNumElements() >
1237 Query.Types[1].getNumElements();
1238 },
1239 Mutation: changeTo(TypeIdx: 1, FromTypeIdx: 0))
1240 .moreElementsToNextPow2(TypeIdx: 0)
1241 .moreElementsIf(
1242 Predicate: [](const LegalityQuery &Query) {
1243 return Query.Types[0].getNumElements() <
1244 Query.Types[1].getNumElements();
1245 },
1246 Mutation: changeTo(TypeIdx: 0, FromTypeIdx: 1))
1247 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 0, MinSize: 8)
1248 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
1249 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
1250 .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: v4s32)
1251 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
1252 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
1253 .bitcastIf(Predicate: isPointerVector(TypeIdx: 0), Mutation: [=](const LegalityQuery &Query) {
1254        // Bitcast vectors of pointers to vectors of i64.
1255 const LLT DstTy = Query.Types[0];
1256 return std::pair(0, LLT::vector(EC: DstTy.getElementCount(), ScalarSizeInBits: 64));
1257 });
1258
1259 getActionDefinitionsBuilder(Opcode: G_CONCAT_VECTORS)
1260 .legalFor(Types: {{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
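      // Otherwise, when both types are fixed vectors no larger than 128/64
      // bits, bitcast the destination so that each of its elements spans one
      // whole source vector; the concat then becomes a merge of those wider
      // elements.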
1261 .bitcastIf(
1262 Predicate: [=](const LegalityQuery &Query) {
1263 return Query.Types[0].isFixedVector() &&
1264 Query.Types[1].isFixedVector() &&
1265 Query.Types[0].getSizeInBits() <= 128 &&
1266 Query.Types[1].getSizeInBits() <= 64;
1267 },
1268 Mutation: [=](const LegalityQuery &Query) {
1269 const LLT DstTy = Query.Types[0];
1270 const LLT SrcTy = Query.Types[1];
1271 return std::pair(
1272 0, DstTy.changeElementSize(NewEltSize: SrcTy.getSizeInBits())
1273 .changeElementCount(
1274 EC: DstTy.getElementCount().divideCoefficientBy(
1275 RHS: SrcTy.getNumElements())));
1276 });
1277
1278 getActionDefinitionsBuilder(Opcode: G_EXTRACT_SUBVECTOR)
1279 .legalFor(Types: {{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1280 .widenScalarOrEltToNextPow2(TypeIdx: 0)
1281 .immIdx(ImmIdx: 0); // Inform verifier imm idx 0 is handled.
1282
1283 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1284 getActionDefinitionsBuilder(Opcode: G_SPLAT_VECTOR)
1285 .legalFor(Pred: HasSVE, Types: {{nxv4s32, s32}, {nxv2s64, s64}});
1286
1287 getActionDefinitionsBuilder(Opcode: G_JUMP_TABLE).legalFor(Types: {p0});
1288
1289 getActionDefinitionsBuilder(Opcode: G_BRJT).legalFor(Types: {{p0, s64}});
1290
1291 getActionDefinitionsBuilder(Opcodes: {G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1292
1293 getActionDefinitionsBuilder(Opcode: G_DYN_STACKALLOC).custom();
1294
1295 getActionDefinitionsBuilder(Opcodes: {G_STACKSAVE, G_STACKRESTORE}).lower();
1296
1297 if (ST.hasMOPS()) {
1298 // G_BZERO is not supported. Currently it is only emitted by
1299 // PreLegalizerCombiner for G_MEMSET with zero constant.
1300 getActionDefinitionsBuilder(Opcode: G_BZERO).unsupported();
1301
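    // The MOPS memset instructions take the value to store in a 64-bit
    // register, so an s8 set value is custom-legalized (any-extended to s64)
    // in legalizeMemOps.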
1302 getActionDefinitionsBuilder(Opcode: G_MEMSET)
1303 .legalForCartesianProduct(Types0: {p0}, Types1: {s64}, Types2: {s64})
1304 .customForCartesianProduct(Types0: {p0}, Types1: {s8}, Types2: {s64})
1305 .immIdx(ImmIdx: 0); // Inform verifier imm idx 0 is handled.
1306
1307 getActionDefinitionsBuilder(Opcodes: {G_MEMCPY, G_MEMMOVE})
1308 .legalForCartesianProduct(Types0: {p0}, Types1: {p0}, Types2: {s64})
1309 .immIdx(ImmIdx: 0); // Inform verifier imm idx 0 is handled.
1310
1311 // G_MEMCPY_INLINE does not have a tailcall immediate
1312 getActionDefinitionsBuilder(Opcode: G_MEMCPY_INLINE)
1313 .legalForCartesianProduct(Types0: {p0}, Types1: {p0}, Types2: {s64});
1314
1315 } else {
1316 getActionDefinitionsBuilder(Opcodes: {G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1317 .libcall();
1318 }
1319
1320 // For fadd reductions we have pairwise operations available. We treat the
1321 // usual legal types as legal and handle the lowering to pairwise instructions
1322 // later.
1323 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_FADD)
1324 .legalFor(Types: {{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1325 .legalFor(Pred: HasFP16, Types: {{s16, v4s16}, {s16, v8s16}})
1326 .minScalarOrElt(TypeIdx: 0, Ty: MinFPScalar)
1327 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1328 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1329 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1330 .moreElementsToNextPow2(TypeIdx: 1)
1331 .scalarize(TypeIdx: 1)
1332 .lower();
1333
1334   // For fmul reductions we need to split up into individual operations. We
1335   // clamp to 128-bit vectors, then to 64-bit vectors, to produce a cascade of
1336   // smaller types, followed by scalarizing what remains.
1337 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_FMUL)
1338 .minScalarOrElt(TypeIdx: 0, Ty: MinFPScalar)
1339 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1340 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1341 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1342 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 2)
1343 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 4)
1344 .scalarize(TypeIdx: 1)
1345 .lower();
1346
1347 getActionDefinitionsBuilder(Opcodes: {G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1348 .scalarize(TypeIdx: 2)
1349 .lower();
1350
1351 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_ADD)
1352 .legalFor(Types: {{s8, v8s8},
1353 {s8, v16s8},
1354 {s16, v4s16},
1355 {s16, v8s16},
1356 {s32, v2s32},
1357 {s32, v4s32},
1358 {s64, v2s64}})
1359 .moreElementsToNextPow2(TypeIdx: 1)
1360 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1361 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1362 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1363 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 16)
1364 .widenVectorEltsToVectorMinSize(TypeIdx: 1, VectorSize: 64)
1365 .scalarize(TypeIdx: 1);
1366
1367 getActionDefinitionsBuilder(Opcodes: {G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1368 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1369 .legalFor(Types: {{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1370 .legalFor(Pred: HasFP16, Types: {{s16, v4s16}, {s16, v8s16}})
1371 .minScalarOrElt(TypeIdx: 0, Ty: MinFPScalar)
1372 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1373 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1374 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1375 .scalarize(TypeIdx: 1)
1376 .lower();
1377
1378 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_MUL)
1379 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 2)
1380 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 4)
1381 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 8)
1382 .scalarize(TypeIdx: 1)
1383 .lower();
1384
1385 getActionDefinitionsBuilder(
1386 Opcodes: {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1387 .legalFor(Types: {{s8, v8s8},
1388 {s8, v16s8},
1389 {s16, v4s16},
1390 {s16, v8s16},
1391 {s32, v2s32},
1392 {s32, v4s32}})
1393 .moreElementsIf(
1394 Predicate: [=](const LegalityQuery &Query) {
1395 return Query.Types[1].isVector() &&
1396 Query.Types[1].getElementType() != s8 &&
1397 Query.Types[1].getNumElements() & 1;
1398 },
1399 Mutation: LegalizeMutations::moreElementsToNextPow2(TypeIdx: 1))
1400 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1401 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1402 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1403 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 16)
1404 .scalarize(TypeIdx: 1)
1405 .lower();
1406
1407 getActionDefinitionsBuilder(
1408 Opcodes: {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1409 // Try to break down into smaller vectors as long as they're at least 64
1410 // bits. This lets us use vector operations for some parts of the
1411 // reduction.
1412 .fewerElementsIf(
1413 Predicate: [=](const LegalityQuery &Q) {
1414 LLT SrcTy = Q.Types[1];
1415 if (SrcTy.isScalar())
1416 return false;
1417 if (!isPowerOf2_32(Value: SrcTy.getNumElements()))
1418 return false;
1419 // We can usually perform 64b vector operations.
1420 return SrcTy.getSizeInBits() > 64;
1421 },
1422 Mutation: [=](const LegalityQuery &Q) {
1423 LLT SrcTy = Q.Types[1];
1424 return std::make_pair(x: 1, y: SrcTy.divide(Factor: 2));
1425 })
1426 .scalarize(TypeIdx: 1)
1427 .lower();
1428
1429 // TODO: Update this to correct handling when adding AArch64/SVE support.
1430 getActionDefinitionsBuilder(Opcode: G_VECTOR_COMPRESS).lower();
1431
1432 // Access to floating-point environment.
1433 getActionDefinitionsBuilder(Opcodes: {G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1434 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1435 .libcall();
1436
1437 getActionDefinitionsBuilder(Opcode: G_IS_FPCLASS).lower();
1438
1439 getActionDefinitionsBuilder(Opcode: G_PREFETCH).custom();
1440
1441 getActionDefinitionsBuilder(Opcodes: {G_SCMP, G_UCMP}).lower();
1442
1443 getLegacyLegalizerInfo().computeTables();
1444 verify(MII: *ST.getInstrInfo());
1445}
1446
1447bool AArch64LegalizerInfo::legalizeCustom(
1448 LegalizerHelper &Helper, MachineInstr &MI,
1449 LostDebugLocObserver &LocObserver) const {
1450 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1451 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1452 GISelChangeObserver &Observer = Helper.Observer;
1453 switch (MI.getOpcode()) {
1454 default:
1455 // No idea what to do.
1456 return false;
1457 case TargetOpcode::G_VAARG:
1458 return legalizeVaArg(MI, MRI, MIRBuilder);
1459 case TargetOpcode::G_LOAD:
1460 case TargetOpcode::G_STORE:
1461 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1462 case TargetOpcode::G_SHL:
1463 case TargetOpcode::G_ASHR:
1464 case TargetOpcode::G_LSHR:
1465 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1466 case TargetOpcode::G_GLOBAL_VALUE:
1467 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1468 case TargetOpcode::G_SBFX:
1469 case TargetOpcode::G_UBFX:
1470 return legalizeBitfieldExtract(MI, MRI, Helper);
1471 case TargetOpcode::G_FSHL:
1472 case TargetOpcode::G_FSHR:
1473 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1474 case TargetOpcode::G_ROTR:
1475 return legalizeRotate(MI, MRI, Helper);
1476 case TargetOpcode::G_CTPOP:
1477 return legalizeCTPOP(MI, MRI, Helper);
1478 case TargetOpcode::G_ATOMIC_CMPXCHG:
1479 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1480 case TargetOpcode::G_CTTZ:
1481 return legalizeCTTZ(MI, Helper);
1482 case TargetOpcode::G_BZERO:
1483 case TargetOpcode::G_MEMCPY:
1484 case TargetOpcode::G_MEMMOVE:
1485 case TargetOpcode::G_MEMSET:
1486 return legalizeMemOps(MI, Helper);
1487 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1488 return legalizeExtractVectorElt(MI, MRI, Helper);
1489 case TargetOpcode::G_DYN_STACKALLOC:
1490 return legalizeDynStackAlloc(MI, Helper);
1491 case TargetOpcode::G_PREFETCH:
1492 return legalizePrefetch(MI, Helper);
1493 case TargetOpcode::G_ABS:
1494 return Helper.lowerAbsToCNeg(MI);
1495 case TargetOpcode::G_ICMP:
1496 return legalizeICMP(MI, MRI, MIRBuilder);
1497 case TargetOpcode::G_BITCAST:
1498 return legalizeBitcast(MI, Helper);
1499 case TargetOpcode::G_FPTRUNC:
1500     // To lower f64 to f16 correctly, we need to go through f32 as an
1501     // intermediate step.
1502 return legalizeFptrunc(MI, MIRBuilder, MRI);
1503 }
1504
1505 llvm_unreachable("expected switch to return");
1506}
1507
1508bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1509 LegalizerHelper &Helper) const {
1510 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1511 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1512   // We're trying to handle casts from i1 vectors to scalars by storing the
1513   // value to the stack and reloading it.
1514 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1515 SrcTy.getElementType() != LLT::scalar(SizeInBits: 1))
1516 return false;
1517
1518 Helper.createStackStoreLoad(Res: DstReg, Val: SrcReg);
1519 MI.eraseFromParent();
1520 return true;
1521}
1522
1523bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1524 MachineRegisterInfo &MRI,
1525 MachineIRBuilder &MIRBuilder,
1526 GISelChangeObserver &Observer,
1527 LegalizerHelper &Helper) const {
1528 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1529 MI.getOpcode() == TargetOpcode::G_FSHR);
1530
1531   // Keep as G_FSHR if the shift amount is a suitable G_CONSTANT; otherwise use
1532   // the generic lowering.
1533 Register ShiftNo = MI.getOperand(i: 3).getReg();
1534 LLT ShiftTy = MRI.getType(Reg: ShiftNo);
1535 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ShiftNo, MRI);
1536
1537 // Adjust shift amount according to Opcode (FSHL/FSHR)
1538 // Convert FSHL to FSHR
1539 LLT OperationTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
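  // The width of the operation, expressed as an APInt in the shift-amount
  // type, so the constant shift amount can be reduced modulo the width.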
1540 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1541
1542 // Lower non-constant shifts and leave zero shifts to the optimizer.
1543 if (!VRegAndVal || VRegAndVal->Value.urem(RHS: BitWidth) == 0)
1544 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1545 LegalizerHelper::LegalizeResult::Legalized);
1546
1547 APInt Amount = VRegAndVal->Value.urem(RHS: BitWidth);
1548
1549 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1550
1551   // If the instruction is a G_FSHR with a 64-bit G_CONSTANT shift amount in
1552   // the range [0, BitWidth), it is already legal.
1553 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1554 VRegAndVal->Value.ult(RHS: BitWidth))
1555 return true;
1556
1557   // Materialize the adjusted shift amount as a 64-bit constant.
1558 auto Cast64 = MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: Amount.zext(width: 64));
1559
1560 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1561 Observer.changingInstr(MI);
1562 MI.getOperand(i: 3).setReg(Cast64.getReg(Idx: 0));
1563 Observer.changedInstr(MI);
1564 }
1565 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1566 // instruction
1567 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1568 MIRBuilder.buildInstr(Opc: TargetOpcode::G_FSHR, DstOps: {MI.getOperand(i: 0).getReg()},
1569 SrcOps: {MI.getOperand(i: 1).getReg(), MI.getOperand(i: 2).getReg(),
1570 Cast64.getReg(Idx: 0)});
1571 MI.eraseFromParent();
1572 }
1573 return true;
1574}
1575
1576bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1577 MachineRegisterInfo &MRI,
1578 MachineIRBuilder &MIRBuilder) const {
1579 Register DstReg = MI.getOperand(i: 0).getReg();
1580 Register SrcReg1 = MI.getOperand(i: 2).getReg();
1581 Register SrcReg2 = MI.getOperand(i: 3).getReg();
1582 LLT DstTy = MRI.getType(Reg: DstReg);
1583 LLT SrcTy = MRI.getType(Reg: SrcReg1);
1584
1585 // Check the vector types are legal
1586 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1587 DstTy.getNumElements() != SrcTy.getNumElements() ||
1588 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1589 return false;
1590
1591   // Lower G_ICMP NE to G_ICMP EQ followed by a NOT, to allow better pattern
1592   // matching in later passes.
1593 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(i: 1).getPredicate();
1594 if (Pred != CmpInst::ICMP_NE)
1595 return true;
1596 Register CmpReg =
1597 MIRBuilder
1598 .buildICmp(Pred: CmpInst::ICMP_EQ, Res: MRI.getType(Reg: DstReg), Op0: SrcReg1, Op1: SrcReg2)
1599 .getReg(Idx: 0);
1600 MIRBuilder.buildNot(Dst: DstReg, Src0: CmpReg);
1601
1602 MI.eraseFromParent();
1603 return true;
1604}
1605
1606bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1607 MachineRegisterInfo &MRI,
1608 LegalizerHelper &Helper) const {
1609 // To allow for imported patterns to match, we ensure that the rotate amount
1610 // is 64b with an extension.
1611 Register AmtReg = MI.getOperand(i: 2).getReg();
1612 LLT AmtTy = MRI.getType(Reg: AmtReg);
1613 (void)AmtTy;
1614 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1615 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1616 auto NewAmt = Helper.MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 64), Op: AmtReg);
1617 Helper.Observer.changingInstr(MI);
1618 MI.getOperand(i: 2).setReg(NewAmt.getReg(Idx: 0));
1619 Helper.Observer.changedInstr(MI);
1620 return true;
1621}
1622
1623bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1624 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1625 GISelChangeObserver &Observer) const {
1626 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1627 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1628 // G_ADD_LOW instructions.
1629   // By splitting this here, we can optimize accesses in the small code model by
1630   // folding the G_ADD_LOW into the load/store offset.
1631 auto &GlobalOp = MI.getOperand(i: 1);
1632 // Don't modify an intrinsic call.
1633 if (GlobalOp.isSymbol())
1634 return true;
1635 const auto* GV = GlobalOp.getGlobal();
1636 if (GV->isThreadLocal())
1637 return true; // Don't want to modify TLS vars.
1638
1639 auto &TM = ST->getTargetLowering()->getTargetMachine();
1640 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1641
1642 if (OpFlags & AArch64II::MO_GOT)
1643 return true;
1644
1645 auto Offset = GlobalOp.getOffset();
1646 Register DstReg = MI.getOperand(i: 0).getReg();
1647 auto ADRP = MIRBuilder.buildInstr(Opc: AArch64::ADRP, DstOps: {LLT::pointer(AddressSpace: 0, SizeInBits: 64)}, SrcOps: {})
1648 .addGlobalAddress(GV, Offset, TargetFlags: OpFlags | AArch64II::MO_PAGE);
1649 // Set the regclass on the dest reg too.
1650 MRI.setRegClass(Reg: ADRP.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
1651
1652 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1653 // by creating a MOVK that sets bits 48-63 of the register to (global address
1654 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1655 // prevent an incorrect tag being generated during relocation when the
1656 // global appears before the code section. Without the offset, a global at
1657 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1658 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1659 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1660 // instead of `0xf`.
1661 // This assumes that we're in the small code model so we can assume a binary
1662 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1663 // binary must also be loaded into address range [0, 2^48). Both of these
1664 // properties need to be ensured at runtime when using tagged addresses.
1665 if (OpFlags & AArch64II::MO_TAGGED) {
1666 assert(!Offset &&
1667 "Should not have folded in an offset for a tagged global!");
1668 ADRP = MIRBuilder.buildInstr(Opc: AArch64::MOVKXi, DstOps: {LLT::pointer(AddressSpace: 0, SizeInBits: 64)}, SrcOps: {ADRP})
1669 .addGlobalAddress(GV, Offset: 0x100000000,
1670 TargetFlags: AArch64II::MO_PREL | AArch64II::MO_G3)
1671 .addImm(Val: 48);
1672 MRI.setRegClass(Reg: ADRP.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
1673 }
1674
1675 MIRBuilder.buildInstr(Opc: AArch64::G_ADD_LOW, DstOps: {DstReg}, SrcOps: {ADRP})
1676 .addGlobalAddress(GV, Offset,
1677 TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1678 MI.eraseFromParent();
1679 return true;
1680}
1681
1682bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1683 MachineInstr &MI) const {
1684 MachineIRBuilder &MIB = Helper.MIRBuilder;
1685 MachineRegisterInfo &MRI = *MIB.getMRI();
1686
1687 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1688 MIB.buildInstr(Opc: Opcode, DstOps: {MI.getOperand(i: 0)}, SrcOps: {MI.getOperand(i: 2)});
1689 MI.eraseFromParent();
1690 return true;
1691 };
1692 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1693 MIB.buildInstr(Opc: Opcode, DstOps: {MI.getOperand(i: 0)},
1694 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3)});
1695 MI.eraseFromParent();
1696 return true;
1697 };
1698 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1699 MIB.buildInstr(Opc: Opcode, DstOps: {MI.getOperand(i: 0)},
1700 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3), MI.getOperand(i: 4)});
1701 MI.eraseFromParent();
1702 return true;
1703 };
1704
1705 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(Val&: MI).getIntrinsicID();
1706 switch (IntrinsicID) {
1707 case Intrinsic::vacopy: {
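    // On Darwin and Windows the va_list is a single pointer; elsewhere it is
    // the AAPCS64 va_list struct, which is 32 bytes (20 bytes for ILP32).
    // Copy it as one opaque blob of the appropriate size.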
1708 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1709 unsigned VaListSize =
1710 (ST->isTargetDarwin() || ST->isTargetWindows())
1711 ? PtrSize
1712 : ST->isTargetILP32() ? 20 : 32;
1713
1714 MachineFunction &MF = *MI.getMF();
1715 auto Val = MF.getRegInfo().createGenericVirtualRegister(
1716 Ty: LLT::scalar(SizeInBits: VaListSize * 8));
1717 MIB.buildLoad(Res: Val, Addr: MI.getOperand(i: 2),
1718 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
1719 F: MachineMemOperand::MOLoad,
1720 Size: VaListSize, BaseAlignment: Align(PtrSize)));
1721 MIB.buildStore(Val, Addr: MI.getOperand(i: 1),
1722 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
1723 F: MachineMemOperand::MOStore,
1724 Size: VaListSize, BaseAlignment: Align(PtrSize)));
1725 MI.eraseFromParent();
1726 return true;
1727 }
1728 case Intrinsic::get_dynamic_area_offset: {
1729 MIB.buildConstant(Res: MI.getOperand(i: 0).getReg(), Val: 0);
1730 MI.eraseFromParent();
1731 return true;
1732 }
1733 case Intrinsic::aarch64_mops_memset_tag: {
1734 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1735 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1736 // the instruction).
1737 auto &Value = MI.getOperand(i: 3);
1738 Register ExtValueReg = MIB.buildAnyExt(Res: LLT::scalar(SizeInBits: 64), Op: Value).getReg(Idx: 0);
1739 Value.setReg(ExtValueReg);
1740 return true;
1741 }
1742 case Intrinsic::aarch64_prefetch: {
1743 auto &AddrVal = MI.getOperand(i: 1);
1744
1745 int64_t IsWrite = MI.getOperand(i: 2).getImm();
1746 int64_t Target = MI.getOperand(i: 3).getImm();
1747 int64_t IsStream = MI.getOperand(i: 4).getImm();
1748 int64_t IsData = MI.getOperand(i: 5).getImm();
1749
1750 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1751 (!IsData << 3) | // IsDataCache bit
1752 (Target << 1) | // Cache level bits
1753 (unsigned)IsStream; // Stream bit
1754
1755 MIB.buildInstr(Opcode: AArch64::G_AARCH64_PREFETCH).addImm(Val: PrfOp).add(MO: AddrVal);
1756 MI.eraseFromParent();
1757 return true;
1758 }
1759 case Intrinsic::aarch64_range_prefetch: {
1760 auto &AddrVal = MI.getOperand(i: 1);
1761
1762 int64_t IsWrite = MI.getOperand(i: 2).getImm();
1763 int64_t IsStream = MI.getOperand(i: 3).getImm();
1764 unsigned PrfOp = (IsStream << 2) | IsWrite;
1765
1766 MIB.buildInstr(Opcode: AArch64::G_AARCH64_RANGE_PREFETCH)
1767 .addImm(Val: PrfOp)
1768 .add(MO: AddrVal)
1769 .addUse(RegNo: MI.getOperand(i: 4).getReg()); // Metadata
1770 MI.eraseFromParent();
1771 return true;
1772 }
1773 case Intrinsic::aarch64_prefetch_ir: {
1774 auto &AddrVal = MI.getOperand(i: 1);
1775 MIB.buildInstr(Opcode: AArch64::G_AARCH64_PREFETCH).addImm(Val: 24).add(MO: AddrVal);
1776 MI.eraseFromParent();
1777 return true;
1778 }
1779 case Intrinsic::aarch64_neon_uaddv:
1780 case Intrinsic::aarch64_neon_saddv:
1781 case Intrinsic::aarch64_neon_umaxv:
1782 case Intrinsic::aarch64_neon_smaxv:
1783 case Intrinsic::aarch64_neon_uminv:
1784 case Intrinsic::aarch64_neon_sminv: {
1785 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1786 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1787 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1788
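    // The intrinsic's scalar result may be wider than the vector's element
    // type; give the reduction the element-typed result it naturally produces
    // and sign/zero-extend back to the original destination afterwards.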
1789 auto OldDst = MI.getOperand(i: 0).getReg();
1790 auto OldDstTy = MRI.getType(Reg: OldDst);
1791 LLT NewDstTy = MRI.getType(Reg: MI.getOperand(i: 2).getReg()).getElementType();
1792 if (OldDstTy == NewDstTy)
1793 return true;
1794
1795 auto NewDst = MRI.createGenericVirtualRegister(Ty: NewDstTy);
1796
1797 Helper.Observer.changingInstr(MI);
1798 MI.getOperand(i: 0).setReg(NewDst);
1799 Helper.Observer.changedInstr(MI);
1800
1801 MIB.setInsertPt(MBB&: MIB.getMBB(), II: ++MIB.getInsertPt());
1802 MIB.buildExtOrTrunc(ExtOpc: IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1803 Res: OldDst, Op: NewDst);
1804
1805 return true;
1806 }
1807 case Intrinsic::aarch64_neon_uaddlp:
1808 case Intrinsic::aarch64_neon_saddlp: {
1809 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1810 ? AArch64::G_UADDLP
1811 : AArch64::G_SADDLP;
1812 MIB.buildInstr(Opc, DstOps: {MI.getOperand(i: 0)}, SrcOps: {MI.getOperand(i: 2)});
1813 MI.eraseFromParent();
1814
1815 return true;
1816 }
1817 case Intrinsic::aarch64_neon_uaddlv:
1818 case Intrinsic::aarch64_neon_saddlv: {
1819 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1820 ? AArch64::G_UADDLV
1821 : AArch64::G_SADDLV;
1822 Register DstReg = MI.getOperand(i: 0).getReg();
1823 Register SrcReg = MI.getOperand(i: 2).getReg();
1824 LLT DstTy = MRI.getType(Reg: DstReg);
1825
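    // G_UADDLV/G_SADDLV produce their (widened) scalar result in a vector
    // register, so build the node with a vector result type and extract lane 0
    // afterwards, truncating if the IR expects a narrower scalar.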
1826 LLT MidTy, ExtTy;
1827 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1828 MidTy = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
1829 ExtTy = LLT::scalar(SizeInBits: 32);
1830 } else {
1831 MidTy = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64);
1832 ExtTy = LLT::scalar(SizeInBits: 64);
1833 }
1834
1835 Register MidReg =
1836 MIB.buildInstr(Opc, DstOps: {MidTy}, SrcOps: {SrcReg})->getOperand(i: 0).getReg();
1837 Register ZeroReg =
1838 MIB.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: 0)->getOperand(i: 0).getReg();
1839 Register ExtReg = MIB.buildInstr(Opc: AArch64::G_EXTRACT_VECTOR_ELT, DstOps: {ExtTy},
1840 SrcOps: {MidReg, ZeroReg})
1841 .getReg(Idx: 0);
1842
1843 if (DstTy.getScalarSizeInBits() < 32)
1844 MIB.buildTrunc(Res: DstReg, Op: ExtReg);
1845 else
1846 MIB.buildCopy(Res: DstReg, Op: ExtReg);
1847
1848 MI.eraseFromParent();
1849
1850 return true;
1851 }
1852 case Intrinsic::aarch64_neon_smax:
1853 return LowerBinOp(TargetOpcode::G_SMAX);
1854 case Intrinsic::aarch64_neon_smin:
1855 return LowerBinOp(TargetOpcode::G_SMIN);
1856 case Intrinsic::aarch64_neon_umax:
1857 return LowerBinOp(TargetOpcode::G_UMAX);
1858 case Intrinsic::aarch64_neon_umin:
1859 return LowerBinOp(TargetOpcode::G_UMIN);
1860 case Intrinsic::aarch64_neon_fmax:
1861 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1862 case Intrinsic::aarch64_neon_fmin:
1863 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1864 case Intrinsic::aarch64_neon_fmaxnm:
1865 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1866 case Intrinsic::aarch64_neon_fminnm:
1867 return LowerBinOp(TargetOpcode::G_FMINNUM);
1868 case Intrinsic::aarch64_neon_pmull:
1869 case Intrinsic::aarch64_neon_pmull64:
1870 return LowerBinOp(AArch64::G_PMULL);
1871 case Intrinsic::aarch64_neon_smull:
1872 return LowerBinOp(AArch64::G_SMULL);
1873 case Intrinsic::aarch64_neon_umull:
1874 return LowerBinOp(AArch64::G_UMULL);
1875 case Intrinsic::aarch64_neon_sabd:
1876 return LowerBinOp(TargetOpcode::G_ABDS);
1877 case Intrinsic::aarch64_neon_uabd:
1878 return LowerBinOp(TargetOpcode::G_ABDU);
1879 case Intrinsic::aarch64_neon_uhadd:
1880 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1881 case Intrinsic::aarch64_neon_urhadd:
1882 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1883 case Intrinsic::aarch64_neon_shadd:
1884 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1885 case Intrinsic::aarch64_neon_srhadd:
1886 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1887 case Intrinsic::aarch64_neon_sqshrn: {
1888 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1889 return true;
1890 // Create right shift instruction. Store the output register in Shr.
1891 auto Shr = MIB.buildInstr(Opc: AArch64::G_VASHR,
1892 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
1893 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
1894 // Build the narrow intrinsic, taking in Shr.
1895 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_SSAT_S, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
1896 MI.eraseFromParent();
1897 return true;
1898 }
1899 case Intrinsic::aarch64_neon_sqshrun: {
1900 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1901 return true;
1902 // Create right shift instruction. Store the output register in Shr.
1903 auto Shr = MIB.buildInstr(Opc: AArch64::G_VASHR,
1904 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
1905 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
1906 // Build the narrow intrinsic, taking in Shr.
1907 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_SSAT_U, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
1908 MI.eraseFromParent();
1909 return true;
1910 }
1911 case Intrinsic::aarch64_neon_sqrshrn: {
1912 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1913 return true;
1914 // Create right shift instruction. Store the output register in Shr.
1915 auto Shr = MIB.buildInstr(Opc: AArch64::G_SRSHR_I,
1916 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
1917 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
1918 // Build the narrow intrinsic, taking in Shr.
1919 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_SSAT_S, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
1920 MI.eraseFromParent();
1921 return true;
1922 }
1923 case Intrinsic::aarch64_neon_sqrshrun: {
1924 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1925 return true;
1926 // Create right shift instruction. Store the output register in Shr.
1927 auto Shr = MIB.buildInstr(Opc: AArch64::G_SRSHR_I,
1928 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
1929 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
1930 // Build the narrow intrinsic, taking in Shr.
1931 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_SSAT_U, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
1932 MI.eraseFromParent();
1933 return true;
1934 }
1935 case Intrinsic::aarch64_neon_uqrshrn: {
1936 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1937 return true;
1938 // Create right shift instruction. Store the output register in Shr.
1939 auto Shr = MIB.buildInstr(Opc: AArch64::G_URSHR_I,
1940 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
1941 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
1942 // Build the narrow intrinsic, taking in Shr.
1943 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_USAT_U, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
1944 MI.eraseFromParent();
1945 return true;
1946 }
1947 case Intrinsic::aarch64_neon_uqshrn: {
1948 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1949 return true;
1950 // Create right shift instruction. Store the output register in Shr.
1951 auto Shr = MIB.buildInstr(Opc: AArch64::G_VLSHR,
1952 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
1953 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
1954 // Build the narrow intrinsic, taking in Shr.
1955 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_USAT_U, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
1956 MI.eraseFromParent();
1957 return true;
1958 }
1959 case Intrinsic::aarch64_neon_sqshlu: {
1960     // Check if the last operand is a constant or a constant splat vector.
1961 auto ShiftAmount = isConstantOrConstantSplatVector(
1962 MI&: *MRI.getVRegDef(Reg: MI.getOperand(i: 3).getReg()), MRI);
1963 if (ShiftAmount) {
1964 // If so, create a new intrinsic with the correct shift amount
1965 MIB.buildInstr(Opc: AArch64::G_SQSHLU_I, DstOps: {MI.getOperand(i: 0)},
1966 SrcOps: {MI.getOperand(i: 2)})
1967 .addImm(Val: ShiftAmount->getSExtValue());
1968 MI.eraseFromParent();
1969 return true;
1970 }
1971 return false;
1972 }
1973 case Intrinsic::aarch64_neon_vsli: {
1974 MIB.buildInstr(
1975 Opc: AArch64::G_SLI, DstOps: {MI.getOperand(i: 0)},
1976 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3), MI.getOperand(i: 4).getImm()});
1977 MI.eraseFromParent();
1978 break;
1979 }
1980 case Intrinsic::aarch64_neon_vsri: {
1981 MIB.buildInstr(
1982 Opc: AArch64::G_SRI, DstOps: {MI.getOperand(i: 0)},
1983 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3), MI.getOperand(i: 4).getImm()});
1984 MI.eraseFromParent();
1985 break;
1986 }
1987 case Intrinsic::aarch64_neon_abs: {
1988 // Lower the intrinsic to G_ABS.
1989 MIB.buildInstr(Opc: TargetOpcode::G_ABS, DstOps: {MI.getOperand(i: 0)}, SrcOps: {MI.getOperand(i: 2)});
1990 MI.eraseFromParent();
1991 return true;
1992 }
1993 case Intrinsic::aarch64_neon_sqadd: {
1994 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1995 return LowerBinOp(TargetOpcode::G_SADDSAT);
1996 break;
1997 }
1998 case Intrinsic::aarch64_neon_sqsub: {
1999 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
2000 return LowerBinOp(TargetOpcode::G_SSUBSAT);
2001 break;
2002 }
2003 case Intrinsic::aarch64_neon_uqadd: {
2004 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
2005 return LowerBinOp(TargetOpcode::G_UADDSAT);
2006 break;
2007 }
2008 case Intrinsic::aarch64_neon_uqsub: {
2009 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
2010 return LowerBinOp(TargetOpcode::G_USUBSAT);
2011 break;
2012 }
2013 case Intrinsic::aarch64_neon_udot:
2014 return LowerTriOp(AArch64::G_UDOT);
2015 case Intrinsic::aarch64_neon_sdot:
2016 return LowerTriOp(AArch64::G_SDOT);
2017 case Intrinsic::aarch64_neon_usdot:
2018 return LowerTriOp(AArch64::G_USDOT);
2019 case Intrinsic::aarch64_neon_sqxtn:
2020 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
2021 case Intrinsic::aarch64_neon_sqxtun:
2022 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
2023 case Intrinsic::aarch64_neon_uqxtn:
2024 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
2025 case Intrinsic::aarch64_neon_fcvtzu:
2026 return LowerUnaryOp(TargetOpcode::G_FPTOUI_SAT);
2027 case Intrinsic::aarch64_neon_fcvtzs:
2028 return LowerUnaryOp(TargetOpcode::G_FPTOSI_SAT);
2029
2030 case Intrinsic::vector_reverse:
2031 // TODO: Add support for vector_reverse
2032 return false;
2033 }
2034
2035 return true;
2036}
2037
2038bool AArch64LegalizerInfo::legalizeShlAshrLshr(
2039 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
2040 GISelChangeObserver &Observer) const {
2041 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
2042 MI.getOpcode() == TargetOpcode::G_LSHR ||
2043 MI.getOpcode() == TargetOpcode::G_SHL);
2044 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
2045 // imported patterns can select it later. Either way, it will be legal.
2046 Register AmtReg = MI.getOperand(i: 2).getReg();
2047 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: AmtReg, MRI);
2048 if (!VRegAndVal)
2049 return true;
2050 // Check the shift amount is in range for an immediate form.
2051 int64_t Amount = VRegAndVal->Value.getSExtValue();
2052 if (Amount > 31)
2053 return true; // This will have to remain a register variant.
2054 auto ExtCst = MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: Amount);
2055 Observer.changingInstr(MI);
2056 MI.getOperand(i: 2).setReg(ExtCst.getReg(Idx: 0));
2057 Observer.changedInstr(MI);
2058 return true;
2059}
2060
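// Try to fold a G_PTR_ADD of a constant into the LDP/STP addressing mode. The
// offset must fit the signed 7-bit immediate scaled by 8 bytes; otherwise the
// plain base pointer with a zero offset is used.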
2061static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
2062 MachineRegisterInfo &MRI) {
2063 Base = Root;
2064 Offset = 0;
2065
2066 Register NewBase;
2067 int64_t NewOffset;
2068 if (mi_match(R: Root, MRI, P: m_GPtrAdd(L: m_Reg(R&: NewBase), R: m_ICst(Cst&: NewOffset))) &&
2069 isShiftedInt<7, 3>(x: NewOffset)) {
2070 Base = NewBase;
2071 Offset = NewOffset;
2072 }
2073}
2074
2075// FIXME: This should be removed and replaced with the generic bitcast legalize
2076// action.
2077bool AArch64LegalizerInfo::legalizeLoadStore(
2078 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
2079 GISelChangeObserver &Observer) const {
2080 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
2081 MI.getOpcode() == TargetOpcode::G_LOAD);
2082 // Here we just try to handle vector loads/stores where our value type might
2083 // have pointer elements, which the SelectionDAG importer can't handle. To
2084 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
2085 // the value to use s64 types.
2086
2087   // Custom legalization requires that the instruction, if not deleted, be fully
2088   // legalized. To allow further legalization of the instruction, we create a
2089   // new one and erase the existing one.
2090
2091 Register ValReg = MI.getOperand(i: 0).getReg();
2092 const LLT ValTy = MRI.getType(Reg: ValReg);
2093
2094 if (ValTy == LLT::scalar(SizeInBits: 128)) {
2095
2096 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
2097 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2098 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
2099 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
2100 bool IsRcpC3 =
2101 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
2102
2103 LLT s64 = LLT::scalar(SizeInBits: 64);
2104
2105 unsigned Opcode;
2106 if (IsRcpC3) {
2107 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
2108 } else {
2109 // For LSE2, loads/stores should have been converted to monotonic and had
2110 // a fence inserted after them.
2111 assert(Ordering == AtomicOrdering::Monotonic ||
2112 Ordering == AtomicOrdering::Unordered);
2113 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
2114
2115 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
2116 }
2117
2118 MachineInstrBuilder NewI;
2119 if (IsLoad) {
2120 NewI = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {s64, s64}, SrcOps: {});
2121 MIRBuilder.buildMergeLikeInstr(
2122 Res: ValReg, Ops: {NewI->getOperand(i: 0), NewI->getOperand(i: 1)});
2123 } else {
2124 auto Split = MIRBuilder.buildUnmerge(Res: s64, Op: MI.getOperand(i: 0));
2125 NewI = MIRBuilder.buildInstr(
2126 Opc: Opcode, DstOps: {}, SrcOps: {Split->getOperand(i: 0), Split->getOperand(i: 1)});
2127 }
2128
2129 if (IsRcpC3) {
2130 NewI.addUse(RegNo: MI.getOperand(i: 1).getReg());
2131 } else {
2132 Register Base;
2133 int Offset;
2134 matchLDPSTPAddrMode(Root: MI.getOperand(i: 1).getReg(), Base, Offset, MRI);
2135 NewI.addUse(RegNo: Base);
2136 NewI.addImm(Val: Offset / 8);
2137 }
2138
2139 NewI.cloneMemRefs(OtherMI: MI);
2140 constrainSelectedInstRegOperands(I&: *NewI, TII: *ST->getInstrInfo(),
2141 TRI: *MRI.getTargetRegisterInfo(),
2142 RBI: *ST->getRegBankInfo());
2143 MI.eraseFromParent();
2144 return true;
2145 }
2146
2147 if (!ValTy.isPointerVector() ||
2148 ValTy.getElementType().getAddressSpace() != 0) {
2149 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
2150 return false;
2151 }
2152
2153 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
2154 const LLT NewTy = LLT::vector(EC: ValTy.getElementCount(), ScalarSizeInBits: PtrSize);
2155 auto &MMO = **MI.memoperands_begin();
2156 MMO.setType(NewTy);
2157
2158 if (MI.getOpcode() == TargetOpcode::G_STORE) {
2159 auto Bitcast = MIRBuilder.buildBitcast(Dst: NewTy, Src: ValReg);
2160 MIRBuilder.buildStore(Val: Bitcast.getReg(Idx: 0), Addr: MI.getOperand(i: 1), MMO);
2161 } else {
2162 auto NewLoad = MIRBuilder.buildLoad(Res: NewTy, Addr: MI.getOperand(i: 1), MMO);
2163 MIRBuilder.buildBitcast(Dst: ValReg, Src: NewLoad);
2164 }
2165 MI.eraseFromParent();
2166 return true;
2167}
2168
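// Expand G_VAARG by treating the va_list as a plain pointer: load the current
// pointer, realign it if the requested alignment exceeds the pointer
// alignment, load the value, then store back the pointer advanced past the
// argument.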
2169bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
2170 MachineRegisterInfo &MRI,
2171 MachineIRBuilder &MIRBuilder) const {
2172 MachineFunction &MF = MIRBuilder.getMF();
2173 Align Alignment(MI.getOperand(i: 2).getImm());
2174 Register Dst = MI.getOperand(i: 0).getReg();
2175 Register ListPtr = MI.getOperand(i: 1).getReg();
2176
2177 LLT PtrTy = MRI.getType(Reg: ListPtr);
2178 LLT IntPtrTy = LLT::scalar(SizeInBits: PtrTy.getSizeInBits());
2179
2180 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2181 const Align PtrAlign = Align(PtrSize);
2182 auto List = MIRBuilder.buildLoad(
2183 Res: PtrTy, Addr: ListPtr,
2184 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad,
2185 MemTy: PtrTy, base_alignment: PtrAlign));
2186
2187 MachineInstrBuilder DstPtr;
2188 if (Alignment > PtrAlign) {
2189 // Realign the list to the actual required alignment.
2190 auto AlignMinus1 =
2191 MIRBuilder.buildConstant(Res: IntPtrTy, Val: Alignment.value() - 1);
2192 auto ListTmp = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: List, Op1: AlignMinus1.getReg(Idx: 0));
2193 DstPtr = MIRBuilder.buildMaskLowPtrBits(Res: PtrTy, Op0: ListTmp, NumBits: Log2(A: Alignment));
2194 } else
2195 DstPtr = List;
2196
2197 LLT ValTy = MRI.getType(Reg: Dst);
2198 uint64_t ValSize = ValTy.getSizeInBits() / 8;
2199 MIRBuilder.buildLoad(
2200 Res: Dst, Addr: DstPtr,
2201 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad,
2202 MemTy: ValTy, base_alignment: std::max(a: Alignment, b: PtrAlign)));
2203
2204 auto Size = MIRBuilder.buildConstant(Res: IntPtrTy, Val: alignTo(Size: ValSize, A: PtrAlign));
2205
2206 auto NewList = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: DstPtr, Op1: Size.getReg(Idx: 0));
2207
2208 MIRBuilder.buildStore(Val: NewList, Addr: ListPtr,
2209 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
2210 f: MachineMemOperand::MOStore,
2211 MemTy: PtrTy, base_alignment: PtrAlign));
2212
2213 MI.eraseFromParent();
2214 return true;
2215}
2216
2217bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2218 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2219 // Only legal if we can select immediate forms.
2220 // TODO: Lower this otherwise.
2221 return getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 2).getReg(), MRI) &&
2222 getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 3).getReg(), MRI);
2223}
2224
2225bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2226 MachineRegisterInfo &MRI,
2227 LegalizerHelper &Helper) const {
2228 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2229 // it can be more efficiently lowered to the following sequence that uses
2230 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2231 // registers are cheap.
2232 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2233 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2234 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2235 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2236 //
2237 // For 128 bit vector popcounts, we lower to the following sequence:
2238 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2239 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2240 // uaddlp.4s v0, v0 // v4s32, v2s64
2241 // uaddlp.2d v0, v0 // v2s64
2242 //
2243 // For 64 bit vector popcounts, we lower to the following sequence:
2244 // cnt.8b v0, v0 // v4s16, v2s32
2245 // uaddlp.4h v0, v0 // v4s16, v2s32
2246 // uaddlp.2s v0, v0 // v2s32
2247
2248 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2249 Register Dst = MI.getOperand(i: 0).getReg();
2250 Register Val = MI.getOperand(i: 1).getReg();
2251 LLT Ty = MRI.getType(Reg: Val);
2252 unsigned Size = Ty.getSizeInBits();
2253
2254 assert(Ty == MRI.getType(Dst) &&
2255 "Expected src and dst to have the same type!");
2256
2257 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2258 LLT s64 = LLT::scalar(SizeInBits: 64);
2259
2260 auto Split = MIRBuilder.buildUnmerge(Res: s64, Op: Val);
2261 auto CTPOP1 = MIRBuilder.buildCTPOP(Dst: s64, Src0: Split->getOperand(i: 0));
2262 auto CTPOP2 = MIRBuilder.buildCTPOP(Dst: s64, Src0: Split->getOperand(i: 1));
2263 auto Add = MIRBuilder.buildAdd(Dst: s64, Src0: CTPOP1, Src1: CTPOP2);
2264
2265 MIRBuilder.buildZExt(Res: Dst, Op: Add);
2266 MI.eraseFromParent();
2267 return true;
2268 }
2269
2270 if (!ST->hasNEON() ||
2271 MI.getMF()->getFunction().hasFnAttribute(Kind: Attribute::NoImplicitFloat)) {
2272 // Use generic lowering when custom lowering is not possible.
2273 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2274 Helper.lowerBitCount(MI) ==
2275 LegalizerHelper::LegalizeResult::Legalized;
2276 }
2277
2278 // Pre-conditioning: widen Val up to the nearest vector type.
2279 // s32,s64,v4s16,v2s32 -> v8i8
2280 // v8s16,v4s32,v2s64 -> v16i8
2281 LLT VTy = Size == 128 ? LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8) : LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8);
2282 if (Ty.isScalar()) {
2283     assert((Size == 32 || Size == 64 || Size == 128) &&
            "Expected only 32, 64, or 128 bit scalars!");
2284 if (Size == 32) {
2285 Val = MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 64), Op: Val).getReg(Idx: 0);
2286 }
2287 }
2288 Val = MIRBuilder.buildBitcast(Dst: VTy, Src: Val).getReg(Idx: 0);
2289
2290 // Count bits in each byte-sized lane.
2291 auto CTPOP = MIRBuilder.buildCTPOP(Dst: VTy, Src0: Val);
2292
2293 // Sum across lanes.
2294
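  // With the dot-product extension, a UDOT of the per-byte counts against a
  // splat of 1 accumulates each group of four bytes directly into a 32-bit
  // lane, avoiding the chain of pairwise widening adds.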
2295 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2296 Ty.getScalarSizeInBits() != 16) {
2297 LLT Dt = Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) ? LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32) : Ty;
2298 auto Zeros = MIRBuilder.buildConstant(Res: Dt, Val: 0);
2299 auto Ones = MIRBuilder.buildConstant(Res: VTy, Val: 1);
2300 MachineInstrBuilder Sum;
2301
2302 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
2303 auto UDOT =
2304 MIRBuilder.buildInstr(Opc: AArch64::G_UDOT, DstOps: {Dt}, SrcOps: {Zeros, Ones, CTPOP});
2305 Sum = MIRBuilder.buildInstr(Opc: AArch64::G_UADDLP, DstOps: {Ty}, SrcOps: {UDOT});
2306 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
2307 Sum = MIRBuilder.buildInstr(Opc: AArch64::G_UDOT, DstOps: {Dt}, SrcOps: {Zeros, Ones, CTPOP});
2308 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
2309 Sum = MIRBuilder.buildInstr(Opc: AArch64::G_UDOT, DstOps: {Dt}, SrcOps: {Zeros, Ones, CTPOP});
2310 } else {
2311 llvm_unreachable("unexpected vector shape");
2312 }
2313
2314 Sum->getOperand(i: 0).setReg(Dst);
2315 MI.eraseFromParent();
2316 return true;
2317 }
2318
2319 Register HSum = CTPOP.getReg(Idx: 0);
2320 unsigned Opc;
2321 SmallVector<LLT> HAddTys;
2322 if (Ty.isScalar()) {
2323 Opc = Intrinsic::aarch64_neon_uaddlv;
2324 HAddTys.push_back(Elt: LLT::scalar(SizeInBits: 32));
2325 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) {
2326 Opc = Intrinsic::aarch64_neon_uaddlp;
2327 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16));
2328 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
2329 Opc = Intrinsic::aarch64_neon_uaddlp;
2330 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16));
2331 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32));
2332 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
2333 Opc = Intrinsic::aarch64_neon_uaddlp;
2334 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16));
2335 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32));
2336 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64));
2337 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) {
2338 Opc = Intrinsic::aarch64_neon_uaddlp;
2339 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16));
2340 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
2341 Opc = Intrinsic::aarch64_neon_uaddlp;
2342 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16));
2343 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32));
2344 } else
2345 llvm_unreachable("unexpected vector shape");
2346 MachineInstrBuilder UADD;
2347 for (LLT HTy : HAddTys) {
2348 UADD = MIRBuilder.buildIntrinsic(ID: Opc, Res: {HTy}).addUse(RegNo: HSum);
2349 HSum = UADD.getReg(Idx: 0);
2350 }
2351
2352 // Post-conditioning.
2353 if (Ty.isScalar() && (Size == 64 || Size == 128))
2354 MIRBuilder.buildZExt(Res: Dst, Op: UADD);
2355 else
2356 UADD->getOperand(i: 0).setReg(Dst);
2357 MI.eraseFromParent();
2358 return true;
2359}
2360
2361bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2362 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2363 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2364 LLT s64 = LLT::scalar(SizeInBits: 64);
2365 auto Addr = MI.getOperand(i: 1).getReg();
2366 auto DesiredI = MIRBuilder.buildUnmerge(Res: {s64, s64}, Op: MI.getOperand(i: 2));
2367 auto NewI = MIRBuilder.buildUnmerge(Res: {s64, s64}, Op: MI.getOperand(i: 3));
2368 auto DstLo = MRI.createGenericVirtualRegister(Ty: s64);
2369 auto DstHi = MRI.createGenericVirtualRegister(Ty: s64);
2370
2371 MachineInstrBuilder CAS;
2372 if (ST->hasLSE()) {
2373 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2374 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2375 // the rest of the MIR so we must reassemble the extracted registers into a
2376 // 128-bit known-regclass one with code like this:
2377 //
2378 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2379 // %out = CASP %in1, ...
2380 // %OldLo = G_EXTRACT %out, 0
2381 // %OldHi = G_EXTRACT %out, 64
2382 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2383 unsigned Opcode;
2384 switch (Ordering) {
2385 case AtomicOrdering::Acquire:
2386 Opcode = AArch64::CASPAX;
2387 break;
2388 case AtomicOrdering::Release:
2389 Opcode = AArch64::CASPLX;
2390 break;
2391 case AtomicOrdering::AcquireRelease:
2392 case AtomicOrdering::SequentiallyConsistent:
2393 Opcode = AArch64::CASPALX;
2394 break;
2395 default:
2396 Opcode = AArch64::CASPX;
2397 break;
2398 }
2399
2400 LLT s128 = LLT::scalar(SizeInBits: 128);
2401 auto CASDst = MRI.createGenericVirtualRegister(Ty: s128);
2402 auto CASDesired = MRI.createGenericVirtualRegister(Ty: s128);
2403 auto CASNew = MRI.createGenericVirtualRegister(Ty: s128);
2404 MIRBuilder.buildInstr(Opc: TargetOpcode::REG_SEQUENCE, DstOps: {CASDesired}, SrcOps: {})
2405 .addUse(RegNo: DesiredI->getOperand(i: 0).getReg())
2406 .addImm(Val: AArch64::sube64)
2407 .addUse(RegNo: DesiredI->getOperand(i: 1).getReg())
2408 .addImm(Val: AArch64::subo64);
2409 MIRBuilder.buildInstr(Opc: TargetOpcode::REG_SEQUENCE, DstOps: {CASNew}, SrcOps: {})
2410 .addUse(RegNo: NewI->getOperand(i: 0).getReg())
2411 .addImm(Val: AArch64::sube64)
2412 .addUse(RegNo: NewI->getOperand(i: 1).getReg())
2413 .addImm(Val: AArch64::subo64);
2414
2415 CAS = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {CASDst}, SrcOps: {CASDesired, CASNew, Addr});
2416
2417 MIRBuilder.buildExtract(Res: {DstLo}, Src: {CASDst}, Index: 0);
2418 MIRBuilder.buildExtract(Res: {DstHi}, Src: {CASDst}, Index: 64);
2419 } else {
2420 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2421 // can take arbitrary registers so it just has the normal GPR64 operands the
2422 // rest of AArch64 is expecting.
2423 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2424 unsigned Opcode;
2425 switch (Ordering) {
2426 case AtomicOrdering::Acquire:
2427 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2428 break;
2429 case AtomicOrdering::Release:
2430 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2431 break;
2432 case AtomicOrdering::AcquireRelease:
2433 case AtomicOrdering::SequentiallyConsistent:
2434 Opcode = AArch64::CMP_SWAP_128;
2435 break;
2436 default:
2437 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2438 break;
2439 }
2440
2441 auto Scratch = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
2442 CAS = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {DstLo, DstHi, Scratch},
2443 SrcOps: {Addr, DesiredI->getOperand(i: 0),
2444 DesiredI->getOperand(i: 1), NewI->getOperand(i: 0),
2445 NewI->getOperand(i: 1)});
2446 }
2447
2448 CAS.cloneMemRefs(OtherMI: MI);
2449 constrainSelectedInstRegOperands(I&: *CAS, TII: *ST->getInstrInfo(),
2450 TRI: *MRI.getTargetRegisterInfo(),
2451 RBI: *ST->getRegBankInfo());
2452
2453 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0), Ops: {DstLo, DstHi});
2454 MI.eraseFromParent();
2455 return true;
2456}
2457
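// Lower CTTZ as CTLZ(BITREVERSE(x)), which selects to the RBIT + CLZ pair.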
2458bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2459 LegalizerHelper &Helper) const {
2460 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2461 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2462 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
2463 auto BitReverse = MIRBuilder.buildBitReverse(Dst: Ty, Src: MI.getOperand(i: 1));
2464 MIRBuilder.buildCTLZ(Dst: MI.getOperand(i: 0).getReg(), Src0: BitReverse);
2465 MI.eraseFromParent();
2466 return true;
2467}
2468
2469bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2470 LegalizerHelper &Helper) const {
2471 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2472
2473   // The tagged version (aarch64_mops_memset_tag) is legalized in
       // legalizeIntrinsic.
2474 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2475 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2476 // the instruction).
2477 auto &Value = MI.getOperand(i: 1);
2478 Register ExtValueReg =
2479 MIRBuilder.buildAnyExt(Res: LLT::scalar(SizeInBits: 64), Op: Value).getReg(Idx: 0);
2480 Value.setReg(ExtValueReg);
2481 return true;
2482 }
2483
2484 return false;
2485}
2486
2487bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2488 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2489 const GExtractVectorElement *Element = cast<GExtractVectorElement>(Val: &MI);
2490 auto VRegAndVal =
2491 getIConstantVRegValWithLookThrough(VReg: Element->getIndexReg(), MRI);
2492 if (VRegAndVal)
2493 return true;
2494 LLT VecTy = MRI.getType(Reg: Element->getVectorReg());
2495 if (VecTy.isScalableVector())
2496 return true;
2497 return Helper.lowerExtractInsertVectorElt(MI) !=
2498 LegalizerHelper::LegalizeResult::UnableToLegalize;
2499}
2500
2501bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2502 MachineInstr &MI, LegalizerHelper &Helper) const {
2503 MachineFunction &MF = *MI.getParent()->getParent();
2504 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2505 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2506
2507 // If stack probing is not enabled for this function, use the default
2508 // lowering.
2509 if (!MF.getFunction().hasFnAttribute(Kind: "probe-stack") ||
2510 MF.getFunction().getFnAttribute(Kind: "probe-stack").getValueAsString() !=
2511 "inline-asm") {
2512 Helper.lowerDynStackAlloc(MI);
2513 return true;
2514 }
2515
2516 Register Dst = MI.getOperand(i: 0).getReg();
2517 Register AllocSize = MI.getOperand(i: 1).getReg();
2518 Align Alignment = assumeAligned(Value: MI.getOperand(i: 2).getImm());
2519
2520 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2521 "Unexpected type for dynamic alloca");
2522 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2523 "Unexpected type for dynamic alloca");
2524
2525 LLT PtrTy = MRI.getType(Reg: Dst);
2526 Register SPReg =
2527 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2528 Register SPTmp =
2529 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
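  // Emit the probed-allocation pseudo, which is expanded after instruction
  // selection into a loop that probes each page of the newly allocated stack.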
2530 auto NewMI =
2531 MIRBuilder.buildInstr(Opc: AArch64::PROBED_STACKALLOC_DYN, DstOps: {}, SrcOps: {SPTmp});
2532 MRI.setRegClass(Reg: NewMI.getReg(Idx: 0), RC: &AArch64::GPR64commonRegClass);
2533 MIRBuilder.setInsertPt(MBB&: *NewMI->getParent(), II: NewMI);
2534 MIRBuilder.buildCopy(Res: Dst, Op: SPTmp);
2535
2536 MI.eraseFromParent();
2537 return true;
2538}
2539
2540bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2541 LegalizerHelper &Helper) const {
2542 MachineIRBuilder &MIB = Helper.MIRBuilder;
2543 auto &AddrVal = MI.getOperand(i: 0);
2544
2545 int64_t IsWrite = MI.getOperand(i: 1).getImm();
2546 int64_t Locality = MI.getOperand(i: 2).getImm();
2547 int64_t IsData = MI.getOperand(i: 3).getImm();
2548
2549 bool IsStream = Locality == 0;
2550 if (Locality != 0) {
2551 assert(Locality <= 3 && "Prefetch locality out-of-range");
2552     // The IR locality degree runs the opposite way from the PRFM cache-level
2553     // encoding, which starts at 0 for level 1 (the closest cache), so flip the
2554     // value.
2555 Locality = 3 - Locality;
2556 }
2557
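  // Assemble the PRFM immediate: load/store bit (4), data/instruction-cache
  // bit (3), cache-level bits (2:1) and stream bit (0), mirroring the
  // aarch64_prefetch intrinsic lowering above.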
2558 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2559
2560 MIB.buildInstr(Opcode: AArch64::G_AARCH64_PREFETCH).addImm(Val: PrfOp).add(MO: AddrVal);
2561 MI.eraseFromParent();
2562 return true;
2563}
2564
2565bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
2566 MachineIRBuilder &MIRBuilder,
2567 MachineRegisterInfo &MRI) const {
2568 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
2569 assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
2570 "Expected a power of 2 elements");
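  // Strategy: split the source into v2s64 chunks, narrow each chunk to f32
  // with a round-to-odd truncation (G_FPTRUNC_ODD) so that the subsequent
  // f32 -> f16 truncation does not double-round, then re-merge the results.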
2571
2572 LLT s16 = LLT::scalar(SizeInBits: 16);
2573 LLT s32 = LLT::scalar(SizeInBits: 32);
2574 LLT s64 = LLT::scalar(SizeInBits: 64);
2575 LLT v2s16 = LLT::fixed_vector(NumElements: 2, ScalarTy: s16);
2576 LLT v4s16 = LLT::fixed_vector(NumElements: 4, ScalarTy: s16);
2577 LLT v2s32 = LLT::fixed_vector(NumElements: 2, ScalarTy: s32);
2578 LLT v4s32 = LLT::fixed_vector(NumElements: 4, ScalarTy: s32);
2579 LLT v2s64 = LLT::fixed_vector(NumElements: 2, ScalarTy: s64);
2580
2581 SmallVector<Register> RegsToUnmergeTo;
2582 SmallVector<Register> TruncOddDstRegs;
2583 SmallVector<Register> RegsToMerge;
2584
2585 unsigned ElemCount = SrcTy.getNumElements();
2586
2587   // Find the biggest chunk size we can work with.
2588 int StepSize = ElemCount % 4 ? 2 : 4;
2589
2590   // If the element count is a power of 2 greater than 2, first unmerge the
2591   // source into v2s64 pieces.
2592 if (ElemCount <= 2)
2593 RegsToUnmergeTo.push_back(Elt: Src);
2594 else {
2595 for (unsigned i = 0; i < ElemCount / 2; ++i)
2596 RegsToUnmergeTo.push_back(Elt: MRI.createGenericVirtualRegister(Ty: v2s64));
2597
2598 MIRBuilder.buildUnmerge(Res: RegsToUnmergeTo, Op: Src);
2599 }
2600
2601   // Create all of the round-to-odd truncations and collect their results.
2602 for (auto SrcReg : RegsToUnmergeTo) {
2603 Register Mid =
2604 MIRBuilder.buildInstr(Opc: AArch64::G_FPTRUNC_ODD, DstOps: {v2s32}, SrcOps: {SrcReg})
2605 .getReg(Idx: 0);
2606 TruncOddDstRegs.push_back(Elt: Mid);
2607 }
2608
2609 // Truncate 4s32 to 4s16 if we can to reduce instruction count, otherwise
2610 // truncate 2s32 to 2s16.
2611 unsigned Index = 0;
2612 for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
2613 if (StepSize == 4) {
2614 Register ConcatDst =
2615 MIRBuilder
2616 .buildMergeLikeInstr(
2617 Res: {v4s32}, Ops: {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
2618 .getReg(Idx: 0);
2619
2620 RegsToMerge.push_back(
2621 Elt: MIRBuilder.buildFPTrunc(Res: v4s16, Op: ConcatDst).getReg(Idx: 0));
2622 } else {
2623 RegsToMerge.push_back(
2624 Elt: MIRBuilder.buildFPTrunc(Res: v2s16, Op: TruncOddDstRegs[Index++]).getReg(Idx: 0));
2625 }
2626 }
2627
2628 // If there is only one register, replace the destination
2629 if (RegsToMerge.size() == 1) {
2630 MRI.replaceRegWith(FromReg: Dst, ToReg: RegsToMerge.pop_back_val());
2631 MI.eraseFromParent();
2632 return true;
2633 }
2634
2635 // Merge the rest of the instructions & replace the register
2636 Register Fin = MIRBuilder.buildMergeLikeInstr(Res: DstTy, Ops: RegsToMerge).getReg(Idx: 0);
2637 MRI.replaceRegWith(FromReg: Dst, ToReg: Fin);
2638 MI.eraseFromParent();
2639 return true;
2640}
2641