1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the Machinelegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64LegalizerInfo.h"
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
18#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
19#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
20#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
21#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
22#include "llvm/CodeGen/GlobalISel/Utils.h"
23#include "llvm/CodeGen/MachineInstr.h"
24#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/TargetOpcodes.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/IR/IntrinsicsAArch64.h"
30#include "llvm/IR/Type.h"
31#include "llvm/Support/MathExtras.h"
32#include <initializer_list>
33
34#define DEBUG_TYPE "aarch64-legalinfo"
35
36using namespace llvm;
37using namespace LegalizeActions;
38using namespace LegalizeMutations;
39using namespace LegalityPredicates;
40using namespace MIPatternMatch;
41
42AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
43 : ST(&ST) {
44 using namespace TargetOpcode;
45 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
46 const LLT s8 = LLT::scalar(SizeInBits: 8);
47 const LLT s16 = LLT::scalar(SizeInBits: 16);
48 const LLT s32 = LLT::scalar(SizeInBits: 32);
49 const LLT s64 = LLT::scalar(SizeInBits: 64);
50 const LLT s128 = LLT::scalar(SizeInBits: 128);
51 const LLT v16s8 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8);
52 const LLT v8s8 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8);
53 const LLT v4s8 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 8);
54 const LLT v2s8 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 8);
55 const LLT v8s16 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16);
56 const LLT v4s16 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16);
57 const LLT v2s16 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16);
58 const LLT v2s32 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32);
59 const LLT v4s32 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
60 const LLT v2s64 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64);
61 const LLT v2p0 = LLT::fixed_vector(NumElements: 2, ScalarTy: p0);
62
63 const LLT nxv16s8 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s8);
64 const LLT nxv8s16 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s16);
65 const LLT nxv4s32 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s32);
66 const LLT nxv2s64 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s64);
67
68 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
69 v16s8, v8s16, v4s32,
70 v2s64, v2p0,
71 /* End 128bit types */
72 /* Begin 64bit types */
73 v8s8, v4s16, v2s32};
74 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
75 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
76 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
77
78 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
79
80 // FIXME: support subtargets which have neon/fp-armv8 disabled.
81 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
82 getLegacyLegalizerInfo().computeTables();
83 return;
84 }
85
86 // Some instructions only support s16 if the subtarget has full 16-bit FP
87 // support.
88 const bool HasFP16 = ST.hasFullFP16();
89 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
90
91 const bool HasCSSC = ST.hasCSSC();
92 const bool HasRCPC3 = ST.hasRCPC3();
93 const bool HasSVE = ST.hasSVE();
94
95 getActionDefinitionsBuilder(
96 Opcodes: {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
97 .legalFor(Types: {p0, s8, s16, s32, s64})
98 .legalFor(Types: {v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
99 v2s64, v2p0})
100 .widenScalarToNextPow2(TypeIdx: 0)
101 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s64)
102 .moreElementsToNextPow2(TypeIdx: 0)
103 .widenVectorEltsToVectorMinSize(TypeIdx: 0, VectorSize: 64)
104 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
105 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
106 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
107 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
108 .clampMaxNumElements(TypeIdx: 0, EltTy: p0, MaxElements: 2)
109 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0);
110
111 getActionDefinitionsBuilder(Opcode: G_PHI)
112 .legalFor(Types: {p0, s16, s32, s64})
113 .legalFor(Types: PackedVectorAllTypeList)
114 .widenScalarToNextPow2(TypeIdx: 0)
115 .moreElementsToNextPow2(TypeIdx: 0)
116 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
117 .clampScalar(TypeIdx: 0, MinTy: s16, MaxTy: s64)
118 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
119 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
120 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
121 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
122 .clampMaxNumElements(TypeIdx: 0, EltTy: p0, MaxElements: 2);
123
124 getActionDefinitionsBuilder(Opcode: G_INSERT)
125 .legalIf(Predicate: all(P0: typeInSet(TypeIdx: 0, TypesInit: {s32, s64, p0}), P1: typeInSet(TypeIdx: 1, TypesInit: {s8, s16, s32}),
126 args: smallerThan(TypeIdx0: 1, TypeIdx1: 0)))
127 .widenScalarToNextPow2(TypeIdx: 0)
128 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
129 .widenScalarToNextPow2(TypeIdx: 1)
130 .minScalar(TypeIdx: 1, Ty: s8)
131 .maxScalarIf(Predicate: typeInSet(TypeIdx: 0, TypesInit: {s32}), TypeIdx: 1, Ty: s16)
132 .maxScalarIf(Predicate: typeInSet(TypeIdx: 0, TypesInit: {s64, p0}), TypeIdx: 1, Ty: s32);
133
134 getActionDefinitionsBuilder(Opcode: G_EXTRACT)
135 .legalIf(Predicate: all(P0: typeInSet(TypeIdx: 0, TypesInit: {s16, s32, s64, p0}),
136 P1: typeInSet(TypeIdx: 1, TypesInit: {s32, s64, s128, p0}), args: smallerThan(TypeIdx0: 0, TypeIdx1: 1)))
137 .widenScalarToNextPow2(TypeIdx: 1)
138 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s128)
139 .widenScalarToNextPow2(TypeIdx: 0)
140 .minScalar(TypeIdx: 0, Ty: s16)
141 .maxScalarIf(Predicate: typeInSet(TypeIdx: 1, TypesInit: {s32}), TypeIdx: 0, Ty: s16)
142 .maxScalarIf(Predicate: typeInSet(TypeIdx: 1, TypesInit: {s64, p0}), TypeIdx: 0, Ty: s32)
143 .maxScalarIf(Predicate: typeInSet(TypeIdx: 1, TypesInit: {s128}), TypeIdx: 0, Ty: s64);
144
145 getActionDefinitionsBuilder(Opcodes: {G_ADD, G_SUB, G_AND, G_OR, G_XOR})
146 .legalFor(Types: {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
147 .legalFor(Pred: HasSVE, Types: {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
148 .widenScalarToNextPow2(TypeIdx: 0)
149 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
150 .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: 16)
151 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: 8)
152 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
153 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
154 .minScalarOrEltIf(
155 Predicate: [=](const LegalityQuery &Query) {
156 return Query.Types[0].getNumElements() <= 2;
157 },
158 TypeIdx: 0, Ty: s32)
159 .minScalarOrEltIf(
160 Predicate: [=](const LegalityQuery &Query) {
161 return Query.Types[0].getNumElements() <= 4;
162 },
163 TypeIdx: 0, Ty: s16)
164 .minScalarOrEltIf(
165 Predicate: [=](const LegalityQuery &Query) {
166 return Query.Types[0].getNumElements() <= 16;
167 },
168 TypeIdx: 0, Ty: s8)
169 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
170 .moreElementsToNextPow2(TypeIdx: 0);
171
172 getActionDefinitionsBuilder(Opcode: G_MUL)
173 .legalFor(Types: {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
174 .widenScalarToNextPow2(TypeIdx: 0)
175 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
176 .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: 16)
177 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: 8)
178 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
179 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
180 .minScalarOrEltIf(
181 Predicate: [=](const LegalityQuery &Query) {
182 return Query.Types[0].getNumElements() <= 2;
183 },
184 TypeIdx: 0, Ty: s32)
185 .minScalarOrEltIf(
186 Predicate: [=](const LegalityQuery &Query) {
187 return Query.Types[0].getNumElements() <= 4;
188 },
189 TypeIdx: 0, Ty: s16)
190 .minScalarOrEltIf(
191 Predicate: [=](const LegalityQuery &Query) {
192 return Query.Types[0].getNumElements() <= 16;
193 },
194 TypeIdx: 0, Ty: s8)
195 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
196 .moreElementsToNextPow2(TypeIdx: 0);
197
198 getActionDefinitionsBuilder(Opcodes: {G_SHL, G_ASHR, G_LSHR})
199 .customIf(Predicate: [=](const LegalityQuery &Query) {
200 const auto &SrcTy = Query.Types[0];
201 const auto &AmtTy = Query.Types[1];
202 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
203 AmtTy.getSizeInBits() == 32;
204 })
205 .legalFor(Types: {
206 {s32, s32},
207 {s32, s64},
208 {s64, s64},
209 {v8s8, v8s8},
210 {v16s8, v16s8},
211 {v4s16, v4s16},
212 {v8s16, v8s16},
213 {v2s32, v2s32},
214 {v4s32, v4s32},
215 {v2s64, v2s64},
216 })
217 .widenScalarToNextPow2(TypeIdx: 0)
218 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s64)
219 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
220 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
221 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
222 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
223 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
224 .moreElementsToNextPow2(TypeIdx: 0)
225 .minScalarSameAs(TypeIdx: 1, LargeTypeIdx: 0)
226 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
227 .minScalarEltSameAsIf(Predicate: isVector(TypeIdx: 0), TypeIdx: 1, LargeTypeIdx: 0)
228 .maxScalarEltSameAsIf(Predicate: isVector(TypeIdx: 0), TypeIdx: 1, SmallTypeIdx: 0);
229
230 getActionDefinitionsBuilder(Opcode: G_PTR_ADD)
231 .legalFor(Types: {{p0, s64}, {v2p0, v2s64}})
232 .clampScalarOrElt(TypeIdx: 1, MinTy: s64, MaxTy: s64)
233 .clampNumElements(TypeIdx: 0, MinTy: v2p0, MaxTy: v2p0);
234
235 getActionDefinitionsBuilder(Opcode: G_PTRMASK).legalFor(Types: {{p0, s64}});
236
237 getActionDefinitionsBuilder(Opcodes: {G_SDIV, G_UDIV})
238 .legalFor(Types: {s32, s64})
239 .libcallFor(Types: {s128})
240 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
241 .widenScalarToNextPow2(TypeIdx: 0)
242 .scalarize(TypeIdx: 0);
243
244 getActionDefinitionsBuilder(Opcodes: {G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
245 .lowerFor(Types: {s8, s16, s32, s64, v2s32, v4s32, v2s64})
246 .libcallFor(Types: {s128})
247 .widenScalarOrEltToNextPow2(TypeIdx: 0)
248 .minScalarOrElt(TypeIdx: 0, Ty: s32)
249 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
250 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
251 .scalarize(TypeIdx: 0);
252
253 getActionDefinitionsBuilder(Opcodes: {G_SMULO, G_UMULO})
254 .widenScalarToNextPow2(TypeIdx: 0, /*Min = */ MinSize: 32)
255 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
256 .lower();
257
258 getActionDefinitionsBuilder(Opcodes: {G_SMULH, G_UMULH})
259 .legalFor(Types: {s64, v16s8, v8s16, v4s32})
260 .lower();
261
262 getActionDefinitionsBuilder(Opcodes: {G_SMIN, G_SMAX, G_UMIN, G_UMAX})
263 .legalFor(Types: {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
264 .legalFor(Pred: HasCSSC, Types: {s32, s64})
265 .minScalar(Pred: HasCSSC, TypeIdx: 0, Ty: s32)
266 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
267 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
268 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
269 .lower();
270
271 // FIXME: Legal vector types are only legal with NEON.
272 getActionDefinitionsBuilder(Opcode: G_ABS)
273 .legalFor(Pred: HasCSSC, Types: {s32, s64})
274 .legalFor(Types: PackedVectorAllTypeList)
275 .customIf(Predicate: [=](const LegalityQuery &Q) {
276 // TODO: Fix suboptimal codegen for 128+ bit types.
277 LLT SrcTy = Q.Types[0];
278 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
279 })
280 .widenScalarIf(
281 Predicate: [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
282 Mutation: [=](const LegalityQuery &Query) { return std::make_pair(x: 0, y: v4s16); })
283 .widenScalarIf(
284 Predicate: [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
285 Mutation: [=](const LegalityQuery &Query) { return std::make_pair(x: 0, y: v2s32); })
286 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
287 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
288 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
289 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
290 .moreElementsToNextPow2(TypeIdx: 0)
291 .lower();
292
293 getActionDefinitionsBuilder(
294 Opcodes: {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
295 .legalFor(Types: {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
296 .lower();
297
298 getActionDefinitionsBuilder(
299 Opcodes: {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
300 .legalFor(Types: {{s32, s32}, {s64, s32}})
301 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
302 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s64)
303 .widenScalarToNextPow2(TypeIdx: 0);
304
305 getActionDefinitionsBuilder(Opcodes: {G_FSHL, G_FSHR})
306 .customFor(Types: {{s32, s32}, {s32, s64}, {s64, s64}})
307 .lower();
308
309 getActionDefinitionsBuilder(Opcode: G_ROTR)
310 .legalFor(Types: {{s32, s64}, {s64, s64}})
311 .customIf(Predicate: [=](const LegalityQuery &Q) {
312 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
313 })
314 .lower();
315 getActionDefinitionsBuilder(Opcode: G_ROTL).lower();
316
317 getActionDefinitionsBuilder(Opcodes: {G_SBFX, G_UBFX})
318 .customFor(Types: {{s32, s32}, {s64, s64}});
319
320 auto always = [=](const LegalityQuery &Q) { return true; };
321 getActionDefinitionsBuilder(Opcode: G_CTPOP)
322 .legalFor(Pred: HasCSSC, Types: {{s32, s32}, {s64, s64}})
323 .legalFor(Types: {{v8s8, v8s8}, {v16s8, v16s8}})
324 .customFor(Pred: !HasCSSC, Types: {{s32, s32}, {s64, s64}})
325 .customFor(Types: {{s128, s128},
326 {v4s16, v4s16},
327 {v8s16, v8s16},
328 {v2s32, v2s32},
329 {v4s32, v4s32},
330 {v2s64, v2s64}})
331 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s128)
332 .widenScalarToNextPow2(TypeIdx: 0)
333 .minScalarEltSameAsIf(Predicate: always, TypeIdx: 1, LargeTypeIdx: 0)
334 .maxScalarEltSameAsIf(Predicate: always, TypeIdx: 1, SmallTypeIdx: 0)
335 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
336 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
337 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
338 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
339 .moreElementsToNextPow2(TypeIdx: 0)
340 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0);
341
342 getActionDefinitionsBuilder(Opcodes: {G_CTLZ, G_CTLS})
343 .legalFor(Types: {{s32, s32},
344 {s64, s64},
345 {v8s8, v8s8},
346 {v16s8, v16s8},
347 {v4s16, v4s16},
348 {v8s16, v8s16},
349 {v2s32, v2s32},
350 {v4s32, v4s32}})
351 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 32)
352 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s64)
353 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
354 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
355 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
356 .moreElementsToNextPow2(TypeIdx: 0)
357 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 32), TypeIdx: 0)
358 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1);
359
360 getActionDefinitionsBuilder(Opcode: G_CTLZ_ZERO_UNDEF).lower();
361
362 getActionDefinitionsBuilder(Opcode: G_CTTZ)
363 .lowerIf(Predicate: isVector(TypeIdx: 0))
364 .widenScalarToNextPow2(TypeIdx: 1, /*Min=*/MinSize: 32)
365 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s64)
366 .scalarSameSizeAs(TypeIdx: 0, SameSizeIdx: 1)
367 .legalFor(Pred: HasCSSC, Types: {s32, s64})
368 .customFor(Pred: !HasCSSC, Types: {s32, s64});
369
370 getActionDefinitionsBuilder(Opcode: G_CTTZ_ZERO_UNDEF).lower();
371
372 getActionDefinitionsBuilder(Opcode: G_BITREVERSE)
373 .legalFor(Types: {s32, s64, v8s8, v16s8})
374 .widenScalarToNextPow2(TypeIdx: 0, /*Min = */ MinSize: 32)
375 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 0, MinSize: 8)
376 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
377 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
378 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
379 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
380 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
381 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
382 .moreElementsToNextPow2(TypeIdx: 0)
383 .lower();
384
385 getActionDefinitionsBuilder(Opcode: G_BSWAP)
386 .legalFor(Types: {s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
387 .widenScalarOrEltToNextPow2(TypeIdx: 0, MinSize: 16)
388 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
389 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
390 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
391 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
392 .moreElementsToNextPow2(TypeIdx: 0);
393
394 getActionDefinitionsBuilder(Opcodes: {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
395 .legalFor(Types: {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
396 .legalFor(Pred: HasSVE, Types: {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
397 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
398 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
399 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
400 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
401 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
402 .moreElementsToNextPow2(TypeIdx: 0)
403 .lower();
404
405 getActionDefinitionsBuilder(
406 Opcodes: {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
407 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
408 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
409 .legalFor(Types: {s32, s64, v2s32, v4s32, v2s64})
410 .legalFor(Pred: HasFP16, Types: {s16, v4s16, v8s16})
411 .libcallFor(Types: {s128})
412 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
413 .minScalarOrElt(TypeIdx: 0, Ty: MinFPScalar)
414 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
415 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
416 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
417 .moreElementsToNextPow2(TypeIdx: 0);
418
419 getActionDefinitionsBuilder(Opcodes: {G_FABS, G_FNEG})
420 .legalFor(Types: {s32, s64, v2s32, v4s32, v2s64})
421 .legalFor(Pred: HasFP16, Types: {s16, v4s16, v8s16})
422 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
423 .lowerIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64))
424 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
425 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
426 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
427 .moreElementsToNextPow2(TypeIdx: 0)
428 .lowerFor(Types: {s16, v4s16, v8s16});
429
430 getActionDefinitionsBuilder(Opcode: G_FREM)
431 .libcallFor(Types: {s32, s64, s128})
432 .minScalar(TypeIdx: 0, Ty: s32)
433 .scalarize(TypeIdx: 0);
434
435 getActionDefinitionsBuilder(Opcodes: {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
436 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
437 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
438 G_FSINH, G_FTANH, G_FMODF})
439 // We need a call for these, so we always need to scalarize.
440 .scalarize(TypeIdx: 0)
441 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
442 .minScalar(TypeIdx: 0, Ty: s32)
443 .libcallFor(Types: {s32, s64, s128});
444 getActionDefinitionsBuilder(Opcodes: {G_FPOWI, G_FLDEXP})
445 .scalarize(TypeIdx: 0)
446 .minScalar(TypeIdx: 0, Ty: s32)
447 .libcallFor(Types: {{s32, s32}, {s64, s32}, {s128, s32}});
448
449 getActionDefinitionsBuilder(Opcodes: {G_LROUND, G_INTRINSIC_LRINT})
450 .legalFor(Types: {{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
451 .legalFor(Pred: HasFP16, Types: {{s32, s16}, {s64, s16}})
452 .minScalar(TypeIdx: 1, Ty: s32)
453 .libcallFor(Types: {{s64, s128}})
454 .lower();
455 getActionDefinitionsBuilder(Opcodes: {G_LLROUND, G_INTRINSIC_LLRINT})
456 .legalFor(Types: {{s64, s32}, {s64, s64}})
457 .legalFor(Pred: HasFP16, Types: {{s64, s16}})
458 .minScalar(TypeIdx: 0, Ty: s64)
459 .minScalar(TypeIdx: 1, Ty: s32)
460 .libcallFor(Types: {{s64, s128}})
461 .lower();
462
463 // TODO: Custom legalization for mismatched types.
464 getActionDefinitionsBuilder(Opcode: G_FCOPYSIGN)
465 .moreElementsIf(
466 Predicate: [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
467 Mutation: [=](const LegalityQuery &Query) {
468 const LLT Ty = Query.Types[0];
469 return std::pair(0, LLT::fixed_vector(NumElements: Ty == s16 ? 4 : 2, ScalarTy: Ty));
470 })
471 .lower();
472
473 getActionDefinitionsBuilder(Opcode: G_FMAD).lower();
474
475 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
476 auto &Actions = getActionDefinitionsBuilder(Opcode: Op);
477
478 if (Op == G_SEXTLOAD)
479 Actions.lowerIf(Predicate: atomicOrderingAtLeastOrStrongerThan(MMOIdx: 0, Ordering: AtomicOrdering::Unordered));
480
481 // Atomics have zero extending behavior.
482 Actions
483 .legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 8},
484 {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 8},
485 {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: 8},
486 {.Type0: s64, .Type1: p0, .MemTy: s8, .Align: 2},
487 {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: 2},
488 {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: 4},
489 {.Type0: s64, .Type1: p0, .MemTy: s64, .Align: 8},
490 {.Type0: p0, .Type1: p0, .MemTy: s64, .Align: 8},
491 {.Type0: v2s32, .Type1: p0, .MemTy: s64, .Align: 8}})
492 .widenScalarToNextPow2(TypeIdx: 0)
493 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
494 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
495 // how to do that yet.
496 .unsupportedIfMemSizeNotPow2()
497 // Lower anything left over into G_*EXT and G_LOAD
498 .lower();
499 }
500
501 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
502 const LLT &ValTy = Query.Types[0];
503 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
504 };
505
506 getActionDefinitionsBuilder(Opcode: G_LOAD)
507 .customIf(Predicate: [=](const LegalityQuery &Query) {
508 return HasRCPC3 && Query.Types[0] == s128 &&
509 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
510 })
511 .customIf(Predicate: [=](const LegalityQuery &Query) {
512 return Query.Types[0] == s128 &&
513 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
514 })
515 .legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: s8, .Type1: p0, .MemTy: s8, .Align: 8},
516 {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: 8},
517 {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: 8},
518 {.Type0: s64, .Type1: p0, .MemTy: s64, .Align: 8},
519 {.Type0: p0, .Type1: p0, .MemTy: s64, .Align: 8},
520 {.Type0: s128, .Type1: p0, .MemTy: s128, .Align: 8},
521 {.Type0: v8s8, .Type1: p0, .MemTy: s64, .Align: 8},
522 {.Type0: v16s8, .Type1: p0, .MemTy: s128, .Align: 8},
523 {.Type0: v4s16, .Type1: p0, .MemTy: s64, .Align: 8},
524 {.Type0: v8s16, .Type1: p0, .MemTy: s128, .Align: 8},
525 {.Type0: v2s32, .Type1: p0, .MemTy: s64, .Align: 8},
526 {.Type0: v4s32, .Type1: p0, .MemTy: s128, .Align: 8},
527 {.Type0: v2s64, .Type1: p0, .MemTy: s128, .Align: 8}})
528 // These extends are also legal
529 .legalForTypesWithMemDesc(
530 TypesAndMemDesc: {{.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 8}, {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 8}, {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: 8}})
531 .legalForTypesWithMemDesc(TypesAndMemDesc: {
532 // SVE vscale x 128 bit base sizes
533 {.Type0: nxv16s8, .Type1: p0, .MemTy: nxv16s8, .Align: 8},
534 {.Type0: nxv8s16, .Type1: p0, .MemTy: nxv8s16, .Align: 8},
535 {.Type0: nxv4s32, .Type1: p0, .MemTy: nxv4s32, .Align: 8},
536 {.Type0: nxv2s64, .Type1: p0, .MemTy: nxv2s64, .Align: 8},
537 })
538 .widenScalarToNextPow2(TypeIdx: 0, /* MinSize = */ 8)
539 .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: 16)
540 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: 8)
541 .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: 4)
542 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
543 .clampMaxNumElements(TypeIdx: 0, EltTy: p0, MaxElements: 2)
544 .lowerIfMemSizeNotByteSizePow2()
545 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s64)
546 .narrowScalarIf(
547 Predicate: [=](const LegalityQuery &Query) {
548 // Clamp extending load results to 32-bits.
549 return Query.Types[0].isScalar() &&
550 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
551 Query.Types[0].getSizeInBits() > 32;
552 },
553 Mutation: changeTo(TypeIdx: 0, Ty: s32))
554 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
555 .bitcastIf(Predicate: typeInSet(TypeIdx: 0, TypesInit: {v4s8}),
556 Mutation: [=](const LegalityQuery &Query) {
557 const LLT VecTy = Query.Types[0];
558 return std::pair(0, LLT::scalar(SizeInBits: VecTy.getSizeInBits()));
559 })
560 .customIf(Predicate: IsPtrVecPred)
561 .scalarizeIf(Predicate: typeInSet(TypeIdx: 0, TypesInit: {v2s16, v2s8}), TypeIdx: 0)
562 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0);
563
564 getActionDefinitionsBuilder(Opcode: G_STORE)
565 .customIf(Predicate: [=](const LegalityQuery &Query) {
566 return HasRCPC3 && Query.Types[0] == s128 &&
567 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
568 })
569 .customIf(Predicate: [=](const LegalityQuery &Query) {
570 return Query.Types[0] == s128 &&
571 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
572 })
573 .widenScalarIf(
574 Predicate: all(P0: scalarNarrowerThan(TypeIdx: 0, Size: 32),
575 P1: atomicOrderingAtLeastOrStrongerThan(MMOIdx: 0, Ordering: AtomicOrdering::Release)),
576 Mutation: changeTo(TypeIdx: 0, Ty: s32))
577 .legalForTypesWithMemDesc(
578 TypesAndMemDesc: {{.Type0: s8, .Type1: p0, .MemTy: s8, .Align: 8}, {.Type0: s16, .Type1: p0, .MemTy: s8, .Align: 8}, // truncstorei8 from s16
579 {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 8}, // truncstorei8 from s32
580 {.Type0: s64, .Type1: p0, .MemTy: s8, .Align: 8}, // truncstorei8 from s64
581 {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: 8}, {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 8}, // truncstorei16 from s32
582 {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: 8}, // truncstorei16 from s64
583 {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: 8}, {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: 8}, {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: 8},
584 {.Type0: s64, .Type1: p0, .MemTy: s64, .Align: 8}, {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: 8}, // truncstorei32 from s64
585 {.Type0: p0, .Type1: p0, .MemTy: s64, .Align: 8}, {.Type0: s128, .Type1: p0, .MemTy: s128, .Align: 8}, {.Type0: v16s8, .Type1: p0, .MemTy: s128, .Align: 8},
586 {.Type0: v8s8, .Type1: p0, .MemTy: s64, .Align: 8}, {.Type0: v4s16, .Type1: p0, .MemTy: s64, .Align: 8}, {.Type0: v8s16, .Type1: p0, .MemTy: s128, .Align: 8},
587 {.Type0: v2s32, .Type1: p0, .MemTy: s64, .Align: 8}, {.Type0: v4s32, .Type1: p0, .MemTy: s128, .Align: 8}, {.Type0: v2s64, .Type1: p0, .MemTy: s128, .Align: 8}})
588 .legalForTypesWithMemDesc(TypesAndMemDesc: {
589 // SVE vscale x 128 bit base sizes
590 // TODO: Add nxv2p0. Consider bitcastIf.
591 // See #92130
592 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
593 {.Type0: nxv16s8, .Type1: p0, .MemTy: nxv16s8, .Align: 8},
594 {.Type0: nxv8s16, .Type1: p0, .MemTy: nxv8s16, .Align: 8},
595 {.Type0: nxv4s32, .Type1: p0, .MemTy: nxv4s32, .Align: 8},
596 {.Type0: nxv2s64, .Type1: p0, .MemTy: nxv2s64, .Align: 8},
597 })
598 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s64)
599 .minScalarOrElt(TypeIdx: 0, Ty: s8)
600 .lowerIf(Predicate: [=](const LegalityQuery &Query) {
601 return Query.Types[0].isScalar() &&
602 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
603 })
604 // Maximum: sN * k = 128
605 .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: 16)
606 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: 8)
607 .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: 4)
608 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
609 .clampMaxNumElements(TypeIdx: 0, EltTy: p0, MaxElements: 2)
610 .lowerIfMemSizeNotPow2()
611 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
612 .bitcastIf(Predicate: all(P0: typeInSet(TypeIdx: 0, TypesInit: {v4s8}),
613 P1: LegalityPredicate([=](const LegalityQuery &Query) {
614 return Query.Types[0].getSizeInBits() ==
615 Query.MMODescrs[0].MemoryTy.getSizeInBits();
616 })),
617 Mutation: [=](const LegalityQuery &Query) {
618 const LLT VecTy = Query.Types[0];
619 return std::pair(0, LLT::scalar(SizeInBits: VecTy.getSizeInBits()));
620 })
621 .customIf(Predicate: IsPtrVecPred)
622 .scalarizeIf(Predicate: typeInSet(TypeIdx: 0, TypesInit: {v2s16, v2s8}), TypeIdx: 0)
623 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
624 .lower();
625
626 getActionDefinitionsBuilder(Opcode: G_INDEXED_STORE)
627 // Idx 0 == Ptr, Idx 1 == Val
628 // TODO: we can implement legalizations but as of now these are
629 // generated in a very specific way.
630 .legalForTypesWithMemDesc(TypesAndMemDesc: {
631 {.Type0: p0, .Type1: s8, .MemTy: s8, .Align: 8},
632 {.Type0: p0, .Type1: s16, .MemTy: s16, .Align: 8},
633 {.Type0: p0, .Type1: s32, .MemTy: s8, .Align: 8},
634 {.Type0: p0, .Type1: s32, .MemTy: s16, .Align: 8},
635 {.Type0: p0, .Type1: s32, .MemTy: s32, .Align: 8},
636 {.Type0: p0, .Type1: s64, .MemTy: s64, .Align: 8},
637 {.Type0: p0, .Type1: p0, .MemTy: p0, .Align: 8},
638 {.Type0: p0, .Type1: v8s8, .MemTy: v8s8, .Align: 8},
639 {.Type0: p0, .Type1: v16s8, .MemTy: v16s8, .Align: 8},
640 {.Type0: p0, .Type1: v4s16, .MemTy: v4s16, .Align: 8},
641 {.Type0: p0, .Type1: v8s16, .MemTy: v8s16, .Align: 8},
642 {.Type0: p0, .Type1: v2s32, .MemTy: v2s32, .Align: 8},
643 {.Type0: p0, .Type1: v4s32, .MemTy: v4s32, .Align: 8},
644 {.Type0: p0, .Type1: v2s64, .MemTy: v2s64, .Align: 8},
645 {.Type0: p0, .Type1: v2p0, .MemTy: v2p0, .Align: 8},
646 {.Type0: p0, .Type1: s128, .MemTy: s128, .Align: 8},
647 })
648 .unsupported();
649
650 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
651 LLT LdTy = Query.Types[0];
652 LLT PtrTy = Query.Types[1];
653 if (!llvm::is_contained(Range: PackedVectorAllTypesVec, Element: LdTy) &&
654 !llvm::is_contained(Range: ScalarAndPtrTypesVec, Element: LdTy) && LdTy != s128)
655 return false;
656 if (PtrTy != p0)
657 return false;
658 return true;
659 };
660 getActionDefinitionsBuilder(Opcode: G_INDEXED_LOAD)
661 .unsupportedIf(
662 Predicate: atomicOrderingAtLeastOrStrongerThan(MMOIdx: 0, Ordering: AtomicOrdering::Unordered))
663 .legalIf(Predicate: IndexedLoadBasicPred)
664 .unsupported();
665 getActionDefinitionsBuilder(Opcodes: {G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
666 .unsupportedIf(
667 Predicate: atomicOrderingAtLeastOrStrongerThan(MMOIdx: 0, Ordering: AtomicOrdering::Unordered))
668 .legalIf(Predicate: all(P0: typeInSet(TypeIdx: 0, TypesInit: {s16, s32, s64}),
669 P1: LegalityPredicate([=](const LegalityQuery &Q) {
670 LLT LdTy = Q.Types[0];
671 LLT PtrTy = Q.Types[1];
672 LLT MemTy = Q.MMODescrs[0].MemoryTy;
673 if (PtrTy != p0)
674 return false;
675 if (LdTy == s16)
676 return MemTy == s8;
677 if (LdTy == s32)
678 return MemTy == s8 || MemTy == s16;
679 if (LdTy == s64)
680 return MemTy == s8 || MemTy == s16 || MemTy == s32;
681 return false;
682 })))
683 .unsupported();
684
685 // Constants
686 getActionDefinitionsBuilder(Opcode: G_CONSTANT)
687 .legalFor(Types: {p0, s8, s16, s32, s64})
688 .widenScalarToNextPow2(TypeIdx: 0)
689 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s64);
690 getActionDefinitionsBuilder(Opcode: G_FCONSTANT)
691 // Always legalize s16 to prevent G_FCONSTANT being widened to G_CONSTANT
692 .legalFor(Types: {s16, s32, s64, s128})
693 .clampScalar(TypeIdx: 0, MinTy: MinFPScalar, MaxTy: s128);
694
695 // FIXME: fix moreElementsToNextPow2
696 getActionDefinitionsBuilder(Opcode: G_ICMP)
697 .legalFor(Types: {{s32, s32}, {s32, s64}, {s32, p0}})
698 .widenScalarOrEltToNextPow2(TypeIdx: 1)
699 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s64)
700 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32)
701 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 1, Size: 64), TypeIdx: 1)
702 .minScalarEltSameAsIf(
703 Predicate: [=](const LegalityQuery &Query) {
704 const LLT &Ty = Query.Types[0];
705 const LLT &SrcTy = Query.Types[1];
706 return Ty.isVector() && !SrcTy.isPointerVector() &&
707 Ty.getElementType() != SrcTy.getElementType();
708 },
709 TypeIdx: 0, LargeTypeIdx: 1)
710 .minScalarOrEltIf(
711 Predicate: [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
712 TypeIdx: 1, Ty: s32)
713 .minScalarOrEltIf(
714 Predicate: [=](const LegalityQuery &Query) {
715 return Query.Types[1].isPointerVector();
716 },
717 TypeIdx: 0, Ty: s64)
718 .moreElementsToNextPow2(TypeIdx: 1)
719 .clampNumElements(TypeIdx: 1, MinTy: v8s8, MaxTy: v16s8)
720 .clampNumElements(TypeIdx: 1, MinTy: v4s16, MaxTy: v8s16)
721 .clampNumElements(TypeIdx: 1, MinTy: v2s32, MaxTy: v4s32)
722 .clampNumElements(TypeIdx: 1, MinTy: v2s64, MaxTy: v2s64)
723 .clampNumElements(TypeIdx: 1, MinTy: v2p0, MaxTy: v2p0)
724 .customIf(Predicate: isVector(TypeIdx: 0));
725
726 getActionDefinitionsBuilder(Opcode: G_FCMP)
727 .legalFor(Types: {{s32, s32},
728 {s32, s64},
729 {v4s32, v4s32},
730 {v2s32, v2s32},
731 {v2s64, v2s64}})
732 .legalFor(Pred: HasFP16, Types: {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
733 .widenScalarOrEltToNextPow2(TypeIdx: 1)
734 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32)
735 .minScalarOrElt(TypeIdx: 1, Ty: MinFPScalar)
736 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 1, Size: 64), TypeIdx: 1)
737 .minScalarEltSameAsIf(
738 Predicate: [=](const LegalityQuery &Query) {
739 const LLT &Ty = Query.Types[0];
740 const LLT &SrcTy = Query.Types[1];
741 return Ty.isVector() && !SrcTy.isPointerVector() &&
742 Ty.getElementType() != SrcTy.getElementType();
743 },
744 TypeIdx: 0, LargeTypeIdx: 1)
745 .clampNumElements(TypeIdx: 1, MinTy: v4s16, MaxTy: v8s16)
746 .clampNumElements(TypeIdx: 1, MinTy: v2s32, MaxTy: v4s32)
747 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
748 .moreElementsToNextPow2(TypeIdx: 1)
749 .libcallFor(Types: {{s32, s128}});
750
751 // Extensions
752 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
753 unsigned DstSize = Query.Types[0].getSizeInBits();
754
755 // Handle legal vectors using legalFor
756 if (Query.Types[0].isVector())
757 return false;
758
759 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(Value: DstSize))
760 return false; // Extending to a scalar s128 needs narrowing.
761
762 const LLT &SrcTy = Query.Types[1];
763
764 // Make sure we fit in a register otherwise. Don't bother checking that
765 // the source type is below 128 bits. We shouldn't be allowing anything
766 // through which is wider than the destination in the first place.
767 unsigned SrcSize = SrcTy.getSizeInBits();
768 if (SrcSize < 8 || !isPowerOf2_32(Value: SrcSize))
769 return false;
770
771 return true;
772 };
773 getActionDefinitionsBuilder(Opcodes: {G_ZEXT, G_SEXT, G_ANYEXT})
774 .legalIf(Predicate: ExtLegalFunc)
775 .legalFor(Types: {{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
776 .clampScalar(TypeIdx: 0, MinTy: s64, MaxTy: s64) // Just for s128, others are handled above.
777 .moreElementsToNextPow2(TypeIdx: 0)
778 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 8)
779 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 4)
780 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 2)
781 // Tries to convert a large EXTEND into two smaller EXTENDs
782 .lowerIf(Predicate: [=](const LegalityQuery &Query) {
783 return (Query.Types[0].getScalarSizeInBits() >
784 Query.Types[1].getScalarSizeInBits() * 2) &&
785 Query.Types[0].isVector() &&
786 (Query.Types[1].getScalarSizeInBits() == 8 ||
787 Query.Types[1].getScalarSizeInBits() == 16);
788 })
789 .clampMinNumElements(TypeIdx: 1, EltTy: s8, MinElements: 8)
790 .clampMinNumElements(TypeIdx: 1, EltTy: s16, MinElements: 4)
791 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0);
792
793 getActionDefinitionsBuilder(Opcode: G_TRUNC)
794 .legalFor(Types: {{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
795 .moreElementsToNextPow2(TypeIdx: 0)
796 .clampMaxNumElements(TypeIdx: 0, EltTy: s8, MaxElements: 8)
797 .clampMaxNumElements(TypeIdx: 0, EltTy: s16, MaxElements: 4)
798 .clampMaxNumElements(TypeIdx: 0, EltTy: s32, MaxElements: 2)
799 .minScalarOrEltIf(
800 Predicate: [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
801 TypeIdx: 0, Ty: s8)
802 .lowerIf(Predicate: [=](const LegalityQuery &Query) {
803 LLT DstTy = Query.Types[0];
804 LLT SrcTy = Query.Types[1];
805 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
806 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
807 })
808 .clampMinNumElements(TypeIdx: 0, EltTy: s8, MinElements: 8)
809 .clampMinNumElements(TypeIdx: 0, EltTy: s16, MinElements: 4)
810 .alwaysLegal();
811
812 getActionDefinitionsBuilder(Opcodes: {G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
813 .legalFor(Types: {{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}});
814
815 getActionDefinitionsBuilder(Opcode: G_SEXT_INREG)
816 .legalFor(Types: {s32, s64})
817 .legalFor(Types: PackedVectorAllTypeList)
818 .maxScalar(TypeIdx: 0, Ty: s64)
819 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
820 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
821 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
822 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
823 .lower();
824
825 // FP conversions
826 getActionDefinitionsBuilder(Opcode: G_FPTRUNC)
827 .legalFor(
828 Types: {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
829 .libcallFor(Types: {{s16, s128}, {s32, s128}, {s64, s128}})
830 .moreElementsToNextPow2(TypeIdx: 1)
831 .customIf(Predicate: [](const LegalityQuery &Q) {
832 LLT DstTy = Q.Types[0];
833 LLT SrcTy = Q.Types[1];
834 return SrcTy.isFixedVector() && DstTy.isFixedVector() &&
835 SrcTy.getScalarSizeInBits() == 64 &&
836 DstTy.getScalarSizeInBits() == 16;
837 })
838 // Clamp based on input
839 .clampNumElements(TypeIdx: 1, MinTy: v4s32, MaxTy: v4s32)
840 .clampNumElements(TypeIdx: 1, MinTy: v2s64, MaxTy: v2s64)
841 .scalarize(TypeIdx: 0);
842
843 getActionDefinitionsBuilder(Opcode: G_FPEXT)
844 .legalFor(
845 Types: {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
846 .libcallFor(Types: {{s128, s64}, {s128, s32}, {s128, s16}})
847 .moreElementsToNextPow2(TypeIdx: 0)
848 .widenScalarIf(
849 Predicate: [](const LegalityQuery &Q) {
850 LLT DstTy = Q.Types[0];
851 LLT SrcTy = Q.Types[1];
852 return SrcTy.isVector() && DstTy.isVector() &&
853 SrcTy.getScalarSizeInBits() == 16 &&
854 DstTy.getScalarSizeInBits() == 64;
855 },
856 Mutation: changeElementTo(TypeIdx: 1, Ty: s32))
857 .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: v4s32)
858 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
859 .scalarize(TypeIdx: 0);
860
861 // Conversions
862 getActionDefinitionsBuilder(Opcodes: {G_FPTOSI, G_FPTOUI})
863 .legalFor(Types: {{s32, s32},
864 {s64, s32},
865 {s32, s64},
866 {s64, s64},
867 {v2s32, v2s32},
868 {v4s32, v4s32},
869 {v2s64, v2s64}})
870 .legalFor(Pred: HasFP16,
871 Types: {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
872 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
873 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 1, Size: 64), TypeIdx: 1)
874 // The range of a fp16 value fits into an i17, so we can lower the width
875 // to i64.
876 .narrowScalarIf(
877 Predicate: [=](const LegalityQuery &Query) {
878 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
879 },
880 Mutation: changeTo(TypeIdx: 0, Ty: s64))
881 .moreElementsToNextPow2(TypeIdx: 0)
882 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 0)
883 .minScalar(TypeIdx: 0, Ty: s32)
884 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 1, /*MinSize=*/HasFP16 ? 16 : 32)
885 .widenScalarIf(
886 Predicate: [=](const LegalityQuery &Query) {
887 return Query.Types[0].getScalarSizeInBits() <= 64 &&
888 Query.Types[0].getScalarSizeInBits() >
889 Query.Types[1].getScalarSizeInBits();
890 },
891 Mutation: LegalizeMutations::changeElementSizeTo(TypeIdx: 1, FromTypeIdx: 0))
892 .widenScalarIf(
893 Predicate: [=](const LegalityQuery &Query) {
894 return Query.Types[1].getScalarSizeInBits() <= 64 &&
895 Query.Types[0].getScalarSizeInBits() <
896 Query.Types[1].getScalarSizeInBits();
897 },
898 Mutation: LegalizeMutations::changeElementSizeTo(TypeIdx: 0, FromTypeIdx: 1))
899 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
900 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
901 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
902 .libcallFor(
903 Types: {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
904
905 getActionDefinitionsBuilder(Opcodes: {G_FPTOSI_SAT, G_FPTOUI_SAT})
906 .legalFor(Types: {{s32, s32},
907 {s64, s32},
908 {s32, s64},
909 {s64, s64},
910 {v2s32, v2s32},
911 {v4s32, v4s32},
912 {v2s64, v2s64}})
913 .legalFor(
914 Pred: HasFP16,
915 Types: {{s16, s16}, {s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
916 // Handle types larger than i64 by scalarizing/lowering.
917 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
918 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 1, Size: 64), TypeIdx: 1)
919 // The range of a fp16 value fits into an i17, so we can lower the width
920 // to i64.
921 .narrowScalarIf(
922 Predicate: [=](const LegalityQuery &Query) {
923 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
924 },
925 Mutation: changeTo(TypeIdx: 0, Ty: s64))
926 .lowerIf(Predicate: ::any(P0: scalarWiderThan(TypeIdx: 0, Size: 64), P1: scalarWiderThan(TypeIdx: 1, Size: 64)), Mutation: 0)
927 .moreElementsToNextPow2(TypeIdx: 0)
928 .widenScalarToNextPow2(TypeIdx: 0, /*MinSize=*/32)
929 .minScalar(TypeIdx: 0, Ty: s32)
930 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 1, /*MinSize=*/HasFP16 ? 16 : 32)
931 .widenScalarIf(
932 Predicate: [=](const LegalityQuery &Query) {
933 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
934 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
935 ITySize > Query.Types[1].getScalarSizeInBits();
936 },
937 Mutation: LegalizeMutations::changeElementSizeTo(TypeIdx: 1, FromTypeIdx: 0))
938 .widenScalarIf(
939 Predicate: [=](const LegalityQuery &Query) {
940 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
941 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
942 Query.Types[0].getScalarSizeInBits() < FTySize;
943 },
944 Mutation: LegalizeMutations::changeElementSizeTo(TypeIdx: 0, FromTypeIdx: 1))
945 .widenScalarOrEltToNextPow2(TypeIdx: 0)
946 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
947 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
948 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2);
949
950 getActionDefinitionsBuilder(Opcodes: {G_SITOFP, G_UITOFP})
951 .legalFor(Types: {{s32, s32},
952 {s64, s32},
953 {s32, s64},
954 {s64, s64},
955 {v2s32, v2s32},
956 {v4s32, v4s32},
957 {v2s64, v2s64}})
958 .legalFor(Pred: HasFP16,
959 Types: {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
960 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 1, Size: 64), TypeIdx: 1)
961 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
962 .moreElementsToNextPow2(TypeIdx: 1)
963 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 1)
964 .minScalar(TypeIdx: 1, Ty: s32)
965 .lowerIf(Predicate: [](const LegalityQuery &Query) {
966 return Query.Types[1].isVector() &&
967 Query.Types[1].getScalarSizeInBits() == 64 &&
968 Query.Types[0].getScalarSizeInBits() == 16;
969 })
970 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 0, /*MinSize=*/HasFP16 ? 16 : 32)
971 .scalarizeIf(
972 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
973 Predicate: [](const LegalityQuery &Query) {
974 return Query.Types[0].getScalarSizeInBits() == 32 &&
975 Query.Types[1].getScalarSizeInBits() == 64;
976 },
977 TypeIdx: 0)
978 .widenScalarIf(
979 Predicate: [](const LegalityQuery &Query) {
980 return Query.Types[1].getScalarSizeInBits() <= 64 &&
981 Query.Types[0].getScalarSizeInBits() <
982 Query.Types[1].getScalarSizeInBits();
983 },
984 Mutation: LegalizeMutations::changeElementSizeTo(TypeIdx: 0, FromTypeIdx: 1))
985 .widenScalarIf(
986 Predicate: [](const LegalityQuery &Query) {
987 return Query.Types[0].getScalarSizeInBits() <= 64 &&
988 Query.Types[0].getScalarSizeInBits() >
989 Query.Types[1].getScalarSizeInBits();
990 },
991 Mutation: LegalizeMutations::changeElementSizeTo(TypeIdx: 1, FromTypeIdx: 0))
992 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
993 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
994 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
995 .libcallFor(Types: {{s16, s128},
996 {s32, s128},
997 {s64, s128},
998 {s128, s128},
999 {s128, s32},
1000 {s128, s64}});
1001
1002 // Control-flow
1003 getActionDefinitionsBuilder(Opcode: G_BR).alwaysLegal();
1004 getActionDefinitionsBuilder(Opcode: G_BRCOND)
1005 .legalFor(Types: {s32})
1006 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s32);
1007 getActionDefinitionsBuilder(Opcode: G_BRINDIRECT).legalFor(Types: {p0});
1008
1009 getActionDefinitionsBuilder(Opcode: G_SELECT)
1010 .legalFor(Types: {{s32, s32}, {s64, s32}, {p0, s32}})
1011 .widenScalarToNextPow2(TypeIdx: 0)
1012 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64)
1013 .clampScalar(TypeIdx: 1, MinTy: s32, MaxTy: s32)
1014 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
1015 .minScalarEltSameAsIf(Predicate: all(P0: isVector(TypeIdx: 0), P1: isVector(TypeIdx: 1)), TypeIdx: 1, LargeTypeIdx: 0)
1016 .lowerIf(Predicate: isVector(TypeIdx: 0));
1017
1018 // Pointer-handling
1019 getActionDefinitionsBuilder(Opcode: G_FRAME_INDEX).legalFor(Types: {p0});
1020
1021 if (TM.getCodeModel() == CodeModel::Small)
1022 getActionDefinitionsBuilder(Opcode: G_GLOBAL_VALUE).custom();
1023 else
1024 getActionDefinitionsBuilder(Opcode: G_GLOBAL_VALUE).legalFor(Types: {p0});
1025
1026 getActionDefinitionsBuilder(Opcode: G_PTRAUTH_GLOBAL_VALUE)
1027 .legalIf(Predicate: all(P0: typeIs(TypeIdx: 0, TypesInit: p0), P1: typeIs(TypeIdx: 1, TypesInit: p0)));
1028
1029 getActionDefinitionsBuilder(Opcode: G_PTRTOINT)
1030 .legalFor(Types: {{s64, p0}, {v2s64, v2p0}})
1031 .widenScalarToNextPow2(TypeIdx: 0, MinSize: 64)
1032 .clampScalar(TypeIdx: 0, MinTy: s64, MaxTy: s64)
1033 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2);
1034
1035 getActionDefinitionsBuilder(Opcode: G_INTTOPTR)
1036 .unsupportedIf(Predicate: [&](const LegalityQuery &Query) {
1037 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1038 })
1039 .legalFor(Types: {{p0, s64}, {v2p0, v2s64}})
1040 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2);
1041
1042 // Casts for 32 and 64-bit width type are just copies.
1043 // Same for 128-bit width type, except they are on the FPR bank.
1044 getActionDefinitionsBuilder(Opcode: G_BITCAST)
1045 // Keeping 32-bit instructions legal to prevent regression in some tests
1046 .legalForCartesianProduct(Types: {s32, v2s16, v4s8})
1047 .legalForCartesianProduct(Types: {s64, v8s8, v4s16, v2s32})
1048 .legalForCartesianProduct(Types: {s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1049 .customIf(Predicate: [=](const LegalityQuery &Query) {
1050 // Handle casts from i1 vectors to scalars.
1051 LLT DstTy = Query.Types[0];
1052 LLT SrcTy = Query.Types[1];
1053 return DstTy.isScalar() && SrcTy.isVector() &&
1054 SrcTy.getScalarSizeInBits() == 1;
1055 })
1056 .lowerIf(Predicate: [=](const LegalityQuery &Query) {
1057 return Query.Types[0].isVector() != Query.Types[1].isVector();
1058 })
1059 .moreElementsToNextPow2(TypeIdx: 0)
1060 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
1061 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
1062 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
1063 .lower();
1064
1065 getActionDefinitionsBuilder(Opcode: G_VASTART).legalFor(Types: {p0});
1066
1067 // va_list must be a pointer, but most sized types are pretty easy to handle
1068 // as the destination.
1069 getActionDefinitionsBuilder(Opcode: G_VAARG)
1070 .customForCartesianProduct(Types0: {s8, s16, s32, s64, p0}, Types1: {p0})
1071 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: s64)
1072 .widenScalarToNextPow2(TypeIdx: 0, /*Min*/ MinSize: 8);
1073
1074 getActionDefinitionsBuilder(Opcode: G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1075 .lowerIf(
1076 Predicate: all(P0: typeInSet(TypeIdx: 0, TypesInit: {s8, s16, s32, s64, s128}), P1: typeIs(TypeIdx: 2, TypesInit: p0)));
1077
1078 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
1079
1080 getActionDefinitionsBuilder(Opcode: G_ATOMIC_CMPXCHG)
1081 .legalFor(Pred: !UseOutlineAtomics, Types: {{s32, p0}, {s64, p0}})
1082 .customFor(Pred: !UseOutlineAtomics, Types: {{s128, p0}})
1083 .libcallFor(Pred: UseOutlineAtomics,
1084 Types: {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1085 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64);
1086
1087 getActionDefinitionsBuilder(Opcodes: {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1088 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1089 G_ATOMICRMW_XOR})
1090 .legalFor(Pred: !UseOutlineAtomics, Types: {{s32, p0}, {s64, p0}})
1091 .libcallFor(Pred: UseOutlineAtomics,
1092 Types: {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1093 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64);
1094
1095 // Do not outline these atomics operations, as per comment in
1096 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1097 getActionDefinitionsBuilder(
1098 Opcodes: {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1099 .legalIf(Predicate: all(P0: typeInSet(TypeIdx: 0, TypesInit: {s32, s64}), P1: typeIs(TypeIdx: 1, TypesInit: p0)))
1100 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: s64);
1101
1102 getActionDefinitionsBuilder(Opcode: G_BLOCK_ADDR).legalFor(Types: {p0});
1103
1104 // Merge/Unmerge
1105 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1106 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1107 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
1108 getActionDefinitionsBuilder(Opcode: Op)
1109 .widenScalarToNextPow2(TypeIdx: LitTyIdx, MinSize: 8)
1110 .widenScalarToNextPow2(TypeIdx: BigTyIdx, MinSize: 32)
1111 .clampScalar(TypeIdx: LitTyIdx, MinTy: s8, MaxTy: s64)
1112 .clampScalar(TypeIdx: BigTyIdx, MinTy: s32, MaxTy: s128)
1113 .legalIf(Predicate: [=](const LegalityQuery &Q) {
1114 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1115 case 32:
1116 case 64:
1117 case 128:
1118 break;
1119 default:
1120 return false;
1121 }
1122 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1123 case 8:
1124 case 16:
1125 case 32:
1126 case 64:
1127 return true;
1128 default:
1129 return false;
1130 }
1131 });
1132 }
1133
1134 // TODO : nxv4s16, nxv2s16, nxv2s32
1135 getActionDefinitionsBuilder(Opcode: G_EXTRACT_VECTOR_ELT)
1136 .legalFor(Pred: HasSVE, Types: {{s16, nxv16s8, s64},
1137 {s16, nxv8s16, s64},
1138 {s32, nxv4s32, s64},
1139 {s64, nxv2s64, s64}})
1140 .unsupportedIf(Predicate: [=](const LegalityQuery &Query) {
1141 const LLT &EltTy = Query.Types[1].getElementType();
1142 if (Query.Types[1].isScalableVector())
1143 return false;
1144 return Query.Types[0] != EltTy;
1145 })
1146 .minScalar(TypeIdx: 2, Ty: s64)
1147 .customIf(Predicate: [=](const LegalityQuery &Query) {
1148 const LLT &VecTy = Query.Types[1];
1149 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1150 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1151 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1152 })
1153 .minScalarOrEltIf(
1154 Predicate: [=](const LegalityQuery &Query) {
1155 // We want to promote to <M x s1> to <M x s64> if that wouldn't
1156 // cause the total vec size to be > 128b.
1157 return Query.Types[1].isFixedVector() &&
1158 Query.Types[1].getNumElements() <= 2;
1159 },
1160 TypeIdx: 0, Ty: s64)
1161 .minScalarOrEltIf(
1162 Predicate: [=](const LegalityQuery &Query) {
1163 return Query.Types[1].isFixedVector() &&
1164 Query.Types[1].getNumElements() <= 4;
1165 },
1166 TypeIdx: 0, Ty: s32)
1167 .minScalarOrEltIf(
1168 Predicate: [=](const LegalityQuery &Query) {
1169 return Query.Types[1].isFixedVector() &&
1170 Query.Types[1].getNumElements() <= 8;
1171 },
1172 TypeIdx: 0, Ty: s16)
1173 .minScalarOrEltIf(
1174 Predicate: [=](const LegalityQuery &Query) {
1175 return Query.Types[1].isFixedVector() &&
1176 Query.Types[1].getNumElements() <= 16;
1177 },
1178 TypeIdx: 0, Ty: s8)
1179 .minScalarOrElt(TypeIdx: 0, Ty: s8) // Worst case, we need at least s8.
1180 .moreElementsToNextPow2(TypeIdx: 1)
1181 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1182 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1183 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1184 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 16)
1185 .clampMaxNumElements(TypeIdx: 1, EltTy: p0, MaxElements: 2)
1186 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 1, Size: 64), TypeIdx: 1);
1187
1188 getActionDefinitionsBuilder(Opcode: G_INSERT_VECTOR_ELT)
1189 .legalIf(
1190 Predicate: typeInSet(TypeIdx: 0, TypesInit: {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1191 .legalFor(Pred: HasSVE, Types: {{nxv16s8, s32, s64},
1192 {nxv8s16, s32, s64},
1193 {nxv4s32, s32, s64},
1194 {nxv2s64, s64, s64}})
1195 .moreElementsToNextPow2(TypeIdx: 0)
1196 .widenVectorEltsToVectorMinSize(TypeIdx: 0, VectorSize: 64)
1197 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
1198 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
1199 .clampNumElements(TypeIdx: 0, MinTy: v2s32, MaxTy: v4s32)
1200 .clampMaxNumElements(TypeIdx: 0, EltTy: s64, MaxElements: 2)
1201 .clampMaxNumElements(TypeIdx: 0, EltTy: p0, MaxElements: 2)
1202 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0);
1203
1204 getActionDefinitionsBuilder(Opcode: G_BUILD_VECTOR)
1205 .legalFor(Types: {{v8s8, s8},
1206 {v16s8, s8},
1207 {v4s16, s16},
1208 {v8s16, s16},
1209 {v2s32, s32},
1210 {v4s32, s32},
1211 {v2s64, s64},
1212 {v2p0, p0}})
1213 .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: v4s32)
1214 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
1215 .minScalarOrElt(TypeIdx: 0, Ty: s8)
1216 .widenVectorEltsToVectorMinSize(TypeIdx: 0, VectorSize: 64)
1217 .widenScalarOrEltToNextPow2(TypeIdx: 0)
1218 .minScalarSameAs(TypeIdx: 1, LargeTypeIdx: 0);
1219
1220 getActionDefinitionsBuilder(Opcode: G_BUILD_VECTOR_TRUNC).lower();
1221
1222 getActionDefinitionsBuilder(Opcode: G_SHUFFLE_VECTOR)
1223 .legalIf(Predicate: [=](const LegalityQuery &Query) {
1224 const LLT &DstTy = Query.Types[0];
1225 const LLT &SrcTy = Query.Types[1];
1226 // For now just support the TBL2 variant which needs the source vectors
1227 // to be the same size as the dest.
1228 if (DstTy != SrcTy)
1229 return false;
1230 return llvm::is_contained(
1231 Set: {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, Element: DstTy);
1232 })
1233 .moreElementsIf(
1234 Predicate: [](const LegalityQuery &Query) {
1235 return Query.Types[0].getNumElements() >
1236 Query.Types[1].getNumElements();
1237 },
1238 Mutation: changeTo(TypeIdx: 1, FromTypeIdx: 0))
1239 .moreElementsToNextPow2(TypeIdx: 0)
1240 .moreElementsIf(
1241 Predicate: [](const LegalityQuery &Query) {
1242 return Query.Types[0].getNumElements() <
1243 Query.Types[1].getNumElements();
1244 },
1245 Mutation: changeTo(TypeIdx: 0, FromTypeIdx: 1))
1246 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 0, MinSize: 8)
1247 .clampNumElements(TypeIdx: 0, MinTy: v8s8, MaxTy: v16s8)
1248 .clampNumElements(TypeIdx: 0, MinTy: v4s16, MaxTy: v8s16)
1249 .clampNumElements(TypeIdx: 0, MinTy: v4s32, MaxTy: v4s32)
1250 .clampNumElements(TypeIdx: 0, MinTy: v2s64, MaxTy: v2s64)
1251 .scalarizeIf(Predicate: scalarOrEltWiderThan(TypeIdx: 0, Size: 64), TypeIdx: 0)
1252 .bitcastIf(Predicate: isPointerVector(TypeIdx: 0), Mutation: [=](const LegalityQuery &Query) {
1253         // Bitcast vectors of pointers to vectors of i64.
1254 const LLT DstTy = Query.Types[0];
1255 return std::pair(0, LLT::vector(EC: DstTy.getElementCount(), ScalarSizeInBits: 64));
1256 });
1257
1258 getActionDefinitionsBuilder(Opcode: G_CONCAT_VECTORS)
1259 .legalFor(Types: {{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1260 .bitcastIf(
1261 Predicate: [=](const LegalityQuery &Query) {
1262 return Query.Types[0].isFixedVector() &&
1263 Query.Types[1].isFixedVector() &&
1264 Query.Types[0].getSizeInBits() <= 128 &&
1265 Query.Types[1].getSizeInBits() <= 64;
1266 },
1267 Mutation: [=](const LegalityQuery &Query) {
1268 const LLT DstTy = Query.Types[0];
1269 const LLT SrcTy = Query.Types[1];
1270 return std::pair(
1271 0, DstTy.changeElementSize(NewEltSize: SrcTy.getSizeInBits())
1272 .changeElementCount(
1273 EC: DstTy.getElementCount().divideCoefficientBy(
1274 RHS: SrcTy.getNumElements())));
1275 });
1276
1277 getActionDefinitionsBuilder(Opcode: G_EXTRACT_SUBVECTOR)
1278 .legalFor(Types: {{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1279 .widenScalarOrEltToNextPow2(TypeIdx: 0)
1280 .immIdx(ImmIdx: 0); // Inform verifier imm idx 0 is handled.
1281
1282 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1283 getActionDefinitionsBuilder(Opcode: G_SPLAT_VECTOR)
1284 .legalFor(Pred: HasSVE, Types: {{nxv4s32, s32}, {nxv2s64, s64}});
1285
1286 getActionDefinitionsBuilder(Opcode: G_JUMP_TABLE).legalFor(Types: {p0});
1287
1288 getActionDefinitionsBuilder(Opcode: G_BRJT).legalFor(Types: {{p0, s64}});
1289
1290 getActionDefinitionsBuilder(Opcodes: {G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1291
1292 getActionDefinitionsBuilder(Opcode: G_DYN_STACKALLOC).custom();
1293
1294 getActionDefinitionsBuilder(Opcodes: {G_STACKSAVE, G_STACKRESTORE}).lower();
1295
1296 if (ST.hasMOPS()) {
1297 // G_BZERO is not supported. Currently it is only emitted by
1298     // PreLegalizerCombiner for G_MEMSET with a zero constant.
1299 getActionDefinitionsBuilder(Opcode: G_BZERO).unsupported();
1300
1301 getActionDefinitionsBuilder(Opcode: G_MEMSET)
1302 .legalForCartesianProduct(Types0: {p0}, Types1: {s64}, Types2: {s64})
1303 .customForCartesianProduct(Types0: {p0}, Types1: {s8}, Types2: {s64})
1304 .immIdx(ImmIdx: 0); // Inform verifier imm idx 0 is handled.
1305
1306 getActionDefinitionsBuilder(Opcodes: {G_MEMCPY, G_MEMMOVE})
1307 .legalForCartesianProduct(Types0: {p0}, Types1: {p0}, Types2: {s64})
1308 .immIdx(ImmIdx: 0); // Inform verifier imm idx 0 is handled.
1309
1310 // G_MEMCPY_INLINE does not have a tailcall immediate
1311 getActionDefinitionsBuilder(Opcode: G_MEMCPY_INLINE)
1312 .legalForCartesianProduct(Types0: {p0}, Types1: {p0}, Types2: {s64});
1313
1314 } else {
1315 getActionDefinitionsBuilder(Opcodes: {G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1316 .libcall();
1317 }
1318
1319 // For fadd reductions we have pairwise operations available. We treat the
1320 // usual legal types as legal and handle the lowering to pairwise instructions
1321 // later.
1322 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_FADD)
1323 .legalFor(Types: {{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1324 .legalFor(Pred: HasFP16, Types: {{s16, v4s16}, {s16, v8s16}})
1325 .minScalarOrElt(TypeIdx: 0, Ty: MinFPScalar)
1326 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1327 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1328 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1329 .moreElementsToNextPow2(TypeIdx: 1)
1330 .scalarize(TypeIdx: 1)
1331 .lower();
1332
1333 // For fmul reductions we need to split up into individual operations. We
1334   // clamp to 128-bit vectors and then to 64-bit vectors to produce a cascade of
1335 // smaller types, followed by scalarizing what remains.
1336 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_FMUL)
1337 .minScalarOrElt(TypeIdx: 0, Ty: MinFPScalar)
1338 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1339 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1340 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1341 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 2)
1342 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 4)
1343 .scalarize(TypeIdx: 1)
1344 .lower();
1345
1346 getActionDefinitionsBuilder(Opcodes: {G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1347 .scalarize(TypeIdx: 2)
1348 .lower();
1349
1350 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_ADD)
1351 .legalFor(Types: {{s8, v8s8},
1352 {s8, v16s8},
1353 {s16, v4s16},
1354 {s16, v8s16},
1355 {s32, v2s32},
1356 {s32, v4s32},
1357 {s64, v2s64}})
1358 .moreElementsToNextPow2(TypeIdx: 1)
1359 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1360 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1361 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1362 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 16)
1363 .widenVectorEltsToVectorMinSize(TypeIdx: 1, VectorSize: 64)
1364 .scalarize(TypeIdx: 1);
1365
1366 getActionDefinitionsBuilder(Opcodes: {G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1367 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1368 .legalFor(Types: {{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1369 .legalFor(Pred: HasFP16, Types: {{s16, v4s16}, {s16, v8s16}})
1370 .minScalarOrElt(TypeIdx: 0, Ty: MinFPScalar)
1371 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1372 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1373 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1374 .scalarize(TypeIdx: 1)
1375 .lower();
1376
1377 getActionDefinitionsBuilder(Opcode: G_VECREDUCE_MUL)
1378 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 2)
1379 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 4)
1380 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 8)
1381 .scalarize(TypeIdx: 1)
1382 .lower();
1383
1384 getActionDefinitionsBuilder(
1385 Opcodes: {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1386 .legalFor(Types: {{s8, v8s8},
1387 {s8, v16s8},
1388 {s16, v4s16},
1389 {s16, v8s16},
1390 {s32, v2s32},
1391 {s32, v4s32}})
1392 .moreElementsIf(
1393 Predicate: [=](const LegalityQuery &Query) {
1394 return Query.Types[1].isVector() &&
1395 Query.Types[1].getElementType() != s8 &&
1396 Query.Types[1].getNumElements() & 1;
1397 },
1398 Mutation: LegalizeMutations::moreElementsToNextPow2(TypeIdx: 1))
1399 .clampMaxNumElements(TypeIdx: 1, EltTy: s64, MaxElements: 2)
1400 .clampMaxNumElements(TypeIdx: 1, EltTy: s32, MaxElements: 4)
1401 .clampMaxNumElements(TypeIdx: 1, EltTy: s16, MaxElements: 8)
1402 .clampMaxNumElements(TypeIdx: 1, EltTy: s8, MaxElements: 16)
1403 .scalarize(TypeIdx: 1)
1404 .lower();
1405
1406 getActionDefinitionsBuilder(
1407 Opcodes: {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1408 // Try to break down into smaller vectors as long as they're at least 64
1409 // bits. This lets us use vector operations for some parts of the
1410 // reduction.
1411 .fewerElementsIf(
1412 Predicate: [=](const LegalityQuery &Q) {
1413 LLT SrcTy = Q.Types[1];
1414 if (SrcTy.isScalar())
1415 return false;
1416 if (!isPowerOf2_32(Value: SrcTy.getNumElements()))
1417 return false;
1418 // We can usually perform 64b vector operations.
1419 return SrcTy.getSizeInBits() > 64;
1420 },
1421 Mutation: [=](const LegalityQuery &Q) {
1422 LLT SrcTy = Q.Types[1];
1423 return std::make_pair(x: 1, y: SrcTy.divide(Factor: 2));
1424 })
1425 .scalarize(TypeIdx: 1)
1426 .lower();
1427
1428   // TODO: Update this with correct handling when adding AArch64 SVE support.
1429 getActionDefinitionsBuilder(Opcode: G_VECTOR_COMPRESS).lower();
1430
1431 // Access to floating-point environment.
1432 getActionDefinitionsBuilder(Opcodes: {G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1433 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1434 .libcall();
1435
1436 getActionDefinitionsBuilder(Opcode: G_IS_FPCLASS).lower();
1437
1438 getActionDefinitionsBuilder(Opcode: G_PREFETCH).custom();
1439
1440 getActionDefinitionsBuilder(Opcodes: {G_SCMP, G_UCMP}).lower();
1441
1442 getLegacyLegalizerInfo().computeTables();
1443 verify(MII: *ST.getInstrInfo());
1444}
1445
1446bool AArch64LegalizerInfo::legalizeCustom(
1447 LegalizerHelper &Helper, MachineInstr &MI,
1448 LostDebugLocObserver &LocObserver) const {
1449 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1450 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1451 GISelChangeObserver &Observer = Helper.Observer;
1452 switch (MI.getOpcode()) {
1453 default:
1454 // No idea what to do.
1455 return false;
1456 case TargetOpcode::G_VAARG:
1457 return legalizeVaArg(MI, MRI, MIRBuilder);
1458 case TargetOpcode::G_LOAD:
1459 case TargetOpcode::G_STORE:
1460 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1461 case TargetOpcode::G_SHL:
1462 case TargetOpcode::G_ASHR:
1463 case TargetOpcode::G_LSHR:
1464 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1465 case TargetOpcode::G_GLOBAL_VALUE:
1466 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1467 case TargetOpcode::G_SBFX:
1468 case TargetOpcode::G_UBFX:
1469 return legalizeBitfieldExtract(MI, MRI, Helper);
1470 case TargetOpcode::G_FSHL:
1471 case TargetOpcode::G_FSHR:
1472 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1473 case TargetOpcode::G_ROTR:
1474 return legalizeRotate(MI, MRI, Helper);
1475 case TargetOpcode::G_CTPOP:
1476 return legalizeCTPOP(MI, MRI, Helper);
1477 case TargetOpcode::G_ATOMIC_CMPXCHG:
1478 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1479 case TargetOpcode::G_CTTZ:
1480 return legalizeCTTZ(MI, Helper);
1481 case TargetOpcode::G_BZERO:
1482 case TargetOpcode::G_MEMCPY:
1483 case TargetOpcode::G_MEMMOVE:
1484 case TargetOpcode::G_MEMSET:
1485 return legalizeMemOps(MI, Helper);
1486 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1487 return legalizeExtractVectorElt(MI, MRI, Helper);
1488 case TargetOpcode::G_DYN_STACKALLOC:
1489 return legalizeDynStackAlloc(MI, Helper);
1490 case TargetOpcode::G_PREFETCH:
1491 return legalizePrefetch(MI, Helper);
1492 case TargetOpcode::G_ABS:
1493 return Helper.lowerAbsToCNeg(MI);
1494 case TargetOpcode::G_ICMP:
1495 return legalizeICMP(MI, MRI, MIRBuilder);
1496 case TargetOpcode::G_BITCAST:
1497 return legalizeBitcast(MI, Helper);
1498 case TargetOpcode::G_FPTRUNC:
1499     // To lower f64 to f16 properly, we need to go through f32 as an
1500     // intermediate step.
1501 return legalizeFptrunc(MI, MIRBuilder, MRI);
1502 }
1503
1504 llvm_unreachable("expected switch to return");
1505}
1506
1507bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1508 LegalizerHelper &Helper) const {
1509 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1510 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1511   // We're trying to handle casts from i1 vectors to scalars by storing to the
1512   // stack and reloading.
1513 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1514 SrcTy.getElementType() != LLT::scalar(SizeInBits: 1))
1515 return false;
1516
1517 Helper.createStackStoreLoad(Res: DstReg, Val: SrcReg);
1518 MI.eraseFromParent();
1519 return true;
1520}
1521
1522bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1523 MachineRegisterInfo &MRI,
1524 MachineIRBuilder &MIRBuilder,
1525 GISelChangeObserver &Observer,
1526 LegalizerHelper &Helper) const {
1527 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1528 MI.getOpcode() == TargetOpcode::G_FSHR);
1529
1530   // Keep as G_FSHR if the shift amount is a G_CONSTANT; otherwise use the
1531   // generic lowering.
1532 Register ShiftNo = MI.getOperand(i: 3).getReg();
1533 LLT ShiftTy = MRI.getType(Reg: ShiftNo);
1534 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ShiftNo, MRI);
1535
1536 // Adjust shift amount according to Opcode (FSHL/FSHR)
1537 // Convert FSHL to FSHR
1538 LLT OperationTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1539 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1540
1541 // Lower non-constant shifts and leave zero shifts to the optimizer.
1542 if (!VRegAndVal || VRegAndVal->Value.urem(RHS: BitWidth) == 0)
1543 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1544 LegalizerHelper::LegalizeResult::Legalized);
1545
1546 APInt Amount = VRegAndVal->Value.urem(RHS: BitWidth);
1547
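  // fshl(x, y, amt) == fshr(x, y, BitWidth - amt), so normalize the constant
  // amount to the G_FSHR form that the imported selection patterns expect.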
1548 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1549
1550   // If the instruction is G_FSHR and has a 64-bit G_CONSTANT shift amount in
1551   // the range [0, BitWidth), it is already legal.
1552 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1553 VRegAndVal->Value.ult(RHS: BitWidth))
1554 return true;
1555
1556   // Materialize the adjusted shift amount as a 64-bit constant.
1557 auto Cast64 = MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: Amount.zext(width: 64));
1558
1559 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1560 Observer.changingInstr(MI);
1561 MI.getOperand(i: 3).setReg(Cast64.getReg(Idx: 0));
1562 Observer.changedInstr(MI);
1563 }
1564   // If the opcode is G_FSHL, remove the G_FSHL instruction and create an
1565   // equivalent G_FSHR instruction.
1566 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1567 MIRBuilder.buildInstr(Opc: TargetOpcode::G_FSHR, DstOps: {MI.getOperand(i: 0).getReg()},
1568 SrcOps: {MI.getOperand(i: 1).getReg(), MI.getOperand(i: 2).getReg(),
1569 Cast64.getReg(Idx: 0)});
1570 MI.eraseFromParent();
1571 }
1572 return true;
1573}
1574
1575bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1576 MachineRegisterInfo &MRI,
1577 MachineIRBuilder &MIRBuilder) const {
1578 Register DstReg = MI.getOperand(i: 0).getReg();
1579 Register SrcReg1 = MI.getOperand(i: 2).getReg();
1580 Register SrcReg2 = MI.getOperand(i: 3).getReg();
1581 LLT DstTy = MRI.getType(Reg: DstReg);
1582 LLT SrcTy = MRI.getType(Reg: SrcReg1);
1583
1584 // Check the vector types are legal
1585 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1586 DstTy.getNumElements() != SrcTy.getNumElements() ||
1587 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1588 return false;
1589
1590   // Lower G_ICMP NE to G_ICMP EQ followed by a NOT, which allows better
1591   // pattern matching in later passes.
1592 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(i: 1).getPredicate();
1593 if (Pred != CmpInst::ICMP_NE)
1594 return true;
1595 Register CmpReg =
1596 MIRBuilder
1597 .buildICmp(Pred: CmpInst::ICMP_EQ, Res: MRI.getType(Reg: DstReg), Op0: SrcReg1, Op1: SrcReg2)
1598 .getReg(Idx: 0);
1599 MIRBuilder.buildNot(Dst: DstReg, Src0: CmpReg);
1600
1601 MI.eraseFromParent();
1602 return true;
1603}
1604
1605bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1606 MachineRegisterInfo &MRI,
1607 LegalizerHelper &Helper) const {
1608   // To allow the imported patterns to match, ensure that the rotate amount is
1609   // zero-extended to 64 bits.
1610 Register AmtReg = MI.getOperand(i: 2).getReg();
1611 LLT AmtTy = MRI.getType(Reg: AmtReg);
1612 (void)AmtTy;
1613 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1614 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1615 auto NewAmt = Helper.MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 64), Op: AmtReg);
1616 Helper.Observer.changingInstr(MI);
1617 MI.getOperand(i: 2).setReg(NewAmt.getReg(Idx: 0));
1618 Helper.Observer.changedInstr(MI);
1619 return true;
1620}
1621
1622bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1623 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1624 GISelChangeObserver &Observer) const {
1625 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1626 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1627 // G_ADD_LOW instructions.
1628 // By splitting this here, we can optimize accesses in the small code model by
1629   // folding the G_ADD_LOW into the load/store offset.
1630 auto &GlobalOp = MI.getOperand(i: 1);
1631 // Don't modify an intrinsic call.
1632 if (GlobalOp.isSymbol())
1633 return true;
1634   const auto *GV = GlobalOp.getGlobal();
1635 if (GV->isThreadLocal())
1636 return true; // Don't want to modify TLS vars.
1637
1638 auto &TM = ST->getTargetLowering()->getTargetMachine();
1639 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1640
1641 if (OpFlags & AArch64II::MO_GOT)
1642 return true;
1643
1644 auto Offset = GlobalOp.getOffset();
1645 Register DstReg = MI.getOperand(i: 0).getReg();
1646 auto ADRP = MIRBuilder.buildInstr(Opc: AArch64::ADRP, DstOps: {LLT::pointer(AddressSpace: 0, SizeInBits: 64)}, SrcOps: {})
1647 .addGlobalAddress(GV, Offset, TargetFlags: OpFlags | AArch64II::MO_PAGE);
1648 // Set the regclass on the dest reg too.
1649 MRI.setRegClass(Reg: ADRP.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
1650
1651 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1652 // by creating a MOVK that sets bits 48-63 of the register to (global address
1653 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1654 // prevent an incorrect tag being generated during relocation when the
1655 // global appears before the code section. Without the offset, a global at
1656 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1657 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1658 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1659 // instead of `0xf`.
1660 // This assumes that we're in the small code model so we can assume a binary
1661 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1662 // binary must also be loaded into address range [0, 2^48). Both of these
1663 // properties need to be ensured at runtime when using tagged addresses.
1664 if (OpFlags & AArch64II::MO_TAGGED) {
1665 assert(!Offset &&
1666 "Should not have folded in an offset for a tagged global!");
1667 ADRP = MIRBuilder.buildInstr(Opc: AArch64::MOVKXi, DstOps: {LLT::pointer(AddressSpace: 0, SizeInBits: 64)}, SrcOps: {ADRP})
1668 .addGlobalAddress(GV, Offset: 0x100000000,
1669 TargetFlags: AArch64II::MO_PREL | AArch64II::MO_G3)
1670 .addImm(Val: 48);
1671 MRI.setRegClass(Reg: ADRP.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
1672 }
1673
1674 MIRBuilder.buildInstr(Opc: AArch64::G_ADD_LOW, DstOps: {DstReg}, SrcOps: {ADRP})
1675 .addGlobalAddress(GV, Offset,
1676 TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1677 MI.eraseFromParent();
1678 return true;
1679}
1680
1681bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1682 MachineInstr &MI) const {
1683 MachineIRBuilder &MIB = Helper.MIRBuilder;
1684 MachineRegisterInfo &MRI = *MIB.getMRI();
1685
1686 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1687 MIB.buildInstr(Opc: Opcode, DstOps: {MI.getOperand(i: 0)}, SrcOps: {MI.getOperand(i: 2)});
1688 MI.eraseFromParent();
1689 return true;
1690 };
1691 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1692 MIB.buildInstr(Opc: Opcode, DstOps: {MI.getOperand(i: 0)},
1693 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3)});
1694 MI.eraseFromParent();
1695 return true;
1696 };
1697 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1698 MIB.buildInstr(Opc: Opcode, DstOps: {MI.getOperand(i: 0)},
1699 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3), MI.getOperand(i: 4)});
1700 MI.eraseFromParent();
1701 return true;
1702 };
1703
1704 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(Val&: MI).getIntrinsicID();
1705 switch (IntrinsicID) {
1706 case Intrinsic::vacopy: {
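    // On Darwin and Windows, va_list is a single pointer, so copying it is one
    // pointer-sized load/store. For the standard AAPCS64 ABI it is a 32-byte
    // struct (20 bytes under ILP32), copied as one wide load/store pair.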
1707 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1708 unsigned VaListSize =
1709 (ST->isTargetDarwin() || ST->isTargetWindows())
1710 ? PtrSize
1711 : ST->isTargetILP32() ? 20 : 32;
1712
1713 MachineFunction &MF = *MI.getMF();
1714 auto Val = MF.getRegInfo().createGenericVirtualRegister(
1715 Ty: LLT::scalar(SizeInBits: VaListSize * 8));
1716 MIB.buildLoad(Res: Val, Addr: MI.getOperand(i: 2),
1717 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
1718 F: MachineMemOperand::MOLoad,
1719 Size: VaListSize, BaseAlignment: Align(PtrSize)));
1720 MIB.buildStore(Val, Addr: MI.getOperand(i: 1),
1721 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
1722 F: MachineMemOperand::MOStore,
1723 Size: VaListSize, BaseAlignment: Align(PtrSize)));
1724 MI.eraseFromParent();
1725 return true;
1726 }
1727 case Intrinsic::get_dynamic_area_offset: {
1728 MIB.buildConstant(Res: MI.getOperand(i: 0).getReg(), Val: 0);
1729 MI.eraseFromParent();
1730 return true;
1731 }
1732 case Intrinsic::aarch64_mops_memset_tag: {
1733 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1734 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1735 // the instruction).
1736 auto &Value = MI.getOperand(i: 3);
1737 Register ExtValueReg = MIB.buildAnyExt(Res: LLT::scalar(SizeInBits: 64), Op: Value).getReg(Idx: 0);
1738 Value.setReg(ExtValueReg);
1739 return true;
1740 }
1741 case Intrinsic::aarch64_prefetch: {
1742 auto &AddrVal = MI.getOperand(i: 1);
1743
1744 int64_t IsWrite = MI.getOperand(i: 2).getImm();
1745 int64_t Target = MI.getOperand(i: 3).getImm();
1746 int64_t IsStream = MI.getOperand(i: 4).getImm();
1747 int64_t IsData = MI.getOperand(i: 5).getImm();
1748
1749 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1750 (!IsData << 3) | // IsDataCache bit
1751 (Target << 1) | // Cache level bits
1752 (unsigned)IsStream; // Stream bit
1753
1754 MIB.buildInstr(Opcode: AArch64::G_AARCH64_PREFETCH).addImm(Val: PrfOp).add(MO: AddrVal);
1755 MI.eraseFromParent();
1756 return true;
1757 }
1758 case Intrinsic::aarch64_range_prefetch: {
1759 auto &AddrVal = MI.getOperand(i: 1);
1760
1761 int64_t IsWrite = MI.getOperand(i: 2).getImm();
1762 int64_t IsStream = MI.getOperand(i: 3).getImm();
1763 unsigned PrfOp = (IsStream << 2) | IsWrite;
1764
1765 MIB.buildInstr(Opcode: AArch64::G_AARCH64_RANGE_PREFETCH)
1766 .addImm(Val: PrfOp)
1767 .add(MO: AddrVal)
1768 .addUse(RegNo: MI.getOperand(i: 4).getReg()); // Metadata
1769 MI.eraseFromParent();
1770 return true;
1771 }
1772 case Intrinsic::aarch64_neon_uaddv:
1773 case Intrinsic::aarch64_neon_saddv:
1774 case Intrinsic::aarch64_neon_umaxv:
1775 case Intrinsic::aarch64_neon_smaxv:
1776 case Intrinsic::aarch64_neon_uminv:
1777 case Intrinsic::aarch64_neon_sminv: {
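    // These across-lane reductions are selected with a result of the vector's
    // element type. If the intrinsic's result is wider, rewrite the def to the
    // element type here and sign/zero-extend back to the original destination
    // afterwards.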
1778 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1779 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1780 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1781
1782 auto OldDst = MI.getOperand(i: 0).getReg();
1783 auto OldDstTy = MRI.getType(Reg: OldDst);
1784 LLT NewDstTy = MRI.getType(Reg: MI.getOperand(i: 2).getReg()).getElementType();
1785 if (OldDstTy == NewDstTy)
1786 return true;
1787
1788 auto NewDst = MRI.createGenericVirtualRegister(Ty: NewDstTy);
1789
1790 Helper.Observer.changingInstr(MI);
1791 MI.getOperand(i: 0).setReg(NewDst);
1792 Helper.Observer.changedInstr(MI);
1793
1794 MIB.setInsertPt(MBB&: MIB.getMBB(), II: ++MIB.getInsertPt());
1795 MIB.buildExtOrTrunc(ExtOpc: IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1796 Res: OldDst, Op: NewDst);
1797
1798 return true;
1799 }
1800 case Intrinsic::aarch64_neon_uaddlp:
1801 case Intrinsic::aarch64_neon_saddlp: {
1802 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1803 ? AArch64::G_UADDLP
1804 : AArch64::G_SADDLP;
1805 MIB.buildInstr(Opc, DstOps: {MI.getOperand(i: 0)}, SrcOps: {MI.getOperand(i: 2)});
1806 MI.eraseFromParent();
1807
1808 return true;
1809 }
1810 case Intrinsic::aarch64_neon_uaddlv:
1811 case Intrinsic::aarch64_neon_saddlv: {
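    // Lower to the target G_UADDLV/G_SADDLV node, which produces a full vector
    // register: build the widened reduction as v4s32 (or v2s64 for 64-bit
    // elements), extract lane 0, and then truncate or copy to the scalar type
    // the intrinsic expects.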
1812 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1813 ? AArch64::G_UADDLV
1814 : AArch64::G_SADDLV;
1815 Register DstReg = MI.getOperand(i: 0).getReg();
1816 Register SrcReg = MI.getOperand(i: 2).getReg();
1817 LLT DstTy = MRI.getType(Reg: DstReg);
1818
1819 LLT MidTy, ExtTy;
1820 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1821 MidTy = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
1822 ExtTy = LLT::scalar(SizeInBits: 32);
1823 } else {
1824 MidTy = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64);
1825 ExtTy = LLT::scalar(SizeInBits: 64);
1826 }
1827
1828 Register MidReg =
1829 MIB.buildInstr(Opc, DstOps: {MidTy}, SrcOps: {SrcReg})->getOperand(i: 0).getReg();
1830 Register ZeroReg =
1831 MIB.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: 0)->getOperand(i: 0).getReg();
1832 Register ExtReg = MIB.buildInstr(Opc: AArch64::G_EXTRACT_VECTOR_ELT, DstOps: {ExtTy},
1833 SrcOps: {MidReg, ZeroReg})
1834 .getReg(Idx: 0);
1835
1836 if (DstTy.getScalarSizeInBits() < 32)
1837 MIB.buildTrunc(Res: DstReg, Op: ExtReg);
1838 else
1839 MIB.buildCopy(Res: DstReg, Op: ExtReg);
1840
1841 MI.eraseFromParent();
1842
1843 return true;
1844 }
1845 case Intrinsic::aarch64_neon_smax:
1846 return LowerBinOp(TargetOpcode::G_SMAX);
1847 case Intrinsic::aarch64_neon_smin:
1848 return LowerBinOp(TargetOpcode::G_SMIN);
1849 case Intrinsic::aarch64_neon_umax:
1850 return LowerBinOp(TargetOpcode::G_UMAX);
1851 case Intrinsic::aarch64_neon_umin:
1852 return LowerBinOp(TargetOpcode::G_UMIN);
1853 case Intrinsic::aarch64_neon_fmax:
1854 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1855 case Intrinsic::aarch64_neon_fmin:
1856 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1857 case Intrinsic::aarch64_neon_fmaxnm:
1858 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1859 case Intrinsic::aarch64_neon_fminnm:
1860 return LowerBinOp(TargetOpcode::G_FMINNUM);
1861 case Intrinsic::aarch64_neon_pmull:
1862 case Intrinsic::aarch64_neon_pmull64:
1863 return LowerBinOp(AArch64::G_PMULL);
1864 case Intrinsic::aarch64_neon_smull:
1865 return LowerBinOp(AArch64::G_SMULL);
1866 case Intrinsic::aarch64_neon_umull:
1867 return LowerBinOp(AArch64::G_UMULL);
1868 case Intrinsic::aarch64_neon_sabd:
1869 return LowerBinOp(TargetOpcode::G_ABDS);
1870 case Intrinsic::aarch64_neon_uabd:
1871 return LowerBinOp(TargetOpcode::G_ABDU);
1872 case Intrinsic::aarch64_neon_uhadd:
1873 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1874 case Intrinsic::aarch64_neon_urhadd:
1875 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1876 case Intrinsic::aarch64_neon_shadd:
1877 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1878 case Intrinsic::aarch64_neon_srhadd:
1879 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1880 case Intrinsic::aarch64_neon_sqshrn: {
1881 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1882 return true;
1883 // Create right shift instruction. Store the output register in Shr.
1884 auto Shr = MIB.buildInstr(Opc: AArch64::G_VASHR,
1885 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
1886 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
1887 // Build the narrow intrinsic, taking in Shr.
1888 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_SSAT_S, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
1889 MI.eraseFromParent();
1890 return true;
1891 }
1892 case Intrinsic::aarch64_neon_sqshrun: {
1893 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1894 return true;
1895 // Create right shift instruction. Store the output register in Shr.
1896 auto Shr = MIB.buildInstr(Opc: AArch64::G_VASHR,
1897 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
1898 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
1899 // Build the narrow intrinsic, taking in Shr.
1900 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_SSAT_U, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
1901 MI.eraseFromParent();
1902 return true;
1903 }
1904 case Intrinsic::aarch64_neon_sqrshrn: {
1905 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1906 return true;
1907 // Create right shift instruction. Store the output register in Shr.
1908 auto Shr = MIB.buildInstr(Opc: AArch64::G_SRSHR_I,
1909 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
1910 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
1911 // Build the narrow intrinsic, taking in Shr.
1912 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_SSAT_S, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
1913 MI.eraseFromParent();
1914 return true;
1915 }
1916 case Intrinsic::aarch64_neon_sqrshrun: {
1917 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1918 return true;
1919 // Create right shift instruction. Store the output register in Shr.
1920 auto Shr = MIB.buildInstr(Opc: AArch64::G_SRSHR_I,
1921 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
1922 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
1923 // Build the narrow intrinsic, taking in Shr.
1924 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_SSAT_U, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
1925 MI.eraseFromParent();
1926 return true;
1927 }
1928 case Intrinsic::aarch64_neon_uqrshrn: {
1929 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1930 return true;
1931 // Create right shift instruction. Store the output register in Shr.
1932 auto Shr = MIB.buildInstr(Opc: AArch64::G_URSHR_I,
1933 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
1934 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
1935 // Build the narrow intrinsic, taking in Shr.
1936 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_USAT_U, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
1937 MI.eraseFromParent();
1938 return true;
1939 }
1940 case Intrinsic::aarch64_neon_uqshrn: {
1941 if (!MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1942 return true;
1943 // Create right shift instruction. Store the output register in Shr.
1944 auto Shr = MIB.buildInstr(Opc: AArch64::G_VLSHR,
1945 DstOps: {MRI.getType(Reg: MI.getOperand(i: 2).getReg())},
1946 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3).getImm()});
1947 // Build the narrow intrinsic, taking in Shr.
1948 MIB.buildInstr(Opc: TargetOpcode::G_TRUNC_USAT_U, DstOps: {MI.getOperand(i: 0)}, SrcOps: {Shr});
1949 MI.eraseFromParent();
1950 return true;
1951 }
1952 case Intrinsic::aarch64_neon_sqshlu: {
1953     // Check whether the last operand is a constant or a constant splat vector.
1954 auto ShiftAmount = isConstantOrConstantSplatVector(
1955 MI&: *MRI.getVRegDef(Reg: MI.getOperand(i: 3).getReg()), MRI);
1956 if (ShiftAmount) {
1957 // If so, create a new intrinsic with the correct shift amount
1958 MIB.buildInstr(Opc: AArch64::G_SQSHLU_I, DstOps: {MI.getOperand(i: 0)},
1959 SrcOps: {MI.getOperand(i: 2)})
1960 .addImm(Val: ShiftAmount->getSExtValue());
1961 MI.eraseFromParent();
1962 return true;
1963 }
1964 return false;
1965 }
1966 case Intrinsic::aarch64_neon_vsli: {
1967 MIB.buildInstr(
1968 Opc: AArch64::G_SLI, DstOps: {MI.getOperand(i: 0)},
1969 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3), MI.getOperand(i: 4).getImm()});
1970 MI.eraseFromParent();
1971 break;
1972 }
1973 case Intrinsic::aarch64_neon_vsri: {
1974 MIB.buildInstr(
1975 Opc: AArch64::G_SRI, DstOps: {MI.getOperand(i: 0)},
1976 SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3), MI.getOperand(i: 4).getImm()});
1977 MI.eraseFromParent();
1978 break;
1979 }
1980 case Intrinsic::aarch64_neon_abs: {
1981 // Lower the intrinsic to G_ABS.
1982 MIB.buildInstr(Opc: TargetOpcode::G_ABS, DstOps: {MI.getOperand(i: 0)}, SrcOps: {MI.getOperand(i: 2)});
1983 MI.eraseFromParent();
1984 return true;
1985 }
1986 case Intrinsic::aarch64_neon_sqadd: {
1987 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1988 return LowerBinOp(TargetOpcode::G_SADDSAT);
1989 break;
1990 }
1991 case Intrinsic::aarch64_neon_sqsub: {
1992 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1993 return LowerBinOp(TargetOpcode::G_SSUBSAT);
1994 break;
1995 }
1996 case Intrinsic::aarch64_neon_uqadd: {
1997 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
1998 return LowerBinOp(TargetOpcode::G_UADDSAT);
1999 break;
2000 }
2001 case Intrinsic::aarch64_neon_uqsub: {
2002 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isVector())
2003 return LowerBinOp(TargetOpcode::G_USUBSAT);
2004 break;
2005 }
2006 case Intrinsic::aarch64_neon_udot:
2007 return LowerTriOp(AArch64::G_UDOT);
2008 case Intrinsic::aarch64_neon_sdot:
2009 return LowerTriOp(AArch64::G_SDOT);
2010 case Intrinsic::aarch64_neon_usdot:
2011 return LowerTriOp(AArch64::G_USDOT);
2012 case Intrinsic::aarch64_neon_sqxtn:
2013 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
2014 case Intrinsic::aarch64_neon_sqxtun:
2015 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
2016 case Intrinsic::aarch64_neon_uqxtn:
2017 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
2018
2019 case Intrinsic::vector_reverse:
2020 // TODO: Add support for vector_reverse
2021 return false;
2022 }
2023
2024 return true;
2025}
2026
2027bool AArch64LegalizerInfo::legalizeShlAshrLshr(
2028 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
2029 GISelChangeObserver &Observer) const {
2030 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
2031 MI.getOpcode() == TargetOpcode::G_LSHR ||
2032 MI.getOpcode() == TargetOpcode::G_SHL);
2033 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
2034 // imported patterns can select it later. Either way, it will be legal.
2035 Register AmtReg = MI.getOperand(i: 2).getReg();
2036 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: AmtReg, MRI);
2037 if (!VRegAndVal)
2038 return true;
2039 // Check the shift amount is in range for an immediate form.
2040 int64_t Amount = VRegAndVal->Value.getSExtValue();
2041 if (Amount > 31)
2042 return true; // This will have to remain a register variant.
2043 auto ExtCst = MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: Amount);
2044 Observer.changingInstr(MI);
2045 MI.getOperand(i: 2).setReg(ExtCst.getReg(Idx: 0));
2046 Observer.changedInstr(MI);
2047 return true;
2048}
2049
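// Try to fold a G_PTR_ADD with a constant offset into the LDP/STP addressing
// mode. The offset must be a signed 7-bit immediate scaled by 8 (the caller
// divides it by 8 when emitting the instruction); otherwise fall back to the
// plain base register with a zero offset.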
2050static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
2051 MachineRegisterInfo &MRI) {
2052 Base = Root;
2053 Offset = 0;
2054
2055 Register NewBase;
2056 int64_t NewOffset;
2057 if (mi_match(R: Root, MRI, P: m_GPtrAdd(L: m_Reg(R&: NewBase), R: m_ICst(Cst&: NewOffset))) &&
2058 isShiftedInt<7, 3>(x: NewOffset)) {
2059 Base = NewBase;
2060 Offset = NewOffset;
2061 }
2062}
2063
2064// FIXME: This should be removed and replaced with the generic bitcast legalize
2065// action.
2066bool AArch64LegalizerInfo::legalizeLoadStore(
2067 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
2068 GISelChangeObserver &Observer) const {
2069 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
2070 MI.getOpcode() == TargetOpcode::G_LOAD);
2071 // Here we just try to handle vector loads/stores where our value type might
2072 // have pointer elements, which the SelectionDAG importer can't handle. To
2073 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
2074 // the value to use s64 types.
2075
2076   // Custom legalization requires that the instruction, if not deleted, be
2077   // fully legalized. To allow further legalization of the instruction, we
2078   // create a new instruction and erase the existing one.
2079
2080 Register ValReg = MI.getOperand(i: 0).getReg();
2081 const LLT ValTy = MRI.getType(Reg: ValReg);
2082
2083 if (ValTy == LLT::scalar(SizeInBits: 128)) {
2084
2085 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
2086 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2087 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
2088 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
2089 bool IsRcpC3 =
2090 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
2091
2092 LLT s64 = LLT::scalar(SizeInBits: 64);
2093
2094 unsigned Opcode;
2095 if (IsRcpC3) {
2096 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
2097 } else {
2098 // For LSE2, loads/stores should have been converted to monotonic and had
2099 // a fence inserted after them.
2100 assert(Ordering == AtomicOrdering::Monotonic ||
2101 Ordering == AtomicOrdering::Unordered);
2102 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
2103
2104 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
2105 }
2106
2107 MachineInstrBuilder NewI;
2108 if (IsLoad) {
2109 NewI = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {s64, s64}, SrcOps: {});
2110 MIRBuilder.buildMergeLikeInstr(
2111 Res: ValReg, Ops: {NewI->getOperand(i: 0), NewI->getOperand(i: 1)});
2112 } else {
2113 auto Split = MIRBuilder.buildUnmerge(Res: s64, Op: MI.getOperand(i: 0));
2114 NewI = MIRBuilder.buildInstr(
2115 Opc: Opcode, DstOps: {}, SrcOps: {Split->getOperand(i: 0), Split->getOperand(i: 1)});
2116 }
2117
2118 if (IsRcpC3) {
2119 NewI.addUse(RegNo: MI.getOperand(i: 1).getReg());
2120 } else {
2121 Register Base;
2122 int Offset;
2123 matchLDPSTPAddrMode(Root: MI.getOperand(i: 1).getReg(), Base, Offset, MRI);
2124 NewI.addUse(RegNo: Base);
2125 NewI.addImm(Val: Offset / 8);
2126 }
2127
2128 NewI.cloneMemRefs(OtherMI: MI);
2129 constrainSelectedInstRegOperands(I&: *NewI, TII: *ST->getInstrInfo(),
2130 TRI: *MRI.getTargetRegisterInfo(),
2131 RBI: *ST->getRegBankInfo());
2132 MI.eraseFromParent();
2133 return true;
2134 }
2135
2136 if (!ValTy.isPointerVector() ||
2137 ValTy.getElementType().getAddressSpace() != 0) {
2138 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
2139 return false;
2140 }
2141
2142 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
2143 const LLT NewTy = LLT::vector(EC: ValTy.getElementCount(), ScalarSizeInBits: PtrSize);
2144 auto &MMO = **MI.memoperands_begin();
2145 MMO.setType(NewTy);
2146
2147 if (MI.getOpcode() == TargetOpcode::G_STORE) {
2148 auto Bitcast = MIRBuilder.buildBitcast(Dst: NewTy, Src: ValReg);
2149 MIRBuilder.buildStore(Val: Bitcast.getReg(Idx: 0), Addr: MI.getOperand(i: 1), MMO);
2150 } else {
2151 auto NewLoad = MIRBuilder.buildLoad(Res: NewTy, Addr: MI.getOperand(i: 1), MMO);
2152 MIRBuilder.buildBitcast(Dst: ValReg, Src: NewLoad);
2153 }
2154 MI.eraseFromParent();
2155 return true;
2156}
2157
2158bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
2159 MachineRegisterInfo &MRI,
2160 MachineIRBuilder &MIRBuilder) const {
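  // For the AAPCS va_list this emits, roughly (a sketch, not exact MIR):
  //   %list = G_LOAD %ListPtr                 ; current argument pointer
  //   %list = align-up(%list, Alignment)      ; only if the arg is over-aligned
  //   %Dst  = G_LOAD %list                    ; the argument value itself
  //   G_STORE %list + alignTo(size, ptr-align), %ListPtr  ; bump the pointer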
2161 MachineFunction &MF = MIRBuilder.getMF();
2162 Align Alignment(MI.getOperand(i: 2).getImm());
2163 Register Dst = MI.getOperand(i: 0).getReg();
2164 Register ListPtr = MI.getOperand(i: 1).getReg();
2165
2166 LLT PtrTy = MRI.getType(Reg: ListPtr);
2167 LLT IntPtrTy = LLT::scalar(SizeInBits: PtrTy.getSizeInBits());
2168
2169 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2170 const Align PtrAlign = Align(PtrSize);
2171 auto List = MIRBuilder.buildLoad(
2172 Res: PtrTy, Addr: ListPtr,
2173 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad,
2174 MemTy: PtrTy, base_alignment: PtrAlign));
2175
2176 MachineInstrBuilder DstPtr;
2177 if (Alignment > PtrAlign) {
2178 // Realign the list to the actual required alignment.
2179 auto AlignMinus1 =
2180 MIRBuilder.buildConstant(Res: IntPtrTy, Val: Alignment.value() - 1);
2181 auto ListTmp = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: List, Op1: AlignMinus1.getReg(Idx: 0));
2182 DstPtr = MIRBuilder.buildMaskLowPtrBits(Res: PtrTy, Op0: ListTmp, NumBits: Log2(A: Alignment));
2183 } else
2184 DstPtr = List;
2185
2186 LLT ValTy = MRI.getType(Reg: Dst);
2187 uint64_t ValSize = ValTy.getSizeInBits() / 8;
2188 MIRBuilder.buildLoad(
2189 Res: Dst, Addr: DstPtr,
2190 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad,
2191 MemTy: ValTy, base_alignment: std::max(a: Alignment, b: PtrAlign)));
2192
2193 auto Size = MIRBuilder.buildConstant(Res: IntPtrTy, Val: alignTo(Size: ValSize, A: PtrAlign));
2194
2195 auto NewList = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: DstPtr, Op1: Size.getReg(Idx: 0));
2196
2197 MIRBuilder.buildStore(Val: NewList, Addr: ListPtr,
2198 MMO&: *MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
2199 f: MachineMemOperand::MOStore,
2200 MemTy: PtrTy, base_alignment: PtrAlign));
2201
2202 MI.eraseFromParent();
2203 return true;
2204}
2205
2206bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2207 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2208 // Only legal if we can select immediate forms.
2209 // TODO: Lower this otherwise.
2210 return getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 2).getReg(), MRI) &&
2211 getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 3).getReg(), MRI);
2212}
2213
2214bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2215 MachineRegisterInfo &MRI,
2216 LegalizerHelper &Helper) const {
2217 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2218 // it can be more efficiently lowered to the following sequence that uses
2219 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2220 // registers are cheap.
2221 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2222 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2223 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2224 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2225 //
2226 // For 128 bit vector popcounts, we lower to the following sequence:
2227 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2228 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2229 // uaddlp.4s v0, v0 // v4s32, v2s64
2230 // uaddlp.2d v0, v0 // v2s64
2231 //
2232 // For 64 bit vector popcounts, we lower to the following sequence:
2233 // cnt.8b v0, v0 // v4s16, v2s32
2234 // uaddlp.4h v0, v0 // v4s16, v2s32
2235 // uaddlp.2s v0, v0 // v2s32
2236
2237 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2238 Register Dst = MI.getOperand(i: 0).getReg();
2239 Register Val = MI.getOperand(i: 1).getReg();
2240 LLT Ty = MRI.getType(Reg: Val);
2241 unsigned Size = Ty.getSizeInBits();
2242
2243 assert(Ty == MRI.getType(Dst) &&
2244 "Expected src and dst to have the same type!");
2245
2246 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2247 LLT s64 = LLT::scalar(SizeInBits: 64);
2248
2249 auto Split = MIRBuilder.buildUnmerge(Res: s64, Op: Val);
2250 auto CTPOP1 = MIRBuilder.buildCTPOP(Dst: s64, Src0: Split->getOperand(i: 0));
2251 auto CTPOP2 = MIRBuilder.buildCTPOP(Dst: s64, Src0: Split->getOperand(i: 1));
2252 auto Add = MIRBuilder.buildAdd(Dst: s64, Src0: CTPOP1, Src1: CTPOP2);
2253
2254 MIRBuilder.buildZExt(Res: Dst, Op: Add);
2255 MI.eraseFromParent();
2256 return true;
2257 }
2258
2259 if (!ST->hasNEON() ||
2260 MI.getMF()->getFunction().hasFnAttribute(Kind: Attribute::NoImplicitFloat)) {
2261 // Use generic lowering when custom lowering is not possible.
2262 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2263 Helper.lowerBitCount(MI) ==
2264 LegalizerHelper::LegalizeResult::Legalized;
2265 }
2266
2267 // Pre-conditioning: widen Val up to the nearest vector type.
2268 // s32,s64,v4s16,v2s32 -> v8i8
2269 // v8s16,v4s32,v2s64 -> v16i8
2270 LLT VTy = Size == 128 ? LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8) : LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8);
2271 if (Ty.isScalar()) {
2272     assert((Size == 32 || Size == 64 || Size == 128) &&
            "Expected only 32, 64, or 128 bit scalars!");
2273 if (Size == 32) {
2274 Val = MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 64), Op: Val).getReg(Idx: 0);
2275 }
2276 }
2277 Val = MIRBuilder.buildBitcast(Dst: VTy, Src: Val).getReg(Idx: 0);
2278
2279 // Count bits in each byte-sized lane.
2280 auto CTPOP = MIRBuilder.buildCTPOP(Dst: VTy, Src0: Val);
2281
2282 // Sum across lanes.
2283
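  // With +dotprod, a UDOT of the byte counts against an all-ones vector adds
  // each group of four bytes into a 32-bit lane in a single instruction; the
  // v2s64 case then needs one extra UADDLP to pair up the 32-bit lanes.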
2284 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2285 Ty.getScalarSizeInBits() != 16) {
2286 LLT Dt = Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) ? LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32) : Ty;
2287 auto Zeros = MIRBuilder.buildConstant(Res: Dt, Val: 0);
2288 auto Ones = MIRBuilder.buildConstant(Res: VTy, Val: 1);
2289 MachineInstrBuilder Sum;
2290
2291 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
2292 auto UDOT =
2293 MIRBuilder.buildInstr(Opc: AArch64::G_UDOT, DstOps: {Dt}, SrcOps: {Zeros, Ones, CTPOP});
2294 Sum = MIRBuilder.buildInstr(Opc: AArch64::G_UADDLP, DstOps: {Ty}, SrcOps: {UDOT});
2295 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
2296 Sum = MIRBuilder.buildInstr(Opc: AArch64::G_UDOT, DstOps: {Dt}, SrcOps: {Zeros, Ones, CTPOP});
2297 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
2298 Sum = MIRBuilder.buildInstr(Opc: AArch64::G_UDOT, DstOps: {Dt}, SrcOps: {Zeros, Ones, CTPOP});
2299 } else {
2300 llvm_unreachable("unexpected vector shape");
2301 }
2302
2303 Sum->getOperand(i: 0).setReg(Dst);
2304 MI.eraseFromParent();
2305 return true;
2306 }
2307
2308 Register HSum = CTPOP.getReg(Idx: 0);
2309 unsigned Opc;
2310 SmallVector<LLT> HAddTys;
2311 if (Ty.isScalar()) {
2312 Opc = Intrinsic::aarch64_neon_uaddlv;
2313 HAddTys.push_back(Elt: LLT::scalar(SizeInBits: 32));
2314 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) {
2315 Opc = Intrinsic::aarch64_neon_uaddlp;
2316 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16));
2317 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
2318 Opc = Intrinsic::aarch64_neon_uaddlp;
2319 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16));
2320 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32));
2321 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
2322 Opc = Intrinsic::aarch64_neon_uaddlp;
2323 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16));
2324 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32));
2325 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64));
2326 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) {
2327 Opc = Intrinsic::aarch64_neon_uaddlp;
2328 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16));
2329 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
2330 Opc = Intrinsic::aarch64_neon_uaddlp;
2331 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16));
2332 HAddTys.push_back(Elt: LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32));
2333 } else
2334 llvm_unreachable("unexpected vector shape");
2335 MachineInstrBuilder UADD;
2336 for (LLT HTy : HAddTys) {
2337 UADD = MIRBuilder.buildIntrinsic(ID: Opc, Res: {HTy}).addUse(RegNo: HSum);
2338 HSum = UADD.getReg(Idx: 0);
2339 }
2340
2341 // Post-conditioning.
2342 if (Ty.isScalar() && (Size == 64 || Size == 128))
2343 MIRBuilder.buildZExt(Res: Dst, Op: UADD);
2344 else
2345 UADD->getOperand(i: 0).setReg(Dst);
2346 MI.eraseFromParent();
2347 return true;
2348}
2349
2350bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2351 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2352 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2353 LLT s64 = LLT::scalar(SizeInBits: 64);
2354 auto Addr = MI.getOperand(i: 1).getReg();
2355 auto DesiredI = MIRBuilder.buildUnmerge(Res: {s64, s64}, Op: MI.getOperand(i: 2));
2356 auto NewI = MIRBuilder.buildUnmerge(Res: {s64, s64}, Op: MI.getOperand(i: 3));
2357 auto DstLo = MRI.createGenericVirtualRegister(Ty: s64);
2358 auto DstHi = MRI.createGenericVirtualRegister(Ty: s64);
2359
2360 MachineInstrBuilder CAS;
2361 if (ST->hasLSE()) {
2362 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2363 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2364 // the rest of the MIR so we must reassemble the extracted registers into a
2365 // 128-bit known-regclass one with code like this:
2366 //
2367 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2368 // %out = CASP %in1, ...
2369 // %OldLo = G_EXTRACT %out, 0
2370 // %OldHi = G_EXTRACT %out, 64
2371 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2372 unsigned Opcode;
2373 switch (Ordering) {
2374 case AtomicOrdering::Acquire:
2375 Opcode = AArch64::CASPAX;
2376 break;
2377 case AtomicOrdering::Release:
2378 Opcode = AArch64::CASPLX;
2379 break;
2380 case AtomicOrdering::AcquireRelease:
2381 case AtomicOrdering::SequentiallyConsistent:
2382 Opcode = AArch64::CASPALX;
2383 break;
2384 default:
2385 Opcode = AArch64::CASPX;
2386 break;
2387 }
2388
2389 LLT s128 = LLT::scalar(SizeInBits: 128);
2390 auto CASDst = MRI.createGenericVirtualRegister(Ty: s128);
2391 auto CASDesired = MRI.createGenericVirtualRegister(Ty: s128);
2392 auto CASNew = MRI.createGenericVirtualRegister(Ty: s128);
2393 MIRBuilder.buildInstr(Opc: TargetOpcode::REG_SEQUENCE, DstOps: {CASDesired}, SrcOps: {})
2394 .addUse(RegNo: DesiredI->getOperand(i: 0).getReg())
2395 .addImm(Val: AArch64::sube64)
2396 .addUse(RegNo: DesiredI->getOperand(i: 1).getReg())
2397 .addImm(Val: AArch64::subo64);
2398 MIRBuilder.buildInstr(Opc: TargetOpcode::REG_SEQUENCE, DstOps: {CASNew}, SrcOps: {})
2399 .addUse(RegNo: NewI->getOperand(i: 0).getReg())
2400 .addImm(Val: AArch64::sube64)
2401 .addUse(RegNo: NewI->getOperand(i: 1).getReg())
2402 .addImm(Val: AArch64::subo64);
2403
2404 CAS = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {CASDst}, SrcOps: {CASDesired, CASNew, Addr});
2405
2406 MIRBuilder.buildExtract(Res: {DstLo}, Src: {CASDst}, Index: 0);
2407 MIRBuilder.buildExtract(Res: {DstHi}, Src: {CASDst}, Index: 64);
2408 } else {
2409     // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2410     // can take arbitrary registers, so it just has the normal GPR64 operands
2411     // that the rest of AArch64 is expecting.
2412 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2413 unsigned Opcode;
2414 switch (Ordering) {
2415 case AtomicOrdering::Acquire:
2416 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2417 break;
2418 case AtomicOrdering::Release:
2419 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2420 break;
2421 case AtomicOrdering::AcquireRelease:
2422 case AtomicOrdering::SequentiallyConsistent:
2423 Opcode = AArch64::CMP_SWAP_128;
2424 break;
2425 default:
2426 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2427 break;
2428 }
2429
2430 auto Scratch = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
2431 CAS = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {DstLo, DstHi, Scratch},
2432 SrcOps: {Addr, DesiredI->getOperand(i: 0),
2433 DesiredI->getOperand(i: 1), NewI->getOperand(i: 0),
2434 NewI->getOperand(i: 1)});
2435 }
2436
2437 CAS.cloneMemRefs(OtherMI: MI);
2438 constrainSelectedInstRegOperands(I&: *CAS, TII: *ST->getInstrInfo(),
2439 TRI: *MRI.getTargetRegisterInfo(),
2440 RBI: *ST->getRegBankInfo());
2441
2442 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0), Ops: {DstLo, DstHi});
2443 MI.eraseFromParent();
2444 return true;
2445}
2446
2447bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2448 LegalizerHelper &Helper) const {
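  // Lower CTTZ(x) as CTLZ(BITREVERSE(x)); the bit-reverse and count-leading-
  // zeros map onto RBIT + CLZ.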
2449 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2450 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2451 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
2452 auto BitReverse = MIRBuilder.buildBitReverse(Dst: Ty, Src: MI.getOperand(i: 1));
2453 MIRBuilder.buildCTLZ(Dst: MI.getOperand(i: 0).getReg(), Src0: BitReverse);
2454 MI.eraseFromParent();
2455 return true;
2456}
2457
2458bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2459 LegalizerHelper &Helper) const {
2460 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2461
2462   // The tagged version (aarch64_mops_memset_tag) is legalized in legalizeIntrinsic.
2463 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2464 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2465 // the instruction).
2466 auto &Value = MI.getOperand(i: 1);
2467 Register ExtValueReg =
2468 MIRBuilder.buildAnyExt(Res: LLT::scalar(SizeInBits: 64), Op: Value).getReg(Idx: 0);
2469 Value.setReg(ExtValueReg);
2470 return true;
2471 }
2472
2473 return false;
2474}
2475
2476bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2477 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
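  // Extracts with a constant index, and extracts from scalable vectors, are
  // left as-is for selection; everything else falls back to the generic
  // lowering.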
2478 const GExtractVectorElement *Element = cast<GExtractVectorElement>(Val: &MI);
2479 auto VRegAndVal =
2480 getIConstantVRegValWithLookThrough(VReg: Element->getIndexReg(), MRI);
2481 if (VRegAndVal)
2482 return true;
2483 LLT VecTy = MRI.getType(Reg: Element->getVectorReg());
2484 if (VecTy.isScalableVector())
2485 return true;
2486 return Helper.lowerExtractInsertVectorElt(MI) !=
2487 LegalizerHelper::LegalizeResult::UnableToLegalize;
2488}
2489
2490bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2491 MachineInstr &MI, LegalizerHelper &Helper) const {
2492 MachineFunction &MF = *MI.getParent()->getParent();
2493 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2494 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2495
2496 // If stack probing is not enabled for this function, use the default
2497 // lowering.
2498 if (!MF.getFunction().hasFnAttribute(Kind: "probe-stack") ||
2499 MF.getFunction().getFnAttribute(Kind: "probe-stack").getValueAsString() !=
2500 "inline-asm") {
2501 Helper.lowerDynStackAlloc(MI);
2502 return true;
2503 }
2504
2505 Register Dst = MI.getOperand(i: 0).getReg();
2506 Register AllocSize = MI.getOperand(i: 1).getReg();
2507 Align Alignment = assumeAligned(Value: MI.getOperand(i: 2).getImm());
2508
2509 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2510 "Unexpected type for dynamic alloca");
2511 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2512 "Unexpected type for dynamic alloca");
2513
2514 LLT PtrTy = MRI.getType(Reg: Dst);
2515 Register SPReg =
2516 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2517 Register SPTmp =
2518 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
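  // Hand the target SP value to the probing pseudo, which performs the actual
  // page-by-page probed update of SP; the aligned SPTmp is then the result.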
2519 auto NewMI =
2520 MIRBuilder.buildInstr(Opc: AArch64::PROBED_STACKALLOC_DYN, DstOps: {}, SrcOps: {SPTmp});
2521 MRI.setRegClass(Reg: NewMI.getReg(Idx: 0), RC: &AArch64::GPR64commonRegClass);
2522 MIRBuilder.setInsertPt(MBB&: *NewMI->getParent(), II: NewMI);
2523 MIRBuilder.buildCopy(Res: Dst, Op: SPTmp);
2524
2525 MI.eraseFromParent();
2526 return true;
2527}
2528
2529bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2530 LegalizerHelper &Helper) const {
2531 MachineIRBuilder &MIB = Helper.MIRBuilder;
2532 auto &AddrVal = MI.getOperand(i: 0);
2533
2534 int64_t IsWrite = MI.getOperand(i: 1).getImm();
2535 int64_t Locality = MI.getOperand(i: 2).getImm();
2536 int64_t IsData = MI.getOperand(i: 3).getImm();
2537
2538 bool IsStream = Locality == 0;
2539 if (Locality != 0) {
2540 assert(Locality <= 3 && "Prefetch locality out-of-range");
2541     // The IR locality degree (3 = highest temporal locality) is the inverse of
2542     // the PRFM target encoding, which starts at 0 for L1.
2543     // Flip the number around.
2544 Locality = 3 - Locality;
2545 }
2546
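  // PRFM prfop immediate: bit 4 selects store (PST) vs load (PLD), bit 3
  // selects instruction (PLI) vs data, bits 2:1 encode the target cache level,
  // and bit 0 is the streaming (non-temporal) hint.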
2547 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2548
2549 MIB.buildInstr(Opcode: AArch64::G_AARCH64_PREFETCH).addImm(Val: PrfOp).add(MO: AddrVal);
2550 MI.eraseFromParent();
2551 return true;
2552}
2553
2554bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
2555 MachineIRBuilder &MIRBuilder,
2556 MachineRegisterInfo &MRI) const {
2557 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
2558 assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
2559 "Expected a power of 2 elements");
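  // Truncating f64 -> f32 -> f16 with round-to-nearest at both steps can
  // double-round and differ from a single f64 -> f16 rounding. Rounding the
  // first step to odd (G_FPTRUNC_ODD) preserves the sticky bits, so the final
  // f32 -> f16 truncation produces the correctly rounded result.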
2560
2561 LLT s16 = LLT::scalar(SizeInBits: 16);
2562 LLT s32 = LLT::scalar(SizeInBits: 32);
2563 LLT s64 = LLT::scalar(SizeInBits: 64);
2564 LLT v2s16 = LLT::fixed_vector(NumElements: 2, ScalarTy: s16);
2565 LLT v4s16 = LLT::fixed_vector(NumElements: 4, ScalarTy: s16);
2566 LLT v2s32 = LLT::fixed_vector(NumElements: 2, ScalarTy: s32);
2567 LLT v4s32 = LLT::fixed_vector(NumElements: 4, ScalarTy: s32);
2568 LLT v2s64 = LLT::fixed_vector(NumElements: 2, ScalarTy: s64);
2569
2570 SmallVector<Register> RegsToUnmergeTo;
2571 SmallVector<Register> TruncOddDstRegs;
2572 SmallVector<Register> RegsToMerge;
2573
2574 unsigned ElemCount = SrcTy.getNumElements();
2575
2576   // Find the biggest chunk size we can work with.
2577 int StepSize = ElemCount % 4 ? 2 : 4;
2578
2579   // If the element count is a power of 2 greater than 2, first unmerge the
2580   // source into enough v2s64 pieces.
2581 if (ElemCount <= 2)
2582 RegsToUnmergeTo.push_back(Elt: Src);
2583 else {
2584 for (unsigned i = 0; i < ElemCount / 2; ++i)
2585 RegsToUnmergeTo.push_back(Elt: MRI.createGenericVirtualRegister(Ty: v2s64));
2586
2587 MIRBuilder.buildUnmerge(Res: RegsToUnmergeTo, Op: Src);
2588 }
2589
2590 // Create all of the round-to-odd instructions and store them
2591 for (auto SrcReg : RegsToUnmergeTo) {
2592 Register Mid =
2593 MIRBuilder.buildInstr(Opc: AArch64::G_FPTRUNC_ODD, DstOps: {v2s32}, SrcOps: {SrcReg})
2594 .getReg(Idx: 0);
2595 TruncOddDstRegs.push_back(Elt: Mid);
2596 }
2597
2598   // Truncate v4s32 to v4s16 where we can to reduce the instruction count;
2599   // otherwise truncate v2s32 to v2s16.
2600 unsigned Index = 0;
2601 for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
2602 if (StepSize == 4) {
2603 Register ConcatDst =
2604 MIRBuilder
2605 .buildMergeLikeInstr(
2606 Res: {v4s32}, Ops: {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
2607 .getReg(Idx: 0);
2608
2609 RegsToMerge.push_back(
2610 Elt: MIRBuilder.buildFPTrunc(Res: v4s16, Op: ConcatDst).getReg(Idx: 0));
2611 } else {
2612 RegsToMerge.push_back(
2613 Elt: MIRBuilder.buildFPTrunc(Res: v2s16, Op: TruncOddDstRegs[Index++]).getReg(Idx: 0));
2614 }
2615 }
2616
2617 // If there is only one register, replace the destination
2618 if (RegsToMerge.size() == 1) {
2619 MRI.replaceRegWith(FromReg: Dst, ToReg: RegsToMerge.pop_back_val());
2620 MI.eraseFromParent();
2621 return true;
2622 }
2623
2624   // Merge the remaining results and replace the destination register.
2625 Register Fin = MIRBuilder.buildMergeLikeInstr(Res: DstTy, Ops: RegsToMerge).getReg(Idx: 0);
2626 MRI.replaceRegWith(FromReg: Dst, ToReg: Fin);
2627 MI.eraseFromParent();
2628 return true;
2629}
2630