1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16#include "llvm/CodeGen/GlobalISel/CallLowering.h"
17#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
19#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
21#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
22#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
23#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
24#include "llvm/CodeGen/GlobalISel/Utils.h"
25#include "llvm/CodeGen/LowLevelTypeUtils.h"
26#include "llvm/CodeGen/MachineConstantPool.h"
27#include "llvm/CodeGen/MachineFrameInfo.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/CodeGen/RuntimeLibcallUtil.h"
30#include "llvm/CodeGen/TargetFrameLowering.h"
31#include "llvm/CodeGen/TargetInstrInfo.h"
32#include "llvm/CodeGen/TargetLowering.h"
33#include "llvm/CodeGen/TargetOpcodes.h"
34#include "llvm/CodeGen/TargetSubtargetInfo.h"
35#include "llvm/IR/Instructions.h"
36#include "llvm/Support/Debug.h"
37#include "llvm/Support/MathExtras.h"
38#include "llvm/Support/raw_ostream.h"
39#include "llvm/Target/TargetMachine.h"
40#include <cassert>
41#include <numeric>
42#include <optional>
43
44#define DEBUG_TYPE "legalizer"
45
46using namespace llvm;
47using namespace LegalizeActions;
48using namespace MIPatternMatch;
49
50/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
51///
52/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
53/// with any leftover piece as type \p LeftoverTy
54///
55/// Returns -1 in the first element of the pair if the breakdown is not
56/// satisfiable.
57static std::pair<int, int>
58getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
59 assert(!LeftoverTy.isValid() && "this is an out argument");
60
61 unsigned Size = OrigTy.getSizeInBits();
62 unsigned NarrowSize = NarrowTy.getSizeInBits();
63 unsigned NumParts = Size / NarrowSize;
64 unsigned LeftoverSize = Size - NumParts * NarrowSize;
65 assert(Size > NarrowSize);
66
67 if (LeftoverSize == 0)
68 return {NumParts, 0};
69
70 if (NarrowTy.isVector()) {
71 unsigned EltSize = OrigTy.getScalarSizeInBits();
72 if (LeftoverSize % EltSize != 0)
73 return {-1, -1};
74 LeftoverTy = OrigTy.changeElementCount(
75 EC: ElementCount::getFixed(MinVal: LeftoverSize / EltSize));
76 } else {
77 LeftoverTy = LLT::integer(SizeInBits: LeftoverSize);
78 }
79
80 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
81 return std::make_pair(x&: NumParts, y&: NumLeftover);
82}
83
84static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
85
86 if (!Ty.isScalar())
87 return nullptr;
88
89 switch (Ty.getSizeInBits()) {
90 case 16:
91 return Type::getHalfTy(C&: Ctx);
92 case 32:
93 return Type::getFloatTy(C&: Ctx);
94 case 64:
95 return Type::getDoubleTy(C&: Ctx);
96 case 80:
97 return Type::getX86_FP80Ty(C&: Ctx);
98 case 128:
99 return Type::getFP128Ty(C&: Ctx);
100 default:
101 return nullptr;
102 }
103}
104
105LegalizerHelper::LegalizerHelper(MachineFunction &MF,
106 GISelChangeObserver &Observer,
107 MachineIRBuilder &Builder,
108 const LibcallLoweringInfo *Libcalls)
109 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
110 LI(*MF.getSubtarget().getLegalizerInfo()),
111 TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls) {}
112
113LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
114 GISelChangeObserver &Observer,
115 MachineIRBuilder &B,
116 const LibcallLoweringInfo *Libcalls,
117 GISelValueTracking *VT)
118 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
119 TLI(*MF.getSubtarget().getTargetLowering()), Libcalls(Libcalls), VT(VT) {}
120
121LegalizerHelper::LegalizeResult
122LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
123 LostDebugLocObserver &LocObserver) {
124 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
125
126 MIRBuilder.setInstrAndDebugLoc(MI);
127
128 if (isa<GIntrinsic>(Val: MI))
129 return LI.legalizeIntrinsic(Helper&: *this, MI) ? Legalized : UnableToLegalize;
130 auto Step = LI.getAction(MI, MRI);
131 switch (Step.Action) {
132 case Legal:
133 LLVM_DEBUG(dbgs() << ".. Already legal\n");
134 return AlreadyLegal;
135 case Libcall:
136 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
137 return libcall(MI, LocObserver);
138 case NarrowScalar:
139 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
140 return narrowScalar(MI, TypeIdx: Step.TypeIdx, NarrowTy: Step.NewType);
141 case WidenScalar:
142 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
143 return widenScalar(MI, TypeIdx: Step.TypeIdx, WideTy: Step.NewType);
144 case Bitcast:
145 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
146 return bitcast(MI, TypeIdx: Step.TypeIdx, Ty: Step.NewType);
147 case Lower:
148 LLVM_DEBUG(dbgs() << ".. Lower\n");
149 return lower(MI, TypeIdx: Step.TypeIdx, Ty: Step.NewType);
150 case FewerElements:
151 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
152 return fewerElementsVector(MI, TypeIdx: Step.TypeIdx, NarrowTy: Step.NewType);
153 case MoreElements:
154 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
155 return moreElementsVector(MI, TypeIdx: Step.TypeIdx, MoreTy: Step.NewType);
156 case Custom:
157 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
158 return LI.legalizeCustom(Helper&: *this, MI, LocObserver) ? Legalized
159 : UnableToLegalize;
160 default:
161 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
162 return UnableToLegalize;
163 }
164}
165
166void LegalizerHelper::insertParts(Register DstReg,
167 LLT ResultTy, LLT PartTy,
168 ArrayRef<Register> PartRegs,
169 LLT LeftoverTy,
170 ArrayRef<Register> LeftoverRegs) {
171 if (!LeftoverTy.isValid()) {
172 assert(LeftoverRegs.empty());
173
174 if (!ResultTy.isVector()) {
175 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: PartRegs);
176 return;
177 }
178
179 if (PartTy.isVector())
180 MIRBuilder.buildConcatVectors(Res: DstReg, Ops: PartRegs);
181 else
182 MIRBuilder.buildBuildVector(Res: DstReg, Ops: PartRegs);
183 return;
184 }
185
186 // Merge sub-vectors with different number of elements and insert into DstReg.
187 if (ResultTy.isVector()) {
188 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
189 SmallVector<Register, 8> AllRegs(PartRegs);
190 AllRegs.append(in_start: LeftoverRegs.begin(), in_end: LeftoverRegs.end());
191 return mergeMixedSubvectors(DstReg, PartRegs: AllRegs);
192 }
193
194 SmallVector<Register> GCDRegs;
195 LLT GCDTy = getGCDType(OrigTy: getGCDType(OrigTy: ResultTy, TargetTy: LeftoverTy), TargetTy: PartTy);
196 for (auto PartReg : concat<const Register>(Ranges&: PartRegs, Ranges&: LeftoverRegs))
197 extractGCDType(Parts&: GCDRegs, GCDTy, SrcReg: PartReg);
198 LLT ResultLCMTy = buildLCMMergePieces(DstTy: ResultTy, NarrowTy: LeftoverTy, GCDTy, VRegs&: GCDRegs);
199 buildWidenedRemergeToDst(DstReg, LCMTy: ResultLCMTy, RemergeRegs: GCDRegs);
200}
201
202void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
203 Register Reg) {
204 LLT Ty = MRI.getType(Reg);
205 SmallVector<Register, 8> RegElts;
206 extractParts(Reg, Ty: Ty.getScalarType(), NumParts: Ty.getNumElements(), VRegs&: RegElts,
207 MIRBuilder, MRI);
208 Elts.append(RHS: RegElts);
209}
210
211/// Merge \p PartRegs with different types into \p DstReg.
212void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
213 ArrayRef<Register> PartRegs) {
214 SmallVector<Register, 8> AllElts;
215 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
216 appendVectorElts(Elts&: AllElts, Reg: PartRegs[i]);
217
218 Register Leftover = PartRegs[PartRegs.size() - 1];
219 if (!MRI.getType(Reg: Leftover).isVector())
220 AllElts.push_back(Elt: Leftover);
221 else
222 appendVectorElts(Elts&: AllElts, Reg: Leftover);
223
224 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: AllElts);
225}
226
227/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
228static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
229 const MachineInstr &MI) {
230 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
231
232 const int StartIdx = Regs.size();
233 const int NumResults = MI.getNumOperands() - 1;
234 Regs.resize(N: Regs.size() + NumResults);
235 for (int I = 0; I != NumResults; ++I)
236 Regs[StartIdx + I] = MI.getOperand(i: I).getReg();
237}
238
239void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
240 LLT GCDTy, Register SrcReg) {
241 LLT SrcTy = MRI.getType(Reg: SrcReg);
242 if (SrcTy == GCDTy) {
243 // If the source already evenly divides the result type, we don't need to do
244 // anything.
245 Parts.push_back(Elt: SrcReg);
246 } else {
247 // Need to split into common type sized pieces.
248 auto Unmerge = MIRBuilder.buildUnmerge(Res: GCDTy, Op: SrcReg);
249 getUnmergeResults(Regs&: Parts, MI: *Unmerge);
250 }
251}
252
253LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
254 LLT NarrowTy, Register SrcReg) {
255 LLT SrcTy = MRI.getType(Reg: SrcReg);
256 LLT GCDTy = getGCDType(OrigTy: getGCDType(OrigTy: SrcTy, TargetTy: NarrowTy), TargetTy: DstTy);
257 extractGCDType(Parts, GCDTy, SrcReg);
258 return GCDTy;
259}
260
261LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
262 SmallVectorImpl<Register> &VRegs,
263 unsigned PadStrategy) {
264 LLT LCMTy = getLCMType(OrigTy: DstTy, TargetTy: NarrowTy);
265
266 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
267 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
268 int NumOrigSrc = VRegs.size();
269
270 Register PadReg;
271
272 // Get a value we can use to pad the source value if the sources won't evenly
273 // cover the result type.
274 if (NumOrigSrc < NumParts * NumSubParts) {
275 if (PadStrategy == TargetOpcode::G_ZEXT)
276 PadReg = MIRBuilder.buildConstant(Res: GCDTy, Val: 0).getReg(Idx: 0);
277 else if (PadStrategy == TargetOpcode::G_ANYEXT)
278 PadReg = MIRBuilder.buildUndef(Res: GCDTy).getReg(Idx: 0);
279 else {
280 assert(PadStrategy == TargetOpcode::G_SEXT);
281
282 // Shift the sign bit of the low register through the high register.
283 auto ShiftAmt =
284 MIRBuilder.buildConstant(Res: LLT::integer(SizeInBits: 64), Val: GCDTy.getSizeInBits() - 1);
285 PadReg = MIRBuilder.buildAShr(Dst: GCDTy, Src0: VRegs.back(), Src1: ShiftAmt).getReg(Idx: 0);
286 }
287 }
288
289 // Registers for the final merge to be produced.
290 SmallVector<Register, 4> Remerge(NumParts);
291
292 // Registers needed for intermediate merges, which will be merged into a
293 // source for Remerge.
294 SmallVector<Register, 4> SubMerge(NumSubParts);
295
296 // Once we've fully read off the end of the original source bits, we can reuse
297 // the same high bits for remaining padding elements.
298 Register AllPadReg;
299
300 // Build merges to the LCM type to cover the original result type.
301 for (int I = 0; I != NumParts; ++I) {
302 bool AllMergePartsArePadding = true;
303
304 // Build the requested merges to the requested type.
305 for (int J = 0; J != NumSubParts; ++J) {
306 int Idx = I * NumSubParts + J;
307 if (Idx >= NumOrigSrc) {
308 SubMerge[J] = PadReg;
309 continue;
310 }
311
312 SubMerge[J] = VRegs[Idx];
313
314 // There are meaningful bits here we can't reuse later.
315 AllMergePartsArePadding = false;
316 }
317
318 // If we've filled up a complete piece with padding bits, we can directly
319 // emit the natural sized constant if applicable, rather than a merge of
320 // smaller constants.
321 if (AllMergePartsArePadding && !AllPadReg) {
322 if (PadStrategy == TargetOpcode::G_ANYEXT)
323 AllPadReg = MIRBuilder.buildUndef(Res: NarrowTy).getReg(Idx: 0);
324 else if (PadStrategy == TargetOpcode::G_ZEXT)
325 AllPadReg = MIRBuilder.buildConstant(Res: NarrowTy, Val: 0).getReg(Idx: 0);
326
327 // If this is a sign extension, we can't materialize a trivial constant
328 // with the right type and have to produce a merge.
329 }
330
331 if (AllPadReg) {
332 // Avoid creating additional instructions if we're just adding additional
333 // copies of padding bits.
334 Remerge[I] = AllPadReg;
335 continue;
336 }
337
338 if (NumSubParts == 1)
339 Remerge[I] = SubMerge[0];
340 else
341 Remerge[I] = MIRBuilder.buildMergeLikeInstr(Res: NarrowTy, Ops: SubMerge).getReg(Idx: 0);
342
343 // In the sign extend padding case, re-use the first all-signbit merge.
344 if (AllMergePartsArePadding && !AllPadReg)
345 AllPadReg = Remerge[I];
346 }
347
348 VRegs = std::move(Remerge);
349 return LCMTy;
350}
351
352void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
353 ArrayRef<Register> RemergeRegs) {
354 LLT DstTy = MRI.getType(Reg: DstReg);
355
356 // Create the merge to the widened source, and extract the relevant bits into
357 // the result.
358
359 if (DstTy == LCMTy) {
360 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: RemergeRegs);
361 return;
362 }
363
364 auto Remerge = MIRBuilder.buildMergeLikeInstr(Res: LCMTy, Ops: RemergeRegs);
365 if (DstTy.isScalar() && LCMTy.isScalar()) {
366 MIRBuilder.buildTrunc(Res: DstReg, Op: Remerge);
367 return;
368 }
369
370 if (LCMTy.isVector()) {
371 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
372 SmallVector<Register, 8> UnmergeDefs(NumDefs);
373 UnmergeDefs[0] = DstReg;
374 for (unsigned I = 1; I != NumDefs; ++I)
375 UnmergeDefs[I] = MRI.createGenericVirtualRegister(Ty: DstTy);
376
377 MIRBuilder.buildUnmerge(Res: UnmergeDefs,
378 Op: MIRBuilder.buildMergeLikeInstr(Res: LCMTy, Ops: RemergeRegs));
379 return;
380 }
381
382 llvm_unreachable("unhandled case");
383}
384
385static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
386#define RTLIBCASE_INT(LibcallPrefix) \
387 do { \
388 switch (Size) { \
389 case 32: \
390 return RTLIB::LibcallPrefix##32; \
391 case 64: \
392 return RTLIB::LibcallPrefix##64; \
393 case 128: \
394 return RTLIB::LibcallPrefix##128; \
395 default: \
396 llvm_unreachable("unexpected size"); \
397 } \
398 } while (0)
399
400#define RTLIBCASE(LibcallPrefix) \
401 do { \
402 switch (Size) { \
403 case 32: \
404 return RTLIB::LibcallPrefix##32; \
405 case 64: \
406 return RTLIB::LibcallPrefix##64; \
407 case 80: \
408 return RTLIB::LibcallPrefix##80; \
409 case 128: \
410 return RTLIB::LibcallPrefix##128; \
411 default: \
412 llvm_unreachable("unexpected size"); \
413 } \
414 } while (0)
415
416 switch (Opcode) {
417 case TargetOpcode::G_LROUND:
418 RTLIBCASE(LROUND_F);
419 case TargetOpcode::G_LLROUND:
420 RTLIBCASE(LLROUND_F);
421 case TargetOpcode::G_MUL:
422 RTLIBCASE_INT(MUL_I);
423 case TargetOpcode::G_SDIV:
424 RTLIBCASE_INT(SDIV_I);
425 case TargetOpcode::G_UDIV:
426 RTLIBCASE_INT(UDIV_I);
427 case TargetOpcode::G_SREM:
428 RTLIBCASE_INT(SREM_I);
429 case TargetOpcode::G_UREM:
430 RTLIBCASE_INT(UREM_I);
431 case TargetOpcode::G_CTLZ_ZERO_POISON:
432 RTLIBCASE_INT(CTLZ_I);
433 case TargetOpcode::G_FADD:
434 RTLIBCASE(ADD_F);
435 case TargetOpcode::G_FSUB:
436 RTLIBCASE(SUB_F);
437 case TargetOpcode::G_FMUL:
438 RTLIBCASE(MUL_F);
439 case TargetOpcode::G_FDIV:
440 RTLIBCASE(DIV_F);
441 case TargetOpcode::G_FEXP:
442 RTLIBCASE(EXP_F);
443 case TargetOpcode::G_FEXP2:
444 RTLIBCASE(EXP2_F);
445 case TargetOpcode::G_FEXP10:
446 RTLIBCASE(EXP10_F);
447 case TargetOpcode::G_FREM:
448 RTLIBCASE(REM_F);
449 case TargetOpcode::G_FPOW:
450 RTLIBCASE(POW_F);
451 case TargetOpcode::G_FPOWI:
452 RTLIBCASE(POWI_F);
453 case TargetOpcode::G_FMA:
454 RTLIBCASE(FMA_F);
455 case TargetOpcode::G_FSIN:
456 RTLIBCASE(SIN_F);
457 case TargetOpcode::G_FCOS:
458 RTLIBCASE(COS_F);
459 case TargetOpcode::G_FTAN:
460 RTLIBCASE(TAN_F);
461 case TargetOpcode::G_FASIN:
462 RTLIBCASE(ASIN_F);
463 case TargetOpcode::G_FACOS:
464 RTLIBCASE(ACOS_F);
465 case TargetOpcode::G_FATAN:
466 RTLIBCASE(ATAN_F);
467 case TargetOpcode::G_FATAN2:
468 RTLIBCASE(ATAN2_F);
469 case TargetOpcode::G_FSINH:
470 RTLIBCASE(SINH_F);
471 case TargetOpcode::G_FCOSH:
472 RTLIBCASE(COSH_F);
473 case TargetOpcode::G_FTANH:
474 RTLIBCASE(TANH_F);
475 case TargetOpcode::G_FSINCOS:
476 RTLIBCASE(SINCOS_F);
477 case TargetOpcode::G_FMODF:
478 RTLIBCASE(MODF_F);
479 case TargetOpcode::G_FLOG10:
480 RTLIBCASE(LOG10_F);
481 case TargetOpcode::G_FLOG:
482 RTLIBCASE(LOG_F);
483 case TargetOpcode::G_FLOG2:
484 RTLIBCASE(LOG2_F);
485 case TargetOpcode::G_FLDEXP:
486 RTLIBCASE(LDEXP_F);
487 case TargetOpcode::G_FCEIL:
488 RTLIBCASE(CEIL_F);
489 case TargetOpcode::G_FFLOOR:
490 RTLIBCASE(FLOOR_F);
491 case TargetOpcode::G_FMINNUM:
492 RTLIBCASE(FMIN_F);
493 case TargetOpcode::G_FMAXNUM:
494 RTLIBCASE(FMAX_F);
495 case TargetOpcode::G_FMINIMUMNUM:
496 RTLIBCASE(FMINIMUM_NUM_F);
497 case TargetOpcode::G_FMAXIMUMNUM:
498 RTLIBCASE(FMAXIMUM_NUM_F);
499 case TargetOpcode::G_FSQRT:
500 RTLIBCASE(SQRT_F);
501 case TargetOpcode::G_FRINT:
502 RTLIBCASE(RINT_F);
503 case TargetOpcode::G_FNEARBYINT:
504 RTLIBCASE(NEARBYINT_F);
505 case TargetOpcode::G_INTRINSIC_TRUNC:
506 RTLIBCASE(TRUNC_F);
507 case TargetOpcode::G_INTRINSIC_ROUND:
508 RTLIBCASE(ROUND_F);
509 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
510 RTLIBCASE(ROUNDEVEN_F);
511 case TargetOpcode::G_INTRINSIC_LRINT:
512 RTLIBCASE(LRINT_F);
513 case TargetOpcode::G_INTRINSIC_LLRINT:
514 RTLIBCASE(LLRINT_F);
515 }
516 llvm_unreachable("Unknown libcall function");
517#undef RTLIBCASE_INT
518#undef RTLIBCASE
519}
520
521/// True if an instruction is in tail position in its caller. Intended for
522/// legalizing libcalls as tail calls when possible.
523static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
524 MachineInstr &MI,
525 const TargetInstrInfo &TII,
526 MachineRegisterInfo &MRI) {
527 MachineBasicBlock &MBB = *MI.getParent();
528 const Function &F = MBB.getParent()->getFunction();
529
530 // Conservatively require the attributes of the call to match those of
531 // the return. Ignore NoAlias and NonNull because they don't affect the
532 // call sequence.
533 AttributeList CallerAttrs = F.getAttributes();
534 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
535 .removeAttribute(Val: Attribute::NoAlias)
536 .removeAttribute(Val: Attribute::NonNull)
537 .hasAttributes())
538 return false;
539
540 // It's not safe to eliminate the sign / zero extension of the return value.
541 if (CallerAttrs.hasRetAttr(Kind: Attribute::ZExt) ||
542 CallerAttrs.hasRetAttr(Kind: Attribute::SExt))
543 return false;
544
545 // Only tail call if the following instruction is a standard return or if we
546 // have a `thisreturn` callee, and a sequence like:
547 //
548 // G_MEMCPY %0, %1, %2
549 // $x0 = COPY %0
550 // RET_ReallyLR implicit $x0
551 auto Next = next_nodbg(It: MI.getIterator(), End: MBB.instr_end());
552 if (Next != MBB.instr_end() && Next->isCopy()) {
553 if (MI.getOpcode() == TargetOpcode::G_BZERO)
554 return false;
555
556 // For MEMCPY/MOMMOVE/MEMSET these will be the first use (the dst), as the
557 // mempy/etc routines return the same parameter. For other it will be the
558 // returned value.
559 Register VReg = MI.getOperand(i: 0).getReg();
560 if (!VReg.isVirtual() || VReg != Next->getOperand(i: 1).getReg())
561 return false;
562
563 Register PReg = Next->getOperand(i: 0).getReg();
564 if (!PReg.isPhysical())
565 return false;
566
567 auto Ret = next_nodbg(It: Next, End: MBB.instr_end());
568 if (Ret == MBB.instr_end() || !Ret->isReturn())
569 return false;
570
571 if (Ret->getNumImplicitOperands() != 1)
572 return false;
573
574 if (!Ret->getOperand(i: 0).isReg() || PReg != Ret->getOperand(i: 0).getReg())
575 return false;
576
577 // Skip over the COPY that we just validated.
578 Next = Ret;
579 }
580
581 if (Next == MBB.instr_end() || TII.isTailCall(Inst: *Next) || !Next->isReturn())
582 return false;
583
584 return true;
585}
586
587LegalizerHelper::LegalizeResult LegalizerHelper::createLibcall(
588 const char *Name, const CallLowering::ArgInfo &Result,
589 ArrayRef<CallLowering::ArgInfo> Args, const CallingConv::ID CC,
590 LostDebugLocObserver &LocObserver, MachineInstr *MI) const {
591 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
592
593 CallLowering::CallLoweringInfo Info;
594 Info.CallConv = CC;
595 Info.Callee = MachineOperand::CreateES(SymName: Name);
596 Info.OrigRet = Result;
597 if (MI)
598 Info.IsTailCall =
599 (Result.Ty->isVoidTy() ||
600 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
601 isLibCallInTailPosition(Result, MI&: *MI, TII: MIRBuilder.getTII(),
602 MRI&: *MIRBuilder.getMRI());
603
604 llvm::append_range(C&: Info.OrigArgs, R&: Args);
605 if (!CLI.lowerCall(MIRBuilder, Info))
606 return LegalizerHelper::UnableToLegalize;
607
608 if (MI && Info.LoweredTailCall) {
609 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
610
611 // Check debug locations before removing the return.
612 LocObserver.checkpoint(CheckDebugLocs: true);
613
614 // We must have a return following the call (or debug insts) to get past
615 // isLibCallInTailPosition.
616 do {
617 MachineInstr *Next = MI->getNextNode();
618 assert(Next &&
619 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
620 "Expected instr following MI to be return or debug inst?");
621 // We lowered a tail call, so the call is now the return from the block.
622 // Delete the old return.
623 Next->eraseFromParent();
624 } while (MI->getNextNode());
625
626 // We expect to lose the debug location from the return.
627 LocObserver.checkpoint(CheckDebugLocs: false);
628 }
629 return LegalizerHelper::Legalized;
630}
631
632LegalizerHelper::LegalizeResult LegalizerHelper::createLibcall(
633 RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result,
634 ArrayRef<CallLowering::ArgInfo> Args, LostDebugLocObserver &LocObserver,
635 MachineInstr *MI) const {
636 if (!Libcalls)
637 return LegalizerHelper::UnableToLegalize;
638
639 RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(Call: Libcall);
640 if (LibcallImpl == RTLIB::Unsupported)
641 return LegalizerHelper::UnableToLegalize;
642
643 StringRef Name = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(CallImpl: LibcallImpl);
644 const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(Call: LibcallImpl);
645 return createLibcall(Name: Name.data(), Result, Args, CC, LocObserver, MI);
646}
647
648// Useful for libcalls where all operands have the same type.
649LegalizerHelper::LegalizeResult
650LegalizerHelper::simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
651 unsigned Size, Type *OpType,
652 LostDebugLocObserver &LocObserver) const {
653 auto Libcall = getRTLibDesc(Opcode: MI.getOpcode(), Size);
654
655 // FIXME: What does the original arg index mean here?
656 SmallVector<CallLowering::ArgInfo, 3> Args;
657 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands()))
658 Args.push_back(Elt: {MO.getReg(), OpType, 0});
659 return createLibcall(Libcall, Result: {MI.getOperand(i: 0).getReg(), OpType, 0}, Args,
660 LocObserver, MI: &MI);
661}
662
663LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
664 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
665 LostDebugLocObserver &LocObserver) {
666 MachineFunction &MF = *MI.getMF();
667 MachineRegisterInfo &MRI = MF.getRegInfo();
668
669 Register DstSin = MI.getOperand(i: 0).getReg();
670 Register DstCos = MI.getOperand(i: 1).getReg();
671 Register Src = MI.getOperand(i: 2).getReg();
672 LLT DstTy = MRI.getType(Reg: DstSin);
673
674 int MemSize = DstTy.getSizeInBytes();
675 Align Alignment = getStackTemporaryAlignment(Type: DstTy);
676 const DataLayout &DL = MIRBuilder.getDataLayout();
677 unsigned AddrSpace = DL.getAllocaAddrSpace();
678 MachinePointerInfo PtrInfo;
679
680 Register StackPtrSin =
681 createStackTemporary(Bytes: TypeSize::getFixed(ExactSize: MemSize), Alignment, PtrInfo)
682 .getReg(Idx: 0);
683 Register StackPtrCos =
684 createStackTemporary(Bytes: TypeSize::getFixed(ExactSize: MemSize), Alignment, PtrInfo)
685 .getReg(Idx: 0);
686
687 auto &Ctx = MF.getFunction().getContext();
688 auto LibcallResult = createLibcall(
689 Libcall: getRTLibDesc(Opcode: MI.getOpcode(), Size), Result: {{0}, Type::getVoidTy(C&: Ctx), 0},
690 Args: {{Src, OpType, 0},
691 {StackPtrSin, PointerType::get(C&: Ctx, AddressSpace: AddrSpace), 1},
692 {StackPtrCos, PointerType::get(C&: Ctx, AddressSpace: AddrSpace), 2}},
693 LocObserver, MI: &MI);
694
695 if (LibcallResult != LegalizeResult::Legalized)
696 return LegalizerHelper::UnableToLegalize;
697
698 MachineMemOperand *LoadMMOSin = MF.getMachineMemOperand(
699 PtrInfo, F: MachineMemOperand::MOLoad, Size: MemSize, BaseAlignment: Alignment);
700 MachineMemOperand *LoadMMOCos = MF.getMachineMemOperand(
701 PtrInfo, F: MachineMemOperand::MOLoad, Size: MemSize, BaseAlignment: Alignment);
702
703 MIRBuilder.buildLoad(Res: DstSin, Addr: StackPtrSin, MMO&: *LoadMMOSin);
704 MIRBuilder.buildLoad(Res: DstCos, Addr: StackPtrCos, MMO&: *LoadMMOCos);
705 MI.eraseFromParent();
706
707 return LegalizerHelper::Legalized;
708}
709
710LegalizerHelper::LegalizeResult
711LegalizerHelper::emitModfLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
712 unsigned Size, Type *OpType,
713 LostDebugLocObserver &LocObserver) {
714 MachineFunction &MF = MIRBuilder.getMF();
715 MachineRegisterInfo &MRI = MF.getRegInfo();
716
717 Register DstFrac = MI.getOperand(i: 0).getReg();
718 Register DstInt = MI.getOperand(i: 1).getReg();
719 Register Src = MI.getOperand(i: 2).getReg();
720 LLT DstTy = MRI.getType(Reg: DstFrac);
721
722 int MemSize = DstTy.getSizeInBytes();
723 Align Alignment = getStackTemporaryAlignment(Type: DstTy);
724 const DataLayout &DL = MIRBuilder.getDataLayout();
725 unsigned AddrSpace = DL.getAllocaAddrSpace();
726 MachinePointerInfo PtrInfo;
727
728 Register StackPtrInt =
729 createStackTemporary(Bytes: TypeSize::getFixed(ExactSize: MemSize), Alignment, PtrInfo)
730 .getReg(Idx: 0);
731
732 auto &Ctx = MF.getFunction().getContext();
733 auto LibcallResult = createLibcall(
734 Libcall: getRTLibDesc(Opcode: MI.getOpcode(), Size), Result: {DstFrac, OpType, 0},
735 Args: {{Src, OpType, 0}, {StackPtrInt, PointerType::get(C&: Ctx, AddressSpace: AddrSpace), 1}},
736 LocObserver, MI: &MI);
737
738 if (LibcallResult != LegalizeResult::Legalized)
739 return LegalizerHelper::UnableToLegalize;
740
741 MachineMemOperand *LoadMMOInt = MF.getMachineMemOperand(
742 PtrInfo, F: MachineMemOperand::MOLoad, Size: MemSize, BaseAlignment: Alignment);
743
744 MIRBuilder.buildLoad(Res: DstInt, Addr: StackPtrInt, MMO&: *LoadMMOInt);
745 MI.eraseFromParent();
746
747 return LegalizerHelper::Legalized;
748}
749
750static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
751 Type *FromType) {
752 auto ToMVT = MVT::getVT(Ty: ToType);
753 auto FromMVT = MVT::getVT(Ty: FromType);
754
755 switch (Opcode) {
756 case TargetOpcode::G_FPEXT:
757 return RTLIB::getFPEXT(OpVT: FromMVT, RetVT: ToMVT);
758 case TargetOpcode::G_FPTRUNC:
759 return RTLIB::getFPROUND(OpVT: FromMVT, RetVT: ToMVT);
760 case TargetOpcode::G_FPTOSI:
761 return RTLIB::getFPTOSINT(OpVT: FromMVT, RetVT: ToMVT);
762 case TargetOpcode::G_FPTOUI:
763 return RTLIB::getFPTOUINT(OpVT: FromMVT, RetVT: ToMVT);
764 case TargetOpcode::G_SITOFP:
765 return RTLIB::getSINTTOFP(OpVT: FromMVT, RetVT: ToMVT);
766 case TargetOpcode::G_UITOFP:
767 return RTLIB::getUINTTOFP(OpVT: FromMVT, RetVT: ToMVT);
768 }
769 llvm_unreachable("Unsupported libcall function");
770}
771
772LegalizerHelper::LegalizeResult LegalizerHelper::conversionLibcall(
773 MachineInstr &MI, Type *ToType, Type *FromType,
774 LostDebugLocObserver &LocObserver, bool IsSigned) const {
775 CallLowering::ArgInfo Arg = {MI.getOperand(i: 1).getReg(), FromType, 0};
776 if (FromType->isIntegerTy()) {
777 if (TLI.shouldSignExtendTypeInLibCall(Ty: FromType, IsSigned))
778 Arg.Flags[0].setSExt();
779 else
780 Arg.Flags[0].setZExt();
781 }
782
783 RTLIB::Libcall Libcall = getConvRTLibDesc(Opcode: MI.getOpcode(), ToType, FromType);
784 return createLibcall(Libcall, Result: {MI.getOperand(i: 0).getReg(), ToType, 0}, Args: Arg,
785 LocObserver, MI: &MI);
786}
787
788LegalizerHelper::LegalizeResult
789LegalizerHelper::createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI,
790 LostDebugLocObserver &LocObserver) const {
791 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
792
793 SmallVector<CallLowering::ArgInfo, 3> Args;
794 // Add all the args, except for the last which is an imm denoting 'tail'.
795 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
796 Register Reg = MI.getOperand(i).getReg();
797
798 // Need derive an IR type for call lowering.
799 LLT OpLLT = MRI.getType(Reg);
800 Type *OpTy = nullptr;
801 if (OpLLT.isPointer())
802 OpTy = PointerType::get(C&: Ctx, AddressSpace: OpLLT.getAddressSpace());
803 else
804 OpTy = IntegerType::get(C&: Ctx, NumBits: OpLLT.getSizeInBits());
805 Args.push_back(Elt: {Reg, OpTy, 0});
806 }
807
808 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
809 RTLIB::Libcall RTLibcall;
810 unsigned Opc = MI.getOpcode();
811 switch (Opc) {
812 case TargetOpcode::G_BZERO:
813 RTLibcall = RTLIB::BZERO;
814 break;
815 case TargetOpcode::G_MEMCPY:
816 RTLibcall = RTLIB::MEMCPY;
817 Args[0].Flags[0].setReturned();
818 break;
819 case TargetOpcode::G_MEMMOVE:
820 RTLibcall = RTLIB::MEMMOVE;
821 Args[0].Flags[0].setReturned();
822 break;
823 case TargetOpcode::G_MEMSET:
824 RTLibcall = RTLIB::MEMSET;
825 Args[0].Flags[0].setReturned();
826 break;
827 default:
828 llvm_unreachable("unsupported opcode");
829 }
830
831 if (!Libcalls) // FIXME: Should be mandatory
832 return LegalizerHelper::UnableToLegalize;
833
834 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(Call: RTLibcall);
835
836 // Unsupported libcall on the target.
837 if (RTLibcallImpl == RTLIB::Unsupported) {
838 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
839 << MIRBuilder.getTII().getName(Opc) << "\n");
840 return LegalizerHelper::UnableToLegalize;
841 }
842
843 CallLowering::CallLoweringInfo Info;
844 Info.CallConv = Libcalls->getLibcallImplCallingConv(Call: RTLibcallImpl);
845
846 StringRef LibcallName =
847 RTLIB::RuntimeLibcallsInfo::getLibcallImplName(CallImpl: RTLibcallImpl);
848 Info.Callee = MachineOperand::CreateES(SymName: LibcallName.data());
849 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(C&: Ctx), 0);
850 Info.IsTailCall =
851 MI.getOperand(i: MI.getNumOperands() - 1).getImm() &&
852 isLibCallInTailPosition(Result: Info.OrigRet, MI, TII: MIRBuilder.getTII(), MRI);
853
854 llvm::append_range(C&: Info.OrigArgs, R&: Args);
855 if (!CLI.lowerCall(MIRBuilder, Info))
856 return LegalizerHelper::UnableToLegalize;
857
858 if (Info.LoweredTailCall) {
859 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
860
861 // Check debug locations before removing the return.
862 LocObserver.checkpoint(CheckDebugLocs: true);
863
864 // We must have a return following the call (or debug insts) to get past
865 // isLibCallInTailPosition.
866 do {
867 MachineInstr *Next = MI.getNextNode();
868 assert(Next &&
869 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
870 "Expected instr following MI to be return or debug inst?");
871 // We lowered a tail call, so the call is now the return from the block.
872 // Delete the old return.
873 Next->eraseFromParent();
874 } while (MI.getNextNode());
875
876 // We expect to lose the debug location from the return.
877 LocObserver.checkpoint(CheckDebugLocs: false);
878 }
879
880 return LegalizerHelper::Legalized;
881}
882
883static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
884 unsigned Opc = MI.getOpcode();
885 auto &AtomicMI = cast<GMemOperation>(Val&: MI);
886 auto &MMO = AtomicMI.getMMO();
887 auto Ordering = MMO.getMergedOrdering();
888 LLT MemType = MMO.getMemoryType();
889 uint64_t MemSize = MemType.getSizeInBytes();
890 if (MemType.isVector())
891 return RTLIB::UNKNOWN_LIBCALL;
892
893#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
894#define LCALL5(A) \
895 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
896 switch (Opc) {
897 case TargetOpcode::G_ATOMIC_CMPXCHG:
898 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
899 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
900 return getOutlineAtomicHelper(LC, Order: Ordering, MemSize);
901 }
902 case TargetOpcode::G_ATOMICRMW_XCHG: {
903 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
904 return getOutlineAtomicHelper(LC, Order: Ordering, MemSize);
905 }
906 case TargetOpcode::G_ATOMICRMW_ADD:
907 case TargetOpcode::G_ATOMICRMW_SUB: {
908 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
909 return getOutlineAtomicHelper(LC, Order: Ordering, MemSize);
910 }
911 case TargetOpcode::G_ATOMICRMW_AND: {
912 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
913 return getOutlineAtomicHelper(LC, Order: Ordering, MemSize);
914 }
915 case TargetOpcode::G_ATOMICRMW_OR: {
916 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
917 return getOutlineAtomicHelper(LC, Order: Ordering, MemSize);
918 }
919 case TargetOpcode::G_ATOMICRMW_XOR: {
920 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
921 return getOutlineAtomicHelper(LC, Order: Ordering, MemSize);
922 }
923 default:
924 return RTLIB::UNKNOWN_LIBCALL;
925 }
926#undef LCALLS
927#undef LCALL5
928}
929
930LegalizerHelper::LegalizeResult
931LegalizerHelper::createAtomicLibcall(MachineInstr &MI) const {
932 auto &Ctx = MIRBuilder.getContext();
933
934 Type *RetTy;
935 SmallVector<Register> RetRegs;
936 SmallVector<CallLowering::ArgInfo, 3> Args;
937 unsigned Opc = MI.getOpcode();
938 switch (Opc) {
939 case TargetOpcode::G_ATOMIC_CMPXCHG:
940 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
941 Register Success;
942 LLT SuccessLLT;
943 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
944 MI.getFirst4RegLLTs();
945 RetRegs.push_back(Elt: Ret);
946 RetTy = IntegerType::get(C&: Ctx, NumBits: RetLLT.getSizeInBits());
947 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
948 std::tie(args&: Ret, args&: RetLLT, args&: Success, args&: SuccessLLT, args&: Mem, args&: MemLLT, args&: Cmp, args&: CmpLLT, args&: New,
949 args&: NewLLT) = MI.getFirst5RegLLTs();
950 RetRegs.push_back(Elt: Success);
951 RetTy = StructType::get(
952 Context&: Ctx, Elements: {RetTy, IntegerType::get(C&: Ctx, NumBits: SuccessLLT.getSizeInBits())});
953 }
954 Args.push_back(Elt: {Cmp, IntegerType::get(C&: Ctx, NumBits: CmpLLT.getSizeInBits()), 0});
955 Args.push_back(Elt: {New, IntegerType::get(C&: Ctx, NumBits: NewLLT.getSizeInBits()), 0});
956 Args.push_back(Elt: {Mem, PointerType::get(C&: Ctx, AddressSpace: MemLLT.getAddressSpace()), 0});
957 break;
958 }
959 case TargetOpcode::G_ATOMICRMW_XCHG:
960 case TargetOpcode::G_ATOMICRMW_ADD:
961 case TargetOpcode::G_ATOMICRMW_SUB:
962 case TargetOpcode::G_ATOMICRMW_AND:
963 case TargetOpcode::G_ATOMICRMW_OR:
964 case TargetOpcode::G_ATOMICRMW_XOR: {
965 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
966 RetRegs.push_back(Elt: Ret);
967 RetTy = IntegerType::get(C&: Ctx, NumBits: RetLLT.getSizeInBits());
968 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
969 Val =
970 MIRBuilder.buildXor(Dst: ValLLT, Src0: MIRBuilder.buildConstant(Res: ValLLT, Val: -1), Src1: Val)
971 .getReg(Idx: 0);
972 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
973 Val =
974 MIRBuilder.buildSub(Dst: ValLLT, Src0: MIRBuilder.buildConstant(Res: ValLLT, Val: 0), Src1: Val)
975 .getReg(Idx: 0);
976 Args.push_back(Elt: {Val, IntegerType::get(C&: Ctx, NumBits: ValLLT.getSizeInBits()), 0});
977 Args.push_back(Elt: {Mem, PointerType::get(C&: Ctx, AddressSpace: MemLLT.getAddressSpace()), 0});
978 break;
979 }
980 default:
981 llvm_unreachable("unsupported opcode");
982 }
983
984 if (!Libcalls) // FIXME: Should be mandatory
985 return LegalizerHelper::UnableToLegalize;
986
987 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
988 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
989 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(Call: RTLibcall);
990
991 // Unsupported libcall on the target.
992 if (RTLibcallImpl == RTLIB::Unsupported) {
993 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
994 << MIRBuilder.getTII().getName(Opc) << "\n");
995 return LegalizerHelper::UnableToLegalize;
996 }
997
998 CallLowering::CallLoweringInfo Info;
999 Info.CallConv = Libcalls->getLibcallImplCallingConv(Call: RTLibcallImpl);
1000
1001 StringRef LibcallName =
1002 RTLIB::RuntimeLibcallsInfo::getLibcallImplName(CallImpl: RTLibcallImpl);
1003 Info.Callee = MachineOperand::CreateES(SymName: LibcallName.data());
1004 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
1005
1006 llvm::append_range(C&: Info.OrigArgs, R&: Args);
1007 if (!CLI.lowerCall(MIRBuilder, Info))
1008 return LegalizerHelper::UnableToLegalize;
1009
1010 return LegalizerHelper::Legalized;
1011}
1012
1013static RTLIB::Libcall
1014getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
1015 RTLIB::Libcall RTLibcall;
1016 switch (MI.getOpcode()) {
1017 case TargetOpcode::G_GET_FPENV:
1018 RTLibcall = RTLIB::FEGETENV;
1019 break;
1020 case TargetOpcode::G_SET_FPENV:
1021 case TargetOpcode::G_RESET_FPENV:
1022 RTLibcall = RTLIB::FESETENV;
1023 break;
1024 case TargetOpcode::G_GET_FPMODE:
1025 RTLibcall = RTLIB::FEGETMODE;
1026 break;
1027 case TargetOpcode::G_SET_FPMODE:
1028 case TargetOpcode::G_RESET_FPMODE:
1029 RTLibcall = RTLIB::FESETMODE;
1030 break;
1031 default:
1032 llvm_unreachable("Unexpected opcode");
1033 }
1034 return RTLibcall;
1035}
1036
1037// Some library functions that read FP state (fegetmode, fegetenv) write the
1038// state into a region in memory. IR intrinsics that do the same operations
1039// (get_fpmode, get_fpenv) return the state as integer value. To implement these
1040// intrinsics via the library functions, we need to use temporary variable,
1041// for example:
1042//
1043// %0:_(s32) = G_GET_FPMODE
1044//
1045// is transformed to:
1046//
1047// %1:_(p0) = G_FRAME_INDEX %stack.0
1048// BL &fegetmode
1049// %0:_(s32) = G_LOAD % 1
1050//
1051LegalizerHelper::LegalizeResult
1052LegalizerHelper::createGetStateLibcall(MachineInstr &MI,
1053 LostDebugLocObserver &LocObserver) {
1054 const DataLayout &DL = MIRBuilder.getDataLayout();
1055 auto &MF = MIRBuilder.getMF();
1056 auto &MRI = *MIRBuilder.getMRI();
1057 auto &Ctx = MF.getFunction().getContext();
1058
1059 // Create temporary, where library function will put the read state.
1060 Register Dst = MI.getOperand(i: 0).getReg();
1061 LLT StateTy = MRI.getType(Reg: Dst);
1062 TypeSize StateSize = StateTy.getSizeInBytes();
1063 Align TempAlign = getStackTemporaryAlignment(Type: StateTy);
1064 MachinePointerInfo TempPtrInfo;
1065 auto Temp = createStackTemporary(Bytes: StateSize, Alignment: TempAlign, PtrInfo&: TempPtrInfo);
1066
1067 // Create a call to library function, with the temporary as an argument.
1068 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1069 Type *StatePtrTy = PointerType::get(C&: Ctx, AddressSpace: TempAddrSpace);
1070 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1071 auto Res = createLibcall(
1072 Libcall: RTLibcall, Result: CallLowering::ArgInfo({0}, Type::getVoidTy(C&: Ctx), 0),
1073 Args: CallLowering::ArgInfo({Temp.getReg(Idx: 0), StatePtrTy, 0}), LocObserver,
1074 MI: nullptr);
1075 if (Res != LegalizerHelper::Legalized)
1076 return Res;
1077
1078 // Create a load from the temporary.
1079 MachineMemOperand *MMO = MF.getMachineMemOperand(
1080 PtrInfo: TempPtrInfo, f: MachineMemOperand::MOLoad, MemTy: StateTy, base_alignment: TempAlign);
1081 MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_LOAD, Res: Dst, Addr: Temp, MMO&: *MMO);
1082
1083 return LegalizerHelper::Legalized;
1084}
1085
1086// Similar to `createGetStateLibcall` the function calls a library function
1087// using transient space in stack. In this case the library function reads
1088// content of memory region.
1089LegalizerHelper::LegalizeResult
1090LegalizerHelper::createSetStateLibcall(MachineInstr &MI,
1091 LostDebugLocObserver &LocObserver) {
1092 const DataLayout &DL = MIRBuilder.getDataLayout();
1093 auto &MF = MIRBuilder.getMF();
1094 auto &MRI = *MIRBuilder.getMRI();
1095 auto &Ctx = MF.getFunction().getContext();
1096
1097 // Create temporary, where library function will get the new state.
1098 Register Src = MI.getOperand(i: 0).getReg();
1099 LLT StateTy = MRI.getType(Reg: Src);
1100 TypeSize StateSize = StateTy.getSizeInBytes();
1101 Align TempAlign = getStackTemporaryAlignment(Type: StateTy);
1102 MachinePointerInfo TempPtrInfo;
1103 auto Temp = createStackTemporary(Bytes: StateSize, Alignment: TempAlign, PtrInfo&: TempPtrInfo);
1104
1105 // Put the new state into the temporary.
1106 MachineMemOperand *MMO = MF.getMachineMemOperand(
1107 PtrInfo: TempPtrInfo, f: MachineMemOperand::MOStore, MemTy: StateTy, base_alignment: TempAlign);
1108 MIRBuilder.buildStore(Val: Src, Addr: Temp, MMO&: *MMO);
1109
1110 // Create a call to library function, with the temporary as an argument.
1111 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1112 Type *StatePtrTy = PointerType::get(C&: Ctx, AddressSpace: TempAddrSpace);
1113 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1114 return createLibcall(Libcall: RTLibcall,
1115 Result: CallLowering::ArgInfo({0}, Type::getVoidTy(C&: Ctx), 0),
1116 Args: CallLowering::ArgInfo({Temp.getReg(Idx: 0), StatePtrTy, 0}),
1117 LocObserver, MI: nullptr);
1118}
1119
1120/// Returns the corresponding libcall for the given Pred and
1121/// the ICMP predicate that should be generated to compare with #0
1122/// after the libcall.
1123static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1124getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
1125#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1126 do { \
1127 switch (Size) { \
1128 case 32: \
1129 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1130 case 64: \
1131 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1132 case 128: \
1133 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1134 default: \
1135 llvm_unreachable("unexpected size"); \
1136 } \
1137 } while (0)
1138
1139 switch (Pred) {
1140 case CmpInst::FCMP_OEQ:
1141 RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
1142 case CmpInst::FCMP_UNE:
1143 RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
1144 case CmpInst::FCMP_OGE:
1145 RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
1146 case CmpInst::FCMP_OLT:
1147 RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
1148 case CmpInst::FCMP_OLE:
1149 RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
1150 case CmpInst::FCMP_OGT:
1151 RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
1152 case CmpInst::FCMP_UNO:
1153 RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
1154 default:
1155 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1156 }
1157}
1158
1159LegalizerHelper::LegalizeResult
1160LegalizerHelper::createFCMPLibcall(MachineInstr &MI,
1161 LostDebugLocObserver &LocObserver) {
1162 auto &MF = MIRBuilder.getMF();
1163 auto &Ctx = MF.getFunction().getContext();
1164 const GFCmp *Cmp = cast<GFCmp>(Val: &MI);
1165
1166 LLT OpLLT = MRI.getType(Reg: Cmp->getLHSReg());
1167 unsigned Size = OpLLT.getSizeInBits();
1168 if ((Size != 32 && Size != 64 && Size != 128) ||
1169 OpLLT != MRI.getType(Reg: Cmp->getRHSReg()))
1170 return UnableToLegalize;
1171
1172 Type *OpType = getFloatTypeForLLT(Ctx, Ty: OpLLT);
1173
1174 // DstReg type is s32
1175 const Register DstReg = Cmp->getReg(Idx: 0);
1176 LLT DstTy = MRI.getType(Reg: DstReg);
1177 const auto Cond = Cmp->getCond();
1178
1179 // Reference:
1180 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1181 // Generates a libcall followed by ICMP.
1182 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1183 const CmpInst::Predicate ICmpPred,
1184 const DstOp &Res) -> Register {
1185 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1186 LLT TempLLT = LLT::integer(SizeInBits: 32);
1187 Register Temp = MRI.createGenericVirtualRegister(Ty: TempLLT);
1188 // Generate libcall, holding result in Temp
1189 const auto Status = createLibcall(
1190 Libcall, Result: {Temp, Type::getInt32Ty(C&: Ctx), 0},
1191 Args: {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1192 LocObserver, MI: &MI);
1193 if (!Status)
1194 return {};
1195
1196 // Compare temp with #0 to get the final result.
1197 return MIRBuilder
1198 .buildICmp(Pred: ICmpPred, Res, Op0: Temp, Op1: MIRBuilder.buildConstant(Res: TempLLT, Val: 0))
1199 .getReg(Idx: 0);
1200 };
1201
1202 // Simple case if we have a direct mapping from predicate to libcall
1203 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Pred: Cond, Size);
1204 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1205 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1206 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1207 return Legalized;
1208 }
1209 return UnableToLegalize;
1210 }
1211
1212 // No direct mapping found, should be generated as combination of libcalls.
1213
1214 switch (Cond) {
1215 case CmpInst::FCMP_UEQ: {
1216 // FCMP_UEQ: unordered or equal
1217 // Convert into (FCMP_OEQ || FCMP_UNO).
1218
1219 const auto [OeqLibcall, OeqPred] =
1220 getFCMPLibcallDesc(Pred: CmpInst::FCMP_OEQ, Size);
1221 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1222
1223 const auto [UnoLibcall, UnoPred] =
1224 getFCMPLibcallDesc(Pred: CmpInst::FCMP_UNO, Size);
1225 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1226 if (Oeq && Uno)
1227 MIRBuilder.buildOr(Dst: DstReg, Src0: Oeq, Src1: Uno);
1228 else
1229 return UnableToLegalize;
1230
1231 break;
1232 }
1233 case CmpInst::FCMP_ONE: {
1234 // FCMP_ONE: ordered and operands are unequal
1235 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1236
1237 // We inverse the predicate instead of generating a NOT
1238 // to save one instruction.
1239 // On AArch64 isel can even select two cmp into a single ccmp.
1240 const auto [OeqLibcall, OeqPred] =
1241 getFCMPLibcallDesc(Pred: CmpInst::FCMP_OEQ, Size);
1242 const auto NotOeq =
1243 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(pred: OeqPred), DstTy);
1244
1245 const auto [UnoLibcall, UnoPred] =
1246 getFCMPLibcallDesc(Pred: CmpInst::FCMP_UNO, Size);
1247 const auto NotUno =
1248 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(pred: UnoPred), DstTy);
1249
1250 if (NotOeq && NotUno)
1251 MIRBuilder.buildAnd(Dst: DstReg, Src0: NotOeq, Src1: NotUno);
1252 else
1253 return UnableToLegalize;
1254
1255 break;
1256 }
1257 case CmpInst::FCMP_ULT:
1258 case CmpInst::FCMP_UGE:
1259 case CmpInst::FCMP_UGT:
1260 case CmpInst::FCMP_ULE:
1261 case CmpInst::FCMP_ORD: {
1262 // Convert into: !(inverse(Pred))
1263 // E.g. FCMP_ULT becomes !FCMP_OGE
1264 // This is equivalent to the following, but saves some instructions.
1265 // MIRBuilder.buildNot(
1266 // PredTy,
1267 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1268 // Op1, Op2));
1269 const auto [InversedLibcall, InversedPred] =
1270 getFCMPLibcallDesc(Pred: CmpInst::getInversePredicate(pred: Cond), Size);
1271 if (!BuildLibcall(InversedLibcall,
1272 CmpInst::getInversePredicate(pred: InversedPred), DstReg))
1273 return UnableToLegalize;
1274 break;
1275 }
1276 default:
1277 return UnableToLegalize;
1278 }
1279
1280 return Legalized;
1281}
1282
1283// The function is used to legalize operations that set default environment
1284// state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1285// On most targets supported in glibc FE_DFL_MODE is defined as
1286// `((const femode_t *) -1)`. Such assumption is used here. If for some target
1287// it is not true, the target must provide custom lowering.
1288LegalizerHelper::LegalizeResult
1289LegalizerHelper::createResetStateLibcall(MachineInstr &MI,
1290 LostDebugLocObserver &LocObserver) {
1291 const DataLayout &DL = MIRBuilder.getDataLayout();
1292 auto &MF = MIRBuilder.getMF();
1293 auto &Ctx = MF.getFunction().getContext();
1294
1295 // Create an argument for the library function.
1296 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1297 Type *StatePtrTy = PointerType::get(C&: Ctx, AddressSpace: AddrSpace);
1298 unsigned PtrSize = DL.getPointerSizeInBits(AS: AddrSpace);
1299 LLT MemTy = LLT::pointer(AddressSpace: AddrSpace, SizeInBits: PtrSize);
1300 auto DefValue = MIRBuilder.buildConstant(Res: LLT::integer(SizeInBits: PtrSize), Val: -1LL);
1301 DstOp Dest(MRI.createGenericVirtualRegister(Ty: MemTy));
1302 MIRBuilder.buildIntToPtr(Dst: Dest, Src: DefValue);
1303
1304 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1305 return createLibcall(
1306 Libcall: RTLibcall, Result: CallLowering::ArgInfo({0}, Type::getVoidTy(C&: Ctx), 0),
1307 Args: CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, MI: &MI);
1308}
1309
1310LegalizerHelper::LegalizeResult
1311LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1312 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1313
1314 switch (MI.getOpcode()) {
1315 default:
1316 return UnableToLegalize;
1317 case TargetOpcode::G_MUL:
1318 case TargetOpcode::G_SDIV:
1319 case TargetOpcode::G_UDIV:
1320 case TargetOpcode::G_SREM:
1321 case TargetOpcode::G_UREM:
1322 case TargetOpcode::G_CTLZ_ZERO_POISON: {
1323 LLT LLTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1324 unsigned Size = LLTy.getSizeInBits();
1325 Type *HLTy = IntegerType::get(C&: Ctx, NumBits: Size);
1326 auto Status = simpleLibcall(MI, MIRBuilder, Size, OpType: HLTy, LocObserver);
1327 if (Status != Legalized)
1328 return Status;
1329 break;
1330 }
1331 case TargetOpcode::G_FADD:
1332 case TargetOpcode::G_FSUB:
1333 case TargetOpcode::G_FMUL:
1334 case TargetOpcode::G_FDIV:
1335 case TargetOpcode::G_FMA:
1336 case TargetOpcode::G_FPOW:
1337 case TargetOpcode::G_FREM:
1338 case TargetOpcode::G_FCOS:
1339 case TargetOpcode::G_FSIN:
1340 case TargetOpcode::G_FTAN:
1341 case TargetOpcode::G_FACOS:
1342 case TargetOpcode::G_FASIN:
1343 case TargetOpcode::G_FATAN:
1344 case TargetOpcode::G_FATAN2:
1345 case TargetOpcode::G_FCOSH:
1346 case TargetOpcode::G_FSINH:
1347 case TargetOpcode::G_FTANH:
1348 case TargetOpcode::G_FLOG10:
1349 case TargetOpcode::G_FLOG:
1350 case TargetOpcode::G_FLOG2:
1351 case TargetOpcode::G_FEXP:
1352 case TargetOpcode::G_FEXP2:
1353 case TargetOpcode::G_FEXP10:
1354 case TargetOpcode::G_FCEIL:
1355 case TargetOpcode::G_FFLOOR:
1356 case TargetOpcode::G_FMINNUM:
1357 case TargetOpcode::G_FMAXNUM:
1358 case TargetOpcode::G_FMINIMUMNUM:
1359 case TargetOpcode::G_FMAXIMUMNUM:
1360 case TargetOpcode::G_FSQRT:
1361 case TargetOpcode::G_FRINT:
1362 case TargetOpcode::G_FNEARBYINT:
1363 case TargetOpcode::G_INTRINSIC_TRUNC:
1364 case TargetOpcode::G_INTRINSIC_ROUND:
1365 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1366 LLT LLTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1367 unsigned Size = LLTy.getSizeInBits();
1368 Type *HLTy = getFloatTypeForLLT(Ctx, Ty: LLTy);
1369 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1370 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1371 return UnableToLegalize;
1372 }
1373 auto Status = simpleLibcall(MI, MIRBuilder, Size, OpType: HLTy, LocObserver);
1374 if (Status != Legalized)
1375 return Status;
1376 break;
1377 }
1378 case TargetOpcode::G_FSINCOS: {
1379 LLT LLTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1380 unsigned Size = LLTy.getSizeInBits();
1381 Type *HLTy = getFloatTypeForLLT(Ctx, Ty: LLTy);
1382 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1383 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1384 return UnableToLegalize;
1385 }
1386 return emitSincosLibcall(MI, MIRBuilder, Size, OpType: HLTy, LocObserver);
1387 }
1388 case TargetOpcode::G_FMODF: {
1389 LLT LLTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1390 unsigned Size = LLTy.getSizeInBits();
1391 Type *HLTy = getFloatTypeForLLT(Ctx, Ty: LLTy);
1392 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1393 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1394 return UnableToLegalize;
1395 }
1396 return emitModfLibcall(MI, MIRBuilder, Size, OpType: HLTy, LocObserver);
1397 }
1398 case TargetOpcode::G_LROUND:
1399 case TargetOpcode::G_LLROUND:
1400 case TargetOpcode::G_INTRINSIC_LRINT:
1401 case TargetOpcode::G_INTRINSIC_LLRINT: {
1402 LLT LLTy = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
1403 unsigned Size = LLTy.getSizeInBits();
1404 Type *HLTy = getFloatTypeForLLT(Ctx, Ty: LLTy);
1405 Type *ITy = IntegerType::get(
1406 C&: Ctx, NumBits: MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits());
1407 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1408 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1409 return UnableToLegalize;
1410 }
1411 auto Libcall = getRTLibDesc(Opcode: MI.getOpcode(), Size);
1412 LegalizeResult Status =
1413 createLibcall(Libcall, Result: {MI.getOperand(i: 0).getReg(), ITy, 0},
1414 Args: {{MI.getOperand(i: 1).getReg(), HLTy, 0}}, LocObserver, MI: &MI);
1415 if (Status != Legalized)
1416 return Status;
1417 MI.eraseFromParent();
1418 return Legalized;
1419 }
1420 case TargetOpcode::G_FPOWI:
1421 case TargetOpcode::G_FLDEXP: {
1422 LLT LLTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1423 unsigned Size = LLTy.getSizeInBits();
1424 Type *HLTy = getFloatTypeForLLT(Ctx, Ty: LLTy);
1425 Type *ITy = IntegerType::get(
1426 C&: Ctx, NumBits: MRI.getType(Reg: MI.getOperand(i: 2).getReg()).getSizeInBits());
1427 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1428 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1429 return UnableToLegalize;
1430 }
1431 auto Libcall = getRTLibDesc(Opcode: MI.getOpcode(), Size);
1432 SmallVector<CallLowering::ArgInfo, 2> Args = {
1433 {MI.getOperand(i: 1).getReg(), HLTy, 0},
1434 {MI.getOperand(i: 2).getReg(), ITy, 1}};
1435 Args[1].Flags[0].setSExt();
1436 LegalizeResult Status = createLibcall(
1437 Libcall, Result: {MI.getOperand(i: 0).getReg(), HLTy, 0}, Args, LocObserver, MI: &MI);
1438 if (Status != Legalized)
1439 return Status;
1440 break;
1441 }
1442 case TargetOpcode::G_FPEXT:
1443 case TargetOpcode::G_FPTRUNC: {
1444 Type *FromTy = getFloatTypeForLLT(Ctx, Ty: MRI.getType(Reg: MI.getOperand(i: 1).getReg()));
1445 Type *ToTy = getFloatTypeForLLT(Ctx, Ty: MRI.getType(Reg: MI.getOperand(i: 0).getReg()));
1446 if (!FromTy || !ToTy)
1447 return UnableToLegalize;
1448 LegalizeResult Status = conversionLibcall(MI, ToType: ToTy, FromType: FromTy, LocObserver);
1449 if (Status != Legalized)
1450 return Status;
1451 break;
1452 }
1453 case TargetOpcode::G_FCMP: {
1454 LegalizeResult Status = createFCMPLibcall(MI, LocObserver);
1455 if (Status != Legalized)
1456 return Status;
1457 MI.eraseFromParent();
1458 return Status;
1459 }
1460 case TargetOpcode::G_FPTOSI:
1461 case TargetOpcode::G_FPTOUI: {
1462 // FIXME: Support other types
1463 Type *FromTy =
1464 getFloatTypeForLLT(Ctx, Ty: MRI.getType(Reg: MI.getOperand(i: 1).getReg()));
1465 unsigned ToSize = MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits();
1466 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1467 return UnableToLegalize;
1468 LegalizeResult Status = conversionLibcall(MI, ToType: Type::getIntNTy(C&: Ctx, N: ToSize),
1469 FromType: FromTy, LocObserver);
1470 if (Status != Legalized)
1471 return Status;
1472 break;
1473 }
1474 case TargetOpcode::G_SITOFP:
1475 case TargetOpcode::G_UITOFP: {
1476 unsigned FromSize = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
1477 Type *ToTy =
1478 getFloatTypeForLLT(Ctx, Ty: MRI.getType(Reg: MI.getOperand(i: 0).getReg()));
1479 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1480 return UnableToLegalize;
1481 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1482 LegalizeResult Status = conversionLibcall(
1483 MI, ToType: ToTy, FromType: Type::getIntNTy(C&: Ctx, N: FromSize), LocObserver, IsSigned);
1484 if (Status != Legalized)
1485 return Status;
1486 break;
1487 }
1488 case TargetOpcode::G_ATOMICRMW_XCHG:
1489 case TargetOpcode::G_ATOMICRMW_ADD:
1490 case TargetOpcode::G_ATOMICRMW_SUB:
1491 case TargetOpcode::G_ATOMICRMW_AND:
1492 case TargetOpcode::G_ATOMICRMW_OR:
1493 case TargetOpcode::G_ATOMICRMW_XOR:
1494 case TargetOpcode::G_ATOMIC_CMPXCHG:
1495 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1496 auto Status = createAtomicLibcall(MI);
1497 if (Status != Legalized)
1498 return Status;
1499 break;
1500 }
1501 case TargetOpcode::G_BZERO:
1502 case TargetOpcode::G_MEMCPY:
1503 case TargetOpcode::G_MEMMOVE:
1504 case TargetOpcode::G_MEMSET: {
1505 LegalizeResult Result =
1506 createMemLibcall(MRI&: *MIRBuilder.getMRI(), MI, LocObserver);
1507 if (Result != Legalized)
1508 return Result;
1509 MI.eraseFromParent();
1510 return Result;
1511 }
1512 case TargetOpcode::G_GET_FPENV:
1513 case TargetOpcode::G_GET_FPMODE: {
1514 LegalizeResult Result = createGetStateLibcall(MI, LocObserver);
1515 if (Result != Legalized)
1516 return Result;
1517 break;
1518 }
1519 case TargetOpcode::G_SET_FPENV:
1520 case TargetOpcode::G_SET_FPMODE: {
1521 LegalizeResult Result = createSetStateLibcall(MI, LocObserver);
1522 if (Result != Legalized)
1523 return Result;
1524 break;
1525 }
1526 case TargetOpcode::G_RESET_FPENV:
1527 case TargetOpcode::G_RESET_FPMODE: {
1528 LegalizeResult Result = createResetStateLibcall(MI, LocObserver);
1529 if (Result != Legalized)
1530 return Result;
1531 break;
1532 }
1533 }
1534
1535 MI.eraseFromParent();
1536 return Legalized;
1537}
1538
1539LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1540 unsigned TypeIdx,
1541 LLT NarrowTy) {
1542 uint64_t SizeOp0 = MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits();
1543 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1544
1545 switch (MI.getOpcode()) {
1546 default:
1547 return UnableToLegalize;
1548 case TargetOpcode::G_IMPLICIT_DEF: {
1549 Register DstReg = MI.getOperand(i: 0).getReg();
1550 LLT DstTy = MRI.getType(Reg: DstReg);
1551
1552 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1553 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1554 // FIXME: Although this would also be legal for the general case, it causes
1555 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1556 // combines not being hit). This seems to be a problem related to the
1557 // artifact combiner.
1558 if (SizeOp0 % NarrowSize != 0) {
1559 LLT ImplicitTy = DstTy.changeElementType(NewEltTy: NarrowTy);
1560 Register ImplicitReg = MIRBuilder.buildUndef(Res: ImplicitTy).getReg(Idx: 0);
1561 MIRBuilder.buildAnyExt(Res: DstReg, Op: ImplicitReg);
1562
1563 MI.eraseFromParent();
1564 return Legalized;
1565 }
1566
1567 int NumParts = SizeOp0 / NarrowSize;
1568
1569 SmallVector<Register, 2> DstRegs;
1570 for (int i = 0; i < NumParts; ++i)
1571 DstRegs.push_back(Elt: MIRBuilder.buildUndef(Res: NarrowTy).getReg(Idx: 0));
1572
1573 if (DstTy.isVector())
1574 MIRBuilder.buildBuildVector(Res: DstReg, Ops: DstRegs);
1575 else
1576 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: DstRegs);
1577 MI.eraseFromParent();
1578 return Legalized;
1579 }
1580 case TargetOpcode::G_CONSTANT: {
1581 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1582 const APInt &Val = MI.getOperand(i: 1).getCImm()->getValue();
1583 unsigned TotalSize = Ty.getSizeInBits();
1584 unsigned NarrowSize = NarrowTy.getSizeInBits();
1585 int NumParts = TotalSize / NarrowSize;
1586
1587 SmallVector<Register, 4> PartRegs;
1588 for (int I = 0; I != NumParts; ++I) {
1589 unsigned Offset = I * NarrowSize;
1590 auto K = MIRBuilder.buildConstant(Res: NarrowTy,
1591 Val: Val.lshr(shiftAmt: Offset).trunc(width: NarrowSize));
1592 PartRegs.push_back(Elt: K.getReg(Idx: 0));
1593 }
1594
1595 LLT LeftoverTy;
1596 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1597 SmallVector<Register, 1> LeftoverRegs;
1598 if (LeftoverBits != 0) {
1599 LeftoverTy = LLT::scalar(SizeInBits: LeftoverBits);
1600 auto K = MIRBuilder.buildConstant(
1601 Res: LeftoverTy,
1602 Val: Val.lshr(shiftAmt: NumParts * NarrowSize).trunc(width: LeftoverBits));
1603 LeftoverRegs.push_back(Elt: K.getReg(Idx: 0));
1604 }
1605
1606 insertParts(DstReg: MI.getOperand(i: 0).getReg(),
1607 ResultTy: Ty, PartTy: NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1608
1609 MI.eraseFromParent();
1610 return Legalized;
1611 }
1612 case TargetOpcode::G_SEXT:
1613 case TargetOpcode::G_ZEXT:
1614 case TargetOpcode::G_ANYEXT:
1615 return narrowScalarExt(MI, TypeIdx, Ty: NarrowTy);
1616 case TargetOpcode::G_TRUNC: {
1617 if (TypeIdx != 1)
1618 return UnableToLegalize;
1619
1620 uint64_t SizeOp1 = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
1621 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1622 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1623 return UnableToLegalize;
1624 }
1625
1626 auto Unmerge = MIRBuilder.buildUnmerge(Res: NarrowTy, Op: MI.getOperand(i: 1));
1627 MIRBuilder.buildCopy(Res: MI.getOperand(i: 0), Op: Unmerge.getReg(Idx: 0));
1628 MI.eraseFromParent();
1629 return Legalized;
1630 }
1631 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1632 case TargetOpcode::G_FREEZE: {
1633 if (TypeIdx != 0)
1634 return UnableToLegalize;
1635
1636 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1637 // Should widen scalar first
1638 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1639 return UnableToLegalize;
1640
1641 auto Unmerge = MIRBuilder.buildUnmerge(Res: NarrowTy, Op: MI.getOperand(i: 1).getReg());
1642 SmallVector<Register, 8> Parts;
1643 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1644 Parts.push_back(
1645 Elt: MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {NarrowTy}, SrcOps: {Unmerge.getReg(Idx: i)})
1646 .getReg(Idx: 0));
1647 }
1648
1649 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0).getReg(), Ops: Parts);
1650 MI.eraseFromParent();
1651 return Legalized;
1652 }
1653 case TargetOpcode::G_ADD:
1654 case TargetOpcode::G_SUB:
1655 case TargetOpcode::G_SADDO:
1656 case TargetOpcode::G_SSUBO:
1657 case TargetOpcode::G_SADDE:
1658 case TargetOpcode::G_SSUBE:
1659 case TargetOpcode::G_UADDO:
1660 case TargetOpcode::G_USUBO:
1661 case TargetOpcode::G_UADDE:
1662 case TargetOpcode::G_USUBE:
1663 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1664 case TargetOpcode::G_MUL:
1665 case TargetOpcode::G_UMULH:
1666 return narrowScalarMul(MI, Ty: NarrowTy);
1667 case TargetOpcode::G_EXTRACT:
1668 return narrowScalarExtract(MI, TypeIdx, Ty: NarrowTy);
1669 case TargetOpcode::G_INSERT:
1670 return narrowScalarInsert(MI, TypeIdx, Ty: NarrowTy);
1671 case TargetOpcode::G_LOAD: {
1672 auto &LoadMI = cast<GLoad>(Val&: MI);
1673 Register DstReg = LoadMI.getDstReg();
1674 LLT DstTy = MRI.getType(Reg: DstReg);
1675 if (DstTy.isVector())
1676 return UnableToLegalize;
1677
1678 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1679 Register TmpReg = MRI.createGenericVirtualRegister(Ty: NarrowTy);
1680 MIRBuilder.buildLoad(Res: TmpReg, Addr: LoadMI.getPointerReg(), MMO&: LoadMI.getMMO());
1681 MIRBuilder.buildAnyExt(Res: DstReg, Op: TmpReg);
1682 LoadMI.eraseFromParent();
1683 return Legalized;
1684 }
1685
1686 return reduceLoadStoreWidth(MI&: LoadMI, TypeIdx, NarrowTy);
1687 }
1688 case TargetOpcode::G_ZEXTLOAD:
1689 case TargetOpcode::G_SEXTLOAD:
1690 case TargetOpcode::G_FPEXTLOAD: {
1691 auto &LoadMI = cast<GExtLoad>(Val&: MI);
1692 Register DstReg = LoadMI.getDstReg();
1693 Register PtrReg = LoadMI.getPointerReg();
1694
1695 Register TmpReg = MRI.createGenericVirtualRegister(Ty: NarrowTy);
1696 auto &MMO = LoadMI.getMMO();
1697 unsigned MemSize = MMO.getSizeInBits().getValue();
1698
1699 if (MemSize == NarrowSize) {
1700 MIRBuilder.buildLoad(Res: TmpReg, Addr: PtrReg, MMO);
1701 } else if (MemSize < NarrowSize) {
1702 MIRBuilder.buildLoadInstr(Opcode: LoadMI.getOpcode(), Res: TmpReg, Addr: PtrReg, MMO);
1703 } else if (MemSize > NarrowSize) {
1704 // FIXME: Need to split the load.
1705 return UnableToLegalize;
1706 }
1707
1708 if (isa<GZExtLoad>(Val: LoadMI))
1709 MIRBuilder.buildZExt(Res: DstReg, Op: TmpReg);
1710 else if (isa<GSExtLoad>(Val: LoadMI))
1711 MIRBuilder.buildSExt(Res: DstReg, Op: TmpReg);
1712 else
1713 MIRBuilder.buildFPExt(Res: DstReg, Op: TmpReg);
1714
1715 LoadMI.eraseFromParent();
1716 return Legalized;
1717 }
1718 case TargetOpcode::G_STORE: {
1719 auto &StoreMI = cast<GStore>(Val&: MI);
1720
1721 Register SrcReg = StoreMI.getValueReg();
1722 LLT SrcTy = MRI.getType(Reg: SrcReg);
1723 if (SrcTy.isVector())
1724 return UnableToLegalize;
1725
1726 int NumParts = SizeOp0 / NarrowSize;
1727 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1728 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1729 if (SrcTy.isVector() && LeftoverBits != 0)
1730 return UnableToLegalize;
1731
1732 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1733 Register TmpReg = MRI.createGenericVirtualRegister(Ty: NarrowTy);
1734 MIRBuilder.buildTrunc(Res: TmpReg, Op: SrcReg);
1735 MIRBuilder.buildStore(Val: TmpReg, Addr: StoreMI.getPointerReg(), MMO&: StoreMI.getMMO());
1736 StoreMI.eraseFromParent();
1737 return Legalized;
1738 }
1739
1740 return reduceLoadStoreWidth(MI&: StoreMI, TypeIdx: 0, NarrowTy);
1741 }
1742 case TargetOpcode::G_FPTRUNCSTORE: {
1743 auto &StoreMI = cast<GFPTruncStore>(Val&: MI);
1744 Register SrcReg = StoreMI.getValueReg();
1745 Register PtrReg = StoreMI.getPointerReg();
1746
1747 auto &MMO = StoreMI.getMMO();
1748 unsigned MemSize = MMO.getSizeInBits().getValue();
1749 if (MemSize > NarrowSize) {
1750 return UnableToLegalize;
1751 }
1752
1753 auto TmpReg = MIRBuilder.buildFPTrunc(Res: NarrowTy, Op: SrcReg);
1754 if (MemSize == NarrowSize) {
1755 MIRBuilder.buildStore(Val: TmpReg, Addr: PtrReg, MMO);
1756 } else if (MemSize < NarrowSize) {
1757 MIRBuilder.buildStoreInstr(Opcode: TargetOpcode::G_FPTRUNCSTORE, Val: TmpReg, Addr: PtrReg,
1758 MMO);
1759 }
1760
1761 StoreMI.eraseFromParent();
1762 return Legalized;
1763 }
1764 case TargetOpcode::G_SELECT:
1765 return narrowScalarSelect(MI, TypeIdx, Ty: NarrowTy);
1766 case TargetOpcode::G_AND:
1767 case TargetOpcode::G_OR:
1768 case TargetOpcode::G_XOR: {
1769 // Legalize bitwise operation:
1770 // A = BinOp<Ty> B, C
1771 // into:
1772 // B1, ..., BN = G_UNMERGE_VALUES B
1773 // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
1775 // ...
1776 // AN = BinOp<Ty/N> BN, CN
1777 // A = G_MERGE_VALUES A1, ..., AN
1778 return narrowScalarBasic(MI, TypeIdx, Ty: NarrowTy);
1779 }
1780 case TargetOpcode::G_SHL:
1781 case TargetOpcode::G_LSHR:
1782 case TargetOpcode::G_ASHR:
1783 return narrowScalarShift(MI, TypeIdx, Ty: NarrowTy);
1784 case TargetOpcode::G_CTLZ:
1785 case TargetOpcode::G_CTLZ_ZERO_POISON:
1786 case TargetOpcode::G_CTTZ:
1787 case TargetOpcode::G_CTTZ_ZERO_POISON:
1788 case TargetOpcode::G_CTLS:
1789 case TargetOpcode::G_CTPOP:
1790 if (TypeIdx == 1)
1791 switch (MI.getOpcode()) {
1792 case TargetOpcode::G_CTLZ:
1793 case TargetOpcode::G_CTLZ_ZERO_POISON:
1794 return narrowScalarCTLZ(MI, TypeIdx, Ty: NarrowTy);
1795 case TargetOpcode::G_CTTZ:
1796 case TargetOpcode::G_CTTZ_ZERO_POISON:
1797 return narrowScalarCTTZ(MI, TypeIdx, Ty: NarrowTy);
1798 case TargetOpcode::G_CTPOP:
1799 return narrowScalarCTPOP(MI, TypeIdx, Ty: NarrowTy);
1800 case TargetOpcode::G_CTLS:
1801 return narrowScalarCTLS(MI, TypeIdx, Ty: NarrowTy);
1802 default:
1803 return UnableToLegalize;
1804 }
1805
1806 Observer.changingInstr(MI);
1807 narrowScalarDst(MI, NarrowTy, OpIdx: 0, ExtOpcode: TargetOpcode::G_ZEXT);
1808 Observer.changedInstr(MI);
1809 return Legalized;
1810 case TargetOpcode::G_INTTOPTR:
1811 if (TypeIdx != 1)
1812 return UnableToLegalize;
1813
1814 Observer.changingInstr(MI);
1815 narrowScalarSrc(MI, NarrowTy, OpIdx: 1);
1816 Observer.changedInstr(MI);
1817 return Legalized;
1818 case TargetOpcode::G_PTRTOINT:
1819 if (TypeIdx != 0)
1820 return UnableToLegalize;
1821
1822 Observer.changingInstr(MI);
1823 narrowScalarDst(MI, NarrowTy, OpIdx: 0, ExtOpcode: TargetOpcode::G_ZEXT);
1824 Observer.changedInstr(MI);
1825 return Legalized;
1826 case TargetOpcode::G_PHI: {
1827 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1828 // NarrowSize.
1829 if (SizeOp0 % NarrowSize != 0)
1830 return UnableToLegalize;
1831
1832 unsigned NumParts = SizeOp0 / NarrowSize;
1833 SmallVector<Register, 2> DstRegs(NumParts);
1834 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1835 Observer.changingInstr(MI);
1836 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1837 MachineBasicBlock &OpMBB = *MI.getOperand(i: i + 1).getMBB();
1838 MIRBuilder.setInsertPt(MBB&: OpMBB, II: OpMBB.getFirstTerminatorForward());
1839 extractParts(Reg: MI.getOperand(i).getReg(), Ty: NarrowTy, NumParts,
1840 VRegs&: SrcRegs[i / 2], MIRBuilder, MRI);
1841 }
1842 MachineBasicBlock &MBB = *MI.getParent();
1843 MIRBuilder.setInsertPt(MBB, II: MI);
1844 for (unsigned i = 0; i < NumParts; ++i) {
1845 DstRegs[i] = MRI.createGenericVirtualRegister(Ty: NarrowTy);
1846 MachineInstrBuilder MIB =
1847 MIRBuilder.buildInstr(Opcode: TargetOpcode::G_PHI).addDef(RegNo: DstRegs[i]);
1848 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1849 MIB.addUse(RegNo: SrcRegs[j / 2][i]).add(MO: MI.getOperand(i: j + 1));
1850 }
1851 MIRBuilder.setInsertPt(MBB, II: MBB.getFirstNonPHI());
1852 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0), Ops: DstRegs);
1853 Observer.changedInstr(MI);
1854 MI.eraseFromParent();
1855 return Legalized;
1856 }
1857 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1858 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1859 if (TypeIdx != 2)
1860 return UnableToLegalize;
1861
1862 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1863 Observer.changingInstr(MI);
1864 narrowScalarSrc(MI, NarrowTy, OpIdx);
1865 Observer.changedInstr(MI);
1866 return Legalized;
1867 }
1868 case TargetOpcode::G_ICMP: {
1869 Register LHS = MI.getOperand(i: 2).getReg();
1870 LLT SrcTy = MRI.getType(Reg: LHS);
1871 CmpInst::Predicate Pred =
1872 static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate());
1873
1874 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1875 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1876 if (!extractParts(Reg: LHS, RegTy: SrcTy, MainTy: NarrowTy, LeftoverTy, VRegs&: LHSPartRegs,
1877 LeftoverVRegs&: LHSLeftoverRegs, MIRBuilder, MRI))
1878 return UnableToLegalize;
1879
1880 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1881 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1882 if (!extractParts(Reg: MI.getOperand(i: 3).getReg(), RegTy: SrcTy, MainTy: NarrowTy, LeftoverTy&: Unused,
1883 VRegs&: RHSPartRegs, LeftoverVRegs&: RHSLeftoverRegs, MIRBuilder, MRI))
1884 return UnableToLegalize;
1885
1886 // We now have the LHS and RHS of the compare split into narrow-type
1887 // registers, plus potentially some leftover type.
1888 Register Dst = MI.getOperand(i: 0).getReg();
1889 LLT ResTy = MRI.getType(Reg: Dst);
1890 if (ICmpInst::isEquality(P: Pred)) {
1891 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1892 // them together. For each equal part, the result should be all 0s. For
1893 // each non-equal part, we'll get at least one 1.
1894 auto Zero = MIRBuilder.buildConstant(Res: NarrowTy, Val: 0);
1895 SmallVector<Register, 4> Xors;
1896 for (auto LHSAndRHS : zip(t&: LHSPartRegs, u&: RHSPartRegs)) {
1897 auto LHS = std::get<0>(t&: LHSAndRHS);
1898 auto RHS = std::get<1>(t&: LHSAndRHS);
1899 auto Xor = MIRBuilder.buildXor(Dst: NarrowTy, Src0: LHS, Src1: RHS).getReg(Idx: 0);
1900 Xors.push_back(Elt: Xor);
1901 }
1902
1903 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1904 // to the desired narrow type so that we can OR them together later.
1905 SmallVector<Register, 4> WidenedXors;
1906 for (auto LHSAndRHS : zip(t&: LHSLeftoverRegs, u&: RHSLeftoverRegs)) {
1907 auto LHS = std::get<0>(t&: LHSAndRHS);
1908 auto RHS = std::get<1>(t&: LHSAndRHS);
1909 auto Xor = MIRBuilder.buildXor(Dst: LeftoverTy, Src0: LHS, Src1: RHS).getReg(Idx: 0);
1910 LLT GCDTy = extractGCDType(Parts&: WidenedXors, DstTy: NarrowTy, NarrowTy: LeftoverTy, SrcReg: Xor);
1911 buildLCMMergePieces(DstTy: LeftoverTy, NarrowTy, GCDTy, VRegs&: WidenedXors,
1912 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1913 llvm::append_range(C&: Xors, R&: WidenedXors);
1914 }
1915
1916 // Now, for each part we broke up, we know if they are equal/not equal
1917 // based off the G_XOR. We can OR these all together and compare against
1918 // 0 to get the result.
1919 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1920 auto Or = MIRBuilder.buildOr(Dst: NarrowTy, Src0: Xors[0], Src1: Xors[1]);
1921 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1922 Or = MIRBuilder.buildOr(Dst: NarrowTy, Src0: Or, Src1: Xors[I]);
1923 MIRBuilder.buildICmp(Pred, Res: Dst, Op0: Or, Op1: Zero);
1924 } else {
1925 Register CmpIn;
1926 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1927 Register CmpOut;
1928 CmpInst::Predicate PartPred;
1929
1930 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1931 PartPred = Pred;
1932 CmpOut = Dst;
1933 } else {
1934 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1935 CmpOut = MRI.createGenericVirtualRegister(Ty: ResTy);
1936 }
1937
1938 if (!CmpIn) {
1939 MIRBuilder.buildICmp(Pred: PartPred, Res: CmpOut, Op0: LHSPartRegs[I],
1940 Op1: RHSPartRegs[I]);
1941 } else {
1942 auto Cmp = MIRBuilder.buildICmp(Pred: PartPred, Res: ResTy, Op0: LHSPartRegs[I],
1943 Op1: RHSPartRegs[I]);
1944 auto CmpEq = MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: ResTy,
1945 Op0: LHSPartRegs[I], Op1: RHSPartRegs[I]);
1946 MIRBuilder.buildSelect(Res: CmpOut, Tst: CmpEq, Op0: CmpIn, Op1: Cmp);
1947 }
1948
1949 CmpIn = CmpOut;
1950 }
1951
1952 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1953 Register CmpOut;
1954 CmpInst::Predicate PartPred;
1955
1956 if (I == E - 1) {
1957 PartPred = Pred;
1958 CmpOut = Dst;
1959 } else {
1960 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1961 CmpOut = MRI.createGenericVirtualRegister(Ty: ResTy);
1962 }
1963
1964 if (!CmpIn) {
1965 MIRBuilder.buildICmp(Pred: PartPred, Res: CmpOut, Op0: LHSLeftoverRegs[I],
1966 Op1: RHSLeftoverRegs[I]);
1967 } else {
1968 auto Cmp = MIRBuilder.buildICmp(Pred: PartPred, Res: ResTy, Op0: LHSLeftoverRegs[I],
1969 Op1: RHSLeftoverRegs[I]);
1970 auto CmpEq =
1971 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: ResTy,
1972 Op0: LHSLeftoverRegs[I], Op1: RHSLeftoverRegs[I]);
1973 MIRBuilder.buildSelect(Res: CmpOut, Tst: CmpEq, Op0: CmpIn, Op1: Cmp);
1974 }
1975
1976 CmpIn = CmpOut;
1977 }
1978 }
1979 MI.eraseFromParent();
1980 return Legalized;
1981 }
1982 case TargetOpcode::G_FCMP:
1983 if (TypeIdx != 0)
1984 return UnableToLegalize;
1985
1986 Observer.changingInstr(MI);
1987 narrowScalarDst(MI, NarrowTy, OpIdx: 0, ExtOpcode: TargetOpcode::G_ZEXT);
1988 Observer.changedInstr(MI);
1989 return Legalized;
1990
1991 case TargetOpcode::G_SEXT_INREG: {
1992 if (TypeIdx != 0)
1993 return UnableToLegalize;
1994
1995 int64_t SizeInBits = MI.getOperand(i: 2).getImm();
1996
1997 // So long as the new type has more bits than the bits we're extending we
1998 // don't need to break it apart.
1999 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
2000 Observer.changingInstr(MI);
2001 // We don't lose any non-extension bits by truncating the src and
2002 // sign-extending the dst.
2003 MachineOperand &MO1 = MI.getOperand(i: 1);
2004 auto TruncMIB = MIRBuilder.buildTrunc(Res: NarrowTy, Op: MO1);
2005 MO1.setReg(TruncMIB.getReg(Idx: 0));
2006
2007 MachineOperand &MO2 = MI.getOperand(i: 0);
2008 Register DstExt = MRI.createGenericVirtualRegister(Ty: NarrowTy);
2009 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
2010 MIRBuilder.buildSExt(Res: MO2, Op: DstExt);
2011 MO2.setReg(DstExt);
2012 Observer.changedInstr(MI);
2013 return Legalized;
2014 }
2015
2016 // Break it apart. Components below the extension point are unmodified. The
2017 // component containing the extension point becomes a narrower SEXT_INREG.
2018 // Components above it are ashr'd from the component containing the
2019 // extension point.
2020 if (SizeOp0 % NarrowSize != 0)
2021 return UnableToLegalize;
2022 int NumParts = SizeOp0 / NarrowSize;
2023
2024 // List the registers where the destination will be scattered.
2025 SmallVector<Register, 2> DstRegs;
2026 // List the registers where the source will be split.
2027 SmallVector<Register, 2> SrcRegs;
2028
2029 // Create all the temporary registers.
2030 for (int i = 0; i < NumParts; ++i) {
2031 Register SrcReg = MRI.createGenericVirtualRegister(Ty: NarrowTy);
2032
2033 SrcRegs.push_back(Elt: SrcReg);
2034 }
2035
2036 // Explode the big arguments into smaller chunks.
2037 MIRBuilder.buildUnmerge(Res: SrcRegs, Op: MI.getOperand(i: 1));
2038
2039 Register AshrCstReg =
2040 MIRBuilder.buildConstant(Res: NarrowTy, Val: NarrowTy.getScalarSizeInBits() - 1)
2041 .getReg(Idx: 0);
2042 Register FullExtensionReg;
2043 Register PartialExtensionReg;
2044
2045 // Do the operation on each small part.
2046 for (int i = 0; i < NumParts; ++i) {
2047 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
2048 DstRegs.push_back(Elt: SrcRegs[i]);
2049 PartialExtensionReg = DstRegs.back();
2050 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
2051 assert(PartialExtensionReg &&
2052 "Expected to visit partial extension before full");
2053 if (FullExtensionReg) {
2054 DstRegs.push_back(Elt: FullExtensionReg);
2055 continue;
2056 }
2057 DstRegs.push_back(
2058 Elt: MIRBuilder.buildAShr(Dst: NarrowTy, Src0: PartialExtensionReg, Src1: AshrCstReg)
2059 .getReg(Idx: 0));
2060 FullExtensionReg = DstRegs.back();
2061 } else {
2062 DstRegs.push_back(
2063 Elt: MIRBuilder
2064 .buildInstr(
2065 Opc: TargetOpcode::G_SEXT_INREG, DstOps: {NarrowTy},
2066 SrcOps: {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
2067 .getReg(Idx: 0));
2068 PartialExtensionReg = DstRegs.back();
2069 }
2070 }
2071
2072 // Gather the destination registers into the final destination.
2073 Register DstReg = MI.getOperand(i: 0).getReg();
2074 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: DstRegs);
2075 MI.eraseFromParent();
2076 return Legalized;
2077 }
2078 case TargetOpcode::G_BSWAP:
2079 case TargetOpcode::G_BITREVERSE: {
2080 if (SizeOp0 % NarrowSize != 0)
2081 return UnableToLegalize;
2082
2083 Observer.changingInstr(MI);
2084 SmallVector<Register, 2> SrcRegs, DstRegs;
2085 unsigned NumParts = SizeOp0 / NarrowSize;
2086 extractParts(Reg: MI.getOperand(i: 1).getReg(), Ty: NarrowTy, NumParts, VRegs&: SrcRegs,
2087 MIRBuilder, MRI);
2088
2089 for (unsigned i = 0; i < NumParts; ++i) {
2090 auto DstPart = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {NarrowTy},
2091 SrcOps: {SrcRegs[NumParts - 1 - i]});
2092 DstRegs.push_back(Elt: DstPart.getReg(Idx: 0));
2093 }
2094
2095 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0), Ops: DstRegs);
2096
2097 Observer.changedInstr(MI);
2098 MI.eraseFromParent();
2099 return Legalized;
2100 }
2101 case TargetOpcode::G_PTR_ADD:
2102 case TargetOpcode::G_PTRMASK: {
2103 if (TypeIdx != 1)
2104 return UnableToLegalize;
2105 Observer.changingInstr(MI);
2106 narrowScalarSrc(MI, NarrowTy, OpIdx: 2);
2107 Observer.changedInstr(MI);
2108 return Legalized;
2109 }
2110 case TargetOpcode::G_FPTOUI:
2111 case TargetOpcode::G_FPTOSI:
2112 case TargetOpcode::G_FPTOUI_SAT:
2113 case TargetOpcode::G_FPTOSI_SAT:
2114 return narrowScalarFPTOI(MI, TypeIdx, Ty: NarrowTy);
2115 case TargetOpcode::G_FPEXT:
2116 if (TypeIdx != 0)
2117 return UnableToLegalize;
2118 Observer.changingInstr(MI);
2119 narrowScalarDst(MI, NarrowTy, OpIdx: 0, ExtOpcode: TargetOpcode::G_FPEXT);
2120 Observer.changedInstr(MI);
2121 return Legalized;
2122 case TargetOpcode::G_FLDEXP:
2123 case TargetOpcode::G_STRICT_FLDEXP:
2124 return narrowScalarFLDEXP(MI, TypeIdx, Ty: NarrowTy);
2125 case TargetOpcode::G_VSCALE: {
2126 Register Dst = MI.getOperand(i: 0).getReg();
2127 LLT Ty = MRI.getType(Reg: Dst);
2128
2129 // Assume VSCALE(1) fits into a legal integer
2130 const APInt One(NarrowTy.getSizeInBits(), 1);
2131 auto VScaleBase = MIRBuilder.buildVScale(Res: NarrowTy, MinElts: One);
2132 auto ZExt = MIRBuilder.buildZExt(Res: Ty, Op: VScaleBase);
2133 auto C = MIRBuilder.buildConstant(Res: Ty, Val: *MI.getOperand(i: 1).getCImm());
2134 MIRBuilder.buildMul(Dst, Src0: ZExt, Src1: C);
2135
2136 MI.eraseFromParent();
2137 return Legalized;
2138 }
2139 }
2140}
2141
2142Register LegalizerHelper::coerceToScalar(Register Val) {
2143 LLT Ty = MRI.getType(Reg: Val);
2144 if (Ty.isScalar())
2145 return Val;
2146
2147 const DataLayout &DL = MIRBuilder.getDataLayout();
2148 LLT NewTy = LLT::scalar(SizeInBits: Ty.getSizeInBits());
2149 if (Ty.isPointer()) {
2150 if (DL.isNonIntegralAddressSpace(AddrSpace: Ty.getAddressSpace()))
2151 return Register();
2152 return MIRBuilder.buildPtrToInt(Dst: NewTy, Src: Val).getReg(Idx: 0);
2153 }
2154
2155 Register NewVal = Val;
2156
2157 assert(Ty.isVector());
2158 if (Ty.isPointerVector())
2159 NewVal = MIRBuilder.buildPtrToInt(Dst: NewTy, Src: NewVal).getReg(Idx: 0);
2160 return MIRBuilder.buildBitcast(Dst: NewTy, Src: NewVal).getReg(Idx: 0);
2161}
2162
2163void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2164 unsigned OpIdx, unsigned ExtOpcode) {
2165 MachineOperand &MO = MI.getOperand(i: OpIdx);
2166 auto ExtB = MIRBuilder.buildInstr(Opc: ExtOpcode, DstOps: {WideTy}, SrcOps: {MO});
2167 MO.setReg(ExtB.getReg(Idx: 0));
2168}
2169
2170void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2171 unsigned OpIdx) {
2172 MachineOperand &MO = MI.getOperand(i: OpIdx);
2173 auto ExtB = MIRBuilder.buildTrunc(Res: NarrowTy, Op: MO);
2174 MO.setReg(ExtB.getReg(Idx: 0));
2175}
2176
2177void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2178 unsigned OpIdx, unsigned TruncOpcode) {
2179 MachineOperand &MO = MI.getOperand(i: OpIdx);
2180 Register DstExt = MRI.createGenericVirtualRegister(Ty: WideTy);
2181 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
2182 MIRBuilder.buildInstr(Opc: TruncOpcode, DstOps: {MO}, SrcOps: {DstExt});
2183 MO.setReg(DstExt);
2184}
2185
2186void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2187 unsigned OpIdx, unsigned ExtOpcode) {
2188 MachineOperand &MO = MI.getOperand(i: OpIdx);
2189 Register DstTrunc = MRI.createGenericVirtualRegister(Ty: NarrowTy);
2190 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
2191 MIRBuilder.buildInstr(Opc: ExtOpcode, DstOps: {MO}, SrcOps: {DstTrunc});
2192 MO.setReg(DstTrunc);
2193}
2194
2195void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2196 unsigned OpIdx) {
2197 MachineOperand &MO = MI.getOperand(i: OpIdx);
2198 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
2199 Register Dst = MO.getReg();
2200 Register DstExt = MRI.createGenericVirtualRegister(Ty: WideTy);
2201 MO.setReg(DstExt);
2202 MIRBuilder.buildDeleteTrailingVectorElements(Res: Dst, Op0: DstExt);
2203}
2204
2205void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2206 unsigned OpIdx) {
2207 MachineOperand &MO = MI.getOperand(i: OpIdx);
2208 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(Res: MoreTy, Op0: MO).getReg(Idx: 0));
2209}
2210
2211void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2212 MachineOperand &Op = MI.getOperand(i: OpIdx);
2213 Op.setReg(MIRBuilder.buildBitcast(Dst: CastTy, Src: Op).getReg(Idx: 0));
2214}
2215
2216void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2217 MachineOperand &MO = MI.getOperand(i: OpIdx);
2218 Register CastDst = MRI.createGenericVirtualRegister(Ty: CastTy);
2219 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
2220 MIRBuilder.buildBitcast(Dst: MO, Src: CastDst);
2221 MO.setReg(CastDst);
2222}
2223
2224LegalizerHelper::LegalizeResult
2225LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2226 LLT WideTy) {
2227 if (TypeIdx != 1)
2228 return UnableToLegalize;
2229
2230 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2231 if (DstTy.isVector())
2232 return UnableToLegalize;
2233
2234 LLT SrcTy = MRI.getType(Reg: Src1Reg);
2235 const int DstSize = DstTy.getSizeInBits();
2236 const int SrcSize = SrcTy.getSizeInBits();
2237 const int WideSize = WideTy.getSizeInBits();
2238 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2239
2240 unsigned NumOps = MI.getNumOperands();
2241 unsigned NumSrc = MI.getNumOperands() - 1;
2242 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2243
2244 if (WideSize >= DstSize) {
2245 // Directly pack the bits in the target type.
2246 Register ResultReg = MIRBuilder.buildZExt(Res: WideTy, Op: Src1Reg).getReg(Idx: 0);
2247
2248 for (unsigned I = 2; I != NumOps; ++I) {
2249 const unsigned Offset = (I - 1) * PartSize;
2250
2251 Register SrcReg = MI.getOperand(i: I).getReg();
2252 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2253
2254 auto ZextInput = MIRBuilder.buildZExt(Res: WideTy, Op: SrcReg);
2255
2256 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2257 MRI.createGenericVirtualRegister(Ty: WideTy);
2258
2259 auto ShiftAmt = MIRBuilder.buildConstant(Res: WideTy, Val: Offset);
2260 auto Shl = MIRBuilder.buildShl(Dst: WideTy, Src0: ZextInput, Src1: ShiftAmt);
2261 MIRBuilder.buildOr(Dst: NextResult, Src0: ResultReg, Src1: Shl);
2262 ResultReg = NextResult;
2263 }
2264
2265 if (WideSize > DstSize)
2266 MIRBuilder.buildTrunc(Res: DstReg, Op: ResultReg);
2267 else if (DstTy.isPointer())
2268 MIRBuilder.buildIntToPtr(Dst: DstReg, Src: ResultReg);
2269
2270 MI.eraseFromParent();
2271 return Legalized;
2272 }
2273
2274 // Unmerge the original values to the GCD type, and recombine to the next
2275 // multiple greater than the original type.
2276 //
2277 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2278 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2279 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2280 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2281 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2282 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2283 // %12:_(s12) = G_MERGE_VALUES %10, %11
2284 //
2285 // Padding with undef if necessary:
2286 //
2287 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2288 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2289 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2290 // %7:_(s2) = G_IMPLICIT_DEF
2291 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2292 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2293 // %10:_(s12) = G_MERGE_VALUES %8, %9
2294
2295 const int GCD = std::gcd(m: SrcSize, n: WideSize);
2296 LLT GCDTy = WideTy.changeElementSize(NewEltSize: GCD);
2297
2298 SmallVector<Register, 8> NewMergeRegs;
2299 SmallVector<Register, 8> Unmerges;
2300 LLT WideDstTy = WideTy.changeElementSize(NewEltSize: NumMerge * WideSize);
2301
2302 // Decompose the original operands if they don't evenly divide.
2303 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands())) {
2304 Register SrcReg = MO.getReg();
2305 if (GCD == SrcSize) {
2306 Unmerges.push_back(Elt: SrcReg);
2307 } else {
2308 auto Unmerge = MIRBuilder.buildUnmerge(Res: GCDTy, Op: SrcReg);
2309 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2310 Unmerges.push_back(Elt: Unmerge.getReg(Idx: J));
2311 }
2312 }
2313
2314 // Pad with undef to the next size that is a multiple of the requested size.
2315 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2316 Register UndefReg = MIRBuilder.buildUndef(Res: GCDTy).getReg(Idx: 0);
2317 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2318 Unmerges.push_back(Elt: UndefReg);
2319 }
2320
2321 const int PartsPerGCD = WideSize / GCD;
2322
2323 // Build merges of each piece.
2324 ArrayRef<Register> Slicer(Unmerges);
2325 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(N: PartsPerGCD)) {
2326 auto Merge =
2327 MIRBuilder.buildMergeLikeInstr(Res: WideTy, Ops: Slicer.take_front(N: PartsPerGCD));
2328 NewMergeRegs.push_back(Elt: Merge.getReg(Idx: 0));
2329 }
2330
2331 // A truncate may be necessary if the requested type doesn't evenly divide the
2332 // original result type.
2333 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2334 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: NewMergeRegs);
2335 } else {
2336 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(Res: WideDstTy, Ops: NewMergeRegs);
2337 MIRBuilder.buildTrunc(Res: DstReg, Op: FinalMerge.getReg(Idx: 0));
2338 }
2339
2340 MI.eraseFromParent();
2341 return Legalized;
2342}
2343
2344LegalizerHelper::LegalizeResult
2345LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2346 LLT WideTy) {
2347 if (TypeIdx != 0)
2348 return UnableToLegalize;
2349
2350 int NumDst = MI.getNumOperands() - 1;
2351 Register SrcReg = MI.getOperand(i: NumDst).getReg();
2352 LLT SrcTy = MRI.getType(Reg: SrcReg);
2353 if (SrcTy.isVector())
2354 return UnableToLegalize;
2355
2356 Register Dst0Reg = MI.getOperand(i: 0).getReg();
2357 LLT DstTy = MRI.getType(Reg: Dst0Reg);
2358 if (!DstTy.isScalar())
2359 return UnableToLegalize;
2360
2361 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2362 if (SrcTy.isPointer()) {
2363 const DataLayout &DL = MIRBuilder.getDataLayout();
2364 if (DL.isNonIntegralAddressSpace(AddrSpace: SrcTy.getAddressSpace())) {
2365 LLVM_DEBUG(
2366 dbgs() << "Not casting non-integral address space integer\n");
2367 return UnableToLegalize;
2368 }
2369
2370 SrcTy = LLT::scalar(SizeInBits: SrcTy.getSizeInBits());
2371 SrcReg = MIRBuilder.buildPtrToInt(Dst: SrcTy, Src: SrcReg).getReg(Idx: 0);
2372 }
2373
2374 // Widen SrcTy to WideTy. This does not affect the result, but since the
2375 // user requested this size, it is probably better handled than SrcTy and
2376 // should reduce the total number of legalization artifacts.
2377 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2378 SrcTy = WideTy;
2379 SrcReg = MIRBuilder.buildAnyExt(Res: WideTy, Op: SrcReg).getReg(Idx: 0);
2380 }
2381
2382 // Theres no unmerge type to target. Directly extract the bits from the
2383 // source type
2384 unsigned DstSize = DstTy.getSizeInBits();
2385
2386 MIRBuilder.buildTrunc(Res: Dst0Reg, Op: SrcReg);
2387 for (int I = 1; I != NumDst; ++I) {
2388 auto ShiftAmt = MIRBuilder.buildConstant(Res: SrcTy, Val: DstSize * I);
2389 auto Shr = MIRBuilder.buildLShr(Dst: SrcTy, Src0: SrcReg, Src1: ShiftAmt);
2390 MIRBuilder.buildTrunc(Res: MI.getOperand(i: I), Op: Shr);
2391 }
2392
2393 MI.eraseFromParent();
2394 return Legalized;
2395 }
2396
2397 // Extend the source to a wider type.
2398 LLT LCMTy = getLCMType(OrigTy: SrcTy, TargetTy: WideTy);
2399
2400 Register WideSrc = SrcReg;
2401 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2402 // TODO: If this is an integral address space, cast to integer and anyext.
2403 if (SrcTy.isPointer()) {
2404 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2405 return UnableToLegalize;
2406 }
2407
2408 WideSrc = MIRBuilder.buildAnyExt(Res: LCMTy, Op: WideSrc).getReg(Idx: 0);
2409 }
2410
2411 auto Unmerge = MIRBuilder.buildUnmerge(Res: WideTy, Op: WideSrc);
2412
2413 // Create a sequence of unmerges and merges to the original results. Since we
2414 // may have widened the source, we will need to pad the results with dead defs
2415 // to cover the source register.
2416 // e.g. widen s48 to s64:
2417 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2418 //
2419 // =>
2420 // %4:_(s192) = G_ANYEXT %0:_(s96)
2421 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2422 // ; unpack to GCD type, with extra dead defs
2423 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2424 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2425 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2426 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2427 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2428 const LLT GCDTy = getGCDType(OrigTy: WideTy, TargetTy: DstTy);
2429 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2430 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2431
2432 // Directly unmerge to the destination without going through a GCD type
2433 // if possible
2434 if (PartsPerRemerge == 1) {
2435 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2436
2437 for (int I = 0; I != NumUnmerge; ++I) {
2438 auto MIB = MIRBuilder.buildInstr(Opcode: TargetOpcode::G_UNMERGE_VALUES);
2439
2440 for (int J = 0; J != PartsPerUnmerge; ++J) {
2441 int Idx = I * PartsPerUnmerge + J;
2442 if (Idx < NumDst)
2443 MIB.addDef(RegNo: MI.getOperand(i: Idx).getReg());
2444 else {
2445 // Create dead def for excess components.
2446 MIB.addDef(RegNo: MRI.createGenericVirtualRegister(Ty: DstTy));
2447 }
2448 }
2449
2450 MIB.addUse(RegNo: Unmerge.getReg(Idx: I));
2451 }
2452 } else {
2453 SmallVector<Register, 16> Parts;
2454 for (int J = 0; J != NumUnmerge; ++J)
2455 extractGCDType(Parts, GCDTy, SrcReg: Unmerge.getReg(Idx: J));
2456
2457 SmallVector<Register, 8> RemergeParts;
2458 for (int I = 0; I != NumDst; ++I) {
2459 for (int J = 0; J < PartsPerRemerge; ++J) {
2460 const int Idx = I * PartsPerRemerge + J;
2461 RemergeParts.emplace_back(Args&: Parts[Idx]);
2462 }
2463
2464 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: I).getReg(), Ops: RemergeParts);
2465 RemergeParts.clear();
2466 }
2467 }
2468
2469 MI.eraseFromParent();
2470 return Legalized;
2471}
2472
2473LegalizerHelper::LegalizeResult
2474LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2475 LLT WideTy) {
2476 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2477 unsigned Offset = MI.getOperand(i: 2).getImm();
2478
2479 if (TypeIdx == 0) {
2480 if (SrcTy.isVector() || DstTy.isVector())
2481 return UnableToLegalize;
2482
2483 SrcOp Src(SrcReg);
2484 if (SrcTy.isPointer()) {
2485 // Extracts from pointers can be handled only if they are really just
2486 // simple integers.
2487 const DataLayout &DL = MIRBuilder.getDataLayout();
2488 if (DL.isNonIntegralAddressSpace(AddrSpace: SrcTy.getAddressSpace()))
2489 return UnableToLegalize;
2490
2491 LLT SrcAsIntTy = LLT::scalar(SizeInBits: SrcTy.getSizeInBits());
2492 Src = MIRBuilder.buildPtrToInt(Dst: SrcAsIntTy, Src);
2493 SrcTy = SrcAsIntTy;
2494 }
2495
2496 if (DstTy.isPointer())
2497 return UnableToLegalize;
2498
2499 if (Offset == 0) {
2500 // Avoid a shift in the degenerate case.
2501 MIRBuilder.buildTrunc(Res: DstReg,
2502 Op: MIRBuilder.buildAnyExtOrTrunc(Res: WideTy, Op: Src));
2503 MI.eraseFromParent();
2504 return Legalized;
2505 }
2506
2507 // Do a shift in the source type.
2508 LLT ShiftTy = SrcTy;
2509 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2510 Src = MIRBuilder.buildAnyExt(Res: WideTy, Op: Src);
2511 ShiftTy = WideTy;
2512 }
2513
2514 auto LShr = MIRBuilder.buildLShr(
2515 Dst: ShiftTy, Src0: Src, Src1: MIRBuilder.buildConstant(Res: ShiftTy, Val: Offset));
2516 MIRBuilder.buildTrunc(Res: DstReg, Op: LShr);
2517 MI.eraseFromParent();
2518 return Legalized;
2519 }
2520
2521 if (SrcTy.isScalar()) {
2522 Observer.changingInstr(MI);
2523 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2524 Observer.changedInstr(MI);
2525 return Legalized;
2526 }
2527
2528 if (!SrcTy.isVector())
2529 return UnableToLegalize;
2530
2531 if (DstTy != SrcTy.getElementType())
2532 return UnableToLegalize;
2533
2534 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2535 return UnableToLegalize;
2536
2537 Observer.changingInstr(MI);
2538 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2539
2540 MI.getOperand(i: 2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2541 Offset);
2542 widenScalarDst(MI, WideTy: WideTy.getScalarType(), OpIdx: 0);
2543 Observer.changedInstr(MI);
2544 return Legalized;
2545}
2546
2547LegalizerHelper::LegalizeResult
2548LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2549 LLT WideTy) {
2550 if (TypeIdx != 0 || WideTy.isVector())
2551 return UnableToLegalize;
2552 Observer.changingInstr(MI);
2553 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2554 widenScalarDst(MI, WideTy);
2555 Observer.changedInstr(MI);
2556 return Legalized;
2557}
2558
2559LegalizerHelper::LegalizeResult
2560LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2561 LLT WideTy) {
2562 unsigned Opcode;
2563 unsigned ExtOpcode;
2564 std::optional<Register> CarryIn;
2565 switch (MI.getOpcode()) {
2566 default:
2567 llvm_unreachable("Unexpected opcode!");
2568 case TargetOpcode::G_SADDO:
2569 Opcode = TargetOpcode::G_ADD;
2570 ExtOpcode = TargetOpcode::G_SEXT;
2571 break;
2572 case TargetOpcode::G_SSUBO:
2573 Opcode = TargetOpcode::G_SUB;
2574 ExtOpcode = TargetOpcode::G_SEXT;
2575 break;
2576 case TargetOpcode::G_UADDO:
2577 Opcode = TargetOpcode::G_ADD;
2578 ExtOpcode = TargetOpcode::G_ZEXT;
2579 break;
2580 case TargetOpcode::G_USUBO:
2581 Opcode = TargetOpcode::G_SUB;
2582 ExtOpcode = TargetOpcode::G_ZEXT;
2583 break;
2584 case TargetOpcode::G_SADDE:
2585 Opcode = TargetOpcode::G_UADDE;
2586 ExtOpcode = TargetOpcode::G_SEXT;
2587 CarryIn = MI.getOperand(i: 4).getReg();
2588 break;
2589 case TargetOpcode::G_SSUBE:
2590 Opcode = TargetOpcode::G_USUBE;
2591 ExtOpcode = TargetOpcode::G_SEXT;
2592 CarryIn = MI.getOperand(i: 4).getReg();
2593 break;
2594 case TargetOpcode::G_UADDE:
2595 Opcode = TargetOpcode::G_UADDE;
2596 ExtOpcode = TargetOpcode::G_ZEXT;
2597 CarryIn = MI.getOperand(i: 4).getReg();
2598 break;
2599 case TargetOpcode::G_USUBE:
2600 Opcode = TargetOpcode::G_USUBE;
2601 ExtOpcode = TargetOpcode::G_ZEXT;
2602 CarryIn = MI.getOperand(i: 4).getReg();
2603 break;
2604 }
2605
2606 if (TypeIdx == 1) {
2607 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(IsVec: WideTy.isVector(), IsFP: false);
2608
2609 Observer.changingInstr(MI);
2610 if (CarryIn)
2611 widenScalarSrc(MI, WideTy, OpIdx: 4, ExtOpcode: BoolExtOp);
2612 widenScalarDst(MI, WideTy, OpIdx: 1);
2613
2614 Observer.changedInstr(MI);
2615 return Legalized;
2616 }
2617
2618 auto LHSExt = MIRBuilder.buildInstr(Opc: ExtOpcode, DstOps: {WideTy}, SrcOps: {MI.getOperand(i: 2)});
2619 auto RHSExt = MIRBuilder.buildInstr(Opc: ExtOpcode, DstOps: {WideTy}, SrcOps: {MI.getOperand(i: 3)});
2620 // Do the arithmetic in the larger type.
2621 Register NewOp;
2622 if (CarryIn) {
2623 LLT CarryOutTy = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
2624 NewOp = MIRBuilder
2625 .buildInstr(Opc: Opcode, DstOps: {WideTy, CarryOutTy},
2626 SrcOps: {LHSExt, RHSExt, *CarryIn})
2627 .getReg(Idx: 0);
2628 } else {
2629 NewOp = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {WideTy}, SrcOps: {LHSExt, RHSExt}).getReg(Idx: 0);
2630 }
2631 LLT OrigTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
2632 auto TruncOp = MIRBuilder.buildTrunc(Res: OrigTy, Op: NewOp);
2633 auto ExtOp = MIRBuilder.buildInstr(Opc: ExtOpcode, DstOps: {WideTy}, SrcOps: {TruncOp});
2634 // There is no overflow if the ExtOp is the same as NewOp.
2635 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: MI.getOperand(i: 1), Op0: NewOp, Op1: ExtOp);
2636 // Now trunc the NewOp to the original result.
2637 MIRBuilder.buildTrunc(Res: MI.getOperand(i: 0), Op: NewOp);
2638 MI.eraseFromParent();
2639 return Legalized;
2640}
2641
2642LegalizerHelper::LegalizeResult
2643LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2644 LLT WideTy) {
2645 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2646 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2647 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2648 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2649 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2650 // We can convert this to:
2651 // 1. Any extend iN to iM
2652 // 2. SHL by M-N
2653 // 3. [US][ADD|SUB|SHL]SAT
2654 // 4. L/ASHR by M-N
2655 //
2656 // It may be more efficient to lower this to a min and a max operation in
2657 // the higher precision arithmetic if the promoted operation isn't legal,
2658 // but this decision is up to the target's lowering request.
2659 Register DstReg = MI.getOperand(i: 0).getReg();
2660
2661 unsigned NewBits = WideTy.getScalarSizeInBits();
2662 unsigned SHLAmount = NewBits - MRI.getType(Reg: DstReg).getScalarSizeInBits();
2663
2664 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2665 // must not left shift the RHS to preserve the shift amount.
2666 auto LHS = MIRBuilder.buildAnyExt(Res: WideTy, Op: MI.getOperand(i: 1));
2667 auto RHS = IsShift ? MIRBuilder.buildZExt(Res: WideTy, Op: MI.getOperand(i: 2))
2668 : MIRBuilder.buildAnyExt(Res: WideTy, Op: MI.getOperand(i: 2));
2669 auto ShiftK = MIRBuilder.buildConstant(Res: WideTy, Val: SHLAmount);
2670 auto ShiftL = MIRBuilder.buildShl(Dst: WideTy, Src0: LHS, Src1: ShiftK);
2671 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(Dst: WideTy, Src0: RHS, Src1: ShiftK);
2672
2673 auto WideInst = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {WideTy},
2674 SrcOps: {ShiftL, ShiftR}, Flags: MI.getFlags());
2675
2676 // Use a shift that will preserve the number of sign bits when the trunc is
2677 // folded away.
2678 auto Result = IsSigned ? MIRBuilder.buildAShr(Dst: WideTy, Src0: WideInst, Src1: ShiftK)
2679 : MIRBuilder.buildLShr(Dst: WideTy, Src0: WideInst, Src1: ShiftK);
2680
2681 MIRBuilder.buildTrunc(Res: DstReg, Op: Result);
2682 MI.eraseFromParent();
2683 return Legalized;
2684}
2685
2686LegalizerHelper::LegalizeResult
2687LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2688 LLT WideTy) {
2689 if (TypeIdx == 1) {
2690 Observer.changingInstr(MI);
2691 widenScalarDst(MI, WideTy, OpIdx: 1);
2692 Observer.changedInstr(MI);
2693 return Legalized;
2694 }
2695
2696 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2697 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2698 LLT SrcTy = MRI.getType(Reg: LHS);
2699 LLT OverflowTy = MRI.getType(Reg: OriginalOverflow);
2700 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2701
2702 // To determine if the result overflowed in the larger type, we extend the
2703 // input to the larger type, do the multiply (checking if it overflows),
2704 // then also check the high bits of the result to see if overflow happened
2705 // there.
2706 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2707 auto LeftOperand = MIRBuilder.buildInstr(Opc: ExtOp, DstOps: {WideTy}, SrcOps: {LHS});
2708 auto RightOperand = MIRBuilder.buildInstr(Opc: ExtOp, DstOps: {WideTy}, SrcOps: {RHS});
2709
2710 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2711 // so we don't need to check the overflow result of larger type Mulo.
2712 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2713
2714 unsigned MulOpc =
2715 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2716
2717 MachineInstrBuilder Mulo;
2718 if (WideMulCanOverflow)
2719 Mulo = MIRBuilder.buildInstr(Opc: MulOpc, DstOps: {WideTy, OverflowTy},
2720 SrcOps: {LeftOperand, RightOperand});
2721 else
2722 Mulo = MIRBuilder.buildInstr(Opc: MulOpc, DstOps: {WideTy}, SrcOps: {LeftOperand, RightOperand});
2723
2724 auto Mul = Mulo->getOperand(i: 0);
2725 MIRBuilder.buildTrunc(Res: Result, Op: Mul);
2726
2727 MachineInstrBuilder ExtResult;
2728 // Overflow occurred if it occurred in the larger type, or if the high part
2729 // of the result does not zero/sign-extend the low part. Check this second
2730 // possibility first.
2731 if (IsSigned) {
2732 // For signed, overflow occurred when the high part does not sign-extend
2733 // the low part.
2734 ExtResult = MIRBuilder.buildSExtInReg(Res: WideTy, Op: Mul, ImmOp: SrcBitWidth);
2735 } else {
2736 // Unsigned overflow occurred when the high part does not zero-extend the
2737 // low part.
2738 ExtResult = MIRBuilder.buildZExtInReg(Res: WideTy, Op: Mul, ImmOp: SrcBitWidth);
2739 }
2740
2741 if (WideMulCanOverflow) {
2742 auto Overflow =
2743 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: OverflowTy, Op0: Mul, Op1: ExtResult);
2744 // Finally check if the multiplication in the larger type itself overflowed.
2745 MIRBuilder.buildOr(Dst: OriginalOverflow, Src0: Mulo->getOperand(i: 1), Src1: Overflow);
2746 } else {
2747 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: OriginalOverflow, Op0: Mul, Op1: ExtResult);
2748 }
2749 MI.eraseFromParent();
2750 return Legalized;
2751}
2752
2753LegalizerHelper::LegalizeResult
2754LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2755 unsigned Opcode = MI.getOpcode();
2756 switch (Opcode) {
2757 default:
2758 return UnableToLegalize;
2759 case TargetOpcode::G_ATOMICRMW_XCHG:
2760 case TargetOpcode::G_ATOMICRMW_ADD:
2761 case TargetOpcode::G_ATOMICRMW_SUB:
2762 case TargetOpcode::G_ATOMICRMW_AND:
2763 case TargetOpcode::G_ATOMICRMW_OR:
2764 case TargetOpcode::G_ATOMICRMW_XOR:
2765 case TargetOpcode::G_ATOMICRMW_MIN:
2766 case TargetOpcode::G_ATOMICRMW_MAX:
2767 case TargetOpcode::G_ATOMICRMW_UMIN:
2768 case TargetOpcode::G_ATOMICRMW_UMAX:
2769 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2770 Observer.changingInstr(MI);
2771 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
2772 widenScalarDst(MI, WideTy, OpIdx: 0);
2773 Observer.changedInstr(MI);
2774 return Legalized;
2775 case TargetOpcode::G_ATOMIC_CMPXCHG:
2776 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2777 Observer.changingInstr(MI);
2778 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
2779 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_ANYEXT);
2780 widenScalarDst(MI, WideTy, OpIdx: 0);
2781 Observer.changedInstr(MI);
2782 return Legalized;
2783 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2784 if (TypeIdx == 0) {
2785 Observer.changingInstr(MI);
2786 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_ANYEXT);
2787 widenScalarSrc(MI, WideTy, OpIdx: 4, ExtOpcode: TargetOpcode::G_ANYEXT);
2788 widenScalarDst(MI, WideTy, OpIdx: 0);
2789 Observer.changedInstr(MI);
2790 return Legalized;
2791 }
2792 assert(TypeIdx == 1 &&
2793 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2794 Observer.changingInstr(MI);
2795 widenScalarDst(MI, WideTy, OpIdx: 1);
2796 Observer.changedInstr(MI);
2797 return Legalized;
2798 case TargetOpcode::G_EXTRACT:
2799 return widenScalarExtract(MI, TypeIdx, WideTy);
2800 case TargetOpcode::G_INSERT:
2801 return widenScalarInsert(MI, TypeIdx, WideTy);
2802 case TargetOpcode::G_MERGE_VALUES:
2803 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2804 case TargetOpcode::G_UNMERGE_VALUES:
2805 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2806 case TargetOpcode::G_SADDO:
2807 case TargetOpcode::G_SSUBO:
2808 case TargetOpcode::G_UADDO:
2809 case TargetOpcode::G_USUBO:
2810 case TargetOpcode::G_SADDE:
2811 case TargetOpcode::G_SSUBE:
2812 case TargetOpcode::G_UADDE:
2813 case TargetOpcode::G_USUBE:
2814 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2815 case TargetOpcode::G_UMULO:
2816 case TargetOpcode::G_SMULO:
2817 return widenScalarMulo(MI, TypeIdx, WideTy);
2818 case TargetOpcode::G_SADDSAT:
2819 case TargetOpcode::G_SSUBSAT:
2820 case TargetOpcode::G_SSHLSAT:
2821 case TargetOpcode::G_UADDSAT:
2822 case TargetOpcode::G_USUBSAT:
2823 case TargetOpcode::G_USHLSAT:
2824 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2825 case TargetOpcode::G_CTTZ:
2826 case TargetOpcode::G_CTTZ_ZERO_POISON:
2827 case TargetOpcode::G_CTLZ:
2828 case TargetOpcode::G_CTLZ_ZERO_POISON:
2829 case TargetOpcode::G_CTLS:
2830 case TargetOpcode::G_CTPOP: {
2831 if (TypeIdx == 0) {
2832 Observer.changingInstr(MI);
2833 widenScalarDst(MI, WideTy, OpIdx: 0);
2834 Observer.changedInstr(MI);
2835 return Legalized;
2836 }
2837
2838 Register SrcReg = MI.getOperand(i: 1).getReg();
2839
2840 // First extend the input.
2841 unsigned ExtOpc;
2842 switch (Opcode) {
2843 case TargetOpcode::G_CTTZ:
2844 case TargetOpcode::G_CTTZ_ZERO_POISON:
2845 case TargetOpcode::G_CTLZ_ZERO_POISON: // poison shifted out below
2846 ExtOpc = TargetOpcode::G_ANYEXT;
2847 break;
2848 case TargetOpcode::G_CTLS:
2849 ExtOpc = TargetOpcode::G_SEXT;
2850 break;
2851 default:
2852 ExtOpc = TargetOpcode::G_ZEXT;
2853 }
2854
2855 auto MIBSrc = MIRBuilder.buildInstr(Opc: ExtOpc, DstOps: {WideTy}, SrcOps: {SrcReg});
2856 LLT CurTy = MRI.getType(Reg: SrcReg);
2857 unsigned NewOpc = Opcode;
2858 if (NewOpc == TargetOpcode::G_CTTZ) {
2859 // The count is the same in the larger type except if the original
2860 // value was zero. This can be handled by setting the bit just off
2861 // the top of the original type.
2862 auto TopBit = APInt::getOneBitSet(numBits: WideTy.getScalarSizeInBits(),
2863 BitNo: CurTy.getScalarSizeInBits());
2864 MIBSrc = MIRBuilder.buildOr(
2865 Dst: WideTy, Src0: MIBSrc, Src1: MIRBuilder.buildConstant(Res: WideTy, Val: TopBit));
2866 // Now we know the operand is non-zero, use the more relaxed opcode.
2867 NewOpc = TargetOpcode::G_CTTZ_ZERO_POISON;
2868 }
2869
2870 unsigned SizeDiff =
2871 WideTy.getScalarSizeInBits() - CurTy.getScalarSizeInBits();
2872
2873 if (Opcode == TargetOpcode::G_CTLZ_ZERO_POISON) {
2874 // An optimization where the result is the CTLZ after the left shift by
2875 // (Difference in widety and current ty), that is,
2876 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2877 // Result = ctlz MIBSrc
2878 MIBSrc = MIRBuilder.buildShl(Dst: WideTy, Src0: MIBSrc,
2879 Src1: MIRBuilder.buildConstant(Res: WideTy, Val: SizeDiff));
2880 }
2881
2882 // Perform the operation at the larger size.
2883 auto MIBNewOp = MIRBuilder.buildInstr(Opc: NewOpc, DstOps: {WideTy}, SrcOps: {MIBSrc});
2884 // This is already the correct result for CTPOP and CTTZs
2885 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2886 // The correct result is NewOp - (Difference in widety and current ty).
2887 // At this stage SUB is guaranteed to be positive no-wrap,
2888 // that to be used in further KnownBits optimizations for CTLZ.
2889 MIBNewOp = MIRBuilder.buildSub(
2890 Dst: WideTy, Src0: MIBNewOp, Src1: MIRBuilder.buildConstant(Res: WideTy, Val: SizeDiff),
2891 Flags: Opcode == TargetOpcode::G_CTLZ
2892 ? std::optional<unsigned>(MachineInstr::NoUWrap)
2893 : std::nullopt);
2894 }
2895
2896 MIRBuilder.buildZExtOrTrunc(Res: MI.getOperand(i: 0), Op: MIBNewOp);
2897 MI.eraseFromParent();
2898 return Legalized;
2899 }
2900 case TargetOpcode::G_BSWAP: {
2901 Observer.changingInstr(MI);
2902 Register DstReg = MI.getOperand(i: 0).getReg();
2903
2904 Register ShrReg = MRI.createGenericVirtualRegister(Ty: WideTy);
2905 Register DstExt = MRI.createGenericVirtualRegister(Ty: WideTy);
2906 Register ShiftAmtReg = MRI.createGenericVirtualRegister(Ty: WideTy);
2907 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2908
2909 MI.getOperand(i: 0).setReg(DstExt);
2910
2911 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
2912
2913 LLT Ty = MRI.getType(Reg: DstReg);
2914 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2915 MIRBuilder.buildConstant(Res: ShiftAmtReg, Val: DiffBits);
2916 MIRBuilder.buildLShr(Dst: ShrReg, Src0: DstExt, Src1: ShiftAmtReg);
2917
2918 MIRBuilder.buildTrunc(Res: DstReg, Op: ShrReg);
2919 Observer.changedInstr(MI);
2920 return Legalized;
2921 }
2922 case TargetOpcode::G_BITREVERSE: {
2923 Observer.changingInstr(MI);
2924
2925 Register DstReg = MI.getOperand(i: 0).getReg();
2926 LLT Ty = MRI.getType(Reg: DstReg);
2927 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2928
2929 Register DstExt = MRI.createGenericVirtualRegister(Ty: WideTy);
2930 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2931 MI.getOperand(i: 0).setReg(DstExt);
2932 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
2933
2934 auto ShiftAmt = MIRBuilder.buildConstant(Res: WideTy, Val: DiffBits);
2935 auto Shift = MIRBuilder.buildLShr(Dst: WideTy, Src0: DstExt, Src1: ShiftAmt);
2936 MIRBuilder.buildTrunc(Res: DstReg, Op: Shift);
2937 Observer.changedInstr(MI);
2938 return Legalized;
2939 }
2940 case TargetOpcode::G_FREEZE:
2941 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2942 Observer.changingInstr(MI);
2943 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2944 widenScalarDst(MI, WideTy);
2945 Observer.changedInstr(MI);
2946 return Legalized;
2947
2948 case TargetOpcode::G_ABS:
2949 Observer.changingInstr(MI);
2950 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_SEXT);
2951 widenScalarDst(MI, WideTy);
2952 Observer.changedInstr(MI);
2953 return Legalized;
2954
2955 case TargetOpcode::G_ADD:
2956 case TargetOpcode::G_AND:
2957 case TargetOpcode::G_MUL:
2958 case TargetOpcode::G_OR:
2959 case TargetOpcode::G_XOR:
2960 case TargetOpcode::G_SUB:
2961 case TargetOpcode::G_SHUFFLE_VECTOR:
2962 // Perform operation at larger width (any extension is fines here, high bits
2963 // don't affect the result) and then truncate the result back to the
2964 // original type.
2965 Observer.changingInstr(MI);
2966 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2967 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
2968 widenScalarDst(MI, WideTy);
2969 Observer.changedInstr(MI);
2970 return Legalized;
2971
2972 case TargetOpcode::G_SBFX:
2973 case TargetOpcode::G_UBFX:
2974 Observer.changingInstr(MI);
2975
2976 if (TypeIdx == 0) {
2977 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2978 widenScalarDst(MI, WideTy);
2979 } else {
2980 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
2981 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_ZEXT);
2982 }
2983
2984 Observer.changedInstr(MI);
2985 return Legalized;
2986
2987 case TargetOpcode::G_SHL:
2988 Observer.changingInstr(MI);
2989
2990 if (TypeIdx == 0) {
2991 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2992 widenScalarDst(MI, WideTy);
2993 } else {
2994 assert(TypeIdx == 1);
2995 // The "number of bits to shift" operand must preserve its value as an
2996 // unsigned integer:
2997 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
2998 }
2999
3000 Observer.changedInstr(MI);
3001 return Legalized;
3002
3003 case TargetOpcode::G_ROTR:
3004 case TargetOpcode::G_ROTL:
3005 if (TypeIdx != 1)
3006 return UnableToLegalize;
3007
3008 Observer.changingInstr(MI);
3009 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
3010 Observer.changedInstr(MI);
3011 return Legalized;
3012
3013 case TargetOpcode::G_SDIV:
3014 case TargetOpcode::G_SREM:
3015 case TargetOpcode::G_SMIN:
3016 case TargetOpcode::G_SMAX:
3017 case TargetOpcode::G_ABDS:
3018 Observer.changingInstr(MI);
3019 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_SEXT);
3020 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_SEXT);
3021 widenScalarDst(MI, WideTy);
3022 Observer.changedInstr(MI);
3023 return Legalized;
3024
3025 case TargetOpcode::G_SDIVREM:
3026 Observer.changingInstr(MI);
3027 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_SEXT);
3028 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_SEXT);
3029 widenScalarDst(MI, WideTy);
3030 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: --MIRBuilder.getInsertPt());
3031 widenScalarDst(MI, WideTy, OpIdx: 1);
3032 Observer.changedInstr(MI);
3033 return Legalized;
3034
3035 case TargetOpcode::G_ASHR:
3036 case TargetOpcode::G_LSHR:
3037 Observer.changingInstr(MI);
3038
3039 if (TypeIdx == 0) {
3040 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3041 : TargetOpcode::G_ZEXT;
3042
3043 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: CvtOp);
3044 widenScalarDst(MI, WideTy);
3045 } else {
3046 assert(TypeIdx == 1);
3047 // The "number of bits to shift" operand must preserve its value as an
3048 // unsigned integer:
3049 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
3050 }
3051
3052 Observer.changedInstr(MI);
3053 return Legalized;
3054 case TargetOpcode::G_UDIV:
3055 case TargetOpcode::G_UREM:
3056 case TargetOpcode::G_ABDU:
3057 Observer.changingInstr(MI);
3058 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ZEXT);
3059 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
3060 widenScalarDst(MI, WideTy);
3061 Observer.changedInstr(MI);
3062 return Legalized;
3063 case TargetOpcode::G_UDIVREM:
3064 Observer.changingInstr(MI);
3065 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
3066 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_ZEXT);
3067 widenScalarDst(MI, WideTy);
3068 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: --MIRBuilder.getInsertPt());
3069 widenScalarDst(MI, WideTy, OpIdx: 1);
3070 Observer.changedInstr(MI);
3071 return Legalized;
3072 case TargetOpcode::G_UMIN:
3073 case TargetOpcode::G_UMAX: {
3074 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
3075
3076 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3077 unsigned ExtOpc =
3078 TLI.isSExtCheaperThanZExt(FromTy: getApproximateEVTForLLT(Ty, Ctx),
3079 ToTy: getApproximateEVTForLLT(Ty: WideTy, Ctx))
3080 ? TargetOpcode::G_SEXT
3081 : TargetOpcode::G_ZEXT;
3082
3083 Observer.changingInstr(MI);
3084 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: ExtOpc);
3085 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: ExtOpc);
3086 widenScalarDst(MI, WideTy);
3087 Observer.changedInstr(MI);
3088 return Legalized;
3089 }
3090
3091 case TargetOpcode::G_SELECT:
3092 Observer.changingInstr(MI);
3093 if (TypeIdx == 0) {
3094 // Perform operation at larger width (any extension is fine here, high
3095 // bits don't affect the result) and then truncate the result back to the
3096 // original type.
3097 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
3098 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_ANYEXT);
3099 widenScalarDst(MI, WideTy);
3100 } else {
3101 bool IsVec = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).isVector();
3102 // Explicit extension is required here since high bits affect the result.
3103 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: MIRBuilder.getBoolExtOp(IsVec, IsFP: false));
3104 }
3105 Observer.changedInstr(MI);
3106 return Legalized;
3107
3108 case TargetOpcode::G_FPEXT:
3109 if (TypeIdx != 1)
3110 return UnableToLegalize;
3111
3112 Observer.changingInstr(MI);
3113 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_FPEXT);
3114 Observer.changedInstr(MI);
3115 return Legalized;
3116 case TargetOpcode::G_FPTOSI:
3117 case TargetOpcode::G_FPTOUI:
3118 case TargetOpcode::G_INTRINSIC_LRINT:
3119 case TargetOpcode::G_INTRINSIC_LLRINT:
3120 case TargetOpcode::G_IS_FPCLASS:
3121 Observer.changingInstr(MI);
3122
3123 if (TypeIdx == 0)
3124 widenScalarDst(MI, WideTy);
3125 else
3126 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_FPEXT);
3127
3128 Observer.changedInstr(MI);
3129 return Legalized;
3130 case TargetOpcode::G_SITOFP:
3131 Observer.changingInstr(MI);
3132
3133 if (TypeIdx == 0)
3134 widenScalarDst(MI, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_FPTRUNC);
3135 else
3136 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_SEXT);
3137
3138 Observer.changedInstr(MI);
3139 return Legalized;
3140 case TargetOpcode::G_UITOFP:
3141 Observer.changingInstr(MI);
3142
3143 if (TypeIdx == 0)
3144 widenScalarDst(MI, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_FPTRUNC);
3145 else
3146 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ZEXT);
3147
3148 Observer.changedInstr(MI);
3149 return Legalized;
3150 case TargetOpcode::G_FPTOSI_SAT:
3151 case TargetOpcode::G_FPTOUI_SAT:
3152 Observer.changingInstr(MI);
3153
3154 if (TypeIdx == 0) {
3155 Register OldDst = MI.getOperand(i: 0).getReg();
3156 LLT Ty = MRI.getType(Reg: OldDst);
3157 Register ExtReg = MRI.createGenericVirtualRegister(Ty: WideTy);
3158 Register NewDst;
3159 MI.getOperand(i: 0).setReg(ExtReg);
3160 uint64_t ShortBits = Ty.getScalarSizeInBits();
3161 uint64_t WideBits = WideTy.getScalarSizeInBits();
3162 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
3163 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3164 // z = i16 fptosi_sat(a)
3165 // ->
3166 // x = i32 fptosi_sat(a)
3167 // y = smin(x, 32767)
3168 // z = smax(y, -32768)
3169 auto MaxVal = MIRBuilder.buildConstant(
3170 Res: WideTy, Val: APInt::getSignedMaxValue(numBits: ShortBits).sext(width: WideBits));
3171 auto MinVal = MIRBuilder.buildConstant(
3172 Res: WideTy, Val: APInt::getSignedMinValue(numBits: ShortBits).sext(width: WideBits));
3173 Register MidReg =
3174 MIRBuilder.buildSMin(Dst: WideTy, Src0: ExtReg, Src1: MaxVal).getReg(Idx: 0);
3175 NewDst = MIRBuilder.buildSMax(Dst: WideTy, Src0: MidReg, Src1: MinVal).getReg(Idx: 0);
3176 } else {
3177 // z = i16 fptoui_sat(a)
3178 // ->
3179 // x = i32 fptoui_sat(a)
3180 // y = smin(x, 65535)
3181 auto MaxVal = MIRBuilder.buildConstant(
3182 Res: WideTy, Val: APInt::getAllOnes(numBits: ShortBits).zext(width: WideBits));
3183 NewDst = MIRBuilder.buildUMin(Dst: WideTy, Src0: ExtReg, Src1: MaxVal).getReg(Idx: 0);
3184 }
3185 MIRBuilder.buildTrunc(Res: OldDst, Op: NewDst);
3186 } else
3187 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_FPEXT);
3188
3189 Observer.changedInstr(MI);
3190 return Legalized;
3191 case TargetOpcode::G_LOAD:
3192 case TargetOpcode::G_SEXTLOAD:
3193 case TargetOpcode::G_ZEXTLOAD:
3194 case TargetOpcode::G_FPEXTLOAD:
3195 Observer.changingInstr(MI);
3196 widenScalarDst(MI, WideTy);
3197 Observer.changedInstr(MI);
3198 return Legalized;
3199
3200 case TargetOpcode::G_STORE: {
3201 if (TypeIdx != 0)
3202 return UnableToLegalize;
3203
3204 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
3205 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3206 if (!Ty.isScalar()) {
3207 // We need to widen the vector element type.
3208 Observer.changingInstr(MI);
3209 widenScalarSrc(MI, WideTy, OpIdx: 0, ExtOpcode: TargetOpcode::G_ANYEXT);
3210 // We also need to adjust the MMO to turn this into a truncating store.
3211 MachineMemOperand &MMO = **MI.memoperands_begin();
3212 MachineFunction &MF = MIRBuilder.getMF();
3213 auto *NewMMO = MF.getMachineMemOperand(MMO: &MMO, PtrInfo: MMO.getPointerInfo(), Ty);
3214 MI.setMemRefs(MF, MemRefs: {NewMMO});
3215 Observer.changedInstr(MI);
3216 return Legalized;
3217 }
3218
3219 Observer.changingInstr(MI);
3220
3221 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3222 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3223 widenScalarSrc(MI, WideTy, OpIdx: 0, ExtOpcode: ExtType);
3224
3225 Observer.changedInstr(MI);
3226 return Legalized;
3227 }
3228 case TargetOpcode::G_FPTRUNCSTORE:
3229 if (TypeIdx != 0)
3230 return UnableToLegalize;
3231 Observer.changingInstr(MI);
3232 widenScalarSrc(MI, WideTy, OpIdx: 0, ExtOpcode: TargetOpcode::G_FPEXT);
3233 Observer.changedInstr(MI);
3234 return Legalized;
3235 case TargetOpcode::G_CONSTANT: {
3236 MachineOperand &SrcMO = MI.getOperand(i: 1);
3237 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3238 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3239 SmallTy: MRI.getType(Reg: MI.getOperand(i: 0).getReg()));
3240 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3241 ExtOpc == TargetOpcode::G_ANYEXT) &&
3242 "Illegal Extend");
3243 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3244 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3245 ? SrcVal.sext(width: WideTy.getSizeInBits())
3246 : SrcVal.zext(width: WideTy.getSizeInBits());
3247 Observer.changingInstr(MI);
3248 SrcMO.setCImm(ConstantInt::get(Context&: Ctx, V: Val));
3249
3250 widenScalarDst(MI, WideTy);
3251 Observer.changedInstr(MI);
3252 return Legalized;
3253 }
3254 case TargetOpcode::G_FCONSTANT: {
3255 // To avoid changing the bits of the constant due to extension to a larger
3256 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3257 MachineOperand &SrcMO = MI.getOperand(i: 1);
3258 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3259 MIRBuilder.setInstrAndDebugLoc(MI);
3260 auto IntCst = MIRBuilder.buildConstant(Res: MI.getOperand(i: 0).getReg(), Val);
3261 widenScalarDst(MI&: *IntCst, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_TRUNC);
3262 MI.eraseFromParent();
3263 return Legalized;
3264 }
3265 case TargetOpcode::G_IMPLICIT_DEF: {
3266 Observer.changingInstr(MI);
3267 widenScalarDst(MI, WideTy);
3268 Observer.changedInstr(MI);
3269 return Legalized;
3270 }
3271 case TargetOpcode::G_BRCOND:
3272 Observer.changingInstr(MI);
3273 widenScalarSrc(MI, WideTy, OpIdx: 0, ExtOpcode: MIRBuilder.getBoolExtOp(IsVec: false, IsFP: false));
3274 Observer.changedInstr(MI);
3275 return Legalized;
3276
3277 case TargetOpcode::G_FCMP:
3278 Observer.changingInstr(MI);
3279 if (TypeIdx == 0)
3280 widenScalarDst(MI, WideTy);
3281 else {
3282 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_FPEXT);
3283 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_FPEXT);
3284 }
3285 Observer.changedInstr(MI);
3286 return Legalized;
3287
3288 case TargetOpcode::G_ICMP:
3289 Observer.changingInstr(MI);
3290 if (TypeIdx == 0)
3291 widenScalarDst(MI, WideTy);
3292 else {
3293 LLT SrcTy = MRI.getType(Reg: MI.getOperand(i: 2).getReg());
3294 CmpInst::Predicate Pred =
3295 static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate());
3296
3297 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3298 unsigned ExtOpcode =
3299 (CmpInst::isSigned(Pred) ||
3300 TLI.isSExtCheaperThanZExt(FromTy: getApproximateEVTForLLT(Ty: SrcTy, Ctx),
3301 ToTy: getApproximateEVTForLLT(Ty: WideTy, Ctx)))
3302 ? TargetOpcode::G_SEXT
3303 : TargetOpcode::G_ZEXT;
3304 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode);
3305 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode);
3306 }
3307 Observer.changedInstr(MI);
3308 return Legalized;
3309
3310 case TargetOpcode::G_PTR_ADD:
3311 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3312 Observer.changingInstr(MI);
3313 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_SEXT);
3314 Observer.changedInstr(MI);
3315 return Legalized;
3316
3317 case TargetOpcode::G_PHI: {
3318 assert(TypeIdx == 0 && "Expecting only Idx 0");
3319
3320 Observer.changingInstr(MI);
3321 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3322 MachineBasicBlock &OpMBB = *MI.getOperand(i: I + 1).getMBB();
3323 MIRBuilder.setInsertPt(MBB&: OpMBB, II: OpMBB.getFirstTerminatorForward());
3324 widenScalarSrc(MI, WideTy, OpIdx: I, ExtOpcode: TargetOpcode::G_ANYEXT);
3325 }
3326
3327 MachineBasicBlock &MBB = *MI.getParent();
3328 MIRBuilder.setInsertPt(MBB, II: --MBB.getFirstNonPHI());
3329 widenScalarDst(MI, WideTy);
3330 Observer.changedInstr(MI);
3331 return Legalized;
3332 }
3333 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3334 if (TypeIdx == 0) {
3335 Register VecReg = MI.getOperand(i: 1).getReg();
3336 LLT VecTy = MRI.getType(Reg: VecReg);
3337 Observer.changingInstr(MI);
3338
3339 widenScalarSrc(MI, WideTy: LLT::vector(EC: VecTy.getElementCount(), ScalarTy: WideTy), OpIdx: 1,
3340 ExtOpcode: TargetOpcode::G_ANYEXT);
3341
3342 widenScalarDst(MI, WideTy, OpIdx: 0);
3343 Observer.changedInstr(MI);
3344 return Legalized;
3345 }
3346
3347 if (TypeIdx != 2)
3348 return UnableToLegalize;
3349 Observer.changingInstr(MI);
3350 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
3351 Observer.changedInstr(MI);
3352 return Legalized;
3353 }
3354 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3355 if (TypeIdx == 0) {
3356 Observer.changingInstr(MI);
3357 const LLT WideEltTy = WideTy.getElementType();
3358
3359 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
3360 widenScalarSrc(MI, WideTy: WideEltTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
3361 widenScalarDst(MI, WideTy, OpIdx: 0);
3362 Observer.changedInstr(MI);
3363 return Legalized;
3364 }
3365
3366 if (TypeIdx == 1) {
3367 Observer.changingInstr(MI);
3368
3369 Register VecReg = MI.getOperand(i: 1).getReg();
3370 LLT VecTy = MRI.getType(Reg: VecReg);
3371 LLT WideVecTy = VecTy.changeVectorElementType(NewEltTy: WideTy);
3372
3373 widenScalarSrc(MI, WideTy: WideVecTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
3374 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
3375 widenScalarDst(MI, WideTy: WideVecTy, OpIdx: 0);
3376 Observer.changedInstr(MI);
3377 return Legalized;
3378 }
3379
3380 if (TypeIdx == 2) {
3381 Observer.changingInstr(MI);
3382 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_ZEXT);
3383 Observer.changedInstr(MI);
3384 return Legalized;
3385 }
3386
3387 return UnableToLegalize;
3388 }
3389 case TargetOpcode::G_FADD:
3390 case TargetOpcode::G_FMUL:
3391 case TargetOpcode::G_FSUB:
3392 case TargetOpcode::G_FMA:
3393 case TargetOpcode::G_FMAD:
3394 case TargetOpcode::G_FNEG:
3395 case TargetOpcode::G_FABS:
3396 case TargetOpcode::G_FCANONICALIZE:
3397 case TargetOpcode::G_FMINNUM:
3398 case TargetOpcode::G_FMAXNUM:
3399 case TargetOpcode::G_FMINNUM_IEEE:
3400 case TargetOpcode::G_FMAXNUM_IEEE:
3401 case TargetOpcode::G_FMINIMUM:
3402 case TargetOpcode::G_FMAXIMUM:
3403 case TargetOpcode::G_FMINIMUMNUM:
3404 case TargetOpcode::G_FMAXIMUMNUM:
3405 case TargetOpcode::G_FDIV:
3406 case TargetOpcode::G_FREM:
3407 case TargetOpcode::G_FCEIL:
3408 case TargetOpcode::G_FFLOOR:
3409 case TargetOpcode::G_FCOS:
3410 case TargetOpcode::G_FSIN:
3411 case TargetOpcode::G_FTAN:
3412 case TargetOpcode::G_FACOS:
3413 case TargetOpcode::G_FASIN:
3414 case TargetOpcode::G_FATAN:
3415 case TargetOpcode::G_FATAN2:
3416 case TargetOpcode::G_FCOSH:
3417 case TargetOpcode::G_FSINH:
3418 case TargetOpcode::G_FTANH:
3419 case TargetOpcode::G_FLOG10:
3420 case TargetOpcode::G_FLOG:
3421 case TargetOpcode::G_FLOG2:
3422 case TargetOpcode::G_FRINT:
3423 case TargetOpcode::G_FNEARBYINT:
3424 case TargetOpcode::G_FSQRT:
3425 case TargetOpcode::G_FEXP:
3426 case TargetOpcode::G_FEXP2:
3427 case TargetOpcode::G_FEXP10:
3428 case TargetOpcode::G_FPOW:
3429 case TargetOpcode::G_INTRINSIC_TRUNC:
3430 case TargetOpcode::G_INTRINSIC_ROUND:
3431 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3432 assert(TypeIdx == 0);
3433 Observer.changingInstr(MI);
3434
3435 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3436 widenScalarSrc(MI, WideTy, OpIdx: I, ExtOpcode: TargetOpcode::G_FPEXT);
3437
3438 widenScalarDst(MI, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_FPTRUNC);
3439 Observer.changedInstr(MI);
3440 return Legalized;
3441 case TargetOpcode::G_FMODF: {
3442 Observer.changingInstr(MI);
3443 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_FPEXT);
3444
3445 widenScalarDst(MI, WideTy, OpIdx: 1, TruncOpcode: TargetOpcode::G_FPTRUNC);
3446 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: --MIRBuilder.getInsertPt());
3447 widenScalarDst(MI, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_FPTRUNC);
3448 Observer.changedInstr(MI);
3449 return Legalized;
3450 }
3451 case TargetOpcode::G_FPOWI:
3452 case TargetOpcode::G_FLDEXP:
3453 case TargetOpcode::G_STRICT_FLDEXP: {
3454 if (TypeIdx == 0) {
3455 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3456 return UnableToLegalize;
3457
3458 Observer.changingInstr(MI);
3459 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_FPEXT);
3460 widenScalarDst(MI, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_FPTRUNC);
3461 Observer.changedInstr(MI);
3462 return Legalized;
3463 }
3464
3465 if (TypeIdx == 1) {
3466 // For some reason SelectionDAG tries to promote to a libcall without
3467 // actually changing the integer type for promotion.
3468 Observer.changingInstr(MI);
3469 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_SEXT);
3470 Observer.changedInstr(MI);
3471 return Legalized;
3472 }
3473
3474 return UnableToLegalize;
3475 }
3476 case TargetOpcode::G_FFREXP: {
3477 Observer.changingInstr(MI);
3478
3479 if (TypeIdx == 0) {
3480 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_FPEXT);
3481 widenScalarDst(MI, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_FPTRUNC);
3482 } else {
3483 widenScalarDst(MI, WideTy, OpIdx: 1);
3484 }
3485
3486 Observer.changedInstr(MI);
3487 return Legalized;
3488 }
3489 case TargetOpcode::G_LROUND:
3490 case TargetOpcode::G_LLROUND:
3491 Observer.changingInstr(MI);
3492
3493 if (TypeIdx == 0)
3494 widenScalarDst(MI, WideTy);
3495 else
3496 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_FPEXT);
3497
3498 Observer.changedInstr(MI);
3499 return Legalized;
3500
3501 case TargetOpcode::G_INTTOPTR:
3502 if (TypeIdx != 1)
3503 return UnableToLegalize;
3504
3505 Observer.changingInstr(MI);
3506 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ZEXT);
3507 Observer.changedInstr(MI);
3508 return Legalized;
3509 case TargetOpcode::G_PTRTOINT:
3510 if (TypeIdx != 0)
3511 return UnableToLegalize;
3512
3513 Observer.changingInstr(MI);
3514 widenScalarDst(MI, WideTy, OpIdx: 0);
3515 Observer.changedInstr(MI);
3516 return Legalized;
3517 case TargetOpcode::G_BUILD_VECTOR: {
3518 Observer.changingInstr(MI);
3519
3520 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3521 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3522 widenScalarSrc(MI, WideTy: WideEltTy, OpIdx: I, ExtOpcode: TargetOpcode::G_ANYEXT);
3523
3524 // Avoid changing the result vector type if the source element type was
3525 // requested.
3526 if (TypeIdx == 1) {
3527 MI.setDesc(MIRBuilder.getTII().get(Opcode: TargetOpcode::G_BUILD_VECTOR_TRUNC));
3528 } else {
3529 widenScalarDst(MI, WideTy, OpIdx: 0);
3530 }
3531
3532 Observer.changedInstr(MI);
3533 return Legalized;
3534 }
3535 case TargetOpcode::G_SEXT_INREG:
3536 if (TypeIdx != 0)
3537 return UnableToLegalize;
3538
3539 Observer.changingInstr(MI);
3540 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
3541 widenScalarDst(MI, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_TRUNC);
3542 Observer.changedInstr(MI);
3543 return Legalized;
3544 case TargetOpcode::G_PTRMASK: {
3545 if (TypeIdx != 1)
3546 return UnableToLegalize;
3547 Observer.changingInstr(MI);
3548 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
3549 Observer.changedInstr(MI);
3550 return Legalized;
3551 }
3552 case TargetOpcode::G_VECREDUCE_ADD: {
3553 if (TypeIdx != 1)
3554 return UnableToLegalize;
3555 Observer.changingInstr(MI);
3556 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
3557 widenScalarDst(MI, WideTy: WideTy.getScalarType(), OpIdx: 0, TruncOpcode: TargetOpcode::G_TRUNC);
3558 Observer.changedInstr(MI);
3559 return Legalized;
3560 }
3561 case TargetOpcode::G_VECREDUCE_FADD:
3562 case TargetOpcode::G_VECREDUCE_FMUL:
3563 case TargetOpcode::G_VECREDUCE_FMIN:
3564 case TargetOpcode::G_VECREDUCE_FMAX:
3565 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3566 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3567 if (TypeIdx != 0)
3568 return UnableToLegalize;
3569 Observer.changingInstr(MI);
3570 Register VecReg = MI.getOperand(i: 1).getReg();
3571 LLT VecTy = MRI.getType(Reg: VecReg);
3572 LLT WideVecTy = VecTy.changeElementType(NewEltTy: WideTy);
3573 widenScalarSrc(MI, WideTy: WideVecTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_FPEXT);
3574 widenScalarDst(MI, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_FPTRUNC);
3575 Observer.changedInstr(MI);
3576 return Legalized;
3577 }
3578 case TargetOpcode::G_VSCALE: {
3579 MachineOperand &SrcMO = MI.getOperand(i: 1);
3580 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3581 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3582 // The CImm is always a signed value
3583 const APInt Val = SrcVal.sext(width: WideTy.getSizeInBits());
3584 Observer.changingInstr(MI);
3585 SrcMO.setCImm(ConstantInt::get(Context&: Ctx, V: Val));
3586 widenScalarDst(MI, WideTy);
3587 Observer.changedInstr(MI);
3588 return Legalized;
3589 }
3590 case TargetOpcode::G_SPLAT_VECTOR: {
3591 if (TypeIdx != 1)
3592 return UnableToLegalize;
3593
3594 Observer.changingInstr(MI);
3595 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
3596 Observer.changedInstr(MI);
3597 return Legalized;
3598 }
3599 case TargetOpcode::G_INSERT_SUBVECTOR: {
3600 if (TypeIdx != 0)
3601 return UnableToLegalize;
3602
3603 GInsertSubvector &IS = cast<GInsertSubvector>(Val&: MI);
3604 Register BigVec = IS.getBigVec();
3605 Register SubVec = IS.getSubVec();
3606
3607 LLT SubVecTy = MRI.getType(Reg: SubVec);
3608 LLT SubVecWideTy = SubVecTy.changeElementType(NewEltTy: WideTy.getElementType());
3609
3610 // Widen the G_INSERT_SUBVECTOR
3611 auto BigZExt = MIRBuilder.buildZExt(Res: WideTy, Op: BigVec);
3612 auto SubZExt = MIRBuilder.buildZExt(Res: SubVecWideTy, Op: SubVec);
3613 auto WideInsert = MIRBuilder.buildInsertSubvector(Res: WideTy, Src0: BigZExt, Src1: SubZExt,
3614 Index: IS.getIndexImm());
3615
3616 // Truncate back down
3617 auto SplatZero = MIRBuilder.buildSplatVector(
3618 Res: WideTy, Val: MIRBuilder.buildConstant(Res: WideTy.getElementType(), Val: 0));
3619 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_NE, Res: IS.getReg(Idx: 0), Op0: WideInsert,
3620 Op1: SplatZero);
3621
3622 MI.eraseFromParent();
3623
3624 return Legalized;
3625 }
3626 }
3627}
3628
3629static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3630 MachineIRBuilder &B, Register Src, LLT Ty) {
3631 auto Unmerge = B.buildUnmerge(Res: Ty, Op: Src);
3632 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3633 Pieces.push_back(Elt: Unmerge.getReg(Idx: I));
3634}
3635
3636static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3637 MachineIRBuilder &MIRBuilder) {
3638 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3639 MachineFunction &MF = MIRBuilder.getMF();
3640 const DataLayout &DL = MIRBuilder.getDataLayout();
3641 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3642 LLT AddrPtrTy = LLT::pointer(AddressSpace: AddrSpace, SizeInBits: DL.getPointerSizeInBits(AS: AddrSpace));
3643 LLT DstLLT = MRI.getType(Reg: DstReg);
3644
3645 Align Alignment(DL.getABITypeAlign(Ty: ConstVal->getType()));
3646
3647 auto Addr = MIRBuilder.buildConstantPool(
3648 Res: AddrPtrTy,
3649 Idx: MF.getConstantPool()->getConstantPoolIndex(C: ConstVal, Alignment));
3650
3651 MachineMemOperand *MMO =
3652 MF.getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF),
3653 f: MachineMemOperand::MOLoad, MemTy: DstLLT, base_alignment: Alignment);
3654
3655 MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_LOAD, Res: DstReg, Addr, MMO&: *MMO);
3656}
3657
3658LegalizerHelper::LegalizeResult
3659LegalizerHelper::lowerConstant(MachineInstr &MI) {
3660 const MachineOperand &ConstOperand = MI.getOperand(i: 1);
3661 const Constant *ConstantVal = ConstOperand.getCImm();
3662
3663 emitLoadFromConstantPool(DstReg: MI.getOperand(i: 0).getReg(), ConstVal: ConstantVal, MIRBuilder);
3664 MI.eraseFromParent();
3665
3666 return Legalized;
3667}
3668
3669LegalizerHelper::LegalizeResult
3670LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3671 const MachineOperand &ConstOperand = MI.getOperand(i: 1);
3672 const Constant *ConstantVal = ConstOperand.getFPImm();
3673
3674 emitLoadFromConstantPool(DstReg: MI.getOperand(i: 0).getReg(), ConstVal: ConstantVal, MIRBuilder);
3675 MI.eraseFromParent();
3676
3677 return Legalized;
3678}
3679
3680LegalizerHelper::LegalizeResult
3681LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3682 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3683 if (SrcTy.isVector()) {
3684 LLT SrcEltTy = SrcTy.getElementType();
3685 SmallVector<Register, 8> SrcRegs;
3686
3687 if (DstTy.isVector()) {
3688 int NumDstElt = DstTy.getNumElements();
3689 int NumSrcElt = SrcTy.getNumElements();
3690
3691 LLT DstEltTy = DstTy.getElementType();
3692 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3693 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3694
3695 // If there's an element size mismatch, insert intermediate casts to match
3696 // the result element type.
3697 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3698 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3699 //
3700 // =>
3701 //
3702 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3703 // %3:_(<2 x s8>) = G_BITCAST %2
3704 // %4:_(<2 x s8>) = G_BITCAST %3
3705 // %1:_(<4 x s16>) = G_CONCAT_VECTORS %3, %4
3706 DstCastTy = DstTy.changeVectorElementCount(
3707 EC: ElementCount::getFixed(MinVal: NumDstElt / NumSrcElt));
3708 SrcPartTy = SrcEltTy;
3709 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3710 //
3711 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3712 //
3713 // =>
3714 //
3715 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3716 // %3:_(s16) = G_BITCAST %2
3717 // %4:_(s16) = G_BITCAST %3
3718 // %1:_(<2 x s16>) = G_BUILD_VECTOR %3, %4
3719 SrcPartTy = SrcTy.changeVectorElementCount(
3720 EC: ElementCount::getFixed(MinVal: NumSrcElt / NumDstElt));
3721 DstCastTy = DstEltTy;
3722 }
3723
3724 getUnmergePieces(Pieces&: SrcRegs, B&: MIRBuilder, Src, Ty: SrcPartTy);
3725 for (Register &SrcReg : SrcRegs)
3726 SrcReg = MIRBuilder.buildBitcast(Dst: DstCastTy, Src: SrcReg).getReg(Idx: 0);
3727 } else
3728 getUnmergePieces(Pieces&: SrcRegs, B&: MIRBuilder, Src, Ty: SrcEltTy);
3729
3730 MIRBuilder.buildMergeLikeInstr(Res: Dst, Ops: SrcRegs);
3731 MI.eraseFromParent();
3732 return Legalized;
3733 }
3734
3735 if (DstTy.isVector()) {
3736 SmallVector<Register, 8> SrcRegs;
3737 getUnmergePieces(Pieces&: SrcRegs, B&: MIRBuilder, Src, Ty: DstTy.getElementType());
3738 MIRBuilder.buildMergeLikeInstr(Res: Dst, Ops: SrcRegs);
3739 MI.eraseFromParent();
3740 return Legalized;
3741 }
3742
3743 return UnableToLegalize;
3744}
3745
3746/// Figure out the bit offset into a register when coercing a vector index for
3747/// the wide element type. This is only for the case when promoting vector to
3748/// one with larger elements.
3749//
3750///
3751/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3752/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3753static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3754 Register Idx,
3755 unsigned NewEltSize,
3756 unsigned OldEltSize) {
3757 const unsigned Log2EltRatio = Log2_32(Value: NewEltSize / OldEltSize);
3758 LLT IdxTy = B.getMRI()->getType(Reg: Idx);
3759
3760 // Now figure out the amount we need to shift to get the target bits.
3761 auto OffsetMask = B.buildConstant(
3762 Res: IdxTy, Val: ~(APInt::getAllOnes(numBits: IdxTy.getSizeInBits()) << Log2EltRatio));
3763 auto OffsetIdx = B.buildAnd(Dst: IdxTy, Src0: Idx, Src1: OffsetMask);
3764 return B.buildShl(Dst: IdxTy, Src0: OffsetIdx,
3765 Src1: B.buildConstant(Res: IdxTy, Val: Log2_32(Value: OldEltSize))).getReg(Idx: 0);
3766}
3767
3768/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3769/// is casting to a vector with a smaller element size, perform multiple element
3770/// extracts and merge the results. If this is coercing to a vector with larger
3771/// elements, index the bitcasted vector and extract the target element with bit
3772/// operations. This is intended to force the indexing in the native register
3773/// size for architectures that can dynamically index the register file.
3774LegalizerHelper::LegalizeResult
3775LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3776 LLT CastTy) {
3777 if (TypeIdx != 1)
3778 return UnableToLegalize;
3779
3780 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3781
3782 LLT SrcEltTy = SrcVecTy.getElementType();
3783 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3784 unsigned OldNumElts = SrcVecTy.getNumElements();
3785
3786 LLT NewEltTy = CastTy.getScalarType();
3787 Register CastVec = MIRBuilder.buildBitcast(Dst: CastTy, Src: SrcVec).getReg(Idx: 0);
3788
3789 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3790 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3791 if (NewNumElts > OldNumElts) {
3792 // Decreasing the vector element size
3793 //
3794 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3795 // =>
3796 // v4i32:castx = bitcast x:v2i64
3797 //
3798 // i64 = bitcast
3799 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3800 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3801 //
3802 if (NewNumElts % OldNumElts != 0)
3803 return UnableToLegalize;
3804
3805 // Type of the intermediate result vector.
3806 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3807 LLT MidTy =
3808 CastTy.changeElementCount(EC: ElementCount::getFixed(MinVal: NewEltsPerOldElt));
3809
3810 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(Res: IdxTy, Val: NewEltsPerOldElt);
3811
3812 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3813 auto NewBaseIdx = MIRBuilder.buildMul(Dst: IdxTy, Src0: Idx, Src1: NewEltsPerOldEltK);
3814
3815 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3816 auto IdxOffset = MIRBuilder.buildConstant(Res: IdxTy, Val: I);
3817 auto TmpIdx = MIRBuilder.buildAdd(Dst: IdxTy, Src0: NewBaseIdx, Src1: IdxOffset);
3818 auto Elt = MIRBuilder.buildExtractVectorElement(Res: NewEltTy, Val: CastVec, Idx: TmpIdx);
3819 NewOps[I] = Elt.getReg(Idx: 0);
3820 }
3821
3822 auto NewVec = MIRBuilder.buildBuildVector(Res: MidTy, Ops: NewOps);
3823 MIRBuilder.buildBitcast(Dst, Src: NewVec);
3824 MI.eraseFromParent();
3825 return Legalized;
3826 }
3827
3828 if (NewNumElts < OldNumElts) {
3829 if (NewEltSize % OldEltSize != 0)
3830 return UnableToLegalize;
3831
3832 // This only depends on powers of 2 because we use bit tricks to figure out
3833 // the bit offset we need to shift to get the target element. A general
3834 // expansion could emit division/multiply.
3835 if (!isPowerOf2_32(Value: NewEltSize / OldEltSize))
3836 return UnableToLegalize;
3837
3838 // Increasing the vector element size.
3839 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3840 //
3841 // =>
3842 //
3843 // %cast = G_BITCAST %vec
3844 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3845 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3846 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3847 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3848 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3849 // %elt = G_TRUNC %elt_bits
3850
3851 const unsigned Log2EltRatio = Log2_32(Value: NewEltSize / OldEltSize);
3852 auto Log2Ratio = MIRBuilder.buildConstant(Res: IdxTy, Val: Log2EltRatio);
3853
3854 // Divide to get the index in the wider element type.
3855 auto ScaledIdx = MIRBuilder.buildLShr(Dst: IdxTy, Src0: Idx, Src1: Log2Ratio);
3856
3857 Register WideElt = CastVec;
3858 if (CastTy.isVector()) {
3859 WideElt = MIRBuilder.buildExtractVectorElement(Res: NewEltTy, Val: CastVec,
3860 Idx: ScaledIdx).getReg(Idx: 0);
3861 }
3862
3863 // Compute the bit offset into the register of the target element.
3864 Register OffsetBits = getBitcastWiderVectorElementOffset(
3865 B&: MIRBuilder, Idx, NewEltSize, OldEltSize);
3866
3867 // Shift the wide element to get the target element.
3868 auto ExtractedBits = MIRBuilder.buildLShr(Dst: NewEltTy, Src0: WideElt, Src1: OffsetBits);
3869 MIRBuilder.buildTrunc(Res: Dst, Op: ExtractedBits);
3870 MI.eraseFromParent();
3871 return Legalized;
3872 }
3873
3874 return UnableToLegalize;
3875}
3876
3877/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
3878/// TargetReg, while preserving other bits in \p TargetReg.
3879///
3880/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
3881static Register buildBitFieldInsert(MachineIRBuilder &B,
3882 Register TargetReg, Register InsertReg,
3883 Register OffsetBits) {
3884 LLT TargetTy = B.getMRI()->getType(Reg: TargetReg);
3885 LLT InsertTy = B.getMRI()->getType(Reg: InsertReg);
3886 auto ZextVal = B.buildZExt(Res: TargetTy, Op: InsertReg);
3887 auto ShiftedInsertVal = B.buildShl(Dst: TargetTy, Src0: ZextVal, Src1: OffsetBits);
3888
3889 // Produce a bitmask of the value to insert
3890 auto EltMask = B.buildConstant(
3891 Res: TargetTy, Val: APInt::getLowBitsSet(numBits: TargetTy.getSizeInBits(),
3892 loBitsSet: InsertTy.getSizeInBits()));
3893 // Shift it into position
3894 auto ShiftedMask = B.buildShl(Dst: TargetTy, Src0: EltMask, Src1: OffsetBits);
3895 auto InvShiftedMask = B.buildNot(Dst: TargetTy, Src0: ShiftedMask);
3896
3897 // Clear out the bits in the wide element
3898 auto MaskedOldElt = B.buildAnd(Dst: TargetTy, Src0: TargetReg, Src1: InvShiftedMask);
3899
3900 // The value to insert has all zeros already, so stick it into the masked
3901 // wide element.
3902 return B.buildOr(Dst: TargetTy, Src0: MaskedOldElt, Src1: ShiftedInsertVal).getReg(Idx: 0);
3903}
3904
3905/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3906/// is increasing the element size, perform the indexing in the target element
3907/// type, and use bit operations to insert at the element position. This is
3908/// intended for architectures that can dynamically index the register file and
3909/// want to force indexing in the native register size.
3910LegalizerHelper::LegalizeResult
3911LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3912 LLT CastTy) {
3913 if (TypeIdx != 0)
3914 return UnableToLegalize;
3915
3916 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3917 MI.getFirst4RegLLTs();
3918 LLT VecTy = DstTy;
3919
3920 LLT VecEltTy = VecTy.getElementType();
3921 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3922 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3923 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3924
3925 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3926 unsigned OldNumElts = VecTy.getNumElements();
3927
3928 Register CastVec = MIRBuilder.buildBitcast(Dst: CastTy, Src: SrcVec).getReg(Idx: 0);
3929 if (NewNumElts < OldNumElts) {
3930 if (NewEltSize % OldEltSize != 0)
3931 return UnableToLegalize;
3932
3933 // This only depends on powers of 2 because we use bit tricks to figure out
3934 // the bit offset we need to shift to get the target element. A general
3935 // expansion could emit division/multiply.
3936 if (!isPowerOf2_32(Value: NewEltSize / OldEltSize))
3937 return UnableToLegalize;
3938
3939 const unsigned Log2EltRatio = Log2_32(Value: NewEltSize / OldEltSize);
3940 auto Log2Ratio = MIRBuilder.buildConstant(Res: IdxTy, Val: Log2EltRatio);
3941
3942 // Divide to get the index in the wider element type.
3943 auto ScaledIdx = MIRBuilder.buildLShr(Dst: IdxTy, Src0: Idx, Src1: Log2Ratio);
3944
3945 Register ExtractedElt = CastVec;
3946 if (CastTy.isVector()) {
3947 ExtractedElt = MIRBuilder.buildExtractVectorElement(Res: NewEltTy, Val: CastVec,
3948 Idx: ScaledIdx).getReg(Idx: 0);
3949 }
3950
3951 // Compute the bit offset into the register of the target element.
3952 Register OffsetBits = getBitcastWiderVectorElementOffset(
3953 B&: MIRBuilder, Idx, NewEltSize, OldEltSize);
3954
3955 Register InsertedElt = buildBitFieldInsert(B&: MIRBuilder, TargetReg: ExtractedElt,
3956 InsertReg: Val, OffsetBits);
3957 if (CastTy.isVector()) {
3958 InsertedElt = MIRBuilder.buildInsertVectorElement(
3959 Res: CastTy, Val: CastVec, Elt: InsertedElt, Idx: ScaledIdx).getReg(Idx: 0);
3960 }
3961
3962 MIRBuilder.buildBitcast(Dst, Src: InsertedElt);
3963 MI.eraseFromParent();
3964 return Legalized;
3965 }
3966
3967 return UnableToLegalize;
3968}
3969
3970// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3971// those that have smaller than legal operands.
3972//
3973// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3974//
3975// ===>
3976//
3977// s32 = G_BITCAST <4 x s8>
3978// s32 = G_BITCAST <4 x s8>
3979// s32 = G_BITCAST <4 x s8>
3980// s32 = G_BITCAST <4 x s8>
3981// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3982// <16 x s8> = G_BITCAST <4 x s32>
3983LegalizerHelper::LegalizeResult
3984LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3985 LLT CastTy) {
3986 // Convert it to CONCAT instruction
3987 auto ConcatMI = dyn_cast<GConcatVectors>(Val: &MI);
3988 if (!ConcatMI) {
3989 return UnableToLegalize;
3990 }
3991
3992 // Check if bitcast is Legal
3993 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3994 LLT SrcScalTy = CastTy.getScalarType();
3995
3996 // Check if the build vector is Legal
3997 if (!LI.isLegal(Query: {TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3998 return UnableToLegalize;
3999 }
4000
4001 // Bitcast the sources
4002 SmallVector<Register> BitcastRegs;
4003 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
4004 BitcastRegs.push_back(
4005 Elt: MIRBuilder.buildBitcast(Dst: SrcScalTy, Src: ConcatMI->getSourceReg(I: i))
4006 .getReg(Idx: 0));
4007 }
4008
4009 // Build the scalar values into a vector
4010 Register BuildReg =
4011 MIRBuilder.buildBuildVector(Res: CastTy, Ops: BitcastRegs).getReg(Idx: 0);
4012 MIRBuilder.buildBitcast(Dst: DstReg, Src: BuildReg);
4013
4014 MI.eraseFromParent();
4015 return Legalized;
4016}
4017
4018// This bitcasts a shuffle vector to a different type currently of the same
4019// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
4020// will be used instead.
4021//
4022// <16 x p0> = G_CONCAT_VECTORS <4 x p0>, <4 x p0>, mask
4023// ===>
4024// <4 x s64> = G_PTRTOINT <4 x p0>
4025// <4 x s64> = G_PTRTOINT <4 x p0>
4026// <16 x s64> = G_CONCAT_VECTORS <4 x s64>, <4 x s64>, mask
4027// <16 x p0> = G_INTTOPTR <16 x s64>
4028LegalizerHelper::LegalizeResult
4029LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
4030 LLT CastTy) {
4031 auto ShuffleMI = cast<GShuffleVector>(Val: &MI);
4032 LLT DstTy = MRI.getType(Reg: ShuffleMI->getReg(Idx: 0));
4033 LLT SrcTy = MRI.getType(Reg: ShuffleMI->getReg(Idx: 1));
4034
4035 // We currently only handle vectors of the same size.
4036 if (TypeIdx != 0 ||
4037 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
4038 CastTy.getElementCount() != DstTy.getElementCount())
4039 return UnableToLegalize;
4040
4041 LLT NewSrcTy = SrcTy.changeElementType(NewEltTy: CastTy.getScalarType());
4042
4043 auto Inp1 = MIRBuilder.buildCast(Dst: NewSrcTy, Src: ShuffleMI->getReg(Idx: 1));
4044 auto Inp2 = MIRBuilder.buildCast(Dst: NewSrcTy, Src: ShuffleMI->getReg(Idx: 2));
4045 auto Shuf =
4046 MIRBuilder.buildShuffleVector(Res: CastTy, Src1: Inp1, Src2: Inp2, Mask: ShuffleMI->getMask());
4047 MIRBuilder.buildCast(Dst: ShuffleMI->getReg(Idx: 0), Src: Shuf);
4048
4049 MI.eraseFromParent();
4050 return Legalized;
4051}
4052
4053/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
4054///
4055/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
4056///
4057/// ===>
4058///
4059/// <vscale x 2 x i1> = G_BITCAST <vscale x 16 x i1>
4060/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i1>, N / 8
4061/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
4062LegalizerHelper::LegalizeResult
4063LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
4064 LLT CastTy) {
4065 auto ES = cast<GExtractSubvector>(Val: &MI);
4066
4067 if (!CastTy.isVector())
4068 return UnableToLegalize;
4069
4070 if (TypeIdx != 0)
4071 return UnableToLegalize;
4072
4073 Register Dst = ES->getReg(Idx: 0);
4074 Register Src = ES->getSrcVec();
4075 uint64_t Idx = ES->getIndexImm();
4076
4077 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4078
4079 LLT DstTy = MRI.getType(Reg: Dst);
4080 LLT SrcTy = MRI.getType(Reg: Src);
4081 ElementCount DstTyEC = DstTy.getElementCount();
4082 ElementCount SrcTyEC = SrcTy.getElementCount();
4083 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4084 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
4085
4086 if (DstTy == CastTy)
4087 return Legalized;
4088
4089 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4090 return UnableToLegalize;
4091
4092 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4093 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4094 if (CastEltSize < DstEltSize)
4095 return UnableToLegalize;
4096
4097 auto AdjustAmt = CastEltSize / DstEltSize;
4098 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4099 SrcTyMinElts % AdjustAmt != 0)
4100 return UnableToLegalize;
4101
4102 Idx /= AdjustAmt;
4103 SrcTy = LLT::vector(EC: SrcTyEC.divideCoefficientBy(RHS: AdjustAmt), ScalarSizeInBits: AdjustAmt);
4104 auto CastVec = MIRBuilder.buildBitcast(Dst: SrcTy, Src);
4105 auto PromotedES = MIRBuilder.buildExtractSubvector(Res: CastTy, Src: CastVec, Index: Idx);
4106 MIRBuilder.buildBitcast(Dst, Src: PromotedES);
4107
4108 ES->eraseFromParent();
4109 return Legalized;
4110}
4111
4112/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
4113///
4114/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
4115/// <vscale x 8 x i1>,
4116/// N
4117///
4118/// ===>
4119///
4120/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4121/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
4122/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
4123/// <vscale x 1 x i8>, N / 8
4124/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
4125LegalizerHelper::LegalizeResult
4126LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
4127 LLT CastTy) {
4128 auto ES = cast<GInsertSubvector>(Val: &MI);
4129
4130 if (!CastTy.isVector())
4131 return UnableToLegalize;
4132
4133 if (TypeIdx != 0)
4134 return UnableToLegalize;
4135
4136 Register Dst = ES->getReg(Idx: 0);
4137 Register BigVec = ES->getBigVec();
4138 Register SubVec = ES->getSubVec();
4139 uint64_t Idx = ES->getIndexImm();
4140
4141 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4142
4143 LLT DstTy = MRI.getType(Reg: Dst);
4144 LLT BigVecTy = MRI.getType(Reg: BigVec);
4145 LLT SubVecTy = MRI.getType(Reg: SubVec);
4146
4147 if (DstTy == CastTy)
4148 return Legalized;
4149
4150 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4151 return UnableToLegalize;
4152
4153 ElementCount DstTyEC = DstTy.getElementCount();
4154 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4155 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4156 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4157 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4158 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4159
4160 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4161 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4162 if (CastEltSize < DstEltSize)
4163 return UnableToLegalize;
4164
4165 auto AdjustAmt = CastEltSize / DstEltSize;
4166 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4167 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4168 return UnableToLegalize;
4169
4170 Idx /= AdjustAmt;
4171 BigVecTy = LLT::vector(EC: BigVecTyEC.divideCoefficientBy(RHS: AdjustAmt), ScalarSizeInBits: AdjustAmt);
4172 SubVecTy = LLT::vector(EC: SubVecTyEC.divideCoefficientBy(RHS: AdjustAmt), ScalarSizeInBits: AdjustAmt);
4173 auto CastBigVec = MIRBuilder.buildBitcast(Dst: BigVecTy, Src: BigVec);
4174 auto CastSubVec = MIRBuilder.buildBitcast(Dst: SubVecTy, Src: SubVec);
4175 auto PromotedIS =
4176 MIRBuilder.buildInsertSubvector(Res: CastTy, Src0: CastBigVec, Src1: CastSubVec, Index: Idx);
4177 MIRBuilder.buildBitcast(Dst, Src: PromotedIS);
4178
4179 ES->eraseFromParent();
4180 return Legalized;
4181}
4182
4183LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
4184 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4185 Register DstReg = LoadMI.getDstReg();
4186 Register PtrReg = LoadMI.getPointerReg();
4187 LLT DstTy = MRI.getType(Reg: DstReg);
4188 MachineMemOperand &MMO = LoadMI.getMMO();
4189 LLT MemTy = MMO.getMemoryType();
4190 MachineFunction &MF = MIRBuilder.getMF();
4191
4192 LLT EltTy = MemTy.getScalarType();
4193
4194 unsigned MemSizeInBits = MemTy.getSizeInBits();
4195 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4196
4197 if (MemSizeInBits != MemStoreSizeInBits) {
4198 if (MemTy.isVector())
4199 return UnableToLegalize;
4200
4201 // Promote to a byte-sized load if not loading an integral number of
4202 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4203 LLT WideMemTy = EltTy.changeElementSize(NewEltSize: MemStoreSizeInBits);
4204 MachineMemOperand *NewMMO =
4205 MF.getMachineMemOperand(MMO: &MMO, PtrInfo: MMO.getPointerInfo(), Ty: WideMemTy);
4206
4207 Register LoadReg = DstReg;
4208 LLT LoadTy = DstTy;
4209
4210 // If this wasn't already an extending load, we need to widen the result
4211 // register to avoid creating a load with a narrower result than the source.
4212 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4213 LoadTy = WideMemTy;
4214 LoadReg = MRI.createGenericVirtualRegister(Ty: WideMemTy);
4215 }
4216
4217 if (isa<GSExtLoad>(Val: LoadMI)) {
4218 auto NewLoad = MIRBuilder.buildLoad(Res: LoadTy, Addr: PtrReg, MMO&: *NewMMO);
4219 MIRBuilder.buildSExtInReg(Res: LoadReg, Op: NewLoad, ImmOp: MemSizeInBits);
4220 } else if (isa<GZExtLoad>(Val: LoadMI) || WideMemTy == LoadTy) {
4221 auto NewLoad = MIRBuilder.buildLoad(Res: LoadTy, Addr: PtrReg, MMO&: *NewMMO);
4222 // The extra bits are guaranteed to be zero, since we stored them that
4223 // way. A zext load from Wide thus automatically gives zext from MemVT.
4224 MIRBuilder.buildAssertZExt(Res: LoadReg, Op: NewLoad, Size: MemSizeInBits);
4225 } else {
4226 MIRBuilder.buildLoad(Res: LoadReg, Addr: PtrReg, MMO&: *NewMMO);
4227 }
4228
4229 if (DstTy != LoadTy)
4230 MIRBuilder.buildTrunc(Res: DstReg, Op: LoadReg);
4231
4232 LoadMI.eraseFromParent();
4233 return Legalized;
4234 }
4235
4236 // Big endian lowering not implemented.
4237 if (MIRBuilder.getDataLayout().isBigEndian())
4238 return UnableToLegalize;
4239
4240 // This load needs splitting into power of 2 sized loads.
4241 //
4242 // Our strategy here is to generate anyextending loads for the smaller
4243 // types up to next power-2 result type, and then combine the two larger
4244 // result values together, before truncating back down to the non-pow-2
4245 // type.
4246 // E.g. v1 = i24 load =>
4247 // v2 = i32 zextload (2 byte)
4248 // v3 = i32 load (1 byte)
4249 // v4 = i32 shl v3, 16
4250 // v5 = i32 or v4, v2
4251 // v1 = i24 trunc v5
4252 // By doing this we generate the correct truncate which should get
4253 // combined away as an artifact with a matching extend.
4254
4255 uint64_t LargeSplitSize, SmallSplitSize;
4256
4257 if (!isPowerOf2_32(Value: MemSizeInBits)) {
4258 // This load needs splitting into power of 2 sized loads.
4259 LargeSplitSize = llvm::bit_floor(Value: MemSizeInBits);
4260 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4261 } else {
4262 // This is already a power of 2, but we still need to split this in half.
4263 //
4264 // Assume we're being asked to decompose an unaligned load.
4265 // TODO: If this requires multiple splits, handle them all at once.
4266 auto &Ctx = MF.getFunction().getContext();
4267 if (TLI.allowsMemoryAccess(Context&: Ctx, DL: MIRBuilder.getDataLayout(), Ty: MemTy, MMO))
4268 return UnableToLegalize;
4269
4270 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4271 }
4272
4273 if (MemTy.isVector()) {
4274 // TODO: Handle vector extloads
4275 if (MemTy != DstTy)
4276 return UnableToLegalize;
4277
4278 Align Alignment = LoadMI.getAlign();
4279 // Given an alignment larger than the size of the memory, we can increase
4280 // the size of the load without needing to scalarize it.
4281 if (Alignment.value() * 8 > MemSizeInBits &&
4282 isPowerOf2_64(Value: DstTy.getScalarSizeInBits())) {
4283 LLT MoreTy = DstTy.changeVectorElementCount(
4284 EC: ElementCount::getFixed(MinVal: NextPowerOf2(A: DstTy.getNumElements())));
4285 MachineMemOperand *NewMMO = MF.getMachineMemOperand(MMO: &MMO, Offset: 0, Ty: MoreTy);
4286 auto NewLoad = MIRBuilder.buildLoad(Res: MoreTy, Addr: PtrReg, MMO&: *NewMMO);
4287 MIRBuilder.buildDeleteTrailingVectorElements(Res: LoadMI.getReg(Idx: 0),
4288 Op0: NewLoad.getReg(Idx: 0));
4289 LoadMI.eraseFromParent();
4290 return Legalized;
4291 }
4292
4293 // TODO: We can do better than scalarizing the vector and at least split it
4294 // in half.
4295 return reduceLoadStoreWidth(MI&: LoadMI, TypeIdx: 0, NarrowTy: DstTy.getElementType());
4296 }
4297
4298 MachineMemOperand *LargeMMO =
4299 MF.getMachineMemOperand(MMO: &MMO, Offset: 0, Size: LargeSplitSize / 8);
4300 MachineMemOperand *SmallMMO =
4301 MF.getMachineMemOperand(MMO: &MMO, Offset: LargeSplitSize / 8, Size: SmallSplitSize / 8);
4302
4303 LLT PtrTy = MRI.getType(Reg: PtrReg);
4304 unsigned AnyExtSize = PowerOf2Ceil(A: DstTy.getSizeInBits());
4305
4306 LLT AnyExtTy;
4307 LLT OffsetCstRes;
4308 if (EltTy.isPointer()) {
4309 AnyExtTy = LLT::scalar(SizeInBits: AnyExtSize);
4310 OffsetCstRes = LLT::scalar(SizeInBits: PtrTy.getSizeInBits());
4311 } else {
4312 AnyExtTy = DstTy.changeElementSize(NewEltSize: AnyExtSize);
4313 OffsetCstRes = DstTy.changeElementSize(NewEltSize: PtrTy.getSizeInBits());
4314 }
4315
4316 auto LargeLoad = MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_ZEXTLOAD, Res: AnyExtTy,
4317 Addr: PtrReg, MMO&: *LargeMMO);
4318
4319 auto OffsetCst = MIRBuilder.buildConstant(Res: OffsetCstRes, Val: LargeSplitSize / 8);
4320 Register PtrAddReg = MRI.createGenericVirtualRegister(Ty: PtrTy);
4321 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(Res: PtrAddReg, Op0: PtrReg, Op1: OffsetCst);
4322 auto SmallLoad = MIRBuilder.buildLoadInstr(Opcode: LoadMI.getOpcode(), Res: AnyExtTy,
4323 Addr: SmallPtr, MMO&: *SmallMMO);
4324
4325 auto ShiftAmt = MIRBuilder.buildConstant(Res: AnyExtTy, Val: LargeSplitSize);
4326 auto Shift = MIRBuilder.buildShl(Dst: AnyExtTy, Src0: SmallLoad, Src1: ShiftAmt);
4327
4328 if (AnyExtTy == DstTy)
4329 MIRBuilder.buildOr(Dst: DstReg, Src0: Shift, Src1: LargeLoad);
4330 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4331 auto Or = MIRBuilder.buildOr(Dst: AnyExtTy, Src0: Shift, Src1: LargeLoad);
4332 MIRBuilder.buildTrunc(Res: DstReg, Op: {Or});
4333 } else {
4334 assert(DstTy.isPointer() && "expected pointer");
4335 auto Or = MIRBuilder.buildOr(Dst: AnyExtTy, Src0: Shift, Src1: LargeLoad);
4336
4337 // FIXME: We currently consider this to be illegal for non-integral address
4338 // spaces, but we need still need a way to reinterpret the bits.
4339 MIRBuilder.buildIntToPtr(Dst: DstReg, Src: Or);
4340 }
4341
4342 LoadMI.eraseFromParent();
4343 return Legalized;
4344}
4345
4346LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
4347 // Lower a non-power of 2 store into multiple pow-2 stores.
4348 // E.g. split an i24 store into an i16 store + i8 store.
4349 // We do this by first extending the stored value to the next largest power
4350 // of 2 type, and then using truncating stores to store the components.
4351 // By doing this, likewise with G_LOAD, generate an extend that can be
4352 // artifact-combined away instead of leaving behind extracts.
4353 Register SrcReg = StoreMI.getValueReg();
4354 Register PtrReg = StoreMI.getPointerReg();
4355 LLT SrcTy = MRI.getType(Reg: SrcReg);
4356 MachineFunction &MF = MIRBuilder.getMF();
4357 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4358 LLT MemTy = MMO.getMemoryType();
4359
4360 unsigned StoreWidth = MemTy.getSizeInBits();
4361 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4362
4363 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4364 // Promote to a byte-sized store with upper bits zero if not
4365 // storing an integral number of bytes. For example, promote
4366 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4367 LLT WideTy = LLT::integer(SizeInBits: StoreSizeInBits);
4368
4369 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4370 // Avoid creating a store with a narrower source than result.
4371 SrcReg = MIRBuilder.buildAnyExt(Res: WideTy, Op: SrcReg).getReg(Idx: 0);
4372 SrcTy = WideTy;
4373 }
4374
4375 auto ZextInReg = MIRBuilder.buildZExtInReg(Res: SrcTy, Op: SrcReg, ImmOp: StoreWidth);
4376
4377 MachineMemOperand *NewMMO =
4378 MF.getMachineMemOperand(MMO: &MMO, PtrInfo: MMO.getPointerInfo(), Ty: WideTy);
4379 MIRBuilder.buildStore(Val: ZextInReg, Addr: PtrReg, MMO&: *NewMMO);
4380 StoreMI.eraseFromParent();
4381 return Legalized;
4382 }
4383
4384 if (MemTy.isVector()) {
4385 if (MemTy != SrcTy)
4386 return scalarizeVectorBooleanStore(MI&: StoreMI);
4387
4388 // TODO: We can do better than scalarizing the vector and at least split it
4389 // in half.
4390 return reduceLoadStoreWidth(MI&: StoreMI, TypeIdx: 0, NarrowTy: SrcTy.getElementType());
4391 }
4392
4393 unsigned MemSizeInBits = MemTy.getSizeInBits();
4394 uint64_t LargeSplitSize, SmallSplitSize;
4395
4396 if (!isPowerOf2_32(Value: MemSizeInBits)) {
4397 LargeSplitSize = llvm::bit_floor<uint64_t>(Value: MemTy.getSizeInBits());
4398 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4399 } else {
4400 auto &Ctx = MF.getFunction().getContext();
4401 if (TLI.allowsMemoryAccess(Context&: Ctx, DL: MIRBuilder.getDataLayout(), Ty: MemTy, MMO))
4402 return UnableToLegalize; // Don't know what we're being asked to do.
4403
4404 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4405 }
4406
4407 // Extend to the next pow-2. If this store was itself the result of lowering,
4408 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4409 // that's wider than the stored size.
4410 unsigned AnyExtSize = PowerOf2Ceil(A: MemTy.getSizeInBits());
4411 const LLT NewSrcTy = LLT::integer(SizeInBits: AnyExtSize);
4412
4413 if (SrcTy.isPointer()) {
4414 const LLT IntPtrTy = LLT::integer(SizeInBits: SrcTy.getSizeInBits());
4415 SrcReg = MIRBuilder.buildPtrToInt(Dst: IntPtrTy, Src: SrcReg).getReg(Idx: 0);
4416 }
4417
4418 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(Res: NewSrcTy, Op: SrcReg);
4419
4420 // Obtain the smaller value by shifting away the larger value.
4421 auto ShiftAmt = MIRBuilder.buildConstant(Res: NewSrcTy, Val: LargeSplitSize);
4422 auto SmallVal = MIRBuilder.buildLShr(Dst: NewSrcTy, Src0: ExtVal, Src1: ShiftAmt);
4423
4424 // Generate the PtrAdd and truncating stores.
4425 LLT PtrTy = MRI.getType(Reg: PtrReg);
4426 auto OffsetCst = MIRBuilder.buildConstant(Res: LLT::integer(SizeInBits: PtrTy.getSizeInBits()),
4427 Val: LargeSplitSize / 8);
4428 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(Res: PtrTy, Op0: PtrReg, Op1: OffsetCst);
4429
4430 MachineMemOperand *LargeMMO =
4431 MF.getMachineMemOperand(MMO: &MMO, Offset: 0, Size: LargeSplitSize / 8);
4432 MachineMemOperand *SmallMMO =
4433 MF.getMachineMemOperand(MMO: &MMO, Offset: LargeSplitSize / 8, Size: SmallSplitSize / 8);
4434 MIRBuilder.buildStore(Val: ExtVal, Addr: PtrReg, MMO&: *LargeMMO);
4435 MIRBuilder.buildStore(Val: SmallVal, Addr: SmallPtr, MMO&: *SmallMMO);
4436 StoreMI.eraseFromParent();
4437 return Legalized;
4438}
4439
4440LegalizerHelper::LegalizeResult
4441LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
4442 Register SrcReg = StoreMI.getValueReg();
4443 Register PtrReg = StoreMI.getPointerReg();
4444 LLT SrcTy = MRI.getType(Reg: SrcReg);
4445 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4446 LLT MemTy = MMO.getMemoryType();
4447 LLT MemScalarTy = MemTy.getElementType();
4448 MachineFunction &MF = MIRBuilder.getMF();
4449
4450 assert(SrcTy.isVector() && "Expect a vector store type");
4451
4452 if (!MemScalarTy.isByteSized()) {
4453 // We need to build an integer scalar of the vector bit pattern.
4454 // It's not legal for us to add padding when storing a vector.
4455 unsigned NumBits = MemTy.getSizeInBits();
4456 LLT IntTy = LLT::integer(SizeInBits: NumBits);
4457 auto CurrVal = MIRBuilder.buildConstant(Res: IntTy, Val: 0);
4458 LLT IdxTy = TLI.getVectorIdxLLT(DL: MF.getDataLayout());
4459
4460 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4461 auto Elt = MIRBuilder.buildExtractVectorElement(
4462 Res: SrcTy.getElementType(), Val: SrcReg, Idx: MIRBuilder.buildConstant(Res: IdxTy, Val: I));
4463 auto Trunc = MIRBuilder.buildTrunc(Res: MemScalarTy, Op: Elt);
4464 auto ZExt = MIRBuilder.buildZExt(Res: IntTy, Op: Trunc);
4465 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4466 ? (MemTy.getNumElements() - 1) - I
4467 : I;
4468 auto ShiftAmt = MIRBuilder.buildConstant(
4469 Res: IntTy, Val: ShiftIntoIdx * MemScalarTy.getSizeInBits());
4470 auto Shifted = MIRBuilder.buildShl(Dst: IntTy, Src0: ZExt, Src1: ShiftAmt);
4471 CurrVal = MIRBuilder.buildOr(Dst: IntTy, Src0: CurrVal, Src1: Shifted);
4472 }
4473 auto PtrInfo = MMO.getPointerInfo();
4474 auto *NewMMO = MF.getMachineMemOperand(MMO: &MMO, PtrInfo, Ty: IntTy);
4475 MIRBuilder.buildStore(Val: CurrVal, Addr: PtrReg, MMO&: *NewMMO);
4476 StoreMI.eraseFromParent();
4477 return Legalized;
4478 }
4479
4480 // TODO: implement simple scalarization.
4481 return UnableToLegalize;
4482}
4483
4484LegalizerHelper::LegalizeResult
4485LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4486 switch (MI.getOpcode()) {
4487 case TargetOpcode::G_LOAD: {
4488 if (TypeIdx != 0)
4489 return UnableToLegalize;
4490 MachineMemOperand &MMO = **MI.memoperands_begin();
4491
4492 // Not sure how to interpret a bitcast of an extending load.
4493 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4494 return UnableToLegalize;
4495
4496 Observer.changingInstr(MI);
4497 bitcastDst(MI, CastTy, OpIdx: 0);
4498 MMO.setType(CastTy);
4499 // The range metadata is no longer valid when reinterpreted as a different
4500 // type.
4501 MMO.clearRanges();
4502 Observer.changedInstr(MI);
4503 return Legalized;
4504 }
4505 case TargetOpcode::G_STORE: {
4506 if (TypeIdx != 0)
4507 return UnableToLegalize;
4508
4509 MachineMemOperand &MMO = **MI.memoperands_begin();
4510
4511 // Not sure how to interpret a bitcast of a truncating store.
4512 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4513 return UnableToLegalize;
4514
4515 Observer.changingInstr(MI);
4516 bitcastSrc(MI, CastTy, OpIdx: 0);
4517 MMO.setType(CastTy);
4518 Observer.changedInstr(MI);
4519 return Legalized;
4520 }
4521 case TargetOpcode::G_SELECT: {
4522 if (TypeIdx != 0)
4523 return UnableToLegalize;
4524
4525 if (MRI.getType(Reg: MI.getOperand(i: 1).getReg()).isVector()) {
4526 LLVM_DEBUG(
4527 dbgs() << "bitcast action not implemented for vector select\n");
4528 return UnableToLegalize;
4529 }
4530
4531 Observer.changingInstr(MI);
4532 bitcastSrc(MI, CastTy, OpIdx: 2);
4533 bitcastSrc(MI, CastTy, OpIdx: 3);
4534 bitcastDst(MI, CastTy, OpIdx: 0);
4535 Observer.changedInstr(MI);
4536 return Legalized;
4537 }
4538 case TargetOpcode::G_AND:
4539 case TargetOpcode::G_OR:
4540 case TargetOpcode::G_XOR: {
4541 Observer.changingInstr(MI);
4542 bitcastSrc(MI, CastTy, OpIdx: 1);
4543 bitcastSrc(MI, CastTy, OpIdx: 2);
4544 bitcastDst(MI, CastTy, OpIdx: 0);
4545 Observer.changedInstr(MI);
4546 return Legalized;
4547 }
4548 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4549 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4550 case TargetOpcode::G_INSERT_VECTOR_ELT:
4551 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4552 case TargetOpcode::G_CONCAT_VECTORS:
4553 return bitcastConcatVector(MI, TypeIdx, CastTy);
4554 case TargetOpcode::G_SHUFFLE_VECTOR:
4555 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4556 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4557 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4558 case TargetOpcode::G_INSERT_SUBVECTOR:
4559 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4560 default:
4561 return UnableToLegalize;
4562 }
4563}
4564
4565// Legalize an instruction by changing the opcode in place.
4566void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4567 Observer.changingInstr(MI);
4568 MI.setDesc(MIRBuilder.getTII().get(Opcode: NewOpcode));
4569 Observer.changedInstr(MI);
4570}
4571
4572LegalizerHelper::LegalizeResult
4573LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4574 using namespace TargetOpcode;
4575
4576 switch(MI.getOpcode()) {
4577 default:
4578 return UnableToLegalize;
4579 case TargetOpcode::G_FCONSTANT:
4580 return lowerFConstant(MI);
4581 case TargetOpcode::G_BITCAST:
4582 return lowerBitcast(MI);
4583 case TargetOpcode::G_SREM:
4584 case TargetOpcode::G_UREM: {
4585 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
4586 auto Quot =
4587 MIRBuilder.buildInstr(Opc: MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, DstOps: {Ty},
4588 SrcOps: {MI.getOperand(i: 1), MI.getOperand(i: 2)});
4589
4590 auto Prod = MIRBuilder.buildMul(Dst: Ty, Src0: Quot, Src1: MI.getOperand(i: 2));
4591 MIRBuilder.buildSub(Dst: MI.getOperand(i: 0), Src0: MI.getOperand(i: 1), Src1: Prod);
4592 MI.eraseFromParent();
4593 return Legalized;
4594 }
4595 case TargetOpcode::G_SADDO:
4596 case TargetOpcode::G_SSUBO:
4597 return lowerSADDO_SSUBO(MI);
4598 case TargetOpcode::G_SADDE:
4599 return lowerSADDE(MI);
4600 case TargetOpcode::G_SSUBE:
4601 return lowerSSUBE(MI);
4602 case TargetOpcode::G_UMULH:
4603 case TargetOpcode::G_SMULH:
4604 return lowerSMULH_UMULH(MI);
4605 case TargetOpcode::G_SMULO:
4606 case TargetOpcode::G_UMULO: {
4607 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4608 // result.
4609 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4610 LLT Ty = MRI.getType(Reg: Res);
4611
4612 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4613 ? TargetOpcode::G_SMULH
4614 : TargetOpcode::G_UMULH;
4615
4616 Observer.changingInstr(MI);
4617 const auto &TII = MIRBuilder.getTII();
4618 MI.setDesc(TII.get(Opcode: TargetOpcode::G_MUL));
4619 MI.removeOperand(OpNo: 1);
4620 Observer.changedInstr(MI);
4621
4622 auto HiPart = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {Ty}, SrcOps: {LHS, RHS});
4623 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0);
4624
4625 // Move insert point forward so we can use the Res register if needed.
4626 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
4627
4628 // For *signed* multiply, overflow is detected by checking:
4629 // (hi != (lo >> bitwidth-1))
4630 if (Opcode == TargetOpcode::G_SMULH) {
4631 auto ShiftAmt = MIRBuilder.buildConstant(Res: Ty, Val: Ty.getSizeInBits() - 1);
4632 auto Shifted = MIRBuilder.buildAShr(Dst: Ty, Src0: Res, Src1: ShiftAmt);
4633 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: Overflow, Op0: HiPart, Op1: Shifted);
4634 } else {
4635 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: Overflow, Op0: HiPart, Op1: Zero);
4636 }
4637 return Legalized;
4638 }
4639 case TargetOpcode::G_FNEG: {
4640 auto [Res, ResTy, SubByReg, SubByRegTy] = MI.getFirst2RegLLTs();
4641 LLT TyInt =
4642 ResTy.changeElementType(NewEltTy: LLT::integer(SizeInBits: ResTy.getScalarSizeInBits()));
4643 Register CastedSubByReg = SubByReg;
4644
4645 if (!SubByRegTy.getScalarType().isAnyScalar() &&
4646 !SubByRegTy.getScalarType().isInteger()) {
4647 auto BitcastDst = SubByRegTy.changeElementType(
4648 NewEltTy: LLT::integer(SizeInBits: SubByRegTy.getScalarSizeInBits()));
4649 CastedSubByReg = MIRBuilder.buildBitcast(Dst: BitcastDst, Src: SubByReg).getReg(Idx: 0);
4650 }
4651
4652 auto SignMask = MIRBuilder.buildConstant(
4653 Res: TyInt, Val: APInt::getSignMask(BitWidth: TyInt.getScalarSizeInBits()));
4654
4655 if (ResTy != TyInt) {
4656 Register NewDst =
4657 MIRBuilder.buildXor(Dst: TyInt, Src0: CastedSubByReg, Src1: SignMask).getReg(Idx: 0);
4658 MIRBuilder.buildBitcast(Dst: Res, Src: NewDst);
4659 } else
4660 MIRBuilder.buildXor(Dst: Res, Src0: CastedSubByReg, Src1: SignMask).getReg(Idx: 0);
4661
4662 MI.eraseFromParent();
4663 return Legalized;
4664 }
4665 case TargetOpcode::G_FSUB:
4666 case TargetOpcode::G_STRICT_FSUB: {
4667 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4668 LLT Ty = MRI.getType(Reg: Res);
4669
4670 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4671 auto Neg = MIRBuilder.buildFNeg(Dst: Ty, Src0: RHS);
4672
4673 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4674 MIRBuilder.buildStrictFAdd(Dst: Res, Src0: LHS, Src1: Neg, Flags: MI.getFlags());
4675 else
4676 MIRBuilder.buildFAdd(Dst: Res, Src0: LHS, Src1: Neg, Flags: MI.getFlags());
4677
4678 MI.eraseFromParent();
4679 return Legalized;
4680 }
4681 case TargetOpcode::G_FMAD:
4682 return lowerFMad(MI);
4683 case TargetOpcode::G_FFLOOR:
4684 return lowerFFloor(MI);
4685 case TargetOpcode::G_LROUND:
4686 case TargetOpcode::G_LLROUND: {
4687 Register DstReg = MI.getOperand(i: 0).getReg();
4688 Register SrcReg = MI.getOperand(i: 1).getReg();
4689 LLT SrcTy = MRI.getType(Reg: SrcReg);
4690 auto Round = MIRBuilder.buildInstr(Opc: TargetOpcode::G_INTRINSIC_ROUND, DstOps: {SrcTy},
4691 SrcOps: {SrcReg});
4692 MIRBuilder.buildFPTOSI(Dst: DstReg, Src0: Round);
4693 MI.eraseFromParent();
4694 return Legalized;
4695 }
4696 case TargetOpcode::G_INTRINSIC_ROUND:
4697 return lowerIntrinsicRound(MI);
4698 case TargetOpcode::G_FRINT: {
4699 // Since round even is the assumed rounding mode for unconstrained FP
4700 // operations, rint and roundeven are the same operation.
4701 changeOpcode(MI, NewOpcode: TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4702 return Legalized;
4703 }
4704 case TargetOpcode::G_INTRINSIC_LRINT:
4705 case TargetOpcode::G_INTRINSIC_LLRINT: {
4706 Register DstReg = MI.getOperand(i: 0).getReg();
4707 Register SrcReg = MI.getOperand(i: 1).getReg();
4708 LLT SrcTy = MRI.getType(Reg: SrcReg);
4709 auto Round =
4710 MIRBuilder.buildInstr(Opc: TargetOpcode::G_FRINT, DstOps: {SrcTy}, SrcOps: {SrcReg});
4711 MIRBuilder.buildFPTOSI(Dst: DstReg, Src0: Round);
4712 MI.eraseFromParent();
4713 return Legalized;
4714 }
4715 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4716 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4717 Register NewOldValRes = MRI.cloneVirtualRegister(VReg: OldValRes);
4718 MIRBuilder.buildAtomicCmpXchg(OldValRes: NewOldValRes, Addr, CmpVal, NewVal,
4719 MMO&: **MI.memoperands_begin());
4720 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_EQ, Res: SuccessRes, Op0: NewOldValRes, Op1: CmpVal);
4721 MIRBuilder.buildCopy(Res: OldValRes, Op: NewOldValRes);
4722 MI.eraseFromParent();
4723 return Legalized;
4724 }
4725 case TargetOpcode::G_LOAD:
4726 case TargetOpcode::G_SEXTLOAD:
4727 case TargetOpcode::G_ZEXTLOAD:
4728 return lowerLoad(LoadMI&: cast<GAnyLoad>(Val&: MI));
4729 case TargetOpcode::G_STORE:
4730 return lowerStore(StoreMI&: cast<GStore>(Val&: MI));
4731 case TargetOpcode::G_CTLZ_ZERO_POISON:
4732 case TargetOpcode::G_CTTZ_ZERO_POISON:
4733 case TargetOpcode::G_CTLZ:
4734 case TargetOpcode::G_CTTZ:
4735 case TargetOpcode::G_CTPOP:
4736 case TargetOpcode::G_CTLS:
4737 return lowerBitCount(MI);
4738 case G_UADDO: {
4739 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4740
4741 Register NewRes = MRI.cloneVirtualRegister(VReg: Res);
4742
4743 MIRBuilder.buildAdd(Dst: NewRes, Src0: LHS, Src1: RHS);
4744 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_ULT, Res: CarryOut, Op0: NewRes, Op1: RHS);
4745
4746 MIRBuilder.buildCopy(Res, Op: NewRes);
4747
4748 MI.eraseFromParent();
4749 return Legalized;
4750 }
4751 case G_UADDE: {
4752 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4753 const LLT CondTy = MRI.getType(Reg: CarryOut);
4754 const LLT Ty = MRI.getType(Reg: Res);
4755
4756 Register NewRes = MRI.cloneVirtualRegister(VReg: Res);
4757
4758 // Initial add of the two operands.
4759 auto TmpRes = MIRBuilder.buildAdd(Dst: Ty, Src0: LHS, Src1: RHS);
4760
4761 // Initial check for carry.
4762 auto Carry = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_ULT, Res: CondTy, Op0: TmpRes, Op1: LHS);
4763
4764 // Add the sum and the carry.
4765 auto ZExtCarryIn = MIRBuilder.buildZExt(Res: Ty, Op: CarryIn);
4766 MIRBuilder.buildAdd(Dst: NewRes, Src0: TmpRes, Src1: ZExtCarryIn);
4767
4768 // Second check for carry. We can only carry if the initial sum is all 1s
4769 // and the carry is set, resulting in a new sum of 0.
4770 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0);
4771 auto ResEqZero =
4772 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_EQ, Res: CondTy, Op0: NewRes, Op1: Zero);
4773 auto Carry2 = MIRBuilder.buildAnd(Dst: CondTy, Src0: ResEqZero, Src1: CarryIn);
4774 MIRBuilder.buildOr(Dst: CarryOut, Src0: Carry, Src1: Carry2);
4775
4776 MIRBuilder.buildCopy(Res, Op: NewRes);
4777
4778 MI.eraseFromParent();
4779 return Legalized;
4780 }
4781 case G_USUBO: {
4782 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4783
4784 MIRBuilder.buildSub(Dst: Res, Src0: LHS, Src1: RHS);
4785 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_ULT, Res: BorrowOut, Op0: LHS, Op1: RHS);
4786
4787 MI.eraseFromParent();
4788 return Legalized;
4789 }
4790 case G_USUBE: {
4791 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4792 const LLT CondTy = MRI.getType(Reg: BorrowOut);
4793 const LLT Ty = MRI.getType(Reg: Res);
4794
4795 // Initial subtract of the two operands.
4796 auto TmpRes = MIRBuilder.buildSub(Dst: Ty, Src0: LHS, Src1: RHS);
4797
4798 // Initial check for borrow.
4799 auto Borrow = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_UGT, Res: CondTy, Op0: TmpRes, Op1: LHS);
4800
4801 // Subtract the borrow from the first subtract.
4802 auto ZExtBorrowIn = MIRBuilder.buildZExt(Res: Ty, Op: BorrowIn);
4803 MIRBuilder.buildSub(Dst: Res, Src0: TmpRes, Src1: ZExtBorrowIn);
4804
4805 // Second check for borrow. We can only borrow if the initial difference is
4806 // 0 and the borrow is set, resulting in a new difference of all 1s.
4807 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0);
4808 auto TmpResEqZero =
4809 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_EQ, Res: CondTy, Op0: TmpRes, Op1: Zero);
4810 auto Borrow2 = MIRBuilder.buildAnd(Dst: CondTy, Src0: TmpResEqZero, Src1: BorrowIn);
4811 MIRBuilder.buildOr(Dst: BorrowOut, Src0: Borrow, Src1: Borrow2);
4812
4813 MI.eraseFromParent();
4814 return Legalized;
4815 }
4816 case G_UITOFP:
4817 return lowerUITOFP(MI);
4818 case G_SITOFP:
4819 return lowerSITOFP(MI);
4820 case G_FPTOUI:
4821 return lowerFPTOUI(MI);
4822 case G_FPTOSI:
4823 return lowerFPTOSI(MI);
4824 case G_FPTOUI_SAT:
4825 case G_FPTOSI_SAT:
4826 return lowerFPTOINT_SAT(MI);
4827 case G_FPEXT:
4828 return lowerFPExtAndTruncMem(MI);
4829 case G_FPTRUNC:
4830 return lowerFPTRUNC(MI);
4831 case G_FPOWI:
4832 return lowerFPOWI(MI);
4833 case G_FMODF:
4834 return lowerFMODF(MI);
4835 case G_SMIN:
4836 case G_SMAX:
4837 case G_UMIN:
4838 case G_UMAX:
4839 return lowerMinMax(MI);
4840 case G_SCMP:
4841 case G_UCMP:
4842 return lowerThreewayCompare(MI);
4843 case G_FCOPYSIGN:
4844 return lowerFCopySign(MI);
4845 case G_FMINNUM:
4846 case G_FMAXNUM:
4847 case G_FMINIMUMNUM:
4848 case G_FMAXIMUMNUM:
4849 return lowerFMinNumMaxNum(MI);
4850 case G_FMINIMUM:
4851 case G_FMAXIMUM:
4852 return lowerFMinimumMaximum(MI);
4853 case G_MERGE_VALUES:
4854 return lowerMergeValues(MI);
4855 case G_UNMERGE_VALUES:
4856 return lowerUnmergeValues(MI);
4857 case TargetOpcode::G_SEXT_INREG: {
4858 assert(MI.getOperand(2).isImm() && "Expected immediate");
4859 int64_t SizeInBits = MI.getOperand(i: 2).getImm();
4860
4861 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4862 LLT DstTy = MRI.getType(Reg: DstReg);
4863 Register TmpRes = MRI.createGenericVirtualRegister(Ty: DstTy);
4864
4865 auto MIBSz = MIRBuilder.buildConstant(Res: DstTy, Val: DstTy.getScalarSizeInBits() - SizeInBits);
4866 MIRBuilder.buildShl(Dst: TmpRes, Src0: SrcReg, Src1: MIBSz->getOperand(i: 0));
4867 MIRBuilder.buildAShr(Dst: DstReg, Src0: TmpRes, Src1: MIBSz->getOperand(i: 0));
4868 MI.eraseFromParent();
4869 return Legalized;
4870 }
4871 case G_EXTRACT_VECTOR_ELT:
4872 case G_INSERT_VECTOR_ELT:
4873 return lowerExtractInsertVectorElt(MI);
4874 case G_SHUFFLE_VECTOR:
4875 return lowerShuffleVector(MI);
4876 case G_VECTOR_COMPRESS:
4877 return lowerVECTOR_COMPRESS(MI);
4878 case G_DYN_STACKALLOC:
4879 return lowerDynStackAlloc(MI);
4880 case G_STACKSAVE:
4881 return lowerStackSave(MI);
4882 case G_STACKRESTORE:
4883 return lowerStackRestore(MI);
4884 case G_EXTRACT:
4885 return lowerExtract(MI);
4886 case G_INSERT:
4887 return lowerInsert(MI);
4888 case G_BSWAP:
4889 return lowerBswap(MI);
4890 case G_BITREVERSE:
4891 return lowerBitreverse(MI);
4892 case G_READ_REGISTER:
4893 case G_WRITE_REGISTER:
4894 return lowerReadWriteRegister(MI);
4895 case G_UADDSAT:
4896 case G_USUBSAT: {
4897 // Try to make a reasonable guess about which lowering strategy to use. The
4898 // target can override this with custom lowering and calling the
4899 // implementation functions.
4900 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
4901 if (LI.isLegalOrCustom(Query: {G_UMIN, Ty}))
4902 return lowerAddSubSatToMinMax(MI);
4903 return lowerAddSubSatToAddoSubo(MI);
4904 }
4905 case G_SADDSAT:
4906 case G_SSUBSAT: {
4907 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
4908
4909 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4910 // since it's a shorter expansion. However, we would need to figure out the
4911 // preferred boolean type for the carry out for the query.
4912 if (LI.isLegalOrCustom(Query: {G_SMIN, Ty}) && LI.isLegalOrCustom(Query: {G_SMAX, Ty}))
4913 return lowerAddSubSatToMinMax(MI);
4914 return lowerAddSubSatToAddoSubo(MI);
4915 }
4916 case G_SSHLSAT:
4917 case G_USHLSAT:
4918 return lowerShlSat(MI);
4919 case G_ABS:
4920 return lowerAbsToAddXor(MI);
4921 case G_ABDS:
4922 case G_ABDU: {
4923 bool IsSigned = MI.getOpcode() == G_ABDS;
4924 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
4925 if ((IsSigned && LI.isLegal(Query: {G_SMIN, Ty}) && LI.isLegal(Query: {G_SMAX, Ty})) ||
4926 (!IsSigned && LI.isLegal(Query: {G_UMIN, Ty}) && LI.isLegal(Query: {G_UMAX, Ty}))) {
4927 return lowerAbsDiffToMinMax(MI);
4928 }
4929 return lowerAbsDiffToSelect(MI);
4930 }
4931 case G_FABS:
4932 return lowerFAbs(MI);
4933 case G_SELECT:
4934 return lowerSelect(MI);
4935 case G_IS_FPCLASS:
4936 return lowerISFPCLASS(MI);
4937 case G_SDIVREM:
4938 case G_UDIVREM:
4939 return lowerDIVREM(MI);
4940 case G_FSHL:
4941 case G_FSHR:
4942 return lowerFunnelShift(MI);
4943 case G_ROTL:
4944 case G_ROTR:
4945 return lowerRotate(MI);
4946 case G_MEMSET:
4947 case G_MEMCPY:
4948 case G_MEMMOVE:
4949 case G_MEMCPY_INLINE:
4950 case G_MEMSET_INLINE:
4951 return lowerMemCpyFamily(MI);
4952 case G_ZEXT:
4953 case G_SEXT:
4954 case G_ANYEXT:
4955 return lowerEXT(MI);
4956 case G_TRUNC:
4957 return lowerTRUNC(MI);
4958 GISEL_VECREDUCE_CASES_NONSEQ
4959 return lowerVectorReduction(MI);
4960 case G_VAARG:
4961 return lowerVAArg(MI);
4962 case G_ATOMICRMW_SUB: {
4963 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4964 const LLT ValTy = MRI.getType(Reg: Val);
4965 MachineMemOperand *MMO = *MI.memoperands_begin();
4966
4967 auto VNeg = MIRBuilder.buildNeg(Dst: ValTy, Src0: Val);
4968 MIRBuilder.buildAtomicRMW(Opcode: G_ATOMICRMW_ADD, OldValRes: Ret, Addr: Mem, Val: VNeg, MMO&: *MMO);
4969 MI.eraseFromParent();
4970 return Legalized;
4971 }
4972 case G_SMULFIX:
4973 case G_UMULFIX:
4974 return lowerMulfix(MI);
4975 }
4976}
4977
4978Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4979 Align MinAlign) const {
4980 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4981 // datalayout for the preferred alignment. Also there should be a target hook
4982 // for this to allow targets to reduce the alignment and ignore the
4983 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4984 // the type.
4985 return std::max(a: Align(PowerOf2Ceil(A: Ty.getSizeInBytes())), b: MinAlign);
4986}
4987
4988MachineInstrBuilder
4989LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4990 MachinePointerInfo &PtrInfo) {
4991 MachineFunction &MF = MIRBuilder.getMF();
4992 const DataLayout &DL = MIRBuilder.getDataLayout();
4993 int FrameIdx = MF.getFrameInfo().CreateStackObject(Size: Bytes, Alignment, isSpillSlot: false);
4994
4995 unsigned AddrSpace = DL.getAllocaAddrSpace();
4996 LLT FramePtrTy = LLT::pointer(AddressSpace: AddrSpace, SizeInBits: DL.getPointerSizeInBits(AS: AddrSpace));
4997
4998 PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIdx);
4999 return MIRBuilder.buildFrameIndex(Res: FramePtrTy, Idx: FrameIdx);
5000}
5001
5002MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
5003 const SrcOp &Val) {
5004 LLT SrcTy = Val.getLLTTy(MRI);
5005 Align StackTypeAlign =
5006 std::max(a: getStackTemporaryAlignment(Ty: SrcTy),
5007 b: getStackTemporaryAlignment(Ty: Res.getLLTTy(MRI)));
5008 MachinePointerInfo PtrInfo;
5009 auto StackTemp =
5010 createStackTemporary(Bytes: SrcTy.getSizeInBytes(), Alignment: StackTypeAlign, PtrInfo);
5011
5012 MIRBuilder.buildStore(Val, Addr: StackTemp, PtrInfo, Alignment: StackTypeAlign);
5013 return MIRBuilder.buildLoad(Res, Addr: StackTemp, PtrInfo, Alignment: StackTypeAlign);
5014}
5015
5016static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
5017 LLT VecTy) {
5018 LLT IdxTy = B.getMRI()->getType(Reg: IdxReg);
5019 unsigned NElts = VecTy.getNumElements();
5020
5021 int64_t IdxVal;
5022 if (mi_match(R: IdxReg, MRI: *B.getMRI(), P: m_ICst(Cst&: IdxVal))) {
5023 if (IdxVal < VecTy.getNumElements())
5024 return IdxReg;
5025 // If a constant index would be out of bounds, clamp it as well.
5026 }
5027
5028 if (isPowerOf2_32(Value: NElts)) {
5029 APInt Imm = APInt::getLowBitsSet(numBits: IdxTy.getSizeInBits(), loBitsSet: Log2_32(Value: NElts));
5030 return B.buildAnd(Dst: IdxTy, Src0: IdxReg, Src1: B.buildConstant(Res: IdxTy, Val: Imm)).getReg(Idx: 0);
5031 }
5032
5033 return B.buildUMin(Dst: IdxTy, Src0: IdxReg, Src1: B.buildConstant(Res: IdxTy, Val: NElts - 1))
5034 .getReg(Idx: 0);
5035}
5036
5037Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
5038 Register Index) {
5039 LLT EltTy = VecTy.getElementType();
5040
5041 // Calculate the element offset and add it to the pointer.
5042 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
5043 assert(EltSize * 8 == EltTy.getSizeInBits() &&
5044 "Converting bits to bytes lost precision");
5045
5046 Index = clampVectorIndex(B&: MIRBuilder, IdxReg: Index, VecTy);
5047
5048 // Convert index to the correct size for the address space.
5049 const DataLayout &DL = MIRBuilder.getDataLayout();
5050 unsigned AS = MRI.getType(Reg: VecPtr).getAddressSpace();
5051 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
5052 LLT IdxTy = MRI.getType(Reg: Index).changeElementSize(NewEltSize: IndexSizeInBits);
5053 if (IdxTy != MRI.getType(Reg: Index))
5054 Index = MIRBuilder.buildSExtOrTrunc(Res: IdxTy, Op: Index).getReg(Idx: 0);
5055
5056 auto Mul = MIRBuilder.buildMul(Dst: IdxTy, Src0: Index,
5057 Src1: MIRBuilder.buildConstant(Res: IdxTy, Val: EltSize));
5058
5059 LLT PtrTy = MRI.getType(Reg: VecPtr);
5060 return MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: VecPtr, Op1: Mul).getReg(Idx: 0);
5061}
5062
5063#ifndef NDEBUG
5064/// Check that all vector operands have same number of elements. Other operands
5065/// should be listed in NonVecOp.
5066static bool hasSameNumEltsOnAllVectorOperands(
5067 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
5068 std::initializer_list<unsigned> NonVecOpIndices) {
5069 if (MI.getNumMemOperands() != 0)
5070 return false;
5071
5072 LLT VecTy = MRI.getType(MI.getReg(0));
5073 if (!VecTy.isVector())
5074 return false;
5075 unsigned NumElts = VecTy.getNumElements();
5076
5077 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5078 MachineOperand &Op = MI.getOperand(OpIdx);
5079 if (!Op.isReg()) {
5080 if (!is_contained(NonVecOpIndices, OpIdx))
5081 return false;
5082 continue;
5083 }
5084
5085 LLT Ty = MRI.getType(Op.getReg());
5086 if (!Ty.isVector()) {
5087 if (!is_contained(NonVecOpIndices, OpIdx))
5088 return false;
5089 continue;
5090 }
5091
5092 if (Ty.getNumElements() != NumElts)
5093 return false;
5094 }
5095
5096 return true;
5097}
5098#endif
5099
5100/// Fill \p DstOps with DstOps that have same number of elements combined as
5101/// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
5102/// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
5103/// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements.
5104static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
5105 unsigned NumElts) {
5106 LLT LeftoverTy;
5107 assert(Ty.isVector() && "Expected vector type");
5108 LLT NarrowTy = Ty.changeElementCount(EC: ElementCount::getFixed(MinVal: NumElts));
5109 int NumParts, NumLeftover;
5110 std::tie(args&: NumParts, args&: NumLeftover) =
5111 getNarrowTypeBreakDown(OrigTy: Ty, NarrowTy, LeftoverTy);
5112
5113 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
5114 for (int i = 0; i < NumParts; ++i) {
5115 DstOps.push_back(Elt: NarrowTy);
5116 }
5117
5118 if (LeftoverTy.isValid()) {
5119 assert(NumLeftover == 1 && "expected exactly one leftover");
5120 DstOps.push_back(Elt: LeftoverTy);
5121 }
5122}
5123
5124/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
5125/// made from \p Op depending on operand type.
5126static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
5127 MachineOperand &Op) {
5128 for (unsigned i = 0; i < N; ++i) {
5129 if (Op.isReg())
5130 Ops.push_back(Elt: Op.getReg());
5131 else if (Op.isImm())
5132 Ops.push_back(Elt: Op.getImm());
5133 else if (Op.isPredicate())
5134 Ops.push_back(Elt: static_cast<CmpInst::Predicate>(Op.getPredicate()));
5135 else
5136 llvm_unreachable("Unsupported type");
5137 }
5138}
5139
5140// Handle splitting vector operations which need to have the same number of
5141// elements in each type index, but each type index may have a different element
5142// type.
5143//
5144// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
5145// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5146// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5147//
5148// Also handles some irregular breakdown cases, e.g.
5149// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
5150// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5151// s64 = G_SHL s64, s32
5152LegalizerHelper::LegalizeResult
5153LegalizerHelper::fewerElementsVectorMultiEltType(
5154 GenericMachineInstr &MI, unsigned NumElts,
5155 std::initializer_list<unsigned> NonVecOpIndices) {
5156 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
5157 "Non-compatible opcode or not specified non-vector operands");
5158 unsigned OrigNumElts = MRI.getType(Reg: MI.getReg(Idx: 0)).getNumElements();
5159
5160 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5161 unsigned NumDefs = MI.getNumDefs();
5162
5163 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
5164 // Build instructions with DstOps to use instruction found by CSE directly.
5165 // CSE copies found instruction into given vreg when building with vreg dest.
5166 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5167 // Output registers will be taken from created instructions.
5168 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5169 for (unsigned i = 0; i < NumDefs; ++i) {
5170 makeDstOps(DstOps&: OutputOpsPieces[i], Ty: MRI.getType(Reg: MI.getReg(Idx: i)), NumElts);
5171 }
5172
5173 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5174 // Operands listed in NonVecOpIndices will be used as is without splitting;
5175 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5176 // scalar condition (op 1), immediate in sext_inreg (op 2).
5177 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5178 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5179 ++UseIdx, ++UseNo) {
5180 if (is_contained(Set: NonVecOpIndices, Element: UseIdx)) {
5181 broadcastSrcOp(Ops&: InputOpsPieces[UseNo], N: OutputOpsPieces[0].size(),
5182 Op&: MI.getOperand(i: UseIdx));
5183 } else {
5184 SmallVector<Register, 8> SplitPieces;
5185 extractVectorParts(Reg: MI.getReg(Idx: UseIdx), NumElts, VRegs&: SplitPieces, MIRBuilder,
5186 MRI);
5187 llvm::append_range(C&: InputOpsPieces[UseNo], R&: SplitPieces);
5188 }
5189 }
5190
5191 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5192
5193 // Take i-th piece of each input operand split and build sub-vector/scalar
5194 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5195 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5196 SmallVector<DstOp, 2> Defs;
5197 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5198 Defs.push_back(Elt: OutputOpsPieces[DstNo][i]);
5199
5200 SmallVector<SrcOp, 3> Uses;
5201 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5202 Uses.push_back(Elt: InputOpsPieces[InputNo][i]);
5203
5204 auto I = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: Defs, SrcOps: Uses, Flags: MI.getFlags());
5205 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5206 OutputRegs[DstNo].push_back(Elt: I.getReg(Idx: DstNo));
5207 }
5208
5209 // Merge small outputs into MI's output for each def operand.
5210 if (NumLeftovers) {
5211 for (unsigned i = 0; i < NumDefs; ++i)
5212 mergeMixedSubvectors(DstReg: MI.getReg(Idx: i), PartRegs: OutputRegs[i]);
5213 } else {
5214 for (unsigned i = 0; i < NumDefs; ++i)
5215 MIRBuilder.buildMergeLikeInstr(Res: MI.getReg(Idx: i), Ops: OutputRegs[i]);
5216 }
5217
5218 MI.eraseFromParent();
5219 return Legalized;
5220}
5221
5222LegalizerHelper::LegalizeResult
5223LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
5224 unsigned NumElts) {
5225 unsigned OrigNumElts = MRI.getType(Reg: MI.getReg(Idx: 0)).getNumElements();
5226
5227 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5228 unsigned NumDefs = MI.getNumDefs();
5229
5230 SmallVector<DstOp, 8> OutputOpsPieces;
5231 SmallVector<Register, 8> OutputRegs;
5232 makeDstOps(DstOps&: OutputOpsPieces, Ty: MRI.getType(Reg: MI.getReg(Idx: 0)), NumElts);
5233
5234 // Instructions that perform register split will be inserted in basic block
5235 // where register is defined (basic block is in the next operand).
5236 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5237 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5238 UseIdx += 2, ++UseNo) {
5239 MachineBasicBlock &OpMBB = *MI.getOperand(i: UseIdx + 1).getMBB();
5240 MIRBuilder.setInsertPt(MBB&: OpMBB, II: OpMBB.getFirstTerminatorForward());
5241 extractVectorParts(Reg: MI.getReg(Idx: UseIdx), NumElts, VRegs&: InputOpsPieces[UseNo],
5242 MIRBuilder, MRI);
5243 }
5244
5245 // Build PHIs with fewer elements.
5246 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5247 MIRBuilder.setInsertPt(MBB&: *MI.getParent(), II: MI);
5248 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5249 auto Phi = MIRBuilder.buildInstr(Opcode: TargetOpcode::G_PHI);
5250 Phi.addDef(
5251 RegNo: MRI.createGenericVirtualRegister(Ty: OutputOpsPieces[i].getLLTTy(MRI)));
5252 OutputRegs.push_back(Elt: Phi.getReg(Idx: 0));
5253
5254 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5255 Phi.addUse(RegNo: InputOpsPieces[j][i]);
5256 Phi.add(MO: MI.getOperand(i: 1 + j * 2 + 1));
5257 }
5258 }
5259
5260 // Set the insert point after the existing PHIs
5261 MachineBasicBlock &MBB = *MI.getParent();
5262 MIRBuilder.setInsertPt(MBB, II: MBB.getFirstNonPHI());
5263
5264 // Merge small outputs into MI's def.
5265 if (NumLeftovers) {
5266 mergeMixedSubvectors(DstReg: MI.getReg(Idx: 0), PartRegs: OutputRegs);
5267 } else {
5268 MIRBuilder.buildMergeLikeInstr(Res: MI.getReg(Idx: 0), Ops: OutputRegs);
5269 }
5270
5271 MI.eraseFromParent();
5272 return Legalized;
5273}
5274
5275LegalizerHelper::LegalizeResult
5276LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
5277 unsigned TypeIdx,
5278 LLT NarrowTy) {
5279 const int NumDst = MI.getNumOperands() - 1;
5280 const Register SrcReg = MI.getOperand(i: NumDst).getReg();
5281 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
5282 LLT SrcTy = MRI.getType(Reg: SrcReg);
5283
5284 if (TypeIdx != 1 || NarrowTy == DstTy)
5285 return UnableToLegalize;
5286
5287 // Requires compatible types. Otherwise SrcReg should have been defined by
5288 // merge-like instruction that would get artifact combined. Most likely
5289 // instruction that defines SrcReg has to perform more/fewer elements
5290 // legalization compatible with NarrowTy.
5291 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5292 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5293
5294 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5295 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5296 return UnableToLegalize;
5297
5298 // This is most likely DstTy (smaller then register size) packed in SrcTy
5299 // (larger then register size) and since unmerge was not combined it will be
5300 // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
5301 // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
5302
5303 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5304 //
5305 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5306 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5307 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5308 auto Unmerge = MIRBuilder.buildUnmerge(Res: NarrowTy, Op: SrcReg);
5309 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5310 const int PartsPerUnmerge = NumDst / NumUnmerge;
5311
5312 for (int I = 0; I != NumUnmerge; ++I) {
5313 auto MIB = MIRBuilder.buildInstr(Opcode: TargetOpcode::G_UNMERGE_VALUES);
5314
5315 for (int J = 0; J != PartsPerUnmerge; ++J)
5316 MIB.addDef(RegNo: MI.getOperand(i: I * PartsPerUnmerge + J).getReg());
5317 MIB.addUse(RegNo: Unmerge.getReg(Idx: I));
5318 }
5319
5320 MI.eraseFromParent();
5321 return Legalized;
5322}
5323
5324LegalizerHelper::LegalizeResult
5325LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5326 LLT NarrowTy) {
5327 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5328 // Requires compatible types. Otherwise user of DstReg did not perform unmerge
5329 // that should have been artifact combined. Most likely instruction that uses
5330 // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
5331 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5332 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5333 if (NarrowTy == SrcTy)
5334 return UnableToLegalize;
5335
5336 // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use
5337 // is for old mir tests. Since the changes to more/fewer elements it should no
5338 // longer be possible to generate MIR like this when starting from llvm-ir
5339 // because LCMTy approach was replaced with merge/unmerge to vector elements.
5340 if (TypeIdx == 1) {
5341 assert(SrcTy.isVector() && "Expected vector types");
5342 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5343 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5344 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5345 return UnableToLegalize;
5346 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5347 //
5348 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5349 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5350 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5351 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5352 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5353 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5354
5355 SmallVector<Register, 8> Elts;
5356 LLT EltTy = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getScalarType();
5357 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5358 auto Unmerge = MIRBuilder.buildUnmerge(Res: EltTy, Op: MI.getOperand(i).getReg());
5359 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5360 Elts.push_back(Elt: Unmerge.getReg(Idx: j));
5361 }
5362
5363 SmallVector<Register, 8> NarrowTyElts;
5364 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5365 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5366 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5367 ++i, Offset += NumNarrowTyElts) {
5368 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5369 NarrowTyElts.push_back(
5370 Elt: MIRBuilder.buildMergeLikeInstr(Res: NarrowTy, Ops: Pieces).getReg(Idx: 0));
5371 }
5372
5373 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: NarrowTyElts);
5374 MI.eraseFromParent();
5375 return Legalized;
5376 }
5377
5378 assert(TypeIdx == 0 && "Bad type index");
5379 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5380 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5381 return UnableToLegalize;
5382
5383 // This is most likely SrcTy (smaller then register size) packed in DstTy
5384 // (larger then register size) and since merge was not combined it will be
5385 // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
5386 // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
5387
5388 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5389 //
5390 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5391 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5392 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5393 SmallVector<Register, 8> NarrowTyElts;
5394 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5395 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5396 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
5397 for (unsigned i = 0; i < NumParts; ++i) {
5398 SmallVector<Register, 8> Sources;
5399 for (unsigned j = 0; j < NumElts; ++j)
5400 Sources.push_back(Elt: MI.getOperand(i: 1 + i * NumElts + j).getReg());
5401 NarrowTyElts.push_back(
5402 Elt: MIRBuilder.buildMergeLikeInstr(Res: NarrowTy, Ops: Sources).getReg(Idx: 0));
5403 }
5404
5405 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: NarrowTyElts);
5406 MI.eraseFromParent();
5407 return Legalized;
5408}
5409
5410LegalizerHelper::LegalizeResult
5411LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
5412 unsigned TypeIdx,
5413 LLT NarrowVecTy) {
5414 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5415 Register InsertVal;
5416 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5417
5418 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5419 if (IsInsert)
5420 InsertVal = MI.getOperand(i: 2).getReg();
5421
5422 Register Idx = MI.getOperand(i: MI.getNumOperands() - 1).getReg();
5423 LLT VecTy = MRI.getType(Reg: SrcVec);
5424
5425 // If the index is a constant, we can really break this down as you would
5426 // expect, and index into the target size pieces.
5427 auto MaybeCst = getIConstantVRegValWithLookThrough(VReg: Idx, MRI);
5428 if (MaybeCst) {
5429 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5430 // Avoid out of bounds indexing the pieces.
5431 if (IdxVal >= VecTy.getNumElements()) {
5432 MIRBuilder.buildUndef(Res: DstReg);
5433 MI.eraseFromParent();
5434 return Legalized;
5435 }
5436
5437 if (!NarrowVecTy.isVector()) {
5438 SmallVector<Register, 8> SplitPieces;
5439 extractParts(Reg: MI.getOperand(i: 1).getReg(), Ty: NarrowVecTy,
5440 NumParts: VecTy.getNumElements(), VRegs&: SplitPieces, MIRBuilder, MRI);
5441 if (IsInsert) {
5442 SplitPieces[IdxVal] = InsertVal;
5443 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0).getReg(), Ops: SplitPieces);
5444 } else {
5445 MIRBuilder.buildCopy(Res: MI.getOperand(i: 0).getReg(), Op: SplitPieces[IdxVal]);
5446 }
5447 } else {
5448 SmallVector<Register, 8> VecParts;
5449 LLT GCDTy = extractGCDType(Parts&: VecParts, DstTy: VecTy, NarrowTy: NarrowVecTy, SrcReg: SrcVec);
5450
5451 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5452 LLT LCMTy = buildLCMMergePieces(DstTy: VecTy, NarrowTy: NarrowVecTy, GCDTy, VRegs&: VecParts,
5453 PadStrategy: TargetOpcode::G_ANYEXT);
5454
5455 unsigned NewNumElts = NarrowVecTy.getNumElements();
5456
5457 LLT IdxTy = MRI.getType(Reg: Idx);
5458 int64_t PartIdx = IdxVal / NewNumElts;
5459 auto NewIdx =
5460 MIRBuilder.buildConstant(Res: IdxTy, Val: IdxVal - NewNumElts * PartIdx);
5461
5462 if (IsInsert) {
5463 LLT PartTy = MRI.getType(Reg: VecParts[PartIdx]);
5464
5465 // Use the adjusted index to insert into one of the subvectors.
5466 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5467 Res: PartTy, Val: VecParts[PartIdx], Elt: InsertVal, Idx: NewIdx);
5468 VecParts[PartIdx] = InsertPart.getReg(Idx: 0);
5469
5470 // Recombine the inserted subvector with the others to reform the result
5471 // vector.
5472 buildWidenedRemergeToDst(DstReg, LCMTy, RemergeRegs: VecParts);
5473 } else {
5474 MIRBuilder.buildExtractVectorElement(Res: DstReg, Val: VecParts[PartIdx], Idx: NewIdx);
5475 }
5476 }
5477
5478 MI.eraseFromParent();
5479 return Legalized;
5480 }
5481
5482 // With a variable index, we can't perform the operation in a smaller type, so
5483 // we're forced to expand this.
5484 //
5485 // TODO: We could emit a chain of compare/select to figure out which piece to
5486 // index.
5487 return lowerExtractInsertVectorElt(MI);
5488}
5489
5490LegalizerHelper::LegalizeResult
5491LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
5492 LLT NarrowTy) {
5493 // FIXME: Don't know how to handle secondary types yet.
5494 if (TypeIdx != 0)
5495 return UnableToLegalize;
5496
5497 if (!NarrowTy.isByteSized()) {
5498 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5499 return UnableToLegalize;
5500 }
5501
5502 // This implementation doesn't work for atomics. Give up instead of doing
5503 // something invalid.
5504 if (LdStMI.isAtomic())
5505 return UnableToLegalize;
5506
5507 bool IsLoad = isa<GLoad>(Val: LdStMI);
5508 Register ValReg = LdStMI.getReg(Idx: 0);
5509 Register AddrReg = LdStMI.getPointerReg();
5510 LLT ValTy = MRI.getType(Reg: ValReg);
5511
5512 // FIXME: Do we need a distinct NarrowMemory legalize action?
5513 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5514 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5515 return UnableToLegalize;
5516 }
5517
5518 int NumParts = -1;
5519 int NumLeftover = -1;
5520 LLT LeftoverTy;
5521 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5522 if (IsLoad) {
5523 std::tie(args&: NumParts, args&: NumLeftover) = getNarrowTypeBreakDown(OrigTy: ValTy, NarrowTy, LeftoverTy);
5524 } else {
5525 if (extractParts(Reg: ValReg, RegTy: ValTy, MainTy: NarrowTy, LeftoverTy, VRegs&: NarrowRegs,
5526 LeftoverVRegs&: NarrowLeftoverRegs, MIRBuilder, MRI)) {
5527 NumParts = NarrowRegs.size();
5528 NumLeftover = NarrowLeftoverRegs.size();
5529 }
5530 }
5531
5532 if (NumParts == -1)
5533 return UnableToLegalize;
5534
5535 LLT PtrTy = MRI.getType(Reg: AddrReg);
5536 const LLT OffsetTy = LLT::integer(SizeInBits: PtrTy.getSizeInBits());
5537
5538 unsigned TotalSize = ValTy.getSizeInBits();
5539
5540 // Split the load/store into PartTy sized pieces starting at Offset. If this
5541 // is a load, return the new registers in ValRegs. For a store, each elements
5542 // of ValRegs should be PartTy. Returns the next offset that needs to be
5543 // handled.
5544 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5545 auto MMO = LdStMI.getMMO();
5546 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5547 unsigned NumParts, unsigned Offset) -> unsigned {
5548 MachineFunction &MF = MIRBuilder.getMF();
5549 unsigned PartSize = PartTy.getSizeInBits();
5550 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5551 ++Idx) {
5552 unsigned ByteOffset = Offset / 8;
5553 Register NewAddrReg;
5554
5555 MIRBuilder.materializeObjectPtrOffset(Res&: NewAddrReg, Op0: AddrReg, ValueTy: OffsetTy,
5556 Value: ByteOffset);
5557
5558 MachineMemOperand *NewMMO =
5559 MF.getMachineMemOperand(MMO: &MMO, Offset: ByteOffset, Ty: PartTy);
5560
5561 if (IsLoad) {
5562 Register Dst = MRI.createGenericVirtualRegister(Ty: PartTy);
5563 ValRegs.push_back(Elt: Dst);
5564 MIRBuilder.buildLoad(Res: Dst, Addr: NewAddrReg, MMO&: *NewMMO);
5565 } else {
5566 MIRBuilder.buildStore(Val: ValRegs[Idx], Addr: NewAddrReg, MMO&: *NewMMO);
5567 }
5568 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5569 }
5570
5571 return Offset;
5572 };
5573
5574 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5575 unsigned HandledOffset =
5576 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5577
5578 // Handle the rest of the register if this isn't an even type breakdown.
5579 if (LeftoverTy.isValid())
5580 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5581
5582 if (IsLoad) {
5583 insertParts(DstReg: ValReg, ResultTy: ValTy, PartTy: NarrowTy, PartRegs: NarrowRegs,
5584 LeftoverTy, LeftoverRegs: NarrowLeftoverRegs);
5585 }
5586
5587 LdStMI.eraseFromParent();
5588 return Legalized;
5589}
5590
5591LegalizerHelper::LegalizeResult
5592LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
5593 LLT NarrowTy) {
5594 using namespace TargetOpcode;
5595 GenericMachineInstr &GMI = cast<GenericMachineInstr>(Val&: MI);
5596 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5597
5598 switch (MI.getOpcode()) {
5599 case G_IMPLICIT_DEF:
5600 case G_TRUNC:
5601 case G_AND:
5602 case G_OR:
5603 case G_XOR:
5604 case G_ADD:
5605 case G_SUB:
5606 case G_MUL:
5607 case G_PTR_ADD:
5608 case G_SMULH:
5609 case G_UMULH:
5610 case G_FADD:
5611 case G_FMUL:
5612 case G_FSUB:
5613 case G_FNEG:
5614 case G_FABS:
5615 case G_FCANONICALIZE:
5616 case G_FDIV:
5617 case G_FREM:
5618 case G_FMA:
5619 case G_FMAD:
5620 case G_FPOW:
5621 case G_FEXP:
5622 case G_FEXP2:
5623 case G_FEXP10:
5624 case G_FLOG:
5625 case G_FLOG2:
5626 case G_FLOG10:
5627 case G_FLDEXP:
5628 case G_FNEARBYINT:
5629 case G_FCEIL:
5630 case G_FFLOOR:
5631 case G_FRINT:
5632 case G_INTRINSIC_LRINT:
5633 case G_INTRINSIC_LLRINT:
5634 case G_INTRINSIC_ROUND:
5635 case G_INTRINSIC_ROUNDEVEN:
5636 case G_LROUND:
5637 case G_LLROUND:
5638 case G_INTRINSIC_TRUNC:
5639 case G_FMODF:
5640 case G_FCOS:
5641 case G_FSIN:
5642 case G_FTAN:
5643 case G_FACOS:
5644 case G_FASIN:
5645 case G_FATAN:
5646 case G_FATAN2:
5647 case G_FCOSH:
5648 case G_FSINH:
5649 case G_FTANH:
5650 case G_FSQRT:
5651 case G_BSWAP:
5652 case G_BITREVERSE:
5653 case G_SDIV:
5654 case G_UDIV:
5655 case G_SREM:
5656 case G_UREM:
5657 case G_SDIVREM:
5658 case G_UDIVREM:
5659 case G_SMIN:
5660 case G_SMAX:
5661 case G_UMIN:
5662 case G_UMAX:
5663 case G_ABS:
5664 case G_FMINNUM:
5665 case G_FMAXNUM:
5666 case G_FMINNUM_IEEE:
5667 case G_FMAXNUM_IEEE:
5668 case G_FMINIMUM:
5669 case G_FMAXIMUM:
5670 case G_FMINIMUMNUM:
5671 case G_FMAXIMUMNUM:
5672 case G_FSHL:
5673 case G_FSHR:
5674 case G_ROTL:
5675 case G_ROTR:
5676 case G_FREEZE:
5677 case G_SADDSAT:
5678 case G_SSUBSAT:
5679 case G_UADDSAT:
5680 case G_USUBSAT:
5681 case G_UMULO:
5682 case G_SMULO:
5683 case G_SHL:
5684 case G_LSHR:
5685 case G_ASHR:
5686 case G_SSHLSAT:
5687 case G_USHLSAT:
5688 case G_CTLZ:
5689 case G_CTLZ_ZERO_POISON:
5690 case G_CTTZ:
5691 case G_CTTZ_ZERO_POISON:
5692 case G_CTPOP:
5693 case G_CTLS:
5694 case G_FCOPYSIGN:
5695 case G_ZEXT:
5696 case G_SEXT:
5697 case G_ANYEXT:
5698 case G_FPEXT:
5699 case G_FPTRUNC:
5700 case G_SITOFP:
5701 case G_UITOFP:
5702 case G_FPTOSI:
5703 case G_FPTOUI:
5704 case G_FPTOSI_SAT:
5705 case G_FPTOUI_SAT:
5706 case G_INTTOPTR:
5707 case G_PTRTOINT:
5708 case G_ADDRSPACE_CAST:
5709 case G_UADDO:
5710 case G_USUBO:
5711 case G_UADDE:
5712 case G_USUBE:
5713 case G_SADDO:
5714 case G_SSUBO:
5715 case G_SADDE:
5716 case G_SSUBE:
5717 case G_STRICT_FADD:
5718 case G_STRICT_FSUB:
5719 case G_STRICT_FMUL:
5720 case G_STRICT_FMA:
5721 case G_STRICT_FLDEXP:
5722 case G_FFREXP:
5723 case G_TRUNC_SSAT_S:
5724 case G_TRUNC_SSAT_U:
5725 case G_TRUNC_USAT_U:
5726 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts);
5727 case G_ICMP:
5728 case G_FCMP:
5729 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts, NonVecOpIndices: {1 /*cpm predicate*/});
5730 case G_IS_FPCLASS:
5731 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts, NonVecOpIndices: {2, 3 /*mask,fpsem*/});
5732 case G_SELECT:
5733 if (MRI.getType(Reg: MI.getOperand(i: 1).getReg()).isVector())
5734 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts);
5735 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts, NonVecOpIndices: {1 /*scalar cond*/});
5736 case G_PHI:
5737 return fewerElementsVectorPhi(MI&: GMI, NumElts);
5738 case G_UNMERGE_VALUES:
5739 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5740 case G_BUILD_VECTOR:
5741 assert(TypeIdx == 0 && "not a vector type index");
5742 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5743 case G_CONCAT_VECTORS:
5744 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5745 return UnableToLegalize;
5746 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5747 case G_EXTRACT_VECTOR_ELT:
5748 case G_INSERT_VECTOR_ELT:
5749 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowVecTy: NarrowTy);
5750 case G_LOAD:
5751 case G_STORE:
5752 return reduceLoadStoreWidth(LdStMI&: cast<GLoadStore>(Val&: MI), TypeIdx, NarrowTy);
5753 case G_SEXT_INREG:
5754 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts, NonVecOpIndices: {2 /*imm*/});
5755 GISEL_VECREDUCE_CASES_NONSEQ
5756 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5757 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5758 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5759 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5760 case G_SHUFFLE_VECTOR:
5761 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5762 case G_FPOWI:
5763 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts, NonVecOpIndices: {2 /*pow*/});
5764 case G_BITCAST:
5765 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5766 case G_INTRINSIC_FPTRUNC_ROUND:
5767 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts, NonVecOpIndices: {2});
5768 default:
5769 return UnableToLegalize;
5770 }
5771}
5772
5773LegalizerHelper::LegalizeResult
5774LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
5775 LLT NarrowTy) {
5776 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5777 "Not a bitcast operation");
5778
5779 if (TypeIdx != 0)
5780 return UnableToLegalize;
5781
5782 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5783
5784 unsigned NewElemCount =
5785 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5786 SmallVector<Register> SrcVRegs, BitcastVRegs;
5787 if (NewElemCount == 1) {
5788 LLT SrcNarrowTy = SrcTy.getElementType();
5789
5790 auto Unmerge = MIRBuilder.buildUnmerge(Res: SrcNarrowTy, Op: SrcReg);
5791 getUnmergeResults(Regs&: SrcVRegs, MI: *Unmerge);
5792 } else {
5793 LLT SrcNarrowTy =
5794 SrcTy.changeVectorElementCount(EC: ElementCount::getFixed(MinVal: NewElemCount));
5795
5796 // Split the Src and Dst Reg into smaller registers
5797 if (extractGCDType(Parts&: SrcVRegs, DstTy, NarrowTy: SrcNarrowTy, SrcReg) != SrcNarrowTy)
5798 return UnableToLegalize;
5799 }
5800
5801 // Build new smaller bitcast instructions
5802 // Not supporting Leftover types for now but will have to
5803 for (Register Reg : SrcVRegs)
5804 BitcastVRegs.push_back(Elt: MIRBuilder.buildBitcast(Dst: NarrowTy, Src: Reg).getReg(Idx: 0));
5805
5806 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: BitcastVRegs);
5807 MI.eraseFromParent();
5808 return Legalized;
5809}
5810
5811LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5812 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5813 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5814 if (TypeIdx != 0)
5815 return UnableToLegalize;
5816
5817 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5818 MI.getFirst3RegLLTs();
5819 ArrayRef<int> Mask = MI.getOperand(i: 3).getShuffleMask();
5820 // The shuffle should be canonicalized by now.
5821 if (DstTy != Src1Ty)
5822 return UnableToLegalize;
5823 if (DstTy != Src2Ty)
5824 return UnableToLegalize;
5825
5826 if (!isPowerOf2_32(Value: DstTy.getNumElements()))
5827 return UnableToLegalize;
5828
5829 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5830 // Further legalization attempts will be needed to do split further.
5831 NarrowTy =
5832 DstTy.changeElementCount(EC: DstTy.getElementCount().divideCoefficientBy(RHS: 2));
5833 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5834
5835 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5836 extractParts(Reg: Src1Reg, Ty: NarrowTy, NumParts: 2, VRegs&: SplitSrc1Regs, MIRBuilder, MRI);
5837 extractParts(Reg: Src2Reg, Ty: NarrowTy, NumParts: 2, VRegs&: SplitSrc2Regs, MIRBuilder, MRI);
5838 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5839 SplitSrc2Regs[1]};
5840
5841 Register Hi, Lo;
5842
5843 // If Lo or Hi uses elements from at most two of the four input vectors, then
5844 // express it as a vector shuffle of those two inputs. Otherwise extract the
5845 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
5846 SmallVector<int, 16> Ops;
5847 for (unsigned High = 0; High < 2; ++High) {
5848 Register &Output = High ? Hi : Lo;
5849
5850 // Build a shuffle mask for the output, discovering on the fly which
5851 // input vectors to use as shuffle operands (recorded in InputUsed).
5852 // If building a suitable shuffle vector proves too hard, then bail
5853 // out with useBuildVector set.
5854 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5855 unsigned FirstMaskIdx = High * NewElts;
5856 bool UseBuildVector = false;
5857 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5858 // The mask element. This indexes into the input.
5859 int Idx = Mask[FirstMaskIdx + MaskOffset];
5860
5861 // The input vector this mask element indexes into.
5862 unsigned Input = (unsigned)Idx / NewElts;
5863
5864 if (Input >= std::size(Inputs)) {
5865 // The mask element does not index into any input vector.
5866 Ops.push_back(Elt: -1);
5867 continue;
5868 }
5869
5870 // Turn the index into an offset from the start of the input vector.
5871 Idx -= Input * NewElts;
5872
5873 // Find or create a shuffle vector operand to hold this input.
5874 unsigned OpNo;
5875 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5876 if (InputUsed[OpNo] == Input) {
5877 // This input vector is already an operand.
5878 break;
5879 } else if (InputUsed[OpNo] == -1U) {
5880 // Create a new operand for this input vector.
5881 InputUsed[OpNo] = Input;
5882 break;
5883 }
5884 }
5885
5886 if (OpNo >= std::size(InputUsed)) {
5887 // More than two input vectors used! Give up on trying to create a
5888 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5889 UseBuildVector = true;
5890 break;
5891 }
5892
5893 // Add the mask index for the new shuffle vector.
5894 Ops.push_back(Elt: Idx + OpNo * NewElts);
5895 }
5896
5897 if (UseBuildVector) {
5898 LLT EltTy = NarrowTy.getElementType();
5899 SmallVector<Register, 16> SVOps;
5900
5901 // Extract the input elements by hand.
5902 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5903 // The mask element. This indexes into the input.
5904 int Idx = Mask[FirstMaskIdx + MaskOffset];
5905
5906 // The input vector this mask element indexes into.
5907 unsigned Input = (unsigned)Idx / NewElts;
5908
5909 if (Input >= std::size(Inputs)) {
5910 // The mask element is "undef" or indexes off the end of the input.
5911 SVOps.push_back(Elt: MIRBuilder.buildUndef(Res: EltTy).getReg(Idx: 0));
5912 continue;
5913 }
5914
5915 // Turn the index into an offset from the start of the input vector.
5916 Idx -= Input * NewElts;
5917
5918 // Extract the vector element by hand.
5919 SVOps.push_back(Elt: MIRBuilder
5920 .buildExtractVectorElement(
5921 Res: EltTy, Val: Inputs[Input],
5922 Idx: MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 32), Val: Idx))
5923 .getReg(Idx: 0));
5924 }
5925
5926 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5927 Output = MIRBuilder.buildBuildVector(Res: NarrowTy, Ops: SVOps).getReg(Idx: 0);
5928 } else if (InputUsed[0] == -1U) {
5929 // No input vectors were used! The result is undefined.
5930 Output = MIRBuilder.buildUndef(Res: NarrowTy).getReg(Idx: 0);
5931 } else if (NewElts == 1) {
5932 Output = MIRBuilder.buildCopy(Res: NarrowTy, Op: Inputs[InputUsed[0]]).getReg(Idx: 0);
5933 } else {
5934 Register Op0 = Inputs[InputUsed[0]];
5935 // If only one input was used, use an undefined vector for the other.
5936 Register Op1 = InputUsed[1] == -1U
5937 ? MIRBuilder.buildUndef(Res: NarrowTy).getReg(Idx: 0)
5938 : Inputs[InputUsed[1]];
5939 // At least one input vector was used. Create a new shuffle vector.
5940 Output = MIRBuilder.buildShuffleVector(Res: NarrowTy, Src1: Op0, Src2: Op1, Mask: Ops).getReg(Idx: 0);
5941 }
5942
5943 Ops.clear();
5944 }
5945
5946 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: {Lo, Hi});
5947 MI.eraseFromParent();
5948 return Legalized;
5949}
5950
5951LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5952 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5953 auto &RdxMI = cast<GVecReduce>(Val&: MI);
5954
5955 if (TypeIdx != 1)
5956 return UnableToLegalize;
5957
5958 // The semantics of the normal non-sequential reductions allow us to freely
5959 // re-associate the operation.
5960 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5961
5962 if (NarrowTy.isVector() &&
5963 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5964 return UnableToLegalize;
5965
5966 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5967 SmallVector<Register> SplitSrcs;
5968 // If NarrowTy is a scalar then we're being asked to scalarize.
5969 const unsigned NumParts =
5970 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5971 : SrcTy.getNumElements();
5972
5973 extractParts(Reg: SrcReg, Ty: NarrowTy, NumParts, VRegs&: SplitSrcs, MIRBuilder, MRI);
5974 if (NarrowTy.isScalar()) {
5975 if (DstTy != NarrowTy)
5976 return UnableToLegalize; // FIXME: handle implicit extensions.
5977
5978 if (isPowerOf2_32(Value: NumParts)) {
5979 // Generate a tree of scalar operations to reduce the critical path.
5980 SmallVector<Register> PartialResults;
5981 unsigned NumPartsLeft = NumParts;
5982 while (NumPartsLeft > 1) {
5983 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5984 PartialResults.emplace_back(
5985 Args: MIRBuilder
5986 .buildInstr(Opc: ScalarOpc, DstOps: {NarrowTy},
5987 SrcOps: {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5988 .getReg(Idx: 0));
5989 }
5990 SplitSrcs = PartialResults;
5991 PartialResults.clear();
5992 NumPartsLeft = SplitSrcs.size();
5993 }
5994 assert(SplitSrcs.size() == 1);
5995 MIRBuilder.buildCopy(Res: DstReg, Op: SplitSrcs[0]);
5996 MI.eraseFromParent();
5997 return Legalized;
5998 }
5999 // If we can't generate a tree, then just do sequential operations.
6000 Register Acc = SplitSrcs[0];
6001 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
6002 Acc = MIRBuilder.buildInstr(Opc: ScalarOpc, DstOps: {NarrowTy}, SrcOps: {Acc, SplitSrcs[Idx]})
6003 .getReg(Idx: 0);
6004 MIRBuilder.buildCopy(Res: DstReg, Op: Acc);
6005 MI.eraseFromParent();
6006 return Legalized;
6007 }
6008 SmallVector<Register> PartialReductions;
6009 for (unsigned Part = 0; Part < NumParts; ++Part) {
6010 PartialReductions.push_back(
6011 Elt: MIRBuilder.buildInstr(Opc: RdxMI.getOpcode(), DstOps: {DstTy}, SrcOps: {SplitSrcs[Part]})
6012 .getReg(Idx: 0));
6013 }
6014
6015 // If the types involved are powers of 2, we can generate intermediate vector
6016 // ops, before generating a final reduction operation.
6017 if (isPowerOf2_32(Value: SrcTy.getNumElements()) &&
6018 isPowerOf2_32(Value: NarrowTy.getNumElements())) {
6019 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
6020 }
6021
6022 Register Acc = PartialReductions[0];
6023 for (unsigned Part = 1; Part < NumParts; ++Part) {
6024 if (Part == NumParts - 1) {
6025 MIRBuilder.buildInstr(Opc: ScalarOpc, DstOps: {DstReg},
6026 SrcOps: {Acc, PartialReductions[Part]});
6027 } else {
6028 Acc = MIRBuilder
6029 .buildInstr(Opc: ScalarOpc, DstOps: {DstTy}, SrcOps: {Acc, PartialReductions[Part]})
6030 .getReg(Idx: 0);
6031 }
6032 }
6033 MI.eraseFromParent();
6034 return Legalized;
6035}
6036
6037LegalizerHelper::LegalizeResult
6038LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
6039 unsigned int TypeIdx,
6040 LLT NarrowTy) {
6041 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
6042 MI.getFirst3RegLLTs();
6043 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
6044 DstTy != NarrowTy)
6045 return UnableToLegalize;
6046
6047 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
6048 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
6049 "Unexpected vecreduce opcode");
6050 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
6051 ? TargetOpcode::G_FADD
6052 : TargetOpcode::G_FMUL;
6053
6054 SmallVector<Register> SplitSrcs;
6055 unsigned NumParts = SrcTy.getNumElements();
6056 extractParts(Reg: SrcReg, Ty: NarrowTy, NumParts, VRegs&: SplitSrcs, MIRBuilder, MRI);
6057 Register Acc = ScalarReg;
6058 for (unsigned i = 0; i < NumParts; i++)
6059 Acc = MIRBuilder.buildInstr(Opc: ScalarOpc, DstOps: {NarrowTy}, SrcOps: {Acc, SplitSrcs[i]})
6060 .getReg(Idx: 0);
6061
6062 MIRBuilder.buildCopy(Res: DstReg, Op: Acc);
6063 MI.eraseFromParent();
6064 return Legalized;
6065}
6066
6067LegalizerHelper::LegalizeResult
6068LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
6069 LLT SrcTy, LLT NarrowTy,
6070 unsigned ScalarOpc) {
6071 SmallVector<Register> SplitSrcs;
6072 // Split the sources into NarrowTy size pieces.
6073 extractParts(Reg: SrcReg, Ty: NarrowTy,
6074 NumParts: SrcTy.getNumElements() / NarrowTy.getNumElements(), VRegs&: SplitSrcs,
6075 MIRBuilder, MRI);
6076 // We're going to do a tree reduction using vector operations until we have
6077 // one NarrowTy size value left.
6078 while (SplitSrcs.size() > 1) {
6079 SmallVector<Register> PartialRdxs;
6080 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
6081 Register LHS = SplitSrcs[Idx];
6082 Register RHS = SplitSrcs[Idx + 1];
6083 // Create the intermediate vector op.
6084 Register Res =
6085 MIRBuilder.buildInstr(Opc: ScalarOpc, DstOps: {NarrowTy}, SrcOps: {LHS, RHS}).getReg(Idx: 0);
6086 PartialRdxs.push_back(Elt: Res);
6087 }
6088 SplitSrcs = std::move(PartialRdxs);
6089 }
6090 // Finally generate the requested NarrowTy based reduction.
6091 Observer.changingInstr(MI);
6092 MI.getOperand(i: 1).setReg(SplitSrcs[0]);
6093 Observer.changedInstr(MI);
6094 return Legalized;
6095}
6096
6097LegalizerHelper::LegalizeResult
6098LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
6099 const LLT HalfTy, const LLT AmtTy) {
6100
6101 Register InL = MRI.createGenericVirtualRegister(Ty: HalfTy);
6102 Register InH = MRI.createGenericVirtualRegister(Ty: HalfTy);
6103 MIRBuilder.buildUnmerge(Res: {InL, InH}, Op: MI.getOperand(i: 1));
6104
6105 if (Amt.isZero()) {
6106 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0), Ops: {InL, InH});
6107 MI.eraseFromParent();
6108 return Legalized;
6109 }
6110
6111 LLT NVT = HalfTy;
6112 unsigned NVTBits = HalfTy.getSizeInBits();
6113 unsigned VTBits = 2 * NVTBits;
6114
6115 SrcOp Lo(Register(0)), Hi(Register(0));
6116 if (MI.getOpcode() == TargetOpcode::G_SHL) {
6117 if (Amt.ugt(RHS: VTBits)) {
6118 Lo = Hi = MIRBuilder.buildConstant(Res: NVT, Val: 0);
6119 } else if (Amt.ugt(RHS: NVTBits)) {
6120 Lo = MIRBuilder.buildConstant(Res: NVT, Val: 0);
6121 Hi = MIRBuilder.buildShl(Dst: NVT, Src0: InL,
6122 Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: Amt - NVTBits));
6123 } else if (Amt == NVTBits) {
6124 Lo = MIRBuilder.buildConstant(Res: NVT, Val: 0);
6125 Hi = InL;
6126 } else {
6127 Lo = MIRBuilder.buildShl(Dst: NVT, Src0: InL, Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: Amt));
6128 auto OrLHS =
6129 MIRBuilder.buildShl(Dst: NVT, Src0: InH, Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: Amt));
6130 auto OrRHS = MIRBuilder.buildLShr(
6131 Dst: NVT, Src0: InL, Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: -Amt + NVTBits));
6132 Hi = MIRBuilder.buildOr(Dst: NVT, Src0: OrLHS, Src1: OrRHS);
6133 }
6134 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6135 if (Amt.ugt(RHS: VTBits)) {
6136 Lo = Hi = MIRBuilder.buildConstant(Res: NVT, Val: 0);
6137 } else if (Amt.ugt(RHS: NVTBits)) {
6138 Lo = MIRBuilder.buildLShr(Dst: NVT, Src0: InH,
6139 Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: Amt - NVTBits));
6140 Hi = MIRBuilder.buildConstant(Res: NVT, Val: 0);
6141 } else if (Amt == NVTBits) {
6142 Lo = InH;
6143 Hi = MIRBuilder.buildConstant(Res: NVT, Val: 0);
6144 } else {
6145 auto ShiftAmtConst = MIRBuilder.buildConstant(Res: AmtTy, Val: Amt);
6146
6147 auto OrLHS = MIRBuilder.buildLShr(Dst: NVT, Src0: InL, Src1: ShiftAmtConst);
6148 auto OrRHS = MIRBuilder.buildShl(
6149 Dst: NVT, Src0: InH, Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: -Amt + NVTBits));
6150
6151 Lo = MIRBuilder.buildOr(Dst: NVT, Src0: OrLHS, Src1: OrRHS);
6152 Hi = MIRBuilder.buildLShr(Dst: NVT, Src0: InH, Src1: ShiftAmtConst);
6153 }
6154 } else {
6155 if (Amt.ugt(RHS: VTBits)) {
6156 Hi = Lo = MIRBuilder.buildAShr(
6157 Dst: NVT, Src0: InH, Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: NVTBits - 1));
6158 } else if (Amt.ugt(RHS: NVTBits)) {
6159 Lo = MIRBuilder.buildAShr(Dst: NVT, Src0: InH,
6160 Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: Amt - NVTBits));
6161 Hi = MIRBuilder.buildAShr(Dst: NVT, Src0: InH,
6162 Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: NVTBits - 1));
6163 } else if (Amt == NVTBits) {
6164 Lo = InH;
6165 Hi = MIRBuilder.buildAShr(Dst: NVT, Src0: InH,
6166 Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: NVTBits - 1));
6167 } else {
6168 auto ShiftAmtConst = MIRBuilder.buildConstant(Res: AmtTy, Val: Amt);
6169
6170 auto OrLHS = MIRBuilder.buildLShr(Dst: NVT, Src0: InL, Src1: ShiftAmtConst);
6171 auto OrRHS = MIRBuilder.buildShl(
6172 Dst: NVT, Src0: InH, Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: -Amt + NVTBits));
6173
6174 Lo = MIRBuilder.buildOr(Dst: NVT, Src0: OrLHS, Src1: OrRHS);
6175 Hi = MIRBuilder.buildAShr(Dst: NVT, Src0: InH, Src1: ShiftAmtConst);
6176 }
6177 }
6178
6179 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0), Ops: {Lo, Hi});
6180 MI.eraseFromParent();
6181
6182 return Legalized;
6183}
6184
6185LegalizerHelper::LegalizeResult
6186LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
6187 LLT RequestedTy) {
6188 if (TypeIdx == 1) {
6189 Observer.changingInstr(MI);
6190 narrowScalarSrc(MI, NarrowTy: RequestedTy, OpIdx: 2);
6191 Observer.changedInstr(MI);
6192 return Legalized;
6193 }
6194
6195 Register DstReg = MI.getOperand(i: 0).getReg();
6196 LLT DstTy = MRI.getType(Reg: DstReg);
6197 if (DstTy.isVector())
6198 return UnableToLegalize;
6199
6200 Register Amt = MI.getOperand(i: 2).getReg();
6201 LLT ShiftAmtTy = MRI.getType(Reg: Amt);
6202 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6203 if (DstEltSize % 2 != 0)
6204 return UnableToLegalize;
6205
6206 // Check if we should use multi-way splitting instead of recursive binary
6207 // splitting.
6208 //
6209 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6210 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6211 // and dependency chains created by usual binary splitting approach
6212 // (128->64->32).
6213 //
6214 // The >= 8 parts threshold ensures we only use this optimization when binary
6215 // splitting would require multiple recursive passes, avoiding overhead for
6216 // simple 2-way splits where binary approach is sufficient.
6217 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6218 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6219 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6220 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6221 // steps).
6222 if (NumParts >= 8)
6223 return narrowScalarShiftMultiway(MI, TargetTy: RequestedTy);
6224 }
6225
6226 // Fall back to binary splitting:
6227 // Ignore the input type. We can only go to exactly half the size of the
6228 // input. If that isn't small enough, the resulting pieces will be further
6229 // legalized.
6230 const unsigned NewBitSize = DstEltSize / 2;
6231 const LLT HalfTy = DstTy.getScalarType().changeElementSize(NewEltSize: NewBitSize);
6232 const LLT CondTy = LLT::integer(SizeInBits: 1);
6233
6234 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: Amt, MRI)) {
6235 return narrowScalarShiftByConstant(MI, Amt: VRegAndVal->Value, HalfTy,
6236 AmtTy: ShiftAmtTy);
6237 }
6238
6239 // TODO: Expand with known bits.
6240
6241 // Handle the fully general expansion by an unknown amount.
6242 auto NewBits = MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: NewBitSize);
6243
6244 Register InL = MRI.createGenericVirtualRegister(Ty: HalfTy);
6245 Register InH = MRI.createGenericVirtualRegister(Ty: HalfTy);
6246 MIRBuilder.buildUnmerge(Res: {InL, InH}, Op: MI.getOperand(i: 1));
6247
6248 auto AmtExcess = MIRBuilder.buildSub(Dst: ShiftAmtTy, Src0: Amt, Src1: NewBits);
6249 auto AmtLack = MIRBuilder.buildSub(Dst: ShiftAmtTy, Src0: NewBits, Src1: Amt);
6250
6251 auto Zero = MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: 0);
6252 auto IsShort = MIRBuilder.buildICmp(Pred: ICmpInst::ICMP_ULT, Res: CondTy, Op0: Amt, Op1: NewBits);
6253 auto IsZero = MIRBuilder.buildICmp(Pred: ICmpInst::ICMP_EQ, Res: CondTy, Op0: Amt, Op1: Zero);
6254
6255 Register ResultRegs[2];
6256 switch (MI.getOpcode()) {
6257 case TargetOpcode::G_SHL: {
6258 // Short: ShAmt < NewBitSize
6259 auto LoS = MIRBuilder.buildShl(Dst: HalfTy, Src0: InL, Src1: Amt);
6260
6261 auto LoOr = MIRBuilder.buildLShr(Dst: HalfTy, Src0: InL, Src1: AmtLack);
6262 auto HiOr = MIRBuilder.buildShl(Dst: HalfTy, Src0: InH, Src1: Amt);
6263 auto HiS = MIRBuilder.buildOr(Dst: HalfTy, Src0: LoOr, Src1: HiOr);
6264
6265 // Long: ShAmt >= NewBitSize
6266 auto LoL = MIRBuilder.buildConstant(Res: HalfTy, Val: 0); // Lo part is zero.
6267 auto HiL = MIRBuilder.buildShl(Dst: HalfTy, Src0: InL, Src1: AmtExcess); // Hi from Lo part.
6268
6269 auto Lo = MIRBuilder.buildSelect(Res: HalfTy, Tst: IsShort, Op0: LoS, Op1: LoL);
6270 auto Hi = MIRBuilder.buildSelect(
6271 Res: HalfTy, Tst: IsZero, Op0: InH, Op1: MIRBuilder.buildSelect(Res: HalfTy, Tst: IsShort, Op0: HiS, Op1: HiL));
6272
6273 ResultRegs[0] = Lo.getReg(Idx: 0);
6274 ResultRegs[1] = Hi.getReg(Idx: 0);
6275 break;
6276 }
6277 case TargetOpcode::G_LSHR:
6278 case TargetOpcode::G_ASHR: {
6279 // Short: ShAmt < NewBitSize
6280 auto HiS = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {HalfTy}, SrcOps: {InH, Amt});
6281
6282 auto LoOr = MIRBuilder.buildLShr(Dst: HalfTy, Src0: InL, Src1: Amt);
6283 auto HiOr = MIRBuilder.buildShl(Dst: HalfTy, Src0: InH, Src1: AmtLack);
6284 auto LoS = MIRBuilder.buildOr(Dst: HalfTy, Src0: LoOr, Src1: HiOr);
6285
6286 // Long: ShAmt >= NewBitSize
6287 MachineInstrBuilder HiL;
6288 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6289 HiL = MIRBuilder.buildConstant(Res: HalfTy, Val: 0); // Hi part is zero.
6290 } else {
6291 auto ShiftAmt = MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: NewBitSize - 1);
6292 HiL = MIRBuilder.buildAShr(Dst: HalfTy, Src0: InH, Src1: ShiftAmt); // Sign of Hi part.
6293 }
6294 auto LoL = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {HalfTy},
6295 SrcOps: {InH, AmtExcess}); // Lo from Hi part.
6296
6297 auto Lo = MIRBuilder.buildSelect(
6298 Res: HalfTy, Tst: IsZero, Op0: InL, Op1: MIRBuilder.buildSelect(Res: HalfTy, Tst: IsShort, Op0: LoS, Op1: LoL));
6299
6300 auto Hi = MIRBuilder.buildSelect(Res: HalfTy, Tst: IsShort, Op0: HiS, Op1: HiL);
6301
6302 ResultRegs[0] = Lo.getReg(Idx: 0);
6303 ResultRegs[1] = Hi.getReg(Idx: 0);
6304 break;
6305 }
6306 default:
6307 llvm_unreachable("not a shift");
6308 }
6309
6310 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: ResultRegs);
6311 MI.eraseFromParent();
6312 return Legalized;
6313}
6314
6315Register LegalizerHelper::buildConstantShiftPart(unsigned Opcode,
6316 unsigned PartIdx,
6317 unsigned NumParts,
6318 ArrayRef<Register> SrcParts,
6319 const ShiftParams &Params,
6320 LLT TargetTy, LLT ShiftAmtTy) {
6321 auto WordShiftConst = getIConstantVRegVal(VReg: Params.WordShift, MRI);
6322 auto BitShiftConst = getIConstantVRegVal(VReg: Params.BitShift, MRI);
6323 assert(WordShiftConst && BitShiftConst && "Expected constants");
6324
6325 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6326 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6327 const bool NeedsInterWordShift = ShiftBits != 0;
6328
6329 switch (Opcode) {
6330 case TargetOpcode::G_SHL: {
6331 // Data moves from lower indices to higher indices
6332 // If this part would come from a source beyond our range, it's zero
6333 if (PartIdx < ShiftWords)
6334 return Params.Zero;
6335
6336 unsigned SrcIdx = PartIdx - ShiftWords;
6337 if (!NeedsInterWordShift)
6338 return SrcParts[SrcIdx];
6339
6340 // Combine shifted main part with carry from previous part
6341 auto Hi = MIRBuilder.buildShl(Dst: TargetTy, Src0: SrcParts[SrcIdx], Src1: Params.BitShift);
6342 if (SrcIdx > 0) {
6343 auto Lo = MIRBuilder.buildLShr(Dst: TargetTy, Src0: SrcParts[SrcIdx - 1],
6344 Src1: Params.InvBitShift);
6345 return MIRBuilder.buildOr(Dst: TargetTy, Src0: Hi, Src1: Lo).getReg(Idx: 0);
6346 }
6347 return Hi.getReg(Idx: 0);
6348 }
6349
6350 case TargetOpcode::G_LSHR: {
6351 unsigned SrcIdx = PartIdx + ShiftWords;
6352 if (SrcIdx >= NumParts)
6353 return Params.Zero;
6354 if (!NeedsInterWordShift)
6355 return SrcParts[SrcIdx];
6356
6357 // Combine shifted main part with carry from next part
6358 auto Lo = MIRBuilder.buildLShr(Dst: TargetTy, Src0: SrcParts[SrcIdx], Src1: Params.BitShift);
6359 if (SrcIdx + 1 < NumParts) {
6360 auto Hi = MIRBuilder.buildShl(Dst: TargetTy, Src0: SrcParts[SrcIdx + 1],
6361 Src1: Params.InvBitShift);
6362 return MIRBuilder.buildOr(Dst: TargetTy, Src0: Lo, Src1: Hi).getReg(Idx: 0);
6363 }
6364 return Lo.getReg(Idx: 0);
6365 }
6366
6367 case TargetOpcode::G_ASHR: {
6368 // Like LSHR but preserves sign bit
6369 unsigned SrcIdx = PartIdx + ShiftWords;
6370 if (SrcIdx >= NumParts)
6371 return Params.SignBit;
6372 if (!NeedsInterWordShift)
6373 return SrcParts[SrcIdx];
6374
6375 // Only the original MSB part uses arithmetic shift to preserve sign. All
6376 // other parts use logical shift since they're just moving data bits.
6377 auto Lo =
6378 (SrcIdx == NumParts - 1)
6379 ? MIRBuilder.buildAShr(Dst: TargetTy, Src0: SrcParts[SrcIdx], Src1: Params.BitShift)
6380 : MIRBuilder.buildLShr(Dst: TargetTy, Src0: SrcParts[SrcIdx], Src1: Params.BitShift);
6381 Register HiSrc =
6382 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6383 auto Hi = MIRBuilder.buildShl(Dst: TargetTy, Src0: HiSrc, Src1: Params.InvBitShift);
6384 return MIRBuilder.buildOr(Dst: TargetTy, Src0: Lo, Src1: Hi).getReg(Idx: 0);
6385 }
6386
6387 default:
6388 llvm_unreachable("not a shift");
6389 }
6390}
6391
6392Register LegalizerHelper::buildVariableShiftPart(unsigned Opcode,
6393 Register MainOperand,
6394 Register ShiftAmt,
6395 LLT TargetTy,
6396 Register CarryOperand) {
6397 // This helper generates a single output part for variable shifts by combining
6398 // the main operand (shifted by BitShift) with carry bits from an adjacent
6399 // part.
6400
6401 // For G_ASHR, individual parts don't have their own sign bit, only the
6402 // complete value does. So we use LSHR for the main operand shift in ASHR
6403 // context.
6404 unsigned MainOpcode = (Opcode == TargetOpcode::G_ASHR)
6405 ? static_cast<unsigned>(TargetOpcode::G_LSHR)
6406 : Opcode;
6407
6408 // Perform the primary shift on the main operand
6409 Register MainShifted =
6410 MIRBuilder.buildInstr(Opc: MainOpcode, DstOps: {TargetTy}, SrcOps: {MainOperand, ShiftAmt})
6411 .getReg(Idx: 0);
6412
6413 // No carry operand available
6414 if (!CarryOperand.isValid())
6415 return MainShifted;
6416
6417 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6418 // so carry bits aren't needed.
6419 LLT ShiftAmtTy = MRI.getType(Reg: ShiftAmt);
6420 auto ZeroConst = MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: 0);
6421 LLT BoolTy = LLT::scalar(SizeInBits: 1);
6422 auto IsZeroBitShift =
6423 MIRBuilder.buildICmp(Pred: ICmpInst::ICMP_EQ, Res: BoolTy, Op0: ShiftAmt, Op1: ZeroConst);
6424
6425 // Extract bits from the adjacent part that will "carry over" into this part.
6426 // The carry direction is opposite to the main shift direction, so we can
6427 // align the two shifted values before combining them with OR.
6428
6429 // Determine the carry shift opcode (opposite direction)
6430 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6431 : TargetOpcode::G_SHL;
6432
6433 // Calculate inverse shift amount: BitWidth - ShiftAmt
6434 auto TargetBitsConst =
6435 MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: TargetTy.getScalarSizeInBits());
6436 auto InvShiftAmt = MIRBuilder.buildSub(Dst: ShiftAmtTy, Src0: TargetBitsConst, Src1: ShiftAmt);
6437
6438 // Shift the carry operand
6439 Register CarryBits =
6440 MIRBuilder
6441 .buildInstr(Opc: CarryOpcode, DstOps: {TargetTy}, SrcOps: {CarryOperand, InvShiftAmt})
6442 .getReg(Idx: 0);
6443
6444 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6445 // TargetBits which would be poison for the individual carry shift operation).
6446 auto ZeroReg = MIRBuilder.buildConstant(Res: TargetTy, Val: 0);
6447 Register SafeCarryBits =
6448 MIRBuilder.buildSelect(Res: TargetTy, Tst: IsZeroBitShift, Op0: ZeroReg, Op1: CarryBits)
6449 .getReg(Idx: 0);
6450
6451 // Combine the main shifted part with the carry bits
6452 return MIRBuilder.buildOr(Dst: TargetTy, Src0: MainShifted, Src1: SafeCarryBits).getReg(Idx: 0);
6453}
6454
6455LegalizerHelper::LegalizeResult
6456LegalizerHelper::narrowScalarShiftByConstantMultiway(MachineInstr &MI,
6457 const APInt &Amt,
6458 LLT TargetTy,
6459 LLT ShiftAmtTy) {
6460 // Any wide shift can be decomposed into WordShift + BitShift components.
6461 // When shift amount is known constant, directly compute the decomposition
6462 // values and generate constant registers.
6463 Register DstReg = MI.getOperand(i: 0).getReg();
6464 Register SrcReg = MI.getOperand(i: 1).getReg();
6465 LLT DstTy = MRI.getType(Reg: DstReg);
6466
6467 const unsigned DstBits = DstTy.getScalarSizeInBits();
6468 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6469 const unsigned NumParts = DstBits / TargetBits;
6470
6471 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6472
6473 // When the shift amount is known at compile time, we just calculate which
6474 // source parts contribute to each output part.
6475
6476 SmallVector<Register, 8> SrcParts;
6477 extractParts(Reg: SrcReg, Ty: TargetTy, NumParts, VRegs&: SrcParts, MIRBuilder, MRI);
6478
6479 if (Amt.isZero()) {
6480 // No shift needed, just copy
6481 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: SrcParts);
6482 MI.eraseFromParent();
6483 return Legalized;
6484 }
6485
6486 ShiftParams Params;
6487 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6488 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
6489
6490 // Generate constants and values needed by all shift types
6491 Params.WordShift = MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: ShiftWords).getReg(Idx: 0);
6492 Params.BitShift = MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: ShiftBits).getReg(Idx: 0);
6493 Params.InvBitShift =
6494 MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: TargetBits - ShiftBits).getReg(Idx: 0);
6495 Params.Zero = MIRBuilder.buildConstant(Res: TargetTy, Val: 0).getReg(Idx: 0);
6496
6497 // For ASHR, we need the sign-extended value to fill shifted-out positions
6498 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6499 Params.SignBit =
6500 MIRBuilder
6501 .buildAShr(Dst: TargetTy, Src0: SrcParts[SrcParts.size() - 1],
6502 Src1: MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: TargetBits - 1))
6503 .getReg(Idx: 0);
6504
6505 SmallVector<Register, 8> DstParts(NumParts);
6506 for (unsigned I = 0; I < NumParts; ++I)
6507 DstParts[I] = buildConstantShiftPart(Opcode: MI.getOpcode(), PartIdx: I, NumParts, SrcParts,
6508 Params, TargetTy, ShiftAmtTy);
6509
6510 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: DstParts);
6511 MI.eraseFromParent();
6512 return Legalized;
6513}
6514
6515LegalizerHelper::LegalizeResult
6516LegalizerHelper::narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy) {
6517 Register DstReg = MI.getOperand(i: 0).getReg();
6518 Register SrcReg = MI.getOperand(i: 1).getReg();
6519 Register AmtReg = MI.getOperand(i: 2).getReg();
6520 LLT DstTy = MRI.getType(Reg: DstReg);
6521 LLT ShiftAmtTy = MRI.getType(Reg: AmtReg);
6522
6523 const unsigned DstBits = DstTy.getScalarSizeInBits();
6524 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6525 const unsigned NumParts = DstBits / TargetBits;
6526
6527 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6528 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6529
6530 // If the shift amount is known at compile time, we can use direct indexing
6531 // instead of generating select chains in the general case.
6532 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: AmtReg, MRI))
6533 return narrowScalarShiftByConstantMultiway(MI, Amt: VRegAndVal->Value, TargetTy,
6534 ShiftAmtTy);
6535
6536 // For runtime-variable shift amounts, we must generate a more complex
6537 // sequence that handles all possible shift values using select chains.
6538
6539 // Split the input into target-sized pieces
6540 SmallVector<Register, 8> SrcParts;
6541 extractParts(Reg: SrcReg, Ty: TargetTy, NumParts, VRegs&: SrcParts, MIRBuilder, MRI);
6542
6543 // Shifting by zero should be a no-op.
6544 auto ZeroAmtConst = MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: 0);
6545 LLT BoolTy = LLT::scalar(SizeInBits: 1);
6546 auto IsZeroShift =
6547 MIRBuilder.buildICmp(Pred: ICmpInst::ICMP_EQ, Res: BoolTy, Op0: AmtReg, Op1: ZeroAmtConst);
6548
6549 // Any wide shift can be decomposed into two components:
6550 // 1. WordShift: number of complete target-sized words to shift
6551 // 2. BitShift: number of bits to shift within each word
6552 //
6553 // Example: 128-bit >> 50 with 32-bit target:
6554 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6555 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6556 unsigned TargetBitsLog2 = Log2_32(Value: TargetBits);
6557 auto TargetBitsLog2Const =
6558 MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: TargetBitsLog2);
6559 auto TargetBitsMask = MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: TargetBits - 1);
6560
6561 Register WordShift =
6562 MIRBuilder.buildLShr(Dst: ShiftAmtTy, Src0: AmtReg, Src1: TargetBitsLog2Const).getReg(Idx: 0);
6563 Register BitShift =
6564 MIRBuilder.buildAnd(Dst: ShiftAmtTy, Src0: AmtReg, Src1: TargetBitsMask).getReg(Idx: 0);
6565
6566 // Fill values:
6567 // - SHL/LSHR: fill with zeros
6568 // - ASHR: fill with sign-extended MSB
6569 Register ZeroReg = MIRBuilder.buildConstant(Res: TargetTy, Val: 0).getReg(Idx: 0);
6570
6571 Register FillValue;
6572 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6573 auto TargetBitsMinusOneConst =
6574 MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: TargetBits - 1);
6575 FillValue = MIRBuilder
6576 .buildAShr(Dst: TargetTy, Src0: SrcParts[NumParts - 1],
6577 Src1: TargetBitsMinusOneConst)
6578 .getReg(Idx: 0);
6579 } else {
6580 FillValue = ZeroReg;
6581 }
6582
6583 SmallVector<Register, 8> DstParts(NumParts);
6584
6585 // For each output part, generate a select chain that chooses the correct
6586 // result based on the runtime WordShift value. This handles all possible
6587 // word shift amounts by pre-calculating what each would produce.
6588 for (unsigned I = 0; I < NumParts; ++I) {
6589 // Initialize with appropriate default value for this shift type
6590 Register InBoundsResult = FillValue;
6591
6592 // clang-format off
6593 // Build a branchless select chain by pre-computing results for all possible
6594 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6595 //
6596 // K=0: select(WordShift==0, result0, FillValue)
6597 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6598 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6599 // clang-format on
6600 for (unsigned K = 0; K < NumParts; ++K) {
6601 auto WordShiftKConst = MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: K);
6602 auto IsWordShiftK = MIRBuilder.buildICmp(Pred: ICmpInst::ICMP_EQ, Res: BoolTy,
6603 Op0: WordShift, Op1: WordShiftKConst);
6604
6605 // Calculate source indices for this word shift
6606 //
6607 // For 4-part 128-bit value with K=1 word shift:
6608 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6609 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6610 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6611 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6612 int MainSrcIdx;
6613 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6614
6615 switch (MI.getOpcode()) {
6616 case TargetOpcode::G_SHL:
6617 MainSrcIdx = (int)I - (int)K;
6618 CarrySrcIdx = MainSrcIdx - 1;
6619 break;
6620 case TargetOpcode::G_LSHR:
6621 case TargetOpcode::G_ASHR:
6622 MainSrcIdx = (int)I + (int)K;
6623 CarrySrcIdx = MainSrcIdx + 1;
6624 break;
6625 default:
6626 llvm_unreachable("Not a shift");
6627 }
6628
6629 // Check bounds and build the result for this word shift
6630 Register ResultForK;
6631 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6632 Register MainOp = SrcParts[MainSrcIdx];
6633 Register CarryOp;
6634
6635 // Determine carry operand with bounds checking
6636 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6637 CarryOp = SrcParts[CarrySrcIdx];
6638 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6639 CarrySrcIdx >= (int)NumParts)
6640 CarryOp = FillValue; // Use sign extension
6641
6642 ResultForK = buildVariableShiftPart(Opcode: MI.getOpcode(), MainOperand: MainOp, ShiftAmt: BitShift,
6643 TargetTy, CarryOperand: CarryOp);
6644 } else {
6645 // Out of bounds - use fill value for this k
6646 ResultForK = FillValue;
6647 }
6648
6649 // Select this result if WordShift equals k
6650 InBoundsResult =
6651 MIRBuilder
6652 .buildSelect(Res: TargetTy, Tst: IsWordShiftK, Op0: ResultForK, Op1: InBoundsResult)
6653 .getReg(Idx: 0);
6654 }
6655
6656 // Handle zero-shift special case: if shift is 0, use original input
6657 DstParts[I] =
6658 MIRBuilder
6659 .buildSelect(Res: TargetTy, Tst: IsZeroShift, Op0: SrcParts[I], Op1: InBoundsResult)
6660 .getReg(Idx: 0);
6661 }
6662
6663 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: DstParts);
6664 MI.eraseFromParent();
6665 return Legalized;
6666}
6667
6668LegalizerHelper::LegalizeResult
6669LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
6670 LLT MoreTy) {
6671 assert(TypeIdx == 0 && "Expecting only Idx 0");
6672
6673 Observer.changingInstr(MI);
6674 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6675 MachineBasicBlock &OpMBB = *MI.getOperand(i: I + 1).getMBB();
6676 MIRBuilder.setInsertPt(MBB&: OpMBB, II: OpMBB.getFirstTerminator());
6677 moreElementsVectorSrc(MI, MoreTy, OpIdx: I);
6678 }
6679
6680 MachineBasicBlock &MBB = *MI.getParent();
6681 MIRBuilder.setInsertPt(MBB, II: --MBB.getFirstNonPHI());
6682 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
6683 Observer.changedInstr(MI);
6684 return Legalized;
6685}
6686
6687MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6688 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6689 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6690
6691 switch (Opcode) {
6692 default:
6693 llvm_unreachable(
6694 "getNeutralElementForVecReduce called with invalid opcode!");
6695 case TargetOpcode::G_VECREDUCE_ADD:
6696 case TargetOpcode::G_VECREDUCE_OR:
6697 case TargetOpcode::G_VECREDUCE_XOR:
6698 case TargetOpcode::G_VECREDUCE_UMAX:
6699 return MIRBuilder.buildConstant(Res: Ty, Val: 0);
6700 case TargetOpcode::G_VECREDUCE_MUL:
6701 return MIRBuilder.buildConstant(Res: Ty, Val: 1);
6702 case TargetOpcode::G_VECREDUCE_AND:
6703 case TargetOpcode::G_VECREDUCE_UMIN:
6704 return MIRBuilder.buildConstant(
6705 Res: Ty, Val: APInt::getAllOnes(numBits: Ty.getScalarSizeInBits()));
6706 case TargetOpcode::G_VECREDUCE_SMAX:
6707 return MIRBuilder.buildConstant(
6708 Res: Ty, Val: APInt::getSignedMinValue(numBits: Ty.getSizeInBits()));
6709 case TargetOpcode::G_VECREDUCE_SMIN:
6710 return MIRBuilder.buildConstant(
6711 Res: Ty, Val: APInt::getSignedMaxValue(numBits: Ty.getSizeInBits()));
6712 case TargetOpcode::G_VECREDUCE_FADD:
6713 return MIRBuilder.buildFConstant(Res: Ty, Val: -0.0);
6714 case TargetOpcode::G_VECREDUCE_FMUL:
6715 return MIRBuilder.buildFConstant(Res: Ty, Val: 1.0);
6716 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6717 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6718 assert(false && "getNeutralElementForVecReduce unimplemented for "
6719 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6720 }
6721 llvm_unreachable("switch expected to return!");
6722}
6723
6724LegalizerHelper::LegalizeResult
6725LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
6726 LLT MoreTy) {
6727 unsigned Opc = MI.getOpcode();
6728 switch (Opc) {
6729 case TargetOpcode::G_IMPLICIT_DEF:
6730 case TargetOpcode::G_LOAD: {
6731 if (TypeIdx != 0)
6732 return UnableToLegalize;
6733 Observer.changingInstr(MI);
6734 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
6735 Observer.changedInstr(MI);
6736 return Legalized;
6737 }
6738 case TargetOpcode::G_STORE:
6739 if (TypeIdx != 0)
6740 return UnableToLegalize;
6741 Observer.changingInstr(MI);
6742 moreElementsVectorSrc(MI, MoreTy, OpIdx: 0);
6743 Observer.changedInstr(MI);
6744 return Legalized;
6745 case TargetOpcode::G_AND:
6746 case TargetOpcode::G_OR:
6747 case TargetOpcode::G_XOR:
6748 case TargetOpcode::G_ADD:
6749 case TargetOpcode::G_SUB:
6750 case TargetOpcode::G_MUL:
6751 case TargetOpcode::G_FADD:
6752 case TargetOpcode::G_FSUB:
6753 case TargetOpcode::G_FMUL:
6754 case TargetOpcode::G_FDIV:
6755 case TargetOpcode::G_FCOPYSIGN:
6756 case TargetOpcode::G_UADDSAT:
6757 case TargetOpcode::G_USUBSAT:
6758 case TargetOpcode::G_SADDSAT:
6759 case TargetOpcode::G_SSUBSAT:
6760 case TargetOpcode::G_SMIN:
6761 case TargetOpcode::G_SMAX:
6762 case TargetOpcode::G_UMIN:
6763 case TargetOpcode::G_UMAX:
6764 case TargetOpcode::G_FMINNUM:
6765 case TargetOpcode::G_FMAXNUM:
6766 case TargetOpcode::G_FMINNUM_IEEE:
6767 case TargetOpcode::G_FMAXNUM_IEEE:
6768 case TargetOpcode::G_FMINIMUM:
6769 case TargetOpcode::G_FMAXIMUM:
6770 case TargetOpcode::G_FMINIMUMNUM:
6771 case TargetOpcode::G_FMAXIMUMNUM:
6772 case TargetOpcode::G_STRICT_FADD:
6773 case TargetOpcode::G_STRICT_FSUB:
6774 case TargetOpcode::G_STRICT_FMUL: {
6775 Observer.changingInstr(MI);
6776 moreElementsVectorSrc(MI, MoreTy, OpIdx: 1);
6777 moreElementsVectorSrc(MI, MoreTy, OpIdx: 2);
6778 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
6779 Observer.changedInstr(MI);
6780 return Legalized;
6781 }
6782 case TargetOpcode::G_SHL:
6783 case TargetOpcode::G_ASHR:
6784 case TargetOpcode::G_LSHR: {
6785 Observer.changingInstr(MI);
6786 moreElementsVectorSrc(MI, MoreTy, OpIdx: 1);
6787 // The shift operand may have a different scalar type from the source and
6788 // destination operands.
6789 LLT ShiftMoreTy = MoreTy.changeElementType(
6790 NewEltTy: MRI.getType(Reg: MI.getOperand(i: 2).getReg()).getElementType());
6791 moreElementsVectorSrc(MI, MoreTy: ShiftMoreTy, OpIdx: 2);
6792 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
6793 Observer.changedInstr(MI);
6794 return Legalized;
6795 }
6796 case TargetOpcode::G_FMA:
6797 case TargetOpcode::G_STRICT_FMA:
6798 case TargetOpcode::G_FSHR:
6799 case TargetOpcode::G_FSHL: {
6800 Observer.changingInstr(MI);
6801 moreElementsVectorSrc(MI, MoreTy, OpIdx: 1);
6802 moreElementsVectorSrc(MI, MoreTy, OpIdx: 2);
6803 moreElementsVectorSrc(MI, MoreTy, OpIdx: 3);
6804 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
6805 Observer.changedInstr(MI);
6806 return Legalized;
6807 }
6808 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6809 case TargetOpcode::G_EXTRACT:
6810 if (TypeIdx != 1)
6811 return UnableToLegalize;
6812 Observer.changingInstr(MI);
6813 moreElementsVectorSrc(MI, MoreTy, OpIdx: 1);
6814 Observer.changedInstr(MI);
6815 return Legalized;
6816 case TargetOpcode::G_INSERT:
6817 case TargetOpcode::G_INSERT_VECTOR_ELT:
6818 case TargetOpcode::G_FREEZE:
6819 case TargetOpcode::G_FNEG:
6820 case TargetOpcode::G_FABS:
6821 case TargetOpcode::G_FSQRT:
6822 case TargetOpcode::G_FCEIL:
6823 case TargetOpcode::G_FFLOOR:
6824 case TargetOpcode::G_FNEARBYINT:
6825 case TargetOpcode::G_FRINT:
6826 case TargetOpcode::G_INTRINSIC_ROUND:
6827 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6828 case TargetOpcode::G_INTRINSIC_TRUNC:
6829 case TargetOpcode::G_BITREVERSE:
6830 case TargetOpcode::G_BSWAP:
6831 case TargetOpcode::G_FCANONICALIZE:
6832 case TargetOpcode::G_SEXT_INREG:
6833 case TargetOpcode::G_ABS:
6834 case TargetOpcode::G_CTLZ:
6835 case TargetOpcode::G_CTPOP:
6836 if (TypeIdx != 0)
6837 return UnableToLegalize;
6838 Observer.changingInstr(MI);
6839 moreElementsVectorSrc(MI, MoreTy, OpIdx: 1);
6840 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
6841 Observer.changedInstr(MI);
6842 return Legalized;
6843 case TargetOpcode::G_SELECT: {
6844 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6845 if (TypeIdx == 1) {
6846 if (!CondTy.isScalar() ||
6847 DstTy.getElementCount() != MoreTy.getElementCount())
6848 return UnableToLegalize;
6849
6850 // This is turning a scalar select of vectors into a vector
6851 // select. Broadcast the select condition.
6852 auto ShufSplat = MIRBuilder.buildShuffleSplat(Res: MoreTy, Src: CondReg);
6853 Observer.changingInstr(MI);
6854 MI.getOperand(i: 1).setReg(ShufSplat.getReg(Idx: 0));
6855 Observer.changedInstr(MI);
6856 return Legalized;
6857 }
6858
6859 if (CondTy.isVector())
6860 return UnableToLegalize;
6861
6862 Observer.changingInstr(MI);
6863 moreElementsVectorSrc(MI, MoreTy, OpIdx: 2);
6864 moreElementsVectorSrc(MI, MoreTy, OpIdx: 3);
6865 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
6866 Observer.changedInstr(MI);
6867 return Legalized;
6868 }
6869 case TargetOpcode::G_UNMERGE_VALUES:
6870 return UnableToLegalize;
6871 case TargetOpcode::G_PHI:
6872 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6873 case TargetOpcode::G_SHUFFLE_VECTOR:
6874 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6875 case TargetOpcode::G_BUILD_VECTOR: {
6876 SmallVector<SrcOp, 8> Elts;
6877 for (auto Op : MI.uses()) {
6878 Elts.push_back(Elt: Op.getReg());
6879 }
6880
6881 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6882 Elts.push_back(Elt: MIRBuilder.buildUndef(Res: MoreTy.getScalarType()));
6883 }
6884
6885 MIRBuilder.buildDeleteTrailingVectorElements(
6886 Res: MI.getOperand(i: 0).getReg(), Op0: MIRBuilder.buildInstr(Opc, DstOps: {MoreTy}, SrcOps: Elts));
6887 MI.eraseFromParent();
6888 return Legalized;
6889 }
6890 case TargetOpcode::G_SEXT:
6891 case TargetOpcode::G_ZEXT:
6892 case TargetOpcode::G_ANYEXT:
6893 case TargetOpcode::G_TRUNC:
6894 case TargetOpcode::G_FPTRUNC:
6895 case TargetOpcode::G_FPEXT:
6896 case TargetOpcode::G_FPTOSI:
6897 case TargetOpcode::G_FPTOUI:
6898 case TargetOpcode::G_FPTOSI_SAT:
6899 case TargetOpcode::G_FPTOUI_SAT:
6900 case TargetOpcode::G_SITOFP:
6901 case TargetOpcode::G_UITOFP: {
6902 Observer.changingInstr(MI);
6903 LLT SrcExtTy;
6904 LLT DstExtTy;
6905 if (TypeIdx == 0) {
6906 DstExtTy = MoreTy;
6907 SrcExtTy = MoreTy.changeElementType(
6908 NewEltTy: MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getElementType());
6909 } else {
6910 DstExtTy = MoreTy.changeElementType(
6911 NewEltTy: MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getElementType());
6912 SrcExtTy = MoreTy;
6913 }
6914 moreElementsVectorSrc(MI, MoreTy: SrcExtTy, OpIdx: 1);
6915 moreElementsVectorDst(MI, WideTy: DstExtTy, OpIdx: 0);
6916 Observer.changedInstr(MI);
6917 return Legalized;
6918 }
6919 case TargetOpcode::G_ICMP:
6920 case TargetOpcode::G_FCMP: {
6921 if (TypeIdx != 1)
6922 return UnableToLegalize;
6923
6924 Observer.changingInstr(MI);
6925 moreElementsVectorSrc(MI, MoreTy, OpIdx: 2);
6926 moreElementsVectorSrc(MI, MoreTy, OpIdx: 3);
6927 LLT CondTy = MoreTy.changeVectorElementType(
6928 NewEltTy: MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getElementType());
6929 moreElementsVectorDst(MI, WideTy: CondTy, OpIdx: 0);
6930 Observer.changedInstr(MI);
6931 return Legalized;
6932 }
6933 case TargetOpcode::G_BITCAST: {
6934 if (TypeIdx != 0)
6935 return UnableToLegalize;
6936
6937 LLT SrcTy = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
6938 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
6939
6940 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6941 if (coefficient % DstTy.getNumElements() != 0)
6942 return UnableToLegalize;
6943
6944 coefficient = coefficient / DstTy.getNumElements();
6945
6946 LLT NewTy = SrcTy.changeElementCount(
6947 EC: ElementCount::get(MinVal: coefficient, Scalable: MoreTy.isScalable()));
6948 Observer.changingInstr(MI);
6949 moreElementsVectorSrc(MI, MoreTy: NewTy, OpIdx: 1);
6950 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
6951 Observer.changedInstr(MI);
6952 return Legalized;
6953 }
6954 case TargetOpcode::G_VECREDUCE_FADD:
6955 case TargetOpcode::G_VECREDUCE_FMUL:
6956 case TargetOpcode::G_VECREDUCE_ADD:
6957 case TargetOpcode::G_VECREDUCE_MUL:
6958 case TargetOpcode::G_VECREDUCE_AND:
6959 case TargetOpcode::G_VECREDUCE_OR:
6960 case TargetOpcode::G_VECREDUCE_XOR:
6961 case TargetOpcode::G_VECREDUCE_SMAX:
6962 case TargetOpcode::G_VECREDUCE_SMIN:
6963 case TargetOpcode::G_VECREDUCE_UMAX:
6964 case TargetOpcode::G_VECREDUCE_UMIN: {
6965 LLT OrigTy = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
6966 MachineOperand &MO = MI.getOperand(i: 1);
6967 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(Res: MoreTy, Op0: MO);
6968 auto NeutralElement = getNeutralElementForVecReduce(
6969 Opcode: MI.getOpcode(), MIRBuilder, Ty: MoreTy.getElementType());
6970
6971 LLT IdxTy(TLI.getVectorIdxLLT(DL: MIRBuilder.getDataLayout()));
6972 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6973 i != e; i++) {
6974 auto Idx = MIRBuilder.buildConstant(Res: IdxTy, Val: i);
6975 NewVec = MIRBuilder.buildInsertVectorElement(Res: MoreTy, Val: NewVec,
6976 Elt: NeutralElement, Idx);
6977 }
6978
6979 Observer.changingInstr(MI);
6980 MO.setReg(NewVec.getReg(Idx: 0));
6981 Observer.changedInstr(MI);
6982 return Legalized;
6983 }
6984
6985 default:
6986 return UnableToLegalize;
6987 }
6988}
6989
6990LegalizerHelper::LegalizeResult
6991LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
6992 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6993 ArrayRef<int> Mask = MI.getOperand(i: 3).getShuffleMask();
6994 unsigned MaskNumElts = Mask.size();
6995 unsigned SrcNumElts = SrcTy.getNumElements();
6996 LLT DestEltTy = DstTy.getElementType();
6997
6998 if (MaskNumElts == SrcNumElts)
6999 return Legalized;
7000
7001 if (MaskNumElts < SrcNumElts) {
7002 // Extend mask to match new destination vector size with
7003 // undef values.
7004 SmallVector<int, 16> NewMask(SrcNumElts, -1);
7005 llvm::copy(Range&: Mask, Out: NewMask.begin());
7006
7007 moreElementsVectorDst(MI, WideTy: SrcTy, OpIdx: 0);
7008 MIRBuilder.setInstrAndDebugLoc(MI);
7009 MIRBuilder.buildShuffleVector(Res: MI.getOperand(i: 0).getReg(),
7010 Src1: MI.getOperand(i: 1).getReg(),
7011 Src2: MI.getOperand(i: 2).getReg(), Mask: NewMask);
7012 MI.eraseFromParent();
7013
7014 return Legalized;
7015 }
7016
7017 unsigned PaddedMaskNumElts = alignTo(Value: MaskNumElts, Align: SrcNumElts);
7018 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
7019 LLT PaddedTy =
7020 DstTy.changeVectorElementCount(EC: ElementCount::getFixed(MinVal: PaddedMaskNumElts));
7021
7022 // Create new source vectors by concatenating the initial
7023 // source vectors with undefined vectors of the same size.
7024 auto Undef = MIRBuilder.buildUndef(Res: SrcTy);
7025 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(Idx: 0));
7026 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(Idx: 0));
7027 MOps1[0] = MI.getOperand(i: 1).getReg();
7028 MOps2[0] = MI.getOperand(i: 2).getReg();
7029
7030 auto Src1 = MIRBuilder.buildConcatVectors(Res: PaddedTy, Ops: MOps1);
7031 auto Src2 = MIRBuilder.buildConcatVectors(Res: PaddedTy, Ops: MOps2);
7032
7033 // Readjust mask for new input vector length.
7034 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
7035 for (unsigned I = 0; I != MaskNumElts; ++I) {
7036 int Idx = Mask[I];
7037 if (Idx >= static_cast<int>(SrcNumElts))
7038 Idx += PaddedMaskNumElts - SrcNumElts;
7039 MappedOps[I] = Idx;
7040 }
7041
7042 // If we got more elements than required, extract subvector.
7043 if (MaskNumElts != PaddedMaskNumElts) {
7044 auto Shuffle =
7045 MIRBuilder.buildShuffleVector(Res: PaddedTy, Src1, Src2, Mask: MappedOps);
7046
7047 SmallVector<Register, 16> Elts(MaskNumElts);
7048 for (unsigned I = 0; I < MaskNumElts; ++I) {
7049 Elts[I] =
7050 MIRBuilder.buildExtractVectorElementConstant(Res: DestEltTy, Val: Shuffle, Idx: I)
7051 .getReg(Idx: 0);
7052 }
7053 MIRBuilder.buildBuildVector(Res: DstReg, Ops: Elts);
7054 } else {
7055 MIRBuilder.buildShuffleVector(Res: DstReg, Src1, Src2, Mask: MappedOps);
7056 }
7057
7058 MI.eraseFromParent();
7059 return LegalizerHelper::LegalizeResult::Legalized;
7060}
7061
7062LegalizerHelper::LegalizeResult
7063LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
7064 unsigned int TypeIdx, LLT MoreTy) {
7065 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
7066 ArrayRef<int> Mask = MI.getOperand(i: 3).getShuffleMask();
7067 unsigned NumElts = DstTy.getNumElements();
7068 unsigned WidenNumElts = MoreTy.getNumElements();
7069
7070 if (DstTy.isVector() && Src1Ty.isVector() &&
7071 DstTy.getNumElements() != Src1Ty.getNumElements()) {
7072 return equalizeVectorShuffleLengths(MI);
7073 }
7074
7075 if (TypeIdx != 0)
7076 return UnableToLegalize;
7077
7078 // Expect a canonicalized shuffle.
7079 if (DstTy != Src1Ty || DstTy != Src2Ty)
7080 return UnableToLegalize;
7081
7082 moreElementsVectorSrc(MI, MoreTy, OpIdx: 1);
7083 moreElementsVectorSrc(MI, MoreTy, OpIdx: 2);
7084
7085 // Adjust mask based on new input vector length.
7086 SmallVector<int, 16> NewMask(WidenNumElts, -1);
7087 for (unsigned I = 0; I != NumElts; ++I) {
7088 int Idx = Mask[I];
7089 if (Idx < static_cast<int>(NumElts))
7090 NewMask[I] = Idx;
7091 else
7092 NewMask[I] = Idx - NumElts + WidenNumElts;
7093 }
7094 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
7095 MIRBuilder.setInstrAndDebugLoc(MI);
7096 MIRBuilder.buildShuffleVector(Res: MI.getOperand(i: 0).getReg(),
7097 Src1: MI.getOperand(i: 1).getReg(),
7098 Src2: MI.getOperand(i: 2).getReg(), Mask: NewMask);
7099 MI.eraseFromParent();
7100 return Legalized;
7101}
7102
7103void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
7104 ArrayRef<Register> Src1Regs,
7105 ArrayRef<Register> Src2Regs,
7106 LLT NarrowTy) {
7107 MachineIRBuilder &B = MIRBuilder;
7108 unsigned SrcParts = Src1Regs.size();
7109 unsigned DstParts = DstRegs.size();
7110
7111 unsigned DstIdx = 0; // Low bits of the result.
7112 Register FactorSum =
7113 B.buildMul(Dst: NarrowTy, Src0: Src1Regs[DstIdx], Src1: Src2Regs[DstIdx]).getReg(Idx: 0);
7114 DstRegs[DstIdx] = FactorSum;
7115
7116 Register CarrySumPrevDstIdx;
7117 SmallVector<Register, 4> Factors;
7118
7119 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7120 // Collect high parts of muls from previous DstIdx.
7121 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7122 i <= std::min(a: DstIdx - 1, b: SrcParts - 1); ++i) {
7123 MachineInstrBuilder Umulh =
7124 B.buildUMulH(Dst: NarrowTy, Src0: Src1Regs[DstIdx - 1 - i], Src1: Src2Regs[i]);
7125 Factors.push_back(Elt: Umulh.getReg(Idx: 0));
7126 }
7127 // Collect low parts of muls for DstIdx. Visit the diagonal starting with
7128 // the low Src1 part, so multiply-add selectors can use it as the first
7129 // accumulated cross product.
7130 unsigned LowStart = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7131 unsigned LowEnd = std::min(a: DstIdx, b: SrcParts - 1);
7132 for (unsigned RevI = LowEnd + 1; RevI != LowStart; --RevI) {
7133 unsigned i = RevI - 1;
7134 MachineInstrBuilder Mul =
7135 B.buildMul(Dst: NarrowTy, Src0: Src1Regs[DstIdx - i], Src1: Src2Regs[i]);
7136 Factors.push_back(Elt: Mul.getReg(Idx: 0));
7137 }
7138 // Add CarrySum from additions calculated for previous DstIdx.
7139 if (DstIdx != 1) {
7140 Factors.push_back(Elt: CarrySumPrevDstIdx);
7141 }
7142
7143 Register CarrySum;
7144 // Add all factors and accumulate all carries into CarrySum.
7145 if (DstIdx != DstParts - 1) {
7146 MachineInstrBuilder Uaddo =
7147 B.buildUAddo(Res: NarrowTy, CarryOut: LLT::integer(SizeInBits: 1), Op0: Factors[0], Op1: Factors[1]);
7148 FactorSum = Uaddo.getReg(Idx: 0);
7149 CarrySum = B.buildZExt(Res: NarrowTy, Op: Uaddo.getReg(Idx: 1)).getReg(Idx: 0);
7150 for (unsigned i = 2; i < Factors.size(); ++i) {
7151 MachineInstrBuilder Uaddo =
7152 B.buildUAddo(Res: NarrowTy, CarryOut: LLT::integer(SizeInBits: 1), Op0: FactorSum, Op1: Factors[i]);
7153 FactorSum = Uaddo.getReg(Idx: 0);
7154 MachineInstrBuilder Carry = B.buildZExt(Res: NarrowTy, Op: Uaddo.getReg(Idx: 1));
7155 CarrySum = B.buildAdd(Dst: NarrowTy, Src0: CarrySum, Src1: Carry).getReg(Idx: 0);
7156 }
7157 } else {
7158 // Since value for the next index is not calculated, neither is CarrySum.
7159 FactorSum = B.buildAdd(Dst: NarrowTy, Src0: Factors[0], Src1: Factors[1]).getReg(Idx: 0);
7160 for (unsigned i = 2; i < Factors.size(); ++i)
7161 FactorSum = B.buildAdd(Dst: NarrowTy, Src0: FactorSum, Src1: Factors[i]).getReg(Idx: 0);
7162 }
7163
7164 CarrySumPrevDstIdx = CarrySum;
7165 DstRegs[DstIdx] = FactorSum;
7166 Factors.clear();
7167 }
7168}
7169
7170LegalizerHelper::LegalizeResult
7171LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
7172 LLT NarrowTy) {
7173 if (TypeIdx != 0)
7174 return UnableToLegalize;
7175
7176 Register DstReg = MI.getOperand(i: 0).getReg();
7177 LLT DstType = MRI.getType(Reg: DstReg);
7178 // FIXME: add support for vector types
7179 if (DstType.isVector())
7180 return UnableToLegalize;
7181
7182 unsigned Opcode = MI.getOpcode();
7183 unsigned OpO, OpE, OpF;
7184 switch (Opcode) {
7185 case TargetOpcode::G_SADDO:
7186 case TargetOpcode::G_SADDE:
7187 case TargetOpcode::G_UADDO:
7188 case TargetOpcode::G_UADDE:
7189 case TargetOpcode::G_ADD:
7190 OpO = TargetOpcode::G_UADDO;
7191 OpE = TargetOpcode::G_UADDE;
7192 OpF = TargetOpcode::G_UADDE;
7193 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7194 OpF = TargetOpcode::G_SADDE;
7195 break;
7196 case TargetOpcode::G_SSUBO:
7197 case TargetOpcode::G_SSUBE:
7198 case TargetOpcode::G_USUBO:
7199 case TargetOpcode::G_USUBE:
7200 case TargetOpcode::G_SUB:
7201 OpO = TargetOpcode::G_USUBO;
7202 OpE = TargetOpcode::G_USUBE;
7203 OpF = TargetOpcode::G_USUBE;
7204 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7205 OpF = TargetOpcode::G_SSUBE;
7206 break;
7207 default:
7208 llvm_unreachable("Unexpected add/sub opcode!");
7209 }
7210
7211 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7212 unsigned NumDefs = MI.getNumExplicitDefs();
7213 Register Src1 = MI.getOperand(i: NumDefs).getReg();
7214 Register Src2 = MI.getOperand(i: NumDefs + 1).getReg();
7215 Register CarryDst, CarryIn;
7216 if (NumDefs == 2)
7217 CarryDst = MI.getOperand(i: 1).getReg();
7218 if (MI.getNumOperands() == NumDefs + 3)
7219 CarryIn = MI.getOperand(i: NumDefs + 2).getReg();
7220
7221 LLT RegTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
7222 LLT LeftoverTy, DummyTy;
7223 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7224 extractParts(Reg: Src1, RegTy, MainTy: NarrowTy, LeftoverTy, VRegs&: Src1Regs, LeftoverVRegs&: Src1Left,
7225 MIRBuilder, MRI);
7226 extractParts(Reg: Src2, RegTy, MainTy: NarrowTy, LeftoverTy&: DummyTy, VRegs&: Src2Regs, LeftoverVRegs&: Src2Left, MIRBuilder,
7227 MRI);
7228
7229 int NarrowParts = Src1Regs.size();
7230 Src1Regs.append(RHS: Src1Left);
7231 Src2Regs.append(RHS: Src2Left);
7232 DstRegs.reserve(N: Src1Regs.size());
7233
7234 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7235 Register DstReg =
7236 MRI.createGenericVirtualRegister(Ty: MRI.getType(Reg: Src1Regs[i]));
7237 Register CarryOut;
7238 // Forward the final carry-out to the destination register
7239 if (i == e - 1 && CarryDst)
7240 CarryOut = CarryDst;
7241 else
7242 CarryOut = MRI.createGenericVirtualRegister(Ty: LLT::integer(SizeInBits: 1));
7243
7244 if (!CarryIn) {
7245 MIRBuilder.buildInstr(Opc: OpO, DstOps: {DstReg, CarryOut},
7246 SrcOps: {Src1Regs[i], Src2Regs[i]});
7247 } else if (i == e - 1) {
7248 MIRBuilder.buildInstr(Opc: OpF, DstOps: {DstReg, CarryOut},
7249 SrcOps: {Src1Regs[i], Src2Regs[i], CarryIn});
7250 } else {
7251 MIRBuilder.buildInstr(Opc: OpE, DstOps: {DstReg, CarryOut},
7252 SrcOps: {Src1Regs[i], Src2Regs[i], CarryIn});
7253 }
7254
7255 DstRegs.push_back(Elt: DstReg);
7256 CarryIn = CarryOut;
7257 }
7258 insertParts(DstReg: MI.getOperand(i: 0).getReg(), ResultTy: RegTy, PartTy: NarrowTy,
7259 PartRegs: ArrayRef(DstRegs).take_front(N: NarrowParts), LeftoverTy,
7260 LeftoverRegs: ArrayRef(DstRegs).drop_front(N: NarrowParts));
7261
7262 MI.eraseFromParent();
7263 return Legalized;
7264}
7265
7266LegalizerHelper::LegalizeResult
7267LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
7268 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7269
7270 LLT Ty = MRI.getType(Reg: DstReg);
7271 if (Ty.isVector())
7272 return UnableToLegalize;
7273
7274 unsigned Size = Ty.getSizeInBits();
7275 unsigned NarrowSize = NarrowTy.getSizeInBits();
7276 if (Size % NarrowSize != 0)
7277 return UnableToLegalize;
7278
7279 unsigned NumParts = Size / NarrowSize;
7280 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7281 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7282
7283 SmallVector<Register, 2> Src1Parts, Src2Parts;
7284 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7285 extractParts(Reg: Src1, Ty: NarrowTy, NumParts, VRegs&: Src1Parts, MIRBuilder, MRI);
7286 extractParts(Reg: Src2, Ty: NarrowTy, NumParts, VRegs&: Src2Parts, MIRBuilder, MRI);
7287 multiplyRegisters(DstRegs&: DstTmpRegs, Src1Regs: Src1Parts, Src2Regs: Src2Parts, NarrowTy);
7288
7289 // Take only high half of registers if this is high mul.
7290 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7291 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: DstRegs);
7292 MI.eraseFromParent();
7293 return Legalized;
7294}
7295
7296LegalizerHelper::LegalizeResult
7297LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
7298 LLT NarrowTy) {
7299 if (TypeIdx != 0)
7300 return UnableToLegalize;
7301
7302 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7303
7304 Register Src = MI.getOperand(i: 1).getReg();
7305 LLT SrcTy = MRI.getType(Reg: Src);
7306
7307 // If all finite floats fit into the narrowed integer type, we can just swap
7308 // out the result type. This is practically only useful for conversions from
7309 // half to at least 16-bits, so just handle the one case.
7310 if (SrcTy.getScalarType() != LLT::scalar(SizeInBits: 16) ||
7311 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7312 return UnableToLegalize;
7313
7314 Observer.changingInstr(MI);
7315 narrowScalarDst(MI, NarrowTy, OpIdx: 0,
7316 ExtOpcode: IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7317 Observer.changedInstr(MI);
7318 return Legalized;
7319}
7320
7321LegalizerHelper::LegalizeResult
7322LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
7323 LLT NarrowTy) {
7324 if (TypeIdx != 1)
7325 return UnableToLegalize;
7326
7327 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7328
7329 int64_t SizeOp1 = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
7330 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7331 // NarrowSize.
7332 if (SizeOp1 % NarrowSize != 0)
7333 return UnableToLegalize;
7334 int NumParts = SizeOp1 / NarrowSize;
7335
7336 SmallVector<Register, 2> SrcRegs, DstRegs;
7337 extractParts(Reg: MI.getOperand(i: 1).getReg(), Ty: NarrowTy, NumParts, VRegs&: SrcRegs,
7338 MIRBuilder, MRI);
7339
7340 Register OpReg = MI.getOperand(i: 0).getReg();
7341 uint64_t OpStart = MI.getOperand(i: 2).getImm();
7342 uint64_t OpSize = MRI.getType(Reg: OpReg).getSizeInBits();
7343 for (int i = 0; i < NumParts; ++i) {
7344 unsigned SrcStart = i * NarrowSize;
7345
7346 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7347 // No part of the extract uses this subregister, ignore it.
7348 continue;
7349 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(Reg: OpReg)) {
7350 // The entire subregister is extracted, forward the value.
7351 DstRegs.push_back(Elt: SrcRegs[i]);
7352 continue;
7353 }
7354
7355 // OpSegStart is where this destination segment would start in OpReg if it
7356 // extended infinitely in both directions.
7357 int64_t ExtractOffset;
7358 uint64_t SegSize;
7359 if (OpStart < SrcStart) {
7360 ExtractOffset = 0;
7361 SegSize = std::min(a: NarrowSize, b: OpStart + OpSize - SrcStart);
7362 } else {
7363 ExtractOffset = OpStart - SrcStart;
7364 SegSize = std::min(a: SrcStart + NarrowSize - OpStart, b: OpSize);
7365 }
7366
7367 Register SegReg = SrcRegs[i];
7368 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7369 // A genuine extract is needed.
7370 SegReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: SegSize));
7371 MIRBuilder.buildExtract(Res: SegReg, Src: SrcRegs[i], Index: ExtractOffset);
7372 }
7373
7374 DstRegs.push_back(Elt: SegReg);
7375 }
7376
7377 Register DstReg = MI.getOperand(i: 0).getReg();
7378 if (MRI.getType(Reg: DstReg).isVector())
7379 MIRBuilder.buildBuildVector(Res: DstReg, Ops: DstRegs);
7380 else if (DstRegs.size() > 1)
7381 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: DstRegs);
7382 else
7383 MIRBuilder.buildCopy(Res: DstReg, Op: DstRegs[0]);
7384 MI.eraseFromParent();
7385 return Legalized;
7386}
7387
7388LegalizerHelper::LegalizeResult
7389LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
7390 LLT NarrowTy) {
7391 // FIXME: Don't know how to handle secondary types yet.
7392 if (TypeIdx != 0)
7393 return UnableToLegalize;
7394
7395 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7396 LLT RegTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
7397 LLT LeftoverTy;
7398 extractParts(Reg: MI.getOperand(i: 1).getReg(), RegTy, MainTy: NarrowTy, LeftoverTy, VRegs&: SrcRegs,
7399 LeftoverVRegs&: LeftoverRegs, MIRBuilder, MRI);
7400
7401 SrcRegs.append(RHS: LeftoverRegs);
7402
7403 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7404 Register OpReg = MI.getOperand(i: 2).getReg();
7405 uint64_t OpStart = MI.getOperand(i: 3).getImm();
7406 uint64_t OpSize = MRI.getType(Reg: OpReg).getSizeInBits();
7407 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7408 unsigned DstStart = I * NarrowSize;
7409
7410 if (DstStart == OpStart && NarrowTy == MRI.getType(Reg: OpReg)) {
7411 // The entire subregister is defined by this insert, forward the new
7412 // value.
7413 DstRegs.push_back(Elt: OpReg);
7414 continue;
7415 }
7416
7417 Register SrcReg = SrcRegs[I];
7418 if (MRI.getType(Reg: SrcRegs[I]) == LeftoverTy) {
7419 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7420 SrcReg = MRI.createGenericVirtualRegister(Ty: NarrowTy);
7421 MIRBuilder.buildAnyExt(Res: SrcReg, Op: SrcRegs[I]);
7422 }
7423
7424 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7425 // No part of the insert affects this subregister, forward the original.
7426 DstRegs.push_back(Elt: SrcReg);
7427 continue;
7428 }
7429
7430 // OpSegStart is where this destination segment would start in OpReg if it
7431 // extended infinitely in both directions.
7432 int64_t ExtractOffset, InsertOffset;
7433 uint64_t SegSize;
7434 if (OpStart < DstStart) {
7435 InsertOffset = 0;
7436 ExtractOffset = DstStart - OpStart;
7437 SegSize = std::min(a: NarrowSize, b: OpStart + OpSize - DstStart);
7438 } else {
7439 InsertOffset = OpStart - DstStart;
7440 ExtractOffset = 0;
7441 SegSize =
7442 std::min(a: NarrowSize - InsertOffset, b: OpStart + OpSize - DstStart);
7443 }
7444
7445 Register SegReg = OpReg;
7446 if (ExtractOffset != 0 || SegSize != OpSize) {
7447 // A genuine extract is needed.
7448 SegReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: SegSize));
7449 MIRBuilder.buildExtract(Res: SegReg, Src: OpReg, Index: ExtractOffset);
7450 }
7451
7452 Register DstReg = MRI.createGenericVirtualRegister(Ty: NarrowTy);
7453 MIRBuilder.buildInsert(Res: DstReg, Src: SrcReg, Op: SegReg, Index: InsertOffset);
7454 DstRegs.push_back(Elt: DstReg);
7455 }
7456
7457 uint64_t WideSize = DstRegs.size() * NarrowSize;
7458 Register DstReg = MI.getOperand(i: 0).getReg();
7459 if (WideSize > RegTy.getSizeInBits()) {
7460 Register MergeReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: WideSize));
7461 MIRBuilder.buildMergeLikeInstr(Res: MergeReg, Ops: DstRegs);
7462 MIRBuilder.buildTrunc(Res: DstReg, Op: MergeReg);
7463 } else
7464 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: DstRegs);
7465
7466 MI.eraseFromParent();
7467 return Legalized;
7468}
7469
7470LegalizerHelper::LegalizeResult
7471LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
7472 LLT NarrowTy) {
7473 Register DstReg = MI.getOperand(i: 0).getReg();
7474 LLT DstTy = MRI.getType(Reg: DstReg);
7475
7476 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7477
7478 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7479 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7480 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7481 LLT LeftoverTy;
7482 if (!extractParts(Reg: MI.getOperand(i: 1).getReg(), RegTy: DstTy, MainTy: NarrowTy, LeftoverTy,
7483 VRegs&: Src0Regs, LeftoverVRegs&: Src0LeftoverRegs, MIRBuilder, MRI))
7484 return UnableToLegalize;
7485
7486 LLT Unused;
7487 if (!extractParts(Reg: MI.getOperand(i: 2).getReg(), RegTy: DstTy, MainTy: NarrowTy, LeftoverTy&: Unused,
7488 VRegs&: Src1Regs, LeftoverVRegs&: Src1LeftoverRegs, MIRBuilder, MRI))
7489 llvm_unreachable("inconsistent extractParts result");
7490
7491 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7492 auto Inst = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {NarrowTy},
7493 SrcOps: {Src0Regs[I], Src1Regs[I]});
7494 DstRegs.push_back(Elt: Inst.getReg(Idx: 0));
7495 }
7496
7497 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7498 auto Inst = MIRBuilder.buildInstr(
7499 Opc: MI.getOpcode(),
7500 DstOps: {LeftoverTy}, SrcOps: {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7501 DstLeftoverRegs.push_back(Elt: Inst.getReg(Idx: 0));
7502 }
7503
7504 insertParts(DstReg, ResultTy: DstTy, PartTy: NarrowTy, PartRegs: DstRegs,
7505 LeftoverTy, LeftoverRegs: DstLeftoverRegs);
7506
7507 MI.eraseFromParent();
7508 return Legalized;
7509}
7510
7511LegalizerHelper::LegalizeResult
7512LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
7513 LLT NarrowTy) {
7514 if (TypeIdx != 0)
7515 return UnableToLegalize;
7516
7517 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7518
7519 LLT DstTy = MRI.getType(Reg: DstReg);
7520 if (DstTy.isVector())
7521 return UnableToLegalize;
7522
7523 SmallVector<Register, 8> Parts;
7524 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7525 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, VRegs&: Parts, PadStrategy: MI.getOpcode());
7526 buildWidenedRemergeToDst(DstReg, LCMTy, RemergeRegs: Parts);
7527
7528 MI.eraseFromParent();
7529 return Legalized;
7530}
7531
7532LegalizerHelper::LegalizeResult
7533LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
7534 LLT NarrowTy) {
7535 if (TypeIdx != 0)
7536 return UnableToLegalize;
7537
7538 Register CondReg = MI.getOperand(i: 1).getReg();
7539 LLT CondTy = MRI.getType(Reg: CondReg);
7540 if (CondTy.isVector()) // TODO: Handle vselect
7541 return UnableToLegalize;
7542
7543 Register DstReg = MI.getOperand(i: 0).getReg();
7544 LLT DstTy = MRI.getType(Reg: DstReg);
7545
7546 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7547 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7548 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7549 LLT LeftoverTy;
7550 if (!extractParts(Reg: MI.getOperand(i: 2).getReg(), RegTy: DstTy, MainTy: NarrowTy, LeftoverTy,
7551 VRegs&: Src1Regs, LeftoverVRegs&: Src1LeftoverRegs, MIRBuilder, MRI))
7552 return UnableToLegalize;
7553
7554 LLT Unused;
7555 if (!extractParts(Reg: MI.getOperand(i: 3).getReg(), RegTy: DstTy, MainTy: NarrowTy, LeftoverTy&: Unused,
7556 VRegs&: Src2Regs, LeftoverVRegs&: Src2LeftoverRegs, MIRBuilder, MRI))
7557 llvm_unreachable("inconsistent extractParts result");
7558
7559 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7560 auto Select = MIRBuilder.buildSelect(Res: NarrowTy,
7561 Tst: CondReg, Op0: Src1Regs[I], Op1: Src2Regs[I]);
7562 DstRegs.push_back(Elt: Select.getReg(Idx: 0));
7563 }
7564
7565 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7566 auto Select = MIRBuilder.buildSelect(
7567 Res: LeftoverTy, Tst: CondReg, Op0: Src1LeftoverRegs[I], Op1: Src2LeftoverRegs[I]);
7568 DstLeftoverRegs.push_back(Elt: Select.getReg(Idx: 0));
7569 }
7570
7571 insertParts(DstReg, ResultTy: DstTy, PartTy: NarrowTy, PartRegs: DstRegs,
7572 LeftoverTy, LeftoverRegs: DstLeftoverRegs);
7573
7574 MI.eraseFromParent();
7575 return Legalized;
7576}
7577
7578LegalizerHelper::LegalizeResult
7579LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
7580 LLT NarrowTy) {
7581 if (TypeIdx != 1)
7582 return UnableToLegalize;
7583
7584 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7585 unsigned NarrowSize = NarrowTy.getSizeInBits();
7586
7587 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7588 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_POISON;
7589
7590 MachineIRBuilder &B = MIRBuilder;
7591 auto UnmergeSrc = B.buildUnmerge(Res: NarrowTy, Op: SrcReg);
7592 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
7593 auto C_0 = B.buildConstant(Res: NarrowTy, Val: 0);
7594 auto HiIsZero = B.buildICmp(Pred: CmpInst::ICMP_EQ, Res: LLT::integer(SizeInBits: 1),
7595 Op0: UnmergeSrc.getReg(Idx: 1), Op1: C_0);
7596 auto LoCTLZ = IsUndef ? B.buildCTLZ_ZERO_POISON(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 0))
7597 : B.buildCTLZ(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 0));
7598 auto C_NarrowSize = B.buildConstant(Res: DstTy, Val: NarrowSize);
7599 auto HiIsZeroCTLZ = B.buildAdd(Dst: DstTy, Src0: LoCTLZ, Src1: C_NarrowSize);
7600 auto HiCTLZ = B.buildCTLZ_ZERO_POISON(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 1));
7601 B.buildSelect(Res: DstReg, Tst: HiIsZero, Op0: HiIsZeroCTLZ, Op1: HiCTLZ);
7602
7603 MI.eraseFromParent();
7604 return Legalized;
7605 }
7606
7607 return UnableToLegalize;
7608}
7609
7610LegalizerHelper::LegalizeResult
7611LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
7612 LLT NarrowTy) {
7613 if (TypeIdx != 1)
7614 return UnableToLegalize;
7615
7616 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7617 unsigned NarrowSize = NarrowTy.getSizeInBits();
7618
7619 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7620 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_POISON;
7621
7622 MachineIRBuilder &B = MIRBuilder;
7623 auto UnmergeSrc = B.buildUnmerge(Res: NarrowTy, Op: SrcReg);
7624 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7625 auto C_0 = B.buildConstant(Res: NarrowTy, Val: 0);
7626 auto LoIsZero = B.buildICmp(Pred: CmpInst::ICMP_EQ, Res: LLT::scalar(SizeInBits: 1),
7627 Op0: UnmergeSrc.getReg(Idx: 0), Op1: C_0);
7628 auto HiCTTZ = IsUndef ? B.buildCTTZ_ZERO_POISON(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 1))
7629 : B.buildCTTZ(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 1));
7630 auto C_NarrowSize = B.buildConstant(Res: DstTy, Val: NarrowSize);
7631 auto LoIsZeroCTTZ = B.buildAdd(Dst: DstTy, Src0: HiCTTZ, Src1: C_NarrowSize);
7632 auto LoCTTZ = B.buildCTTZ_ZERO_POISON(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 0));
7633 B.buildSelect(Res: DstReg, Tst: LoIsZero, Op0: LoIsZeroCTTZ, Op1: LoCTTZ);
7634
7635 MI.eraseFromParent();
7636 return Legalized;
7637 }
7638
7639 return UnableToLegalize;
7640}
7641
7642LegalizerHelper::LegalizeResult
7643LegalizerHelper::narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx,
7644 LLT NarrowTy) {
7645 if (TypeIdx != 1)
7646 return UnableToLegalize;
7647
7648 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7649 unsigned NarrowSize = NarrowTy.getSizeInBits();
7650
7651 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7652 return UnableToLegalize;
7653
7654 MachineIRBuilder &B = MIRBuilder;
7655
7656 auto UnmergeSrc = B.buildUnmerge(Res: NarrowTy, Op: SrcReg);
7657 Register Lo = UnmergeSrc.getReg(Idx: 0);
7658 Register Hi = UnmergeSrc.getReg(Idx: 1);
7659
7660 auto ShAmt = B.buildConstant(Res: NarrowTy, Val: NarrowSize - 1);
7661 auto Sign = B.buildAShr(Dst: NarrowTy, Src0: Hi, Src1: ShAmt);
7662
7663 auto HiIsSign = B.buildICmp(Pred: CmpInst::ICMP_EQ, Res: LLT::scalar(SizeInBits: 1), Op0: Hi, Op1: Sign);
7664
7665 // Invert Lo if Hi is negative. Then count the leading zeros. If there are no
7666 // leading zeros, then the MSB of Lo is different than the MSB of Hi.
7667 // Otherwise the leading zeros represent additional sign bits of the original
7668 // value.
7669 auto LoInv = B.buildXor(Dst: DstTy, Src0: Lo, Src1: Sign);
7670 auto LoCTLZ = B.buildCTLZ(Dst: DstTy, Src0: LoInv);
7671
7672 // Add NarrowSize-1 to LoCTLZ. This is the full CTLS if Hi is all sign bits.
7673 auto C_NarrowSizeM1 = B.buildConstant(Res: DstTy, Val: NarrowSize - 1);
7674 auto HiIsSignCTLS = B.buildAdd(Dst: DstTy, Src0: LoCTLZ, Src1: C_NarrowSizeM1);
7675
7676 auto HiCTLS = B.buildCTLS(Dst: DstTy, Src0: Hi);
7677
7678 B.buildSelect(Res: DstReg, Tst: HiIsSign, Op0: HiIsSignCTLS, Op1: HiCTLS);
7679
7680 MI.eraseFromParent();
7681 return Legalized;
7682}
7683
7684LegalizerHelper::LegalizeResult
7685LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
7686 LLT NarrowTy) {
7687 if (TypeIdx != 1)
7688 return UnableToLegalize;
7689
7690 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7691 unsigned NarrowSize = NarrowTy.getSizeInBits();
7692
7693 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7694 auto UnmergeSrc = MIRBuilder.buildUnmerge(Res: NarrowTy, Op: MI.getOperand(i: 1));
7695
7696 auto LoCTPOP = MIRBuilder.buildCTPOP(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 0));
7697 auto HiCTPOP = MIRBuilder.buildCTPOP(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 1));
7698 MIRBuilder.buildAdd(Dst: DstReg, Src0: HiCTPOP, Src1: LoCTPOP);
7699
7700 MI.eraseFromParent();
7701 return Legalized;
7702 }
7703
7704 return UnableToLegalize;
7705}
7706
7707LegalizerHelper::LegalizeResult
7708LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
7709 LLT NarrowTy) {
7710 if (TypeIdx != 1)
7711 return UnableToLegalize;
7712
7713 MachineIRBuilder &B = MIRBuilder;
7714 Register ExpReg = MI.getOperand(i: 2).getReg();
7715 LLT ExpTy = MRI.getType(Reg: ExpReg);
7716
7717 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7718
7719 // Clamp the exponent to the range of the target type.
7720 auto MinExp = B.buildConstant(Res: ExpTy, Val: minIntN(N: ClampSize));
7721 auto ClampMin = B.buildSMax(Dst: ExpTy, Src0: ExpReg, Src1: MinExp);
7722 auto MaxExp = B.buildConstant(Res: ExpTy, Val: maxIntN(N: ClampSize));
7723 auto Clamp = B.buildSMin(Dst: ExpTy, Src0: ClampMin, Src1: MaxExp);
7724
7725 auto Trunc = B.buildTrunc(Res: NarrowTy, Op: Clamp);
7726 Observer.changingInstr(MI);
7727 MI.getOperand(i: 2).setReg(Trunc.getReg(Idx: 0));
7728 Observer.changedInstr(MI);
7729 return Legalized;
7730}
7731
7732LegalizerHelper::LegalizeResult
7733LegalizerHelper::lowerBitCount(MachineInstr &MI) {
7734 unsigned Opc = MI.getOpcode();
7735 const auto &TII = MIRBuilder.getTII();
7736 auto isSupported = [this](const LegalityQuery &Q) {
7737 auto QAction = LI.getAction(Query: Q).Action;
7738 return QAction == Legal || QAction == Libcall || QAction == Custom;
7739 };
7740 switch (Opc) {
7741 default:
7742 return UnableToLegalize;
7743 case TargetOpcode::G_CTLZ_ZERO_POISON: {
7744 // This trivially expands to CTLZ.
7745 Observer.changingInstr(MI);
7746 MI.setDesc(TII.get(Opcode: TargetOpcode::G_CTLZ));
7747 Observer.changedInstr(MI);
7748 return Legalized;
7749 }
7750 case TargetOpcode::G_CTLZ: {
7751 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7752 unsigned Len = SrcTy.getScalarSizeInBits();
7753
7754 if (isSupported({TargetOpcode::G_CTLZ_ZERO_POISON, {DstTy, SrcTy}})) {
7755 // If CTLZ_ZERO_POISON is supported, emit that and a select for zero.
7756 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_POISON(Dst: DstTy, Src0: SrcReg);
7757 auto ZeroSrc = MIRBuilder.buildConstant(Res: SrcTy, Val: 0);
7758 auto ICmp = MIRBuilder.buildICmp(
7759 Pred: CmpInst::ICMP_EQ, Res: SrcTy.changeElementSize(NewEltSize: 1), Op0: SrcReg, Op1: ZeroSrc);
7760 auto LenConst = MIRBuilder.buildConstant(Res: DstTy, Val: Len);
7761 MIRBuilder.buildSelect(Res: DstReg, Tst: ICmp, Op0: LenConst, Op1: CtlzZU);
7762 MI.eraseFromParent();
7763 return Legalized;
7764 }
7765 // for now, we do this:
7766 // NewLen = NextPowerOf2(Len);
7767 // x = x | (x >> 1);
7768 // x = x | (x >> 2);
7769 // ...
7770 // x = x | (x >>16);
7771 // x = x | (x >>32); // for 64-bit input
7772 // Upto NewLen/2
7773 // return Len - popcount(x);
7774 //
7775 // Ref: "Hacker's Delight" by Henry Warren
7776 Register Op = SrcReg;
7777 unsigned NewLen = PowerOf2Ceil(A: Len);
7778 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7779 auto MIBShiftAmt = MIRBuilder.buildConstant(Res: SrcTy, Val: 1ULL << i);
7780 auto MIBOp = MIRBuilder.buildOr(
7781 Dst: SrcTy, Src0: Op, Src1: MIRBuilder.buildLShr(Dst: SrcTy, Src0: Op, Src1: MIBShiftAmt));
7782 Op = MIBOp.getReg(Idx: 0);
7783 }
7784 auto MIBPop = MIRBuilder.buildCTPOP(Dst: DstTy, Src0: Op);
7785 MIRBuilder.buildSub(Dst: MI.getOperand(i: 0), Src0: MIRBuilder.buildConstant(Res: DstTy, Val: Len),
7786 Src1: MIBPop);
7787 MI.eraseFromParent();
7788 return Legalized;
7789 }
7790 case TargetOpcode::G_CTTZ_ZERO_POISON: {
7791 // This trivially expands to CTTZ.
7792 Observer.changingInstr(MI);
7793 MI.setDesc(TII.get(Opcode: TargetOpcode::G_CTTZ));
7794 Observer.changedInstr(MI);
7795 return Legalized;
7796 }
7797 case TargetOpcode::G_CTTZ: {
7798 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7799
7800 unsigned Len = SrcTy.getScalarSizeInBits();
7801 if (isSupported({TargetOpcode::G_CTTZ_ZERO_POISON, {DstTy, SrcTy}})) {
7802 // If CTTZ_ZERO_POISON is legal or custom, emit that and a select with
7803 // zero.
7804 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_POISON(Dst: DstTy, Src0: SrcReg);
7805 auto Zero = MIRBuilder.buildConstant(Res: SrcTy, Val: 0);
7806 auto ICmp = MIRBuilder.buildICmp(
7807 Pred: CmpInst::ICMP_EQ, Res: DstTy.changeElementSize(NewEltSize: 1), Op0: SrcReg, Op1: Zero);
7808 auto LenConst = MIRBuilder.buildConstant(Res: DstTy, Val: Len);
7809 MIRBuilder.buildSelect(Res: DstReg, Tst: ICmp, Op0: LenConst, Op1: CttzZU);
7810 MI.eraseFromParent();
7811 return Legalized;
7812 }
7813 // for now, we use: { return popcount(~x & (x - 1)); }
7814 // unless the target has ctlz but not ctpop, in which case we use:
7815 // { return 32 - nlz(~x & (x-1)); }
7816 // Ref: "Hacker's Delight" by Henry Warren
7817 auto MIBCstNeg1 = MIRBuilder.buildConstant(Res: SrcTy, Val: -1);
7818 auto MIBNot = MIRBuilder.buildXor(Dst: SrcTy, Src0: SrcReg, Src1: MIBCstNeg1);
7819 auto MIBTmp = MIRBuilder.buildAnd(
7820 Dst: SrcTy, Src0: MIBNot, Src1: MIRBuilder.buildAdd(Dst: SrcTy, Src0: SrcReg, Src1: MIBCstNeg1));
7821 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7822 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7823 auto MIBCstLen = MIRBuilder.buildConstant(Res: SrcTy, Val: Len);
7824 MIRBuilder.buildSub(Dst: MI.getOperand(i: 0), Src0: MIBCstLen,
7825 Src1: MIRBuilder.buildCTLZ(Dst: SrcTy, Src0: MIBTmp));
7826 MI.eraseFromParent();
7827 return Legalized;
7828 }
7829 Observer.changingInstr(MI);
7830 MI.setDesc(TII.get(Opcode: TargetOpcode::G_CTPOP));
7831 MI.getOperand(i: 1).setReg(MIBTmp.getReg(Idx: 0));
7832 Observer.changedInstr(MI);
7833 return Legalized;
7834 }
7835 case TargetOpcode::G_CTPOP: {
7836 Register SrcReg = MI.getOperand(i: 1).getReg();
7837 LLT Ty = MRI.getType(Reg: SrcReg);
7838 unsigned Size = Ty.getScalarSizeInBits();
7839 MachineIRBuilder &B = MIRBuilder;
7840
7841 // Bail out on irregular type lengths.
7842 if (Size > 128 || Size % 8 != 0)
7843 return UnableToLegalize;
7844
7845 // Count set bits in blocks of 2 bits. Default approach would be
7846 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7847 // We use following formula instead:
7848 // B2Count = val - { (val >> 1) & 0x55555555 }
7849 // since it gives same result in blocks of 2 with one instruction less.
7850 auto C_1 = B.buildConstant(Res: Ty, Val: 1);
7851 auto B2Set1LoTo1Hi = B.buildLShr(Dst: Ty, Src0: SrcReg, Src1: C_1);
7852 APInt B2Mask1HiTo0 = APInt::getSplat(NewLen: Size, V: APInt(8, 0x55));
7853 auto C_B2Mask1HiTo0 = B.buildConstant(Res: Ty, Val: B2Mask1HiTo0);
7854 auto B2Count1Hi = B.buildAnd(Dst: Ty, Src0: B2Set1LoTo1Hi, Src1: C_B2Mask1HiTo0);
7855 auto B2Count = B.buildSub(Dst: Ty, Src0: SrcReg, Src1: B2Count1Hi);
7856
7857 // In order to get count in blocks of 4 add values from adjacent block of 2.
7858 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7859 auto C_2 = B.buildConstant(Res: Ty, Val: 2);
7860 auto B4Set2LoTo2Hi = B.buildLShr(Dst: Ty, Src0: B2Count, Src1: C_2);
7861 APInt B4Mask2HiTo0 = APInt::getSplat(NewLen: Size, V: APInt(8, 0x33));
7862 auto C_B4Mask2HiTo0 = B.buildConstant(Res: Ty, Val: B4Mask2HiTo0);
7863 auto B4HiB2Count = B.buildAnd(Dst: Ty, Src0: B4Set2LoTo2Hi, Src1: C_B4Mask2HiTo0);
7864 auto B4LoB2Count = B.buildAnd(Dst: Ty, Src0: B2Count, Src1: C_B4Mask2HiTo0);
7865 auto B4Count = B.buildAdd(Dst: Ty, Src0: B4HiB2Count, Src1: B4LoB2Count);
7866
7867 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7868 // addition since count value sits in range {0,...,8} and 4 bits are enough
7869 // to hold such binary values. After addition high 4 bits still hold count
7870 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7871 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7872 auto C_4 = B.buildConstant(Res: Ty, Val: 4);
7873 auto B8HiB4Count = B.buildLShr(Dst: Ty, Src0: B4Count, Src1: C_4);
7874 auto B8CountDirty4Hi = B.buildAdd(Dst: Ty, Src0: B8HiB4Count, Src1: B4Count);
7875 APInt B8Mask4HiTo0 = APInt::getSplat(NewLen: Size, V: APInt(8, 0x0F));
7876 auto C_B8Mask4HiTo0 = B.buildConstant(Res: Ty, Val: B8Mask4HiTo0);
7877 auto B8Count = B.buildAnd(Dst: Ty, Src0: B8CountDirty4Hi, Src1: C_B8Mask4HiTo0);
7878
7879 assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
7880
7881 // Avoid the multiply when shift-add is cheaper.
7882 if (Size == 16 && !Ty.isVector()) {
7883 // v = (v + (v >> 8)) & 0xFF;
7884 auto C_8 = B.buildConstant(Res: Ty, Val: 8);
7885 auto HighSum = B.buildLShr(Dst: Ty, Src0: B8Count, Src1: C_8);
7886 auto Res = B.buildAdd(Dst: Ty, Src0: B8Count, Src1: HighSum);
7887 B.buildAnd(Dst: MI.getOperand(i: 0).getReg(), Src0: Res, Src1: B.buildConstant(Res: Ty, Val: 0xFF));
7888 MI.eraseFromParent();
7889 return Legalized;
7890 }
7891
7892 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7893 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7894 auto MulMask = B.buildConstant(Res: Ty, Val: APInt::getSplat(NewLen: Size, V: APInt(8, 0x01)));
7895
7896 // Shift count result from 8 high bits to low bits.
7897 auto C_SizeM8 = B.buildConstant(Res: Ty, Val: Size - 8);
7898
7899 auto IsMulSupported = [this](const LLT Ty) {
7900 auto Action = LI.getAction(Query: {TargetOpcode::G_MUL, {Ty}}).Action;
7901 return Action == Legal || Action == WidenScalar || Action == Custom;
7902 };
7903 if (IsMulSupported(Ty)) {
7904 auto ResTmp = B.buildMul(Dst: Ty, Src0: B8Count, Src1: MulMask);
7905 B.buildLShr(Dst: MI.getOperand(i: 0).getReg(), Src0: ResTmp, Src1: C_SizeM8);
7906 } else {
7907 auto ResTmp = B8Count;
7908 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7909 auto ShiftC = B.buildConstant(Res: Ty, Val: Shift);
7910 auto Shl = B.buildShl(Dst: Ty, Src0: ResTmp, Src1: ShiftC);
7911 ResTmp = B.buildAdd(Dst: Ty, Src0: ResTmp, Src1: Shl);
7912 }
7913 B.buildLShr(Dst: MI.getOperand(i: 0).getReg(), Src0: ResTmp, Src1: C_SizeM8);
7914 }
7915 MI.eraseFromParent();
7916 return Legalized;
7917 }
7918 case TargetOpcode::G_CTLS: {
7919 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7920
7921 // ctls(x) -> ctlz(x ^ (x >> (N - 1))) - 1
7922 auto SignIdxC =
7923 MIRBuilder.buildConstant(Res: SrcTy, Val: SrcTy.getScalarSizeInBits() - 1);
7924 auto OneC = MIRBuilder.buildConstant(Res: DstTy, Val: 1);
7925
7926 auto Shr = MIRBuilder.buildAShr(Dst: SrcTy, Src0: SrcReg, Src1: SignIdxC);
7927
7928 auto Xor = MIRBuilder.buildXor(Dst: SrcTy, Src0: SrcReg, Src1: Shr);
7929 auto Ctlz = MIRBuilder.buildCTLZ(Dst: DstTy, Src0: Xor);
7930
7931 MIRBuilder.buildSub(Dst: DstReg, Src0: Ctlz, Src1: OneC);
7932 MI.eraseFromParent();
7933 return Legalized;
7934 }
7935 }
7936}
7937
7938// Check that (every element of) Reg is undef or not an exact multiple of BW.
7939static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7940 Register Reg, unsigned BW) {
7941 return matchUnaryPredicate(
7942 MRI, Reg,
7943 Match: [=](const Constant *C) {
7944 // Null constant here means an undef.
7945 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(Val: C);
7946 return !CI || CI->getValue().urem(RHS: BW) != 0;
7947 },
7948 /*AllowUndefs*/ true);
7949}
7950
7951LegalizerHelper::LegalizeResult
7952LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
7953 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7954 LLT Ty = MRI.getType(Reg: Dst);
7955 LLT ShTy = MRI.getType(Reg: Z);
7956
7957 unsigned BW = Ty.getScalarSizeInBits();
7958
7959 if (!isPowerOf2_32(Value: BW))
7960 return UnableToLegalize;
7961
7962 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7963 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7964
7965 if (isNonZeroModBitWidthOrUndef(MRI, Reg: Z, BW)) {
7966 // fshl X, Y, Z -> fshr X, Y, -Z
7967 // fshr X, Y, Z -> fshl X, Y, -Z
7968 auto Zero = MIRBuilder.buildConstant(Res: ShTy, Val: 0);
7969 Z = MIRBuilder.buildSub(Dst: Ty, Src0: Zero, Src1: Z).getReg(Idx: 0);
7970 } else {
7971 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7972 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7973 auto One = MIRBuilder.buildConstant(Res: ShTy, Val: 1);
7974 if (IsFSHL) {
7975 Y = MIRBuilder.buildInstr(Opc: RevOpcode, DstOps: {Ty}, SrcOps: {X, Y, One}).getReg(Idx: 0);
7976 X = MIRBuilder.buildLShr(Dst: Ty, Src0: X, Src1: One).getReg(Idx: 0);
7977 } else {
7978 X = MIRBuilder.buildInstr(Opc: RevOpcode, DstOps: {Ty}, SrcOps: {X, Y, One}).getReg(Idx: 0);
7979 Y = MIRBuilder.buildShl(Dst: Ty, Src0: Y, Src1: One).getReg(Idx: 0);
7980 }
7981
7982 Z = MIRBuilder.buildNot(Dst: ShTy, Src0: Z).getReg(Idx: 0);
7983 }
7984
7985 MIRBuilder.buildInstr(Opc: RevOpcode, DstOps: {Dst}, SrcOps: {X, Y, Z});
7986 MI.eraseFromParent();
7987 return Legalized;
7988}
7989
7990LegalizerHelper::LegalizeResult
7991LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
7992 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7993 LLT Ty = MRI.getType(Reg: Dst);
7994 LLT ShTy = MRI.getType(Reg: Z);
7995
7996 const unsigned BW = Ty.getScalarSizeInBits();
7997 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7998
7999 Register ShX, ShY;
8000 Register ShAmt, InvShAmt;
8001
8002 // FIXME: Emit optimized urem by constant instead of letting it expand later.
8003 if (isNonZeroModBitWidthOrUndef(MRI, Reg: Z, BW)) {
8004 // fshl: X << C | Y >> (BW - C)
8005 // fshr: X << (BW - C) | Y >> C
8006 // where C = Z % BW is not zero
8007 auto BitWidthC = MIRBuilder.buildConstant(Res: ShTy, Val: BW);
8008 ShAmt = MIRBuilder.buildURem(Dst: ShTy, Src0: Z, Src1: BitWidthC).getReg(Idx: 0);
8009 InvShAmt = MIRBuilder.buildSub(Dst: ShTy, Src0: BitWidthC, Src1: ShAmt).getReg(Idx: 0);
8010 ShX = MIRBuilder.buildShl(Dst: Ty, Src0: X, Src1: IsFSHL ? ShAmt : InvShAmt).getReg(Idx: 0);
8011 ShY = MIRBuilder.buildLShr(Dst: Ty, Src0: Y, Src1: IsFSHL ? InvShAmt : ShAmt).getReg(Idx: 0);
8012 } else {
8013 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8014 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8015 auto Mask = MIRBuilder.buildConstant(Res: ShTy, Val: BW - 1);
8016 if (isPowerOf2_32(Value: BW)) {
8017 // Z % BW -> Z & (BW - 1)
8018 ShAmt = MIRBuilder.buildAnd(Dst: ShTy, Src0: Z, Src1: Mask).getReg(Idx: 0);
8019 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8020 auto NotZ = MIRBuilder.buildNot(Dst: ShTy, Src0: Z);
8021 InvShAmt = MIRBuilder.buildAnd(Dst: ShTy, Src0: NotZ, Src1: Mask).getReg(Idx: 0);
8022 } else {
8023 auto BitWidthC = MIRBuilder.buildConstant(Res: ShTy, Val: BW);
8024 ShAmt = MIRBuilder.buildURem(Dst: ShTy, Src0: Z, Src1: BitWidthC).getReg(Idx: 0);
8025 InvShAmt = MIRBuilder.buildSub(Dst: ShTy, Src0: Mask, Src1: ShAmt).getReg(Idx: 0);
8026 }
8027
8028 auto One = MIRBuilder.buildConstant(Res: ShTy, Val: 1);
8029 if (IsFSHL) {
8030 ShX = MIRBuilder.buildShl(Dst: Ty, Src0: X, Src1: ShAmt).getReg(Idx: 0);
8031 auto ShY1 = MIRBuilder.buildLShr(Dst: Ty, Src0: Y, Src1: One);
8032 ShY = MIRBuilder.buildLShr(Dst: Ty, Src0: ShY1, Src1: InvShAmt).getReg(Idx: 0);
8033 } else {
8034 auto ShX1 = MIRBuilder.buildShl(Dst: Ty, Src0: X, Src1: One);
8035 ShX = MIRBuilder.buildShl(Dst: Ty, Src0: ShX1, Src1: InvShAmt).getReg(Idx: 0);
8036 ShY = MIRBuilder.buildLShr(Dst: Ty, Src0: Y, Src1: ShAmt).getReg(Idx: 0);
8037 }
8038 }
8039
8040 MIRBuilder.buildOr(Dst, Src0: ShX, Src1: ShY, Flags: MachineInstr::Disjoint);
8041 MI.eraseFromParent();
8042 return Legalized;
8043}
8044
8045LegalizerHelper::LegalizeResult
8046LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
8047 // These operations approximately do the following (while avoiding undefined
8048 // shifts by BW):
8049 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8050 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8051 Register Dst = MI.getOperand(i: 0).getReg();
8052 LLT Ty = MRI.getType(Reg: Dst);
8053 LLT ShTy = MRI.getType(Reg: MI.getOperand(i: 3).getReg());
8054
8055 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
8056 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
8057
8058 // TODO: Use smarter heuristic that accounts for vector legalization.
8059 if (LI.getAction(Query: {RevOpcode, {Ty, ShTy}}).Action == Lower)
8060 return lowerFunnelShiftAsShifts(MI);
8061
8062 // This only works for powers of 2, fallback to shifts if it fails.
8063 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
8064 if (Result == UnableToLegalize)
8065 return lowerFunnelShiftAsShifts(MI);
8066 return Result;
8067}
8068
8069LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
8070 auto [Dst, Src] = MI.getFirst2Regs();
8071 LLT DstTy = MRI.getType(Reg: Dst);
8072 LLT SrcTy = MRI.getType(Reg: Src);
8073
8074 uint32_t DstTySize = DstTy.getSizeInBits();
8075 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
8076 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
8077
8078 if (!isPowerOf2_32(Value: DstTySize) || !isPowerOf2_32(Value: DstTyScalarSize) ||
8079 !isPowerOf2_32(Value: SrcTyScalarSize))
8080 return UnableToLegalize;
8081
8082 // The step between extend is too large, split it by creating an intermediate
8083 // extend instruction
8084 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
8085 LLT MidTy = SrcTy.changeElementSize(NewEltSize: SrcTyScalarSize * 2);
8086 // If the destination type is illegal, split it into multiple statements
8087 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
8088 auto NewExt = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {MidTy}, SrcOps: {Src});
8089 // Unmerge the vector
8090 LLT EltTy = MidTy.changeElementCount(
8091 EC: MidTy.getElementCount().divideCoefficientBy(RHS: 2));
8092 auto UnmergeSrc = MIRBuilder.buildUnmerge(Res: EltTy, Op: NewExt);
8093
8094 // ZExt the vectors
8095 LLT ZExtResTy = DstTy.changeElementCount(
8096 EC: DstTy.getElementCount().divideCoefficientBy(RHS: 2));
8097 auto ZExtRes1 = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {ZExtResTy},
8098 SrcOps: {UnmergeSrc.getReg(Idx: 0)});
8099 auto ZExtRes2 = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {ZExtResTy},
8100 SrcOps: {UnmergeSrc.getReg(Idx: 1)});
8101
8102 // Merge the ending vectors
8103 MIRBuilder.buildMergeLikeInstr(Res: Dst, Ops: {ZExtRes1, ZExtRes2});
8104
8105 MI.eraseFromParent();
8106 return Legalized;
8107 }
8108 return UnableToLegalize;
8109}
8110
8111LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
8112 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
8113 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
8114 // Similar to how operand splitting is done in SelectiondDAG, we can handle
8115 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
8116 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
8117 // %lo16(<4 x s16>) = G_TRUNC %inlo
8118 // %hi16(<4 x s16>) = G_TRUNC %inhi
8119 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
8120 // %res(<8 x s8>) = G_TRUNC %in16
8121
8122 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
8123
8124 Register DstReg = MI.getOperand(i: 0).getReg();
8125 Register SrcReg = MI.getOperand(i: 1).getReg();
8126 LLT DstTy = MRI.getType(Reg: DstReg);
8127 LLT SrcTy = MRI.getType(Reg: SrcReg);
8128
8129 if (DstTy.isVector() && isPowerOf2_32(Value: DstTy.getNumElements()) &&
8130 isPowerOf2_32(Value: DstTy.getScalarSizeInBits()) &&
8131 isPowerOf2_32(Value: SrcTy.getNumElements()) &&
8132 isPowerOf2_32(Value: SrcTy.getScalarSizeInBits())) {
8133 // Split input type.
8134 LLT SplitSrcTy = SrcTy.changeElementCount(
8135 EC: SrcTy.getElementCount().divideCoefficientBy(RHS: 2));
8136
8137 // First, split the source into two smaller vectors.
8138 SmallVector<Register, 2> SplitSrcs;
8139 extractParts(Reg: SrcReg, Ty: SplitSrcTy, NumParts: 2, VRegs&: SplitSrcs, MIRBuilder, MRI);
8140
8141 // Truncate the splits into intermediate narrower elements.
8142 LLT InterTy;
8143 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8144 InterTy = SplitSrcTy.changeElementSize(NewEltSize: DstTy.getScalarSizeInBits() * 2);
8145 else
8146 InterTy = SplitSrcTy.changeElementSize(NewEltSize: DstTy.getScalarSizeInBits());
8147 for (Register &Src : SplitSrcs)
8148 Src = MIRBuilder.buildTrunc(Res: InterTy, Op: Src).getReg(Idx: 0);
8149
8150 // Combine the new truncates into one vector
8151 auto Merge = MIRBuilder.buildMergeLikeInstr(
8152 Res: DstTy.changeElementSize(NewEltSize: InterTy.getScalarSizeInBits()), Ops: SplitSrcs);
8153
8154 // Truncate the new vector to the final result type
8155 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
8156 MIRBuilder.buildTrunc(Res: MI.getOperand(i: 0).getReg(), Op: Merge.getReg(Idx: 0));
8157 else
8158 MIRBuilder.buildCopy(Res: MI.getOperand(i: 0).getReg(), Op: Merge.getReg(Idx: 0));
8159
8160 MI.eraseFromParent();
8161
8162 return Legalized;
8163 }
8164 return UnableToLegalize;
8165}
8166
8167LegalizerHelper::LegalizeResult
8168LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
8169 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8170 auto Zero = MIRBuilder.buildConstant(Res: AmtTy, Val: 0);
8171 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8172 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8173 auto Neg = MIRBuilder.buildSub(Dst: AmtTy, Src0: Zero, Src1: Amt);
8174 MIRBuilder.buildInstr(Opc: RevRot, DstOps: {Dst}, SrcOps: {Src, Neg});
8175 MI.eraseFromParent();
8176 return Legalized;
8177}
8178
8179LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
8180 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8181
8182 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8183 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8184
8185 MIRBuilder.setInstrAndDebugLoc(MI);
8186
8187 // If a rotate in the other direction is supported, use it.
8188 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8189 if (LI.isLegalOrCustom(Query: {RevRot, {DstTy, SrcTy}}) &&
8190 isPowerOf2_32(Value: EltSizeInBits))
8191 return lowerRotateWithReverseRotate(MI);
8192
8193 // If a funnel shift is supported, use it.
8194 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8195 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8196 bool IsFShLegal = false;
8197 if ((IsFShLegal = LI.isLegalOrCustom(Query: {FShOpc, {DstTy, AmtTy}})) ||
8198 LI.isLegalOrCustom(Query: {RevFsh, {DstTy, AmtTy}})) {
8199 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
8200 Register R3) {
8201 MIRBuilder.buildInstr(Opc, DstOps: {R1}, SrcOps: {R2, R2, R3});
8202 MI.eraseFromParent();
8203 return Legalized;
8204 };
8205 // If a funnel shift in the other direction is supported, use it.
8206 if (IsFShLegal) {
8207 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8208 } else if (isPowerOf2_32(Value: EltSizeInBits)) {
8209 Amt = MIRBuilder.buildNeg(Dst: DstTy, Src0: Amt).getReg(Idx: 0);
8210 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8211 }
8212 }
8213
8214 auto Zero = MIRBuilder.buildConstant(Res: AmtTy, Val: 0);
8215 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8216 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8217 auto BitWidthMinusOneC = MIRBuilder.buildConstant(Res: AmtTy, Val: EltSizeInBits - 1);
8218 Register ShVal;
8219 Register RevShiftVal;
8220 if (isPowerOf2_32(Value: EltSizeInBits)) {
8221 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8222 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8223 auto NegAmt = MIRBuilder.buildSub(Dst: AmtTy, Src0: Zero, Src1: Amt);
8224 auto ShAmt = MIRBuilder.buildAnd(Dst: AmtTy, Src0: Amt, Src1: BitWidthMinusOneC);
8225 ShVal = MIRBuilder.buildInstr(Opc: ShOpc, DstOps: {DstTy}, SrcOps: {Src, ShAmt}).getReg(Idx: 0);
8226 auto RevAmt = MIRBuilder.buildAnd(Dst: AmtTy, Src0: NegAmt, Src1: BitWidthMinusOneC);
8227 RevShiftVal =
8228 MIRBuilder.buildInstr(Opc: RevShiftOpc, DstOps: {DstTy}, SrcOps: {Src, RevAmt}).getReg(Idx: 0);
8229 } else {
8230 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8231 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8232 auto BitWidthC = MIRBuilder.buildConstant(Res: AmtTy, Val: EltSizeInBits);
8233 auto ShAmt = MIRBuilder.buildURem(Dst: AmtTy, Src0: Amt, Src1: BitWidthC);
8234 ShVal = MIRBuilder.buildInstr(Opc: ShOpc, DstOps: {DstTy}, SrcOps: {Src, ShAmt}).getReg(Idx: 0);
8235 auto RevAmt = MIRBuilder.buildSub(Dst: AmtTy, Src0: BitWidthMinusOneC, Src1: ShAmt);
8236 auto One = MIRBuilder.buildConstant(Res: AmtTy, Val: 1);
8237 auto Inner = MIRBuilder.buildInstr(Opc: RevShiftOpc, DstOps: {DstTy}, SrcOps: {Src, One});
8238 RevShiftVal =
8239 MIRBuilder.buildInstr(Opc: RevShiftOpc, DstOps: {DstTy}, SrcOps: {Inner, RevAmt}).getReg(Idx: 0);
8240 }
8241 MIRBuilder.buildOr(Dst, Src0: ShVal, Src1: RevShiftVal, Flags: MachineInstr::Disjoint);
8242 MI.eraseFromParent();
8243 return Legalized;
8244}
8245
8246// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
8247// representation.
8248LegalizerHelper::LegalizeResult
8249LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
8250 auto [Dst, Src] = MI.getFirst2Regs();
8251 const LLT S64 = LLT::scalar(SizeInBits: 64);
8252 const LLT S32 = LLT::scalar(SizeInBits: 32);
8253 const LLT S1 = LLT::scalar(SizeInBits: 1);
8254
8255 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8256
8257 // unsigned cul2f(ulong u) {
8258 // uint lz = clz(u);
8259 // uint e = (u != 0) ? 127U + 63U - lz : 0;
8260 // u = (u << lz) & 0x7fffffffffffffffUL;
8261 // ulong t = u & 0xffffffffffUL;
8262 // uint v = (e << 23) | (uint)(u >> 40);
8263 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8264 // return as_float(v + r);
8265 // }
8266
8267 auto Zero32 = MIRBuilder.buildConstant(Res: S32, Val: 0);
8268 auto Zero64 = MIRBuilder.buildConstant(Res: S64, Val: 0);
8269
8270 auto LZ = MIRBuilder.buildCTLZ_ZERO_POISON(Dst: S32, Src0: Src);
8271
8272 auto K = MIRBuilder.buildConstant(Res: S32, Val: 127U + 63U);
8273 auto Sub = MIRBuilder.buildSub(Dst: S32, Src0: K, Src1: LZ);
8274
8275 auto NotZero = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: S1, Op0: Src, Op1: Zero64);
8276 auto E = MIRBuilder.buildSelect(Res: S32, Tst: NotZero, Op0: Sub, Op1: Zero32);
8277
8278 auto Mask0 = MIRBuilder.buildConstant(Res: S64, Val: (-1ULL) >> 1);
8279 auto ShlLZ = MIRBuilder.buildShl(Dst: S64, Src0: Src, Src1: LZ);
8280
8281 auto U = MIRBuilder.buildAnd(Dst: S64, Src0: ShlLZ, Src1: Mask0);
8282
8283 auto Mask1 = MIRBuilder.buildConstant(Res: S64, Val: 0xffffffffffULL);
8284 auto T = MIRBuilder.buildAnd(Dst: S64, Src0: U, Src1: Mask1);
8285
8286 auto UShl = MIRBuilder.buildLShr(Dst: S64, Src0: U, Src1: MIRBuilder.buildConstant(Res: S64, Val: 40));
8287 auto ShlE = MIRBuilder.buildShl(Dst: S32, Src0: E, Src1: MIRBuilder.buildConstant(Res: S32, Val: 23));
8288 auto V = MIRBuilder.buildOr(Dst: S32, Src0: ShlE, Src1: MIRBuilder.buildTrunc(Res: S32, Op: UShl));
8289
8290 auto C = MIRBuilder.buildConstant(Res: S64, Val: 0x8000000000ULL);
8291 auto RCmp = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_UGT, Res: S1, Op0: T, Op1: C);
8292 auto TCmp = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_EQ, Res: S1, Op0: T, Op1: C);
8293 auto One = MIRBuilder.buildConstant(Res: S32, Val: 1);
8294
8295 auto VTrunc1 = MIRBuilder.buildAnd(Dst: S32, Src0: V, Src1: One);
8296 auto Select0 = MIRBuilder.buildSelect(Res: S32, Tst: TCmp, Op0: VTrunc1, Op1: Zero32);
8297 auto R = MIRBuilder.buildSelect(Res: S32, Tst: RCmp, Op0: One, Op1: Select0);
8298 MIRBuilder.buildAdd(Dst, Src0: V, Src1: R);
8299
8300 MI.eraseFromParent();
8301 return Legalized;
8302}
8303
8304// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8305// operations and G_SITOFP
8306LegalizerHelper::LegalizeResult
8307LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) {
8308 auto [Dst, Src] = MI.getFirst2Regs();
8309 const LLT S64 = LLT::scalar(SizeInBits: 64);
8310 const LLT S32 = LLT::scalar(SizeInBits: 32);
8311 const LLT S1 = LLT::scalar(SizeInBits: 1);
8312
8313 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8314
8315 // For i64 < INT_MAX we simply reuse SITOFP.
8316 // Otherwise, divide i64 by 2, round result by ORing with the lowest bit
8317 // saved before division, convert to float by SITOFP, multiply the result
8318 // by 2.
8319 auto One = MIRBuilder.buildConstant(Res: S64, Val: 1);
8320 auto Zero = MIRBuilder.buildConstant(Res: S64, Val: 0);
8321 // Result if Src < INT_MAX
8322 auto SmallResult = MIRBuilder.buildSITOFP(Dst: S32, Src0: Src);
8323 // Result if Src >= INT_MAX
8324 auto Halved = MIRBuilder.buildLShr(Dst: S64, Src0: Src, Src1: One);
8325 auto LowerBit = MIRBuilder.buildAnd(Dst: S64, Src0: Src, Src1: One);
8326 auto RoundedHalved = MIRBuilder.buildOr(Dst: S64, Src0: Halved, Src1: LowerBit);
8327 auto HalvedFP = MIRBuilder.buildSITOFP(Dst: S32, Src0: RoundedHalved);
8328 auto LargeResult = MIRBuilder.buildFAdd(Dst: S32, Src0: HalvedFP, Src1: HalvedFP);
8329 // Check if the original value is larger than INT_MAX by comparing with
8330 // zero to pick one of the two conversions.
8331 auto IsLarge =
8332 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_SLT, Res: S1, Op0: Src, Op1: Zero);
8333 MIRBuilder.buildSelect(Res: Dst, Tst: IsLarge, Op0: LargeResult, Op1: SmallResult);
8334
8335 MI.eraseFromParent();
8336 return Legalized;
8337}
8338
8339// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8340// IEEE double representation.
8341LegalizerHelper::LegalizeResult
8342LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
8343 auto [Dst, Src] = MI.getFirst2Regs();
8344 const LLT S64 = LLT::scalar(SizeInBits: 64);
8345 const LLT S32 = LLT::scalar(SizeInBits: 32);
8346
8347 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8348
8349 // We create double value from 32 bit parts with 32 exponent difference.
8350 // Note that + and - are float operations that adjust the implicit leading
8351 // one, the bases 2^52 and 2^84 are for illustrative purposes.
8352 //
8353 // X = 2^52 * 1.0...LowBits
8354 // Y = 2^84 * 1.0...HighBits
8355 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8356 // = - 2^52 * 1.0...HighBits
8357 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
8358 auto TwoP52 = MIRBuilder.buildConstant(Res: S64, UINT64_C(0x4330000000000000));
8359 auto TwoP84 = MIRBuilder.buildConstant(Res: S64, UINT64_C(0x4530000000000000));
8360 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8361 auto TwoP52P84FP = MIRBuilder.buildFConstant(Res: S64, Val: TwoP52P84);
8362 auto HalfWidth = MIRBuilder.buildConstant(Res: S64, Val: 32);
8363
8364 auto LowBits = MIRBuilder.buildTrunc(Res: S32, Op: Src);
8365 LowBits = MIRBuilder.buildZExt(Res: S64, Op: LowBits);
8366 auto LowBitsFP = MIRBuilder.buildOr(Dst: S64, Src0: TwoP52, Src1: LowBits);
8367 auto HighBits = MIRBuilder.buildLShr(Dst: S64, Src0: Src, Src1: HalfWidth);
8368 auto HighBitsFP = MIRBuilder.buildOr(Dst: S64, Src0: TwoP84, Src1: HighBits);
8369 auto Scratch = MIRBuilder.buildFSub(Dst: S64, Src0: HighBitsFP, Src1: TwoP52P84FP);
8370 MIRBuilder.buildFAdd(Dst, Src0: Scratch, Src1: LowBitsFP);
8371
8372 MI.eraseFromParent();
8373 return Legalized;
8374}
8375
8376/// i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16. We cannot
8377/// convert fpround f64->f16 without double-rounding, so we manually perform the
8378/// lowering here where we know it is valid.
8379static LegalizerHelper::LegalizeResult
8380loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src,
8381 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8382 auto DstFpTy =
8383 SrcTy.changeElementType(NewEltTy: LLT::floatIEEE(SizeInBits: SrcTy.getScalarSizeInBits()));
8384 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8385 ? MIRBuilder.buildUITOFP(Dst: DstFpTy, Src0: Src)
8386 : MIRBuilder.buildSITOFP(Dst: DstFpTy, Src0: Src);
8387 LLT F32Ty = DstFpTy.changeElementSize(NewEltSize: 32);
8388 auto M2 = MIRBuilder.buildFPTrunc(Res: F32Ty, Op: M1);
8389 MIRBuilder.buildFPTrunc(Res: Dst, Op: M2);
8390 MI.eraseFromParent();
8391 return LegalizerHelper::Legalized;
8392}
8393
8394LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
8395 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8396
8397 if (SrcTy == LLT::scalar(SizeInBits: 1)) {
8398 auto True = MIRBuilder.buildFConstant(Res: DstTy, Val: 1.0);
8399 auto False = MIRBuilder.buildFConstant(Res: DstTy, Val: 0.0);
8400 MIRBuilder.buildSelect(Res: Dst, Tst: Src, Op0: True, Op1: False);
8401 MI.eraseFromParent();
8402 return Legalized;
8403 }
8404
8405 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8406 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8407
8408 if (SrcTy != LLT::scalar(SizeInBits: 64))
8409 return UnableToLegalize;
8410
8411 if (DstTy == LLT::scalar(SizeInBits: 32))
8412 // TODO: SelectionDAG has several alternative expansions to port which may
8413 // be more reasonable depending on the available instructions. We also need
8414 // a more advanced mechanism to choose an optimal version depending on
8415 // target features such as sitofp or CTLZ availability.
8416 return lowerU64ToF32WithSITOFP(MI);
8417
8418 if (DstTy == LLT::scalar(SizeInBits: 64))
8419 return lowerU64ToF64BitFloatOps(MI);
8420
8421 return UnableToLegalize;
8422}
8423
8424LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
8425 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8426
8427 const LLT I64 = LLT::integer(SizeInBits: 64);
8428 const LLT I32 = LLT::integer(SizeInBits: 32);
8429 const LLT I1 = LLT::integer(SizeInBits: 1);
8430
8431 if (SrcTy == I1) {
8432 auto True = MIRBuilder.buildFConstant(Res: DstTy, Val: -1.0);
8433 auto False = MIRBuilder.buildFConstant(Res: DstTy, Val: 0.0);
8434 MIRBuilder.buildSelect(Res: Dst, Tst: Src, Op0: True, Op1: False);
8435 MI.eraseFromParent();
8436 return Legalized;
8437 }
8438
8439 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8440 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8441
8442 if (SrcTy != I64)
8443 return UnableToLegalize;
8444
8445 if (DstTy.getScalarSizeInBits() == 32) {
8446 // signed cl2f(long l) {
8447 // long s = l >> 63;
8448 // float r = cul2f((l + s) ^ s);
8449 // return s ? -r : r;
8450 // }
8451 Register L = Src;
8452 auto SignBit = MIRBuilder.buildConstant(Res: I64, Val: 63);
8453 auto S = MIRBuilder.buildAShr(Dst: I64, Src0: L, Src1: SignBit);
8454
8455 auto LPlusS = MIRBuilder.buildAdd(Dst: I64, Src0: L, Src1: S);
8456 auto Xor = MIRBuilder.buildXor(Dst: I64, Src0: LPlusS, Src1: S);
8457 auto R = MIRBuilder.buildUITOFP(Dst: I32, Src0: Xor);
8458
8459 auto RNeg = MIRBuilder.buildFNeg(Dst: I32, Src0: R);
8460 auto SignNotZero = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: I1, Op0: S,
8461 Op1: MIRBuilder.buildConstant(Res: I64, Val: 0));
8462 MIRBuilder.buildSelect(Res: Dst, Tst: SignNotZero, Op0: RNeg, Op1: R);
8463 MI.eraseFromParent();
8464 return Legalized;
8465 }
8466
8467 return UnableToLegalize;
8468}
8469
8470LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
8471 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8472 const LLT S64 = LLT::scalar(SizeInBits: 64);
8473 const LLT S32 = LLT::scalar(SizeInBits: 32);
8474
8475 if (SrcTy != S64 && SrcTy != S32)
8476 return UnableToLegalize;
8477 if (DstTy != S32 && DstTy != S64)
8478 return UnableToLegalize;
8479
8480 // FPTOSI gives same result as FPTOUI for positive signed integers.
8481 // FPTOUI needs to deal with fp values that convert to unsigned integers
8482 // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
8483
8484 APInt TwoPExpInt = APInt::getSignMask(BitWidth: DstTy.getSizeInBits());
8485 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8486 : APFloat::IEEEdouble(),
8487 APInt::getZero(numBits: SrcTy.getSizeInBits()));
8488 TwoPExpFP.convertFromAPInt(Input: TwoPExpInt, IsSigned: false, RM: APFloat::rmNearestTiesToEven);
8489
8490 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(Dst: DstTy, Src0: Src);
8491
8492 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(Res: SrcTy, Val: TwoPExpFP);
8493 // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
8494 // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
8495 MachineInstrBuilder FSub = MIRBuilder.buildFSub(Dst: SrcTy, Src0: Src, Src1: Threshold);
8496 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(Dst: DstTy, Src0: FSub);
8497 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(Res: DstTy, Val: TwoPExpInt);
8498 MachineInstrBuilder Res = MIRBuilder.buildXor(Dst: DstTy, Src0: ResLowBits, Src1: ResHighBit);
8499
8500 const LLT S1 = LLT::scalar(SizeInBits: 1);
8501
8502 MachineInstrBuilder FCMP =
8503 MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_ULT, Res: S1, Op0: Src, Op1: Threshold);
8504 MIRBuilder.buildSelect(Res: Dst, Tst: FCMP, Op0: FPTOSI, Op1: Res);
8505
8506 MI.eraseFromParent();
8507 return Legalized;
8508}
8509
8510LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
8511 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8512 const LLT S64 = LLT::scalar(SizeInBits: 64);
8513 const LLT S32 = LLT::scalar(SizeInBits: 32);
8514
8515 // FIXME: Only f32 to i64 conversions are supported.
8516 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8517 return UnableToLegalize;
8518
8519 // Expand f32 -> i64 conversion
8520 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8521 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8522
8523 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8524
8525 auto ExponentMask = MIRBuilder.buildConstant(Res: SrcTy, Val: 0x7F800000);
8526 auto ExponentLoBit = MIRBuilder.buildConstant(Res: SrcTy, Val: 23);
8527
8528 auto AndExpMask = MIRBuilder.buildAnd(Dst: SrcTy, Src0: Src, Src1: ExponentMask);
8529 auto ExponentBits = MIRBuilder.buildLShr(Dst: SrcTy, Src0: AndExpMask, Src1: ExponentLoBit);
8530
8531 auto SignMask = MIRBuilder.buildConstant(Res: SrcTy,
8532 Val: APInt::getSignMask(BitWidth: SrcEltBits));
8533 auto AndSignMask = MIRBuilder.buildAnd(Dst: SrcTy, Src0: Src, Src1: SignMask);
8534 auto SignLowBit = MIRBuilder.buildConstant(Res: SrcTy, Val: SrcEltBits - 1);
8535 auto Sign = MIRBuilder.buildAShr(Dst: SrcTy, Src0: AndSignMask, Src1: SignLowBit);
8536 Sign = MIRBuilder.buildSExt(Res: DstTy, Op: Sign);
8537
8538 auto MantissaMask = MIRBuilder.buildConstant(Res: SrcTy, Val: 0x007FFFFF);
8539 auto AndMantissaMask = MIRBuilder.buildAnd(Dst: SrcTy, Src0: Src, Src1: MantissaMask);
8540 auto K = MIRBuilder.buildConstant(Res: SrcTy, Val: 0x00800000);
8541
8542 auto R = MIRBuilder.buildOr(Dst: SrcTy, Src0: AndMantissaMask, Src1: K);
8543 R = MIRBuilder.buildZExt(Res: DstTy, Op: R);
8544
8545 auto Bias = MIRBuilder.buildConstant(Res: SrcTy, Val: 127);
8546 auto Exponent = MIRBuilder.buildSub(Dst: SrcTy, Src0: ExponentBits, Src1: Bias);
8547 auto SubExponent = MIRBuilder.buildSub(Dst: SrcTy, Src0: Exponent, Src1: ExponentLoBit);
8548 auto ExponentSub = MIRBuilder.buildSub(Dst: SrcTy, Src0: ExponentLoBit, Src1: Exponent);
8549
8550 auto Shl = MIRBuilder.buildShl(Dst: DstTy, Src0: R, Src1: SubExponent);
8551 auto Srl = MIRBuilder.buildLShr(Dst: DstTy, Src0: R, Src1: ExponentSub);
8552
8553 const LLT S1 = LLT::scalar(SizeInBits: 1);
8554 auto CmpGt = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SGT,
8555 Res: S1, Op0: Exponent, Op1: ExponentLoBit);
8556
8557 R = MIRBuilder.buildSelect(Res: DstTy, Tst: CmpGt, Op0: Shl, Op1: Srl);
8558
8559 auto XorSign = MIRBuilder.buildXor(Dst: DstTy, Src0: R, Src1: Sign);
8560 auto Ret = MIRBuilder.buildSub(Dst: DstTy, Src0: XorSign, Src1: Sign);
8561
8562 auto ZeroSrcTy = MIRBuilder.buildConstant(Res: SrcTy, Val: 0);
8563
8564 auto ExponentLt0 = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SLT,
8565 Res: S1, Op0: Exponent, Op1: ZeroSrcTy);
8566
8567 auto ZeroDstTy = MIRBuilder.buildConstant(Res: DstTy, Val: 0);
8568 MIRBuilder.buildSelect(Res: Dst, Tst: ExponentLt0, Op0: ZeroDstTy, Op1: Ret);
8569
8570 MI.eraseFromParent();
8571 return Legalized;
8572}
8573
8574LegalizerHelper::LegalizeResult
8575LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
8576 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8577
8578 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8579 unsigned SatWidth = DstTy.getScalarSizeInBits();
8580
8581 // Determine minimum and maximum integer values and their corresponding
8582 // floating-point values.
8583 APInt MinInt, MaxInt;
8584 if (IsSigned) {
8585 MinInt = APInt::getSignedMinValue(numBits: SatWidth);
8586 MaxInt = APInt::getSignedMaxValue(numBits: SatWidth);
8587 } else {
8588 MinInt = APInt::getMinValue(numBits: SatWidth);
8589 MaxInt = APInt::getMaxValue(numBits: SatWidth);
8590 }
8591
8592 const fltSemantics &Semantics = getFltSemanticForLLT(Ty: SrcTy.getScalarType());
8593 APFloat MinFloat(Semantics);
8594 APFloat MaxFloat(Semantics);
8595
8596 APFloat::opStatus MinStatus =
8597 MinFloat.convertFromAPInt(Input: MinInt, IsSigned, RM: APFloat::rmTowardZero);
8598 APFloat::opStatus MaxStatus =
8599 MaxFloat.convertFromAPInt(Input: MaxInt, IsSigned, RM: APFloat::rmTowardZero);
8600 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8601 !(MaxStatus & APFloat::opStatus::opInexact);
8602
8603 // If the integer bounds are exactly representable as floats, emit a
8604 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8605 // and selects.
8606 if (AreExactFloatBounds) {
8607 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8608 auto MaxC = MIRBuilder.buildFConstant(Res: SrcTy, Val: MinFloat);
8609 auto MaxP =
8610 MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_OGT, Res: LLT::integer(SizeInBits: 1), Op0: Src, Op1: MaxC);
8611 auto Max = MIRBuilder.buildSelect(Res: SrcTy, Tst: MaxP, Op0: Src, Op1: MaxC);
8612 // Clamp by MaxFloat from above. NaN cannot occur.
8613 auto MinC = MIRBuilder.buildFConstant(Res: SrcTy, Val: MaxFloat);
8614 auto MinP = MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_OLT, Res: LLT::integer(SizeInBits: 1), Op0: Max,
8615 Op1: MinC, Flags: MachineInstr::FmNoNans);
8616 auto Min =
8617 MIRBuilder.buildSelect(Res: SrcTy, Tst: MinP, Op0: Max, Op1: MinC, Flags: MachineInstr::FmNoNans);
8618 // Convert clamped value to integer. In the unsigned case we're done,
8619 // because we mapped NaN to MinFloat, which will cast to zero.
8620 if (!IsSigned) {
8621 MIRBuilder.buildFPTOUI(Dst, Src0: Min);
8622 MI.eraseFromParent();
8623 return Legalized;
8624 }
8625
8626 // Otherwise, select 0 if Src is NaN.
8627 auto FpToInt = MIRBuilder.buildFPTOSI(Dst: DstTy, Src0: Min);
8628 auto IsZero =
8629 MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_UNO, Res: LLT::integer(SizeInBits: 1), Op0: Src, Op1: Src);
8630 MIRBuilder.buildSelect(Res: Dst, Tst: IsZero, Op0: MIRBuilder.buildConstant(Res: DstTy, Val: 0),
8631 Op1: FpToInt);
8632 MI.eraseFromParent();
8633 return Legalized;
8634 }
8635
8636 // Result of direct conversion. The assumption here is that the operation is
8637 // non-trapping and it's fine to apply it to an out-of-range value if we
8638 // select it away later.
8639 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(Dst: DstTy, Src0: Src)
8640 : MIRBuilder.buildFPTOUI(Dst: DstTy, Src0: Src);
8641
8642 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8643 // MinInt if Src is NaN.
8644 auto ULT = MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_ULT, Res: LLT::integer(SizeInBits: 1), Op0: Src,
8645 Op1: MIRBuilder.buildFConstant(Res: SrcTy, Val: MinFloat));
8646 auto Max = MIRBuilder.buildSelect(
8647 Res: DstTy, Tst: ULT, Op0: MIRBuilder.buildConstant(Res: DstTy, Val: MinInt), Op1: FpToInt);
8648 // If Src OGT MaxFloat, select MaxInt.
8649 auto OGT = MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_OGT, Res: LLT::integer(SizeInBits: 1), Op0: Src,
8650 Op1: MIRBuilder.buildFConstant(Res: SrcTy, Val: MaxFloat));
8651
8652 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8653 // is already zero.
8654 if (!IsSigned) {
8655 MIRBuilder.buildSelect(Res: Dst, Tst: OGT, Op0: MIRBuilder.buildConstant(Res: DstTy, Val: MaxInt),
8656 Op1: Max);
8657 MI.eraseFromParent();
8658 return Legalized;
8659 }
8660
8661 // Otherwise, select 0 if Src is NaN.
8662 auto Min = MIRBuilder.buildSelect(
8663 Res: DstTy, Tst: OGT, Op0: MIRBuilder.buildConstant(Res: DstTy, Val: MaxInt), Op1: Max);
8664 auto IsZero =
8665 MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_UNO, Res: LLT::integer(SizeInBits: 1), Op0: Src, Op1: Src);
8666 MIRBuilder.buildSelect(Res: Dst, Tst: IsZero, Op0: MIRBuilder.buildConstant(Res: DstTy, Val: 0), Op1: Min);
8667 MI.eraseFromParent();
8668 return Legalized;
8669}
8670
8671// Floating-point conversions using truncating and extending loads and stores.
8672LegalizerHelper::LegalizeResult
8673LegalizerHelper::lowerFPExtAndTruncMem(MachineInstr &MI) {
8674 assert((MI.getOpcode() == TargetOpcode::G_FPEXT ||
8675 MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
8676 "Only G_FPEXT and G_FPTRUNC are expected");
8677
8678 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8679 MachinePointerInfo PtrInfo;
8680 unsigned StoreOpc;
8681 unsigned LoadOpc;
8682 LLT StackTy;
8683 if (MI.getOpcode() == TargetOpcode::G_FPEXT) {
8684 StackTy = SrcTy;
8685 StoreOpc = TargetOpcode::G_STORE;
8686 LoadOpc = TargetOpcode::G_FPEXTLOAD;
8687 } else {
8688 StackTy = DstTy;
8689 StoreOpc = TargetOpcode::G_FPTRUNCSTORE;
8690 LoadOpc = TargetOpcode::G_LOAD;
8691 }
8692
8693 Align StackTyAlign = getStackTemporaryAlignment(Ty: StackTy);
8694 auto StackTemp =
8695 createStackTemporary(Bytes: StackTy.getSizeInBytes(), Alignment: StackTyAlign, PtrInfo);
8696
8697 MachineFunction &MF = MIRBuilder.getMF();
8698 auto *StoreMMO = MF.getMachineMemOperand(PtrInfo, f: MachineMemOperand::MOStore,
8699 MemTy: StackTy, base_alignment: StackTyAlign);
8700 MIRBuilder.buildStoreInstr(Opcode: StoreOpc, Val: SrcReg, Addr: StackTemp, MMO&: *StoreMMO);
8701
8702 auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, f: MachineMemOperand::MOLoad,
8703 MemTy: StackTy, base_alignment: StackTyAlign);
8704 MIRBuilder.buildLoadInstr(Opcode: LoadOpc, Res: DstReg, Addr: StackTemp, MMO&: *LoadMMO);
8705
8706 MI.eraseFromParent();
8707 return Legalized;
8708}
8709
8710// f64 -> f16 conversion using round-to-nearest-even rounding mode.
8711LegalizerHelper::LegalizeResult
8712LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
8713 const LLT S1 = LLT::scalar(SizeInBits: 1);
8714 const LLT S32 = LLT::scalar(SizeInBits: 32);
8715
8716 auto [Dst, Src] = MI.getFirst2Regs();
8717 assert(MRI.getType(Dst).getScalarType() == LLT::float16() &&
8718 MRI.getType(Src).getScalarType() == LLT::float64());
8719
8720 if (MRI.getType(Reg: Src).isVector()) // TODO: Handle vectors directly.
8721 return UnableToLegalize;
8722
8723 if (MI.getFlag(Flag: MachineInstr::FmAfn)) {
8724 unsigned Flags = MI.getFlags();
8725 auto Src32 = MIRBuilder.buildFPTrunc(Res: S32, Op: Src, Flags);
8726 MIRBuilder.buildFPTrunc(Res: Dst, Op: Src32, Flags);
8727 MI.eraseFromParent();
8728 return Legalized;
8729 }
8730
8731 const unsigned ExpMask = 0x7ff;
8732 const unsigned ExpBiasf64 = 1023;
8733 const unsigned ExpBiasf16 = 15;
8734
8735 auto Unmerge = MIRBuilder.buildUnmerge(Res: S32, Op: Src);
8736 Register U = Unmerge.getReg(Idx: 0);
8737 Register UH = Unmerge.getReg(Idx: 1);
8738
8739 auto E = MIRBuilder.buildLShr(Dst: S32, Src0: UH, Src1: MIRBuilder.buildConstant(Res: S32, Val: 20));
8740 E = MIRBuilder.buildAnd(Dst: S32, Src0: E, Src1: MIRBuilder.buildConstant(Res: S32, Val: ExpMask));
8741
8742 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8743 // add the f16 bias (15) to get the biased exponent for the f16 format.
8744 E = MIRBuilder.buildAdd(
8745 Dst: S32, Src0: E, Src1: MIRBuilder.buildConstant(Res: S32, Val: -ExpBiasf64 + ExpBiasf16));
8746
8747 auto M = MIRBuilder.buildLShr(Dst: S32, Src0: UH, Src1: MIRBuilder.buildConstant(Res: S32, Val: 8));
8748 M = MIRBuilder.buildAnd(Dst: S32, Src0: M, Src1: MIRBuilder.buildConstant(Res: S32, Val: 0xffe));
8749
8750 auto MaskedSig = MIRBuilder.buildAnd(Dst: S32, Src0: UH,
8751 Src1: MIRBuilder.buildConstant(Res: S32, Val: 0x1ff));
8752 MaskedSig = MIRBuilder.buildOr(Dst: S32, Src0: MaskedSig, Src1: U);
8753
8754 auto Zero = MIRBuilder.buildConstant(Res: S32, Val: 0);
8755 auto SigCmpNE0 = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: S1, Op0: MaskedSig, Op1: Zero);
8756 auto Lo40Set = MIRBuilder.buildZExt(Res: S32, Op: SigCmpNE0);
8757 M = MIRBuilder.buildOr(Dst: S32, Src0: M, Src1: Lo40Set);
8758
8759 // (M != 0 ? 0x0200 : 0) | 0x7c00;
8760 auto Bits0x200 = MIRBuilder.buildConstant(Res: S32, Val: 0x0200);
8761 auto CmpM_NE0 = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: S1, Op0: M, Op1: Zero);
8762 auto SelectCC = MIRBuilder.buildSelect(Res: S32, Tst: CmpM_NE0, Op0: Bits0x200, Op1: Zero);
8763
8764 auto Bits0x7c00 = MIRBuilder.buildConstant(Res: S32, Val: 0x7c00);
8765 auto I = MIRBuilder.buildOr(Dst: S32, Src0: SelectCC, Src1: Bits0x7c00);
8766
8767 // N = M | (E << 12);
8768 auto EShl12 = MIRBuilder.buildShl(Dst: S32, Src0: E, Src1: MIRBuilder.buildConstant(Res: S32, Val: 12));
8769 auto N = MIRBuilder.buildOr(Dst: S32, Src0: M, Src1: EShl12);
8770
8771 // B = clamp(1-E, 0, 13);
8772 auto One = MIRBuilder.buildConstant(Res: S32, Val: 1);
8773 auto OneSubExp = MIRBuilder.buildSub(Dst: S32, Src0: One, Src1: E);
8774 auto B = MIRBuilder.buildSMax(Dst: S32, Src0: OneSubExp, Src1: Zero);
8775 B = MIRBuilder.buildSMin(Dst: S32, Src0: B, Src1: MIRBuilder.buildConstant(Res: S32, Val: 13));
8776
8777 auto SigSetHigh = MIRBuilder.buildOr(Dst: S32, Src0: M,
8778 Src1: MIRBuilder.buildConstant(Res: S32, Val: 0x1000));
8779
8780 auto D = MIRBuilder.buildLShr(Dst: S32, Src0: SigSetHigh, Src1: B);
8781 auto D0 = MIRBuilder.buildShl(Dst: S32, Src0: D, Src1: B);
8782
8783 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: S1,
8784 Op0: D0, Op1: SigSetHigh);
8785 auto D1 = MIRBuilder.buildZExt(Res: S32, Op: D0_NE_SigSetHigh);
8786 D = MIRBuilder.buildOr(Dst: S32, Src0: D, Src1: D1);
8787
8788 auto CmpELtOne = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SLT, Res: S1, Op0: E, Op1: One);
8789 auto V = MIRBuilder.buildSelect(Res: S32, Tst: CmpELtOne, Op0: D, Op1: N);
8790
8791 auto VLow3 = MIRBuilder.buildAnd(Dst: S32, Src0: V, Src1: MIRBuilder.buildConstant(Res: S32, Val: 7));
8792 V = MIRBuilder.buildLShr(Dst: S32, Src0: V, Src1: MIRBuilder.buildConstant(Res: S32, Val: 2));
8793
8794 auto VLow3Eq3 = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_EQ, Res: S1, Op0: VLow3,
8795 Op1: MIRBuilder.buildConstant(Res: S32, Val: 3));
8796 auto V0 = MIRBuilder.buildZExt(Res: S32, Op: VLow3Eq3);
8797
8798 auto VLow3Gt5 = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SGT, Res: S1, Op0: VLow3,
8799 Op1: MIRBuilder.buildConstant(Res: S32, Val: 5));
8800 auto V1 = MIRBuilder.buildZExt(Res: S32, Op: VLow3Gt5);
8801
8802 V1 = MIRBuilder.buildOr(Dst: S32, Src0: V0, Src1: V1);
8803 V = MIRBuilder.buildAdd(Dst: S32, Src0: V, Src1: V1);
8804
8805 auto CmpEGt30 = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SGT, Res: S1,
8806 Op0: E, Op1: MIRBuilder.buildConstant(Res: S32, Val: 30));
8807 V = MIRBuilder.buildSelect(Res: S32, Tst: CmpEGt30,
8808 Op0: MIRBuilder.buildConstant(Res: S32, Val: 0x7c00), Op1: V);
8809
8810 auto CmpEGt1039 = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_EQ, Res: S1,
8811 Op0: E, Op1: MIRBuilder.buildConstant(Res: S32, Val: 1039));
8812 V = MIRBuilder.buildSelect(Res: S32, Tst: CmpEGt1039, Op0: I, Op1: V);
8813
8814 // Extract the sign bit.
8815 auto Sign = MIRBuilder.buildLShr(Dst: S32, Src0: UH, Src1: MIRBuilder.buildConstant(Res: S32, Val: 16));
8816 Sign = MIRBuilder.buildAnd(Dst: S32, Src0: Sign, Src1: MIRBuilder.buildConstant(Res: S32, Val: 0x8000));
8817
8818 // Insert the sign bit
8819 V = MIRBuilder.buildOr(Dst: S32, Src0: Sign, Src1: V);
8820
8821 MIRBuilder.buildTrunc(Res: Dst, Op: V);
8822 MI.eraseFromParent();
8823 return Legalized;
8824}
8825
8826// f32 -> bf16 conversion using round-to-nearest-even rounding mode.
8827LegalizerHelper::LegalizeResult
8828LegalizerHelper::lowerFPTRUNC_F32_TO_BF16(MachineInstr &MI) {
8829 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8830 assert(DstTy.getScalarType() == LLT::bfloat16() &&
8831 SrcTy.getScalarType() == LLT::float32());
8832
8833 LLT I1Ty = SrcTy.changeElementType(NewEltTy: LLT::integer(SizeInBits: 1));
8834 LLT I16Ty = SrcTy.changeElementType(NewEltTy: LLT::integer(SizeInBits: 16));
8835 LLT I32Ty = SrcTy.changeElementType(NewEltTy: LLT::integer(SizeInBits: 32));
8836
8837 auto IsNaN = MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_UNO, Res: I1Ty, Op0: SrcReg,
8838 Op1: MIRBuilder.buildFConstant(Res: SrcTy, Val: 0));
8839 auto SrcI = MIRBuilder.buildBitcast(Dst: I32Ty, Src: SrcReg);
8840
8841 // Conversions should set NaN's quiet bit. This also prevents NaNs from
8842 // turning into infinities.
8843 auto NaN = MIRBuilder.buildOr(Dst: I32Ty, Src0: SrcI,
8844 Src1: MIRBuilder.buildConstant(Res: I32Ty, Val: 0x400000));
8845
8846 // Factor in the contribution of the low 16 bits.
8847 auto Lsb =
8848 MIRBuilder.buildLShr(Dst: I32Ty, Src0: SrcI, Src1: MIRBuilder.buildConstant(Res: I32Ty, Val: 16));
8849 Lsb = MIRBuilder.buildAnd(Dst: I32Ty, Src0: Lsb, Src1: MIRBuilder.buildConstant(Res: I32Ty, Val: 1));
8850 auto RoundingBias =
8851 MIRBuilder.buildAdd(Dst: I32Ty, Src0: Lsb, Src1: MIRBuilder.buildConstant(Res: I32Ty, Val: 0x7fff));
8852 auto Add = MIRBuilder.buildAdd(Dst: I32Ty, Src0: SrcI, Src1: RoundingBias);
8853
8854 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
8855 // 0x80000000.
8856 auto Sel = MIRBuilder.buildSelect(Res: I32Ty, Tst: IsNaN, Op0: NaN, Op1: Add);
8857
8858 // Now that we have rounded, shift the bits into position.
8859 auto Srl =
8860 MIRBuilder.buildLShr(Dst: I32Ty, Src0: Sel, Src1: MIRBuilder.buildConstant(Res: I32Ty, Val: 16));
8861 auto Trunc = MIRBuilder.buildTrunc(Res: I16Ty, Op: Srl);
8862 MIRBuilder.buildBitcast(Dst: DstReg, Src: Trunc);
8863 MI.eraseFromParent();
8864 return Legalized;
8865}
8866
8867LegalizerHelper::LegalizeResult
8868LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
8869 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8870 if (DstTy.getScalarType().isFloat16() && SrcTy.getScalarType().isFloat64())
8871 return lowerFPTRUNC_F64_TO_F16(MI);
8872
8873 if (DstTy.getScalarType().isBFloat16() && SrcTy.getScalarType().isFloat32())
8874 return lowerFPTRUNC_F32_TO_BF16(MI);
8875
8876 return lowerFPExtAndTruncMem(MI);
8877}
8878
8879LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
8880 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8881 LLT Ty = MRI.getType(Reg: Dst);
8882
8883 auto CvtSrc1 = MIRBuilder.buildSITOFP(Dst: Ty, Src0: Src1);
8884 MIRBuilder.buildFPow(Dst, Src0, Src1: CvtSrc1, Flags: MI.getFlags());
8885 MI.eraseFromParent();
8886 return Legalized;
8887}
8888
8889LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMODF(MachineInstr &MI) {
8890 auto [DstFrac, DstInt, Src] = MI.getFirst3Regs();
8891 LLT Ty = MRI.getType(Reg: Src);
8892 auto Flags = MI.getFlags();
8893 const LLT CondTy = Ty.changeElementType(NewEltTy: LLT::integer(SizeInBits: 1));
8894
8895 auto IntPart = MIRBuilder.buildIntrinsicTrunc(Dst: Ty, Src0: Src, Flags);
8896 auto FracPart = MIRBuilder.buildFSub(Dst: Ty, Src0: Src, Src1: IntPart, Flags);
8897
8898 Register FracToUse;
8899 if (MI.getFlag(Flag: MachineInstr::FmNoInfs)) {
8900 FracToUse = FracPart.getReg(Idx: 0);
8901 } else {
8902 auto Abs = MIRBuilder.buildFAbs(Dst: Ty, Src0: Src, Flags);
8903 const fltSemantics &Semantics = getFltSemanticForLLT(Ty: Ty.getScalarType());
8904 auto Inf = MIRBuilder.buildFConstant(Res: Ty, Val: APFloat::getInf(Sem: Semantics));
8905 auto IsInf = MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_OEQ, Res: CondTy, Op0: Abs, Op1: Inf);
8906 auto Zero = MIRBuilder.buildFConstant(Res: Ty, Val: 0.0);
8907 auto Select = MIRBuilder.buildSelect(Res: Ty, Tst: IsInf, Op0: Zero, Op1: FracPart);
8908 FracToUse = Select.getReg(Idx: 0);
8909 }
8910
8911 MIRBuilder.buildFCopysign(Dst: DstFrac, Src0: FracToUse, Src1: Src, Flags);
8912 MIRBuilder.buildCopy(Res: DstInt, Op: IntPart.getReg(Idx: 0));
8913
8914 MI.eraseFromParent();
8915 return Legalized;
8916}
8917
8918static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
8919 switch (Opc) {
8920 case TargetOpcode::G_SMIN:
8921 return CmpInst::ICMP_SLT;
8922 case TargetOpcode::G_SMAX:
8923 return CmpInst::ICMP_SGT;
8924 case TargetOpcode::G_UMIN:
8925 return CmpInst::ICMP_ULT;
8926 case TargetOpcode::G_UMAX:
8927 return CmpInst::ICMP_UGT;
8928 default:
8929 llvm_unreachable("not in integer min/max");
8930 }
8931}
8932
8933LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
8934 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8935
8936 const CmpInst::Predicate Pred = minMaxToCompare(Opc: MI.getOpcode());
8937 LLT CmpType = MRI.getType(Reg: Dst).changeElementType(NewEltTy: LLT::integer(SizeInBits: 1));
8938
8939 auto Cmp = MIRBuilder.buildICmp(Pred, Res: CmpType, Op0: Src0, Op1: Src1);
8940 MIRBuilder.buildSelect(Res: Dst, Tst: Cmp, Op0: Src0, Op1: Src1);
8941
8942 MI.eraseFromParent();
8943 return Legalized;
8944}
8945
8946LegalizerHelper::LegalizeResult
8947LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) {
8948 GSUCmp *Cmp = cast<GSUCmp>(Val: &MI);
8949
8950 Register Dst = Cmp->getReg(Idx: 0);
8951 LLT DstTy = MRI.getType(Reg: Dst);
8952 LLT SrcTy = MRI.getType(Reg: Cmp->getReg(Idx: 1));
8953 LLT CmpTy = DstTy.changeElementSize(NewEltSize: 1);
8954
8955 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8956 ? CmpInst::Predicate::ICMP_SLT
8957 : CmpInst::Predicate::ICMP_ULT;
8958 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8959 ? CmpInst::Predicate::ICMP_SGT
8960 : CmpInst::Predicate::ICMP_UGT;
8961
8962 auto Zero = MIRBuilder.buildConstant(Res: DstTy, Val: 0);
8963 auto IsGT = MIRBuilder.buildICmp(Pred: GTPredicate, Res: CmpTy, Op0: Cmp->getLHSReg(),
8964 Op1: Cmp->getRHSReg());
8965 auto IsLT = MIRBuilder.buildICmp(Pred: LTPredicate, Res: CmpTy, Op0: Cmp->getLHSReg(),
8966 Op1: Cmp->getRHSReg());
8967
8968 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8969 auto BC = TLI.getBooleanContents(isVec: DstTy.isVector(), /*isFP=*/isFloat: false);
8970 if (TLI.preferSelectsOverBooleanArithmetic(
8971 VT: getApproximateEVTForLLT(Ty: SrcTy, Ctx)) ||
8972 BC == TargetLowering::UndefinedBooleanContent) {
8973 auto One = MIRBuilder.buildConstant(Res: DstTy, Val: 1);
8974 auto SelectZeroOrOne = MIRBuilder.buildSelect(Res: DstTy, Tst: IsGT, Op0: One, Op1: Zero);
8975
8976 auto MinusOne = MIRBuilder.buildConstant(Res: DstTy, Val: -1);
8977 MIRBuilder.buildSelect(Res: Dst, Tst: IsLT, Op0: MinusOne, Op1: SelectZeroOrOne);
8978 } else {
8979 if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent)
8980 std::swap(a&: IsGT, b&: IsLT);
8981 // Extend boolean results to DstTy, which is at least i2, before subtracting
8982 // them.
8983 unsigned BoolExtOp =
8984 MIRBuilder.getBoolExtOp(IsVec: DstTy.isVector(), /*isFP=*/IsFP: false);
8985 IsGT = MIRBuilder.buildInstr(Opc: BoolExtOp, DstOps: {DstTy}, SrcOps: {IsGT});
8986 IsLT = MIRBuilder.buildInstr(Opc: BoolExtOp, DstOps: {DstTy}, SrcOps: {IsLT});
8987 MIRBuilder.buildSub(Dst, Src0: IsGT, Src1: IsLT);
8988 }
8989
8990 MI.eraseFromParent();
8991 return Legalized;
8992}
8993
8994LegalizerHelper::LegalizeResult
8995LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
8996 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8997 const int Src0Size = Src0Ty.getScalarSizeInBits();
8998 const int Src1Size = Src1Ty.getScalarSizeInBits();
8999
9000 LLT DstIntTy =
9001 DstTy.changeElementType(NewEltTy: LLT::integer(SizeInBits: DstTy.getScalarSizeInBits()));
9002 LLT Src0IntTy = Src0Ty.changeElementType(NewEltTy: LLT::integer(SizeInBits: Src0Size));
9003 LLT Src1IntTy = Src1Ty.changeElementType(NewEltTy: LLT::integer(SizeInBits: Src1Size));
9004
9005 Register Src0Int = Src0;
9006 Register Src1Int = Src1;
9007
9008 if (!(Src0Ty.getScalarType().isAnyScalar() ||
9009 Src0Ty.getScalarType().isInteger()))
9010 Src0Int = MIRBuilder.buildBitcast(Dst: Src0IntTy, Src: Src0).getReg(Idx: 0);
9011
9012 if (!(Src1Ty.getScalarType().isAnyScalar() ||
9013 Src1Ty.getScalarType().isInteger()))
9014 Src1Int = MIRBuilder.buildBitcast(Dst: Src1IntTy, Src: Src1).getReg(Idx: 0);
9015
9016 auto SignBitMask =
9017 MIRBuilder.buildConstant(Res: Src0IntTy, Val: APInt::getSignMask(BitWidth: Src0Size));
9018
9019 auto NotSignBitMask = MIRBuilder.buildConstant(
9020 Res: Src0IntTy, Val: APInt::getLowBitsSet(numBits: Src0Size, loBitsSet: Src0Size - 1));
9021
9022 Register And0 =
9023 MIRBuilder.buildAnd(Dst: Src0IntTy, Src0: Src0Int, Src1: NotSignBitMask).getReg(Idx: 0);
9024 Register And1;
9025 if (Src0Ty == Src1Ty) {
9026 And1 = MIRBuilder.buildAnd(Dst: Src1IntTy, Src0: Src1Int, Src1: SignBitMask).getReg(Idx: 0);
9027 } else if (Src0Size > Src1Size) {
9028 auto ShiftAmt = MIRBuilder.buildConstant(Res: Src0IntTy, Val: Src0Size - Src1Size);
9029 auto Zext = MIRBuilder.buildZExt(Res: Src0IntTy, Op: Src1Int);
9030 auto Shift = MIRBuilder.buildShl(Dst: Src0IntTy, Src0: Zext, Src1: ShiftAmt);
9031 And1 = MIRBuilder.buildAnd(Dst: Src0Ty, Src0: Shift, Src1: SignBitMask).getReg(Idx: 0);
9032 } else {
9033 auto ShiftAmt = MIRBuilder.buildConstant(Res: Src1IntTy, Val: Src1Size - Src0Size);
9034 auto Shift = MIRBuilder.buildLShr(Dst: Src1IntTy, Src0: Src1Int, Src1: ShiftAmt);
9035 auto Trunc = MIRBuilder.buildTrunc(Res: Src0IntTy, Op: Shift);
9036 And1 = MIRBuilder.buildAnd(Dst: Src0IntTy, Src0: Trunc, Src1: SignBitMask).getReg(Idx: 0);
9037 }
9038
9039 // Be careful about setting nsz/nnan/ninf on every instruction, since the
9040 // constants are a nan and -0.0, but the final result should preserve
9041 // everything.
9042 unsigned Flags = MI.getFlags();
9043
9044 // We masked the sign bit and the not-sign bit, so these are disjoint.
9045 Flags |= MachineInstr::Disjoint;
9046
9047 if (DstTy == DstIntTy)
9048 MIRBuilder.buildOr(Dst, Src0: And0, Src1: And1, Flags).getReg(Idx: 0);
9049 else {
9050 Register NewDst = MIRBuilder.buildOr(Dst: DstIntTy, Src0: And0, Src1: And1, Flags).getReg(Idx: 0);
9051 MIRBuilder.buildBitcast(Dst, Src: NewDst);
9052 }
9053
9054 MI.eraseFromParent();
9055 return Legalized;
9056}
9057
9058LegalizerHelper::LegalizeResult
9059LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
9060 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
9061 // identical handling. fminimumnum/fmaximumnum also need a path that do not
9062 // depend on fminnum/fmaxnum.
9063
9064 unsigned NewOp;
9065 switch (MI.getOpcode()) {
9066 case TargetOpcode::G_FMINNUM:
9067 NewOp = TargetOpcode::G_FMINNUM_IEEE;
9068 break;
9069 case TargetOpcode::G_FMINIMUMNUM:
9070 NewOp = TargetOpcode::G_FMINNUM;
9071 break;
9072 case TargetOpcode::G_FMAXNUM:
9073 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
9074 break;
9075 case TargetOpcode::G_FMAXIMUMNUM:
9076 NewOp = TargetOpcode::G_FMAXNUM;
9077 break;
9078 default:
9079 llvm_unreachable("unexpected min/max opcode");
9080 }
9081
9082 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
9083 LLT Ty = MRI.getType(Reg: Dst);
9084
9085 if (!MI.getFlag(Flag: MachineInstr::FmNoNans)) {
9086 // Insert canonicalizes if it's possible we need to quiet to get correct
9087 // sNaN behavior.
9088
9089 // Note this must be done here, and not as an optimization combine in the
9090 // absence of a dedicate quiet-snan instruction as we're using an
9091 // omni-purpose G_FCANONICALIZE.
9092 if (!VT->isKnownNeverSNaN(Val: Src0))
9093 Src0 = MIRBuilder.buildFCanonicalize(Dst: Ty, Src0, Flags: MI.getFlags()).getReg(Idx: 0);
9094
9095 if (!VT->isKnownNeverSNaN(Val: Src1))
9096 Src1 = MIRBuilder.buildFCanonicalize(Dst: Ty, Src0: Src1, Flags: MI.getFlags()).getReg(Idx: 0);
9097 }
9098
9099 // If there are no nans, it's safe to simply replace this with the non-IEEE
9100 // version.
9101 MIRBuilder.buildInstr(Opc: NewOp, DstOps: {Dst}, SrcOps: {Src0, Src1}, Flags: MI.getFlags());
9102 MI.eraseFromParent();
9103 return Legalized;
9104}
9105
9106LegalizerHelper::LegalizeResult
9107LegalizerHelper::lowerFMinimumMaximum(MachineInstr &MI) {
9108 unsigned Opc = MI.getOpcode();
9109 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
9110 LLT Ty = MRI.getType(Reg: Dst);
9111 const LLT CmpTy = Ty.changeElementType(NewEltTy: LLT::integer(SizeInBits: 1));
9112
9113 bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
9114 unsigned OpcIeee =
9115 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
9116 unsigned OpcNonIeee =
9117 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
9118 bool MinMaxMustRespectOrderedZero = false;
9119 Register Res;
9120
9121 // IEEE variants don't need canonicalization
9122 if (LI.isLegalOrCustom(Query: {OpcIeee, Ty})) {
9123 Res = MIRBuilder.buildInstr(Opc: OpcIeee, DstOps: {Ty}, SrcOps: {Src0, Src1}).getReg(Idx: 0);
9124 MinMaxMustRespectOrderedZero = true;
9125 } else if (LI.isLegalOrCustom(Query: {OpcNonIeee, Ty})) {
9126 Res = MIRBuilder.buildInstr(Opc: OpcNonIeee, DstOps: {Ty}, SrcOps: {Src0, Src1}).getReg(Idx: 0);
9127 } else {
9128 auto Compare = MIRBuilder.buildFCmp(
9129 Pred: IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, Res: CmpTy, Op0: Src0, Op1: Src1);
9130 Res = MIRBuilder.buildSelect(Res: Ty, Tst: Compare, Op0: Src0, Op1: Src1).getReg(Idx: 0);
9131 }
9132
9133 // Propagate any NaN of both operands
9134 if (!MI.getFlag(Flag: MachineInstr::FmNoNans) &&
9135 (!VT->isKnownNeverNaN(Val: Src0) || !VT->isKnownNeverNaN(Val: Src1))) {
9136 auto IsOrdered = MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_ORD, Res: CmpTy, Op0: Src0, Op1: Src1);
9137
9138 LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
9139 APFloat NaNValue = APFloat::getNaN(Sem: getFltSemanticForLLT(Ty: ElementTy));
9140 Register NaN = MIRBuilder.buildFConstant(Res: ElementTy, Val: NaNValue).getReg(Idx: 0);
9141 if (Ty.isVector())
9142 NaN = MIRBuilder.buildSplatBuildVector(Res: Ty, Src: NaN).getReg(Idx: 0);
9143
9144 Res = MIRBuilder.buildSelect(Res: Ty, Tst: IsOrdered, Op0: Res, Op1: NaN).getReg(Idx: 0);
9145 }
9146
9147 // fminimum/fmaximum requires -0.0 less than +0.0
9148 if (!MinMaxMustRespectOrderedZero && !MI.getFlag(Flag: MachineInstr::FmNsz)) {
9149 GISelValueTracking VT(MIRBuilder.getMF());
9150 KnownFPClass Src0Info = VT.computeKnownFPClass(R: Src0, InterestedClasses: fcZero);
9151 KnownFPClass Src1Info = VT.computeKnownFPClass(R: Src1, InterestedClasses: fcZero);
9152
9153 if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
9154 const unsigned Flags = MI.getFlags();
9155 Register Zero = MIRBuilder.buildFConstant(Res: Ty, Val: 0.0).getReg(Idx: 0);
9156 auto IsZero = MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_OEQ, Res: CmpTy, Op0: Res, Op1: Zero);
9157
9158 unsigned TestClass = IsMax ? fcPosZero : fcNegZero;
9159
9160 auto LHSTestZero = MIRBuilder.buildIsFPClass(Res: CmpTy, Src: Src0, Mask: TestClass);
9161 auto LHSSelect =
9162 MIRBuilder.buildSelect(Res: Ty, Tst: LHSTestZero, Op0: Src0, Op1: Res, Flags);
9163
9164 auto RHSTestZero = MIRBuilder.buildIsFPClass(Res: CmpTy, Src: Src1, Mask: TestClass);
9165 auto RHSSelect =
9166 MIRBuilder.buildSelect(Res: Ty, Tst: RHSTestZero, Op0: Src1, Op1: LHSSelect, Flags);
9167
9168 Res = MIRBuilder.buildSelect(Res: Ty, Tst: IsZero, Op0: RHSSelect, Op1: Res, Flags).getReg(Idx: 0);
9169 }
9170 }
9171
9172 MIRBuilder.buildCopy(Res: Dst, Op: Res);
9173 MI.eraseFromParent();
9174 return Legalized;
9175}
9176
9177LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
9178 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
9179 Register DstReg = MI.getOperand(i: 0).getReg();
9180 LLT Ty = MRI.getType(Reg: DstReg);
9181 unsigned Flags = MI.getFlags();
9182
9183 auto Mul = MIRBuilder.buildFMul(Dst: Ty, Src0: MI.getOperand(i: 1), Src1: MI.getOperand(i: 2),
9184 Flags);
9185 MIRBuilder.buildFAdd(Dst: DstReg, Src0: Mul, Src1: MI.getOperand(i: 3), Flags);
9186 MI.eraseFromParent();
9187 return Legalized;
9188}
9189
9190LegalizerHelper::LegalizeResult
9191LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
9192 auto [DstReg, X] = MI.getFirst2Regs();
9193 const unsigned Flags = MI.getFlags();
9194 const LLT Ty = MRI.getType(Reg: DstReg);
9195 const LLT CondTy = Ty.changeElementType(NewEltTy: LLT::integer(SizeInBits: 1));
9196
9197 // round(x) =>
9198 // t = trunc(x);
9199 // d = fabs(x - t);
9200 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
9201 // return t + o;
9202
9203 auto T = MIRBuilder.buildIntrinsicTrunc(Dst: Ty, Src0: X, Flags);
9204
9205 auto Diff = MIRBuilder.buildFSub(Dst: Ty, Src0: X, Src1: T, Flags);
9206 auto AbsDiff = MIRBuilder.buildFAbs(Dst: Ty, Src0: Diff, Flags);
9207
9208 auto Half = MIRBuilder.buildFConstant(Res: Ty, Val: 0.5);
9209 auto Cmp =
9210 MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_OGE, Res: CondTy, Op0: AbsDiff, Op1: Half, Flags);
9211
9212 // Could emit G_UITOFP instead
9213 auto One = MIRBuilder.buildFConstant(Res: Ty, Val: 1.0);
9214 auto Zero = MIRBuilder.buildFConstant(Res: Ty, Val: 0.0);
9215 auto BoolFP = MIRBuilder.buildSelect(Res: Ty, Tst: Cmp, Op0: One, Op1: Zero);
9216 auto SignedOffset = MIRBuilder.buildFCopysign(Dst: Ty, Src0: BoolFP, Src1: X);
9217
9218 MIRBuilder.buildFAdd(Dst: DstReg, Src0: T, Src1: SignedOffset, Flags);
9219
9220 MI.eraseFromParent();
9221 return Legalized;
9222}
9223
9224LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
9225 auto [DstReg, SrcReg] = MI.getFirst2Regs();
9226 unsigned Flags = MI.getFlags();
9227 LLT Ty = MRI.getType(Reg: DstReg);
9228 const LLT CondTy = Ty.changeElementType(NewEltTy: LLT::integer(SizeInBits: 1));
9229
9230 // result = trunc(src);
9231 // if (src < 0.0 && src != result)
9232 // result += -1.0.
9233
9234 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Dst: Ty, Src0: SrcReg, Flags);
9235 auto Zero = MIRBuilder.buildFConstant(Res: Ty, Val: 0.0);
9236
9237 auto Lt0 = MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_OLT, Res: CondTy,
9238 Op0: SrcReg, Op1: Zero, Flags);
9239 auto NeTrunc = MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_ONE, Res: CondTy,
9240 Op0: SrcReg, Op1: Trunc, Flags);
9241 auto And = MIRBuilder.buildAnd(Dst: CondTy, Src0: Lt0, Src1: NeTrunc);
9242 auto AddVal = MIRBuilder.buildSITOFP(Dst: Ty, Src0: And);
9243
9244 MIRBuilder.buildFAdd(Dst: DstReg, Src0: Trunc, Src1: AddVal, Flags);
9245 MI.eraseFromParent();
9246 return Legalized;
9247}
9248
9249LegalizerHelper::LegalizeResult
9250LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
9251 const unsigned NumOps = MI.getNumOperands();
9252 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
9253 unsigned PartSize = Src0Ty.getSizeInBits();
9254
9255 LLT WideTy = LLT::scalar(SizeInBits: DstTy.getSizeInBits());
9256 Register ResultReg = MIRBuilder.buildZExt(Res: WideTy, Op: Src0Reg).getReg(Idx: 0);
9257
9258 for (unsigned I = 2; I != NumOps; ++I) {
9259 const unsigned Offset = (I - 1) * PartSize;
9260
9261 Register SrcReg = MI.getOperand(i: I).getReg();
9262 auto ZextInput = MIRBuilder.buildZExt(Res: WideTy, Op: SrcReg);
9263
9264 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
9265 MRI.createGenericVirtualRegister(Ty: WideTy);
9266
9267 auto ShiftAmt = MIRBuilder.buildConstant(Res: WideTy, Val: Offset);
9268 auto Shl = MIRBuilder.buildShl(Dst: WideTy, Src0: ZextInput, Src1: ShiftAmt);
9269 MIRBuilder.buildOr(Dst: NextResult, Src0: ResultReg, Src1: Shl);
9270 ResultReg = NextResult;
9271 }
9272
9273 if (DstTy.isPointer()) {
9274 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
9275 AddrSpace: DstTy.getAddressSpace())) {
9276 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
9277 return UnableToLegalize;
9278 }
9279
9280 MIRBuilder.buildIntToPtr(Dst: DstReg, Src: ResultReg);
9281 }
9282
9283 MI.eraseFromParent();
9284 return Legalized;
9285}
9286
9287LegalizerHelper::LegalizeResult
9288LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
9289 const unsigned NumDst = MI.getNumOperands() - 1;
9290 Register SrcReg = MI.getOperand(i: NumDst).getReg();
9291 Register Dst0Reg = MI.getOperand(i: 0).getReg();
9292 LLT DstTy = MRI.getType(Reg: Dst0Reg);
9293 if (DstTy.isPointer())
9294 return UnableToLegalize; // TODO
9295
9296 SrcReg = coerceToScalar(Val: SrcReg);
9297 if (!SrcReg)
9298 return UnableToLegalize;
9299
9300 // Expand scalarizing unmerge as bitcast to integer and shift.
9301 LLT IntTy = MRI.getType(Reg: SrcReg);
9302
9303 MIRBuilder.buildTrunc(Res: Dst0Reg, Op: SrcReg);
9304
9305 const unsigned DstSize = DstTy.getSizeInBits();
9306 unsigned Offset = DstSize;
9307 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
9308 auto ShiftAmt = MIRBuilder.buildConstant(Res: IntTy, Val: Offset);
9309 auto Shift = MIRBuilder.buildLShr(Dst: IntTy, Src0: SrcReg, Src1: ShiftAmt);
9310 MIRBuilder.buildTrunc(Res: MI.getOperand(i: I), Op: Shift);
9311 }
9312
9313 MI.eraseFromParent();
9314 return Legalized;
9315}
9316
9317/// Lower a vector extract or insert by writing the vector to a stack temporary
9318/// and reloading the element or vector.
9319///
9320/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
9321/// =>
9322/// %stack_temp = G_FRAME_INDEX
9323/// G_STORE %vec, %stack_temp
9324/// %idx = clamp(%idx, %vec.getNumElements())
9325/// %element_ptr = G_PTR_ADD %stack_temp, %idx
9326/// %dst = G_LOAD %element_ptr
9327LegalizerHelper::LegalizeResult
9328LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
9329 Register DstReg = MI.getOperand(i: 0).getReg();
9330 Register SrcVec = MI.getOperand(i: 1).getReg();
9331 Register InsertVal;
9332 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9333 InsertVal = MI.getOperand(i: 2).getReg();
9334
9335 Register Idx = MI.getOperand(i: MI.getNumOperands() - 1).getReg();
9336
9337 LLT VecTy = MRI.getType(Reg: SrcVec);
9338 LLT EltTy = VecTy.getElementType();
9339 unsigned NumElts = VecTy.getNumElements();
9340
9341 int64_t IdxVal;
9342 if (mi_match(R: Idx, MRI, P: m_ICst(Cst&: IdxVal)) && IdxVal <= NumElts) {
9343 SmallVector<Register, 8> SrcRegs;
9344 extractParts(Reg: SrcVec, Ty: EltTy, NumParts: NumElts, VRegs&: SrcRegs, MIRBuilder, MRI);
9345
9346 if (InsertVal) {
9347 SrcRegs[IdxVal] = MI.getOperand(i: 2).getReg();
9348 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: SrcRegs);
9349 } else {
9350 MIRBuilder.buildCopy(Res: DstReg, Op: SrcRegs[IdxVal]);
9351 }
9352
9353 MI.eraseFromParent();
9354 return Legalized;
9355 }
9356
9357 if (!EltTy.isByteSized()) { // Not implemented.
9358 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
9359 return UnableToLegalize;
9360 }
9361
9362 unsigned EltBytes = EltTy.getSizeInBytes();
9363 Align VecAlign = getStackTemporaryAlignment(Ty: VecTy);
9364 Align EltAlign;
9365
9366 MachinePointerInfo PtrInfo;
9367 auto StackTemp = createStackTemporary(
9368 Bytes: TypeSize::getFixed(ExactSize: VecTy.getSizeInBytes()), Alignment: VecAlign, PtrInfo);
9369 MIRBuilder.buildStore(Val: SrcVec, Addr: StackTemp, PtrInfo, Alignment: VecAlign);
9370
9371 // Get the pointer to the element, and be sure not to hit undefined behavior
9372 // if the index is out of bounds.
9373 Register EltPtr = getVectorElementPointer(VecPtr: StackTemp.getReg(Idx: 0), VecTy, Index: Idx);
9374
9375 if (mi_match(R: Idx, MRI, P: m_ICst(Cst&: IdxVal))) {
9376 int64_t Offset = IdxVal * EltBytes;
9377 PtrInfo = PtrInfo.getWithOffset(O: Offset);
9378 EltAlign = commonAlignment(A: VecAlign, Offset);
9379 } else {
9380 // We lose information with a variable offset.
9381 EltAlign = getStackTemporaryAlignment(Ty: EltTy);
9382 PtrInfo = MachinePointerInfo(MRI.getType(Reg: EltPtr).getAddressSpace());
9383 }
9384
9385 if (InsertVal) {
9386 // Write the inserted element
9387 MIRBuilder.buildStore(Val: InsertVal, Addr: EltPtr, PtrInfo, Alignment: EltAlign);
9388
9389 // Reload the whole vector.
9390 MIRBuilder.buildLoad(Res: DstReg, Addr: StackTemp, PtrInfo, Alignment: VecAlign);
9391 } else {
9392 MIRBuilder.buildLoad(Res: DstReg, Addr: EltPtr, PtrInfo, Alignment: EltAlign);
9393 }
9394
9395 MI.eraseFromParent();
9396 return Legalized;
9397}
9398
9399LegalizerHelper::LegalizeResult
9400LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
9401 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9402 MI.getFirst3RegLLTs();
9403 LLT IdxTy = LLT::scalar(SizeInBits: 32);
9404
9405 ArrayRef<int> Mask = MI.getOperand(i: 3).getShuffleMask();
9406 Register Undef;
9407 SmallVector<Register, 32> BuildVec;
9408 LLT EltTy = DstTy.getScalarType();
9409
9410 DenseMap<unsigned, Register> CachedExtract;
9411
9412 for (int Idx : Mask) {
9413 if (Idx < 0) {
9414 if (!Undef.isValid())
9415 Undef = MIRBuilder.buildUndef(Res: EltTy).getReg(Idx: 0);
9416 BuildVec.push_back(Elt: Undef);
9417 continue;
9418 }
9419
9420 assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
9421
9422 int NumElts = Src0Ty.getNumElements();
9423 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9424 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9425 auto [It, Inserted] = CachedExtract.try_emplace(Key: Idx);
9426 if (Inserted) {
9427 auto IdxK = MIRBuilder.buildConstant(Res: IdxTy, Val: ExtractIdx);
9428 It->second =
9429 MIRBuilder.buildExtractVectorElement(Res: EltTy, Val: SrcVec, Idx: IdxK).getReg(Idx: 0);
9430 }
9431 BuildVec.push_back(Elt: It->second);
9432 }
9433
9434 assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
9435 MIRBuilder.buildBuildVector(Res: DstReg, Ops: BuildVec);
9436 MI.eraseFromParent();
9437 return Legalized;
9438}
9439
9440LegalizerHelper::LegalizeResult
9441LegalizerHelper::lowerVECTOR_COMPRESS(llvm::MachineInstr &MI) {
9442 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9443 MI.getFirst4RegLLTs();
9444
9445 if (VecTy.isScalableVector())
9446 report_fatal_error(reason: "Cannot expand masked_compress for scalable vectors.");
9447
9448 Align VecAlign = getStackTemporaryAlignment(Ty: VecTy);
9449 MachinePointerInfo PtrInfo;
9450 Register StackPtr =
9451 createStackTemporary(Bytes: TypeSize::getFixed(ExactSize: VecTy.getSizeInBytes()), Alignment: VecAlign,
9452 PtrInfo)
9453 .getReg(Idx: 0);
9454 MachinePointerInfo ValPtrInfo =
9455 MachinePointerInfo::getUnknownStack(MF&: *MI.getMF());
9456
9457 LLT IdxTy = LLT::scalar(SizeInBits: 32);
9458 LLT ValTy = VecTy.getElementType();
9459 Align ValAlign = getStackTemporaryAlignment(Ty: ValTy);
9460
9461 auto OutPos = MIRBuilder.buildConstant(Res: IdxTy, Val: 0);
9462
9463 bool HasPassthru =
9464 MRI.getVRegDef(Reg: Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9465
9466 if (HasPassthru)
9467 MIRBuilder.buildStore(Val: Passthru, Addr: StackPtr, PtrInfo, Alignment: VecAlign);
9468
9469 Register LastWriteVal;
9470 std::optional<APInt> PassthruSplatVal =
9471 isConstantOrConstantSplatVector(MI&: *MRI.getVRegDef(Reg: Passthru), MRI);
9472
9473 if (PassthruSplatVal.has_value()) {
9474 LastWriteVal =
9475 MIRBuilder.buildConstant(Res: ValTy, Val: PassthruSplatVal.value()).getReg(Idx: 0);
9476 } else if (HasPassthru) {
9477 auto Popcount = MIRBuilder.buildZExt(Res: MaskTy.changeElementSize(NewEltSize: 32), Op: Mask);
9478 Popcount = MIRBuilder.buildInstr(Opc: TargetOpcode::G_VECREDUCE_ADD,
9479 DstOps: {LLT::scalar(SizeInBits: 32)}, SrcOps: {Popcount});
9480
9481 Register LastElmtPtr =
9482 getVectorElementPointer(VecPtr: StackPtr, VecTy, Index: Popcount.getReg(Idx: 0));
9483 LastWriteVal =
9484 MIRBuilder.buildLoad(Res: ValTy, Addr: LastElmtPtr, PtrInfo: ValPtrInfo, Alignment: ValAlign)
9485 .getReg(Idx: 0);
9486 }
9487
9488 unsigned NumElmts = VecTy.getNumElements();
9489 for (unsigned I = 0; I < NumElmts; ++I) {
9490 auto Idx = MIRBuilder.buildConstant(Res: IdxTy, Val: I);
9491 auto Val = MIRBuilder.buildExtractVectorElement(Res: ValTy, Val: Vec, Idx);
9492 Register ElmtPtr =
9493 getVectorElementPointer(VecPtr: StackPtr, VecTy, Index: OutPos.getReg(Idx: 0));
9494 MIRBuilder.buildStore(Val, Addr: ElmtPtr, PtrInfo: ValPtrInfo, Alignment: ValAlign);
9495
9496 LLT MaskITy = MaskTy.getElementType();
9497 auto MaskI = MIRBuilder.buildExtractVectorElement(Res: MaskITy, Val: Mask, Idx);
9498 if (MaskITy.getSizeInBits() > 1)
9499 MaskI = MIRBuilder.buildTrunc(Res: LLT::scalar(SizeInBits: 1), Op: MaskI);
9500
9501 MaskI = MIRBuilder.buildZExt(Res: IdxTy, Op: MaskI);
9502 OutPos = MIRBuilder.buildAdd(Dst: IdxTy, Src0: OutPos, Src1: MaskI);
9503
9504 if (HasPassthru && I == NumElmts - 1) {
9505 auto EndOfVector =
9506 MIRBuilder.buildConstant(Res: IdxTy, Val: VecTy.getNumElements() - 1);
9507 auto AllLanesSelected = MIRBuilder.buildICmp(
9508 Pred: CmpInst::ICMP_UGT, Res: LLT::scalar(SizeInBits: 1), Op0: OutPos, Op1: EndOfVector);
9509 OutPos = MIRBuilder.buildInstr(Opc: TargetOpcode::G_UMIN, DstOps: {IdxTy},
9510 SrcOps: {OutPos, EndOfVector});
9511 ElmtPtr = getVectorElementPointer(VecPtr: StackPtr, VecTy, Index: OutPos.getReg(Idx: 0));
9512
9513 LastWriteVal =
9514 MIRBuilder.buildSelect(Res: ValTy, Tst: AllLanesSelected, Op0: Val, Op1: LastWriteVal)
9515 .getReg(Idx: 0);
9516 MIRBuilder.buildStore(Val: LastWriteVal, Addr: ElmtPtr, PtrInfo: ValPtrInfo, Alignment: ValAlign);
9517 }
9518 }
9519
9520 // TODO: Use StackPtr's FrameIndex alignment.
9521 MIRBuilder.buildLoad(Res: Dst, Addr: StackPtr, PtrInfo, Alignment: VecAlign);
9522
9523 MI.eraseFromParent();
9524 return Legalized;
9525}
9526
9527Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
9528 Register AllocSize,
9529 Align Alignment,
9530 LLT PtrTy) {
9531 LLT IntPtrTy = LLT::integer(SizeInBits: PtrTy.getSizeInBits());
9532
9533 auto SPTmp = MIRBuilder.buildCopy(Res: PtrTy, Op: SPReg);
9534 SPTmp = MIRBuilder.buildCast(Dst: IntPtrTy, Src: SPTmp);
9535
9536 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9537 // have to generate an extra instruction to negate the alloc and then use
9538 // G_PTR_ADD to add the negative offset.
9539 auto Alloc = MIRBuilder.buildSub(Dst: IntPtrTy, Src0: SPTmp, Src1: AllocSize);
9540 if (Alignment > Align(1)) {
9541 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9542 AlignMask.negate();
9543 auto AlignCst = MIRBuilder.buildConstant(Res: IntPtrTy, Val: AlignMask);
9544 Alloc = MIRBuilder.buildAnd(Dst: IntPtrTy, Src0: Alloc, Src1: AlignCst);
9545 }
9546
9547 return MIRBuilder.buildCast(Dst: PtrTy, Src: Alloc).getReg(Idx: 0);
9548}
9549
9550LegalizerHelper::LegalizeResult
9551LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
9552 const auto &MF = *MI.getMF();
9553 const auto &TFI = *MF.getSubtarget().getFrameLowering();
9554 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9555 return UnableToLegalize;
9556
9557 Register Dst = MI.getOperand(i: 0).getReg();
9558 Register AllocSize = MI.getOperand(i: 1).getReg();
9559 Align Alignment = assumeAligned(Value: MI.getOperand(i: 2).getImm());
9560
9561 LLT PtrTy = MRI.getType(Reg: Dst);
9562 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9563 Register SPTmp =
9564 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9565
9566 MIRBuilder.buildCopy(Res: SPReg, Op: SPTmp);
9567 MIRBuilder.buildCopy(Res: Dst, Op: SPTmp);
9568
9569 MI.eraseFromParent();
9570 return Legalized;
9571}
9572
9573LegalizerHelper::LegalizeResult
9574LegalizerHelper::lowerStackSave(MachineInstr &MI) {
9575 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9576 if (!StackPtr)
9577 return UnableToLegalize;
9578
9579 MIRBuilder.buildCopy(Res: MI.getOperand(i: 0), Op: StackPtr);
9580 MI.eraseFromParent();
9581 return Legalized;
9582}
9583
9584LegalizerHelper::LegalizeResult
9585LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
9586 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9587 if (!StackPtr)
9588 return UnableToLegalize;
9589
9590 MIRBuilder.buildCopy(Res: StackPtr, Op: MI.getOperand(i: 0));
9591 MI.eraseFromParent();
9592 return Legalized;
9593}
9594
9595LegalizerHelper::LegalizeResult
9596LegalizerHelper::lowerExtract(MachineInstr &MI) {
9597 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9598 unsigned Offset = MI.getOperand(i: 2).getImm();
9599
9600 // Extract sub-vector or one element
9601 if (SrcTy.isVector()) {
9602 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9603 unsigned DstSize = DstTy.getSizeInBits();
9604
9605 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9606 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9607 // Unmerge and allow access to each Src element for the artifact combiner.
9608 auto Unmerge = MIRBuilder.buildUnmerge(Res: SrcTy.getElementType(), Op: SrcReg);
9609
9610 // Take element(s) we need to extract and copy it (merge them).
9611 SmallVector<Register, 8> SubVectorElts;
9612 for (unsigned Idx = Offset / SrcEltSize;
9613 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9614 SubVectorElts.push_back(Elt: Unmerge.getReg(Idx));
9615 }
9616 if (SubVectorElts.size() == 1)
9617 MIRBuilder.buildCopy(Res: DstReg, Op: SubVectorElts[0]);
9618 else
9619 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: SubVectorElts);
9620
9621 MI.eraseFromParent();
9622 return Legalized;
9623 }
9624 }
9625
9626 const DataLayout &DL = MIRBuilder.getDataLayout();
9627 if ((SrcTy.isPointer() &&
9628 DL.isNonIntegralAddressSpace(AddrSpace: SrcTy.getAddressSpace())) ||
9629 (DstTy.isPointer() &&
9630 DL.isNonIntegralAddressSpace(AddrSpace: DstTy.getAddressSpace()))) {
9631 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9632 return UnableToLegalize;
9633 }
9634
9635 if ((DstTy.isScalar() || DstTy.isPointer()) &&
9636 (SrcTy.isScalar() || SrcTy.isPointer() ||
9637 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9638 LLT SrcIntTy = SrcTy;
9639 if (!SrcTy.isScalar()) {
9640 SrcIntTy = LLT::scalar(SizeInBits: SrcTy.getSizeInBits());
9641 SrcReg = MIRBuilder.buildCast(Dst: SrcIntTy, Src: SrcReg).getReg(Idx: 0);
9642 }
9643
9644 Register ResultReg = DstReg;
9645 if (DstTy.isPointer())
9646 ResultReg =
9647 MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: DstTy.getSizeInBits()));
9648
9649 if (Offset == 0)
9650 MIRBuilder.buildTrunc(Res: ResultReg, Op: SrcReg);
9651 else {
9652 auto ShiftAmt = MIRBuilder.buildConstant(Res: SrcIntTy, Val: Offset);
9653 auto Shr = MIRBuilder.buildLShr(Dst: SrcIntTy, Src0: SrcReg, Src1: ShiftAmt);
9654 MIRBuilder.buildTrunc(Res: ResultReg, Op: Shr);
9655 }
9656
9657 if (DstTy.isPointer())
9658 MIRBuilder.buildIntToPtr(Dst: DstReg, Src: ResultReg);
9659
9660 MI.eraseFromParent();
9661 return Legalized;
9662 }
9663
9664 return UnableToLegalize;
9665}
9666
9667LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
9668 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9669 uint64_t Offset = MI.getOperand(i: 3).getImm();
9670
9671 LLT DstTy = MRI.getType(Reg: Src);
9672 LLT InsertTy = MRI.getType(Reg: InsertSrc);
9673
9674 const DataLayout &DL = MIRBuilder.getDataLayout();
9675 bool IsNonIntegralInsert =
9676 InsertTy.isPointerOrPointerVector() &&
9677 DL.isNonIntegralAddressSpace(AddrSpace: InsertTy.getAddressSpace());
9678 bool IsNonIntegralDst = DstTy.isPointerOrPointerVector() &&
9679 DL.isNonIntegralAddressSpace(AddrSpace: DstTy.getAddressSpace());
9680
9681 // Insert sub-vector or one element
9682 if (DstTy.isVector()) {
9683 LLT EltTy = DstTy.getElementType();
9684
9685 if ((IsNonIntegralInsert || IsNonIntegralDst) && InsertTy != EltTy) {
9686 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9687 return UnableToLegalize;
9688 }
9689
9690 unsigned EltSize = EltTy.getSizeInBits();
9691 unsigned InsertSize = InsertTy.getSizeInBits();
9692
9693 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9694 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9695 auto UnmergeSrc = MIRBuilder.buildUnmerge(Res: EltTy, Op: Src);
9696 SmallVector<Register, 8> DstElts;
9697 unsigned Idx = 0;
9698 // Elements from Src before insert start Offset
9699 for (; Idx < Offset / EltSize; ++Idx) {
9700 DstElts.push_back(Elt: UnmergeSrc.getReg(Idx));
9701 }
9702
9703 // Replace elements in Src with elements from InsertSrc
9704 if (InsertTy.getSizeInBits() > EltSize) {
9705 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(Res: EltTy, Op: InsertSrc);
9706 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9707 ++Idx, ++i) {
9708 DstElts.push_back(Elt: UnmergeInsertSrc.getReg(Idx: i));
9709 }
9710 } else {
9711 if (InsertTy.isPointer() && !EltTy.isPointer())
9712 InsertSrc = MIRBuilder.buildPtrToInt(Dst: EltTy, Src: InsertSrc).getReg(Idx: 0);
9713 else if (!InsertTy.isPointer() && EltTy.isPointer())
9714 InsertSrc = MIRBuilder.buildIntToPtr(Dst: EltTy, Src: InsertSrc).getReg(Idx: 0);
9715 DstElts.push_back(Elt: InsertSrc);
9716 ++Idx;
9717 }
9718
9719 // Remaining elements from Src after insert
9720 for (; Idx < DstTy.getNumElements(); ++Idx) {
9721 DstElts.push_back(Elt: UnmergeSrc.getReg(Idx));
9722 }
9723
9724 MIRBuilder.buildMergeLikeInstr(Res: Dst, Ops: DstElts);
9725 MI.eraseFromParent();
9726 return Legalized;
9727 }
9728 }
9729
9730 if (InsertTy.isVector() ||
9731 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9732 return UnableToLegalize;
9733
9734 if (IsNonIntegralDst || IsNonIntegralInsert) {
9735 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9736 return UnableToLegalize;
9737 }
9738
9739 LLT IntDstTy = DstTy;
9740
9741 if (!DstTy.isScalar()) {
9742 IntDstTy = LLT::scalar(SizeInBits: DstTy.getSizeInBits());
9743 Src = MIRBuilder.buildCast(Dst: IntDstTy, Src).getReg(Idx: 0);
9744 }
9745
9746 if (!InsertTy.isScalar()) {
9747 const LLT IntInsertTy = LLT::scalar(SizeInBits: InsertTy.getSizeInBits());
9748 InsertSrc = MIRBuilder.buildPtrToInt(Dst: IntInsertTy, Src: InsertSrc).getReg(Idx: 0);
9749 }
9750
9751 Register ExtInsSrc = MIRBuilder.buildZExt(Res: IntDstTy, Op: InsertSrc).getReg(Idx: 0);
9752 if (Offset != 0) {
9753 auto ShiftAmt = MIRBuilder.buildConstant(Res: IntDstTy, Val: Offset);
9754 ExtInsSrc = MIRBuilder.buildShl(Dst: IntDstTy, Src0: ExtInsSrc, Src1: ShiftAmt).getReg(Idx: 0);
9755 }
9756
9757 APInt MaskVal = APInt::getBitsSetWithWrap(
9758 numBits: DstTy.getSizeInBits(), loBit: Offset + InsertTy.getSizeInBits(), hiBit: Offset);
9759
9760 auto Mask = MIRBuilder.buildConstant(Res: IntDstTy, Val: MaskVal);
9761 auto MaskedSrc = MIRBuilder.buildAnd(Dst: IntDstTy, Src0: Src, Src1: Mask);
9762 auto Or = MIRBuilder.buildOr(Dst: IntDstTy, Src0: MaskedSrc, Src1: ExtInsSrc);
9763
9764 MIRBuilder.buildCast(Dst, Src: Or);
9765 MI.eraseFromParent();
9766 return Legalized;
9767}
9768
9769LegalizerHelper::LegalizeResult
9770LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
9771 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9772 MI.getFirst4RegLLTs();
9773 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9774
9775 LLT Ty = Dst0Ty;
9776 LLT BoolTy = Dst1Ty;
9777
9778 Register NewDst0 = MRI.cloneVirtualRegister(VReg: Dst0);
9779
9780 if (IsAdd)
9781 MIRBuilder.buildAdd(Dst: NewDst0, Src0: LHS, Src1: RHS);
9782 else
9783 MIRBuilder.buildSub(Dst: NewDst0, Src0: LHS, Src1: RHS);
9784
9785 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9786
9787 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0);
9788
9789 if (IsAdd) {
9790 // For an addition, the result should be less than one of the operands (LHS)
9791 // if and only if the other operand (RHS) is negative, otherwise there will
9792 // be overflow.
9793 auto ResultLowerThanLHS =
9794 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SLT, Res: BoolTy, Op0: NewDst0, Op1: LHS);
9795 auto RHSNegative =
9796 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SLT, Res: BoolTy, Op0: RHS, Op1: Zero);
9797 MIRBuilder.buildXor(Dst: Dst1, Src0: RHSNegative, Src1: ResultLowerThanLHS);
9798 } else {
9799 // For subtraction, overflow occurs when the signed comparison of operands
9800 // doesn't match the sign of the result.
9801 auto LHSLessThanRHS =
9802 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SLT, Res: BoolTy, Op0: LHS, Op1: RHS);
9803 auto ResultNegative =
9804 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SLT, Res: BoolTy, Op0: NewDst0, Op1: Zero);
9805 MIRBuilder.buildXor(Dst: Dst1, Src0: LHSLessThanRHS, Src1: ResultNegative);
9806 }
9807
9808 MIRBuilder.buildCopy(Res: Dst0, Op: NewDst0);
9809 MI.eraseFromParent();
9810
9811 return Legalized;
9812}
9813
9814LegalizerHelper::LegalizeResult LegalizerHelper::lowerSADDE(MachineInstr &MI) {
9815 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9816 const LLT Ty = MRI.getType(Reg: Res);
9817
9818 // sum = LHS + RHS + zext(CarryIn)
9819 auto Tmp = MIRBuilder.buildAdd(Dst: Ty, Src0: LHS, Src1: RHS);
9820 auto CarryZ = MIRBuilder.buildZExt(Res: Ty, Op: CarryIn);
9821 auto Sum = MIRBuilder.buildAdd(Dst: Ty, Src0: Tmp, Src1: CarryZ);
9822 MIRBuilder.buildCopy(Res, Op: Sum);
9823
9824 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
9825 auto AX = MIRBuilder.buildXor(Dst: Ty, Src0: Sum, Src1: LHS);
9826 auto BX = MIRBuilder.buildXor(Dst: Ty, Src0: Sum, Src1: RHS);
9827 auto T = MIRBuilder.buildAnd(Dst: Ty, Src0: AX, Src1: BX);
9828
9829 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0);
9830 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SLT, Res: OvOut, Op0: T, Op1: Zero);
9831
9832 MI.eraseFromParent();
9833 return Legalized;
9834}
9835
9836LegalizerHelper::LegalizeResult LegalizerHelper::lowerSSUBE(MachineInstr &MI) {
9837 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9838 const LLT Ty = MRI.getType(Reg: Res);
9839
9840 // Diff = LHS - (RHS + zext(CarryIn))
9841 auto CarryZ = MIRBuilder.buildZExt(Res: Ty, Op: CarryIn);
9842 auto RHSPlusCI = MIRBuilder.buildAdd(Dst: Ty, Src0: RHS, Src1: CarryZ);
9843 auto Diff = MIRBuilder.buildSub(Dst: Ty, Src0: LHS, Src1: RHSPlusCI);
9844 MIRBuilder.buildCopy(Res, Op: Diff);
9845
9846 // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
9847 auto X1 = MIRBuilder.buildXor(Dst: Ty, Src0: LHS, Src1: RHS);
9848 auto X2 = MIRBuilder.buildXor(Dst: Ty, Src0: LHS, Src1: Diff);
9849 auto T = MIRBuilder.buildAnd(Dst: Ty, Src0: X1, Src1: X2);
9850 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0);
9851 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SLT, Res: OvOut, Op0: T, Op1: Zero);
9852
9853 MI.eraseFromParent();
9854 return Legalized;
9855}
9856
9857LegalizerHelper::LegalizeResult
9858LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
9859 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9860 LLT Ty = MRI.getType(Reg: Res);
9861 bool IsSigned;
9862 bool IsAdd;
9863 unsigned BaseOp;
9864 switch (MI.getOpcode()) {
9865 default:
9866 llvm_unreachable("unexpected addsat/subsat opcode");
9867 case TargetOpcode::G_UADDSAT:
9868 IsSigned = false;
9869 IsAdd = true;
9870 BaseOp = TargetOpcode::G_ADD;
9871 break;
9872 case TargetOpcode::G_SADDSAT:
9873 IsSigned = true;
9874 IsAdd = true;
9875 BaseOp = TargetOpcode::G_ADD;
9876 break;
9877 case TargetOpcode::G_USUBSAT:
9878 IsSigned = false;
9879 IsAdd = false;
9880 BaseOp = TargetOpcode::G_SUB;
9881 break;
9882 case TargetOpcode::G_SSUBSAT:
9883 IsSigned = true;
9884 IsAdd = false;
9885 BaseOp = TargetOpcode::G_SUB;
9886 break;
9887 }
9888
9889 if (IsSigned) {
9890 // sadd.sat(a, b) ->
9891 // hi = 0x7fffffff - smax(a, 0)
9892 // lo = 0x80000000 - smin(a, 0)
9893 // a + smin(smax(lo, b), hi)
9894 // ssub.sat(a, b) ->
9895 // lo = smax(a, -1) - 0x7fffffff
9896 // hi = smin(a, -1) - 0x80000000
9897 // a - smin(smax(lo, b), hi)
9898 // TODO: AMDGPU can use a "median of 3" instruction here:
9899 // a +/- med3(lo, b, hi)
9900 uint64_t NumBits = Ty.getScalarSizeInBits();
9901 auto MaxVal =
9902 MIRBuilder.buildConstant(Res: Ty, Val: APInt::getSignedMaxValue(numBits: NumBits));
9903 auto MinVal =
9904 MIRBuilder.buildConstant(Res: Ty, Val: APInt::getSignedMinValue(numBits: NumBits));
9905 MachineInstrBuilder Hi, Lo;
9906 if (IsAdd) {
9907 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0);
9908 Hi = MIRBuilder.buildSub(Dst: Ty, Src0: MaxVal, Src1: MIRBuilder.buildSMax(Dst: Ty, Src0: LHS, Src1: Zero));
9909 Lo = MIRBuilder.buildSub(Dst: Ty, Src0: MinVal, Src1: MIRBuilder.buildSMin(Dst: Ty, Src0: LHS, Src1: Zero));
9910 } else {
9911 auto NegOne = MIRBuilder.buildConstant(Res: Ty, Val: -1);
9912 Lo = MIRBuilder.buildSub(Dst: Ty, Src0: MIRBuilder.buildSMax(Dst: Ty, Src0: LHS, Src1: NegOne),
9913 Src1: MaxVal);
9914 Hi = MIRBuilder.buildSub(Dst: Ty, Src0: MIRBuilder.buildSMin(Dst: Ty, Src0: LHS, Src1: NegOne),
9915 Src1: MinVal);
9916 }
9917 auto RHSClamped =
9918 MIRBuilder.buildSMin(Dst: Ty, Src0: MIRBuilder.buildSMax(Dst: Ty, Src0: Lo, Src1: RHS), Src1: Hi);
9919 MIRBuilder.buildInstr(Opc: BaseOp, DstOps: {Res}, SrcOps: {LHS, RHSClamped});
9920 } else {
9921 // uadd.sat(a, b) -> a + umin(~a, b)
9922 // usub.sat(a, b) -> a - umin(a, b)
9923 Register Not = IsAdd ? MIRBuilder.buildNot(Dst: Ty, Src0: LHS).getReg(Idx: 0) : LHS;
9924 auto Min = MIRBuilder.buildUMin(Dst: Ty, Src0: Not, Src1: RHS);
9925 MIRBuilder.buildInstr(Opc: BaseOp, DstOps: {Res}, SrcOps: {LHS, Min});
9926 }
9927
9928 MI.eraseFromParent();
9929 return Legalized;
9930}
9931
9932LegalizerHelper::LegalizeResult
9933LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
9934 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9935 LLT Ty = MRI.getType(Reg: Res);
9936 LLT BoolTy = Ty.changeElementSize(NewEltSize: 1);
9937 bool IsSigned;
9938 bool IsAdd;
9939 unsigned OverflowOp;
9940 switch (MI.getOpcode()) {
9941 default:
9942 llvm_unreachable("unexpected addsat/subsat opcode");
9943 case TargetOpcode::G_UADDSAT:
9944 IsSigned = false;
9945 IsAdd = true;
9946 OverflowOp = TargetOpcode::G_UADDO;
9947 break;
9948 case TargetOpcode::G_SADDSAT:
9949 IsSigned = true;
9950 IsAdd = true;
9951 OverflowOp = TargetOpcode::G_SADDO;
9952 break;
9953 case TargetOpcode::G_USUBSAT:
9954 IsSigned = false;
9955 IsAdd = false;
9956 OverflowOp = TargetOpcode::G_USUBO;
9957 break;
9958 case TargetOpcode::G_SSUBSAT:
9959 IsSigned = true;
9960 IsAdd = false;
9961 OverflowOp = TargetOpcode::G_SSUBO;
9962 break;
9963 }
9964
9965 auto OverflowRes =
9966 MIRBuilder.buildInstr(Opc: OverflowOp, DstOps: {Ty, BoolTy}, SrcOps: {LHS, RHS});
9967 Register Tmp = OverflowRes.getReg(Idx: 0);
9968 Register Ov = OverflowRes.getReg(Idx: 1);
9969 MachineInstrBuilder Clamp;
9970 if (IsSigned) {
9971 // sadd.sat(a, b) ->
9972 // {tmp, ov} = saddo(a, b)
9973 // ov ? (tmp >>s 31) + 0x80000000 : r
9974 // ssub.sat(a, b) ->
9975 // {tmp, ov} = ssubo(a, b)
9976 // ov ? (tmp >>s 31) + 0x80000000 : r
9977 uint64_t NumBits = Ty.getScalarSizeInBits();
9978 auto ShiftAmount = MIRBuilder.buildConstant(Res: Ty, Val: NumBits - 1);
9979 auto Sign = MIRBuilder.buildAShr(Dst: Ty, Src0: Tmp, Src1: ShiftAmount);
9980 auto MinVal =
9981 MIRBuilder.buildConstant(Res: Ty, Val: APInt::getSignedMinValue(numBits: NumBits));
9982 Clamp = MIRBuilder.buildAdd(Dst: Ty, Src0: Sign, Src1: MinVal);
9983 } else {
9984 // uadd.sat(a, b) ->
9985 // {tmp, ov} = uaddo(a, b)
9986 // ov ? 0xffffffff : tmp
9987 // usub.sat(a, b) ->
9988 // {tmp, ov} = usubo(a, b)
9989 // ov ? 0 : tmp
9990 Clamp = MIRBuilder.buildConstant(Res: Ty, Val: IsAdd ? -1 : 0);
9991 }
9992 MIRBuilder.buildSelect(Res, Tst: Ov, Op0: Clamp, Op1: Tmp);
9993
9994 MI.eraseFromParent();
9995 return Legalized;
9996}
9997
9998LegalizerHelper::LegalizeResult
9999LegalizerHelper::lowerShlSat(MachineInstr &MI) {
10000 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
10001 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
10002 "Expected shlsat opcode!");
10003 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
10004 auto [Res, LHS, RHS] = MI.getFirst3Regs();
10005 LLT Ty = MRI.getType(Reg: Res);
10006 LLT BoolTy = Ty.changeElementSize(NewEltSize: 1);
10007
10008 unsigned BW = Ty.getScalarSizeInBits();
10009 auto Result = MIRBuilder.buildShl(Dst: Ty, Src0: LHS, Src1: RHS);
10010 auto Orig = IsSigned ? MIRBuilder.buildAShr(Dst: Ty, Src0: Result, Src1: RHS)
10011 : MIRBuilder.buildLShr(Dst: Ty, Src0: Result, Src1: RHS);
10012
10013 MachineInstrBuilder SatVal;
10014 if (IsSigned) {
10015 auto SatMin = MIRBuilder.buildConstant(Res: Ty, Val: APInt::getSignedMinValue(numBits: BW));
10016 auto SatMax = MIRBuilder.buildConstant(Res: Ty, Val: APInt::getSignedMaxValue(numBits: BW));
10017 auto Cmp = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SLT, Res: BoolTy, Op0: LHS,
10018 Op1: MIRBuilder.buildConstant(Res: Ty, Val: 0));
10019 SatVal = MIRBuilder.buildSelect(Res: Ty, Tst: Cmp, Op0: SatMin, Op1: SatMax);
10020 } else {
10021 SatVal = MIRBuilder.buildConstant(Res: Ty, Val: APInt::getMaxValue(numBits: BW));
10022 }
10023 auto Ov = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: BoolTy, Op0: LHS, Op1: Orig);
10024 MIRBuilder.buildSelect(Res, Tst: Ov, Op0: SatVal, Op1: Result);
10025
10026 MI.eraseFromParent();
10027 return Legalized;
10028}
10029
10030LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
10031 auto [Dst, Src] = MI.getFirst2Regs();
10032 const LLT Ty = MRI.getType(Reg: Src);
10033 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
10034 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
10035
10036 // Swap most and least significant byte, set remaining bytes in Res to zero.
10037 auto ShiftAmt = MIRBuilder.buildConstant(Res: Ty, Val: BaseShiftAmt);
10038 auto LSByteShiftedLeft = MIRBuilder.buildShl(Dst: Ty, Src0: Src, Src1: ShiftAmt);
10039 auto MSByteShiftedRight = MIRBuilder.buildLShr(Dst: Ty, Src0: Src, Src1: ShiftAmt);
10040 auto Res = MIRBuilder.buildOr(Dst: Ty, Src0: MSByteShiftedRight, Src1: LSByteShiftedLeft);
10041
10042 // Set i-th high/low byte in Res to i-th low/high byte from Src.
10043 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
10044 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
10045 APInt APMask = APInt::getBitsSet(numBits: SizeInBytes * 8, loBit: i * 8, hiBit: i * 8 + 8);
10046 auto Mask = MIRBuilder.buildConstant(Res: Ty, Val: APMask);
10047 auto ShiftAmt = MIRBuilder.buildConstant(Res: Ty, Val: BaseShiftAmt - 16 * i);
10048 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
10049 auto LoByte = MIRBuilder.buildAnd(Dst: Ty, Src0: Src, Src1: Mask);
10050 auto LoShiftedLeft = MIRBuilder.buildShl(Dst: Ty, Src0: LoByte, Src1: ShiftAmt);
10051 Res = MIRBuilder.buildOr(Dst: Ty, Src0: Res, Src1: LoShiftedLeft);
10052 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
10053 auto SrcShiftedRight = MIRBuilder.buildLShr(Dst: Ty, Src0: Src, Src1: ShiftAmt);
10054 auto HiShiftedRight = MIRBuilder.buildAnd(Dst: Ty, Src0: SrcShiftedRight, Src1: Mask);
10055 Res = MIRBuilder.buildOr(Dst: Ty, Src0: Res, Src1: HiShiftedRight);
10056 }
10057 Res.getInstr()->getOperand(i: 0).setReg(Dst);
10058
10059 MI.eraseFromParent();
10060 return Legalized;
10061}
10062
10063//{ (Src & Mask) >> N } | { (Src << N) & Mask }
10064static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
10065 MachineInstrBuilder Src, const APInt &Mask) {
10066 const LLT Ty = Dst.getLLTTy(MRI: *B.getMRI());
10067 MachineInstrBuilder C_N = B.buildConstant(Res: Ty, Val: N);
10068 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Res: Ty, Val: Mask);
10069 auto LHS = B.buildLShr(Dst: Ty, Src0: B.buildAnd(Dst: Ty, Src0: Src, Src1: MaskLoNTo0), Src1: C_N);
10070 auto RHS = B.buildAnd(Dst: Ty, Src0: B.buildShl(Dst: Ty, Src0: Src, Src1: C_N), Src1: MaskLoNTo0);
10071 return B.buildOr(Dst, Src0: LHS, Src1: RHS);
10072}
10073
10074LegalizerHelper::LegalizeResult
10075LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
10076 auto [Dst, Src] = MI.getFirst2Regs();
10077 const LLT SrcTy = MRI.getType(Reg: Src);
10078 unsigned Size = SrcTy.getScalarSizeInBits();
10079 unsigned VSize = SrcTy.getSizeInBits();
10080
10081 if (Size >= 8) {
10082 if (SrcTy.isVector() && (VSize % 8 == 0) &&
10083 (LI.isLegal(Query: {TargetOpcode::G_BITREVERSE,
10084 {LLT::fixed_vector(NumElements: VSize / 8, ScalarTy: LLT::integer(SizeInBits: 8)),
10085 LLT::fixed_vector(NumElements: VSize / 8, ScalarTy: LLT::integer(SizeInBits: 8))}}))) {
10086 // If bitreverse is legal for i8 vector of the same size, then cast
10087 // to i8 vector type.
10088 // e.g. v4s32 -> v16s8
10089 LLT VTy = LLT::fixed_vector(NumElements: VSize / 8, ScalarTy: LLT::integer(SizeInBits: 8));
10090 auto BSWAP = MIRBuilder.buildBSwap(Dst: SrcTy, Src0: Src);
10091 auto Cast = MIRBuilder.buildBitcast(Dst: VTy, Src: BSWAP);
10092 auto RBIT = MIRBuilder.buildBitReverse(Dst: VTy, Src: Cast);
10093 MIRBuilder.buildBitcast(Dst, Src: RBIT);
10094 } else {
10095 MachineInstrBuilder BSWAP =
10096 MIRBuilder.buildInstr(Opc: TargetOpcode::G_BSWAP, DstOps: {SrcTy}, SrcOps: {Src});
10097
10098 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
10099 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
10100 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
10101 MachineInstrBuilder Swap4 = SwapN(N: 4, Dst: SrcTy, B&: MIRBuilder, Src: BSWAP,
10102 Mask: APInt::getSplat(NewLen: Size, V: APInt(8, 0xF0)));
10103
10104 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
10105 // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
10106 // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
10107 MachineInstrBuilder Swap2 = SwapN(N: 2, Dst: SrcTy, B&: MIRBuilder, Src: Swap4,
10108 Mask: APInt::getSplat(NewLen: Size, V: APInt(8, 0xCC)));
10109
10110 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
10111 // 6|7
10112 // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
10113 // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
10114 SwapN(N: 1, Dst, B&: MIRBuilder, Src: Swap2, Mask: APInt::getSplat(NewLen: Size, V: APInt(8, 0xAA)));
10115 }
10116 } else {
10117 // Expand bitreverse for types smaller than 8 bits.
10118 MachineInstrBuilder Tmp;
10119 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
10120 MachineInstrBuilder Tmp2;
10121 if (I < J) {
10122 auto ShAmt = MIRBuilder.buildConstant(Res: SrcTy, Val: J - I);
10123 Tmp2 = MIRBuilder.buildShl(Dst: SrcTy, Src0: Src, Src1: ShAmt);
10124 } else {
10125 auto ShAmt = MIRBuilder.buildConstant(Res: SrcTy, Val: I - J);
10126 Tmp2 = MIRBuilder.buildLShr(Dst: SrcTy, Src0: Src, Src1: ShAmt);
10127 }
10128
10129 auto Mask = MIRBuilder.buildConstant(Res: SrcTy, Val: 1ULL << J);
10130 Tmp2 = MIRBuilder.buildAnd(Dst: SrcTy, Src0: Tmp2, Src1: Mask);
10131 if (I == 0)
10132 Tmp = Tmp2;
10133 else
10134 Tmp = MIRBuilder.buildOr(Dst: SrcTy, Src0: Tmp, Src1: Tmp2);
10135 }
10136 MIRBuilder.buildCopy(Res: Dst, Op: Tmp);
10137 }
10138
10139 MI.eraseFromParent();
10140 return Legalized;
10141}
10142
10143LegalizerHelper::LegalizeResult
10144LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
10145 MachineFunction &MF = MIRBuilder.getMF();
10146
10147 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
10148 int NameOpIdx = IsRead ? 1 : 0;
10149 int ValRegIndex = IsRead ? 0 : 1;
10150
10151 Register ValReg = MI.getOperand(i: ValRegIndex).getReg();
10152 const LLT Ty = MRI.getType(Reg: ValReg);
10153 const MDString *RegStr = cast<MDString>(
10154 Val: cast<MDNode>(Val: MI.getOperand(i: NameOpIdx).getMetadata())->getOperand(I: 0));
10155
10156 Register PhysReg = TLI.getRegisterByName(RegName: RegStr->getString().data(), Ty, MF);
10157 if (!PhysReg) {
10158 const Function &Fn = MF.getFunction();
10159 Fn.getContext().diagnose(DI: DiagnosticInfoGenericWithLoc(
10160 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
10161 (IsRead ? "llvm.read_register" : "llvm.write_register"),
10162 Fn, MI.getDebugLoc()));
10163 if (IsRead)
10164 MIRBuilder.buildUndef(Res: ValReg);
10165
10166 MI.eraseFromParent();
10167 return Legalized;
10168 }
10169
10170 if (IsRead)
10171 MIRBuilder.buildCopy(Res: ValReg, Op: PhysReg);
10172 else
10173 MIRBuilder.buildCopy(Res: PhysReg, Op: ValReg);
10174
10175 MI.eraseFromParent();
10176 return Legalized;
10177}
10178
10179LegalizerHelper::LegalizeResult
10180LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
10181 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
10182 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
10183 Register Result = MI.getOperand(i: 0).getReg();
10184 LLT OrigTy = MRI.getType(Reg: Result);
10185 auto SizeInBits = OrigTy.getScalarSizeInBits();
10186 LLT WideTy = OrigTy.changeElementSize(NewEltSize: SizeInBits * 2);
10187
10188 auto LHS = MIRBuilder.buildInstr(Opc: ExtOp, DstOps: {WideTy}, SrcOps: {MI.getOperand(i: 1)});
10189 auto RHS = MIRBuilder.buildInstr(Opc: ExtOp, DstOps: {WideTy}, SrcOps: {MI.getOperand(i: 2)});
10190 auto Mul = MIRBuilder.buildMul(Dst: WideTy, Src0: LHS, Src1: RHS);
10191 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
10192
10193 auto ShiftAmt = MIRBuilder.buildConstant(Res: WideTy, Val: SizeInBits);
10194 auto Shifted = MIRBuilder.buildInstr(Opc: ShiftOp, DstOps: {WideTy}, SrcOps: {Mul, ShiftAmt});
10195 MIRBuilder.buildTrunc(Res: Result, Op: Shifted);
10196
10197 MI.eraseFromParent();
10198 return Legalized;
10199}
10200
10201LegalizerHelper::LegalizeResult
10202LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
10203 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
10204 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(i: 2).getImm());
10205
10206 if (Mask == fcNone) {
10207 MIRBuilder.buildConstant(Res: DstReg, Val: 0);
10208 MI.eraseFromParent();
10209 return Legalized;
10210 }
10211 if (Mask == fcAllFlags) {
10212 MIRBuilder.buildConstant(Res: DstReg, Val: 1);
10213 MI.eraseFromParent();
10214 return Legalized;
10215 }
10216
10217 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
10218 // version
10219
10220 unsigned BitSize = SrcTy.getScalarSizeInBits();
10221 const fltSemantics &Semantics = getFltSemanticForLLT(Ty: SrcTy.getScalarType());
10222
10223 LLT IntTy = SrcTy.changeElementType(NewEltTy: LLT::integer(SizeInBits: BitSize));
10224 auto AsInt = SrcTy == IntTy ? MIRBuilder.buildCopy(Res: IntTy, Op: SrcReg)
10225 : MIRBuilder.buildBitcast(Dst: IntTy, Src: SrcReg);
10226
10227 // Various masks.
10228 APInt SignBit = APInt::getSignMask(BitWidth: BitSize);
10229 APInt ValueMask = APInt::getSignedMaxValue(numBits: BitSize); // All bits but sign.
10230 APInt Inf = APFloat::getInf(Sem: Semantics).bitcastToAPInt(); // Exp and int bit.
10231 APInt ExpMask = Inf;
10232 APInt AllOneMantissa = APFloat::getLargest(Sem: Semantics).bitcastToAPInt() & ~Inf;
10233 APInt QNaNBitMask =
10234 APInt::getOneBitSet(numBits: BitSize, BitNo: AllOneMantissa.getActiveBits() - 1);
10235 APInt InversionMask = APInt::getAllOnes(numBits: DstTy.getScalarSizeInBits());
10236
10237 auto SignBitC = MIRBuilder.buildConstant(Res: IntTy, Val: SignBit);
10238 auto ValueMaskC = MIRBuilder.buildConstant(Res: IntTy, Val: ValueMask);
10239 auto InfC = MIRBuilder.buildConstant(Res: IntTy, Val: Inf);
10240 auto ExpMaskC = MIRBuilder.buildConstant(Res: IntTy, Val: ExpMask);
10241 auto ZeroC = MIRBuilder.buildConstant(Res: IntTy, Val: 0);
10242
10243 auto Abs = MIRBuilder.buildAnd(Dst: IntTy, Src0: AsInt, Src1: ValueMaskC);
10244 auto Sign =
10245 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_NE, Res: DstTy, Op0: AsInt, Op1: Abs);
10246
10247 auto Res = MIRBuilder.buildConstant(Res: DstTy, Val: 0);
10248 // Clang doesn't support capture of structured bindings:
10249 LLT DstTyCopy = DstTy;
10250 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
10251 Res = MIRBuilder.buildOr(Dst: DstTyCopy, Src0: Res, Src1: ToAppend);
10252 };
10253
10254 // Tests that involve more than one class should be processed first.
10255 if ((Mask & fcFinite) == fcFinite) {
10256 // finite(V) ==> abs(V) u< exp_mask
10257 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_ULT, Res: DstTy, Op0: Abs,
10258 Op1: ExpMaskC));
10259 Mask &= ~fcFinite;
10260 } else if ((Mask & fcFinite) == fcPosFinite) {
10261 // finite(V) && V > 0 ==> V u< exp_mask
10262 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_ULT, Res: DstTy, Op0: AsInt,
10263 Op1: ExpMaskC));
10264 Mask &= ~fcPosFinite;
10265 } else if ((Mask & fcFinite) == fcNegFinite) {
10266 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
10267 auto Cmp = MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_ULT, Res: DstTy, Op0: Abs,
10268 Op1: ExpMaskC);
10269 auto And = MIRBuilder.buildAnd(Dst: DstTy, Src0: Cmp, Src1: Sign);
10270 appendToRes(And);
10271 Mask &= ~fcNegFinite;
10272 }
10273
10274 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
10275 // fcZero | fcSubnormal => test all exponent bits are 0
10276 // TODO: Handle sign bit specific cases
10277 // TODO: Handle inverted case
10278 if (PartialCheck == (fcZero | fcSubnormal)) {
10279 auto ExpBits = MIRBuilder.buildAnd(Dst: IntTy, Src0: AsInt, Src1: ExpMaskC);
10280 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: DstTy,
10281 Op0: ExpBits, Op1: ZeroC));
10282 Mask &= ~PartialCheck;
10283 }
10284 }
10285
10286 // Check for individual classes.
10287 if (FPClassTest PartialCheck = Mask & fcZero) {
10288 if (PartialCheck == fcPosZero)
10289 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: DstTy,
10290 Op0: AsInt, Op1: ZeroC));
10291 else if (PartialCheck == fcZero)
10292 appendToRes(
10293 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: DstTy, Op0: Abs, Op1: ZeroC));
10294 else // fcNegZero
10295 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: DstTy,
10296 Op0: AsInt, Op1: SignBitC));
10297 }
10298
10299 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
10300 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
10301 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
10302 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
10303 auto OneC = MIRBuilder.buildConstant(Res: IntTy, Val: 1);
10304 auto VMinusOne = MIRBuilder.buildSub(Dst: IntTy, Src0: V, Src1: OneC);
10305 auto SubnormalRes =
10306 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_ULT, Res: DstTy, Op0: VMinusOne,
10307 Op1: MIRBuilder.buildConstant(Res: IntTy, Val: AllOneMantissa));
10308 if (PartialCheck == fcNegSubnormal)
10309 SubnormalRes = MIRBuilder.buildAnd(Dst: DstTy, Src0: SubnormalRes, Src1: Sign);
10310 appendToRes(SubnormalRes);
10311 }
10312
10313 if (FPClassTest PartialCheck = Mask & fcInf) {
10314 if (PartialCheck == fcPosInf)
10315 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: DstTy,
10316 Op0: AsInt, Op1: InfC));
10317 else if (PartialCheck == fcInf)
10318 appendToRes(
10319 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: DstTy, Op0: Abs, Op1: InfC));
10320 else { // fcNegInf
10321 APInt NegInf = APFloat::getInf(Sem: Semantics, Negative: true).bitcastToAPInt();
10322 auto NegInfC = MIRBuilder.buildConstant(Res: IntTy, Val: NegInf);
10323 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: DstTy,
10324 Op0: AsInt, Op1: NegInfC));
10325 }
10326 }
10327
10328 if (FPClassTest PartialCheck = Mask & fcNan) {
10329 auto InfWithQnanBitC = MIRBuilder.buildConstant(Res: IntTy, Val: Inf | QNaNBitMask);
10330 if (PartialCheck == fcNan) {
10331 // isnan(V) ==> abs(V) u> int(inf)
10332 appendToRes(
10333 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_UGT, Res: DstTy, Op0: Abs, Op1: InfC));
10334 } else if (PartialCheck == fcQNan) {
10335 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
10336 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_UGE, Res: DstTy, Op0: Abs,
10337 Op1: InfWithQnanBitC));
10338 } else { // fcSNan
10339 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
10340 // abs(V) u< (unsigned(Inf) | quiet_bit)
10341 auto IsNan =
10342 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_UGT, Res: DstTy, Op0: Abs, Op1: InfC);
10343 auto IsNotQnan = MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_ULT, Res: DstTy,
10344 Op0: Abs, Op1: InfWithQnanBitC);
10345 appendToRes(MIRBuilder.buildAnd(Dst: DstTy, Src0: IsNan, Src1: IsNotQnan));
10346 }
10347 }
10348
10349 if (FPClassTest PartialCheck = Mask & fcNormal) {
10350 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
10351 // (max_exp-1))
10352 APInt ExpLSB = ExpMask & ~(ExpMask.shl(shiftAmt: 1));
10353 auto ExpMinusOne = MIRBuilder.buildSub(
10354 Dst: IntTy, Src0: Abs, Src1: MIRBuilder.buildConstant(Res: IntTy, Val: ExpLSB));
10355 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10356 auto NormalRes =
10357 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_ULT, Res: DstTy, Op0: ExpMinusOne,
10358 Op1: MIRBuilder.buildConstant(Res: IntTy, Val: MaxExpMinusOne));
10359 if (PartialCheck == fcNegNormal)
10360 NormalRes = MIRBuilder.buildAnd(Dst: DstTy, Src0: NormalRes, Src1: Sign);
10361 else if (PartialCheck == fcPosNormal) {
10362 auto PosSign = MIRBuilder.buildXor(
10363 Dst: DstTy, Src0: Sign, Src1: MIRBuilder.buildConstant(Res: DstTy, Val: InversionMask));
10364 NormalRes = MIRBuilder.buildAnd(Dst: DstTy, Src0: NormalRes, Src1: PosSign);
10365 }
10366 appendToRes(NormalRes);
10367 }
10368
10369 MIRBuilder.buildCopy(Res: DstReg, Op: Res);
10370 MI.eraseFromParent();
10371 return Legalized;
10372}
10373
10374LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
10375 // Implement G_SELECT in terms of XOR, AND, OR.
10376 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10377 MI.getFirst4RegLLTs();
10378
10379 LLT Op1TyInt =
10380 Op1Ty.changeElementType(NewEltTy: LLT::integer(SizeInBits: Op1Ty.getScalarSizeInBits()));
10381
10382 bool IsEltPtr = DstTy.isPointerOrPointerVector();
10383 if (IsEltPtr) {
10384 LLT ScalarPtrTy = LLT::integer(SizeInBits: DstTy.getScalarSizeInBits());
10385 LLT NewTy = DstTy.changeElementType(NewEltTy: ScalarPtrTy);
10386 Op1Reg = MIRBuilder.buildPtrToInt(Dst: NewTy, Src: Op1Reg).getReg(Idx: 0);
10387 Op1Ty = MRI.getType(Reg: Op1Reg);
10388 Op2Reg = MIRBuilder.buildPtrToInt(Dst: NewTy, Src: Op2Reg).getReg(Idx: 0);
10389 Op2Ty = MRI.getType(Reg: Op2Reg);
10390 DstTy = NewTy;
10391 }
10392
10393 if (MaskTy.isScalar()) {
10394 // Turn the scalar condition into a vector condition mask if needed.
10395
10396 Register MaskElt = MaskReg;
10397
10398 // The condition was potentially zero extended before, but we want a sign
10399 // extended boolean.
10400 if (MaskTy != LLT::scalar(SizeInBits: 1))
10401 MaskElt = MIRBuilder.buildSExtInReg(Res: MaskTy, Op: MaskElt, ImmOp: 1).getReg(Idx: 0);
10402
10403 // Continue the sign extension (or truncate) to match the data type.
10404 MaskTy = DstTy.changeElementType(NewEltTy: LLT::integer(SizeInBits: DstTy.getScalarSizeInBits()));
10405 MaskElt =
10406 MIRBuilder.buildSExtOrTrunc(Res: MaskTy.getScalarType(), Op: MaskElt).getReg(Idx: 0);
10407
10408 if (DstTy.isVector()) {
10409 // Generate a vector splat idiom.
10410 auto ShufSplat = MIRBuilder.buildShuffleSplat(Res: MaskTy, Src: MaskElt);
10411 MaskReg = ShufSplat.getReg(Idx: 0);
10412 } else {
10413 MaskReg = MaskElt;
10414 }
10415 } else if (!DstTy.isVector()) {
10416 // Cannot handle the case that mask is a vector and dst is a scalar.
10417 return UnableToLegalize;
10418 }
10419
10420 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10421 return UnableToLegalize;
10422 }
10423
10424 if (!Op1Ty.getScalarType().isAnyScalar() &&
10425 !Op1Ty.getScalarType().isInteger())
10426 Op1Reg = MIRBuilder.buildBitcast(Dst: Op1TyInt, Src: Op1Reg).getReg(Idx: 0);
10427
10428 if (!Op2Ty.getScalarType().isAnyScalar() &&
10429 !Op2Ty.getScalarType().isInteger()) {
10430 auto Op2TyInt =
10431 Op2Ty.changeElementType(NewEltTy: LLT::integer(SizeInBits: Op2Ty.getScalarSizeInBits()));
10432 Op2Reg = MIRBuilder.buildBitcast(Dst: Op2TyInt, Src: Op2Reg).getReg(Idx: 0);
10433 }
10434
10435 auto NotMask = MIRBuilder.buildNot(Dst: MaskTy, Src0: MaskReg);
10436 auto NewOp1 = MIRBuilder.buildAnd(Dst: MaskTy, Src0: Op1Reg, Src1: MaskReg);
10437 auto NewOp2 = MIRBuilder.buildAnd(Dst: MaskTy, Src0: Op2Reg, Src1: NotMask);
10438 if (IsEltPtr) {
10439 auto Or = MIRBuilder.buildOr(Dst: DstTy, Src0: NewOp1, Src1: NewOp2);
10440 MIRBuilder.buildIntToPtr(Dst: DstReg, Src: Or);
10441 } else {
10442 if (DstTy == Op1TyInt)
10443 MIRBuilder.buildOr(Dst: DstReg, Src0: NewOp1, Src1: NewOp2);
10444 else {
10445 auto Or = MIRBuilder.buildOr(Dst: Op1TyInt, Src0: NewOp1, Src1: NewOp2);
10446 MIRBuilder.buildBitcast(Dst: DstReg, Src: Or.getReg(Idx: 0));
10447 }
10448 }
10449 MI.eraseFromParent();
10450 return Legalized;
10451}
10452
10453LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
10454 // Split DIVREM into individual instructions.
10455 unsigned Opcode = MI.getOpcode();
10456
10457 MIRBuilder.buildInstr(
10458 Opc: Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10459 : TargetOpcode::G_UDIV,
10460 DstOps: {MI.getOperand(i: 0).getReg()}, SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3)});
10461 MIRBuilder.buildInstr(
10462 Opc: Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10463 : TargetOpcode::G_UREM,
10464 DstOps: {MI.getOperand(i: 1).getReg()}, SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3)});
10465 MI.eraseFromParent();
10466 return Legalized;
10467}
10468
10469LegalizerHelper::LegalizeResult
10470LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
10471 // Expand %res = G_ABS %a into:
10472 // %v1 = G_ASHR %a, scalar_size-1
10473 // %v2 = G_ADD %a, %v1
10474 // %res = G_XOR %v2, %v1
10475 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
10476 Register OpReg = MI.getOperand(i: 1).getReg();
10477 auto ShiftAmt =
10478 MIRBuilder.buildConstant(Res: DstTy, Val: DstTy.getScalarSizeInBits() - 1);
10479 auto Shift = MIRBuilder.buildAShr(Dst: DstTy, Src0: OpReg, Src1: ShiftAmt);
10480 auto Add = MIRBuilder.buildAdd(Dst: DstTy, Src0: OpReg, Src1: Shift);
10481 MIRBuilder.buildXor(Dst: MI.getOperand(i: 0).getReg(), Src0: Add, Src1: Shift);
10482 MI.eraseFromParent();
10483 return Legalized;
10484}
10485
10486LegalizerHelper::LegalizeResult
10487LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
10488 // Expand %res = G_ABS %a into:
10489 // %v1 = G_CONSTANT 0
10490 // %v2 = G_SUB %v1, %a
10491 // %res = G_SMAX %a, %v2
10492 Register SrcReg = MI.getOperand(i: 1).getReg();
10493 LLT Ty = MRI.getType(Reg: SrcReg);
10494 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0);
10495 auto Sub = MIRBuilder.buildSub(Dst: Ty, Src0: Zero, Src1: SrcReg);
10496 MIRBuilder.buildSMax(Dst: MI.getOperand(i: 0), Src0: SrcReg, Src1: Sub);
10497 MI.eraseFromParent();
10498 return Legalized;
10499}
10500
10501LegalizerHelper::LegalizeResult
10502LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
10503 Register SrcReg = MI.getOperand(i: 1).getReg();
10504 Register DestReg = MI.getOperand(i: 0).getReg();
10505 LLT Ty = MRI.getType(Reg: SrcReg), IType = LLT::scalar(SizeInBits: 1);
10506 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0).getReg(Idx: 0);
10507 auto Sub = MIRBuilder.buildSub(Dst: Ty, Src0: Zero, Src1: SrcReg).getReg(Idx: 0);
10508 auto ICmp = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SGT, Res: IType, Op0: SrcReg, Op1: Zero);
10509 MIRBuilder.buildSelect(Res: DestReg, Tst: ICmp, Op0: SrcReg, Op1: Sub);
10510 MI.eraseFromParent();
10511 return Legalized;
10512}
10513
10514LegalizerHelper::LegalizeResult
10515LegalizerHelper::lowerAbsDiffToSelect(MachineInstr &MI) {
10516 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10517 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10518 "Expected G_ABDS or G_ABDU instruction");
10519
10520 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10521 LLT Ty = MRI.getType(Reg: LHS);
10522
10523 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10524 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10525 Register LHSSub = MIRBuilder.buildSub(Dst: Ty, Src0: LHS, Src1: RHS).getReg(Idx: 0);
10526 Register RHSSub = MIRBuilder.buildSub(Dst: Ty, Src0: RHS, Src1: LHS).getReg(Idx: 0);
10527 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
10528 ? CmpInst::ICMP_SGT
10529 : CmpInst::ICMP_UGT;
10530 auto ICmp = MIRBuilder.buildICmp(Pred, Res: LLT::scalar(SizeInBits: 1), Op0: LHS, Op1: RHS);
10531 MIRBuilder.buildSelect(Res: DstReg, Tst: ICmp, Op0: LHSSub, Op1: RHSSub);
10532
10533 MI.eraseFromParent();
10534 return Legalized;
10535}
10536
10537LegalizerHelper::LegalizeResult
10538LegalizerHelper::lowerAbsDiffToMinMax(MachineInstr &MI) {
10539 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10540 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10541 "Expected G_ABDS or G_ABDU instruction");
10542
10543 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10544 LLT Ty = MRI.getType(Reg: LHS);
10545
10546 // abds(lhs, rhs) -→ sub(smax(lhs, rhs), smin(lhs, rhs))
10547 // abdu(lhs, rhs) -→ sub(umax(lhs, rhs), umin(lhs, rhs))
10548 Register MaxReg, MinReg;
10549 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10550 MaxReg = MIRBuilder.buildSMax(Dst: Ty, Src0: LHS, Src1: RHS).getReg(Idx: 0);
10551 MinReg = MIRBuilder.buildSMin(Dst: Ty, Src0: LHS, Src1: RHS).getReg(Idx: 0);
10552 } else {
10553 MaxReg = MIRBuilder.buildUMax(Dst: Ty, Src0: LHS, Src1: RHS).getReg(Idx: 0);
10554 MinReg = MIRBuilder.buildUMin(Dst: Ty, Src0: LHS, Src1: RHS).getReg(Idx: 0);
10555 }
10556 MIRBuilder.buildSub(Dst: DstReg, Src0: MaxReg, Src1: MinReg);
10557
10558 MI.eraseFromParent();
10559 return Legalized;
10560}
10561
10562LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
10563 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
10564 LLT TyInt =
10565 DstTy.changeElementType(NewEltTy: LLT::integer(SizeInBits: DstTy.getScalarSizeInBits()));
10566 Register CastedSrc = SrcReg;
10567
10568 if (!(SrcTy.getScalarType().isAnyScalar() ||
10569 SrcTy.getScalarType().isInteger())) {
10570 auto SrcTyInt =
10571 SrcTy.changeElementType(NewEltTy: LLT::integer(SizeInBits: SrcTy.getScalarSizeInBits()));
10572 CastedSrc = MIRBuilder.buildBitcast(Dst: SrcTyInt, Src: SrcReg).getReg(Idx: 0);
10573 }
10574
10575 if (MRI.getType(Reg: DstReg) != TyInt) {
10576 // Reset sign bit
10577 Register NewDst =
10578 MIRBuilder
10579 .buildAnd(Dst: TyInt, Src0: CastedSrc,
10580 Src1: MIRBuilder.buildConstant(
10581 Res: TyInt, Val: APInt::getSignedMaxValue(
10582 numBits: DstTy.getScalarSizeInBits())))
10583 .getReg(Idx: 0);
10584
10585 MIRBuilder.buildBitcast(Dst: DstReg, Src: NewDst);
10586 } else
10587 MIRBuilder
10588 .buildAnd(
10589 Dst: DstReg, Src0: CastedSrc,
10590 Src1: MIRBuilder.buildConstant(
10591 Res: TyInt, Val: APInt::getSignedMaxValue(numBits: DstTy.getScalarSizeInBits())))
10592 .getReg(Idx: 0);
10593
10594 MI.eraseFromParent();
10595 return Legalized;
10596}
10597
10598LegalizerHelper::LegalizeResult
10599LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
10600 Register SrcReg = MI.getOperand(i: 1).getReg();
10601 LLT SrcTy = MRI.getType(Reg: SrcReg);
10602 LLT DstTy = MRI.getType(Reg: SrcReg);
10603
10604 // The source could be a scalar if the IR type was <1 x sN>.
10605 if (SrcTy.isScalar()) {
10606 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10607 return UnableToLegalize; // FIXME: handle extension.
10608 // This can be just a plain copy.
10609 Observer.changingInstr(MI);
10610 MI.setDesc(MIRBuilder.getTII().get(Opcode: TargetOpcode::COPY));
10611 Observer.changedInstr(MI);
10612 return Legalized;
10613 }
10614 return UnableToLegalize;
10615}
10616
10617LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
10618 MachineFunction &MF = *MI.getMF();
10619 const DataLayout &DL = MIRBuilder.getDataLayout();
10620 LLVMContext &Ctx = MF.getFunction().getContext();
10621 Register ListPtr = MI.getOperand(i: 1).getReg();
10622 LLT PtrTy = MRI.getType(Reg: ListPtr);
10623
10624 // LstPtr is a pointer to the head of the list. Get the address
10625 // of the head of the list.
10626 Align PtrAlignment = DL.getABITypeAlign(Ty: getTypeForLLT(Ty: PtrTy, C&: Ctx));
10627 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10628 PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad, MemTy: PtrTy, base_alignment: PtrAlignment);
10629 auto VAList = MIRBuilder.buildLoad(Res: PtrTy, Addr: ListPtr, MMO&: *PtrLoadMMO).getReg(Idx: 0);
10630
10631 const Align A(MI.getOperand(i: 2).getImm());
10632 LLT PtrTyAsScalarTy = LLT::scalar(SizeInBits: PtrTy.getSizeInBits());
10633 if (A > TLI.getMinStackArgumentAlignment()) {
10634 Register AlignAmt =
10635 MIRBuilder.buildConstant(Res: PtrTyAsScalarTy, Val: A.value() - 1).getReg(Idx: 0);
10636 auto AddDst = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: VAList, Op1: AlignAmt);
10637 auto AndDst = MIRBuilder.buildMaskLowPtrBits(Res: PtrTy, Op0: AddDst, NumBits: Log2(A));
10638 VAList = AndDst.getReg(Idx: 0);
10639 }
10640
10641 // Increment the pointer, VAList, to the next vaarg
10642 // The list should be bumped by the size of element in the current head of
10643 // list.
10644 Register Dst = MI.getOperand(i: 0).getReg();
10645 LLT LLTTy = MRI.getType(Reg: Dst);
10646 Type *Ty = getTypeForLLT(Ty: LLTTy, C&: Ctx);
10647 auto IncAmt =
10648 MIRBuilder.buildConstant(Res: PtrTyAsScalarTy, Val: DL.getTypeAllocSize(Ty));
10649 auto Succ = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: VAList, Op1: IncAmt);
10650
10651 // Store the increment VAList to the legalized pointer
10652 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
10653 PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOStore, MemTy: PtrTy, base_alignment: PtrAlignment);
10654 MIRBuilder.buildStore(Val: Succ, Addr: ListPtr, MMO&: *StoreMMO);
10655 // Load the actual argument out of the pointer VAList
10656 Align EltAlignment = DL.getABITypeAlign(Ty);
10657 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10658 PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad, MemTy: LLTTy, base_alignment: EltAlignment);
10659 MIRBuilder.buildLoad(Res: Dst, Addr: VAList, MMO&: *EltLoadMMO);
10660
10661 MI.eraseFromParent();
10662 return Legalized;
10663}
10664
10665LegalizerHelper::LegalizeResult LegalizerHelper::lowerMulfix(MachineInstr &MI) {
10666 [[maybe_unused]] unsigned OpCode = MI.getOpcode();
10667 assert((OpCode == TargetOpcode::G_SMULFIX ||
10668 OpCode == TargetOpcode::G_UMULFIX) &&
10669 "Operator must be either G_SMULFIX or G_UMULFIX!");
10670 auto [Dst, LHS, RHS] = MI.getFirst3Regs();
10671 LLT Ty = MRI.getType(Reg: Dst);
10672 unsigned Scale = MI.getOperand(i: 3).getImm();
10673
10674 if (Scale == 0) {
10675 MIRBuilder.buildMul(Dst, Src0: LHS, Src1: RHS);
10676 MI.eraseFromParent();
10677 return Legalized;
10678 }
10679
10680 // TODO: Port other lowerng paths from SelectionDAG.
10681 LLT WideTy = Ty.changeElementSize(NewEltSize: Ty.getScalarSizeInBits() * 2);
10682 auto ShiftAmt = MIRBuilder.buildConstant(Res: WideTy, Val: Scale);
10683 MachineInstrBuilder ExtLHS{}, ExtRHS{}, Shift{};
10684 if (MI.getOpcode() == TargetOpcode::G_SMULFIX) {
10685 ExtLHS = MIRBuilder.buildSExt(Res: WideTy, Op: LHS);
10686 ExtRHS = MIRBuilder.buildSExt(Res: WideTy, Op: RHS);
10687 } else {
10688 ExtLHS = MIRBuilder.buildZExt(Res: WideTy, Op: LHS);
10689 ExtRHS = MIRBuilder.buildZExt(Res: WideTy, Op: RHS);
10690 }
10691
10692 auto Mul = MIRBuilder.buildMul(Dst: WideTy, Src0: ExtLHS, Src1: ExtRHS);
10693 if (MI.getOpcode() == TargetOpcode::G_SMULFIX)
10694 Shift = MIRBuilder.buildAShr(Dst: WideTy, Src0: Mul, Src1: ShiftAmt);
10695 else
10696 Shift = MIRBuilder.buildLShr(Dst: WideTy, Src0: Mul, Src1: ShiftAmt);
10697
10698 MIRBuilder.buildTrunc(Res: Dst, Op: Shift);
10699
10700 MI.eraseFromParent();
10701 return Legalized;
10702}
10703
10704// Get a vectorized representation of the memset value operand, GISel edition.
10705static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
10706 MachineRegisterInfo &MRI = *MIB.getMRI();
10707 unsigned NumBits = Ty.getScalarSizeInBits();
10708 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(VReg: Val, MRI);
10709 if (!Ty.isVector() && ValVRegAndVal) {
10710 APInt Scalar = ValVRegAndVal->Value.trunc(width: 8);
10711 APInt SplatVal = APInt::getSplat(NewLen: NumBits, V: Scalar);
10712 return MIB.buildConstant(Res: Ty, Val: SplatVal).getReg(Idx: 0);
10713 }
10714
10715 // Extend the byte value to the larger type, and then multiply by a magic
10716 // value 0x010101... in order to replicate it across every byte.
10717 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
10718 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10719 return MIB.buildConstant(Res: Ty, Val: 0).getReg(Idx: 0);
10720 }
10721
10722 LLT ExtType = Ty.getScalarType();
10723 auto ZExt = MIB.buildZExtOrTrunc(Res: ExtType, Op: Val);
10724 if (NumBits > 8) {
10725 APInt Magic = APInt::getSplat(NewLen: NumBits, V: APInt(8, 0x01));
10726 auto MagicMI = MIB.buildConstant(Res: ExtType, Val: Magic);
10727 Val = MIB.buildMul(Dst: ExtType, Src0: ZExt, Src1: MagicMI).getReg(Idx: 0);
10728 }
10729
10730 // For vector types create a G_BUILD_VECTOR.
10731 if (Ty.isVector())
10732 Val = MIB.buildSplatBuildVector(Res: Ty, Src: Val).getReg(Idx: 0);
10733
10734 return Val;
10735}
10736
10737LegalizerHelper::LegalizeResult
10738LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10739 uint64_t KnownLen, Align Alignment,
10740 bool DstAlignCanChange, ArrayRef<LLT> MemOps) {
10741 auto &MF = *MI.getParent()->getParent();
10742 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10743 auto &DL = MF.getDataLayout();
10744 LLVMContext &C = MF.getFunction().getContext();
10745
10746 assert(KnownLen != 0 && "Have a zero length memset length!");
10747 assert(!MemOps.empty() && "Expected at least one memory op");
10748
10749 MachineFrameInfo &MFI = MF.getFrameInfo();
10750 MachineInstr *FIDef = getOpcodeDef(Opcode: TargetOpcode::G_FRAME_INDEX, Reg: Dst, MRI);
10751 const auto &DstMMO = **MI.memoperands_begin();
10752
10753 if (DstAlignCanChange) {
10754 // Get an estimate of the type from the LLT.
10755 Type *IRTy = getTypeForLLT(Ty: MemOps[0], C);
10756 Align NewAlign = DL.getABITypeAlign(Ty: IRTy);
10757 if (NewAlign > Alignment) {
10758 Alignment = NewAlign;
10759 unsigned FI = FIDef->getOperand(i: 1).getIndex();
10760 // Give the stack frame object a larger alignment if needed.
10761 if (MFI.getObjectAlign(ObjectIdx: FI) < Alignment)
10762 MFI.setObjectAlignment(ObjectIdx: FI, Alignment);
10763 }
10764 }
10765
10766 MachineIRBuilder MIB(MI);
10767 // Find the largest store and generate the bit pattern for it.
10768 LLT LargestTy = MemOps[0];
10769 for (unsigned i = 1; i < MemOps.size(); i++)
10770 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10771 LargestTy = MemOps[i];
10772
10773 // The memset stored value is always defined as an s8, so in order to make it
10774 // work with larger store types we need to repeat the bit pattern across the
10775 // wider type.
10776 Register MemSetValue = getMemsetValue(Val, Ty: LargestTy, MIB);
10777
10778 if (!MemSetValue)
10779 return UnableToLegalize;
10780
10781 // Generate the stores. For each store type in the list, we generate the
10782 // matching store of that type to the destination address.
10783 LLT PtrTy = MRI.getType(Reg: Dst);
10784 unsigned DstOff = 0;
10785 unsigned Size = KnownLen;
10786 for (unsigned I = 0; I < MemOps.size(); I++) {
10787 LLT Ty = MemOps[I];
10788 unsigned TySize = Ty.getSizeInBytes();
10789 if (TySize > Size) {
10790 // Issuing an unaligned load / store pair that overlaps with the previous
10791 // pair. Adjust the offset accordingly.
10792 assert(I == MemOps.size() - 1 && I != 0);
10793 DstOff -= TySize - Size;
10794 }
10795
10796 // If this store is smaller than the largest store see whether we can get
10797 // the smaller value for free with a truncate.
10798 Register Value = MemSetValue;
10799 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10800 MVT VT = getMVTForLLT(Ty);
10801 MVT LargestVT = getMVTForLLT(Ty: LargestTy);
10802 if (!LargestTy.isVector() && !Ty.isVector() &&
10803 TLI.isTruncateFree(FromVT: LargestVT, ToVT: VT))
10804 Value = MIB.buildTrunc(Res: Ty, Op: MemSetValue).getReg(Idx: 0);
10805 else
10806 Value = getMemsetValue(Val, Ty, MIB);
10807 if (!Value)
10808 return UnableToLegalize;
10809 }
10810
10811 auto *StoreMMO = MF.getMachineMemOperand(MMO: &DstMMO, Offset: DstOff, Ty);
10812
10813 Register Ptr = Dst;
10814 if (DstOff != 0) {
10815 auto Offset =
10816 MIB.buildConstant(Res: LLT::scalar(SizeInBits: PtrTy.getSizeInBits()), Val: DstOff);
10817 Ptr = MIB.buildObjectPtrOffset(Res: PtrTy, Op0: Dst, Op1: Offset).getReg(Idx: 0);
10818 }
10819
10820 MIB.buildStore(Val: Value, Addr: Ptr, MMO&: *StoreMMO);
10821 DstOff += Ty.getSizeInBytes();
10822 Size -= TySize;
10823 }
10824
10825 MI.eraseFromParent();
10826 return Legalized;
10827}
10828
10829LegalizerHelper::LegalizeResult
10830LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10831 uint64_t KnownLen, Align Alignment,
10832 bool DstAlignCanChange, ArrayRef<LLT> MemOps) {
10833 auto &MF = *MI.getParent()->getParent();
10834 auto &DL = MF.getDataLayout();
10835 LLVMContext &C = MF.getFunction().getContext();
10836
10837 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10838 assert(!MemOps.empty() && "Expected at least one memory op");
10839
10840 MachineFrameInfo &MFI = MF.getFrameInfo();
10841 MachineInstr *FIDef = getOpcodeDef(Opcode: TargetOpcode::G_FRAME_INDEX, Reg: Dst, MRI);
10842
10843 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10844 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10845 // if the memcpy is in a tail call position.
10846
10847 const auto &DstMMO = **MI.memoperands_begin();
10848 const auto &SrcMMO = **std::next(x: MI.memoperands_begin());
10849
10850 if (DstAlignCanChange) {
10851 // Get an estimate of the type from the LLT.
10852 Type *IRTy = getTypeForLLT(Ty: MemOps[0], C);
10853 Align NewAlign = DL.getABITypeAlign(Ty: IRTy);
10854
10855 // Don't promote to an alignment that would require dynamic stack
10856 // realignment.
10857 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10858 if (!TRI->hasStackRealignment(MF))
10859 if (MaybeAlign StackAlign = DL.getStackAlignment())
10860 NewAlign = std::min(a: NewAlign, b: *StackAlign);
10861
10862 if (NewAlign > Alignment) {
10863 Alignment = NewAlign;
10864 unsigned FI = FIDef->getOperand(i: 1).getIndex();
10865 // Give the stack frame object a larger alignment if needed.
10866 if (MFI.getObjectAlign(ObjectIdx: FI) < Alignment)
10867 MFI.setObjectAlignment(ObjectIdx: FI, Alignment);
10868 }
10869 }
10870
10871 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10872
10873 MachineIRBuilder MIB(MI);
10874 // Now we need to emit a pair of load and stores for each of the types we've
10875 // collected. I.e. for each type, generate a load from the source pointer of
10876 // that type width, and then generate a corresponding store to the dest buffer
10877 // of that value loaded. This can result in a sequence of loads and stores
10878 // mixed types, depending on what the target specifies as good types to use.
10879 unsigned CurrOffset = 0;
10880 unsigned Size = KnownLen;
10881 for (auto CopyTy : MemOps) {
10882 // Issuing an unaligned load / store pair that overlaps with the previous
10883 // pair. Adjust the offset accordingly.
10884 if (CopyTy.getSizeInBytes() > Size)
10885 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10886
10887 // Construct MMOs for the accesses.
10888 auto *LoadMMO =
10889 MF.getMachineMemOperand(MMO: &SrcMMO, Offset: CurrOffset, Size: CopyTy.getSizeInBytes());
10890 auto *StoreMMO =
10891 MF.getMachineMemOperand(MMO: &DstMMO, Offset: CurrOffset, Size: CopyTy.getSizeInBytes());
10892
10893 // Create the load.
10894 Register LoadPtr = Src;
10895 Register Offset;
10896 if (CurrOffset != 0) {
10897 LLT SrcTy = MRI.getType(Reg: Src);
10898 Offset =
10899 MIB.buildConstant(Res: LLT::integer(SizeInBits: SrcTy.getSizeInBits()), Val: CurrOffset)
10900 .getReg(Idx: 0);
10901 LoadPtr = MIB.buildObjectPtrOffset(Res: SrcTy, Op0: Src, Op1: Offset).getReg(Idx: 0);
10902 }
10903 auto LdVal = MIB.buildLoad(Res: CopyTy, Addr: LoadPtr, MMO&: *LoadMMO);
10904
10905 // Create the store.
10906 Register StorePtr = Dst;
10907 if (CurrOffset != 0) {
10908 LLT DstTy = MRI.getType(Reg: Dst);
10909 StorePtr = MIB.buildObjectPtrOffset(Res: DstTy, Op0: Dst, Op1: Offset).getReg(Idx: 0);
10910 }
10911 MIB.buildStore(Val: LdVal, Addr: StorePtr, MMO&: *StoreMMO);
10912 CurrOffset += CopyTy.getSizeInBytes();
10913 Size -= CopyTy.getSizeInBytes();
10914 }
10915
10916 MI.eraseFromParent();
10917 return Legalized;
10918}
10919
10920LegalizerHelper::LegalizeResult
10921LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10922 uint64_t KnownLen, Align Alignment,
10923 bool DstAlignCanChange, ArrayRef<LLT> MemOps) {
10924 auto &MF = *MI.getParent()->getParent();
10925 auto &DL = MF.getDataLayout();
10926 LLVMContext &C = MF.getFunction().getContext();
10927
10928 assert(KnownLen != 0 && "Have a zero length memmove length!");
10929 assert(!MemOps.empty() && "Expected at least one memory op");
10930
10931 MachineFrameInfo &MFI = MF.getFrameInfo();
10932 MachineInstr *FIDef = getOpcodeDef(Opcode: TargetOpcode::G_FRAME_INDEX, Reg: Dst, MRI);
10933 const auto &DstMMO = **MI.memoperands_begin();
10934 const auto &SrcMMO = **std::next(x: MI.memoperands_begin());
10935
10936 if (DstAlignCanChange) {
10937 // Get an estimate of the type from the LLT.
10938 Type *IRTy = getTypeForLLT(Ty: MemOps[0], C);
10939 Align NewAlign = DL.getABITypeAlign(Ty: IRTy);
10940
10941 // Don't promote to an alignment that would require dynamic stack
10942 // realignment.
10943 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10944 if (!TRI->hasStackRealignment(MF))
10945 if (MaybeAlign StackAlign = DL.getStackAlignment())
10946 NewAlign = std::min(a: NewAlign, b: *StackAlign);
10947
10948 if (NewAlign > Alignment) {
10949 Alignment = NewAlign;
10950 unsigned FI = FIDef->getOperand(i: 1).getIndex();
10951 // Give the stack frame object a larger alignment if needed.
10952 if (MFI.getObjectAlign(ObjectIdx: FI) < Alignment)
10953 MFI.setObjectAlignment(ObjectIdx: FI, Alignment);
10954 }
10955 }
10956
10957 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10958
10959 MachineIRBuilder MIB(MI);
10960 // Memmove requires that we perform the loads first before issuing the stores.
10961 // Apart from that, this loop is pretty much doing the same thing as the
10962 // memcpy codegen function.
10963 unsigned CurrOffset = 0;
10964 SmallVector<Register, 16> LoadVals;
10965 for (auto CopyTy : MemOps) {
10966 // Construct MMO for the load.
10967 auto *LoadMMO =
10968 MF.getMachineMemOperand(MMO: &SrcMMO, Offset: CurrOffset, Size: CopyTy.getSizeInBytes());
10969
10970 // Create the load.
10971 Register LoadPtr = Src;
10972 if (CurrOffset != 0) {
10973 LLT SrcTy = MRI.getType(Reg: Src);
10974 auto Offset =
10975 MIB.buildConstant(Res: LLT::scalar(SizeInBits: SrcTy.getSizeInBits()), Val: CurrOffset);
10976 LoadPtr = MIB.buildObjectPtrOffset(Res: SrcTy, Op0: Src, Op1: Offset).getReg(Idx: 0);
10977 }
10978 LoadVals.push_back(Elt: MIB.buildLoad(Res: CopyTy, Addr: LoadPtr, MMO&: *LoadMMO).getReg(Idx: 0));
10979 CurrOffset += CopyTy.getSizeInBytes();
10980 }
10981
10982 CurrOffset = 0;
10983 for (unsigned I = 0; I < MemOps.size(); ++I) {
10984 LLT CopyTy = MemOps[I];
10985 // Now store the values loaded.
10986 auto *StoreMMO =
10987 MF.getMachineMemOperand(MMO: &DstMMO, Offset: CurrOffset, Size: CopyTy.getSizeInBytes());
10988
10989 Register StorePtr = Dst;
10990 if (CurrOffset != 0) {
10991 LLT DstTy = MRI.getType(Reg: Dst);
10992 auto Offset =
10993 MIB.buildConstant(Res: LLT::scalar(SizeInBits: DstTy.getSizeInBits()), Val: CurrOffset);
10994 StorePtr = MIB.buildObjectPtrOffset(Res: DstTy, Op0: Dst, Op1: Offset).getReg(Idx: 0);
10995 }
10996 MIB.buildStore(Val: LoadVals[I], Addr: StorePtr, MMO&: *StoreMMO);
10997 CurrOffset += CopyTy.getSizeInBytes();
10998 }
10999 MI.eraseFromParent();
11000 return Legalized;
11001}
11002
11003LegalizerHelper::LegalizeResult LegalizerHelper::lowerMemCpyFamily(
11004 MachineInstr &MI, Register Dst, Register Src, uint64_t KnownLen,
11005 Align Alignment, bool DstAlignCanChange, ArrayRef<LLT> MemOps) {
11006 const unsigned Opc = MI.getOpcode();
11007 assert((Opc == TargetOpcode::G_MEMCPY ||
11008 Opc == TargetOpcode::G_MEMCPY_INLINE ||
11009 Opc == TargetOpcode::G_MEMMOVE || Opc == TargetOpcode::G_MEMSET ||
11010 Opc == TargetOpcode::G_MEMSET_INLINE) &&
11011 "Expected memcpy like instruction");
11012
11013 if (KnownLen == 0) {
11014 MI.eraseFromParent();
11015 return Legalized;
11016 }
11017
11018 if (Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMCPY_INLINE) {
11019 return lowerMemcpy(MI, Dst, Src, KnownLen, Alignment, DstAlignCanChange,
11020 MemOps);
11021 }
11022 if (Opc == TargetOpcode::G_MEMMOVE)
11023 return lowerMemmove(MI, Dst, Src, KnownLen, Alignment, DstAlignCanChange,
11024 MemOps);
11025 if (Opc == TargetOpcode::G_MEMSET || Opc == TargetOpcode::G_MEMSET_INLINE)
11026 return lowerMemset(MI, Dst, Val: Src, KnownLen, Alignment, DstAlignCanChange,
11027 MemOps);
11028 return UnableToLegalize;
11029}
11030
11031LegalizerHelper::LegalizeResult
11032LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
11033 Register Dst, Src;
11034 uint64_t KnownLen;
11035 Align Alignment;
11036 bool DstAlignCanChange;
11037 std::vector<LLT> MemOps;
11038 if (!canLowerMemCpyFamily(MI, MRI, MaxLen, Dst, Src, KnownLen, Alignment,
11039 DstAlignCanChange, MemOps))
11040 return UnableToLegalize;
11041 return lowerMemCpyFamily(MI, Dst, Src, KnownLen, Alignment, DstAlignCanChange,
11042 MemOps);
11043}
11044