1//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a pattern matching instruction selector for PowerPC,
10// converting from a legalized dag to a PPC dag.
11//
12//===----------------------------------------------------------------------===//
13
14#include "MCTargetDesc/PPCMCTargetDesc.h"
15#include "MCTargetDesc/PPCPredicates.h"
16#include "PPC.h"
17#include "PPCISelLowering.h"
18#include "PPCMachineFunctionInfo.h"
19#include "PPCSelectionDAGInfo.h"
20#include "PPCSubtarget.h"
21#include "PPCTargetMachine.h"
22#include "llvm/ADT/APInt.h"
23#include "llvm/ADT/APSInt.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/Analysis/BranchProbabilityInfo.h"
30#include "llvm/CodeGen/FunctionLoweringInfo.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/MachineBasicBlock.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineFunction.h"
35#include "llvm/CodeGen/MachineInstrBuilder.h"
36#include "llvm/CodeGen/MachineRegisterInfo.h"
37#include "llvm/CodeGen/SelectionDAG.h"
38#include "llvm/CodeGen/SelectionDAGISel.h"
39#include "llvm/CodeGen/SelectionDAGNodes.h"
40#include "llvm/CodeGen/TargetInstrInfo.h"
41#include "llvm/CodeGen/TargetRegisterInfo.h"
42#include "llvm/CodeGen/ValueTypes.h"
43#include "llvm/CodeGenTypes/MachineValueType.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/DebugLoc.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/InlineAsm.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/IntrinsicsPowerPC.h"
51#include "llvm/IR/Module.h"
52#include "llvm/Support/Casting.h"
53#include "llvm/Support/CodeGen.h"
54#include "llvm/Support/CommandLine.h"
55#include "llvm/Support/Compiler.h"
56#include "llvm/Support/Debug.h"
57#include "llvm/Support/ErrorHandling.h"
58#include "llvm/Support/KnownBits.h"
59#include "llvm/Support/MathExtras.h"
60#include "llvm/Support/raw_ostream.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <limits>
66#include <memory>
67#include <new>
68#include <tuple>
69#include <utility>
70
71using namespace llvm;
72
73#define DEBUG_TYPE "ppc-isel"
74#define PASS_NAME "PowerPC DAG->DAG Pattern Instruction Selection"
75
76STATISTIC(NumSextSetcc,
77 "Number of (sext(setcc)) nodes expanded into GPR sequence.");
78STATISTIC(NumZextSetcc,
79 "Number of (zext(setcc)) nodes expanded into GPR sequence.");
80STATISTIC(SignExtensionsAdded,
81 "Number of sign extensions for compare inputs added.");
82STATISTIC(ZeroExtensionsAdded,
83 "Number of zero extensions for compare inputs added.");
84STATISTIC(NumLogicOpsOnComparison,
85 "Number of logical ops on i1 values calculated in GPR.");
86STATISTIC(OmittedForNonExtendUses,
87 "Number of compares not eliminated as they have non-extending uses.");
88STATISTIC(NumP9Setb,
89 "Number of compares lowered to setb.");
90
91// FIXME: Remove this once the bug has been fixed!
92cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
93cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
94
95static cl::opt<bool>
96 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(Val: true),
97 cl::desc("use aggressive ppc isel for bit permutations"),
98 cl::Hidden);
99static cl::opt<bool> BPermRewriterNoMasking(
100 "ppc-bit-perm-rewriter-stress-rotates",
101 cl::desc("stress rotate selection in aggressive ppc isel for "
102 "bit permutations"),
103 cl::Hidden);
104
105static cl::opt<bool> EnableBranchHint(
106 "ppc-use-branch-hint", cl::init(Val: true),
107 cl::desc("Enable static hinting of branches on ppc"),
108 cl::Hidden);
109
110static cl::opt<bool> EnableTLSOpt(
111 "ppc-tls-opt", cl::init(Val: true),
112 cl::desc("Enable tls optimization peephole"),
113 cl::Hidden);
114
115enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
116 ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
117 ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
118
119static cl::opt<ICmpInGPRType> CmpInGPR(
120 "ppc-gpr-icmps", cl::Hidden, cl::init(Val: ICGPR_All),
121 cl::desc("Specify the types of comparisons to emit GPR-only code for."),
122 cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
123 clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
124 clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
125 clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
126 clEnumValN(ICGPR_NonExtIn, "nonextin",
127 "Only comparisons where inputs don't need [sz]ext."),
128 clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
129 clEnumValN(ICGPR_ZextI32, "zexti32",
130 "Only i32 comparisons with zext result."),
131 clEnumValN(ICGPR_ZextI64, "zexti64",
132 "Only i64 comparisons with zext result."),
133 clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
134 clEnumValN(ICGPR_SextI32, "sexti32",
135 "Only i32 comparisons with sext result."),
136 clEnumValN(ICGPR_SextI64, "sexti64",
137 "Only i64 comparisons with sext result.")));
138namespace {
139
140 //===--------------------------------------------------------------------===//
141 /// PPCDAGToDAGISel - PPC specific code to select PPC machine
142 /// instructions for SelectionDAG operations.
143 ///
144 class PPCDAGToDAGISel : public SelectionDAGISel {
145 const PPCTargetMachine &TM;
146 const PPCSubtarget *Subtarget = nullptr;
147 const PPCTargetLowering *PPCLowering = nullptr;
148 unsigned GlobalBaseReg = 0;
149
150 public:
151 PPCDAGToDAGISel() = delete;
152
153 explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOptLevel OptLevel)
154 : SelectionDAGISel(tm, OptLevel), TM(tm) {}
155
156 bool runOnMachineFunction(MachineFunction &MF) override {
157 // Make sure we re-emit a set of the global base reg if necessary
158 GlobalBaseReg = 0;
159 Subtarget = &MF.getSubtarget<PPCSubtarget>();
160 PPCLowering = Subtarget->getTargetLowering();
161 if (Subtarget->hasROPProtect()) {
162 // Create a place on the stack for the ROP Protection Hash.
163 // The ROP Protection Hash will always be 8 bytes and aligned to 8
164 // bytes.
165 MachineFrameInfo &MFI = MF.getFrameInfo();
166 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
167 const int Result = MFI.CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
168 FI->setROPProtectionHashSaveIndex(Result);
169 }
170 SelectionDAGISel::runOnMachineFunction(mf&: MF);
171
172 return true;
173 }
174
175 void PreprocessISelDAG() override;
176 void PostprocessISelDAG() override;
177
178 /// getI16Imm - Return a target constant with the specified value, of type
179 /// i16.
180 inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
181 return CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i16);
182 }
183
184 /// getI32Imm - Return a target constant with the specified value, of type
185 /// i32.
186 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
187 return CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32);
188 }
189
190 /// getI64Imm - Return a target constant with the specified value, of type
191 /// i64.
192 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
193 return CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i64);
194 }
195
196 /// getSmallIPtrImm - Return a target constant of pointer type.
197 inline SDValue getSmallIPtrImm(int64_t Imm, const SDLoc &dl) {
198 return CurDAG->getSignedTargetConstant(
199 Val: Imm, DL: dl, VT: PPCLowering->getPointerTy(DL: CurDAG->getDataLayout()));
200 }
201
202 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
203 /// rotate and mask opcode and mask operation.
204 static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
205 unsigned &SH, unsigned &MB, unsigned &ME);
206
207 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
208 /// base register. Return the virtual register that holds this value.
209 SDNode *getGlobalBaseReg();
210
211 void selectFrameIndex(SDNode *SN, SDNode *N, int64_t Offset = 0);
212
213 // Select - Convert the specified operand from a target-independent to a
214 // target-specific node if it hasn't already been changed.
215 void Select(SDNode *N) override;
216
217 bool tryBitfieldInsert(SDNode *N);
218 bool tryBitPermutation(SDNode *N);
219 bool tryIntCompareInGPR(SDNode *N);
220
221 // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
222 // an X-Form load instruction with the offset being a relocation coming from
223 // the PPCISD::ADD_TLS.
224 bool tryTLSXFormLoad(LoadSDNode *N);
225 // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
226 // an X-Form store instruction with the offset being a relocation coming from
227 // the PPCISD::ADD_TLS.
228 bool tryTLSXFormStore(StoreSDNode *N);
229 /// SelectCC - Select a comparison of the specified values with the
230 /// specified condition code, returning the CR# of the expression.
231 SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
232 const SDLoc &dl, SDValue Chain = SDValue());
233
234 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
235 /// immediate field. Note that the operand at this point is already the
236 /// result of a prior SelectAddressRegImm call.
237 bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
238 if (N.getOpcode() == ISD::TargetConstant ||
239 N.getOpcode() == ISD::TargetGlobalAddress) {
240 Out = N;
241 return true;
242 }
243
244 return false;
245 }
246
247 /// SelectDSForm - Returns true if address N can be represented by the
248 /// addressing mode of DSForm instructions (a base register, plus a signed
249 /// 16-bit displacement that is a multiple of 4.
250 bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
251 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, DAG&: *CurDAG,
252 Align: Align(4)) == PPC::AM_DSForm;
253 }
254
255 /// SelectDQForm - Returns true if address N can be represented by the
256 /// addressing mode of DQForm instructions (a base register, plus a signed
257 /// 16-bit displacement that is a multiple of 16.
258 bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
259 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, DAG&: *CurDAG,
260 Align: Align(16)) == PPC::AM_DQForm;
261 }
262
263 /// SelectDForm - Returns true if address N can be represented by
264 /// the addressing mode of DForm instructions (a base register, plus a
265 /// signed 16-bit immediate.
266 bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
267 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, DAG&: *CurDAG,
268 Align: std::nullopt) == PPC::AM_DForm;
269 }
270
271 /// SelectPCRelForm - Returns true if address N can be represented by
272 /// PC-Relative addressing mode.
273 bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
274 SDValue &Base) {
275 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, DAG&: *CurDAG,
276 Align: std::nullopt) == PPC::AM_PCRel;
277 }
278
279 /// SelectPDForm - Returns true if address N can be represented by Prefixed
280 /// DForm addressing mode (a base register, plus a signed 34-bit immediate.
281 bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
282 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, DAG&: *CurDAG,
283 Align: std::nullopt) ==
284 PPC::AM_PrefixDForm;
285 }
286
287 /// SelectXForm - Returns true if address N can be represented by the
288 /// addressing mode of XForm instructions (an indexed [r+r] operation).
289 bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
290 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, DAG&: *CurDAG,
291 Align: std::nullopt) == PPC::AM_XForm;
292 }
293
294 /// SelectForceXForm - Given the specified address, force it to be
295 /// represented as an indexed [r+r] operation (an XForm instruction).
296 bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
297 SDValue &Base) {
298 return PPCLowering->SelectForceXFormMode(N, Disp, Base, DAG&: *CurDAG) ==
299 PPC::AM_XForm;
300 }
301
302 /// SelectAddrIdx - Given the specified address, check to see if it can be
303 /// represented as an indexed [r+r] operation.
304 /// This is for xform instructions whose associated displacement form is D.
305 /// The last parameter \p 0 means associated D form has no requirment for 16
306 /// bit signed displacement.
307 /// Returns false if it can be represented by [r+imm], which are preferred.
308 bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
309 return PPCLowering->SelectAddressRegReg(N, Base, Index, DAG&: *CurDAG,
310 EncodingAlignment: std::nullopt);
311 }
312
313 /// SelectAddrIdx4 - Given the specified address, check to see if it can be
314 /// represented as an indexed [r+r] operation.
315 /// This is for xform instructions whose associated displacement form is DS.
316 /// The last parameter \p 4 means associated DS form 16 bit signed
317 /// displacement must be a multiple of 4.
318 /// Returns false if it can be represented by [r+imm], which are preferred.
319 bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
320 return PPCLowering->SelectAddressRegReg(N, Base, Index, DAG&: *CurDAG,
321 EncodingAlignment: Align(4));
322 }
323
324 /// SelectAddrIdx16 - Given the specified address, check to see if it can be
325 /// represented as an indexed [r+r] operation.
326 /// This is for xform instructions whose associated displacement form is DQ.
327 /// The last parameter \p 16 means associated DQ form 16 bit signed
328 /// displacement must be a multiple of 16.
329 /// Returns false if it can be represented by [r+imm], which are preferred.
330 bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
331 return PPCLowering->SelectAddressRegReg(N, Base, Index, DAG&: *CurDAG,
332 EncodingAlignment: Align(16));
333 }
334
335 /// SelectAddrIdxOnly - Given the specified address, force it to be
336 /// represented as an indexed [r+r] operation.
337 bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
338 return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, DAG&: *CurDAG);
339 }
340
341 /// SelectAddrImm - Returns true if the address N can be represented by
342 /// a base register plus a signed 16-bit displacement [r+imm].
343 /// The last parameter \p 0 means D form has no requirment for 16 bit signed
344 /// displacement.
345 bool SelectAddrImm(SDValue N, SDValue &Disp,
346 SDValue &Base) {
347 return PPCLowering->SelectAddressRegImm(N, Disp, Base, DAG&: *CurDAG,
348 EncodingAlignment: std::nullopt);
349 }
350
351 /// SelectAddrImmX4 - Returns true if the address N can be represented by
352 /// a base register plus a signed 16-bit displacement that is a multiple of
353 /// 4 (last parameter). Suitable for use by STD and friends.
354 bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
355 return PPCLowering->SelectAddressRegImm(N, Disp, Base, DAG&: *CurDAG, EncodingAlignment: Align(4));
356 }
357
358 /// SelectAddrImmX16 - Returns true if the address N can be represented by
359 /// a base register plus a signed 16-bit displacement that is a multiple of
360 /// 16(last parameter). Suitable for use by STXV and friends.
361 bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
362 return PPCLowering->SelectAddressRegImm(N, Disp, Base, DAG&: *CurDAG,
363 EncodingAlignment: Align(16));
364 }
365
366 /// SelectAddrImmX34 - Returns true if the address N can be represented by
367 /// a base register plus a signed 34-bit displacement. Suitable for use by
368 /// PSTXVP and friends.
369 bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
370 return PPCLowering->SelectAddressRegImm34(N, Disp, Base, DAG&: *CurDAG);
371 }
372
373 // Select an address into a single register.
374 bool SelectAddr(SDValue N, SDValue &Base) {
375 Base = N;
376 return true;
377 }
378
379 bool SelectAddrPCRel(SDValue N, SDValue &Base) {
380 return PPCLowering->SelectAddressPCRel(N, Base);
381 }
382
383 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
384 /// inline asm expressions. It is always correct to compute the value into
385 /// a register. The case of adding a (possibly relocatable) constant to a
386 /// register can be improved, but it is wrong to substitute Reg+Reg for
387 /// Reg in an asm, because the load or store opcode would have to change.
388 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
389 InlineAsm::ConstraintCode ConstraintID,
390 std::vector<SDValue> &OutOps) override {
391 switch(ConstraintID) {
392 default:
393 errs() << "ConstraintID: "
394 << InlineAsm::getMemConstraintName(C: ConstraintID) << "\n";
395 llvm_unreachable("Unexpected asm memory constraint");
396 case InlineAsm::ConstraintCode::es:
397 case InlineAsm::ConstraintCode::m:
398 case InlineAsm::ConstraintCode::o:
399 case InlineAsm::ConstraintCode::Q:
400 case InlineAsm::ConstraintCode::Z:
401 case InlineAsm::ConstraintCode::Zy:
402 // We need to make sure that this one operand does not end up in r0
403 // (because we might end up lowering this as 0(%op)).
404 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
405 const TargetRegisterClass *TRC = TRI->getPointerRegClass(/*Kind=*/1);
406 SDLoc dl(Op);
407 SDValue RC = CurDAG->getTargetConstant(Val: TRC->getID(), DL: dl, VT: MVT::i32);
408 SDValue NewOp =
409 SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS,
410 dl, VT: Op.getValueType(),
411 Op1: Op, Op2: RC), 0);
412
413 OutOps.push_back(x: NewOp);
414 return false;
415 }
416 return true;
417 }
418
419// Include the pieces autogenerated from the target description.
420#include "PPCGenDAGISel.inc"
421
422private:
423 bool trySETCC(SDNode *N);
424 bool tryFoldSWTestBRCC(SDNode *N);
425 bool trySelectLoopCountIntrinsic(SDNode *N);
426 bool tryAsSingleRLDICL(SDNode *N);
427 bool tryAsSingleRLDCL(SDNode *N);
428 bool tryAsSingleRLDICR(SDNode *N);
429 bool tryAsSingleRLWINM(SDNode *N);
430 bool tryAsSingleRLWINM8(SDNode *N);
431 bool tryAsSingleRLWIMI(SDNode *N);
432 bool tryAsPairOfRLDICL(SDNode *N);
433 bool tryAsSingleRLDIMI(SDNode *N);
434
435 void PeepholePPC64();
436 void PeepholePPC64ZExt();
437 void PeepholeCROps();
438
439 SDValue combineToCMPB(SDNode *N);
440 void foldBoolExts(SDValue &Res, SDNode *&N);
441
442 bool AllUsersSelectZero(SDNode *N);
443 void SwapAllSelectUsers(SDNode *N);
444
445 bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
446 void transferMemOperands(SDNode *N, SDNode *Result);
447 };
448
449 class PPCDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
450 public:
451 static char ID;
452 explicit PPCDAGToDAGISelLegacy(PPCTargetMachine &tm,
453 CodeGenOptLevel OptLevel)
454 : SelectionDAGISelLegacy(
455 ID, std::make_unique<PPCDAGToDAGISel>(args&: tm, args&: OptLevel)) {}
456 };
457} // end anonymous namespace
458
459char PPCDAGToDAGISelLegacy::ID = 0;
460
461INITIALIZE_PASS(PPCDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
462
463/// getGlobalBaseReg - Output the instructions required to put the
464/// base address to use for accessing globals into a register.
465///
466SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
467 if (!GlobalBaseReg) {
468 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
469 // Insert the set of GlobalBaseReg into the first MBB of the function
470 MachineBasicBlock &FirstMBB = MF->front();
471 MachineBasicBlock::iterator MBBI = FirstMBB.begin();
472 const Module *M = MF->getFunction().getParent();
473 DebugLoc dl;
474
475 if (PPCLowering->getPointerTy(DL: CurDAG->getDataLayout()) == MVT::i32) {
476 if (Subtarget->isTargetELF()) {
477 GlobalBaseReg = PPC::R30;
478 if (!Subtarget->isSecurePlt() &&
479 M->getPICLevel() == PICLevel::SmallPIC) {
480 BuildMI(BB&: FirstMBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::MoveGOTtoLR));
481 BuildMI(BB&: FirstMBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::MFLR), DestReg: GlobalBaseReg);
482 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
483 } else {
484 BuildMI(BB&: FirstMBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::MovePCtoLR));
485 BuildMI(BB&: FirstMBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::MFLR), DestReg: GlobalBaseReg);
486 Register TempReg = RegInfo->createVirtualRegister(RegClass: &PPC::GPRCRegClass);
487 BuildMI(BB&: FirstMBB, I: MBBI, MIMD: dl,
488 MCID: TII.get(Opcode: PPC::UpdateGBR), DestReg: GlobalBaseReg)
489 .addReg(RegNo: TempReg, Flags: RegState::Define).addReg(RegNo: GlobalBaseReg);
490 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
491 }
492 } else {
493 GlobalBaseReg =
494 RegInfo->createVirtualRegister(RegClass: &PPC::GPRC_and_GPRC_NOR0RegClass);
495 BuildMI(BB&: FirstMBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::MovePCtoLR));
496 BuildMI(BB&: FirstMBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::MFLR), DestReg: GlobalBaseReg);
497 }
498 } else {
499 // We must ensure that this sequence is dominated by the prologue.
500 // FIXME: This is a bit of a big hammer since we don't get the benefits
501 // of shrink-wrapping whenever we emit this instruction. Considering
502 // this is used in any function where we emit a jump table, this may be
503 // a significant limitation. We should consider inserting this in the
504 // block where it is used and then commoning this sequence up if it
505 // appears in multiple places.
506 // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
507 // MovePCtoLR8.
508 MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
509 GlobalBaseReg = RegInfo->createVirtualRegister(RegClass: &PPC::G8RC_and_G8RC_NOX0RegClass);
510 BuildMI(BB&: FirstMBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::MovePCtoLR8));
511 BuildMI(BB&: FirstMBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::MFLR8), DestReg: GlobalBaseReg);
512 }
513 }
514 return CurDAG->getRegister(Reg: GlobalBaseReg,
515 VT: PPCLowering->getPointerTy(DL: CurDAG->getDataLayout()))
516 .getNode();
517}
518
519// Check if a SDValue has the toc-data attribute.
520static bool hasTocDataAttr(SDValue Val) {
521 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
522 if (!GA)
523 return false;
524
525 const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(Val: GA->getGlobal());
526 if (!GV)
527 return false;
528
529 if (!GV->hasAttribute(Kind: "toc-data"))
530 return false;
531 return true;
532}
533
534static CodeModel::Model getCodeModel(const PPCSubtarget &Subtarget,
535 const TargetMachine &TM,
536 const SDNode *Node) {
537 // If there isn't an attribute to override the module code model
538 // this will be the effective code model.
539 CodeModel::Model ModuleModel = TM.getCodeModel();
540
541 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val: Node->getOperand(Num: 0));
542 if (!GA)
543 return ModuleModel;
544
545 const GlobalValue *GV = GA->getGlobal();
546 if (!GV)
547 return ModuleModel;
548
549 return Subtarget.getCodeModel(TM, GV);
550}
551
552/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
553/// operand. If so Imm will receive the 32-bit value.
554static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
555 if (N->getOpcode() == ISD::Constant && N->getValueType(ResNo: 0) == MVT::i32) {
556 Imm = N->getAsZExtVal();
557 return true;
558 }
559 return false;
560}
561
562/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
563/// operand. If so Imm will receive the 64-bit value.
564static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
565 if (N->getOpcode() == ISD::Constant && N->getValueType(ResNo: 0) == MVT::i64) {
566 Imm = N->getAsZExtVal();
567 return true;
568 }
569 return false;
570}
571
572// isInt32Immediate - This method tests to see if a constant operand.
573// If so Imm will receive the 32 bit value.
574static bool isInt32Immediate(SDValue N, unsigned &Imm) {
575 return isInt32Immediate(N: N.getNode(), Imm);
576}
577
578/// isInt64Immediate - This method tests to see if the value is a 64-bit
579/// constant operand. If so Imm will receive the 64-bit value.
580static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
581 return isInt64Immediate(N: N.getNode(), Imm);
582}
583
584static unsigned getBranchHint(unsigned PCC,
585 const FunctionLoweringInfo &FuncInfo,
586 const SDValue &DestMBB) {
587 assert(isa<BasicBlockSDNode>(DestMBB));
588
589 if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
590
591 const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
592 const Instruction *BBTerm = BB->getTerminator();
593
594 if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
595
596 const BasicBlock *TBB = BBTerm->getSuccessor(Idx: 0);
597 const BasicBlock *FBB = BBTerm->getSuccessor(Idx: 1);
598
599 auto TProb = FuncInfo.BPI->getEdgeProbability(Src: BB, Dst: TBB);
600 auto FProb = FuncInfo.BPI->getEdgeProbability(Src: BB, Dst: FBB);
601
602 // We only want to handle cases which are easy to predict at static time, e.g.
603 // C++ throw statement, that is very likely not taken, or calling never
604 // returned function, e.g. stdlib exit(). So we set Threshold to filter
605 // unwanted cases.
606 //
607 // Below is LLVM branch weight table, we only want to handle case 1, 2
608 //
609 // Case Taken:Nontaken Example
610 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
611 // 2. Invoke-terminating 1:1048575
612 // 3. Coldblock 4:64 __builtin_expect
613 // 4. Loop Branch 124:4 For loop
614 // 5. PH/ZH/FPH 20:12
615 const uint32_t Threshold = 10000;
616
617 if (std::max(a: TProb, b: FProb) / Threshold < std::min(a: TProb, b: FProb))
618 return PPC::BR_NO_HINT;
619
620 LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
621 << "::" << BB->getName() << "'\n"
622 << " -> " << TBB->getName() << ": " << TProb << "\n"
623 << " -> " << FBB->getName() << ": " << FProb << "\n");
624
625 const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(Val: DestMBB);
626
627 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
628 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
629 if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
630 std::swap(a&: TProb, b&: FProb);
631
632 return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
633}
634
635// isOpcWithIntImmediate - This method tests to see if the node is a specific
636// opcode and that it has a immediate integer right operand.
637// If so Imm will receive the 32 bit value.
638static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
639 return N->getOpcode() == Opc
640 && isInt32Immediate(N: N->getOperand(Num: 1).getNode(), Imm);
641}
642
643void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, int64_t Offset) {
644 SDLoc dl(SN);
645 int FI = cast<FrameIndexSDNode>(Val: N)->getIndex();
646 SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT: N->getValueType(ResNo: 0));
647 unsigned Opc = N->getValueType(ResNo: 0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
648 if (SN->hasOneUse())
649 CurDAG->SelectNodeTo(N: SN, MachineOpc: Opc, VT: N->getValueType(ResNo: 0), Op1: TFI,
650 Op2: getSmallIPtrImm(Imm: Offset, dl));
651 else
652 ReplaceNode(F: SN, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT: N->getValueType(ResNo: 0), Op1: TFI,
653 Op2: getSmallIPtrImm(Imm: Offset, dl)));
654}
655
656bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
657 bool isShiftMask, unsigned &SH,
658 unsigned &MB, unsigned &ME) {
659 // Don't even go down this path for i64, since different logic will be
660 // necessary for rldicl/rldicr/rldimi.
661 if (N->getValueType(ResNo: 0) != MVT::i32)
662 return false;
663
664 unsigned Shift = 32;
665 unsigned Indeterminant = ~0; // bit mask marking indeterminant results
666 unsigned Opcode = N->getOpcode();
667 if (N->getNumOperands() != 2 ||
668 !isInt32Immediate(N: N->getOperand(Num: 1).getNode(), Imm&: Shift) || (Shift > 31))
669 return false;
670
671 if (Opcode == ISD::SHL) {
672 // apply shift left to mask if it comes first
673 if (isShiftMask) Mask = Mask << Shift;
674 // determine which bits are made indeterminant by shift
675 Indeterminant = ~(0xFFFFFFFFu << Shift);
676 } else if (Opcode == ISD::SRL) {
677 // apply shift right to mask if it comes first
678 if (isShiftMask) Mask = Mask >> Shift;
679 // determine which bits are made indeterminant by shift
680 Indeterminant = ~(0xFFFFFFFFu >> Shift);
681 // adjust for the left rotate
682 Shift = 32 - Shift;
683 } else if (Opcode == ISD::ROTL) {
684 Indeterminant = 0;
685 } else {
686 return false;
687 }
688
689 // if the mask doesn't intersect any Indeterminant bits
690 if (Mask && !(Mask & Indeterminant)) {
691 SH = Shift & 31;
692 // make sure the mask is still a mask (wrap arounds may not be)
693 return isRunOfOnes(Val: Mask, MB, ME);
694 }
695 return false;
696}
697
698// isThreadPointerAcquisitionNode - Check if the operands of an ADD_TLS
699// instruction use the thread pointer.
700static bool isThreadPointerAcquisitionNode(SDValue Base, SelectionDAG *CurDAG) {
701 assert(
702 Base.getOpcode() == PPCISD::ADD_TLS &&
703 "Only expecting the ADD_TLS instruction to acquire the thread pointer!");
704 const PPCSubtarget &Subtarget =
705 CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>();
706 SDValue ADDTLSOp1 = Base.getOperand(i: 0);
707 unsigned ADDTLSOp1Opcode = ADDTLSOp1.getOpcode();
708
709 // Account for when ADD_TLS is used for the initial-exec TLS model on Linux.
710 //
711 // Although ADD_TLS does not explicitly use the thread pointer
712 // register when LD_GOT_TPREL_L is one of it's operands, the LD_GOT_TPREL_L
713 // instruction will have a relocation specifier, @got@tprel, that is used to
714 // generate a GOT entry. The linker replaces this entry with an offset for a
715 // for a thread local variable, which will be relative to the thread pointer.
716 if (ADDTLSOp1Opcode == PPCISD::LD_GOT_TPREL_L)
717 return true;
718 // When using PC-Relative instructions for initial-exec, a MAT_PCREL_ADDR
719 // node is produced instead to represent the aforementioned situation.
720 LoadSDNode *LD = dyn_cast<LoadSDNode>(Val&: ADDTLSOp1);
721 if (LD && LD->getBasePtr().getOpcode() == PPCISD::MAT_PCREL_ADDR)
722 return true;
723
724 // A GET_TPOINTER PPCISD node (only produced on AIX 32-bit mode) as an operand
725 // to ADD_TLS represents a call to .__get_tpointer to get the thread pointer,
726 // later returning it into R3.
727 if (ADDTLSOp1Opcode == PPCISD::GET_TPOINTER)
728 return true;
729
730 // The ADD_TLS note is explicitly acquiring the thread pointer (X13/R13).
731 RegisterSDNode *AddFirstOpReg =
732 dyn_cast_or_null<RegisterSDNode>(Val: ADDTLSOp1.getNode());
733 if (AddFirstOpReg &&
734 AddFirstOpReg->getReg() == Subtarget.getThreadPointerRegister())
735 return true;
736
737 return false;
738}
739
740// canOptimizeTLSDFormToXForm - Optimize TLS accesses when an ADD_TLS
741// instruction is present. An ADD_TLS instruction, followed by a D-Form memory
742// operation, can be optimized to use an X-Form load or store, allowing the
743// ADD_TLS node to be removed completely.
744static bool canOptimizeTLSDFormToXForm(SelectionDAG *CurDAG, SDValue Base) {
745
746 // Do not do this transformation at -O0.
747 if (CurDAG->getTarget().getOptLevel() == CodeGenOptLevel::None)
748 return false;
749
750 // In order to perform this optimization inside tryTLSXForm[Load|Store],
751 // Base is expected to be an ADD_TLS node.
752 if (Base.getOpcode() != PPCISD::ADD_TLS)
753 return false;
754 for (auto *ADDTLSUse : Base.getNode()->users()) {
755 // The optimization to convert the D-Form load/store into its X-Form
756 // counterpart should only occur if the source value offset of the load/
757 // store is 0. This also means that The offset should always be undefined.
758 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: ADDTLSUse)) {
759 if (LD->getSrcValueOffset() != 0 || !LD->getOffset().isUndef())
760 return false;
761 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: ADDTLSUse)) {
762 if (ST->getSrcValueOffset() != 0 || !ST->getOffset().isUndef())
763 return false;
764 } else // Don't optimize if there are ADD_TLS users that aren't load/stores.
765 return false;
766 }
767
768 if (Base.getOperand(i: 1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
769 return false;
770
771 // Does the ADD_TLS node of the load/store use the thread pointer?
772 // If the thread pointer is not used as one of the operands of ADD_TLS,
773 // then this optimization is not valid.
774 return isThreadPointerAcquisitionNode(Base, CurDAG);
775}
776
777bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
778 SDValue Base = ST->getBasePtr();
779 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
780 return false;
781
782 SDLoc dl(ST);
783 EVT MemVT = ST->getMemoryVT();
784 EVT RegVT = ST->getValue().getValueType();
785
786 unsigned Opcode;
787 switch (MemVT.getSimpleVT().SimpleTy) {
788 default:
789 return false;
790 case MVT::i8: {
791 Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
792 break;
793 }
794 case MVT::i16: {
795 Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
796 break;
797 }
798 case MVT::i32: {
799 Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
800 break;
801 }
802 case MVT::i64: {
803 Opcode = PPC::STDXTLS;
804 break;
805 }
806 case MVT::f32: {
807 Opcode = PPC::STFSXTLS;
808 break;
809 }
810 case MVT::f64: {
811 Opcode = PPC::STFDXTLS;
812 break;
813 }
814 }
815 SDValue Chain = ST->getChain();
816 SDVTList VTs = ST->getVTList();
817 SDValue Ops[] = {ST->getValue(), Base.getOperand(i: 0), Base.getOperand(i: 1),
818 Chain};
819 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
820 transferMemOperands(N: ST, Result: MN);
821 ReplaceNode(F: ST, T: MN);
822 return true;
823}
824
825bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
826 SDValue Base = LD->getBasePtr();
827 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
828 return false;
829
830 SDLoc dl(LD);
831 EVT MemVT = LD->getMemoryVT();
832 EVT RegVT = LD->getValueType(ResNo: 0);
833 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
834 unsigned Opcode;
835 switch (MemVT.getSimpleVT().SimpleTy) {
836 default:
837 return false;
838 case MVT::i8: {
839 Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
840 break;
841 }
842 case MVT::i16: {
843 if (RegVT == MVT::i32)
844 Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32;
845 else
846 Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS;
847 break;
848 }
849 case MVT::i32: {
850 if (RegVT == MVT::i32)
851 Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32;
852 else
853 Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS;
854 break;
855 }
856 case MVT::i64: {
857 Opcode = PPC::LDXTLS;
858 break;
859 }
860 case MVT::f32: {
861 Opcode = PPC::LFSXTLS;
862 break;
863 }
864 case MVT::f64: {
865 Opcode = PPC::LFDXTLS;
866 break;
867 }
868 }
869 SDValue Chain = LD->getChain();
870 SDVTList VTs = LD->getVTList();
871 SDValue Ops[] = {Base.getOperand(i: 0), Base.getOperand(i: 1), Chain};
872 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
873 transferMemOperands(N: LD, Result: MN);
874 ReplaceNode(F: LD, T: MN);
875 return true;
876}
877
878/// Turn an or of two masked values into the rotate left word immediate then
879/// mask insert (rlwimi) instruction.
880bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
881 SDValue Op0 = N->getOperand(Num: 0);
882 SDValue Op1 = N->getOperand(Num: 1);
883 SDLoc dl(N);
884
885 // If either operand is a constant, let ORI/ORIS/ADDI/ADDIS tablegen
886 // patterns handle it — they produce a single instruction without the
887 // tied-register constraint that RLWIMI requires.
888 if (isa<ConstantSDNode>(Val: Op0) || isa<ConstantSDNode>(Val: Op1))
889 return false;
890
891 KnownBits LKnown = CurDAG->computeKnownBits(Op: Op0);
892 KnownBits RKnown = CurDAG->computeKnownBits(Op: Op1);
893
894 unsigned TargetMask = LKnown.Zero.getZExtValue();
895 unsigned InsertMask = RKnown.Zero.getZExtValue();
896
897 if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
898 unsigned Op0Opc = Op0.getOpcode();
899 unsigned Op1Opc = Op1.getOpcode();
900 unsigned Value, SH = 0;
901 TargetMask = ~TargetMask;
902 InsertMask = ~InsertMask;
903
904 // If the LHS has a foldable shift and the RHS does not, then swap it to the
905 // RHS so that we can fold the shift into the insert.
906 if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
907 if (Op0.getOperand(i: 0).getOpcode() == ISD::SHL ||
908 Op0.getOperand(i: 0).getOpcode() == ISD::SRL) {
909 if (Op1.getOperand(i: 0).getOpcode() != ISD::SHL &&
910 Op1.getOperand(i: 0).getOpcode() != ISD::SRL) {
911 std::swap(a&: Op0, b&: Op1);
912 std::swap(a&: Op0Opc, b&: Op1Opc);
913 std::swap(a&: TargetMask, b&: InsertMask);
914 }
915 }
916 } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
917 if (Op1Opc == ISD::AND && Op1.getOperand(i: 0).getOpcode() != ISD::SHL &&
918 Op1.getOperand(i: 0).getOpcode() != ISD::SRL) {
919 std::swap(a&: Op0, b&: Op1);
920 std::swap(a&: Op0Opc, b&: Op1Opc);
921 std::swap(a&: TargetMask, b&: InsertMask);
922 }
923 }
924
925 unsigned MB, ME;
926 if (isRunOfOnes(Val: InsertMask, MB, ME)) {
927 if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
928 isInt32Immediate(N: Op1.getOperand(i: 1), Imm&: Value)) {
929 Op1 = Op1.getOperand(i: 0);
930 SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
931 }
932 if (Op1Opc == ISD::AND) {
933 // The AND mask might not be a constant, and we need to make sure that
934 // if we're going to fold the masking with the insert, all bits not
935 // know to be zero in the mask are known to be one.
936 KnownBits MKnown = CurDAG->computeKnownBits(Op: Op1.getOperand(i: 1));
937 bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
938
939 unsigned SHOpc = Op1.getOperand(i: 0).getOpcode();
940 if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
941 isInt32Immediate(N: Op1.getOperand(i: 0).getOperand(i: 1), Imm&: Value)) {
942 // Note that Value must be in range here (less than 32) because
943 // otherwise there would not be any bits set in InsertMask.
944 Op1 = Op1.getOperand(i: 0).getOperand(i: 0);
945 SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
946 }
947 }
948
949 SH &= 31;
950 SDValue Ops[] = { Op0, Op1, getI32Imm(Imm: SH, dl), getI32Imm(Imm: MB, dl),
951 getI32Imm(Imm: ME, dl) };
952 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: PPC::RLWIMI, dl, VT: MVT::i32, Ops));
953 return true;
954 }
955 }
956 return false;
957}
958
959static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
960 unsigned MaxTruncation = 0;
961 // Cannot use range-based for loop here as we need the actual use (i.e. we
962 // need the operand number corresponding to the use). A range-based for
963 // will unbox the use and provide an SDNode*.
964 for (SDUse &Use : N->uses()) {
965 SDNode *User = Use.getUser();
966 unsigned Opc =
967 User->isMachineOpcode() ? User->getMachineOpcode() : User->getOpcode();
968 switch (Opc) {
969 default: return 0;
970 case ISD::TRUNCATE:
971 if (User->isMachineOpcode())
972 return 0;
973 MaxTruncation = std::max(a: MaxTruncation,
974 b: (unsigned)User->getValueType(ResNo: 0).getSizeInBits());
975 continue;
976 case ISD::STORE: {
977 if (User->isMachineOpcode())
978 return 0;
979 StoreSDNode *STN = cast<StoreSDNode>(Val: User);
980 unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
981 if (MemVTSize == 64 || Use.getOperandNo() != 0)
982 return 0;
983 MaxTruncation = std::max(a: MaxTruncation, b: MemVTSize);
984 continue;
985 }
986 case PPC::STW8:
987 case PPC::STWX8:
988 case PPC::STWU8:
989 case PPC::STWUX8:
990 if (Use.getOperandNo() != 0)
991 return 0;
992 MaxTruncation = std::max(a: MaxTruncation, b: 32u);
993 continue;
994 case PPC::STH8:
995 case PPC::STHX8:
996 case PPC::STHU8:
997 case PPC::STHUX8:
998 if (Use.getOperandNo() != 0)
999 return 0;
1000 MaxTruncation = std::max(a: MaxTruncation, b: 16u);
1001 continue;
1002 case PPC::STB8:
1003 case PPC::STBX8:
1004 case PPC::STBU8:
1005 case PPC::STBUX8:
1006 if (Use.getOperandNo() != 0)
1007 return 0;
1008 MaxTruncation = std::max(a: MaxTruncation, b: 8u);
1009 continue;
1010 }
1011 }
1012 return MaxTruncation;
1013}
1014
1015// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
1016// zeros and return the number of bits by the left of these consecutive zeros.
1017static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
1018 unsigned HiTZ = llvm::countr_zero<uint32_t>(Val: Hi_32(Value: Imm));
1019 unsigned LoLZ = llvm::countl_zero<uint32_t>(Val: Lo_32(Value: Imm));
1020 if ((HiTZ + LoLZ) >= Num)
1021 return (32 + HiTZ);
1022 return 0;
1023}
1024
1025// Direct materialization of 64-bit constants by enumerated patterns.
1026static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
1027 uint64_t Imm, unsigned &InstCnt) {
1028 unsigned TZ = llvm::countr_zero<uint64_t>(Val: Imm);
1029 unsigned LZ = llvm::countl_zero<uint64_t>(Val: Imm);
1030 unsigned TO = llvm::countr_one<uint64_t>(Value: Imm);
1031 unsigned LO = llvm::countl_one<uint64_t>(Value: Imm);
1032 unsigned Hi32 = Hi_32(Value: Imm);
1033 unsigned Lo32 = Lo_32(Value: Imm);
1034 SDNode *Result = nullptr;
1035 unsigned Shift = 0;
1036
1037 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1038 return CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32);
1039 };
1040
1041 // Following patterns use 1 instructions to materialize the Imm.
1042 InstCnt = 1;
1043 // 1-1) Patterns : {zeros}{15-bit valve}
1044 // {ones}{15-bit valve}
1045 if (isInt<16>(x: Imm)) {
1046 SDValue SDImm = CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i64);
1047 return CurDAG->getMachineNode(Opcode: PPC::LI8, dl, VT: MVT::i64, Op1: SDImm);
1048 }
1049 // 1-2) Patterns : {zeros}{15-bit valve}{16 zeros}
1050 // {ones}{15-bit valve}{16 zeros}
1051 if (TZ > 15 && (LZ > 32 || LO > 32))
1052 return CurDAG->getMachineNode(Opcode: PPC::LIS8, dl, VT: MVT::i64,
1053 Op1: getI32Imm((Imm >> 16) & 0xffff));
1054
1055 // Following patterns use 2 instructions to materialize the Imm.
1056 InstCnt = 2;
1057 assert(LZ < 64 && "Unexpected leading zeros here.");
1058 // Count of ones follwing the leading zeros.
1059 unsigned FO = llvm::countl_one<uint64_t>(Value: Imm << LZ);
1060 // 2-1) Patterns : {zeros}{31-bit value}
1061 // {ones}{31-bit value}
1062 if (isInt<32>(x: Imm)) {
1063 uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
1064 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1065 Result = CurDAG->getMachineNode(Opcode, dl, VT: MVT::i64, Op1: getI32Imm(ImmHi16));
1066 return CurDAG->getMachineNode(Opcode: PPC::ORI8, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1067 Op2: getI32Imm(Imm & 0xffff));
1068 }
1069 // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
1070 // {zeros}{15-bit value}{zeros}
1071 // {zeros}{ones}{15-bit value}
1072 // {ones}{15-bit value}{zeros}
1073 // We can take advantage of LI's sign-extension semantics to generate leading
1074 // ones, and then use RLDIC to mask off the ones in both sides after rotation.
1075 if ((LZ + FO + TZ) > 48) {
1076 Result = CurDAG->getMachineNode(Opcode: PPC::LI8, dl, VT: MVT::i64,
1077 Op1: getI32Imm((Imm >> TZ) & 0xffff));
1078 return CurDAG->getMachineNode(Opcode: PPC::RLDIC, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1079 Op2: getI32Imm(TZ), Op3: getI32Imm(LZ));
1080 }
1081 // 2-3) Pattern : {zeros}{15-bit value}{ones}
1082 // Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value,
1083 // therefore we can take advantage of LI's sign-extension semantics, and then
1084 // mask them off after rotation.
1085 //
1086 // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
1087 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1088 // +------------------------+ +------------------------+
1089 // 63 0 63 0
1090 // Imm (Imm >> (48 - LZ) & 0xffff)
1091 // +----sext-----|--16-bit--+ +clear-|-----------------+
1092 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1093 // +------------------------+ +------------------------+
1094 // 63 0 63 0
1095 // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
1096 if ((LZ + TO) > 48) {
1097 // Since the immediates with (LZ > 32) have been handled by previous
1098 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1099 // the Imm by a negative value.
1100 assert(LZ <= 32 && "Unexpected shift value.");
1101 Result = CurDAG->getMachineNode(Opcode: PPC::LI8, dl, VT: MVT::i64,
1102 Op1: getI32Imm((Imm >> (48 - LZ) & 0xffff)));
1103 return CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1104 Op2: getI32Imm(48 - LZ), Op3: getI32Imm(LZ));
1105 }
1106 // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
1107 // {ones}{15-bit value}{ones}
1108 // We can take advantage of LI's sign-extension semantics to generate leading
1109 // ones, and then use RLDICL to mask off the ones in left sides (if required)
1110 // after rotation.
1111 //
1112 // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+
1113 // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|
1114 // +------------------------+ +------------------------+
1115 // 63 0 63 0
1116 // Imm (Imm >> TO) & 0xffff
1117 // +----sext-----|--16-bit--+ +LZ|---------------------+
1118 // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|
1119 // +------------------------+ +------------------------+
1120 // 63 0 63 0
1121 // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
1122 if ((LZ + FO + TO) > 48) {
1123 Result = CurDAG->getMachineNode(Opcode: PPC::LI8, dl, VT: MVT::i64,
1124 Op1: getI32Imm((Imm >> TO) & 0xffff));
1125 return CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1126 Op2: getI32Imm(TO), Op3: getI32Imm(LZ));
1127 }
1128 // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
1129 // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
1130 // value, we can use LI for Lo16 without generating leading ones then add the
1131 // Hi16(in Lo32).
1132 if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
1133 Result = CurDAG->getMachineNode(Opcode: PPC::LI8, dl, VT: MVT::i64,
1134 Op1: getI32Imm(Lo32 & 0xffff));
1135 return CurDAG->getMachineNode(Opcode: PPC::ORIS8, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1136 Op2: getI32Imm(Lo32 >> 16));
1137 }
1138 // 2-6) Patterns : {******}{49 zeros}{******}
1139 // {******}{49 ones}{******}
1140 // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
1141 // bits remain on both sides. Rotate right the Imm to construct an int<16>
1142 // value, use LI for int<16> value and then use RLDICL without mask to rotate
1143 // it back.
1144 //
1145 // 1) findContiguousZerosAtLeast(Imm, 49)
1146 // +------|--zeros-|------+ +---ones--||---15 bit--+
1147 // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|
1148 // +----------------------+ +----------------------+
1149 // 63 0 63 0
1150 //
1151 // 2) findContiguousZerosAtLeast(~Imm, 49)
1152 // +------|--ones--|------+ +---ones--||---15 bit--+
1153 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1154 // +----------------------+ +----------------------+
1155 // 63 0 63 0
1156 if ((Shift = findContiguousZerosAtLeast(Imm, Num: 49)) ||
1157 (Shift = findContiguousZerosAtLeast(Imm: ~Imm, Num: 49))) {
1158 uint64_t RotImm = APInt(64, Imm).rotr(rotateAmt: Shift).getZExtValue();
1159 Result = CurDAG->getMachineNode(Opcode: PPC::LI8, dl, VT: MVT::i64,
1160 Op1: getI32Imm(RotImm & 0xffff));
1161 return CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1162 Op2: getI32Imm(Shift), Op3: getI32Imm(0));
1163 }
1164 // 2-7) Patterns : High word == Low word
1165 // This may require 2 to 3 instructions, depending on whether Lo32 can be
1166 // materialized in 1 instruction.
1167 if (Hi32 == Lo32) {
1168 // Handle the first 32 bits.
1169 uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
1170 uint64_t ImmLo16 = Lo32 & 0xffff;
1171 if (isInt<16>(x: Lo32))
1172 Result =
1173 CurDAG->getMachineNode(Opcode: PPC::LI8, dl, VT: MVT::i64, Op1: getI32Imm(ImmLo16));
1174 else if (!ImmLo16)
1175 Result =
1176 CurDAG->getMachineNode(Opcode: PPC::LIS8, dl, VT: MVT::i64, Op1: getI32Imm(ImmHi16));
1177 else {
1178 InstCnt = 3;
1179 Result =
1180 CurDAG->getMachineNode(Opcode: PPC::LIS8, dl, VT: MVT::i64, Op1: getI32Imm(ImmHi16));
1181 Result = CurDAG->getMachineNode(Opcode: PPC::ORI8, dl, VT: MVT::i64,
1182 Op1: SDValue(Result, 0), Op2: getI32Imm(ImmLo16));
1183 }
1184 // Use rldimi to insert the Low word into High word.
1185 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1186 getI32Imm(0)};
1187 return CurDAG->getMachineNode(Opcode: PPC::RLDIMI, dl, VT: MVT::i64, Ops);
1188 }
1189
1190 // Following patterns use 3 instructions to materialize the Imm.
1191 InstCnt = 3;
1192 // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
1193 // {zeros}{31-bit value}{zeros}
1194 // {zeros}{ones}{31-bit value}
1195 // {ones}{31-bit value}{zeros}
1196 // We can take advantage of LIS's sign-extension semantics to generate leading
1197 // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
1198 // ones in both sides after rotation.
1199 if ((LZ + FO + TZ) > 32) {
1200 uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
1201 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1202 Result = CurDAG->getMachineNode(Opcode, dl, VT: MVT::i64, Op1: getI32Imm(ImmHi16));
1203 Result = CurDAG->getMachineNode(Opcode: PPC::ORI8, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1204 Op2: getI32Imm((Imm >> TZ) & 0xffff));
1205 return CurDAG->getMachineNode(Opcode: PPC::RLDIC, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1206 Op2: getI32Imm(TZ), Op3: getI32Imm(LZ));
1207 }
1208 // 3-2) Pattern : {zeros}{31-bit value}{ones}
1209 // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits
1210 // value, therefore we can take advantage of LIS's sign-extension semantics,
1211 // add the remaining bits with ORI, and then mask them off after rotation.
1212 // This is similar to Pattern 2-3, please refer to the diagram there.
1213 if ((LZ + TO) > 32) {
1214 // Since the immediates with (LZ > 32) have been handled by previous
1215 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1216 // the Imm by a negative value.
1217 assert(LZ <= 32 && "Unexpected shift value.");
1218 Result = CurDAG->getMachineNode(Opcode: PPC::LIS8, dl, VT: MVT::i64,
1219 Op1: getI32Imm((Imm >> (48 - LZ)) & 0xffff));
1220 Result = CurDAG->getMachineNode(Opcode: PPC::ORI8, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1221 Op2: getI32Imm((Imm >> (32 - LZ)) & 0xffff));
1222 return CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1223 Op2: getI32Imm(32 - LZ), Op3: getI32Imm(LZ));
1224 }
1225 // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
1226 // {ones}{31-bit value}{ones}
1227 // We can take advantage of LIS's sign-extension semantics to generate leading
1228 // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
1229 // ones in left sides (if required) after rotation.
1230 // This is similar to Pattern 2-4, please refer to the diagram there.
1231 if ((LZ + FO + TO) > 32) {
1232 Result = CurDAG->getMachineNode(Opcode: PPC::LIS8, dl, VT: MVT::i64,
1233 Op1: getI32Imm((Imm >> (TO + 16)) & 0xffff));
1234 Result = CurDAG->getMachineNode(Opcode: PPC::ORI8, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1235 Op2: getI32Imm((Imm >> TO) & 0xffff));
1236 return CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1237 Op2: getI32Imm(TO), Op3: getI32Imm(LZ));
1238 }
1239 // 3-4) Patterns : {******}{33 zeros}{******}
1240 // {******}{33 ones}{******}
1241 // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
1242 // bits remain on both sides. Rotate right the Imm to construct an int<32>
1243 // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
1244 // rotate it back.
1245 // This is similar to Pattern 2-6, please refer to the diagram there.
1246 if ((Shift = findContiguousZerosAtLeast(Imm, Num: 33)) ||
1247 (Shift = findContiguousZerosAtLeast(Imm: ~Imm, Num: 33))) {
1248 uint64_t RotImm = APInt(64, Imm).rotr(rotateAmt: Shift).getZExtValue();
1249 uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
1250 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1251 Result = CurDAG->getMachineNode(Opcode, dl, VT: MVT::i64, Op1: getI32Imm(ImmHi16));
1252 Result = CurDAG->getMachineNode(Opcode: PPC::ORI8, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1253 Op2: getI32Imm(RotImm & 0xffff));
1254 return CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1255 Op2: getI32Imm(Shift), Op3: getI32Imm(0));
1256 }
1257
1258 InstCnt = 0;
1259 return nullptr;
1260}
1261
1262// Try to select instructions to generate a 64 bit immediate using prefix as
1263// well as non prefix instructions. The function will return the SDNode
1264// to materialize that constant or it will return nullptr if it does not
1265// find one. The variable InstCnt is set to the number of instructions that
1266// were selected.
1267static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl,
1268 uint64_t Imm, unsigned &InstCnt) {
1269 unsigned TZ = llvm::countr_zero<uint64_t>(Val: Imm);
1270 unsigned LZ = llvm::countl_zero<uint64_t>(Val: Imm);
1271 unsigned TO = llvm::countr_one<uint64_t>(Value: Imm);
1272 unsigned FO = llvm::countl_one<uint64_t>(Value: LZ == 64 ? 0 : (Imm << LZ));
1273 unsigned Hi32 = Hi_32(Value: Imm);
1274 unsigned Lo32 = Lo_32(Value: Imm);
1275
1276 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1277 return CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32);
1278 };
1279
1280 auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
1281 return CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i64);
1282 };
1283
1284 // Following patterns use 1 instruction to materialize Imm.
1285 InstCnt = 1;
1286
1287 // The pli instruction can materialize up to 34 bits directly.
1288 // If a constant fits within 34-bits, emit the pli instruction here directly.
1289 if (isInt<34>(x: Imm))
1290 return CurDAG->getMachineNode(Opcode: PPC::PLI8, dl, VT: MVT::i64,
1291 Op1: CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i64));
1292
1293 // Require at least two instructions.
1294 InstCnt = 2;
1295 SDNode *Result = nullptr;
1296 // Patterns : {zeros}{ones}{33-bit value}{zeros}
1297 // {zeros}{33-bit value}{zeros}
1298 // {zeros}{ones}{33-bit value}
1299 // {ones}{33-bit value}{zeros}
1300 // We can take advantage of PLI's sign-extension semantics to generate leading
1301 // ones, and then use RLDIC to mask off the ones on both sides after rotation.
1302 if ((LZ + FO + TZ) > 30) {
1303 APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
1304 APInt Extended = SignedInt34.sext(width: 64);
1305 Result = CurDAG->getMachineNode(Opcode: PPC::PLI8, dl, VT: MVT::i64,
1306 Op1: getI64Imm(Extended.getZExtValue()));
1307 return CurDAG->getMachineNode(Opcode: PPC::RLDIC, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1308 Op2: getI32Imm(TZ), Op3: getI32Imm(LZ));
1309 }
1310 // Pattern : {zeros}{33-bit value}{ones}
1311 // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
1312 // therefore we can take advantage of PLI's sign-extension semantics, and then
1313 // mask them off after rotation.
1314 //
1315 // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+
1316 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1317 // +------------------------+ +------------------------+
1318 // 63 0 63 0
1319 //
1320 // +----sext-----|--34-bit--+ +clear-|-----------------+
1321 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1322 // +------------------------+ +------------------------+
1323 // 63 0 63 0
1324 if ((LZ + TO) > 30) {
1325 APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
1326 APInt Extended = SignedInt34.sext(width: 64);
1327 Result = CurDAG->getMachineNode(Opcode: PPC::PLI8, dl, VT: MVT::i64,
1328 Op1: getI64Imm(Extended.getZExtValue()));
1329 return CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1330 Op2: getI32Imm(30 - LZ), Op3: getI32Imm(LZ));
1331 }
1332 // Patterns : {zeros}{ones}{33-bit value}{ones}
1333 // {ones}{33-bit value}{ones}
1334 // Similar to LI we can take advantage of PLI's sign-extension semantics to
1335 // generate leading ones, and then use RLDICL to mask off the ones in left
1336 // sides (if required) after rotation.
1337 if ((LZ + FO + TO) > 30) {
1338 APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
1339 APInt Extended = SignedInt34.sext(width: 64);
1340 Result = CurDAG->getMachineNode(Opcode: PPC::PLI8, dl, VT: MVT::i64,
1341 Op1: getI64Imm(Extended.getZExtValue()));
1342 return CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1343 Op2: getI32Imm(TO), Op3: getI32Imm(LZ));
1344 }
1345 // Patterns : {******}{31 zeros}{******}
1346 // : {******}{31 ones}{******}
1347 // If Imm contains 31 consecutive zeros/ones then the remaining bit count
1348 // is 33. Rotate right the Imm to construct a int<33> value, we can use PLI
1349 // for the int<33> value and then use RLDICL without a mask to rotate it back.
1350 //
1351 // +------|--ones--|------+ +---ones--||---33 bit--+
1352 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1353 // +----------------------+ +----------------------+
1354 // 63 0 63 0
1355 for (unsigned Shift = 0; Shift < 63; ++Shift) {
1356 uint64_t RotImm = APInt(64, Imm).rotr(rotateAmt: Shift).getZExtValue();
1357 if (isInt<34>(x: RotImm)) {
1358 Result =
1359 CurDAG->getMachineNode(Opcode: PPC::PLI8, dl, VT: MVT::i64, Op1: getI64Imm(RotImm));
1360 return CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64,
1361 Op1: SDValue(Result, 0), Op2: getI32Imm(Shift),
1362 Op3: getI32Imm(0));
1363 }
1364 }
1365
1366 // Patterns : High word == Low word
1367 // This is basically a splat of a 32 bit immediate.
1368 if (Hi32 == Lo32) {
1369 Result = CurDAG->getMachineNode(Opcode: PPC::PLI8, dl, VT: MVT::i64, Op1: getI64Imm(Hi32));
1370 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1371 getI32Imm(0)};
1372 return CurDAG->getMachineNode(Opcode: PPC::RLDIMI, dl, VT: MVT::i64, Ops);
1373 }
1374
1375 InstCnt = 3;
1376 // Catch-all
1377 // This pattern can form any 64 bit immediate in 3 instructions.
1378 SDNode *ResultHi =
1379 CurDAG->getMachineNode(Opcode: PPC::PLI8, dl, VT: MVT::i64, Op1: getI64Imm(Hi32));
1380 SDNode *ResultLo =
1381 CurDAG->getMachineNode(Opcode: PPC::PLI8, dl, VT: MVT::i64, Op1: getI64Imm(Lo32));
1382 SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
1383 getI32Imm(0)};
1384 return CurDAG->getMachineNode(Opcode: PPC::RLDIMI, dl, VT: MVT::i64, Ops);
1385}
1386
1387static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
1388 unsigned *InstCnt = nullptr) {
1389 unsigned InstCntDirect = 0;
1390 // No more than 3 instructions are used if we can select the i64 immediate
1391 // directly.
1392 SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCnt&: InstCntDirect);
1393
1394 const PPCSubtarget &Subtarget =
1395 CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>();
1396
1397 // If we have prefixed instructions and there is a chance we can
1398 // materialize the constant with fewer prefixed instructions than
1399 // non-prefixed, try that.
1400 if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
1401 unsigned InstCntDirectP = 0;
1402 SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCnt&: InstCntDirectP);
1403 // Use the prefix case in either of two cases:
1404 // 1) We have no result from the non-prefix case to use.
1405 // 2) The non-prefix case uses more instructions than the prefix case.
1406 // If the prefix and non-prefix cases use the same number of instructions
1407 // we will prefer the non-prefix case.
1408 if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
1409 if (InstCnt)
1410 *InstCnt = InstCntDirectP;
1411 return ResultP;
1412 }
1413 }
1414
1415 if (Result) {
1416 if (InstCnt)
1417 *InstCnt = InstCntDirect;
1418 return Result;
1419 }
1420 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1421 return CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32);
1422 };
1423
1424 uint32_t Hi16OfLo32 = (Lo_32(Value: Imm) >> 16) & 0xffff;
1425 uint32_t Lo16OfLo32 = Lo_32(Value: Imm) & 0xffff;
1426
1427 // Try to use 4 instructions to materialize the immediate which is "almost" a
1428 // splat of a 32 bit immediate.
1429 if (Hi16OfLo32 && Lo16OfLo32) {
1430 uint32_t Hi16OfHi32 = (Hi_32(Value: Imm) >> 16) & 0xffff;
1431 uint32_t Lo16OfHi32 = Hi_32(Value: Imm) & 0xffff;
1432 bool IsSelected = false;
1433
1434 auto getSplat = [CurDAG, dl, getI32Imm](uint32_t Hi16, uint32_t Lo16) {
1435 SDNode *Result =
1436 CurDAG->getMachineNode(Opcode: PPC::LIS8, dl, VT: MVT::i64, Op1: getI32Imm(Hi16));
1437 Result = CurDAG->getMachineNode(Opcode: PPC::ORI8, dl, VT: MVT::i64,
1438 Op1: SDValue(Result, 0), Op2: getI32Imm(Lo16));
1439 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1440 getI32Imm(0)};
1441 return CurDAG->getMachineNode(Opcode: PPC::RLDIMI, dl, VT: MVT::i64, Ops);
1442 };
1443
1444 if (Hi16OfHi32 == Lo16OfHi32 && Lo16OfHi32 == Lo16OfLo32) {
1445 IsSelected = true;
1446 Result = getSplat(Hi16OfLo32, Lo16OfLo32);
1447 // Modify Hi16OfHi32.
1448 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(48),
1449 getI32Imm(0)};
1450 Result = CurDAG->getMachineNode(Opcode: PPC::RLDIMI, dl, VT: MVT::i64, Ops);
1451 } else if (Hi16OfHi32 == Hi16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1452 IsSelected = true;
1453 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1454 // Modify Lo16OfLo32.
1455 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1456 getI32Imm(16), getI32Imm(31)};
1457 Result = CurDAG->getMachineNode(Opcode: PPC::RLWIMI8, dl, VT: MVT::i64, Ops);
1458 } else if (Lo16OfHi32 == Lo16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1459 IsSelected = true;
1460 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1461 // Modify Hi16OfLo32.
1462 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1463 getI32Imm(0), getI32Imm(15)};
1464 Result = CurDAG->getMachineNode(Opcode: PPC::RLWIMI8, dl, VT: MVT::i64, Ops);
1465 }
1466 if (IsSelected == true) {
1467 if (InstCnt)
1468 *InstCnt = 4;
1469 return Result;
1470 }
1471 }
1472
1473 // Handle the upper 32 bit value.
1474 Result =
1475 selectI64ImmDirect(CurDAG, dl, Imm: Imm & 0xffffffff00000000, InstCnt&: InstCntDirect);
1476 // Add in the last bits as required.
1477 if (Hi16OfLo32) {
1478 Result = CurDAG->getMachineNode(Opcode: PPC::ORIS8, dl, VT: MVT::i64,
1479 Op1: SDValue(Result, 0), Op2: getI32Imm(Hi16OfLo32));
1480 ++InstCntDirect;
1481 }
1482 if (Lo16OfLo32) {
1483 Result = CurDAG->getMachineNode(Opcode: PPC::ORI8, dl, VT: MVT::i64, Op1: SDValue(Result, 0),
1484 Op2: getI32Imm(Lo16OfLo32));
1485 ++InstCntDirect;
1486 }
1487 if (InstCnt)
1488 *InstCnt = InstCntDirect;
1489 return Result;
1490}
1491
1492// Select a 64-bit constant.
1493static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
1494 SDLoc dl(N);
1495
1496 // Get 64 bit value.
1497 int64_t Imm = N->getAsZExtVal();
1498 if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1499 uint64_t SextImm = SignExtend64(X: Imm, B: MinSize);
1500 SDValue SDImm = CurDAG->getTargetConstant(Val: SextImm, DL: dl, VT: MVT::i64);
1501 if (isInt<16>(x: SextImm))
1502 return CurDAG->getMachineNode(Opcode: PPC::LI8, dl, VT: MVT::i64, Op1: SDImm);
1503 }
1504 return selectI64Imm(CurDAG, dl, Imm);
1505}
1506
1507namespace {
1508
1509class BitPermutationSelector {
1510 struct ValueBit {
1511 SDValue V;
1512
1513 // The bit number in the value, using a convention where bit 0 is the
1514 // lowest-order bit.
1515 unsigned Idx;
1516
1517 // ConstZero means a bit we need to mask off.
1518 // Variable is a bit comes from an input variable.
1519 // VariableKnownToBeZero is also a bit comes from an input variable,
1520 // but it is known to be already zero. So we do not need to mask them.
1521 enum Kind {
1522 ConstZero,
1523 Variable,
1524 VariableKnownToBeZero
1525 } K;
1526
1527 ValueBit(SDValue V, unsigned I, Kind K = Variable)
1528 : V(V), Idx(I), K(K) {}
1529 ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {}
1530
1531 bool isZero() const {
1532 return K == ConstZero || K == VariableKnownToBeZero;
1533 }
1534
1535 bool hasValue() const {
1536 return K == Variable || K == VariableKnownToBeZero;
1537 }
1538
1539 SDValue getValue() const {
1540 assert(hasValue() && "Cannot get the value of a constant bit");
1541 return V;
1542 }
1543
1544 unsigned getValueBitIndex() const {
1545 assert(hasValue() && "Cannot get the value bit index of a constant bit");
1546 return Idx;
1547 }
1548 };
1549
1550 // A bit group has the same underlying value and the same rotate factor.
1551 struct BitGroup {
1552 SDValue V;
1553 unsigned RLAmt;
1554 unsigned StartIdx, EndIdx;
1555
1556 // This rotation amount assumes that the lower 32 bits of the quantity are
1557 // replicated in the high 32 bits by the rotation operator (which is done
1558 // by rlwinm and friends in 64-bit mode).
1559 bool Repl32;
1560 // Did converting to Repl32 == true change the rotation factor? If it did,
1561 // it decreased it by 32.
1562 bool Repl32CR;
1563 // Was this group coalesced after setting Repl32 to true?
1564 bool Repl32Coalesced;
1565
1566 BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1567 : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1568 Repl32Coalesced(false) {
1569 LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1570 << " [" << S << ", " << E << "]\n");
1571 }
1572 };
1573
1574 // Information on each (Value, RLAmt) pair (like the number of groups
1575 // associated with each) used to choose the lowering method.
1576 struct ValueRotInfo {
1577 SDValue V;
1578 unsigned RLAmt = std::numeric_limits<unsigned>::max();
1579 unsigned NumGroups = 0;
1580 unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1581 bool Repl32 = false;
1582
1583 ValueRotInfo() = default;
1584
1585 // For sorting (in reverse order) by NumGroups, and then by
1586 // FirstGroupStartIdx.
1587 bool operator < (const ValueRotInfo &Other) const {
1588 // We need to sort so that the non-Repl32 come first because, when we're
1589 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1590 // masking operation.
1591 if (Repl32 < Other.Repl32)
1592 return true;
1593 else if (Repl32 > Other.Repl32)
1594 return false;
1595 else if (NumGroups > Other.NumGroups)
1596 return true;
1597 else if (NumGroups < Other.NumGroups)
1598 return false;
1599 else if (RLAmt == 0 && Other.RLAmt != 0)
1600 return true;
1601 else if (RLAmt != 0 && Other.RLAmt == 0)
1602 return false;
1603 else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1604 return true;
1605 return false;
1606 }
1607 };
1608
1609 using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1610 using ValueBitsMemoizer =
1611 DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
1612 ValueBitsMemoizer Memoizer;
1613
1614 // Return a pair of bool and a SmallVector pointer to a memoization entry.
1615 // The bool is true if something interesting was deduced, otherwise if we're
1616 // providing only a generic representation of V (or something else likewise
1617 // uninteresting for instruction selection) through the SmallVector.
1618 std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1619 unsigned NumBits) {
1620 auto &ValueEntry = Memoizer[V];
1621 if (ValueEntry)
1622 return std::make_pair(x&: ValueEntry->first, y: &ValueEntry->second);
1623 ValueEntry.reset(p: new ValueBitsMemoizedValue());
1624 bool &Interesting = ValueEntry->first;
1625 SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1626 Bits.resize(N: NumBits);
1627
1628 switch (V.getOpcode()) {
1629 default: break;
1630 case ISD::ROTL:
1631 if (isa<ConstantSDNode>(Val: V.getOperand(i: 1))) {
1632 assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!");
1633 unsigned RotAmt = V.getConstantOperandVal(i: 1) & (NumBits - 1);
1634
1635 const auto &LHSBits = *getValueBits(V: V.getOperand(i: 0), NumBits).second;
1636
1637 for (unsigned i = 0; i < NumBits; ++i)
1638 Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1639
1640 return std::make_pair(x&: Interesting = true, y: &Bits);
1641 }
1642 break;
1643 case ISD::SHL:
1644 case PPCISD::SHL:
1645 if (isa<ConstantSDNode>(Val: V.getOperand(i: 1))) {
1646 // sld takes 7 bits, slw takes 6.
1647 unsigned ShiftAmt = V.getConstantOperandVal(i: 1) & ((NumBits << 1) - 1);
1648
1649 const auto &LHSBits = *getValueBits(V: V.getOperand(i: 0), NumBits).second;
1650
1651 if (ShiftAmt >= NumBits) {
1652 for (unsigned i = 0; i < NumBits; ++i)
1653 Bits[i] = ValueBit(ValueBit::ConstZero);
1654 } else {
1655 for (unsigned i = ShiftAmt; i < NumBits; ++i)
1656 Bits[i] = LHSBits[i - ShiftAmt];
1657 for (unsigned i = 0; i < ShiftAmt; ++i)
1658 Bits[i] = ValueBit(ValueBit::ConstZero);
1659 }
1660
1661 return std::make_pair(x&: Interesting = true, y: &Bits);
1662 }
1663 break;
1664 case ISD::SRL:
1665 case PPCISD::SRL:
1666 if (isa<ConstantSDNode>(Val: V.getOperand(i: 1))) {
1667 // srd takes lowest 7 bits, srw takes 6.
1668 unsigned ShiftAmt = V.getConstantOperandVal(i: 1) & ((NumBits << 1) - 1);
1669
1670 const auto &LHSBits = *getValueBits(V: V.getOperand(i: 0), NumBits).second;
1671
1672 if (ShiftAmt >= NumBits) {
1673 for (unsigned i = 0; i < NumBits; ++i)
1674 Bits[i] = ValueBit(ValueBit::ConstZero);
1675 } else {
1676 for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1677 Bits[i] = LHSBits[i + ShiftAmt];
1678 for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1679 Bits[i] = ValueBit(ValueBit::ConstZero);
1680 }
1681
1682 return std::make_pair(x&: Interesting = true, y: &Bits);
1683 }
1684 break;
1685 case ISD::AND:
1686 if (isa<ConstantSDNode>(Val: V.getOperand(i: 1))) {
1687 uint64_t Mask = V.getConstantOperandVal(i: 1);
1688
1689 const SmallVector<ValueBit, 64> *LHSBits;
1690 // Mark this as interesting, only if the LHS was also interesting. This
1691 // prevents the overall procedure from matching a single immediate 'and'
1692 // (which is non-optimal because such an and might be folded with other
1693 // things if we don't select it here).
1694 std::tie(args&: Interesting, args&: LHSBits) = getValueBits(V: V.getOperand(i: 0), NumBits);
1695
1696 for (unsigned i = 0; i < NumBits; ++i)
1697 if (((Mask >> i) & 1) == 1)
1698 Bits[i] = (*LHSBits)[i];
1699 else {
1700 // AND instruction masks this bit. If the input is already zero,
1701 // we have nothing to do here. Otherwise, make the bit ConstZero.
1702 if ((*LHSBits)[i].isZero())
1703 Bits[i] = (*LHSBits)[i];
1704 else
1705 Bits[i] = ValueBit(ValueBit::ConstZero);
1706 }
1707
1708 return std::make_pair(x&: Interesting, y: &Bits);
1709 }
1710 break;
1711 case ISD::OR: {
1712 const auto &LHSBits = *getValueBits(V: V.getOperand(i: 0), NumBits).second;
1713 const auto &RHSBits = *getValueBits(V: V.getOperand(i: 1), NumBits).second;
1714
1715 bool AllDisjoint = true;
1716 SDValue LastVal = SDValue();
1717 unsigned LastIdx = 0;
1718 for (unsigned i = 0; i < NumBits; ++i) {
1719 if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1720 // If both inputs are known to be zero and one is ConstZero and
1721 // another is VariableKnownToBeZero, we can select whichever
1722 // we like. To minimize the number of bit groups, we select
1723 // VariableKnownToBeZero if this bit is the next bit of the same
1724 // input variable from the previous bit. Otherwise, we select
1725 // ConstZero.
1726 if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1727 LHSBits[i].getValueBitIndex() == LastIdx + 1)
1728 Bits[i] = LHSBits[i];
1729 else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1730 RHSBits[i].getValueBitIndex() == LastIdx + 1)
1731 Bits[i] = RHSBits[i];
1732 else
1733 Bits[i] = ValueBit(ValueBit::ConstZero);
1734 }
1735 else if (LHSBits[i].isZero())
1736 Bits[i] = RHSBits[i];
1737 else if (RHSBits[i].isZero())
1738 Bits[i] = LHSBits[i];
1739 else {
1740 AllDisjoint = false;
1741 break;
1742 }
1743 // We remember the value and bit index of this bit.
1744 if (Bits[i].hasValue()) {
1745 LastVal = Bits[i].getValue();
1746 LastIdx = Bits[i].getValueBitIndex();
1747 }
1748 else {
1749 if (LastVal) LastVal = SDValue();
1750 LastIdx = 0;
1751 }
1752 }
1753
1754 if (!AllDisjoint)
1755 break;
1756
1757 return std::make_pair(x&: Interesting = true, y: &Bits);
1758 }
1759 case ISD::ZERO_EXTEND: {
1760 // We support only the case with zero extension from i32 to i64 so far.
1761 if (V.getValueType() != MVT::i64 ||
1762 V.getOperand(i: 0).getValueType() != MVT::i32)
1763 break;
1764
1765 const SmallVector<ValueBit, 64> *LHSBits;
1766 const unsigned NumOperandBits = 32;
1767 std::tie(args&: Interesting, args&: LHSBits) = getValueBits(V: V.getOperand(i: 0),
1768 NumBits: NumOperandBits);
1769
1770 for (unsigned i = 0; i < NumOperandBits; ++i)
1771 Bits[i] = (*LHSBits)[i];
1772
1773 for (unsigned i = NumOperandBits; i < NumBits; ++i)
1774 Bits[i] = ValueBit(ValueBit::ConstZero);
1775
1776 return std::make_pair(x&: Interesting, y: &Bits);
1777 }
1778 case ISD::TRUNCATE: {
1779 EVT FromType = V.getOperand(i: 0).getValueType();
1780 EVT ToType = V.getValueType();
1781 // We support only the case with truncate from i64 to i32.
1782 if (FromType != MVT::i64 || ToType != MVT::i32)
1783 break;
1784 const unsigned NumAllBits = FromType.getSizeInBits();
1785 SmallVector<ValueBit, 64> *InBits;
1786 std::tie(args&: Interesting, args&: InBits) = getValueBits(V: V.getOperand(i: 0),
1787 NumBits: NumAllBits);
1788 const unsigned NumValidBits = ToType.getSizeInBits();
1789
1790 // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1791 // So, we cannot include this truncate.
1792 bool UseUpper32bit = false;
1793 for (unsigned i = 0; i < NumValidBits; ++i)
1794 if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1795 UseUpper32bit = true;
1796 break;
1797 }
1798 if (UseUpper32bit)
1799 break;
1800
1801 for (unsigned i = 0; i < NumValidBits; ++i)
1802 Bits[i] = (*InBits)[i];
1803
1804 return std::make_pair(x&: Interesting, y: &Bits);
1805 }
1806 case ISD::AssertZext: {
1807 // For AssertZext, we look through the operand and
1808 // mark the bits known to be zero.
1809 const SmallVector<ValueBit, 64> *LHSBits;
1810 std::tie(args&: Interesting, args&: LHSBits) = getValueBits(V: V.getOperand(i: 0),
1811 NumBits);
1812
1813 EVT FromType = cast<VTSDNode>(Val: V.getOperand(i: 1))->getVT();
1814 const unsigned NumValidBits = FromType.getSizeInBits();
1815 for (unsigned i = 0; i < NumValidBits; ++i)
1816 Bits[i] = (*LHSBits)[i];
1817
1818 // These bits are known to be zero but the AssertZext may be from a value
1819 // that already has some constant zero bits (i.e. from a masking and).
1820 for (unsigned i = NumValidBits; i < NumBits; ++i)
1821 Bits[i] = (*LHSBits)[i].hasValue()
1822 ? ValueBit((*LHSBits)[i].getValue(),
1823 (*LHSBits)[i].getValueBitIndex(),
1824 ValueBit::VariableKnownToBeZero)
1825 : ValueBit(ValueBit::ConstZero);
1826
1827 return std::make_pair(x&: Interesting, y: &Bits);
1828 }
1829 case ISD::LOAD:
1830 LoadSDNode *LD = cast<LoadSDNode>(Val&: V);
1831 if (ISD::isZEXTLoad(N: V.getNode()) && V.getResNo() == 0) {
1832 EVT VT = LD->getMemoryVT();
1833 const unsigned NumValidBits = VT.getSizeInBits();
1834
1835 for (unsigned i = 0; i < NumValidBits; ++i)
1836 Bits[i] = ValueBit(V, i);
1837
1838 // These bits are known to be zero.
1839 for (unsigned i = NumValidBits; i < NumBits; ++i)
1840 Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1841
1842 // Zero-extending load itself cannot be optimized. So, it is not
1843 // interesting by itself though it gives useful information.
1844 return std::make_pair(x&: Interesting = false, y: &Bits);
1845 }
1846 break;
1847 }
1848
1849 for (unsigned i = 0; i < NumBits; ++i)
1850 Bits[i] = ValueBit(V, i);
1851
1852 return std::make_pair(x&: Interesting = false, y: &Bits);
1853 }
1854
1855 // For each value (except the constant ones), compute the left-rotate amount
1856 // to get it from its original to final position.
1857 void computeRotationAmounts() {
1858 NeedMask = false;
1859 RLAmt.resize(N: Bits.size());
1860 for (unsigned i = 0; i < Bits.size(); ++i)
1861 if (Bits[i].hasValue()) {
1862 unsigned VBI = Bits[i].getValueBitIndex();
1863 if (i >= VBI)
1864 RLAmt[i] = i - VBI;
1865 else
1866 RLAmt[i] = Bits.size() - (VBI - i);
1867 } else if (Bits[i].isZero()) {
1868 NeedMask = true;
1869 RLAmt[i] = UINT32_MAX;
1870 } else {
1871 llvm_unreachable("Unknown value bit type");
1872 }
1873 }
1874
1875 // Collect groups of consecutive bits with the same underlying value and
1876 // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1877 // they break up groups.
1878 void collectBitGroups(bool LateMask) {
1879 BitGroups.clear();
1880
1881 unsigned LastRLAmt = RLAmt[0];
1882 SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1883 unsigned LastGroupStartIdx = 0;
1884 bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1885 for (unsigned i = 1; i < Bits.size(); ++i) {
1886 unsigned ThisRLAmt = RLAmt[i];
1887 SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1888 if (LateMask && !ThisValue) {
1889 ThisValue = LastValue;
1890 ThisRLAmt = LastRLAmt;
1891 // If we're doing late masking, then the first bit group always starts
1892 // at zero (even if the first bits were zero).
1893 if (BitGroups.empty())
1894 LastGroupStartIdx = 0;
1895 }
1896
1897 // If this bit is known to be zero and the current group is a bit group
1898 // of zeros, we do not need to terminate the current bit group even the
1899 // Value or RLAmt does not match here. Instead, we terminate this group
1900 // when the first non-zero bit appears later.
1901 if (IsGroupOfZeros && Bits[i].isZero())
1902 continue;
1903
1904 // If this bit has the same underlying value and the same rotate factor as
1905 // the last one, then they're part of the same group.
1906 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1907 // We cannot continue the current group if this bits is not known to
1908 // be zero in a bit group of zeros.
1909 if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
1910 continue;
1911
1912 if (LastValue.getNode())
1913 BitGroups.push_back(Elt: BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1914 i-1));
1915 LastRLAmt = ThisRLAmt;
1916 LastValue = ThisValue;
1917 LastGroupStartIdx = i;
1918 IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1919 }
1920 if (LastValue.getNode())
1921 BitGroups.push_back(Elt: BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1922 Bits.size()-1));
1923
1924 if (BitGroups.empty())
1925 return;
1926
1927 // We might be able to combine the first and last groups.
1928 if (BitGroups.size() > 1) {
1929 // If the first and last groups are the same, then remove the first group
1930 // in favor of the last group, making the ending index of the last group
1931 // equal to the ending index of the to-be-removed first group.
1932 if (BitGroups[0].StartIdx == 0 &&
1933 BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1934 BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1935 BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1936 LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1937 BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1938 BitGroups.erase(CI: BitGroups.begin());
1939 }
1940 }
1941 }
1942
1943 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1944 // associated with each. If the number of groups are same, we prefer a group
1945 // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1946 // instruction. If there is a degeneracy, pick the one that occurs
1947 // first (in the final value).
1948 void collectValueRotInfo() {
1949 ValueRots.clear();
1950
1951 for (auto &BG : BitGroups) {
1952 unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1953 ValueRotInfo &VRI = ValueRots[std::make_pair(x&: BG.V, y&: RLAmtKey)];
1954 VRI.V = BG.V;
1955 VRI.RLAmt = BG.RLAmt;
1956 VRI.Repl32 = BG.Repl32;
1957 VRI.NumGroups += 1;
1958 VRI.FirstGroupStartIdx = std::min(a: VRI.FirstGroupStartIdx, b: BG.StartIdx);
1959 }
1960
1961 // Now that we've collected the various ValueRotInfo instances, we need to
1962 // sort them.
1963 ValueRotsVec.clear();
1964 for (auto &I : ValueRots) {
1965 ValueRotsVec.push_back(Elt: I.second);
1966 }
1967 llvm::sort(C&: ValueRotsVec);
1968 }
1969
1970 // In 64-bit mode, rlwinm and friends have a rotation operator that
1971 // replicates the low-order 32 bits into the high-order 32-bits. The mask
1972 // indices of these instructions can only be in the lower 32 bits, so they
1973 // can only represent some 64-bit bit groups. However, when they can be used,
1974 // the 32-bit replication can be used to represent, as a single bit group,
1975 // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1976 // groups when possible. Returns true if any of the bit groups were
1977 // converted.
1978 void assignRepl32BitGroups() {
1979 // If we have bits like this:
1980 //
1981 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1982 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1983 // Groups: | RLAmt = 8 | RLAmt = 40 |
1984 //
1985 // But, making use of a 32-bit operation that replicates the low-order 32
1986 // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1987 // of 8.
1988
1989 auto IsAllLow32 = [this](BitGroup & BG) {
1990 if (BG.StartIdx <= BG.EndIdx) {
1991 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1992 if (!Bits[i].hasValue())
1993 continue;
1994 if (Bits[i].getValueBitIndex() >= 32)
1995 return false;
1996 }
1997 } else {
1998 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1999 if (!Bits[i].hasValue())
2000 continue;
2001 if (Bits[i].getValueBitIndex() >= 32)
2002 return false;
2003 }
2004 for (unsigned i = 0; i <= BG.EndIdx; ++i) {
2005 if (!Bits[i].hasValue())
2006 continue;
2007 if (Bits[i].getValueBitIndex() >= 32)
2008 return false;
2009 }
2010 }
2011
2012 return true;
2013 };
2014
2015 for (auto &BG : BitGroups) {
2016 // If this bit group has RLAmt of 0 and will not be merged with
2017 // another bit group, we don't benefit from Repl32. We don't mark
2018 // such group to give more freedom for later instruction selection.
2019 if (BG.RLAmt == 0) {
2020 auto PotentiallyMerged = [this](BitGroup & BG) {
2021 for (auto &BG2 : BitGroups)
2022 if (&BG != &BG2 && BG.V == BG2.V &&
2023 (BG2.RLAmt == 0 || BG2.RLAmt == 32))
2024 return true;
2025 return false;
2026 };
2027 if (!PotentiallyMerged(BG))
2028 continue;
2029 }
2030 if (BG.StartIdx < 32 && BG.EndIdx < 32) {
2031 if (IsAllLow32(BG)) {
2032 if (BG.RLAmt >= 32) {
2033 BG.RLAmt -= 32;
2034 BG.Repl32CR = true;
2035 }
2036
2037 BG.Repl32 = true;
2038
2039 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
2040 << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
2041 << BG.StartIdx << ", " << BG.EndIdx << "]\n");
2042 }
2043 }
2044 }
2045
2046 // Now walk through the bit groups, consolidating where possible.
2047 for (auto I = BitGroups.begin(); I != BitGroups.end();) {
2048 // We might want to remove this bit group by merging it with the previous
2049 // group (which might be the ending group).
2050 auto IP = (I == BitGroups.begin()) ?
2051 std::prev(x: BitGroups.end()) : std::prev(x: I);
2052 if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
2053 I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
2054
2055 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
2056 << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
2057 << I->StartIdx << ", " << I->EndIdx
2058 << "] with group with range [" << IP->StartIdx << ", "
2059 << IP->EndIdx << "]\n");
2060
2061 IP->EndIdx = I->EndIdx;
2062 IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
2063 IP->Repl32Coalesced = true;
2064 I = BitGroups.erase(CI: I);
2065 continue;
2066 } else {
2067 // There is a special case worth handling: If there is a single group
2068 // covering the entire upper 32 bits, and it can be merged with both
2069 // the next and previous groups (which might be the same group), then
2070 // do so. If it is the same group (so there will be only one group in
2071 // total), then we need to reverse the order of the range so that it
2072 // covers the entire 64 bits.
2073 if (I->StartIdx == 32 && I->EndIdx == 63) {
2074 assert(std::next(I) == BitGroups.end() &&
2075 "bit group ends at index 63 but there is another?");
2076 auto IN = BitGroups.begin();
2077
2078 if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
2079 (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
2080 IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
2081 IsAllLow32(*I)) {
2082
2083 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
2084 << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
2085 << ", " << I->EndIdx
2086 << "] with 32-bit replicated groups with ranges ["
2087 << IP->StartIdx << ", " << IP->EndIdx << "] and ["
2088 << IN->StartIdx << ", " << IN->EndIdx << "]\n");
2089
2090 if (IP == IN) {
2091 // There is only one other group; change it to cover the whole
2092 // range (backward, so that it can still be Repl32 but cover the
2093 // whole 64-bit range).
2094 IP->StartIdx = 31;
2095 IP->EndIdx = 30;
2096 IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
2097 IP->Repl32Coalesced = true;
2098 I = BitGroups.erase(CI: I);
2099 } else {
2100 // There are two separate groups, one before this group and one
2101 // after us (at the beginning). We're going to remove this group,
2102 // but also the group at the very beginning.
2103 IP->EndIdx = IN->EndIdx;
2104 IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
2105 IP->Repl32Coalesced = true;
2106 I = BitGroups.erase(CI: I);
2107 BitGroups.erase(CI: BitGroups.begin());
2108 }
2109
2110 // This must be the last group in the vector (and we might have
2111 // just invalidated the iterator above), so break here.
2112 break;
2113 }
2114 }
2115 }
2116
2117 ++I;
2118 }
2119 }
2120
2121 SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
2122 return CurDAG->getTargetConstant(Val: Imm, DL: dl, VT: MVT::i32);
2123 }
2124
2125 uint64_t getZerosMask() {
2126 uint64_t Mask = 0;
2127 for (unsigned i = 0; i < Bits.size(); ++i) {
2128 if (Bits[i].hasValue())
2129 continue;
2130 Mask |= (UINT64_C(1) << i);
2131 }
2132
2133 return ~Mask;
2134 }
2135
2136 // This method extends an input value to 64 bit if input is 32-bit integer.
2137 // While selecting instructions in BitPermutationSelector in 64-bit mode,
2138 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
2139 // In such case, we extend it to 64 bit to be consistent with other values.
2140 SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
2141 if (V.getValueSizeInBits() == 64)
2142 return V;
2143
2144 assert(V.getValueSizeInBits() == 32);
2145 SDValue SubRegIdx = CurDAG->getTargetConstant(Val: PPC::sub_32, DL: dl, VT: MVT::i32);
2146 SDValue ImDef = SDValue(CurDAG->getMachineNode(Opcode: PPC::IMPLICIT_DEF, dl,
2147 VT: MVT::i64), 0);
2148 SDValue ExtVal = SDValue(CurDAG->getMachineNode(Opcode: PPC::INSERT_SUBREG, dl,
2149 VT: MVT::i64, Op1: ImDef, Op2: V,
2150 Op3: SubRegIdx), 0);
2151 return ExtVal;
2152 }
2153
2154 SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
2155 if (V.getValueSizeInBits() == 32)
2156 return V;
2157
2158 assert(V.getValueSizeInBits() == 64);
2159 SDValue SubRegIdx = CurDAG->getTargetConstant(Val: PPC::sub_32, DL: dl, VT: MVT::i32);
2160 SDValue SubVal = SDValue(CurDAG->getMachineNode(Opcode: PPC::EXTRACT_SUBREG, dl,
2161 VT: MVT::i32, Op1: V, Op2: SubRegIdx), 0);
2162 return SubVal;
2163 }
2164
2165 // Depending on the number of groups for a particular value, it might be
2166 // better to rotate, mask explicitly (using andi/andis), and then or the
2167 // result. Select this part of the result first.
2168 void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2169 if (BPermRewriterNoMasking)
2170 return;
2171
2172 for (ValueRotInfo &VRI : ValueRotsVec) {
2173 unsigned Mask = 0;
2174 for (unsigned i = 0; i < Bits.size(); ++i) {
2175 if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
2176 continue;
2177 if (RLAmt[i] != VRI.RLAmt)
2178 continue;
2179 Mask |= (1u << i);
2180 }
2181
2182 // Compute the masks for andi/andis that would be necessary.
2183 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2184 assert((ANDIMask != 0 || ANDISMask != 0) &&
2185 "No set bits in mask for value bit groups");
2186 bool NeedsRotate = VRI.RLAmt != 0;
2187
2188 // We're trying to minimize the number of instructions. If we have one
2189 // group, using one of andi/andis can break even. If we have three
2190 // groups, we can use both andi and andis and break even (to use both
2191 // andi and andis we also need to or the results together). We need four
2192 // groups if we also need to rotate. To use andi/andis we need to do more
2193 // than break even because rotate-and-mask instructions tend to be easier
2194 // to schedule.
2195
2196 // FIXME: We've biased here against using andi/andis, which is right for
2197 // POWER cores, but not optimal everywhere. For example, on the A2,
2198 // andi/andis have single-cycle latency whereas the rotate-and-mask
2199 // instructions take two cycles, and it would be better to bias toward
2200 // andi/andis in break-even cases.
2201
2202 unsigned NumAndInsts = (unsigned) NeedsRotate +
2203 (unsigned) (ANDIMask != 0) +
2204 (unsigned) (ANDISMask != 0) +
2205 (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
2206 (unsigned) (bool) Res;
2207
2208 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2209 << " RL: " << VRI.RLAmt << ":"
2210 << "\n\t\t\tisel using masking: " << NumAndInsts
2211 << " using rotates: " << VRI.NumGroups << "\n");
2212
2213 if (NumAndInsts >= VRI.NumGroups)
2214 continue;
2215
2216 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2217
2218 if (InstCnt) *InstCnt += NumAndInsts;
2219
2220 SDValue VRot;
2221 if (VRI.RLAmt) {
2222 SDValue Ops[] =
2223 { TruncateToInt32(V: VRI.V, dl), getI32Imm(Imm: VRI.RLAmt, dl),
2224 getI32Imm(Imm: 0, dl), getI32Imm(Imm: 31, dl) };
2225 VRot = SDValue(CurDAG->getMachineNode(Opcode: PPC::RLWINM, dl, VT: MVT::i32,
2226 Ops), 0);
2227 } else {
2228 VRot = TruncateToInt32(V: VRI.V, dl);
2229 }
2230
2231 SDValue ANDIVal, ANDISVal;
2232 if (ANDIMask != 0)
2233 ANDIVal = SDValue(CurDAG->getMachineNode(Opcode: PPC::ANDI_rec, dl, VT: MVT::i32,
2234 Op1: VRot, Op2: getI32Imm(Imm: ANDIMask, dl)),
2235 0);
2236 if (ANDISMask != 0)
2237 ANDISVal =
2238 SDValue(CurDAG->getMachineNode(Opcode: PPC::ANDIS_rec, dl, VT: MVT::i32, Op1: VRot,
2239 Op2: getI32Imm(Imm: ANDISMask, dl)),
2240 0);
2241
2242 SDValue TotalVal;
2243 if (!ANDIVal)
2244 TotalVal = ANDISVal;
2245 else if (!ANDISVal)
2246 TotalVal = ANDIVal;
2247 else
2248 TotalVal = SDValue(CurDAG->getMachineNode(Opcode: PPC::OR, dl, VT: MVT::i32,
2249 Op1: ANDIVal, Op2: ANDISVal), 0);
2250
2251 if (!Res)
2252 Res = TotalVal;
2253 else
2254 Res = SDValue(CurDAG->getMachineNode(Opcode: PPC::OR, dl, VT: MVT::i32,
2255 Op1: Res, Op2: TotalVal), 0);
2256
2257 // Now, remove all groups with this underlying value and rotation
2258 // factor.
2259 eraseMatchingBitGroups(F: [VRI](const BitGroup &BG) {
2260 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2261 });
2262 }
2263 }
2264
2265 // Instruction selection for the 32-bit case.
2266 SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
2267 SDLoc dl(N);
2268 SDValue Res;
2269
2270 if (InstCnt) *InstCnt = 0;
2271
2272 // Take care of cases that should use andi/andis first.
2273 SelectAndParts32(dl, Res, InstCnt);
2274
2275 // If we've not yet selected a 'starting' instruction, and we have no zeros
2276 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2277 // number of groups), and start with this rotated value.
2278 if ((!NeedMask || LateMask) && !Res) {
2279 ValueRotInfo &VRI = ValueRotsVec[0];
2280 if (VRI.RLAmt) {
2281 if (InstCnt) *InstCnt += 1;
2282 SDValue Ops[] =
2283 { TruncateToInt32(V: VRI.V, dl), getI32Imm(Imm: VRI.RLAmt, dl),
2284 getI32Imm(Imm: 0, dl), getI32Imm(Imm: 31, dl) };
2285 Res = SDValue(CurDAG->getMachineNode(Opcode: PPC::RLWINM, dl, VT: MVT::i32, Ops),
2286 0);
2287 } else {
2288 Res = TruncateToInt32(V: VRI.V, dl);
2289 }
2290
2291 // Now, remove all groups with this underlying value and rotation factor.
2292 eraseMatchingBitGroups(F: [VRI](const BitGroup &BG) {
2293 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2294 });
2295 }
2296
2297 if (InstCnt) *InstCnt += BitGroups.size();
2298
2299 // Insert the other groups (one at a time).
2300 for (auto &BG : BitGroups) {
2301 if (!Res) {
2302 SDValue Ops[] =
2303 { TruncateToInt32(V: BG.V, dl), getI32Imm(Imm: BG.RLAmt, dl),
2304 getI32Imm(Imm: Bits.size() - BG.EndIdx - 1, dl),
2305 getI32Imm(Imm: Bits.size() - BG.StartIdx - 1, dl) };
2306 Res = SDValue(CurDAG->getMachineNode(Opcode: PPC::RLWINM, dl, VT: MVT::i32, Ops), 0);
2307 } else {
2308 SDValue Ops[] =
2309 { Res, TruncateToInt32(V: BG.V, dl), getI32Imm(Imm: BG.RLAmt, dl),
2310 getI32Imm(Imm: Bits.size() - BG.EndIdx - 1, dl),
2311 getI32Imm(Imm: Bits.size() - BG.StartIdx - 1, dl) };
2312 Res = SDValue(CurDAG->getMachineNode(Opcode: PPC::RLWIMI, dl, VT: MVT::i32, Ops), 0);
2313 }
2314 }
2315
2316 if (LateMask) {
2317 unsigned Mask = (unsigned) getZerosMask();
2318
2319 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2320 assert((ANDIMask != 0 || ANDISMask != 0) &&
2321 "No set bits in zeros mask?");
2322
2323 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2324 (unsigned) (ANDISMask != 0) +
2325 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2326
2327 SDValue ANDIVal, ANDISVal;
2328 if (ANDIMask != 0)
2329 ANDIVal = SDValue(CurDAG->getMachineNode(Opcode: PPC::ANDI_rec, dl, VT: MVT::i32,
2330 Op1: Res, Op2: getI32Imm(Imm: ANDIMask, dl)),
2331 0);
2332 if (ANDISMask != 0)
2333 ANDISVal =
2334 SDValue(CurDAG->getMachineNode(Opcode: PPC::ANDIS_rec, dl, VT: MVT::i32, Op1: Res,
2335 Op2: getI32Imm(Imm: ANDISMask, dl)),
2336 0);
2337
2338 if (!ANDIVal)
2339 Res = ANDISVal;
2340 else if (!ANDISVal)
2341 Res = ANDIVal;
2342 else
2343 Res = SDValue(CurDAG->getMachineNode(Opcode: PPC::OR, dl, VT: MVT::i32,
2344 Op1: ANDIVal, Op2: ANDISVal), 0);
2345 }
2346
2347 return Res.getNode();
2348 }
2349
// Number of instructions (1 or 2) needed for a 64-bit rotate-and-mask that
// rotates left by RLAmt and keeps result bits MaskStart..MaskEnd (bit 0 is
// the lowest-order bit). IsIns tells whether the result is inserted into
// another value rather than produced on its own.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  // A replicated-32-bit group always takes a single instruction.
  if (Repl32)
    return 1;

  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  const unsigned InstMaskStart = 63 - MaskEnd;
  const unsigned InstMaskEnd = 63 - MaskStart;

  // Without insertion, a mask touching either end of the register can be done
  // in one instruction.
  const bool MaskTouchesEdge = InstMaskEnd == 63 || InstMaskStart == 0;
  if (!IsIns && MaskTouchesEdge)
    return 1;

  // So can a mask whose end is tied to the rotation amount, with or without
  // insertion.
  if (InstMaskEnd == 63 - RLAmt)
    return 1;

  return 2;
}
2367
2368 // For 64-bit values, not all combinations of rotates and masks are
2369 // available. Produce one if it is available.
2370 SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
2371 bool Repl32, unsigned MaskStart, unsigned MaskEnd,
2372 unsigned *InstCnt = nullptr) {
2373 // In the notation used by the instructions, 'start' and 'end' are reversed
2374 // because bits are counted from high to low order.
2375 unsigned InstMaskStart = 64 - MaskEnd - 1,
2376 InstMaskEnd = 64 - MaskStart - 1;
2377
2378 if (InstCnt) *InstCnt += 1;
2379
2380 if (Repl32) {
2381 // This rotation amount assumes that the lower 32 bits of the quantity
2382 // are replicated in the high 32 bits by the rotation operator (which is
2383 // done by rlwinm and friends).
2384 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2385 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2386 SDValue Ops[] =
2387 { ExtendToInt64(V, dl), getI32Imm(Imm: RLAmt, dl),
2388 getI32Imm(Imm: InstMaskStart - 32, dl), getI32Imm(Imm: InstMaskEnd - 32, dl) };
2389 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLWINM8, dl, VT: MVT::i64,
2390 Ops), 0);
2391 }
2392
2393 if (InstMaskEnd == 63) {
2394 SDValue Ops[] =
2395 { ExtendToInt64(V, dl), getI32Imm(Imm: RLAmt, dl),
2396 getI32Imm(Imm: InstMaskStart, dl) };
2397 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Ops), 0);
2398 }
2399
2400 if (InstMaskStart == 0) {
2401 SDValue Ops[] =
2402 { ExtendToInt64(V, dl), getI32Imm(Imm: RLAmt, dl),
2403 getI32Imm(Imm: InstMaskEnd, dl) };
2404 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICR, dl, VT: MVT::i64, Ops), 0);
2405 }
2406
2407 if (InstMaskEnd == 63 - RLAmt) {
2408 SDValue Ops[] =
2409 { ExtendToInt64(V, dl), getI32Imm(Imm: RLAmt, dl),
2410 getI32Imm(Imm: InstMaskStart, dl) };
2411 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDIC, dl, VT: MVT::i64, Ops), 0);
2412 }
2413
2414 // We cannot do this with a single instruction, so we'll use two. The
2415 // problem is that we're not free to choose both a rotation amount and mask
2416 // start and end independently. We can choose an arbitrary mask start and
2417 // end, but then the rotation amount is fixed. Rotation, however, can be
2418 // inverted, and so by applying an "inverse" rotation first, we can get the
2419 // desired result.
2420 if (InstCnt) *InstCnt += 1;
2421
2422 // The rotation mask for the second instruction must be MaskStart.
2423 unsigned RLAmt2 = MaskStart;
2424 // The first instruction must rotate V so that the overall rotation amount
2425 // is RLAmt.
2426 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2427 if (RLAmt1)
2428 V = SelectRotMask64(V, dl, RLAmt: RLAmt1, Repl32: false, MaskStart: 0, MaskEnd: 63);
2429 return SelectRotMask64(V, dl, RLAmt: RLAmt2, Repl32: false, MaskStart, MaskEnd);
2430 }
2431
2432 // For 64-bit values, not all combinations of rotates and masks are
2433 // available. Produce a rotate-mask-and-insert if one is available.
2434 SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
2435 unsigned RLAmt, bool Repl32, unsigned MaskStart,
2436 unsigned MaskEnd, unsigned *InstCnt = nullptr) {
2437 // In the notation used by the instructions, 'start' and 'end' are reversed
2438 // because bits are counted from high to low order.
2439 unsigned InstMaskStart = 64 - MaskEnd - 1,
2440 InstMaskEnd = 64 - MaskStart - 1;
2441
2442 if (InstCnt) *InstCnt += 1;
2443
2444 if (Repl32) {
2445 // This rotation amount assumes that the lower 32 bits of the quantity
2446 // are replicated in the high 32 bits by the rotation operator (which is
2447 // done by rlwinm and friends).
2448 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2449 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2450 SDValue Ops[] =
2451 { ExtendToInt64(V: Base, dl), ExtendToInt64(V, dl), getI32Imm(Imm: RLAmt, dl),
2452 getI32Imm(Imm: InstMaskStart - 32, dl), getI32Imm(Imm: InstMaskEnd - 32, dl) };
2453 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLWIMI8, dl, VT: MVT::i64,
2454 Ops), 0);
2455 }
2456
2457 if (InstMaskEnd == 63 - RLAmt) {
2458 SDValue Ops[] =
2459 { ExtendToInt64(V: Base, dl), ExtendToInt64(V, dl), getI32Imm(Imm: RLAmt, dl),
2460 getI32Imm(Imm: InstMaskStart, dl) };
2461 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDIMI, dl, VT: MVT::i64, Ops), 0);
2462 }
2463
2464 // We cannot do this with a single instruction, so we'll use two. The
2465 // problem is that we're not free to choose both a rotation amount and mask
2466 // start and end independently. We can choose an arbitrary mask start and
2467 // end, but then the rotation amount is fixed. Rotation, however, can be
2468 // inverted, and so by applying an "inverse" rotation first, we can get the
2469 // desired result.
2470 if (InstCnt) *InstCnt += 1;
2471
2472 // The rotation mask for the second instruction must be MaskStart.
2473 unsigned RLAmt2 = MaskStart;
2474 // The first instruction must rotate V so that the overall rotation amount
2475 // is RLAmt.
2476 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2477 if (RLAmt1)
2478 V = SelectRotMask64(V, dl, RLAmt: RLAmt1, Repl32: false, MaskStart: 0, MaskEnd: 63);
2479 return SelectRotMaskIns64(Base, V, dl, RLAmt: RLAmt2, Repl32: false, MaskStart, MaskEnd);
2480 }
2481
2482 void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2483 if (BPermRewriterNoMasking)
2484 return;
2485
2486 // The idea here is the same as in the 32-bit version, but with additional
2487 // complications from the fact that Repl32 might be true. Because we
2488 // aggressively convert bit groups to Repl32 form (which, for small
2489 // rotation factors, involves no other change), and then coalesce, it might
2490 // be the case that a single 64-bit masking operation could handle both
2491 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2492 // form allowed coalescing, then we must use a 32-bit rotaton in order to
2493 // completely capture the new combined bit group.
2494
2495 for (ValueRotInfo &VRI : ValueRotsVec) {
2496 uint64_t Mask = 0;
2497
2498 // We need to add to the mask all bits from the associated bit groups.
2499 // If Repl32 is false, we need to add bits from bit groups that have
2500 // Repl32 true, but are trivially convertable to Repl32 false. Such a
2501 // group is trivially convertable if it overlaps only with the lower 32
2502 // bits, and the group has not been coalesced.
2503 auto MatchingBG = [VRI](const BitGroup &BG) {
2504 if (VRI.V != BG.V)
2505 return false;
2506
2507 unsigned EffRLAmt = BG.RLAmt;
2508 if (!VRI.Repl32 && BG.Repl32) {
2509 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2510 !BG.Repl32Coalesced) {
2511 if (BG.Repl32CR)
2512 EffRLAmt += 32;
2513 } else {
2514 return false;
2515 }
2516 } else if (VRI.Repl32 != BG.Repl32) {
2517 return false;
2518 }
2519
2520 return VRI.RLAmt == EffRLAmt;
2521 };
2522
2523 for (auto &BG : BitGroups) {
2524 if (!MatchingBG(BG))
2525 continue;
2526
2527 if (BG.StartIdx <= BG.EndIdx) {
2528 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2529 Mask |= (UINT64_C(1) << i);
2530 } else {
2531 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
2532 Mask |= (UINT64_C(1) << i);
2533 for (unsigned i = 0; i <= BG.EndIdx; ++i)
2534 Mask |= (UINT64_C(1) << i);
2535 }
2536 }
2537
2538 // We can use the 32-bit andi/andis technique if the mask does not
2539 // require any higher-order bits. This can save an instruction compared
2540 // to always using the general 64-bit technique.
2541 bool Use32BitInsts = isUInt<32>(x: Mask);
2542 // Compute the masks for andi/andis that would be necessary.
2543 unsigned ANDIMask = (Mask & UINT16_MAX),
2544 ANDISMask = (Mask >> 16) & UINT16_MAX;
2545
2546 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(x: Mask));
2547
2548 unsigned NumAndInsts = (unsigned) NeedsRotate +
2549 (unsigned) (bool) Res;
2550 unsigned NumOfSelectInsts = 0;
2551 selectI64Imm(CurDAG, dl, Imm: Mask, InstCnt: &NumOfSelectInsts);
2552 assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
2553 if (Use32BitInsts)
2554 NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2555 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2556 else
2557 NumAndInsts += NumOfSelectInsts + /* and */ 1;
2558
2559 unsigned NumRLInsts = 0;
2560 bool FirstBG = true;
2561 bool MoreBG = false;
2562 for (auto &BG : BitGroups) {
2563 if (!MatchingBG(BG)) {
2564 MoreBG = true;
2565 continue;
2566 }
2567 NumRLInsts +=
2568 SelectRotMask64Count(RLAmt: BG.RLAmt, Repl32: BG.Repl32, MaskStart: BG.StartIdx, MaskEnd: BG.EndIdx,
2569 IsIns: !FirstBG);
2570 FirstBG = false;
2571 }
2572
2573 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2574 << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
2575 << "\n\t\t\tisel using masking: " << NumAndInsts
2576 << " using rotates: " << NumRLInsts << "\n");
2577
2578 // When we'd use andi/andis, we bias toward using the rotates (andi only
2579 // has a record form, and is cracked on POWER cores). However, when using
2580 // general 64-bit constant formation, bias toward the constant form,
2581 // because that exposes more opportunities for CSE.
2582 if (NumAndInsts > NumRLInsts)
2583 continue;
2584 // When merging multiple bit groups, instruction or is used.
2585 // But when rotate is used, rldimi can inert the rotated value into any
2586 // register, so instruction or can be avoided.
2587 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2588 continue;
2589
2590 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2591
2592 if (InstCnt) *InstCnt += NumAndInsts;
2593
2594 SDValue VRot;
2595 // We actually need to generate a rotation if we have a non-zero rotation
2596 // factor or, in the Repl32 case, if we care about any of the
2597 // higher-order replicated bits. In the latter case, we generate a mask
2598 // backward so that it actually includes the entire 64 bits.
2599 if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(x: Mask)))
2600 VRot = SelectRotMask64(V: VRI.V, dl, RLAmt: VRI.RLAmt, Repl32: VRI.Repl32,
2601 MaskStart: VRI.Repl32 ? 31 : 0, MaskEnd: VRI.Repl32 ? 30 : 63);
2602 else
2603 VRot = VRI.V;
2604
2605 SDValue TotalVal;
2606 if (Use32BitInsts) {
2607 assert((ANDIMask != 0 || ANDISMask != 0) &&
2608 "No set bits in mask when using 32-bit ands for 64-bit value");
2609
2610 SDValue ANDIVal, ANDISVal;
2611 if (ANDIMask != 0)
2612 ANDIVal = SDValue(CurDAG->getMachineNode(Opcode: PPC::ANDI8_rec, dl, VT: MVT::i64,
2613 Op1: ExtendToInt64(V: VRot, dl),
2614 Op2: getI32Imm(Imm: ANDIMask, dl)),
2615 0);
2616 if (ANDISMask != 0)
2617 ANDISVal =
2618 SDValue(CurDAG->getMachineNode(Opcode: PPC::ANDIS8_rec, dl, VT: MVT::i64,
2619 Op1: ExtendToInt64(V: VRot, dl),
2620 Op2: getI32Imm(Imm: ANDISMask, dl)),
2621 0);
2622
2623 if (!ANDIVal)
2624 TotalVal = ANDISVal;
2625 else if (!ANDISVal)
2626 TotalVal = ANDIVal;
2627 else
2628 TotalVal = SDValue(CurDAG->getMachineNode(Opcode: PPC::OR8, dl, VT: MVT::i64,
2629 Op1: ExtendToInt64(V: ANDIVal, dl), Op2: ANDISVal), 0);
2630 } else {
2631 TotalVal = SDValue(selectI64Imm(CurDAG, dl, Imm: Mask), 0);
2632 TotalVal =
2633 SDValue(CurDAG->getMachineNode(Opcode: PPC::AND8, dl, VT: MVT::i64,
2634 Op1: ExtendToInt64(V: VRot, dl), Op2: TotalVal),
2635 0);
2636 }
2637
2638 if (!Res)
2639 Res = TotalVal;
2640 else
2641 Res = SDValue(CurDAG->getMachineNode(Opcode: PPC::OR8, dl, VT: MVT::i64,
2642 Op1: ExtendToInt64(V: Res, dl), Op2: TotalVal),
2643 0);
2644
2645 // Now, remove all groups with this underlying value and rotation
2646 // factor.
2647 eraseMatchingBitGroups(F: MatchingBG);
2648 }
2649 }
2650
2651 // Instruction selection for the 64-bit case.
2652 SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
2653 SDLoc dl(N);
2654 SDValue Res;
2655
2656 if (InstCnt) *InstCnt = 0;
2657
2658 // Take care of cases that should use andi/andis first.
2659 SelectAndParts64(dl, Res, InstCnt);
2660
2661 // If we've not yet selected a 'starting' instruction, and we have no zeros
2662 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2663 // number of groups), and start with this rotated value.
2664 if ((!NeedMask || LateMask) && !Res) {
2665 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2666 // groups will come first, and so the VRI representing the largest number
2667 // of groups might not be first (it might be the first Repl32 groups).
2668 unsigned MaxGroupsIdx = 0;
2669 if (!ValueRotsVec[0].Repl32) {
2670 for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
2671 if (ValueRotsVec[i].Repl32) {
2672 if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
2673 MaxGroupsIdx = i;
2674 break;
2675 }
2676 }
2677
2678 ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
2679 bool NeedsRotate = false;
2680 if (VRI.RLAmt) {
2681 NeedsRotate = true;
2682 } else if (VRI.Repl32) {
2683 for (auto &BG : BitGroups) {
2684 if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2685 BG.Repl32 != VRI.Repl32)
2686 continue;
2687
2688 // We don't need a rotate if the bit group is confined to the lower
2689 // 32 bits.
2690 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2691 continue;
2692
2693 NeedsRotate = true;
2694 break;
2695 }
2696 }
2697
2698 if (NeedsRotate)
2699 Res = SelectRotMask64(V: VRI.V, dl, RLAmt: VRI.RLAmt, Repl32: VRI.Repl32,
2700 MaskStart: VRI.Repl32 ? 31 : 0, MaskEnd: VRI.Repl32 ? 30 : 63,
2701 InstCnt);
2702 else
2703 Res = VRI.V;
2704
2705 // Now, remove all groups with this underlying value and rotation factor.
2706 if (Res)
2707 eraseMatchingBitGroups(F: [VRI](const BitGroup &BG) {
2708 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2709 BG.Repl32 == VRI.Repl32;
2710 });
2711 }
2712
2713 // Because 64-bit rotates are more flexible than inserts, we might have a
2714 // preference regarding which one we do first (to save one instruction).
2715 if (!Res)
2716 for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
2717 if (SelectRotMask64Count(RLAmt: I->RLAmt, Repl32: I->Repl32, MaskStart: I->StartIdx, MaskEnd: I->EndIdx,
2718 IsIns: false) <
2719 SelectRotMask64Count(RLAmt: I->RLAmt, Repl32: I->Repl32, MaskStart: I->StartIdx, MaskEnd: I->EndIdx,
2720 IsIns: true)) {
2721 if (I != BitGroups.begin()) {
2722 BitGroup BG = *I;
2723 BitGroups.erase(CI: I);
2724 BitGroups.insert(I: BitGroups.begin(), Elt: BG);
2725 }
2726
2727 break;
2728 }
2729 }
2730
2731 // Insert the other groups (one at a time).
2732 for (auto &BG : BitGroups) {
2733 if (!Res)
2734 Res = SelectRotMask64(V: BG.V, dl, RLAmt: BG.RLAmt, Repl32: BG.Repl32, MaskStart: BG.StartIdx,
2735 MaskEnd: BG.EndIdx, InstCnt);
2736 else
2737 Res = SelectRotMaskIns64(Base: Res, V: BG.V, dl, RLAmt: BG.RLAmt, Repl32: BG.Repl32,
2738 MaskStart: BG.StartIdx, MaskEnd: BG.EndIdx, InstCnt);
2739 }
2740
2741 if (LateMask) {
2742 uint64_t Mask = getZerosMask();
2743
2744 // We can use the 32-bit andi/andis technique if the mask does not
2745 // require any higher-order bits. This can save an instruction compared
2746 // to always using the general 64-bit technique.
2747 bool Use32BitInsts = isUInt<32>(x: Mask);
2748 // Compute the masks for andi/andis that would be necessary.
2749 unsigned ANDIMask = (Mask & UINT16_MAX),
2750 ANDISMask = (Mask >> 16) & UINT16_MAX;
2751
2752 if (Use32BitInsts) {
2753 assert((ANDIMask != 0 || ANDISMask != 0) &&
2754 "No set bits in mask when using 32-bit ands for 64-bit value");
2755
2756 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2757 (unsigned) (ANDISMask != 0) +
2758 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2759
2760 SDValue ANDIVal, ANDISVal;
2761 if (ANDIMask != 0)
2762 ANDIVal = SDValue(CurDAG->getMachineNode(Opcode: PPC::ANDI8_rec, dl, VT: MVT::i64,
2763 Op1: ExtendToInt64(V: Res, dl),
2764 Op2: getI32Imm(Imm: ANDIMask, dl)),
2765 0);
2766 if (ANDISMask != 0)
2767 ANDISVal =
2768 SDValue(CurDAG->getMachineNode(Opcode: PPC::ANDIS8_rec, dl, VT: MVT::i64,
2769 Op1: ExtendToInt64(V: Res, dl),
2770 Op2: getI32Imm(Imm: ANDISMask, dl)),
2771 0);
2772
2773 if (!ANDIVal)
2774 Res = ANDISVal;
2775 else if (!ANDISVal)
2776 Res = ANDIVal;
2777 else
2778 Res = SDValue(CurDAG->getMachineNode(Opcode: PPC::OR8, dl, VT: MVT::i64,
2779 Op1: ExtendToInt64(V: ANDIVal, dl), Op2: ANDISVal), 0);
2780 } else {
2781 unsigned NumOfSelectInsts = 0;
2782 SDValue MaskVal =
2783 SDValue(selectI64Imm(CurDAG, dl, Imm: Mask, InstCnt: &NumOfSelectInsts), 0);
2784 Res = SDValue(CurDAG->getMachineNode(Opcode: PPC::AND8, dl, VT: MVT::i64,
2785 Op1: ExtendToInt64(V: Res, dl), Op2: MaskVal),
2786 0);
2787 if (InstCnt)
2788 *InstCnt += NumOfSelectInsts + /* and */ 1;
2789 }
2790 }
2791
2792 return Res.getNode();
2793 }
2794
2795 SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2796 // Fill in BitGroups.
2797 collectBitGroups(LateMask);
2798 if (BitGroups.empty())
2799 return nullptr;
2800
2801 // For 64-bit values, figure out when we can use 32-bit instructions.
2802 if (Bits.size() == 64)
2803 assignRepl32BitGroups();
2804
2805 // Fill in ValueRotsVec.
2806 collectValueRotInfo();
2807
2808 if (Bits.size() == 32) {
2809 return Select32(N, LateMask, InstCnt);
2810 } else {
2811 assert(Bits.size() == 64 && "Not 64 bits here?");
2812 return Select64(N, LateMask, InstCnt);
2813 }
2814
2815 return nullptr;
2816 }
2817
2818 void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2819 erase_if(C&: BitGroups, P: F);
2820 }
2821
2822 SmallVector<ValueBit, 64> Bits;
2823
2824 bool NeedMask = false;
2825 SmallVector<unsigned, 64> RLAmt;
2826
2827 SmallVector<BitGroup, 16> BitGroups;
2828
2829 DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
2830 SmallVector<ValueRotInfo, 16> ValueRotsVec;
2831
2832 SelectionDAG *CurDAG = nullptr;
2833
2834public:
2835 BitPermutationSelector(SelectionDAG *DAG)
2836 : CurDAG(DAG) {}
2837
2838 // Here we try to match complex bit permutations into a set of
2839 // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2840 // known to produce optimal code for common cases (like i32 byte swapping).
2841 SDNode *Select(SDNode *N) {
2842 Memoizer.clear();
2843 auto Result =
2844 getValueBits(V: SDValue(N, 0), NumBits: N->getValueType(ResNo: 0).getSizeInBits());
2845 if (!Result.first)
2846 return nullptr;
2847 Bits = std::move(*Result.second);
2848
2849 LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2850 " selection for: ");
2851 LLVM_DEBUG(N->dump(CurDAG));
2852
2853 // Fill it RLAmt and set NeedMask.
2854 computeRotationAmounts();
2855
2856 if (!NeedMask)
2857 return Select(N, LateMask: false);
2858
2859 // We currently have two techniques for handling results with zeros: early
2860 // masking (the default) and late masking. Late masking is sometimes more
2861 // efficient, but because the structure of the bit groups is different, it
2862 // is hard to tell without generating both and comparing the results. With
2863 // late masking, we ignore zeros in the resulting value when inserting each
2864 // set of bit groups, and then mask in the zeros at the end. With early
2865 // masking, we only insert the non-zero parts of the result at every step.
2866
2867 unsigned InstCnt = 0, InstCntLateMask = 0;
2868 LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2869 SDNode *RN = Select(N, LateMask: false, InstCnt: &InstCnt);
2870 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2871
2872 LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2873 SDNode *RNLM = Select(N, LateMask: true, InstCnt: &InstCntLateMask);
2874 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2875 << " instructions\n");
2876
2877 if (InstCnt <= InstCntLateMask) {
2878 LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2879 return RN;
2880 }
2881
2882 LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2883 return RNLM;
2884 }
2885};
2886
2887class IntegerCompareEliminator {
2888 SelectionDAG *CurDAG;
2889 PPCDAGToDAGISel *S;
2890 // Conversion type for interpreting results of a 32-bit instruction as
2891 // a 64-bit value or vice versa.
2892 enum ExtOrTruncConversion { Ext, Trunc };
2893
2894 // Modifiers to guide how an ISD::SETCC node's result is to be computed
2895 // in a GPR.
2896 // ZExtOrig - use the original condition code, zero-extend value
2897 // ZExtInvert - invert the condition code, zero-extend value
2898 // SExtOrig - use the original condition code, sign-extend value
2899 // SExtInvert - invert the condition code, sign-extend value
2900 enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2901
2902 // Comparisons against zero to emit GPR code sequences for. Each of these
2903 // sequences may need to be emitted for two or more equivalent patterns.
2904 // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2905 // matters as well as the extension type: sext (-1/0), zext (1/0).
2906 // GEZExt - (zext (LHS >= 0))
2907 // GESExt - (sext (LHS >= 0))
2908 // LEZExt - (zext (LHS <= 0))
2909 // LESExt - (sext (LHS <= 0))
2910 enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2911
2912 SDNode *tryEXTEND(SDNode *N);
2913 SDNode *tryLogicOpOfCompares(SDNode *N);
2914 SDValue computeLogicOpInGPR(SDValue LogicOp);
2915 SDValue signExtendInputIfNeeded(SDValue Input);
2916 SDValue zeroExtendInputIfNeeded(SDValue Input);
2917 SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2918 SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2919 ZeroCompare CmpTy);
2920 SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2921 int64_t RHSValue, SDLoc dl);
2922 SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2923 int64_t RHSValue, SDLoc dl);
2924 SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2925 int64_t RHSValue, SDLoc dl);
2926 SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2927 int64_t RHSValue, SDLoc dl);
2928 SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2929
2930public:
2931 IntegerCompareEliminator(SelectionDAG *DAG,
2932 PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2933 assert(CurDAG->getTargetLoweringInfo()
2934 .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2935 "Only expecting to use this on 64 bit targets.");
2936 }
2937 SDNode *Select(SDNode *N) {
2938 if (CmpInGPR == ICGPR_None)
2939 return nullptr;
2940 switch (N->getOpcode()) {
2941 default: break;
2942 case ISD::ZERO_EXTEND:
2943 if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
2944 CmpInGPR == ICGPR_SextI64)
2945 return nullptr;
2946 [[fallthrough]];
2947 case ISD::SIGN_EXTEND:
2948 if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
2949 CmpInGPR == ICGPR_ZextI64)
2950 return nullptr;
2951 return tryEXTEND(N);
2952 case ISD::AND:
2953 case ISD::OR:
2954 case ISD::XOR:
2955 return tryLogicOpOfCompares(N);
2956 }
2957 return nullptr;
2958 }
2959};
2960
2961// The obvious case for wanting to keep the value in a GPR. Namely, the
2962// result of the comparison is actually needed in a GPR.
2963SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2964 assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2965 N->getOpcode() == ISD::SIGN_EXTEND) &&
2966 "Expecting a zero/sign extend node!");
2967 SDValue WideRes;
2968 // If we are zero-extending the result of a logical operation on i1
2969 // values, we can keep the values in GPRs.
2970 if (ISD::isBitwiseLogicOp(Opcode: N->getOperand(Num: 0).getOpcode()) &&
2971 N->getOperand(Num: 0).getValueType() == MVT::i1 &&
2972 N->getOpcode() == ISD::ZERO_EXTEND)
2973 WideRes = computeLogicOpInGPR(LogicOp: N->getOperand(Num: 0));
2974 else if (N->getOperand(Num: 0).getOpcode() != ISD::SETCC)
2975 return nullptr;
2976 else
2977 WideRes =
2978 getSETCCInGPR(Compare: N->getOperand(Num: 0),
2979 ConvOpts: N->getOpcode() == ISD::SIGN_EXTEND ?
2980 SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2981
2982 if (!WideRes)
2983 return nullptr;
2984
2985 bool Input32Bit = WideRes.getValueType() == MVT::i32;
2986 bool Output32Bit = N->getValueType(ResNo: 0) == MVT::i32;
2987
2988 NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2989 NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2990
2991 SDValue ConvOp = WideRes;
2992 if (Input32Bit != Output32Bit)
2993 ConvOp = addExtOrTrunc(NatWidthRes: WideRes, Conv: Input32Bit ? ExtOrTruncConversion::Ext :
2994 ExtOrTruncConversion::Trunc);
2995 return ConvOp.getNode();
2996}
2997
2998// Attempt to perform logical operations on the results of comparisons while
2999// keeping the values in GPRs. Without doing so, these would end up being
3000// lowered to CR-logical operations which suffer from significant latency and
3001// low ILP.
3002SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
3003 if (N->getValueType(ResNo: 0) != MVT::i1)
3004 return nullptr;
3005 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
3006 "Expected a logic operation on setcc results.");
3007 SDValue LoweredLogical = computeLogicOpInGPR(LogicOp: SDValue(N, 0));
3008 if (!LoweredLogical)
3009 return nullptr;
3010
3011 SDLoc dl(N);
3012 bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
3013 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
3014 SDValue CR0Reg = CurDAG->getRegister(Reg: PPC::CR0, VT: MVT::i32);
3015 SDValue LHS = LoweredLogical.getOperand(i: 0);
3016 SDValue RHS = LoweredLogical.getOperand(i: 1);
3017 SDValue WideOp;
3018 SDValue OpToConvToRecForm;
3019
3020 // Look through any 32-bit to 64-bit implicit extend nodes to find the
3021 // opcode that is input to the XORI.
3022 if (IsBitwiseNegate &&
3023 LoweredLogical.getOperand(i: 0).getMachineOpcode() == PPC::INSERT_SUBREG)
3024 OpToConvToRecForm = LoweredLogical.getOperand(i: 0).getOperand(i: 1);
3025 else if (IsBitwiseNegate)
3026 // If the input to the XORI isn't an extension, that's what we're after.
3027 OpToConvToRecForm = LoweredLogical.getOperand(i: 0);
3028 else
3029 // If this is not an XORI, it is a reg-reg logical op and we can convert
3030 // it to record-form.
3031 OpToConvToRecForm = LoweredLogical;
3032
3033 // Get the record-form version of the node we're looking to use to get the
3034 // CR result from.
3035 uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
3036 int NewOpc = PPCInstrInfo::getRecordFormOpcode(Opcode: NonRecOpc);
3037
3038 // Convert the right node to record-form. This is either the logical we're
3039 // looking at or it is the input node to the negation (if we're looking at
3040 // a bitwise negation).
3041 if (NewOpc != -1 && IsBitwiseNegate) {
3042 // The input to the XORI has a record-form. Use it.
3043 assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
3044 "Expected a PPC::XORI8 only for bitwise negation.");
3045 // Emit the record-form instruction.
3046 std::vector<SDValue> Ops;
3047 for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
3048 Ops.push_back(x: OpToConvToRecForm.getOperand(i));
3049
3050 WideOp =
3051 SDValue(CurDAG->getMachineNode(Opcode: NewOpc, dl,
3052 VT1: OpToConvToRecForm.getValueType(),
3053 VT2: MVT::Glue, Ops), 0);
3054 } else {
3055 assert((NewOpc != -1 || !IsBitwiseNegate) &&
3056 "No record form available for AND8/OR8/XOR8?");
3057 WideOp =
3058 SDValue(CurDAG->getMachineNode(Opcode: NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
3059 dl, VT1: MVT::i64, VT2: MVT::Glue, Op1: LHS, Op2: RHS),
3060 0);
3061 }
3062
3063 // Select this node to a single bit from CR0 set by the record-form node
3064 // just created. For bitwise negation, use the EQ bit which is the equivalent
3065 // of negating the result (i.e. it is a bit set when the result of the
3066 // operation is zero).
3067 SDValue SRIdxVal =
3068 CurDAG->getTargetConstant(Val: SubRegToExtract, DL: dl, VT: MVT::i32);
3069 SDValue CRBit =
3070 SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl,
3071 VT: MVT::i1, Op1: CR0Reg, Op2: SRIdxVal,
3072 Op3: WideOp.getValue(R: 1)), 0);
3073 return CRBit.getNode();
3074}
3075
3076// Lower a logical operation on i1 values into a GPR sequence if possible.
3077// The result can be kept in a GPR if requested.
3078// Three types of inputs can be handled:
3079// - SETCC
3080// - TRUNCATE
3081// - Logical operation (AND/OR/XOR)
3082// There is also a special case that is handled (namely a complement operation
3083// achieved with xor %a, -1).
3084SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
3085 assert(ISD::isBitwiseLogicOp(LogicOp.getOpcode()) &&
3086 "Can only handle logic operations here.");
3087 assert(LogicOp.getValueType() == MVT::i1 &&
3088 "Can only handle logic operations on i1 values here.");
3089 SDLoc dl(LogicOp);
3090 SDValue LHS, RHS;
3091
3092 // Special case: xor %a, -1
3093 bool IsBitwiseNegation = isBitwiseNot(V: LogicOp);
3094
3095 // Produces a GPR sequence for each operand of the binary logic operation.
3096 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
3097 // the value in a GPR and for logic operations, it will recursively produce
3098 // a GPR sequence for the operation.
3099 auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
3100 unsigned OperandOpcode = Operand.getOpcode();
3101 if (OperandOpcode == ISD::SETCC)
3102 return getSETCCInGPR(Compare: Operand, ConvOpts: SetccInGPROpts::ZExtOrig);
3103 else if (OperandOpcode == ISD::TRUNCATE) {
3104 SDValue InputOp = Operand.getOperand(i: 0);
3105 EVT InVT = InputOp.getValueType();
3106 return SDValue(CurDAG->getMachineNode(Opcode: InVT == MVT::i32 ? PPC::RLDICL_32 :
3107 PPC::RLDICL, dl, VT: InVT, Op1: InputOp,
3108 Op2: S->getI64Imm(Imm: 0, dl),
3109 Op3: S->getI64Imm(Imm: 63, dl)), 0);
3110 } else if (ISD::isBitwiseLogicOp(Opcode: OperandOpcode))
3111 return computeLogicOpInGPR(LogicOp: Operand);
3112 return SDValue();
3113 };
3114 LHS = getLogicOperand(LogicOp.getOperand(i: 0));
3115 RHS = getLogicOperand(LogicOp.getOperand(i: 1));
3116
3117 // If a GPR sequence can't be produced for the LHS we can't proceed.
3118 // Not producing a GPR sequence for the RHS is only a problem if this isn't
3119 // a bitwise negation operation.
3120 if (!LHS || (!RHS && !IsBitwiseNegation))
3121 return SDValue();
3122
3123 NumLogicOpsOnComparison++;
3124
3125 // We will use the inputs as 64-bit values.
3126 if (LHS.getValueType() == MVT::i32)
3127 LHS = addExtOrTrunc(NatWidthRes: LHS, Conv: ExtOrTruncConversion::Ext);
3128 if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
3129 RHS = addExtOrTrunc(NatWidthRes: RHS, Conv: ExtOrTruncConversion::Ext);
3130
3131 unsigned NewOpc;
3132 switch (LogicOp.getOpcode()) {
3133 default: llvm_unreachable("Unknown logic operation.");
3134 case ISD::AND: NewOpc = PPC::AND8; break;
3135 case ISD::OR: NewOpc = PPC::OR8; break;
3136 case ISD::XOR: NewOpc = PPC::XOR8; break;
3137 }
3138
3139 if (IsBitwiseNegation) {
3140 RHS = S->getI64Imm(Imm: 1, dl);
3141 NewOpc = PPC::XORI8;
3142 }
3143
3144 return SDValue(CurDAG->getMachineNode(Opcode: NewOpc, dl, VT: MVT::i64, Op1: LHS, Op2: RHS), 0);
3145
3146}
3147
3148/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
3149/// Otherwise just reinterpret it as a 64-bit value.
3150/// Useful when emitting comparison code for 32-bit values without using
3151/// the compare instruction (which only considers the lower 32-bits).
3152SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
3153 assert(Input.getValueType() == MVT::i32 &&
3154 "Can only sign-extend 32-bit values here.");
3155 unsigned Opc = Input.getOpcode();
3156
3157 // The value was sign extended and then truncated to 32-bits. No need to
3158 // sign extend it again.
3159 if (Opc == ISD::TRUNCATE &&
3160 (Input.getOperand(i: 0).getOpcode() == ISD::AssertSext ||
3161 Input.getOperand(i: 0).getOpcode() == ISD::SIGN_EXTEND))
3162 return addExtOrTrunc(NatWidthRes: Input, Conv: ExtOrTruncConversion::Ext);
3163
3164 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Val&: Input);
3165 // The input is a sign-extending load. All ppc sign-extending loads
3166 // sign-extend to the full 64-bits.
3167 if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
3168 return addExtOrTrunc(NatWidthRes: Input, Conv: ExtOrTruncConversion::Ext);
3169
3170 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Val&: Input);
3171 // We don't sign-extend constants.
3172 if (InputConst)
3173 return addExtOrTrunc(NatWidthRes: Input, Conv: ExtOrTruncConversion::Ext);
3174
3175 SDLoc dl(Input);
3176 SignExtensionsAdded++;
3177 return SDValue(CurDAG->getMachineNode(Opcode: PPC::EXTSW_32_64, dl,
3178 VT: MVT::i64, Op1: Input), 0);
3179}
3180
3181/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
3182/// Otherwise just reinterpret it as a 64-bit value.
3183/// Useful when emitting comparison code for 32-bit values without using
3184/// the compare instruction (which only considers the lower 32-bits).
3185SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
3186 assert(Input.getValueType() == MVT::i32 &&
3187 "Can only zero-extend 32-bit values here.");
3188 unsigned Opc = Input.getOpcode();
3189
3190 // The only condition under which we can omit the actual extend instruction:
3191 // - The value is a positive constant
3192 // - The value comes from a load that isn't a sign-extending load
3193 // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
3194 bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
3195 (Input.getOperand(i: 0).getOpcode() == ISD::AssertZext ||
3196 Input.getOperand(i: 0).getOpcode() == ISD::ZERO_EXTEND);
3197 if (IsTruncateOfZExt)
3198 return addExtOrTrunc(NatWidthRes: Input, Conv: ExtOrTruncConversion::Ext);
3199
3200 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Val&: Input);
3201 if (InputConst && InputConst->getSExtValue() >= 0)
3202 return addExtOrTrunc(NatWidthRes: Input, Conv: ExtOrTruncConversion::Ext);
3203
3204 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Val&: Input);
3205 // The input is a load that doesn't sign-extend (it will be zero-extended).
3206 if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
3207 return addExtOrTrunc(NatWidthRes: Input, Conv: ExtOrTruncConversion::Ext);
3208
3209 // None of the above, need to zero-extend.
3210 SDLoc dl(Input);
3211 ZeroExtensionsAdded++;
3212 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL_32_64, dl, VT: MVT::i64, Op1: Input,
3213 Op2: S->getI64Imm(Imm: 0, dl),
3214 Op3: S->getI64Imm(Imm: 32, dl)), 0);
3215}
3216
3217// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
3218// course not actual zero/sign extensions that will generate machine code,
3219// they're just a way to reinterpret a 32 bit value in a register as a
3220// 64 bit value and vice-versa.
3221SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
3222 ExtOrTruncConversion Conv) {
3223 SDLoc dl(NatWidthRes);
3224
3225 // For reinterpreting 32-bit values as 64 bit values, we generate
3226 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
3227 if (Conv == ExtOrTruncConversion::Ext) {
3228 SDValue ImDef(CurDAG->getMachineNode(Opcode: PPC::IMPLICIT_DEF, dl, VT: MVT::i64), 0);
3229 SDValue SubRegIdx =
3230 CurDAG->getTargetConstant(Val: PPC::sub_32, DL: dl, VT: MVT::i32);
3231 return SDValue(CurDAG->getMachineNode(Opcode: PPC::INSERT_SUBREG, dl, VT: MVT::i64,
3232 Op1: ImDef, Op2: NatWidthRes, Op3: SubRegIdx), 0);
3233 }
3234
3235 assert(Conv == ExtOrTruncConversion::Trunc &&
3236 "Unknown convertion between 32 and 64 bit values.");
3237 // For reinterpreting 64-bit values as 32-bit values, we just need to
3238 // EXTRACT_SUBREG (i.e. extract the low word).
3239 SDValue SubRegIdx =
3240 CurDAG->getTargetConstant(Val: PPC::sub_32, DL: dl, VT: MVT::i32);
3241 return SDValue(CurDAG->getMachineNode(Opcode: PPC::EXTRACT_SUBREG, dl, VT: MVT::i32,
3242 Op1: NatWidthRes, Op2: SubRegIdx), 0);
3243}
3244
3245// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
3246// Handle both zero-extensions and sign-extensions.
3247SDValue
3248IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
3249 ZeroCompare CmpTy) {
3250 EVT InVT = LHS.getValueType();
3251 bool Is32Bit = InVT == MVT::i32;
3252 SDValue ToExtend;
3253
3254 // Produce the value that needs to be either zero or sign extended.
3255 switch (CmpTy) {
3256 case ZeroCompare::GEZExt:
3257 case ZeroCompare::GESExt:
3258 ToExtend = SDValue(CurDAG->getMachineNode(Opcode: Is32Bit ? PPC::NOR : PPC::NOR8,
3259 dl, VT: InVT, Op1: LHS, Op2: LHS), 0);
3260 break;
3261 case ZeroCompare::LEZExt:
3262 case ZeroCompare::LESExt: {
3263 if (Is32Bit) {
3264 // Upper 32 bits cannot be undefined for this sequence.
3265 LHS = signExtendInputIfNeeded(Input: LHS);
3266 SDValue Neg =
3267 SDValue(CurDAG->getMachineNode(Opcode: PPC::NEG8, dl, VT: MVT::i64, Op1: LHS), 0);
3268 ToExtend =
3269 SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64,
3270 Op1: Neg, Op2: S->getI64Imm(Imm: 1, dl),
3271 Op3: S->getI64Imm(Imm: 63, dl)), 0);
3272 } else {
3273 SDValue Addi =
3274 SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDI8, dl, VT: MVT::i64, Op1: LHS,
3275 Op2: S->getI64Imm(Imm: ~0ULL, dl)), 0);
3276 ToExtend = SDValue(CurDAG->getMachineNode(Opcode: PPC::OR8, dl, VT: MVT::i64,
3277 Op1: Addi, Op2: LHS), 0);
3278 }
3279 break;
3280 }
3281 }
3282
3283 // For 64-bit sequences, the extensions are the same for the GE/LE cases.
3284 if (!Is32Bit &&
3285 (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
3286 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64,
3287 Op1: ToExtend, Op2: S->getI64Imm(Imm: 1, dl),
3288 Op3: S->getI64Imm(Imm: 63, dl)), 0);
3289 if (!Is32Bit &&
3290 (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
3291 return SDValue(CurDAG->getMachineNode(Opcode: PPC::SRADI, dl, VT: MVT::i64, Op1: ToExtend,
3292 Op2: S->getI64Imm(Imm: 63, dl)), 0);
3293
3294 assert(Is32Bit && "Should have handled the 32-bit sequences above.");
3295 // For 32-bit sequences, the extensions differ between GE/LE cases.
3296 switch (CmpTy) {
3297 case ZeroCompare::GEZExt: {
3298 SDValue ShiftOps[] = { ToExtend, S->getI32Imm(Imm: 1, dl), S->getI32Imm(Imm: 31, dl),
3299 S->getI32Imm(Imm: 31, dl) };
3300 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLWINM, dl, VT: MVT::i32,
3301 Ops: ShiftOps), 0);
3302 }
3303 case ZeroCompare::GESExt:
3304 return SDValue(CurDAG->getMachineNode(Opcode: PPC::SRAWI, dl, VT: MVT::i32, Op1: ToExtend,
3305 Op2: S->getI32Imm(Imm: 31, dl)), 0);
3306 case ZeroCompare::LEZExt:
3307 return SDValue(CurDAG->getMachineNode(Opcode: PPC::XORI8, dl, VT: MVT::i64, Op1: ToExtend,
3308 Op2: S->getI32Imm(Imm: 1, dl)), 0);
3309 case ZeroCompare::LESExt:
3310 return SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDI8, dl, VT: MVT::i64, Op1: ToExtend,
3311 Op2: S->getI32Imm(Imm: -1, dl)), 0);
3312 }
3313
3314 // The above case covers all the enumerators so it can't have a default clause
3315 // to avoid compiler warnings.
3316 llvm_unreachable("Unknown zero-comparison type.");
3317}
3318
3319/// Produces a zero-extended result of comparing two 32-bit values according to
3320/// the passed condition code.
3321SDValue
3322IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
3323 ISD::CondCode CC,
3324 int64_t RHSValue, SDLoc dl) {
3325 if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
3326 CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)
3327 return SDValue();
3328 bool IsRHSZero = RHSValue == 0;
3329 bool IsRHSOne = RHSValue == 1;
3330 bool IsRHSNegOne = RHSValue == -1LL;
3331 switch (CC) {
3332 default: return SDValue();
3333 case ISD::SETEQ: {
3334 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
3335 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
3336 SDValue Xor = IsRHSZero ? LHS :
3337 SDValue(CurDAG->getMachineNode(Opcode: PPC::XOR, dl, VT: MVT::i32, Op1: LHS, Op2: RHS), 0);
3338 SDValue Clz =
3339 SDValue(CurDAG->getMachineNode(Opcode: PPC::CNTLZW, dl, VT: MVT::i32, Op1: Xor), 0);
3340 SDValue ShiftOps[] = { Clz, S->getI32Imm(Imm: 27, dl), S->getI32Imm(Imm: 5, dl),
3341 S->getI32Imm(Imm: 31, dl) };
3342 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLWINM, dl, VT: MVT::i32,
3343 Ops: ShiftOps), 0);
3344 }
3345 case ISD::SETNE: {
3346 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
3347 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
3348 SDValue Xor = IsRHSZero ? LHS :
3349 SDValue(CurDAG->getMachineNode(Opcode: PPC::XOR, dl, VT: MVT::i32, Op1: LHS, Op2: RHS), 0);
3350 SDValue Clz =
3351 SDValue(CurDAG->getMachineNode(Opcode: PPC::CNTLZW, dl, VT: MVT::i32, Op1: Xor), 0);
3352 SDValue ShiftOps[] = { Clz, S->getI32Imm(Imm: 27, dl), S->getI32Imm(Imm: 5, dl),
3353 S->getI32Imm(Imm: 31, dl) };
3354 SDValue Shift =
3355 SDValue(CurDAG->getMachineNode(Opcode: PPC::RLWINM, dl, VT: MVT::i32, Ops: ShiftOps), 0);
3356 return SDValue(CurDAG->getMachineNode(Opcode: PPC::XORI, dl, VT: MVT::i32, Op1: Shift,
3357 Op2: S->getI32Imm(Imm: 1, dl)), 0);
3358 }
3359 case ISD::SETGE: {
3360 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
3361 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
3362 if(IsRHSZero)
3363 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GEZExt);
3364
3365 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3366 // by swapping inputs and falling through.
3367 std::swap(a&: LHS, b&: RHS);
3368 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3369 IsRHSZero = RHSConst && RHSConst->isZero();
3370 [[fallthrough]];
3371 }
3372 case ISD::SETLE: {
3373 if (CmpInGPR == ICGPR_NonExtIn)
3374 return SDValue();
3375 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
3376 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
3377 if(IsRHSZero) {
3378 if (CmpInGPR == ICGPR_NonExtIn)
3379 return SDValue();
3380 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LEZExt);
3381 }
3382
3383 // The upper 32-bits of the register can't be undefined for this sequence.
3384 LHS = signExtendInputIfNeeded(Input: LHS);
3385 RHS = signExtendInputIfNeeded(Input: RHS);
3386 SDValue Sub =
3387 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBF8, dl, VT: MVT::i64, Op1: LHS, Op2: RHS), 0);
3388 SDValue Shift =
3389 SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Op1: Sub,
3390 Op2: S->getI64Imm(Imm: 1, dl), Op3: S->getI64Imm(Imm: 63, dl)),
3391 0);
3392 return
3393 SDValue(CurDAG->getMachineNode(Opcode: PPC::XORI8, dl,
3394 VT: MVT::i64, Op1: Shift, Op2: S->getI32Imm(Imm: 1, dl)), 0);
3395 }
3396 case ISD::SETGT: {
3397 // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
3398 // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
3399 // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
3400 // Handle SETLT -1 (which is equivalent to SETGE 0).
3401 if (IsRHSNegOne)
3402 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GEZExt);
3403
3404 if (IsRHSZero) {
3405 if (CmpInGPR == ICGPR_NonExtIn)
3406 return SDValue();
3407 // The upper 32-bits of the register can't be undefined for this sequence.
3408 LHS = signExtendInputIfNeeded(Input: LHS);
3409 RHS = signExtendInputIfNeeded(Input: RHS);
3410 SDValue Neg =
3411 SDValue(CurDAG->getMachineNode(Opcode: PPC::NEG8, dl, VT: MVT::i64, Op1: LHS), 0);
3412 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64,
3413 Op1: Neg, Op2: S->getI32Imm(Imm: 1, dl), Op3: S->getI32Imm(Imm: 63, dl)), 0);
3414 }
3415 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3416 // (%b < %a) by swapping inputs and falling through.
3417 std::swap(a&: LHS, b&: RHS);
3418 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3419 IsRHSZero = RHSConst && RHSConst->isZero();
3420 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3421 [[fallthrough]];
3422 }
3423 case ISD::SETLT: {
3424 // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3425 // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
3426 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
3427 // Handle SETLT 1 (which is equivalent to SETLE 0).
3428 if (IsRHSOne) {
3429 if (CmpInGPR == ICGPR_NonExtIn)
3430 return SDValue();
3431 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LEZExt);
3432 }
3433
3434 if (IsRHSZero) {
3435 SDValue ShiftOps[] = { LHS, S->getI32Imm(Imm: 1, dl), S->getI32Imm(Imm: 31, dl),
3436 S->getI32Imm(Imm: 31, dl) };
3437 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLWINM, dl, VT: MVT::i32,
3438 Ops: ShiftOps), 0);
3439 }
3440
3441 if (CmpInGPR == ICGPR_NonExtIn)
3442 return SDValue();
3443 // The upper 32-bits of the register can't be undefined for this sequence.
3444 LHS = signExtendInputIfNeeded(Input: LHS);
3445 RHS = signExtendInputIfNeeded(Input: RHS);
3446 SDValue SUBFNode =
3447 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBF8, dl, VT: MVT::i64, Op1: RHS, Op2: LHS), 0);
3448 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64,
3449 Op1: SUBFNode, Op2: S->getI64Imm(Imm: 1, dl),
3450 Op3: S->getI64Imm(Imm: 63, dl)), 0);
3451 }
3452 case ISD::SETUGE:
3453 // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
3454 // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
3455 std::swap(a&: LHS, b&: RHS);
3456 [[fallthrough]];
3457 case ISD::SETULE: {
3458 if (CmpInGPR == ICGPR_NonExtIn)
3459 return SDValue();
3460 // The upper 32-bits of the register can't be undefined for this sequence.
3461 LHS = zeroExtendInputIfNeeded(Input: LHS);
3462 RHS = zeroExtendInputIfNeeded(Input: RHS);
3463 SDValue Subtract =
3464 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBF8, dl, VT: MVT::i64, Op1: LHS, Op2: RHS), 0);
3465 SDValue SrdiNode =
3466 SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64,
3467 Op1: Subtract, Op2: S->getI64Imm(Imm: 1, dl),
3468 Op3: S->getI64Imm(Imm: 63, dl)), 0);
3469 return SDValue(CurDAG->getMachineNode(Opcode: PPC::XORI8, dl, VT: MVT::i64, Op1: SrdiNode,
3470 Op2: S->getI32Imm(Imm: 1, dl)), 0);
3471 }
3472 case ISD::SETUGT:
3473 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3474 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3475 std::swap(a&: LHS, b&: RHS);
3476 [[fallthrough]];
3477 case ISD::SETULT: {
3478 if (CmpInGPR == ICGPR_NonExtIn)
3479 return SDValue();
3480 // The upper 32-bits of the register can't be undefined for this sequence.
3481 LHS = zeroExtendInputIfNeeded(Input: LHS);
3482 RHS = zeroExtendInputIfNeeded(Input: RHS);
3483 SDValue Subtract =
3484 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBF8, dl, VT: MVT::i64, Op1: RHS, Op2: LHS), 0);
3485 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64,
3486 Op1: Subtract, Op2: S->getI64Imm(Imm: 1, dl),
3487 Op3: S->getI64Imm(Imm: 63, dl)), 0);
3488 }
3489 }
3490}
3491
3492/// Produces a sign-extended result of comparing two 32-bit values according to
3493/// the passed condition code.
3494SDValue
3495IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3496 ISD::CondCode CC,
3497 int64_t RHSValue, SDLoc dl) {
3498 if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
3499 CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)
3500 return SDValue();
3501 bool IsRHSZero = RHSValue == 0;
3502 bool IsRHSOne = RHSValue == 1;
3503 bool IsRHSNegOne = RHSValue == -1LL;
3504
3505 switch (CC) {
3506 default: return SDValue();
3507 case ISD::SETEQ: {
3508 // (sext (setcc %a, %b, seteq)) ->
3509 // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
3510 // (sext (setcc %a, 0, seteq)) ->
3511 // (ashr (shl (ctlz %a), 58), 63)
3512 SDValue CountInput = IsRHSZero ? LHS :
3513 SDValue(CurDAG->getMachineNode(Opcode: PPC::XOR, dl, VT: MVT::i32, Op1: LHS, Op2: RHS), 0);
3514 SDValue Cntlzw =
3515 SDValue(CurDAG->getMachineNode(Opcode: PPC::CNTLZW, dl, VT: MVT::i32, Op1: CountInput), 0);
3516 SDValue SHLOps[] = { Cntlzw, S->getI32Imm(Imm: 27, dl),
3517 S->getI32Imm(Imm: 5, dl), S->getI32Imm(Imm: 31, dl) };
3518 SDValue Slwi =
3519 SDValue(CurDAG->getMachineNode(Opcode: PPC::RLWINM, dl, VT: MVT::i32, Ops: SHLOps), 0);
3520 return SDValue(CurDAG->getMachineNode(Opcode: PPC::NEG, dl, VT: MVT::i32, Op1: Slwi), 0);
3521 }
3522 case ISD::SETNE: {
3523 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3524 // flip the bit, finally take 2's complement.
3525 // (sext (setcc %a, %b, setne)) ->
3526 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3527 // Same as above, but the first xor is not needed.
3528 // (sext (setcc %a, 0, setne)) ->
3529 // (neg (xor (lshr (ctlz %a), 5), 1))
3530 SDValue Xor = IsRHSZero ? LHS :
3531 SDValue(CurDAG->getMachineNode(Opcode: PPC::XOR, dl, VT: MVT::i32, Op1: LHS, Op2: RHS), 0);
3532 SDValue Clz =
3533 SDValue(CurDAG->getMachineNode(Opcode: PPC::CNTLZW, dl, VT: MVT::i32, Op1: Xor), 0);
3534 SDValue ShiftOps[] =
3535 { Clz, S->getI32Imm(Imm: 27, dl), S->getI32Imm(Imm: 5, dl), S->getI32Imm(Imm: 31, dl) };
3536 SDValue Shift =
3537 SDValue(CurDAG->getMachineNode(Opcode: PPC::RLWINM, dl, VT: MVT::i32, Ops: ShiftOps), 0);
3538 SDValue Xori =
3539 SDValue(CurDAG->getMachineNode(Opcode: PPC::XORI, dl, VT: MVT::i32, Op1: Shift,
3540 Op2: S->getI32Imm(Imm: 1, dl)), 0);
3541 return SDValue(CurDAG->getMachineNode(Opcode: PPC::NEG, dl, VT: MVT::i32, Op1: Xori), 0);
3542 }
3543 case ISD::SETGE: {
3544 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3545 // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3546 if (IsRHSZero)
3547 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GESExt);
3548
3549 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3550 // by swapping inputs and falling through.
3551 std::swap(a&: LHS, b&: RHS);
3552 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3553 IsRHSZero = RHSConst && RHSConst->isZero();
3554 [[fallthrough]];
3555 }
3556 case ISD::SETLE: {
3557 if (CmpInGPR == ICGPR_NonExtIn)
3558 return SDValue();
3559 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)
3560 // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3561 if (IsRHSZero)
3562 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LESExt);
3563
3564 // The upper 32-bits of the register can't be undefined for this sequence.
3565 LHS = signExtendInputIfNeeded(Input: LHS);
3566 RHS = signExtendInputIfNeeded(Input: RHS);
3567 SDValue SUBFNode =
3568 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBF8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3569 Op1: LHS, Op2: RHS), 0);
3570 SDValue Srdi =
3571 SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64,
3572 Op1: SUBFNode, Op2: S->getI64Imm(Imm: 1, dl),
3573 Op3: S->getI64Imm(Imm: 63, dl)), 0);
3574 return SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDI8, dl, VT: MVT::i64, Op1: Srdi,
3575 Op2: S->getI32Imm(Imm: -1, dl)), 0);
3576 }
3577 case ISD::SETGT: {
3578 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3579 // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3580 // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3581 if (IsRHSNegOne)
3582 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GESExt);
3583 if (IsRHSZero) {
3584 if (CmpInGPR == ICGPR_NonExtIn)
3585 return SDValue();
3586 // The upper 32-bits of the register can't be undefined for this sequence.
3587 LHS = signExtendInputIfNeeded(Input: LHS);
3588 RHS = signExtendInputIfNeeded(Input: RHS);
3589 SDValue Neg =
3590 SDValue(CurDAG->getMachineNode(Opcode: PPC::NEG8, dl, VT: MVT::i64, Op1: LHS), 0);
3591 return SDValue(CurDAG->getMachineNode(Opcode: PPC::SRADI, dl, VT: MVT::i64, Op1: Neg,
3592 Op2: S->getI64Imm(Imm: 63, dl)), 0);
3593 }
3594 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3595 // (%b < %a) by swapping inputs and falling through.
3596 std::swap(a&: LHS, b&: RHS);
3597 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3598 IsRHSZero = RHSConst && RHSConst->isZero();
3599 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3600 [[fallthrough]];
3601 }
3602 case ISD::SETLT: {
3603 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)
3604 // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1)
3605 // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31)
3606 if (IsRHSOne) {
3607 if (CmpInGPR == ICGPR_NonExtIn)
3608 return SDValue();
3609 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LESExt);
3610 }
3611 if (IsRHSZero)
3612 return SDValue(CurDAG->getMachineNode(Opcode: PPC::SRAWI, dl, VT: MVT::i32, Op1: LHS,
3613 Op2: S->getI32Imm(Imm: 31, dl)), 0);
3614
3615 if (CmpInGPR == ICGPR_NonExtIn)
3616 return SDValue();
3617 // The upper 32-bits of the register can't be undefined for this sequence.
3618 LHS = signExtendInputIfNeeded(Input: LHS);
3619 RHS = signExtendInputIfNeeded(Input: RHS);
3620 SDValue SUBFNode =
3621 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBF8, dl, VT: MVT::i64, Op1: RHS, Op2: LHS), 0);
3622 return SDValue(CurDAG->getMachineNode(Opcode: PPC::SRADI, dl, VT: MVT::i64,
3623 Op1: SUBFNode, Op2: S->getI64Imm(Imm: 63, dl)), 0);
3624 }
3625 case ISD::SETUGE:
3626 // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3627 // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3628 std::swap(a&: LHS, b&: RHS);
3629 [[fallthrough]];
3630 case ISD::SETULE: {
3631 if (CmpInGPR == ICGPR_NonExtIn)
3632 return SDValue();
3633 // The upper 32-bits of the register can't be undefined for this sequence.
3634 LHS = zeroExtendInputIfNeeded(Input: LHS);
3635 RHS = zeroExtendInputIfNeeded(Input: RHS);
3636 SDValue Subtract =
3637 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBF8, dl, VT: MVT::i64, Op1: LHS, Op2: RHS), 0);
3638 SDValue Shift =
3639 SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Op1: Subtract,
3640 Op2: S->getI32Imm(Imm: 1, dl), Op3: S->getI32Imm(Imm: 63,dl)),
3641 0);
3642 return SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDI8, dl, VT: MVT::i64, Op1: Shift,
3643 Op2: S->getI32Imm(Imm: -1, dl)), 0);
3644 }
3645 case ISD::SETUGT:
3646 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3647 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)
3648 std::swap(a&: LHS, b&: RHS);
3649 [[fallthrough]];
3650 case ISD::SETULT: {
3651 if (CmpInGPR == ICGPR_NonExtIn)
3652 return SDValue();
3653 // The upper 32-bits of the register can't be undefined for this sequence.
3654 LHS = zeroExtendInputIfNeeded(Input: LHS);
3655 RHS = zeroExtendInputIfNeeded(Input: RHS);
3656 SDValue Subtract =
3657 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBF8, dl, VT: MVT::i64, Op1: RHS, Op2: LHS), 0);
3658 return SDValue(CurDAG->getMachineNode(Opcode: PPC::SRADI, dl, VT: MVT::i64,
3659 Op1: Subtract, Op2: S->getI64Imm(Imm: 63, dl)), 0);
3660 }
3661 }
3662}
3663
3664/// Produces a zero-extended result of comparing two 64-bit values according to
3665/// the passed condition code.
3666SDValue
3667IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3668 ISD::CondCode CC,
3669 int64_t RHSValue, SDLoc dl) {
3670 if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3671 CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
3672 return SDValue();
3673 bool IsRHSZero = RHSValue == 0;
3674 bool IsRHSOne = RHSValue == 1;
3675 bool IsRHSNegOne = RHSValue == -1LL;
3676 switch (CC) {
3677 default: return SDValue();
3678 case ISD::SETEQ: {
3679 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3680 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3681 SDValue Xor = IsRHSZero ? LHS :
3682 SDValue(CurDAG->getMachineNode(Opcode: PPC::XOR8, dl, VT: MVT::i64, Op1: LHS, Op2: RHS), 0);
3683 SDValue Clz =
3684 SDValue(CurDAG->getMachineNode(Opcode: PPC::CNTLZD, dl, VT: MVT::i64, Op1: Xor), 0);
3685 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Op1: Clz,
3686 Op2: S->getI64Imm(Imm: 58, dl),
3687 Op3: S->getI64Imm(Imm: 63, dl)), 0);
3688 }
3689 case ISD::SETNE: {
3690 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3691 // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3692 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3693 // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3694 SDValue Xor = IsRHSZero ? LHS :
3695 SDValue(CurDAG->getMachineNode(Opcode: PPC::XOR8, dl, VT: MVT::i64, Op1: LHS, Op2: RHS), 0);
3696 SDValue AC =
3697 SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDIC8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3698 Op1: Xor, Op2: S->getI32Imm(Imm: ~0U, dl)), 0);
3699 return SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFE8, dl, VT: MVT::i64, Op1: AC,
3700 Op2: Xor, Op3: AC.getValue(R: 1)), 0);
3701 }
3702 case ISD::SETGE: {
3703 // {subc.reg, subc.CA} = (subcarry %a, %b)
3704 // (zext (setcc %a, %b, setge)) ->
3705 // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3706 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3707 if (IsRHSZero)
3708 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GEZExt);
3709 std::swap(a&: LHS, b&: RHS);
3710 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3711 IsRHSZero = RHSConst && RHSConst->isZero();
3712 [[fallthrough]];
3713 }
3714 case ISD::SETLE: {
3715 // {subc.reg, subc.CA} = (subcarry %b, %a)
3716 // (zext (setcc %a, %b, setge)) ->
3717 // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3718 // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)
3719 if (IsRHSZero)
3720 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LEZExt);
3721 SDValue ShiftL =
3722 SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Op1: LHS,
3723 Op2: S->getI64Imm(Imm: 1, dl),
3724 Op3: S->getI64Imm(Imm: 63, dl)), 0);
3725 SDValue ShiftR =
3726 SDValue(CurDAG->getMachineNode(Opcode: PPC::SRADI, dl, VT: MVT::i64, Op1: RHS,
3727 Op2: S->getI64Imm(Imm: 63, dl)), 0);
3728 SDValue SubtractCarry =
3729 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFC8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3730 Op1: LHS, Op2: RHS), 1);
3731 return SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDE8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3732 Op1: ShiftR, Op2: ShiftL, Op3: SubtractCarry), 0);
3733 }
3734 case ISD::SETGT: {
3735 // {subc.reg, subc.CA} = (subcarry %b, %a)
3736 // (zext (setcc %a, %b, setgt)) ->
3737 // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3738 // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3739 if (IsRHSNegOne)
3740 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GEZExt);
3741 if (IsRHSZero) {
3742 SDValue Addi =
3743 SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDI8, dl, VT: MVT::i64, Op1: LHS,
3744 Op2: S->getI64Imm(Imm: ~0ULL, dl)), 0);
3745 SDValue Nor =
3746 SDValue(CurDAG->getMachineNode(Opcode: PPC::NOR8, dl, VT: MVT::i64, Op1: Addi, Op2: LHS), 0);
3747 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Op1: Nor,
3748 Op2: S->getI64Imm(Imm: 1, dl),
3749 Op3: S->getI64Imm(Imm: 63, dl)), 0);
3750 }
3751 std::swap(a&: LHS, b&: RHS);
3752 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3753 IsRHSZero = RHSConst && RHSConst->isZero();
3754 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3755 [[fallthrough]];
3756 }
3757 case ISD::SETLT: {
3758 // {subc.reg, subc.CA} = (subcarry %a, %b)
3759 // (zext (setcc %a, %b, setlt)) ->
3760 // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3761 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3762 if (IsRHSOne)
3763 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LEZExt);
3764 if (IsRHSZero)
3765 return SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Op1: LHS,
3766 Op2: S->getI64Imm(Imm: 1, dl),
3767 Op3: S->getI64Imm(Imm: 63, dl)), 0);
3768 SDValue SRADINode =
3769 SDValue(CurDAG->getMachineNode(Opcode: PPC::SRADI, dl, VT: MVT::i64,
3770 Op1: LHS, Op2: S->getI64Imm(Imm: 63, dl)), 0);
3771 SDValue SRDINode =
3772 SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64,
3773 Op1: RHS, Op2: S->getI64Imm(Imm: 1, dl),
3774 Op3: S->getI64Imm(Imm: 63, dl)), 0);
3775 SDValue SUBFC8Carry =
3776 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFC8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3777 Op1: RHS, Op2: LHS), 1);
3778 SDValue ADDE8Node =
3779 SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDE8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3780 Op1: SRDINode, Op2: SRADINode, Op3: SUBFC8Carry), 0);
3781 return SDValue(CurDAG->getMachineNode(Opcode: PPC::XORI8, dl, VT: MVT::i64,
3782 Op1: ADDE8Node, Op2: S->getI64Imm(Imm: 1, dl)), 0);
3783 }
3784 case ISD::SETUGE:
3785 // {subc.reg, subc.CA} = (subcarry %a, %b)
3786 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3787 std::swap(a&: LHS, b&: RHS);
3788 [[fallthrough]];
3789 case ISD::SETULE: {
3790 // {subc.reg, subc.CA} = (subcarry %b, %a)
3791 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3792 SDValue SUBFC8Carry =
3793 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFC8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3794 Op1: LHS, Op2: RHS), 1);
3795 SDValue SUBFE8Node =
3796 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFE8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3797 Op1: LHS, Op2: LHS, Op3: SUBFC8Carry), 0);
3798 return SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDI8, dl, VT: MVT::i64,
3799 Op1: SUBFE8Node, Op2: S->getI64Imm(Imm: 1, dl)), 0);
3800 }
3801 case ISD::SETUGT:
3802 // {subc.reg, subc.CA} = (subcarry %b, %a)
3803 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3804 std::swap(a&: LHS, b&: RHS);
3805 [[fallthrough]];
3806 case ISD::SETULT: {
3807 // {subc.reg, subc.CA} = (subcarry %a, %b)
3808 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3809 SDValue SubtractCarry =
3810 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFC8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3811 Op1: RHS, Op2: LHS), 1);
3812 SDValue ExtSub =
3813 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFE8, dl, VT: MVT::i64,
3814 Op1: LHS, Op2: LHS, Op3: SubtractCarry), 0);
3815 return SDValue(CurDAG->getMachineNode(Opcode: PPC::NEG8, dl, VT: MVT::i64,
3816 Op1: ExtSub), 0);
3817 }
3818 }
3819}
3820
3821/// Produces a sign-extended result of comparing two 64-bit values according to
3822/// the passed condition code.
3823SDValue
3824IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3825 ISD::CondCode CC,
3826 int64_t RHSValue, SDLoc dl) {
3827 if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3828 CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
3829 return SDValue();
3830 bool IsRHSZero = RHSValue == 0;
3831 bool IsRHSOne = RHSValue == 1;
3832 bool IsRHSNegOne = RHSValue == -1LL;
3833 switch (CC) {
3834 default: return SDValue();
3835 case ISD::SETEQ: {
3836 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3837 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3838 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3839 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3840 SDValue AddInput = IsRHSZero ? LHS :
3841 SDValue(CurDAG->getMachineNode(Opcode: PPC::XOR8, dl, VT: MVT::i64, Op1: LHS, Op2: RHS), 0);
3842 SDValue Addic =
3843 SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDIC8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3844 Op1: AddInput, Op2: S->getI32Imm(Imm: ~0U, dl)), 0);
3845 return SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFE8, dl, VT: MVT::i64, Op1: Addic,
3846 Op2: Addic, Op3: Addic.getValue(R: 1)), 0);
3847 }
3848 case ISD::SETNE: {
3849 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3850 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3851 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3852 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3853 SDValue Xor = IsRHSZero ? LHS :
3854 SDValue(CurDAG->getMachineNode(Opcode: PPC::XOR8, dl, VT: MVT::i64, Op1: LHS, Op2: RHS), 0);
3855 SDValue SC =
3856 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFIC8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3857 Op1: Xor, Op2: S->getI32Imm(Imm: 0, dl)), 0);
3858 return SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFE8, dl, VT: MVT::i64, Op1: SC,
3859 Op2: SC, Op3: SC.getValue(R: 1)), 0);
3860 }
3861 case ISD::SETGE: {
3862 // {subc.reg, subc.CA} = (subcarry %a, %b)
3863 // (zext (setcc %a, %b, setge)) ->
3864 // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3865 // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3866 if (IsRHSZero)
3867 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GESExt);
3868 std::swap(a&: LHS, b&: RHS);
3869 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3870 IsRHSZero = RHSConst && RHSConst->isZero();
3871 [[fallthrough]];
3872 }
3873 case ISD::SETLE: {
3874 // {subc.reg, subc.CA} = (subcarry %b, %a)
3875 // (zext (setcc %a, %b, setge)) ->
3876 // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3877 // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63)
3878 if (IsRHSZero)
3879 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LESExt);
3880 SDValue ShiftR =
3881 SDValue(CurDAG->getMachineNode(Opcode: PPC::SRADI, dl, VT: MVT::i64, Op1: RHS,
3882 Op2: S->getI64Imm(Imm: 63, dl)), 0);
3883 SDValue ShiftL =
3884 SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64, Op1: LHS,
3885 Op2: S->getI64Imm(Imm: 1, dl),
3886 Op3: S->getI64Imm(Imm: 63, dl)), 0);
3887 SDValue SubtractCarry =
3888 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFC8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3889 Op1: LHS, Op2: RHS), 1);
3890 SDValue Adde =
3891 SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDE8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3892 Op1: ShiftR, Op2: ShiftL, Op3: SubtractCarry), 0);
3893 return SDValue(CurDAG->getMachineNode(Opcode: PPC::NEG8, dl, VT: MVT::i64, Op1: Adde), 0);
3894 }
3895 case ISD::SETGT: {
3896 // {subc.reg, subc.CA} = (subcarry %b, %a)
3897 // (zext (setcc %a, %b, setgt)) ->
3898 // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3899 // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3900 if (IsRHSNegOne)
3901 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GESExt);
3902 if (IsRHSZero) {
3903 SDValue Add =
3904 SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDI8, dl, VT: MVT::i64, Op1: LHS,
3905 Op2: S->getI64Imm(Imm: -1, dl)), 0);
3906 SDValue Nor =
3907 SDValue(CurDAG->getMachineNode(Opcode: PPC::NOR8, dl, VT: MVT::i64, Op1: Add, Op2: LHS), 0);
3908 return SDValue(CurDAG->getMachineNode(Opcode: PPC::SRADI, dl, VT: MVT::i64, Op1: Nor,
3909 Op2: S->getI64Imm(Imm: 63, dl)), 0);
3910 }
3911 std::swap(a&: LHS, b&: RHS);
3912 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3913 IsRHSZero = RHSConst && RHSConst->isZero();
3914 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3915 [[fallthrough]];
3916 }
3917 case ISD::SETLT: {
3918 // {subc.reg, subc.CA} = (subcarry %a, %b)
3919 // (zext (setcc %a, %b, setlt)) ->
3920 // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3921 // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3922 if (IsRHSOne)
3923 return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LESExt);
3924 if (IsRHSZero) {
3925 return SDValue(CurDAG->getMachineNode(Opcode: PPC::SRADI, dl, VT: MVT::i64, Op1: LHS,
3926 Op2: S->getI64Imm(Imm: 63, dl)), 0);
3927 }
3928 SDValue SRADINode =
3929 SDValue(CurDAG->getMachineNode(Opcode: PPC::SRADI, dl, VT: MVT::i64,
3930 Op1: LHS, Op2: S->getI64Imm(Imm: 63, dl)), 0);
3931 SDValue SRDINode =
3932 SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl, VT: MVT::i64,
3933 Op1: RHS, Op2: S->getI64Imm(Imm: 1, dl),
3934 Op3: S->getI64Imm(Imm: 63, dl)), 0);
3935 SDValue SUBFC8Carry =
3936 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFC8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3937 Op1: RHS, Op2: LHS), 1);
3938 SDValue ADDE8Node =
3939 SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDE8, dl, VT: MVT::i64,
3940 Op1: SRDINode, Op2: SRADINode, Op3: SUBFC8Carry), 0);
3941 SDValue XORI8Node =
3942 SDValue(CurDAG->getMachineNode(Opcode: PPC::XORI8, dl, VT: MVT::i64,
3943 Op1: ADDE8Node, Op2: S->getI64Imm(Imm: 1, dl)), 0);
3944 return SDValue(CurDAG->getMachineNode(Opcode: PPC::NEG8, dl, VT: MVT::i64,
3945 Op1: XORI8Node), 0);
3946 }
3947 case ISD::SETUGE:
3948 // {subc.reg, subc.CA} = (subcarry %a, %b)
3949 // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3950 std::swap(a&: LHS, b&: RHS);
3951 [[fallthrough]];
3952 case ISD::SETULE: {
3953 // {subc.reg, subc.CA} = (subcarry %b, %a)
3954 // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3955 SDValue SubtractCarry =
3956 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFC8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3957 Op1: LHS, Op2: RHS), 1);
3958 SDValue ExtSub =
3959 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFE8, dl, VT1: MVT::i64, VT2: MVT::Glue, Op1: LHS,
3960 Op2: LHS, Op3: SubtractCarry), 0);
3961 return SDValue(CurDAG->getMachineNode(Opcode: PPC::NOR8, dl, VT: MVT::i64,
3962 Op1: ExtSub, Op2: ExtSub), 0);
3963 }
3964 case ISD::SETUGT:
3965 // {subc.reg, subc.CA} = (subcarry %b, %a)
3966 // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3967 std::swap(a&: LHS, b&: RHS);
3968 [[fallthrough]];
3969 case ISD::SETULT: {
3970 // {subc.reg, subc.CA} = (subcarry %a, %b)
3971 // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3972 SDValue SubCarry =
3973 SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFC8, dl, VT1: MVT::i64, VT2: MVT::Glue,
3974 Op1: RHS, Op2: LHS), 1);
3975 return SDValue(CurDAG->getMachineNode(Opcode: PPC::SUBFE8, dl, VT: MVT::i64,
3976 Op1: LHS, Op2: LHS, Op3: SubCarry), 0);
3977 }
3978 }
3979}
3980
3981/// Do all uses of this SDValue need the result in a GPR?
3982/// This is meant to be used on values that have type i1 since
3983/// it is somewhat meaningless to ask if values of other types
3984/// should be kept in GPR's.
3985static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3986 assert(Compare.getOpcode() == ISD::SETCC &&
3987 "An ISD::SETCC node required here.");
3988
3989 // For values that have a single use, the caller should obviously already have
3990 // checked if that use is an extending use. We check the other uses here.
3991 if (Compare.hasOneUse())
3992 return true;
3993 // We want the value in a GPR if it is being extended, used for a select, or
3994 // used in logical operations.
3995 for (auto *CompareUse : Compare.getNode()->users())
3996 if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3997 CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3998 CompareUse->getOpcode() != ISD::SELECT &&
3999 !ISD::isBitwiseLogicOp(Opcode: CompareUse->getOpcode())) {
4000 OmittedForNonExtendUses++;
4001 return false;
4002 }
4003 return true;
4004}
4005
4006/// Returns an equivalent of a SETCC node but with the result the same width as
4007/// the inputs. This can also be used for SELECT_CC if either the true or false
4008/// values is a power of two while the other is zero.
4009SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
4010 SetccInGPROpts ConvOpts) {
4011 assert((Compare.getOpcode() == ISD::SETCC ||
4012 Compare.getOpcode() == ISD::SELECT_CC) &&
4013 "An ISD::SETCC node required here.");
4014
4015 // Don't convert this comparison to a GPR sequence because there are uses
4016 // of the i1 result (i.e. uses that require the result in the CR).
4017 if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
4018 return SDValue();
4019
4020 SDValue LHS = Compare.getOperand(i: 0);
4021 SDValue RHS = Compare.getOperand(i: 1);
4022
4023 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
4024 int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
4025 ISD::CondCode CC =
4026 cast<CondCodeSDNode>(Val: Compare.getOperand(i: CCOpNum))->get();
4027 EVT InputVT = LHS.getValueType();
4028 if (InputVT != MVT::i32 && InputVT != MVT::i64)
4029 return SDValue();
4030
4031 if (ConvOpts == SetccInGPROpts::ZExtInvert ||
4032 ConvOpts == SetccInGPROpts::SExtInvert)
4033 CC = ISD::getSetCCInverse(Operation: CC, Type: InputVT);
4034
4035 bool Inputs32Bit = InputVT == MVT::i32;
4036
4037 SDLoc dl(Compare);
4038 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
4039 int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
4040 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
4041 ConvOpts == SetccInGPROpts::SExtInvert;
4042
4043 if (IsSext && Inputs32Bit)
4044 return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4045 else if (Inputs32Bit)
4046 return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4047 else if (IsSext)
4048 return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4049 return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4050}
4051
4052} // end anonymous namespace
4053
4054bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
4055 if (N->getValueType(ResNo: 0) != MVT::i32 &&
4056 N->getValueType(ResNo: 0) != MVT::i64)
4057 return false;
4058
4059 // This optimization will emit code that assumes 64-bit registers
4060 // so we don't want to run it in 32-bit mode. Also don't run it
4061 // on functions that are not to be optimized.
4062 if (TM.getOptLevel() == CodeGenOptLevel::None || !TM.isPPC64())
4063 return false;
4064
4065 // For POWER10, it is more profitable to use the set boolean extension
4066 // instructions rather than the integer compare elimination codegen.
4067 // Users can override this via the command line option, `--ppc-gpr-icmps`.
4068 if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
4069 return false;
4070
4071 switch (N->getOpcode()) {
4072 default: break;
4073 case ISD::ZERO_EXTEND:
4074 case ISD::SIGN_EXTEND:
4075 case ISD::AND:
4076 case ISD::OR:
4077 case ISD::XOR: {
4078 IntegerCompareEliminator ICmpElim(CurDAG, this);
4079 if (SDNode *New = ICmpElim.Select(N)) {
4080 ReplaceNode(F: N, T: New);
4081 return true;
4082 }
4083 }
4084 }
4085 return false;
4086}
4087
4088bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
4089 if (N->getValueType(ResNo: 0) != MVT::i32 &&
4090 N->getValueType(ResNo: 0) != MVT::i64)
4091 return false;
4092
4093 if (!UseBitPermRewriter)
4094 return false;
4095
4096 switch (N->getOpcode()) {
4097 default: break;
4098 case ISD::SRL:
4099 // If we are on P10, we have a pattern for 32-bit (srl (bswap r), 16) that
4100 // uses the BRH instruction.
4101 if (Subtarget->isISA3_1() && N->getValueType(ResNo: 0) == MVT::i32 &&
4102 N->getOperand(Num: 0).getOpcode() == ISD::BSWAP) {
4103 auto &OpRight = N->getOperand(Num: 1);
4104 ConstantSDNode *SRLConst = dyn_cast<ConstantSDNode>(Val: OpRight);
4105 if (SRLConst && SRLConst->getSExtValue() == 16)
4106 return false;
4107 }
4108 [[fallthrough]];
4109 case ISD::ROTL:
4110 case ISD::SHL:
4111 case ISD::AND:
4112 case ISD::OR: {
4113 BitPermutationSelector BPS(CurDAG);
4114 if (SDNode *New = BPS.Select(N)) {
4115 ReplaceNode(F: N, T: New);
4116 return true;
4117 }
4118 return false;
4119 }
4120 }
4121
4122 return false;
4123}
4124
4125/// SelectCC - Select a comparison of the specified values with the specified
4126/// condition code, returning the CR# of the expression.
4127SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4128 const SDLoc &dl, SDValue Chain) {
4129 // Always select the LHS.
4130 unsigned Opc;
4131
4132 if (LHS.getValueType() == MVT::i32) {
4133 unsigned Imm;
4134 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4135 if (isInt32Immediate(N: RHS, Imm)) {
4136 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4137 if (isUInt<16>(x: Imm))
4138 return SDValue(CurDAG->getMachineNode(Opcode: PPC::CMPLWI, dl, VT: MVT::i32, Op1: LHS,
4139 Op2: getI32Imm(Imm: Imm & 0xFFFF, dl)),
4140 0);
4141 // If this is a 16-bit signed immediate, fold it.
4142 if (isInt<16>(x: (int)Imm))
4143 return SDValue(CurDAG->getMachineNode(Opcode: PPC::CMPWI, dl, VT: MVT::i32, Op1: LHS,
4144 Op2: getI32Imm(Imm: Imm & 0xFFFF, dl)),
4145 0);
4146
4147 // For non-equality comparisons, the default code would materialize the
4148 // constant, then compare against it, like this:
4149 // lis r2, 4660
4150 // ori r2, r2, 22136
4151 // cmpw cr0, r3, r2
4152 // Since we are just comparing for equality, we can emit this instead:
4153 // xoris r0,r3,0x1234
4154 // cmplwi cr0,r0,0x5678
4155 // beq cr0,L6
4156 SDValue Xor(CurDAG->getMachineNode(Opcode: PPC::XORIS, dl, VT: MVT::i32, Op1: LHS,
4157 Op2: getI32Imm(Imm: Imm >> 16, dl)), 0);
4158 return SDValue(CurDAG->getMachineNode(Opcode: PPC::CMPLWI, dl, VT: MVT::i32, Op1: Xor,
4159 Op2: getI32Imm(Imm: Imm & 0xFFFF, dl)), 0);
4160 }
4161 Opc = PPC::CMPLW;
4162 } else if (ISD::isUnsignedIntSetCC(Code: CC)) {
4163 if (isInt32Immediate(N: RHS, Imm) && isUInt<16>(x: Imm))
4164 return SDValue(CurDAG->getMachineNode(Opcode: PPC::CMPLWI, dl, VT: MVT::i32, Op1: LHS,
4165 Op2: getI32Imm(Imm: Imm & 0xFFFF, dl)), 0);
4166 Opc = PPC::CMPLW;
4167 } else {
4168 int16_t SImm;
4169 if (isIntS16Immediate(Op: RHS, Imm&: SImm))
4170 return SDValue(CurDAG->getMachineNode(Opcode: PPC::CMPWI, dl, VT: MVT::i32, Op1: LHS,
4171 Op2: getI32Imm(Imm: (int)SImm & 0xFFFF,
4172 dl)),
4173 0);
4174 Opc = PPC::CMPW;
4175 }
4176 } else if (LHS.getValueType() == MVT::i64) {
4177 uint64_t Imm;
4178 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4179 if (isInt64Immediate(N: RHS.getNode(), Imm)) {
4180 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4181 if (isUInt<16>(x: Imm))
4182 return SDValue(CurDAG->getMachineNode(Opcode: PPC::CMPLDI, dl, VT: MVT::i64, Op1: LHS,
4183 Op2: getI32Imm(Imm: Imm & 0xFFFF, dl)),
4184 0);
4185 // If this is a 16-bit signed immediate, fold it.
4186 if (isInt<16>(x: Imm))
4187 return SDValue(CurDAG->getMachineNode(Opcode: PPC::CMPDI, dl, VT: MVT::i64, Op1: LHS,
4188 Op2: getI32Imm(Imm: Imm & 0xFFFF, dl)),
4189 0);
4190
4191 // For non-equality comparisons, the default code would materialize the
4192 // constant, then compare against it, like this:
4193 // lis r2, 4660
4194 // ori r2, r2, 22136
4195 // cmpd cr0, r3, r2
4196 // Since we are just comparing for equality, we can emit this instead:
4197 // xoris r0,r3,0x1234
4198 // cmpldi cr0,r0,0x5678
4199 // beq cr0,L6
4200 if (isUInt<32>(x: Imm)) {
4201 SDValue Xor(CurDAG->getMachineNode(Opcode: PPC::XORIS8, dl, VT: MVT::i64, Op1: LHS,
4202 Op2: getI64Imm(Imm: Imm >> 16, dl)), 0);
4203 return SDValue(CurDAG->getMachineNode(Opcode: PPC::CMPLDI, dl, VT: MVT::i64, Op1: Xor,
4204 Op2: getI64Imm(Imm: Imm & 0xFFFF, dl)),
4205 0);
4206 }
4207 }
4208 Opc = PPC::CMPLD;
4209 } else if (ISD::isUnsignedIntSetCC(Code: CC)) {
4210 if (isInt64Immediate(N: RHS.getNode(), Imm) && isUInt<16>(x: Imm))
4211 return SDValue(CurDAG->getMachineNode(Opcode: PPC::CMPLDI, dl, VT: MVT::i64, Op1: LHS,
4212 Op2: getI64Imm(Imm: Imm & 0xFFFF, dl)), 0);
4213 Opc = PPC::CMPLD;
4214 } else {
4215 int16_t SImm;
4216 if (isIntS16Immediate(Op: RHS, Imm&: SImm))
4217 return SDValue(CurDAG->getMachineNode(Opcode: PPC::CMPDI, dl, VT: MVT::i64, Op1: LHS,
4218 Op2: getI64Imm(Imm: SImm & 0xFFFF, dl)),
4219 0);
4220 Opc = PPC::CMPD;
4221 }
4222 } else if (LHS.getValueType() == MVT::f32) {
4223 if (Subtarget->hasSPE()) {
4224 switch (CC) {
4225 default:
4226 case ISD::SETEQ:
4227 case ISD::SETNE:
4228 Opc = PPC::EFSCMPEQ;
4229 break;
4230 case ISD::SETLT:
4231 case ISD::SETGE:
4232 case ISD::SETOLT:
4233 case ISD::SETOGE:
4234 case ISD::SETULT:
4235 case ISD::SETUGE:
4236 Opc = PPC::EFSCMPLT;
4237 break;
4238 case ISD::SETGT:
4239 case ISD::SETLE:
4240 case ISD::SETOGT:
4241 case ISD::SETOLE:
4242 case ISD::SETUGT:
4243 case ISD::SETULE:
4244 Opc = PPC::EFSCMPGT;
4245 break;
4246 }
4247 } else
4248 Opc = PPC::FCMPUS;
4249 } else if (LHS.getValueType() == MVT::f64) {
4250 if (Subtarget->hasSPE()) {
4251 switch (CC) {
4252 default:
4253 case ISD::SETEQ:
4254 case ISD::SETNE:
4255 Opc = PPC::EFDCMPEQ;
4256 break;
4257 case ISD::SETLT:
4258 case ISD::SETGE:
4259 case ISD::SETOLT:
4260 case ISD::SETOGE:
4261 case ISD::SETULT:
4262 case ISD::SETUGE:
4263 Opc = PPC::EFDCMPLT;
4264 break;
4265 case ISD::SETGT:
4266 case ISD::SETLE:
4267 case ISD::SETOGT:
4268 case ISD::SETOLE:
4269 case ISD::SETUGT:
4270 case ISD::SETULE:
4271 Opc = PPC::EFDCMPGT;
4272 break;
4273 }
4274 } else
4275 Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
4276 } else {
4277 assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
4278 assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
4279 Opc = PPC::XSCMPUQP;
4280 }
4281 if (Chain)
4282 return SDValue(
4283 CurDAG->getMachineNode(Opcode: Opc, dl, VT1: MVT::i32, VT2: MVT::Other, Op1: LHS, Op2: RHS, Op3: Chain),
4284 0);
4285 else
4286 return SDValue(CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i32, Op1: LHS, Op2: RHS), 0);
4287}
4288
4289static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
4290 const PPCSubtarget *Subtarget) {
4291 // For SPE instructions, the result is in GT bit of the CR
4292 bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
4293
4294 switch (CC) {
4295 case ISD::SETUEQ:
4296 case ISD::SETONE:
4297 case ISD::SETOLE:
4298 case ISD::SETOGE:
4299 llvm_unreachable("Should be lowered by legalize!");
4300 default: llvm_unreachable("Unknown condition!");
4301 case ISD::SETOEQ:
4302 case ISD::SETEQ:
4303 return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
4304 case ISD::SETUNE:
4305 case ISD::SETNE:
4306 return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
4307 case ISD::SETOLT:
4308 case ISD::SETLT:
4309 return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
4310 case ISD::SETULE:
4311 case ISD::SETLE:
4312 return PPC::PRED_LE;
4313 case ISD::SETOGT:
4314 case ISD::SETGT:
4315 return PPC::PRED_GT;
4316 case ISD::SETUGE:
4317 case ISD::SETGE:
4318 return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
4319 case ISD::SETO: return PPC::PRED_NU;
4320 case ISD::SETUO: return PPC::PRED_UN;
4321 // These two are invalid for floating point. Assume we have int.
4322 case ISD::SETULT: return PPC::PRED_LT;
4323 case ISD::SETUGT: return PPC::PRED_GT;
4324 }
4325}
4326
4327/// getCRIdxForSetCC - Return the index of the condition register field
4328/// associated with the SetCC condition, and whether or not the field is
4329/// treated as inverted. That is, lt = 0; ge = 0 inverted.
4330static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
4331 Invert = false;
4332 switch (CC) {
4333 default: llvm_unreachable("Unknown condition!");
4334 case ISD::SETOLT:
4335 case ISD::SETLT: return 0; // Bit #0 = SETOLT
4336 case ISD::SETOGT:
4337 case ISD::SETGT: return 1; // Bit #1 = SETOGT
4338 case ISD::SETOEQ:
4339 case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
4340 case ISD::SETUO: return 3; // Bit #3 = SETUO
4341 case ISD::SETUGE:
4342 case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
4343 case ISD::SETULE:
4344 case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
4345 case ISD::SETUNE:
4346 case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
4347 case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
4348 case ISD::SETUEQ:
4349 case ISD::SETOGE:
4350 case ISD::SETOLE:
4351 case ISD::SETONE:
4352 llvm_unreachable("Invalid branch code: should be expanded by legalize");
4353 // These are invalid for floating point. Assume integer.
4354 case ISD::SETULT: return 0;
4355 case ISD::SETUGT: return 1;
4356 }
4357}
4358
4359// getVCmpInst: return the vector compare instruction for the specified
4360// vector type and condition code. Since this is for altivec specific code,
4361// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
4362// and v4f32).
4363static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
4364 bool HasVSX, bool &Swap, bool &Negate) {
4365 Swap = false;
4366 Negate = false;
4367
4368 if (VecVT.isFloatingPoint()) {
4369 /* Handle some cases by swapping input operands. */
4370 switch (CC) {
4371 case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
4372 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4373 case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
4374 case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
4375 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4376 case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
4377 default: break;
4378 }
4379 /* Handle some cases by negating the result. */
4380 switch (CC) {
4381 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4382 case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
4383 case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
4384 case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
4385 default: break;
4386 }
4387 /* We have instructions implementing the remaining cases. */
4388 switch (CC) {
4389 case ISD::SETEQ:
4390 case ISD::SETOEQ:
4391 if (VecVT == MVT::v4f32)
4392 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
4393 else if (VecVT == MVT::v2f64)
4394 return PPC::XVCMPEQDP;
4395 break;
4396 case ISD::SETGT:
4397 case ISD::SETOGT:
4398 if (VecVT == MVT::v4f32)
4399 return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
4400 else if (VecVT == MVT::v2f64)
4401 return PPC::XVCMPGTDP;
4402 break;
4403 case ISD::SETGE:
4404 case ISD::SETOGE:
4405 if (VecVT == MVT::v4f32)
4406 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
4407 else if (VecVT == MVT::v2f64)
4408 return PPC::XVCMPGEDP;
4409 break;
4410 default:
4411 break;
4412 }
4413 llvm_unreachable("Invalid floating-point vector compare condition");
4414 } else {
4415 /* Handle some cases by swapping input operands. */
4416 switch (CC) {
4417 case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
4418 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4419 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4420 case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
4421 default: break;
4422 }
4423 /* Handle some cases by negating the result. */
4424 switch (CC) {
4425 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4426 case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
4427 case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
4428 case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
4429 default: break;
4430 }
4431 /* We have instructions implementing the remaining cases. */
4432 switch (CC) {
4433 case ISD::SETEQ:
4434 case ISD::SETUEQ:
4435 if (VecVT == MVT::v16i8)
4436 return PPC::VCMPEQUB;
4437 else if (VecVT == MVT::v8i16)
4438 return PPC::VCMPEQUH;
4439 else if (VecVT == MVT::v4i32)
4440 return PPC::VCMPEQUW;
4441 else if (VecVT == MVT::v2i64)
4442 return PPC::VCMPEQUD;
4443 else if (VecVT == MVT::v1i128)
4444 return PPC::VCMPEQUQ;
4445 break;
4446 case ISD::SETGT:
4447 if (VecVT == MVT::v16i8)
4448 return PPC::VCMPGTSB;
4449 else if (VecVT == MVT::v8i16)
4450 return PPC::VCMPGTSH;
4451 else if (VecVT == MVT::v4i32)
4452 return PPC::VCMPGTSW;
4453 else if (VecVT == MVT::v2i64)
4454 return PPC::VCMPGTSD;
4455 else if (VecVT == MVT::v1i128)
4456 return PPC::VCMPGTSQ;
4457 break;
4458 case ISD::SETUGT:
4459 if (VecVT == MVT::v16i8)
4460 return PPC::VCMPGTUB;
4461 else if (VecVT == MVT::v8i16)
4462 return PPC::VCMPGTUH;
4463 else if (VecVT == MVT::v4i32)
4464 return PPC::VCMPGTUW;
4465 else if (VecVT == MVT::v2i64)
4466 return PPC::VCMPGTUD;
4467 else if (VecVT == MVT::v1i128)
4468 return PPC::VCMPGTUQ;
4469 break;
4470 default:
4471 break;
4472 }
4473 llvm_unreachable("Invalid integer vector compare condition");
4474 }
4475}
4476
4477bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
4478 SDLoc dl(N);
4479 unsigned Imm;
4480 bool IsStrict = N->isStrictFPOpcode();
4481 ISD::CondCode CC =
4482 cast<CondCodeSDNode>(Val: N->getOperand(Num: IsStrict ? 3 : 2))->get();
4483 EVT PtrVT =
4484 CurDAG->getTargetLoweringInfo().getPointerTy(DL: CurDAG->getDataLayout());
4485 bool isPPC64 = (PtrVT == MVT::i64);
4486 SDValue Chain = IsStrict ? N->getOperand(Num: 0) : SDValue();
4487
4488 SDValue LHS = N->getOperand(Num: IsStrict ? 1 : 0);
4489 SDValue RHS = N->getOperand(Num: IsStrict ? 2 : 1);
4490
4491 if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(N: RHS, Imm)) {
4492 // We can codegen setcc op, imm very efficiently compared to a brcond.
4493 // Check for those cases here.
4494 // setcc op, 0
4495 if (Imm == 0) {
4496 SDValue Op = LHS;
4497 switch (CC) {
4498 default: break;
4499 case ISD::SETEQ: {
4500 Op = SDValue(CurDAG->getMachineNode(Opcode: PPC::CNTLZW, dl, VT: MVT::i32, Op1: Op), 0);
4501 SDValue Ops[] = { Op, getI32Imm(Imm: 27, dl), getI32Imm(Imm: 5, dl),
4502 getI32Imm(Imm: 31, dl) };
4503 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLWINM, VT: MVT::i32, Ops);
4504 return true;
4505 }
4506 case ISD::SETNE: {
4507 if (isPPC64) break;
4508 SDValue AD =
4509 SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDIC, dl, VT1: MVT::i32, VT2: MVT::Glue,
4510 Op1: Op, Op2: getI32Imm(Imm: ~0U, dl)), 0);
4511 CurDAG->SelectNodeTo(N, MachineOpc: PPC::SUBFE, VT: MVT::i32, Op1: AD, Op2: Op, Op3: AD.getValue(R: 1));
4512 return true;
4513 }
4514 case ISD::SETLT: {
4515 SDValue Ops[] = { Op, getI32Imm(Imm: 1, dl), getI32Imm(Imm: 31, dl),
4516 getI32Imm(Imm: 31, dl) };
4517 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLWINM, VT: MVT::i32, Ops);
4518 return true;
4519 }
4520 case ISD::SETGT: {
4521 SDValue T =
4522 SDValue(CurDAG->getMachineNode(Opcode: PPC::NEG, dl, VT: MVT::i32, Op1: Op), 0);
4523 T = SDValue(CurDAG->getMachineNode(Opcode: PPC::ANDC, dl, VT: MVT::i32, Op1: T, Op2: Op), 0);
4524 SDValue Ops[] = { T, getI32Imm(Imm: 1, dl), getI32Imm(Imm: 31, dl),
4525 getI32Imm(Imm: 31, dl) };
4526 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLWINM, VT: MVT::i32, Ops);
4527 return true;
4528 }
4529 }
4530 } else if (Imm == ~0U) { // setcc op, -1
4531 SDValue Op = LHS;
4532 switch (CC) {
4533 default: break;
4534 case ISD::SETEQ:
4535 if (isPPC64) break;
4536 Op = SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDIC, dl, VT1: MVT::i32, VT2: MVT::Glue,
4537 Op1: Op, Op2: getI32Imm(Imm: 1, dl)), 0);
4538 CurDAG->SelectNodeTo(N, MachineOpc: PPC::ADDZE, VT: MVT::i32,
4539 Op1: SDValue(CurDAG->getMachineNode(Opcode: PPC::LI, dl,
4540 VT: MVT::i32,
4541 Op1: getI32Imm(Imm: 0, dl)),
4542 0), Op2: Op.getValue(R: 1));
4543 return true;
4544 case ISD::SETNE: {
4545 if (isPPC64) break;
4546 Op = SDValue(CurDAG->getMachineNode(Opcode: PPC::NOR, dl, VT: MVT::i32, Op1: Op, Op2: Op), 0);
4547 SDNode *AD = CurDAG->getMachineNode(Opcode: PPC::ADDIC, dl, VT1: MVT::i32, VT2: MVT::Glue,
4548 Op1: Op, Op2: getI32Imm(Imm: ~0U, dl));
4549 CurDAG->SelectNodeTo(N, MachineOpc: PPC::SUBFE, VT: MVT::i32, Op1: SDValue(AD, 0), Op2: Op,
4550 Op3: SDValue(AD, 1));
4551 return true;
4552 }
4553 case ISD::SETLT: {
4554 SDValue AD = SDValue(CurDAG->getMachineNode(Opcode: PPC::ADDI, dl, VT: MVT::i32, Op1: Op,
4555 Op2: getI32Imm(Imm: 1, dl)), 0);
4556 SDValue AN = SDValue(CurDAG->getMachineNode(Opcode: PPC::AND, dl, VT: MVT::i32, Op1: AD,
4557 Op2: Op), 0);
4558 SDValue Ops[] = { AN, getI32Imm(Imm: 1, dl), getI32Imm(Imm: 31, dl),
4559 getI32Imm(Imm: 31, dl) };
4560 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLWINM, VT: MVT::i32, Ops);
4561 return true;
4562 }
4563 case ISD::SETGT: {
4564 SDValue Ops[] = { Op, getI32Imm(Imm: 1, dl), getI32Imm(Imm: 31, dl),
4565 getI32Imm(Imm: 31, dl) };
4566 Op = SDValue(CurDAG->getMachineNode(Opcode: PPC::RLWINM, dl, VT: MVT::i32, Ops), 0);
4567 CurDAG->SelectNodeTo(N, MachineOpc: PPC::XORI, VT: MVT::i32, Op1: Op, Op2: getI32Imm(Imm: 1, dl));
4568 return true;
4569 }
4570 }
4571 }
4572 }
4573
4574 // Altivec Vector compare instructions do not set any CR register by default and
4575 // vector compare operations return the same type as the operands.
4576 if (!IsStrict && LHS.getValueType().isVector()) {
4577 if (Subtarget->hasSPE())
4578 return false;
4579
4580 EVT VecVT = LHS.getValueType();
4581 // Optimize 'Not equal to zero-vector' comparisons to 'Greater than or
4582 // less than' operators.
4583 // Example: Consider k to be any non-zero positive value.
4584 // * for k != 0, change SETNE to SETUGT (k > 0)
4585 // * for 0 != k, change SETNE to SETULT (0 < k)
4586 if (CC == ISD::SETNE) {
4587 // Only optimize for integer types (avoid FP completely)
4588 if (VecVT.getVectorElementType().isInteger()) {
4589 if (ISD::isBuildVectorAllZeros(N: RHS.getNode()))
4590 CC = ISD::SETUGT;
4591 else if (ISD::isBuildVectorAllZeros(N: LHS.getNode()))
4592 CC = ISD::SETULT;
4593 }
4594 }
4595 bool Swap, Negate;
4596 unsigned int VCmpInst =
4597 getVCmpInst(VecVT: VecVT.getSimpleVT(), CC, HasVSX: Subtarget->hasVSX(), Swap, Negate);
4598 if (Swap)
4599 std::swap(a&: LHS, b&: RHS);
4600
4601 EVT ResVT = VecVT.changeVectorElementTypeToInteger();
4602 if (Negate) {
4603 SDValue VCmp(CurDAG->getMachineNode(Opcode: VCmpInst, dl, VT: ResVT, Op1: LHS, Op2: RHS), 0);
4604 CurDAG->SelectNodeTo(N, MachineOpc: Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
4605 VT: ResVT, Op1: VCmp, Op2: VCmp);
4606 return true;
4607 }
4608
4609 CurDAG->SelectNodeTo(N, MachineOpc: VCmpInst, VT: ResVT, Op1: LHS, Op2: RHS);
4610 return true;
4611 }
4612
4613 if (Subtarget->useCRBits())
4614 return false;
4615
4616 bool Inv;
4617 unsigned Idx = getCRIdxForSetCC(CC, Invert&: Inv);
4618 SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
4619 if (IsStrict)
4620 CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: CCReg.getValue(R: 1));
4621 SDValue IntCR;
4622
4623 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
4624 // The correct compare instruction is already set by SelectCC()
4625 if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
4626 Idx = 1;
4627 }
4628
4629 // Force the ccreg into CR7.
4630 SDValue CR7Reg = CurDAG->getRegister(Reg: PPC::CR7, VT: MVT::i32);
4631
4632 SDValue InGlue; // Null incoming flag value.
4633 CCReg = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl, Reg: CR7Reg, N: CCReg,
4634 Glue: InGlue).getValue(R: 1);
4635
4636 IntCR = SDValue(CurDAG->getMachineNode(Opcode: PPC::MFOCRF, dl, VT: MVT::i32, Op1: CR7Reg,
4637 Op2: CCReg), 0);
4638
4639 SDValue Ops[] = { IntCR, getI32Imm(Imm: (32 - (3 - Idx)) & 31, dl),
4640 getI32Imm(Imm: 31, dl), getI32Imm(Imm: 31, dl) };
4641 if (!Inv) {
4642 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLWINM, VT: MVT::i32, Ops);
4643 return true;
4644 }
4645
4646 // Get the specified bit.
4647 SDValue Tmp =
4648 SDValue(CurDAG->getMachineNode(Opcode: PPC::RLWINM, dl, VT: MVT::i32, Ops), 0);
4649 CurDAG->SelectNodeTo(N, MachineOpc: PPC::XORI, VT: MVT::i32, Op1: Tmp, Op2: getI32Imm(Imm: 1, dl));
4650 return true;
4651}
4652
4653/// Does this node represent a load/store node whose address can be represented
4654/// with a register plus an immediate that's a multiple of \p Val:
4655bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4656 LoadSDNode *LDN = dyn_cast<LoadSDNode>(Val: N);
4657 StoreSDNode *STN = dyn_cast<StoreSDNode>(Val: N);
4658 MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(Val: N);
4659 SDValue AddrOp;
4660 if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
4661 AddrOp = N->getOperand(Num: 1);
4662 else if (STN)
4663 AddrOp = STN->getOperand(Num: 2);
4664
4665 // If the address points a frame object or a frame object with an offset,
4666 // we need to check the object alignment.
4667 short Imm = 0;
4668 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4669 Val: AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(i: 0) :
4670 AddrOp)) {
4671 // If op0 is a frame index that is under aligned, we can't do it either,
4672 // because it is translated to r31 or r1 + slot + offset. We won't know the
4673 // slot number until the stack frame is finalized.
4674 const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4675 unsigned SlotAlign = MFI.getObjectAlign(ObjectIdx: FI->getIndex()).value();
4676 if ((SlotAlign % Val) != 0)
4677 return false;
4678
4679 // If we have an offset, we need further check on the offset.
4680 if (AddrOp.getOpcode() != ISD::ADD)
4681 return true;
4682 }
4683
4684 if (AddrOp.getOpcode() == ISD::ADD)
4685 return isIntS16Immediate(Op: AddrOp.getOperand(i: 1), Imm) && !(Imm % Val);
4686
4687 // If the address comes from the outside, the offset will be zero.
4688 return AddrOp.getOpcode() == ISD::CopyFromReg;
4689}
4690
4691void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4692 // Transfer memoperands.
4693 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
4694 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Result), NewMemRefs: {MemOp});
4695}
4696
4697static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
4698 bool &NeedSwapOps, bool &IsUnCmp) {
4699
4700 assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
4701
4702 SDValue LHS = N->getOperand(Num: 0);
4703 SDValue RHS = N->getOperand(Num: 1);
4704 SDValue TrueRes = N->getOperand(Num: 2);
4705 SDValue FalseRes = N->getOperand(Num: 3);
4706 ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(Val&: TrueRes);
4707 if (!TrueConst || (N->getSimpleValueType(ResNo: 0) != MVT::i64 &&
4708 N->getSimpleValueType(ResNo: 0) != MVT::i32))
4709 return false;
4710
4711 // We are looking for any of:
4712 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4713 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4714 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4715 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4716 int64_t TrueResVal = TrueConst->getSExtValue();
4717 if ((TrueResVal < -1 || TrueResVal > 1) ||
4718 (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
4719 (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
4720 (TrueResVal == 0 &&
4721 (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
4722 return false;
4723
4724 SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
4725 ? FalseRes
4726 : FalseRes.getOperand(i: 0);
4727 bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
4728 if (SetOrSelCC.getOpcode() != ISD::SETCC &&
4729 SetOrSelCC.getOpcode() != ISD::SELECT_CC)
4730 return false;
4731
4732 // Without this setb optimization, the outer SELECT_CC will be manually
4733 // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
4734 // transforms pseudo instruction to isel instruction. When there are more than
4735 // one use for result like zext/sext, with current optimization we only see
4736 // isel is replaced by setb but can't see any significant gain. Since
4737 // setb has longer latency than original isel, we should avoid this. Another
4738 // point is that setb requires comparison always kept, it can break the
4739 // opportunity to get the comparison away if we have in future.
4740 if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
4741 return false;
4742
4743 SDValue InnerLHS = SetOrSelCC.getOperand(i: 0);
4744 SDValue InnerRHS = SetOrSelCC.getOperand(i: 1);
4745 ISD::CondCode InnerCC =
4746 cast<CondCodeSDNode>(Val: SetOrSelCC.getOperand(i: InnerIsSel ? 4 : 2))->get();
4747 // If the inner comparison is a select_cc, make sure the true/false values are
4748 // 1/-1 and canonicalize it if needed.
4749 if (InnerIsSel) {
4750 ConstantSDNode *SelCCTrueConst =
4751 dyn_cast<ConstantSDNode>(Val: SetOrSelCC.getOperand(i: 2));
4752 ConstantSDNode *SelCCFalseConst =
4753 dyn_cast<ConstantSDNode>(Val: SetOrSelCC.getOperand(i: 3));
4754 if (!SelCCTrueConst || !SelCCFalseConst)
4755 return false;
4756 int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
4757 int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
4758 // The values must be -1/1 (requiring a swap) or 1/-1.
4759 if (SelCCTVal == -1 && SelCCFVal == 1) {
4760 std::swap(a&: InnerLHS, b&: InnerRHS);
4761 } else if (SelCCTVal != 1 || SelCCFVal != -1)
4762 return false;
4763 }
4764
4765 // Canonicalize unsigned case
4766 if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
4767 IsUnCmp = true;
4768 InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
4769 }
4770
4771 bool InnerSwapped = false;
4772 if (LHS == InnerRHS && RHS == InnerLHS)
4773 InnerSwapped = true;
4774 else if (LHS != InnerLHS || RHS != InnerRHS)
4775 return false;
4776
4777 switch (CC) {
4778 // (select_cc lhs, rhs, 0, \
4779 // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4780 case ISD::SETEQ:
4781 if (!InnerIsSel)
4782 return false;
4783 if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
4784 return false;
4785 NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4786 break;
4787
4788 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4789 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4790 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4791 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4792 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4793 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4794 case ISD::SETULT:
4795 if (!IsUnCmp && InnerCC != ISD::SETNE)
4796 return false;
4797 IsUnCmp = true;
4798 [[fallthrough]];
4799 case ISD::SETLT:
4800 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
4801 (InnerCC == ISD::SETLT && InnerSwapped))
4802 NeedSwapOps = (TrueResVal == 1);
4803 else
4804 return false;
4805 break;
4806
4807 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4808 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4809 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4810 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4811 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4812 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4813 case ISD::SETUGT:
4814 if (!IsUnCmp && InnerCC != ISD::SETNE)
4815 return false;
4816 IsUnCmp = true;
4817 [[fallthrough]];
4818 case ISD::SETGT:
4819 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
4820 (InnerCC == ISD::SETGT && InnerSwapped))
4821 NeedSwapOps = (TrueResVal == -1);
4822 else
4823 return false;
4824 break;
4825
4826 default:
4827 return false;
4828 }
4829
4830 LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4831 LLVM_DEBUG(N->dump());
4832
4833 return true;
4834}
4835
4836// Return true if it's a software square-root/divide operand.
4837static bool isSWTestOp(SDValue N) {
4838 if (N.getOpcode() == PPCISD::FTSQRT)
4839 return true;
4840 if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(Val: N.getOperand(i: 0)) ||
4841 N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
4842 return false;
4843 switch (N.getConstantOperandVal(i: 0)) {
4844 case Intrinsic::ppc_vsx_xvtdivdp:
4845 case Intrinsic::ppc_vsx_xvtdivsp:
4846 case Intrinsic::ppc_vsx_xvtsqrtdp:
4847 case Intrinsic::ppc_vsx_xvtsqrtsp:
4848 return true;
4849 }
4850 return false;
4851}
4852
4853bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
4854 assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
4855 // We are looking for following patterns, where `truncate to i1` actually has
4856 // the same semantic with `and 1`.
4857 // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
4858 // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
4859 // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
4860 // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
4861 // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
4862 // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
4863 // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
4864 // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
4865 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 1))->get();
4866 if (CC != ISD::SETEQ && CC != ISD::SETNE)
4867 return false;
4868
4869 SDValue CmpRHS = N->getOperand(Num: 3);
4870 if (!isNullConstant(V: CmpRHS))
4871 return false;
4872
4873 SDValue CmpLHS = N->getOperand(Num: 2);
4874 if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(N: CmpLHS.getOperand(i: 0)))
4875 return false;
4876
4877 unsigned PCC = 0;
4878 bool IsCCNE = CC == ISD::SETNE;
4879 if (CmpLHS.getOpcode() == ISD::AND &&
4880 isa<ConstantSDNode>(Val: CmpLHS.getOperand(i: 1)))
4881 switch (CmpLHS.getConstantOperandVal(i: 1)) {
4882 case 1:
4883 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4884 break;
4885 case 2:
4886 PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
4887 break;
4888 case 4:
4889 PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
4890 break;
4891 case 8:
4892 PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
4893 break;
4894 default:
4895 return false;
4896 }
4897 else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
4898 CmpLHS.getValueType() == MVT::i1)
4899 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4900
4901 if (PCC) {
4902 SDLoc dl(N);
4903 SDValue Ops[] = {getI32Imm(Imm: PCC, dl), CmpLHS.getOperand(i: 0), N->getOperand(Num: 4),
4904 N->getOperand(Num: 0)};
4905 CurDAG->SelectNodeTo(N, MachineOpc: PPC::BCC, VT: MVT::Other, Ops);
4906 return true;
4907 }
4908 return false;
4909}
4910
4911bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) {
4912 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
4913 // value, for example when crbits is disabled. If so, select the
4914 // loop_decrement intrinsics now.
4915 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 1))->get();
4916 SDValue LHS = N->getOperand(Num: 2), RHS = N->getOperand(Num: 3);
4917
4918 if (LHS.getOpcode() != ISD::AND || !isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) ||
4919 isNullConstant(V: LHS.getOperand(i: 1)))
4920 return false;
4921
4922 if (LHS.getOperand(i: 0).getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4923 LHS.getOperand(i: 0).getConstantOperandVal(i: 1) != Intrinsic::loop_decrement)
4924 return false;
4925
4926 if (!isa<ConstantSDNode>(Val: RHS))
4927 return false;
4928
4929 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
4930 "Counter decrement comparison is not EQ or NE");
4931
4932 SDValue OldDecrement = LHS.getOperand(i: 0);
4933 assert(OldDecrement.hasOneUse() && "loop decrement has more than one use!");
4934
4935 SDLoc DecrementLoc(OldDecrement);
4936 SDValue ChainInput = OldDecrement.getOperand(i: 0);
4937 SDValue DecrementOps[] = {Subtarget->isPPC64() ? getI64Imm(Imm: 1, dl: DecrementLoc)
4938 : getI32Imm(Imm: 1, dl: DecrementLoc)};
4939 unsigned DecrementOpcode =
4940 Subtarget->isPPC64() ? PPC::DecreaseCTR8loop : PPC::DecreaseCTRloop;
4941 SDNode *NewDecrement = CurDAG->getMachineNode(Opcode: DecrementOpcode, dl: DecrementLoc,
4942 VT: MVT::i1, Ops: DecrementOps);
4943
4944 unsigned Val = RHS->getAsZExtVal();
4945 bool IsBranchOnTrue = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val);
4946 unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn;
4947
4948 ReplaceUses(F: LHS.getValue(R: 0), T: LHS.getOperand(i: 1));
4949 CurDAG->RemoveDeadNode(N: LHS.getNode());
4950
4951 // Mark the old loop_decrement intrinsic as dead.
4952 ReplaceUses(F: OldDecrement.getValue(R: 1), T: ChainInput);
4953 CurDAG->RemoveDeadNode(N: OldDecrement.getNode());
4954
4955 SDValue Chain = CurDAG->getNode(Opcode: ISD::TokenFactor, DL: SDLoc(N), VT: MVT::Other,
4956 N1: ChainInput, N2: N->getOperand(Num: 0));
4957
4958 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VT: MVT::Other, Op1: SDValue(NewDecrement, 0),
4959 Op2: N->getOperand(Num: 4), Op3: Chain);
4960 return true;
4961}
4962
4963bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
4964 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4965 unsigned Imm;
4966 if (!isInt32Immediate(N: N->getOperand(Num: 1), Imm))
4967 return false;
4968
4969 SDLoc dl(N);
4970 SDValue Val = N->getOperand(Num: 0);
4971 unsigned SH, MB, ME;
4972 // If this is an and of a value rotated between 0 and 31 bits and then and'd
4973 // with a mask, emit rlwinm
4974 if (isRotateAndMask(N: Val.getNode(), Mask: Imm, isShiftMask: false, SH, MB, ME)) {
4975 Val = Val.getOperand(i: 0);
4976 SDValue Ops[] = {Val, getI32Imm(Imm: SH, dl), getI32Imm(Imm: MB, dl),
4977 getI32Imm(Imm: ME, dl)};
4978 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLWINM, VT: MVT::i32, Ops);
4979 return true;
4980 }
4981
4982 // If this is just a masked value where the input is not handled, and
4983 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4984 if (isRunOfOnes(Val: Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
4985 // The result of LBARX/LHARX do not need to be cleared as the instructions
4986 // implicitly clear the upper bits.
4987 unsigned AlreadyCleared = 0;
4988 if (Val.getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4989 auto IntrinsicID = Val.getConstantOperandVal(i: 1);
4990 if (IntrinsicID == Intrinsic::ppc_lbarx)
4991 AlreadyCleared = 24;
4992 else if (IntrinsicID == Intrinsic::ppc_lharx)
4993 AlreadyCleared = 16;
4994 if (AlreadyCleared != 0 && AlreadyCleared == MB && ME == 31) {
4995 ReplaceUses(F: SDValue(N, 0), T: N->getOperand(Num: 0));
4996 return true;
4997 }
4998 }
4999
5000 SDValue Ops[] = {Val, getI32Imm(Imm: 0, dl), getI32Imm(Imm: MB, dl),
5001 getI32Imm(Imm: ME, dl)};
5002 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLWINM, VT: MVT::i32, Ops);
5003 return true;
5004 }
5005
5006 // AND X, 0 -> 0, not "rlwinm 32".
5007 if (Imm == 0) {
5008 ReplaceUses(F: SDValue(N, 0), T: N->getOperand(Num: 1));
5009 return true;
5010 }
5011
5012 return false;
5013}
5014
5015bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
5016 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5017 uint64_t Imm64;
5018 if (!isInt64Immediate(N: N->getOperand(Num: 1).getNode(), Imm&: Imm64))
5019 return false;
5020
5021 unsigned MB, ME;
5022 if (isRunOfOnes64(Val: Imm64, MB, ME) && MB >= 32 && MB <= ME) {
5023 // MB ME
5024 // +----------------------+
5025 // |xxxxxxxxxxx00011111000|
5026 // +----------------------+
5027 // 0 32 64
5028 // We can only do it if the MB is larger than 32 and MB <= ME
5029 // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
5030 // we didn't rotate it.
5031 SDLoc dl(N);
5032 SDValue Ops[] = {N->getOperand(Num: 0), getI64Imm(Imm: 0, dl), getI64Imm(Imm: MB - 32, dl),
5033 getI64Imm(Imm: ME - 32, dl)};
5034 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLWINM8, VT: MVT::i64, Ops);
5035 return true;
5036 }
5037
5038 return false;
5039}
5040
5041bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
5042 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5043 uint64_t Imm64;
5044 if (!isInt64Immediate(N: N->getOperand(Num: 1).getNode(), Imm&: Imm64))
5045 return false;
5046
5047 // Do nothing if it is 16-bit imm as the pattern in the .td file handle
5048 // it well with "andi.".
5049 if (isUInt<16>(x: Imm64))
5050 return false;
5051
5052 SDLoc Loc(N);
5053 SDValue Val = N->getOperand(Num: 0);
5054
5055 // Optimized with two rldicl's as follows:
5056 // Add missing bits on left to the mask and check that the mask is a
5057 // wrapped run of ones, i.e.
5058 // Change pattern |0001111100000011111111|
5059 // to |1111111100000011111111|.
5060 unsigned NumOfLeadingZeros = llvm::countl_zero(Val: Imm64);
5061 if (NumOfLeadingZeros != 0)
5062 Imm64 |= maskLeadingOnes<uint64_t>(N: NumOfLeadingZeros);
5063
5064 unsigned MB, ME;
5065 if (!isRunOfOnes64(Val: Imm64, MB, ME))
5066 return false;
5067
5068 // ME MB MB-ME+63
5069 // +----------------------+ +----------------------+
5070 // |1111111100000011111111| -> |0000001111111111111111|
5071 // +----------------------+ +----------------------+
5072 // 0 63 0 63
5073 // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
5074 unsigned OnesOnLeft = ME + 1;
5075 unsigned ZerosInBetween = (MB - ME + 63) & 63;
5076 // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
5077 // on the left the bits that are already zeros in the mask.
5078 Val = SDValue(CurDAG->getMachineNode(Opcode: PPC::RLDICL, dl: Loc, VT: MVT::i64, Op1: Val,
5079 Op2: getI64Imm(Imm: OnesOnLeft, dl: Loc),
5080 Op3: getI64Imm(Imm: ZerosInBetween, dl: Loc)),
5081 0);
5082 // MB-ME+63 ME MB
5083 // +----------------------+ +----------------------+
5084 // |0000001111111111111111| -> |0001111100000011111111|
5085 // +----------------------+ +----------------------+
5086 // 0 63 0 63
5087 // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
5088 // left the number of ones we previously added.
5089 SDValue Ops[] = {Val, getI64Imm(Imm: 64 - OnesOnLeft, dl: Loc),
5090 getI64Imm(Imm: NumOfLeadingZeros, dl: Loc)};
5091 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLDICL, VT: MVT::i64, Ops);
5092 return true;
5093}
5094
5095bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
5096 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5097 unsigned Imm;
5098 if (!isInt32Immediate(N: N->getOperand(Num: 1), Imm))
5099 return false;
5100
5101 SDValue Val = N->getOperand(Num: 0);
5102 unsigned Imm2;
5103 // ISD::OR doesn't get all the bitfield insertion fun.
5104 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
5105 // bitfield insert.
5106 if (Val.getOpcode() != ISD::OR || !isInt32Immediate(N: Val.getOperand(i: 1), Imm&: Imm2))
5107 return false;
5108
5109 // The idea here is to check whether this is equivalent to:
5110 // (c1 & m) | (x & ~m)
5111 // where m is a run-of-ones mask. The logic here is that, for each bit in
5112 // c1 and c2:
5113 // - if both are 1, then the output will be 1.
5114 // - if both are 0, then the output will be 0.
5115 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
5116 // come from x.
5117 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
5118 // be 0.
5119 // If that last condition is never the case, then we can form m from the
5120 // bits that are the same between c1 and c2.
5121 unsigned MB, ME;
5122 if (isRunOfOnes(Val: ~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
5123 SDLoc dl(N);
5124 SDValue Ops[] = {Val.getOperand(i: 0), Val.getOperand(i: 1), getI32Imm(Imm: 0, dl),
5125 getI32Imm(Imm: MB, dl), getI32Imm(Imm: ME, dl)};
5126 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: PPC::RLWIMI, dl, VT: MVT::i32, Ops));
5127 return true;
5128 }
5129
5130 return false;
5131}
5132
5133bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) {
5134 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5135
5136 uint64_t Imm64;
5137 if (!isInt64Immediate(N: N->getOperand(Num: 1).getNode(), Imm&: Imm64) || !isMask_64(Value: Imm64))
5138 return false;
5139
5140 SDValue Val = N->getOperand(Num: 0);
5141
5142 if (Val.getOpcode() != ISD::ROTL)
5143 return false;
5144
5145 // Looking to try to avoid a situation like this one:
5146 // %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
5147 // %and1 = and i64 %2, 9223372036854775807
5148 // In this function we are looking to try to match RLDCL. However, the above
5149 // DAG would better match RLDICL instead which is not what we are looking
5150 // for here.
5151 SDValue RotateAmt = Val.getOperand(i: 1);
5152 if (RotateAmt.getOpcode() == ISD::Constant)
5153 return false;
5154
5155 unsigned MB = 64 - llvm::countr_one(Value: Imm64);
5156 SDLoc dl(N);
5157 SDValue Ops[] = {Val.getOperand(i: 0), RotateAmt, getI32Imm(Imm: MB, dl)};
5158 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLDCL, VT: MVT::i64, Ops);
5159 return true;
5160}
5161
5162bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
5163 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5164 uint64_t Imm64;
5165 if (!isInt64Immediate(N: N->getOperand(Num: 1).getNode(), Imm&: Imm64) || !isMask_64(Value: Imm64))
5166 return false;
5167
5168 // If this is a 64-bit zero-extension mask, emit rldicl.
5169 unsigned MB = 64 - llvm::countr_one(Value: Imm64);
5170 unsigned SH = 0;
5171 unsigned Imm;
5172 SDValue Val = N->getOperand(Num: 0);
5173 SDLoc dl(N);
5174
5175 if (Val.getOpcode() == ISD::ANY_EXTEND) {
5176 auto Op0 = Val.getOperand(i: 0);
5177 if (Op0.getOpcode() == ISD::SRL &&
5178 isInt32Immediate(N: Op0.getOperand(i: 1).getNode(), Imm) && Imm <= MB) {
5179
5180 auto ResultType = Val.getNode()->getValueType(ResNo: 0);
5181 auto ImDef = CurDAG->getMachineNode(Opcode: PPC::IMPLICIT_DEF, dl, VT: ResultType);
5182 SDValue IDVal(ImDef, 0);
5183
5184 Val = SDValue(CurDAG->getMachineNode(Opcode: PPC::INSERT_SUBREG, dl, VT: ResultType,
5185 Op1: IDVal, Op2: Op0.getOperand(i: 0),
5186 Op3: getI32Imm(Imm: 1, dl)),
5187 0);
5188 SH = 64 - Imm;
5189 }
5190 }
5191
5192 // If the operand is a logical right shift, we can fold it into this
5193 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
5194 // for n <= mb. The right shift is really a left rotate followed by a
5195 // mask, and this mask is a more-restrictive sub-mask of the mask implied
5196 // by the shift.
5197 if (Val.getOpcode() == ISD::SRL &&
5198 isInt32Immediate(N: Val.getOperand(i: 1).getNode(), Imm) && Imm <= MB) {
5199 assert(Imm < 64 && "Illegal shift amount");
5200 Val = Val.getOperand(i: 0);
5201 SH = 64 - Imm;
5202 }
5203
5204 SDValue Ops[] = {Val, getI32Imm(Imm: SH, dl), getI32Imm(Imm: MB, dl)};
5205 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLDICL, VT: MVT::i64, Ops);
5206 return true;
5207}
5208
5209bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
5210 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5211 uint64_t Imm64;
5212 if (!isInt64Immediate(N: N->getOperand(Num: 1).getNode(), Imm&: Imm64) ||
5213 !isMask_64(Value: ~Imm64))
5214 return false;
5215
5216 // If this is a negated 64-bit zero-extension mask,
5217 // i.e. the immediate is a sequence of ones from most significant side
5218 // and all zero for reminder, we should use rldicr.
5219 unsigned MB = 63 - llvm::countr_one(Value: ~Imm64);
5220 unsigned SH = 0;
5221 SDLoc dl(N);
5222 SDValue Ops[] = {N->getOperand(Num: 0), getI32Imm(Imm: SH, dl), getI32Imm(Imm: MB, dl)};
5223 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLDICR, VT: MVT::i64, Ops);
5224 return true;
5225}
5226
5227bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
5228 assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
5229 uint64_t Imm64;
5230 unsigned MB, ME;
5231 SDValue N0 = N->getOperand(Num: 0);
5232
5233 // We won't get fewer instructions if the imm is 32-bit integer.
5234 // rldimi requires the imm to have consecutive ones with both sides zero.
5235 // Also, make sure the first Op has only one use, otherwise this may increase
5236 // register pressure since rldimi is destructive.
5237 if (!isInt64Immediate(N: N->getOperand(Num: 1).getNode(), Imm&: Imm64) ||
5238 isUInt<32>(x: Imm64) || !isRunOfOnes64(Val: Imm64, MB, ME) || !N0.hasOneUse())
5239 return false;
5240
5241 unsigned SH = 63 - ME;
5242 SDLoc Dl(N);
5243 // Use select64Imm for making LI instr instead of directly putting Imm64
5244 SDValue Ops[] = {
5245 N->getOperand(Num: 0),
5246 SDValue(selectI64Imm(CurDAG, N: getI64Imm(Imm: -1, dl: Dl).getNode()), 0),
5247 getI32Imm(Imm: SH, dl: Dl), getI32Imm(Imm: MB, dl: Dl)};
5248 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLDIMI, VT: MVT::i64, Ops);
5249 return true;
5250}
5251
5252// Select - Convert the specified operand from a target-independent to a
5253// target-specific node if it hasn't already been changed.
5254void PPCDAGToDAGISel::Select(SDNode *N) {
5255 SDLoc dl(N);
5256 if (N->isMachineOpcode()) {
5257 N->setNodeId(-1);
5258 return; // Already selected.
5259 }
5260
5261 // In case any misguided DAG-level optimizations form an ADD with a
5262 // TargetConstant operand, crash here instead of miscompiling (by selecting
5263 // an r+r add instead of some kind of r+i add).
5264 if (N->getOpcode() == ISD::ADD &&
5265 N->getOperand(Num: 1).getOpcode() == ISD::TargetConstant)
5266 llvm_unreachable("Invalid ADD with TargetConstant operand");
5267
5268 // Try matching complex bit permutations before doing anything else.
5269 if (tryBitPermutation(N))
5270 return;
5271
5272 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
5273 if (tryIntCompareInGPR(N))
5274 return;
5275
5276 switch (N->getOpcode()) {
5277 default: break;
5278
5279 case ISD::Constant:
5280 if (N->getValueType(ResNo: 0) == MVT::i64) {
5281 ReplaceNode(F: N, T: selectI64Imm(CurDAG, N));
5282 return;
5283 }
5284 break;
5285
5286 case ISD::INTRINSIC_VOID: {
5287 auto IntrinsicID = N->getConstantOperandVal(Num: 1);
5288 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
5289 IntrinsicID != Intrinsic::ppc_trapd &&
5290 IntrinsicID != Intrinsic::ppc_trap)
5291 break;
5292 unsigned Opcode = (IntrinsicID == Intrinsic::ppc_tdw ||
5293 IntrinsicID == Intrinsic::ppc_trapd)
5294 ? PPC::TDI
5295 : PPC::TWI;
5296 SmallVector<SDValue, 4> OpsWithMD;
5297 unsigned MDIndex;
5298 if (IntrinsicID == Intrinsic::ppc_tdw ||
5299 IntrinsicID == Intrinsic::ppc_tw) {
5300 SDValue Ops[] = {N->getOperand(Num: 4), N->getOperand(Num: 2), N->getOperand(Num: 3)};
5301 int16_t SImmOperand2;
5302 int16_t SImmOperand3;
5303 int16_t SImmOperand4;
5304 bool isOperand2IntS16Immediate =
5305 isIntS16Immediate(Op: N->getOperand(Num: 2), Imm&: SImmOperand2);
5306 bool isOperand3IntS16Immediate =
5307 isIntS16Immediate(Op: N->getOperand(Num: 3), Imm&: SImmOperand3);
5308 // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
5309 // reg or imm + imm. The imm + imm form will be optimized to either an
5310 // unconditional trap or a nop in a later pass.
5311 if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
5312 Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
5313 else if (isOperand3IntS16Immediate)
5314 // The 2nd and 3rd operands are reg + imm.
5315 Ops[2] = getI32Imm(Imm: int(SImmOperand3) & 0xFFFF, dl);
5316 else {
5317 // The 2nd and 3rd operands are imm + reg.
5318 bool isOperand4IntS16Immediate =
5319 isIntS16Immediate(Op: N->getOperand(Num: 4), Imm&: SImmOperand4);
5320 (void)isOperand4IntS16Immediate;
5321 assert(isOperand4IntS16Immediate &&
5322 "The 4th operand is not an Immediate");
5323 // We need to flip the condition immediate TO.
5324 int16_t TO = int(SImmOperand4) & 0x1F;
5325 // We swap the first and second bit of TO if they are not same.
5326 if ((TO & 0x1) != ((TO & 0x2) >> 1))
5327 TO = (TO & 0x1) ? TO + 1 : TO - 1;
5328 // We swap the fourth and fifth bit of TO if they are not same.
5329 if ((TO & 0x8) != ((TO & 0x10) >> 1))
5330 TO = (TO & 0x8) ? TO + 8 : TO - 8;
5331 Ops[0] = getI32Imm(Imm: TO, dl);
5332 Ops[1] = N->getOperand(Num: 3);
5333 Ops[2] = getI32Imm(Imm: int(SImmOperand2) & 0xFFFF, dl);
5334 }
5335 OpsWithMD = {Ops[0], Ops[1], Ops[2]};
5336 MDIndex = 5;
5337 } else {
5338 OpsWithMD = {getI32Imm(Imm: 24, dl), N->getOperand(Num: 2), getI32Imm(Imm: 0, dl)};
5339 MDIndex = 3;
5340 }
5341
5342 if (N->getNumOperands() > MDIndex) {
5343 SDValue MDV = N->getOperand(Num: MDIndex);
5344 const MDNode *MD = cast<MDNodeSDNode>(Val&: MDV)->getMD();
5345 assert(MD->getNumOperands() != 0 && "Empty MDNode in operands!");
5346 assert((isa<MDString>(MD->getOperand(0)) &&
5347 cast<MDString>(MD->getOperand(0))->getString() ==
5348 "ppc-trap-reason") &&
5349 "Unsupported annotation data type!");
5350 for (unsigned i = 1; i < MD->getNumOperands(); i++) {
5351 assert(isa<MDString>(MD->getOperand(i)) &&
5352 "Invalid data type for annotation ppc-trap-reason!");
5353 OpsWithMD.push_back(
5354 Elt: getI32Imm(Imm: std::stoi(str: cast<MDString>(
5355 Val: MD->getOperand(I: i))->getString().str()), dl));
5356 }
5357 }
5358 OpsWithMD.push_back(Elt: N->getOperand(Num: 0)); // chain
5359 CurDAG->SelectNodeTo(N, MachineOpc: Opcode, VT: MVT::Other, Ops: OpsWithMD);
5360 return;
5361 }
5362
5363 case ISD::INTRINSIC_WO_CHAIN: {
5364 // We emit the PPC::FSELS instruction here because of type conflicts with
5365 // the comparison operand. The FSELS instruction is defined to use an 8-byte
5366 // comparison like the FSELD version. The fsels intrinsic takes a 4-byte
5367 // value for the comparison. When selecting through a .td file, a type
5368 // error is raised. Must check this first so we never break on the
5369 // !Subtarget->isISA3_1() check.
5370 auto IntID = N->getConstantOperandVal(Num: 0);
5371 if (IntID == Intrinsic::ppc_fsels) {
5372 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2), N->getOperand(Num: 3)};
5373 CurDAG->SelectNodeTo(N, MachineOpc: PPC::FSELS, VT: MVT::f32, Ops);
5374 return;
5375 }
5376
5377 if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
5378 auto Pred = N->getConstantOperandVal(Num: 1);
5379 unsigned Opcode =
5380 IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
5381 unsigned SubReg = 0;
5382 unsigned ShiftVal = 0;
5383 bool Reverse = false;
5384 switch (Pred) {
5385 case 0:
5386 SubReg = PPC::sub_eq;
5387 ShiftVal = 1;
5388 break;
5389 case 1:
5390 SubReg = PPC::sub_eq;
5391 ShiftVal = 1;
5392 Reverse = true;
5393 break;
5394 case 2:
5395 SubReg = PPC::sub_lt;
5396 ShiftVal = 3;
5397 break;
5398 case 3:
5399 SubReg = PPC::sub_lt;
5400 ShiftVal = 3;
5401 Reverse = true;
5402 break;
5403 case 4:
5404 SubReg = PPC::sub_gt;
5405 ShiftVal = 2;
5406 break;
5407 case 5:
5408 SubReg = PPC::sub_gt;
5409 ShiftVal = 2;
5410 Reverse = true;
5411 break;
5412 case 6:
5413 SubReg = PPC::sub_un;
5414 break;
5415 case 7:
5416 SubReg = PPC::sub_un;
5417 Reverse = true;
5418 break;
5419 }
5420
5421 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5422 SDValue Ops[] = {N->getOperand(Num: 2), N->getOperand(Num: 3),
5423 CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32)};
5424 SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, ResultTys: VTs, Ops), 0);
5425 SDValue CR6Reg = CurDAG->getRegister(Reg: PPC::CR6, VT: MVT::i32);
5426 // On Power10, we can use SETBC[R]. On prior architectures, we have to use
5427 // MFOCRF and shift/negate the value.
5428 if (Subtarget->isISA3_1()) {
5429 SDValue SubRegIdx = CurDAG->getTargetConstant(Val: SubReg, DL: dl, VT: MVT::i32);
5430 SDValue CRBit = SDValue(
5431 CurDAG->getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::i1,
5432 Op1: CR6Reg, Op2: SubRegIdx, Op3: BCDOp.getValue(R: 1)),
5433 0);
5434 CurDAG->SelectNodeTo(N, MachineOpc: Reverse ? PPC::SETBCR : PPC::SETBC, VT: MVT::i32,
5435 Op1: CRBit);
5436 } else {
5437 SDValue Move =
5438 SDValue(CurDAG->getMachineNode(Opcode: PPC::MFOCRF, dl, VT: MVT::i32, Op1: CR6Reg,
5439 Op2: BCDOp.getValue(R: 1)),
5440 0);
5441 SDValue Ops[] = {Move, getI32Imm(Imm: (32 - (4 + ShiftVal)) & 31, dl),
5442 getI32Imm(Imm: 31, dl), getI32Imm(Imm: 31, dl)};
5443 if (!Reverse)
5444 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLWINM, VT: MVT::i32, Ops);
5445 else {
5446 SDValue Shift = SDValue(
5447 CurDAG->getMachineNode(Opcode: PPC::RLWINM, dl, VT: MVT::i32, Ops), 0);
5448 CurDAG->SelectNodeTo(N, MachineOpc: PPC::XORI, VT: MVT::i32, Op1: Shift, Op2: getI32Imm(Imm: 1, dl));
5449 }
5450 }
5451 return;
5452 }
5453
5454 if (!Subtarget->isISA3_1())
5455 break;
5456 unsigned Opcode = 0;
5457 switch (IntID) {
5458 default:
5459 break;
5460 case Intrinsic::ppc_altivec_vstribr_p:
5461 Opcode = PPC::VSTRIBR_rec;
5462 break;
5463 case Intrinsic::ppc_altivec_vstribl_p:
5464 Opcode = PPC::VSTRIBL_rec;
5465 break;
5466 case Intrinsic::ppc_altivec_vstrihr_p:
5467 Opcode = PPC::VSTRIHR_rec;
5468 break;
5469 case Intrinsic::ppc_altivec_vstrihl_p:
5470 Opcode = PPC::VSTRIHL_rec;
5471 break;
5472 }
5473 if (!Opcode)
5474 break;
5475
5476 // Generate the appropriate vector string isolate intrinsic to match.
5477 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5478 SDValue VecStrOp =
5479 SDValue(CurDAG->getMachineNode(Opcode, dl, ResultTys: VTs, Ops: N->getOperand(Num: 2)), 0);
5480 // Vector string isolate instructions update the EQ bit of CR6.
5481 // Generate a SETBC instruction to extract the bit and place it in a GPR.
5482 SDValue SubRegIdx = CurDAG->getTargetConstant(Val: PPC::sub_eq, DL: dl, VT: MVT::i32);
5483 SDValue CR6Reg = CurDAG->getRegister(Reg: PPC::CR6, VT: MVT::i32);
5484 SDValue CRBit = SDValue(
5485 CurDAG->getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::i1,
5486 Op1: CR6Reg, Op2: SubRegIdx, Op3: VecStrOp.getValue(R: 1)),
5487 0);
5488 CurDAG->SelectNodeTo(N, MachineOpc: PPC::SETBC, VT: MVT::i32, Op1: CRBit);
5489 return;
5490 }
5491
5492 case ISD::SETCC:
5493 case ISD::STRICT_FSETCC:
5494 case ISD::STRICT_FSETCCS:
5495 if (trySETCC(N))
5496 return;
5497 break;
5498 // These nodes will be transformed into GETtlsADDR32 node, which
5499 // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
5500 case PPCISD::ADDI_TLSLD_L_ADDR:
5501 case PPCISD::ADDI_TLSGD_L_ADDR: {
5502 const Module *Mod = MF->getFunction().getParent();
5503 if (PPCLowering->getPointerTy(DL: CurDAG->getDataLayout()) != MVT::i32 ||
5504 !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
5505 Mod->getPICLevel() == PICLevel::SmallPIC)
5506 break;
5507 // Attach global base pointer on GETtlsADDR32 node in order to
5508 // generate secure plt code for TLS symbols.
5509 getGlobalBaseReg();
5510 } break;
5511 case PPCISD::CALL:
5512 case PPCISD::CALL_RM: {
5513 if (Subtarget->isPPC64() || !TM.isPositionIndependent() ||
5514 !Subtarget->isSecurePlt() || !Subtarget->isTargetELF())
5515 break;
5516
5517 SDValue Op = N->getOperand(Num: 1);
5518
5519 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
5520 if (GA->getTargetFlags() == PPCII::MO_PLT)
5521 getGlobalBaseReg();
5522 }
5523 else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Val&: Op)) {
5524 if (ES->getTargetFlags() == PPCII::MO_PLT)
5525 getGlobalBaseReg();
5526 }
5527 } break;
5528
5529 case PPCISD::GlobalBaseReg:
5530 ReplaceNode(F: N, T: getGlobalBaseReg());
5531 return;
5532
5533 case ISD::FrameIndex:
5534 selectFrameIndex(SN: N, N);
5535 return;
5536
5537 case PPCISD::MFOCRF: {
5538 SDValue InGlue = N->getOperand(Num: 1);
5539 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: PPC::MFOCRF, dl, VT: MVT::i32,
5540 Op1: N->getOperand(Num: 0), Op2: InGlue));
5541 return;
5542 }
5543
5544 case PPCISD::READ_TIME_BASE:
5545 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: PPC::ReadTB, dl, VT1: MVT::i32, VT2: MVT::i32,
5546 VT3: MVT::Other, Ops: N->getOperand(Num: 0)));
5547 return;
5548
5549 case PPCISD::SRA_ADDZE: {
5550 SDValue N0 = N->getOperand(Num: 0);
5551 SDValue ShiftAmt =
5552 CurDAG->getTargetConstant(Val: *cast<ConstantSDNode>(Val: N->getOperand(Num: 1))->
5553 getConstantIntValue(), DL: dl,
5554 VT: N->getValueType(ResNo: 0));
5555 if (N->getValueType(ResNo: 0) == MVT::i64) {
5556 SDNode *Op =
5557 CurDAG->getMachineNode(Opcode: PPC::SRADI, dl, VT1: MVT::i64, VT2: MVT::Glue,
5558 Op1: N0, Op2: ShiftAmt);
5559 CurDAG->SelectNodeTo(N, MachineOpc: PPC::ADDZE8, VT: MVT::i64, Op1: SDValue(Op, 0),
5560 Op2: SDValue(Op, 1));
5561 return;
5562 } else {
5563 assert(N->getValueType(0) == MVT::i32 &&
5564 "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
5565 SDNode *Op =
5566 CurDAG->getMachineNode(Opcode: PPC::SRAWI, dl, VT1: MVT::i32, VT2: MVT::Glue,
5567 Op1: N0, Op2: ShiftAmt);
5568 CurDAG->SelectNodeTo(N, MachineOpc: PPC::ADDZE, VT: MVT::i32, Op1: SDValue(Op, 0),
5569 Op2: SDValue(Op, 1));
5570 return;
5571 }
5572 }
5573
5574 case ISD::STORE: {
5575 // Change TLS initial-exec (or TLS local-exec on AIX) D-form stores to
5576 // X-form stores.
5577 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
5578 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()) &&
5579 ST->getAddressingMode() != ISD::PRE_INC)
5580 if (tryTLSXFormStore(ST))
5581 return;
5582 break;
5583 }
5584 case ISD::LOAD: {
5585 // Handle preincrement loads.
5586 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
5587 EVT LoadedVT = LD->getMemoryVT();
5588
5589 // Normal loads are handled by code generated from the .td file.
5590 if (LD->getAddressingMode() != ISD::PRE_INC) {
5591 // Change TLS initial-exec (or TLS local-exec on AIX) D-form loads to
5592 // X-form loads.
5593 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()))
5594 if (tryTLSXFormLoad(LD))
5595 return;
5596 break;
5597 }
5598
5599 SDValue Offset = LD->getOffset();
5600 if (Offset.getOpcode() == ISD::TargetConstant ||
5601 Offset.getOpcode() == ISD::TargetGlobalAddress) {
5602
5603 unsigned Opcode;
5604 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5605 if (LD->getValueType(ResNo: 0) != MVT::i64) {
5606 // Handle PPC32 integer and normal FP loads.
5607 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5608 switch (LoadedVT.getSimpleVT().SimpleTy) {
5609 default: llvm_unreachable("Invalid PPC load type!");
5610 case MVT::f64: Opcode = PPC::LFDU; break;
5611 case MVT::f32: Opcode = PPC::LFSU; break;
5612 case MVT::i32: Opcode = PPC::LWZU; break;
5613 case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
5614 case MVT::i1:
5615 case MVT::i8: Opcode = PPC::LBZU; break;
5616 }
5617 } else {
5618 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5619 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5620 switch (LoadedVT.getSimpleVT().SimpleTy) {
5621 default: llvm_unreachable("Invalid PPC load type!");
5622 case MVT::i64: Opcode = PPC::LDU; break;
5623 case MVT::i32: Opcode = PPC::LWZU8; break;
5624 case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
5625 case MVT::i1:
5626 case MVT::i8: Opcode = PPC::LBZU8; break;
5627 }
5628 }
5629
5630 SDValue Chain = LD->getChain();
5631 SDValue Base = LD->getBasePtr();
5632 SDValue Ops[] = { Offset, Base, Chain };
5633 SDNode *MN = CurDAG->getMachineNode(
5634 Opcode, dl, VT1: LD->getValueType(ResNo: 0),
5635 VT2: PPCLowering->getPointerTy(DL: CurDAG->getDataLayout()), VT3: MVT::Other, Ops);
5636 transferMemOperands(N, Result: MN);
5637 ReplaceNode(F: N, T: MN);
5638 return;
5639 } else {
5640 unsigned Opcode;
5641 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5642 if (LD->getValueType(ResNo: 0) != MVT::i64) {
5643 // Handle PPC32 integer and normal FP loads.
5644 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5645 switch (LoadedVT.getSimpleVT().SimpleTy) {
5646 default: llvm_unreachable("Invalid PPC load type!");
5647 case MVT::f64: Opcode = PPC::LFDUX; break;
5648 case MVT::f32: Opcode = PPC::LFSUX; break;
5649 case MVT::i32: Opcode = PPC::LWZUX; break;
5650 case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
5651 case MVT::i1:
5652 case MVT::i8: Opcode = PPC::LBZUX; break;
5653 }
5654 } else {
5655 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5656 assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
5657 "Invalid sext update load");
5658 switch (LoadedVT.getSimpleVT().SimpleTy) {
5659 default: llvm_unreachable("Invalid PPC load type!");
5660 case MVT::i64: Opcode = PPC::LDUX; break;
5661 case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
5662 case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
5663 case MVT::i1:
5664 case MVT::i8: Opcode = PPC::LBZUX8; break;
5665 }
5666 }
5667
5668 SDValue Chain = LD->getChain();
5669 SDValue Base = LD->getBasePtr();
5670 SDValue Ops[] = { Base, Offset, Chain };
5671 SDNode *MN = CurDAG->getMachineNode(
5672 Opcode, dl, VT1: LD->getValueType(ResNo: 0),
5673 VT2: PPCLowering->getPointerTy(DL: CurDAG->getDataLayout()), VT3: MVT::Other, Ops);
5674 transferMemOperands(N, Result: MN);
5675 ReplaceNode(F: N, T: MN);
5676 return;
5677 }
5678 }
5679
5680 case ISD::AND:
5681 // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
5682 if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDCL(N) ||
5683 tryAsSingleRLDICL(N) || tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) ||
5684 tryAsPairOfRLDICL(N))
5685 return;
5686
5687 // Other cases are autogenerated.
5688 break;
5689 case ISD::OR: {
5690 if (N->getValueType(ResNo: 0) == MVT::i32)
5691 if (tryBitfieldInsert(N))
5692 return;
5693
5694 int16_t Imm;
5695 if (N->getOperand(Num: 0)->getOpcode() == ISD::FrameIndex &&
5696 isIntS16Immediate(Op: N->getOperand(Num: 1), Imm)) {
5697 KnownBits LHSKnown = CurDAG->computeKnownBits(Op: N->getOperand(Num: 0));
5698
5699 // If this is equivalent to an add, then we can fold it with the
5700 // FrameIndex calculation.
5701 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
5702 selectFrameIndex(SN: N, N: N->getOperand(Num: 0).getNode(), Offset: (int64_t)Imm);
5703 return;
5704 }
5705 }
5706
5707 // If this is 'or' against an imm with consecutive ones and both sides zero,
5708 // try to emit rldimi
5709 if (tryAsSingleRLDIMI(N))
5710 return;
5711
5712 // OR with a 32-bit immediate can be handled by ori + oris
5713 // without creating an immediate in a GPR.
5714 uint64_t Imm64 = 0;
5715 bool IsPPC64 = Subtarget->isPPC64();
5716 if (IsPPC64 && isInt64Immediate(N: N->getOperand(Num: 1), Imm&: Imm64) &&
5717 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5718 // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
5719 uint64_t ImmHi = Imm64 >> 16;
5720 uint64_t ImmLo = Imm64 & 0xFFFF;
5721 if (ImmHi != 0 && ImmLo != 0) {
5722 SDNode *Lo = CurDAG->getMachineNode(Opcode: PPC::ORI8, dl, VT: MVT::i64,
5723 Op1: N->getOperand(Num: 0),
5724 Op2: getI16Imm(Imm: ImmLo, dl));
5725 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(Imm: ImmHi, dl)};
5726 CurDAG->SelectNodeTo(N, MachineOpc: PPC::ORIS8, VT: MVT::i64, Ops: Ops1);
5727 return;
5728 }
5729 }
5730
5731 // Other cases are autogenerated.
5732 break;
5733 }
5734 case ISD::XOR: {
5735 // XOR with a 32-bit immediate can be handled by xori + xoris
5736 // without creating an immediate in a GPR.
5737 uint64_t Imm64 = 0;
5738 bool IsPPC64 = Subtarget->isPPC64();
5739 if (IsPPC64 && isInt64Immediate(N: N->getOperand(Num: 1), Imm&: Imm64) &&
5740 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5741 // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
5742 uint64_t ImmHi = Imm64 >> 16;
5743 uint64_t ImmLo = Imm64 & 0xFFFF;
5744 if (ImmHi != 0 && ImmLo != 0) {
5745 SDNode *Lo = CurDAG->getMachineNode(Opcode: PPC::XORI8, dl, VT: MVT::i64,
5746 Op1: N->getOperand(Num: 0),
5747 Op2: getI16Imm(Imm: ImmLo, dl));
5748 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(Imm: ImmHi, dl)};
5749 CurDAG->SelectNodeTo(N, MachineOpc: PPC::XORIS8, VT: MVT::i64, Ops: Ops1);
5750 return;
5751 }
5752 }
5753
5754 break;
5755 }
5756 case ISD::ADD: {
5757 int16_t Imm;
5758 if (N->getOperand(Num: 0)->getOpcode() == ISD::FrameIndex &&
5759 isIntS16Immediate(Op: N->getOperand(Num: 1), Imm)) {
5760 selectFrameIndex(SN: N, N: N->getOperand(Num: 0).getNode(), Offset: (int64_t)Imm);
5761 return;
5762 }
5763
5764 break;
5765 }
5766 case ISD::SHL: {
5767 unsigned Imm, SH, MB, ME;
5768 if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::AND, Imm) &&
5769 isRotateAndMask(N, Mask: Imm, isShiftMask: true, SH, MB, ME)) {
5770 SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
5771 getI32Imm(Imm: SH, dl), getI32Imm(Imm: MB, dl),
5772 getI32Imm(Imm: ME, dl) };
5773 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLWINM, VT: MVT::i32, Ops);
5774 return;
5775 }
5776
5777 // Other cases are autogenerated.
5778 break;
5779 }
5780 case ISD::SRL: {
5781 unsigned Imm, SH, MB, ME;
5782 if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::AND, Imm) &&
5783 isRotateAndMask(N, Mask: Imm, isShiftMask: true, SH, MB, ME)) {
5784 SDValue Ops[] = { N->getOperand(Num: 0).getOperand(i: 0),
5785 getI32Imm(Imm: SH, dl), getI32Imm(Imm: MB, dl),
5786 getI32Imm(Imm: ME, dl) };
5787 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLWINM, VT: MVT::i32, Ops);
5788 return;
5789 }
5790
5791 // Other cases are autogenerated.
5792 break;
5793 }
5794 case ISD::MUL: {
5795 SDValue Op1 = N->getOperand(Num: 1);
5796 if (Op1.getOpcode() != ISD::Constant ||
5797 (Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32))
5798 break;
5799
5800 // If the multiplier fits int16, we can handle it with mulli.
5801 int64_t Imm = Op1->getAsZExtVal();
5802 unsigned Shift = llvm::countr_zero<uint64_t>(Val: Imm);
5803 if (isInt<16>(x: Imm) || !Shift)
5804 break;
5805
5806 // If the shifted value fits int16, we can do this transformation:
5807 // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL because
5808 // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
5809 uint64_t ImmSh = Imm >> Shift;
5810 if (!isInt<16>(x: ImmSh))
5811 break;
5812
5813 uint64_t SextImm = SignExtend64(X: ImmSh & 0xFFFF, B: 16);
5814 if (Op1.getValueType() == MVT::i64) {
5815 SDValue SDImm = CurDAG->getTargetConstant(Val: SextImm, DL: dl, VT: MVT::i64);
5816 SDNode *MulNode = CurDAG->getMachineNode(Opcode: PPC::MULLI8, dl, VT: MVT::i64,
5817 Op1: N->getOperand(Num: 0), Op2: SDImm);
5818
5819 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Imm: Shift, dl),
5820 getI32Imm(Imm: 63 - Shift, dl)};
5821 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLDICR, VT: MVT::i64, Ops);
5822 return;
5823 } else {
5824 SDValue SDImm = CurDAG->getTargetConstant(Val: SextImm, DL: dl, VT: MVT::i32);
5825 SDNode *MulNode = CurDAG->getMachineNode(Opcode: PPC::MULLI, dl, VT: MVT::i32,
5826 Op1: N->getOperand(Num: 0), Op2: SDImm);
5827
5828 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Imm: Shift, dl),
5829 getI32Imm(Imm: 0, dl), getI32Imm(Imm: 31 - Shift, dl)};
5830 CurDAG->SelectNodeTo(N, MachineOpc: PPC::RLWINM, VT: MVT::i32, Ops);
5831 return;
5832 }
5833 break;
5834 }
5835 // FIXME: Remove this once the ANDI glue bug is fixed:
5836 case PPCISD::ANDI_rec_1_EQ_BIT:
5837 case PPCISD::ANDI_rec_1_GT_BIT: {
5838 if (!ANDIGlueBug)
5839 break;
5840
5841 EVT InVT = N->getOperand(Num: 0).getValueType();
5842 assert((InVT == MVT::i64 || InVT == MVT::i32) &&
5843 "Invalid input type for ANDI_rec_1_EQ_BIT");
5844
5845 unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
5846 SDValue AndI(CurDAG->getMachineNode(Opcode, dl, VT1: InVT, VT2: MVT::Glue,
5847 Op1: N->getOperand(Num: 0),
5848 Op2: CurDAG->getTargetConstant(Val: 1, DL: dl, VT: InVT)),
5849 0);
5850 SDValue CR0Reg = CurDAG->getRegister(Reg: PPC::CR0, VT: MVT::i32);
5851 SDValue SRIdxVal = CurDAG->getTargetConstant(
5852 Val: N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
5853 DL: dl, VT: MVT::i32);
5854
5855 CurDAG->SelectNodeTo(N, MachineOpc: TargetOpcode::EXTRACT_SUBREG, VT: MVT::i1, Op1: CR0Reg,
5856 Op2: SRIdxVal, Op3: SDValue(AndI.getNode(), 1) /* glue */);
5857 return;
5858 }
5859 case ISD::SELECT_CC: {
5860 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 4))->get();
5861 EVT PtrVT =
5862 CurDAG->getTargetLoweringInfo().getPointerTy(DL: CurDAG->getDataLayout());
5863 bool isPPC64 = (PtrVT == MVT::i64);
5864
5865 // If this is a select of i1 operands, we'll pattern match it.
5866 if (Subtarget->useCRBits() && N->getOperand(Num: 0).getValueType() == MVT::i1)
5867 break;
5868
5869 if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
5870 bool NeedSwapOps = false;
5871 bool IsUnCmp = false;
5872 if (mayUseP9Setb(N, CC, DAG: CurDAG, NeedSwapOps, IsUnCmp)) {
5873 SDValue LHS = N->getOperand(Num: 0);
5874 SDValue RHS = N->getOperand(Num: 1);
5875 if (NeedSwapOps)
5876 std::swap(a&: LHS, b&: RHS);
5877
5878 // Use SelectCC to generate the comparison that sets the CR bits. For
5879 // equality comparisons with one literal operand, SelectCC may avoid
5880 // materializing the whole literal and check it with xoris first, in
5881 // which case the resulting comparison no longer exactly represents the
5882 // GT/LT relationship. To avoid this we specify SETGT/SETUGT here instead
5883 // of SETEQ.
5884 SDValue GenCC =
5885 SelectCC(LHS, RHS, CC: IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
5886 CurDAG->SelectNodeTo(
5887 N, MachineOpc: N->getSimpleValueType(ResNo: 0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
5888 VT: N->getValueType(ResNo: 0), Op1: GenCC);
5889 NumP9Setb++;
5890 return;
5891 }
5892 }
5893
5894 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
5895 if (!isPPC64 && isNullConstant(V: N->getOperand(Num: 1)) &&
5896 isOneConstant(V: N->getOperand(Num: 2)) && isNullConstant(V: N->getOperand(Num: 3)) &&
5897 CC == ISD::SETNE &&
5898 // FIXME: Implement this optzn for PPC64.
5899 N->getValueType(ResNo: 0) == MVT::i32) {
5900 SDNode *Tmp =
5901 CurDAG->getMachineNode(Opcode: PPC::ADDIC, dl, VT1: MVT::i32, VT2: MVT::Glue,
5902 Op1: N->getOperand(Num: 0), Op2: getI32Imm(Imm: ~0U, dl));
5903 CurDAG->SelectNodeTo(N, MachineOpc: PPC::SUBFE, VT: MVT::i32, Op1: SDValue(Tmp, 0),
5904 Op2: N->getOperand(Num: 0), Op3: SDValue(Tmp, 1));
5905 return;
5906 }
5907
5908 SDValue CCReg = SelectCC(LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), CC, dl);
5909
5910 if (N->getValueType(ResNo: 0) == MVT::i1) {
5911 // An i1 select is: (c & t) | (!c & f).
5912 bool Inv;
5913 unsigned Idx = getCRIdxForSetCC(CC, Invert&: Inv);
5914
5915 unsigned SRI;
5916 switch (Idx) {
5917 default: llvm_unreachable("Invalid CC index");
5918 case 0: SRI = PPC::sub_lt; break;
5919 case 1: SRI = PPC::sub_gt; break;
5920 case 2: SRI = PPC::sub_eq; break;
5921 case 3: SRI = PPC::sub_un; break;
5922 }
5923
5924 SDValue CCBit = CurDAG->getTargetExtractSubreg(SRIdx: SRI, DL: dl, VT: MVT::i1, Operand: CCReg);
5925
5926 SDValue NotCCBit(CurDAG->getMachineNode(Opcode: PPC::CRNOR, dl, VT: MVT::i1,
5927 Op1: CCBit, Op2: CCBit), 0);
5928 SDValue C = Inv ? NotCCBit : CCBit,
5929 NotC = Inv ? CCBit : NotCCBit;
5930
5931 SDValue CAndT(CurDAG->getMachineNode(Opcode: PPC::CRAND, dl, VT: MVT::i1,
5932 Op1: C, Op2: N->getOperand(Num: 2)), 0);
5933 SDValue NotCAndF(CurDAG->getMachineNode(Opcode: PPC::CRAND, dl, VT: MVT::i1,
5934 Op1: NotC, Op2: N->getOperand(Num: 3)), 0);
5935
5936 CurDAG->SelectNodeTo(N, MachineOpc: PPC::CROR, VT: MVT::i1, Op1: CAndT, Op2: NotCAndF);
5937 return;
5938 }
5939
5940 unsigned BROpc =
5941 getPredicateForSetCC(CC, VT: N->getOperand(Num: 0).getValueType(), Subtarget);
5942
5943 unsigned SelectCCOp;
5944 if (N->getValueType(ResNo: 0) == MVT::i32)
5945 SelectCCOp = PPC::SELECT_CC_I4;
5946 else if (N->getValueType(ResNo: 0) == MVT::i64)
5947 SelectCCOp = PPC::SELECT_CC_I8;
5948 else if (N->getValueType(ResNo: 0) == MVT::f32) {
5949 if (Subtarget->hasP8Vector())
5950 SelectCCOp = PPC::SELECT_CC_VSSRC;
5951 else if (Subtarget->hasSPE())
5952 SelectCCOp = PPC::SELECT_CC_SPE4;
5953 else
5954 SelectCCOp = PPC::SELECT_CC_F4;
5955 } else if (N->getValueType(ResNo: 0) == MVT::f64) {
5956 if (Subtarget->hasVSX())
5957 SelectCCOp = PPC::SELECT_CC_VSFRC;
5958 else if (Subtarget->hasSPE())
5959 SelectCCOp = PPC::SELECT_CC_SPE;
5960 else
5961 SelectCCOp = PPC::SELECT_CC_F8;
5962 } else if (N->getValueType(ResNo: 0) == MVT::f128)
5963 SelectCCOp = PPC::SELECT_CC_F16;
5964 else if (Subtarget->hasSPE())
5965 SelectCCOp = PPC::SELECT_CC_SPE;
5966 else if (N->getValueType(ResNo: 0) == MVT::v2f64 ||
5967 N->getValueType(ResNo: 0) == MVT::v2i64)
5968 SelectCCOp = PPC::SELECT_CC_VSRC;
5969 else
5970 SelectCCOp = PPC::SELECT_CC_VRRC;
5971
5972 SDValue Ops[] = { CCReg, N->getOperand(Num: 2), N->getOperand(Num: 3),
5973 getI32Imm(Imm: BROpc, dl) };
5974 CurDAG->SelectNodeTo(N, MachineOpc: SelectCCOp, VT: N->getValueType(ResNo: 0), Ops);
5975 return;
5976 }
5977 case ISD::VECTOR_SHUFFLE:
5978 if (Subtarget->hasVSX() && (N->getValueType(ResNo: 0) == MVT::v2f64 ||
5979 N->getValueType(ResNo: 0) == MVT::v2i64)) {
5980 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: N);
5981
5982 SDValue Op1 = N->getOperand(Num: SVN->getMaskElt(Idx: 0) < 2 ? 0 : 1),
5983 Op2 = N->getOperand(Num: SVN->getMaskElt(Idx: 1) < 2 ? 0 : 1);
5984 unsigned DM[2];
5985
5986 for (int i = 0; i < 2; ++i)
5987 if (SVN->getMaskElt(Idx: i) <= 0 || SVN->getMaskElt(Idx: i) == 2)
5988 DM[i] = 0;
5989 else
5990 DM[i] = 1;
5991
5992 if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
5993 Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
5994 isa<LoadSDNode>(Val: Op1.getOperand(i: 0))) {
5995 LoadSDNode *LD = cast<LoadSDNode>(Val: Op1.getOperand(i: 0));
5996 SDValue Base, Offset;
5997
5998 if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
5999 (LD->getMemoryVT() == MVT::f64 ||
6000 LD->getMemoryVT() == MVT::i64) &&
6001 SelectAddrIdxOnly(N: LD->getBasePtr(), Base, Index&: Offset)) {
6002 SDValue Chain = LD->getChain();
6003 SDValue Ops[] = { Base, Offset, Chain };
6004 MachineMemOperand *MemOp = LD->getMemOperand();
6005 SDNode *NewN = CurDAG->SelectNodeTo(N, MachineOpc: PPC::LXVDSX,
6006 VT: N->getValueType(ResNo: 0), Ops);
6007 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: NewN), NewMemRefs: {MemOp});
6008 return;
6009 }
6010 }
6011
6012 // For little endian, we must swap the input operands and adjust
6013 // the mask elements (reverse and invert them).
6014 if (Subtarget->isLittleEndian()) {
6015 std::swap(a&: Op1, b&: Op2);
6016 unsigned tmp = DM[0];
6017 DM[0] = 1 - DM[1];
6018 DM[1] = 1 - tmp;
6019 }
6020
6021 SDValue DMV = CurDAG->getTargetConstant(Val: DM[1] | (DM[0] << 1), DL: dl,
6022 VT: MVT::i32);
6023 SDValue Ops[] = { Op1, Op2, DMV };
6024 CurDAG->SelectNodeTo(N, MachineOpc: PPC::XXPERMDI, VT: N->getValueType(ResNo: 0), Ops);
6025 return;
6026 }
6027
6028 break;
6029 case PPCISD::BDNZ:
6030 case PPCISD::BDZ: {
6031 bool IsPPC64 = Subtarget->isPPC64();
6032 SDValue Ops[] = { N->getOperand(Num: 1), N->getOperand(Num: 0) };
6033 CurDAG->SelectNodeTo(N, MachineOpc: N->getOpcode() == PPCISD::BDNZ
6034 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
6035 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
6036 VT: MVT::Other, Ops);
6037 return;
6038 }
6039 case PPCISD::COND_BRANCH: {
6040 // Op #0 is the Chain.
6041 // Op #1 is the PPC::PRED_* number.
6042 // Op #2 is the CR#
6043 // Op #3 is the Dest MBB
6044 // Op #4 is the Flag.
6045 // Prevent PPC::PRED_* from being selected into LI.
6046 unsigned PCC = N->getConstantOperandVal(Num: 1);
6047 if (EnableBranchHint)
6048 PCC |= getBranchHint(PCC, FuncInfo: *FuncInfo, DestMBB: N->getOperand(Num: 3));
6049
6050 SDValue Pred = getI32Imm(Imm: PCC, dl);
6051 SDValue Ops[] = { Pred, N->getOperand(Num: 2), N->getOperand(Num: 3),
6052 N->getOperand(Num: 0), N->getOperand(Num: 4) };
6053 CurDAG->SelectNodeTo(N, MachineOpc: PPC::BCC, VT: MVT::Other, Ops);
6054 return;
6055 }
6056 case ISD::BR_CC: {
6057 if (tryFoldSWTestBRCC(N))
6058 return;
6059 if (trySelectLoopCountIntrinsic(N))
6060 return;
6061 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 1))->get();
6062 unsigned PCC =
6063 getPredicateForSetCC(CC, VT: N->getOperand(Num: 2).getValueType(), Subtarget);
6064
6065 if (N->getOperand(Num: 2).getValueType() == MVT::i1) {
6066 unsigned Opc;
6067 bool Swap;
6068 switch (PCC) {
6069 default: llvm_unreachable("Unexpected Boolean-operand predicate");
6070 case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
6071 case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
6072 case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
6073 case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
6074 case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
6075 case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
6076 }
6077
6078 // A signed comparison of i1 values produces the opposite result to an
6079 // unsigned one if the condition code includes less-than or greater-than.
6080 // This is because 1 is the most negative signed i1 number and the most
6081 // positive unsigned i1 number. The CR-logical operations used for such
6082 // comparisons are non-commutative so for signed comparisons vs. unsigned
6083 // ones, the input operands just need to be swapped.
6084 if (ISD::isSignedIntSetCC(Code: CC))
6085 Swap = !Swap;
6086
6087 SDValue BitComp(CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i1,
6088 Op1: N->getOperand(Num: Swap ? 3 : 2),
6089 Op2: N->getOperand(Num: Swap ? 2 : 3)), 0);
6090 CurDAG->SelectNodeTo(N, MachineOpc: PPC::BC, VT: MVT::Other, Op1: BitComp, Op2: N->getOperand(Num: 4),
6091 Op3: N->getOperand(Num: 0));
6092 return;
6093 }
6094
6095 if (EnableBranchHint)
6096 PCC |= getBranchHint(PCC, FuncInfo: *FuncInfo, DestMBB: N->getOperand(Num: 4));
6097
6098 SDValue CondCode = SelectCC(LHS: N->getOperand(Num: 2), RHS: N->getOperand(Num: 3), CC, dl);
6099 SDValue Ops[] = { getI32Imm(Imm: PCC, dl), CondCode,
6100 N->getOperand(Num: 4), N->getOperand(Num: 0) };
6101 CurDAG->SelectNodeTo(N, MachineOpc: PPC::BCC, VT: MVT::Other, Ops);
6102 return;
6103 }
6104 case ISD::BRIND: {
6105 // FIXME: Should custom lower this.
6106 SDValue Chain = N->getOperand(Num: 0);
6107 SDValue Target = N->getOperand(Num: 1);
6108 unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
6109 unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
6110 Chain = SDValue(CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::Glue, Op1: Target,
6111 Op2: Chain), 0);
6112 CurDAG->SelectNodeTo(N, MachineOpc: Reg, VT: MVT::Other, Op1: Chain);
6113 return;
6114 }
6115 case PPCISD::TOC_ENTRY: {
6116 const bool isPPC64 = Subtarget->isPPC64();
6117 const bool isELFABI = Subtarget->isSVR4ABI();
6118 const bool isAIXABI = Subtarget->isAIXABI();
6119
6120 // PowerPC only supports the small, medium and large code models.
6121 const CodeModel::Model CModel = getCodeModel(Subtarget: *Subtarget, TM, Node: N);
6122
6123 assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
6124 "PowerPC doesn't support tiny or kernel code models.");
6125
6126 if (isAIXABI && CModel == CodeModel::Medium)
6127 report_fatal_error(reason: "Medium code model is not supported on AIX.");
6128
6129 // For 64-bit ELF small code model, we allow SelectCodeCommon to handle
6130 // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
6131 // small code model, we need to check for a toc-data attribute.
6132 if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
6133 break;
6134
// Helper that replaces the node TocEntry with a newly created machine node of
// opcode OpCode whose single result has type OperandTy.
//   OpCode    - machine opcode to emit.
//   TocEntry  - node being replaced; its operand 0 is read as GA and its
//               operand 1 as TocBase.
//   OperandTy - value type given to the new machine node.
// The operand order passed to the new node depends on the opcode (see below).
6135 auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
6136 EVT OperandTy) {
6137 SDValue GA = TocEntry->getOperand(Num: 0);
6138 SDValue TocBase = TocEntry->getOperand(Num: 1);
6139 SDNode *MN = nullptr;
6140 if (OpCode == PPC::ADDItoc || OpCode == PPC::ADDItoc8)
6141 // A toc-data access does not involve loading from the GOT, so there is
6142 // no need to keep memory operands. Operands are passed as (TocBase, GA).
6143 MN = CurDAG->getMachineNode(Opcode: OpCode, dl, VT: OperandTy, Op1: TocBase, Op2: GA);
6144 else {
// All other opcodes take the operands as (GA, TocBase) and inherit the
// memory operands of the node being replaced.
6145 MN = CurDAG->getMachineNode(Opcode: OpCode, dl, VT: OperandTy, Op1: GA, Op2: TocBase);
6146 transferMemOperands(N: TocEntry, Result: MN);
6147 }
6148 ReplaceNode(F: TocEntry, T: MN);
6149 };
6150
6151 // Handle 32-bit small code model.
6152 if (!isPPC64 && CModel == CodeModel::Small) {
6153 // Transform the PPCISD::TOC_ENTRY node into the passed-in opcode, either
6154 // PPC::ADDItoc or PPC::LWZtoc.
6155 if (isELFABI) {
6156 assert(TM.isPositionIndependent() &&
6157 "32-bit ELF can only have TOC entries in position independent"
6158 " code.");
6159 // 32-bit ELF always uses a small code model toc access.
6160 replaceWith(PPC::LWZtoc, N, MVT::i32);
6161 return;
6162 }
6163
6164 assert(isAIXABI && "ELF ABI already handled");
6165
6166 if (hasTocDataAttr(Val: N->getOperand(Num: 0))) {
6167 replaceWith(PPC::ADDItoc, N, MVT::i32);
6168 return;
6169 }
6170
6171 replaceWith(PPC::LWZtoc, N, MVT::i32);
6172 return;
6173 }
6174
6175 if (isPPC64 && CModel == CodeModel::Small) {
6176 assert(isAIXABI && "ELF ABI handled in common SelectCode");
6177
6178 if (hasTocDataAttr(Val: N->getOperand(Num: 0))) {
6179 replaceWith(PPC::ADDItoc8, N, MVT::i64);
6180 return;
6181 }
6182 // Break if it doesn't have toc data attribute. Proceed with common
6183 // SelectCode.
6184 break;
6185 }
6186
6187 assert(CModel != CodeModel::Small && "All small code models handled.");
6188
6189 assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
6190 " ELF/AIX or 32-bit AIX in the following.");
6191
6192 // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode,
6193 // 64-bit medium (ELF-only), or 64-bit large (ELF and AIX) code model code
6194 // that does not contain TOC data symbols. We generate two instructions as
6195 // described below. The first source operand is a symbol reference. If it
6196 // must be referenced via the TOC according to Subtarget, we generate:
6197 // [32-bit AIX]
6198 // LWZtocL(@sym, ADDIStocHA(%r2, @sym))
6199 // [64-bit ELF/AIX]
6200 // LDtocL(@sym, ADDIStocHA8(%x2, @sym))
6201 // Otherwise for medium code model ELF we generate:
6202 // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6203
6204 // And finally for AIX with toc-data we generate:
6205 // [32-bit AIX]
6206 // ADDItocL(ADDIStocHA(%x2, @sym), @sym)
6207 // [64-bit AIX]
6208 // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6209
6210 SDValue GA = N->getOperand(Num: 0);
6211 SDValue TOCbase = N->getOperand(Num: 1);
6212
6213 EVT VT = Subtarget->getScalarIntVT();
6214 SDNode *Tmp = CurDAG->getMachineNode(
6215 Opcode: isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, Op1: TOCbase, Op2: GA);
6216
6217 // On AIX, if the symbol has the toc-data attribute it will be defined
6218 // in the TOC entry, so we use an ADDItocL/ADDItocL8.
6219 if (isAIXABI && hasTocDataAttr(Val: GA)) {
6220 ReplaceNode(
6221 F: N, T: CurDAG->getMachineNode(Opcode: isPPC64 ? PPC::ADDItocL8 : PPC::ADDItocL,
6222 dl, VT, Op1: SDValue(Tmp, 0), Op2: GA));
6223 return;
6224 }
6225
6226 if (PPCLowering->isAccessedAsGotIndirect(N: GA)) {
6227 // If it is accessed as got-indirect, we need an extra LWZ/LD to load
6228 // the address.
6229 SDNode *MN = CurDAG->getMachineNode(
6230 Opcode: isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, Op1: GA, Op2: SDValue(Tmp, 0));
6231
6232 transferMemOperands(N, Result: MN);
6233 ReplaceNode(F: N, T: MN);
6234 return;
6235 }
6236
6237 assert(isPPC64 && "TOC_ENTRY already handled for 32-bit.");
6238 // Build the address relative to the TOC-pointer.
6239 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: PPC::ADDItocL8, dl, VT: MVT::i64,
6240 Op1: SDValue(Tmp, 0), Op2: GA));
6241 return;
6242 }
6243 case PPCISD::PPC32_PICGOT:
6244 // Generate a PIC-safe GOT reference.
6245 assert(Subtarget->is32BitELFABI() &&
6246 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
6247 CurDAG->SelectNodeTo(N, MachineOpc: PPC::PPC32PICGOT,
6248 VT1: PPCLowering->getPointerTy(DL: CurDAG->getDataLayout()),
6249 VT2: MVT::i32);
6250 return;
6251
6252 case PPCISD::VADD_SPLAT: {
6253 // This expands into one of three sequences, depending on whether
6254 // the first operand is odd or even, positive or negative.
6255 assert(isa<ConstantSDNode>(N->getOperand(0)) &&
6256 isa<ConstantSDNode>(N->getOperand(1)) &&
6257 "Invalid operand on VADD_SPLAT!");
6258
6259 int Elt = N->getConstantOperandVal(Num: 0);
6260 int EltSize = N->getConstantOperandVal(Num: 1);
6261 unsigned Opc1, Opc2, Opc3;
6262 EVT VT;
6263
6264 if (EltSize == 1) {
6265 Opc1 = PPC::VSPLTISB;
6266 Opc2 = PPC::VADDUBM;
6267 Opc3 = PPC::VSUBUBM;
6268 VT = MVT::v16i8;
6269 } else if (EltSize == 2) {
6270 Opc1 = PPC::VSPLTISH;
6271 Opc2 = PPC::VADDUHM;
6272 Opc3 = PPC::VSUBUHM;
6273 VT = MVT::v8i16;
6274 } else {
6275 assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
6276 Opc1 = PPC::VSPLTISW;
6277 Opc2 = PPC::VADDUWM;
6278 Opc3 = PPC::VSUBUWM;
6279 VT = MVT::v4i32;
6280 }
6281
6282 if ((Elt & 1) == 0) {
6283 // Elt is even, in the range [-32,-18] + [16,30].
6284 //
6285 // Convert: VADD_SPLAT elt, size
6286 // Into: tmp = VSPLTIS[BHW] elt
6287 // VADDU[BHW]M tmp, tmp
6288 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
6289 SDValue EltVal = getI32Imm(Imm: Elt >> 1, dl);
6290 SDNode *Tmp = CurDAG->getMachineNode(Opcode: Opc1, dl, VT, Op1: EltVal);
6291 SDValue TmpVal = SDValue(Tmp, 0);
6292 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc2, dl, VT, Op1: TmpVal, Op2: TmpVal));
6293 return;
6294 } else if (Elt > 0) {
6295 // Elt is odd and positive, in the range [17,31].
6296 //
6297 // Convert: VADD_SPLAT elt, size
6298 // Into: tmp1 = VSPLTIS[BHW] elt-16
6299 // tmp2 = VSPLTIS[BHW] -16
6300 // VSUBU[BHW]M tmp1, tmp2
6301 SDValue EltVal = getI32Imm(Imm: Elt - 16, dl);
6302 SDNode *Tmp1 = CurDAG->getMachineNode(Opcode: Opc1, dl, VT, Op1: EltVal);
6303 EltVal = getI32Imm(Imm: -16, dl);
6304 SDNode *Tmp2 = CurDAG->getMachineNode(Opcode: Opc1, dl, VT, Op1: EltVal);
6305 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc3, dl, VT, Op1: SDValue(Tmp1, 0),
6306 Op2: SDValue(Tmp2, 0)));
6307 return;
6308 } else {
6309 // Elt is odd and negative, in the range [-31,-17].
6310 //
6311 // Convert: VADD_SPLAT elt, size
6312 // Into: tmp1 = VSPLTIS[BHW] elt+16
6313 // tmp2 = VSPLTIS[BHW] -16
6314 // VADDU[BHW]M tmp1, tmp2
6315 SDValue EltVal = getI32Imm(Imm: Elt + 16, dl);
6316 SDNode *Tmp1 = CurDAG->getMachineNode(Opcode: Opc1, dl, VT, Op1: EltVal);
6317 EltVal = getI32Imm(Imm: -16, dl);
6318 SDNode *Tmp2 = CurDAG->getMachineNode(Opcode: Opc1, dl, VT, Op1: EltVal);
6319 ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc2, dl, VT, Op1: SDValue(Tmp1, 0),
6320 Op2: SDValue(Tmp2, 0)));
6321 return;
6322 }
6323 }
6324 case PPCISD::LD_SPLAT: {
6325 // Here we want to handle splat load for type v16i8 and v8i16 when there is
6326 // no direct move, we don't need to use stack for this case. If target has
6327 // direct move, we should be able to get the best selection in the .td file.
6328 if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
6329 break;
6330
6331 EVT Type = N->getValueType(ResNo: 0);
6332 if (Type != MVT::v16i8 && Type != MVT::v8i16)
6333 break;
6334
6335 // If the alignment for the load is 16 or bigger, we don't need the
6336 // permutated mask to get the required value. The value must be the 0
6337 // element in big endian target or 7/15 in little endian target in the
6338 // result vsx register of lvx instruction.
6339 // Select the instruction in the .td file.
6340 if (cast<MemIntrinsicSDNode>(Val: N)->getAlign() >= Align(16) &&
6341 isOffsetMultipleOf(N, Val: 16))
6342 break;
6343
6344 SDValue ZeroReg =
6345 CurDAG->getRegister(Reg: Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
6346 VT: Subtarget->getScalarIntVT());
6347 unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
6348 // v16i8 LD_SPLAT addr
6349 // ======>
6350 // Mask = LVSR/LVSL 0, addr
6351 // LoadLow = LVX 0, addr
6352 // Perm = VPERM LoadLow, LoadLow, Mask
6353 // Splat = VSPLTB 15/0, Perm
6354 //
6355 // v8i16 LD_SPLAT addr
6356 // ======>
6357 // Mask = LVSR/LVSL 0, addr
6358 // LoadLow = LVX 0, addr
6359 // LoadHigh = LVX (LI, 1), addr
6360 // Perm = VPERM LoadLow, LoadHigh, Mask
6361 // Splat = VSPLTH 7/0, Perm
6362 unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
6363 unsigned SplatElemIndex =
6364 Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
6365
6366 SDNode *Mask = CurDAG->getMachineNode(
6367 Opcode: Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, VT: Type, Op1: ZeroReg,
6368 Op2: N->getOperand(Num: 1));
6369
6370 SDNode *LoadLow =
6371 CurDAG->getMachineNode(Opcode: PPC::LVX, dl, VT1: MVT::v16i8, VT2: MVT::Other,
6372 Ops: {ZeroReg, N->getOperand(Num: 1), N->getOperand(Num: 0)});
6373
6374 SDNode *LoadHigh = LoadLow;
6375 if (Type == MVT::v8i16) {
6376 LoadHigh = CurDAG->getMachineNode(
6377 Opcode: PPC::LVX, dl, VT1: MVT::v16i8, VT2: MVT::Other,
6378 Ops: {SDValue(CurDAG->getMachineNode(
6379 Opcode: LIOpcode, dl, VT: MVT::i32,
6380 Op1: CurDAG->getTargetConstant(Val: 1, DL: dl, VT: MVT::i8)),
6381 0),
6382 N->getOperand(Num: 1), SDValue(LoadLow, 1)});
6383 }
6384
6385 CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: SDValue(LoadHigh, 1));
6386 transferMemOperands(N, Result: LoadHigh);
6387
6388 SDNode *Perm =
6389 CurDAG->getMachineNode(Opcode: PPC::VPERM, dl, VT: Type, Op1: SDValue(LoadLow, 0),
6390 Op2: SDValue(LoadHigh, 0), Op3: SDValue(Mask, 0));
6391 CurDAG->SelectNodeTo(N, MachineOpc: SplatOp, VT: Type,
6392 Op1: CurDAG->getTargetConstant(Val: SplatElemIndex, DL: dl, VT: MVT::i8),
6393 Op2: SDValue(Perm, 0));
6394 return;
6395 }
6396 }
6397
6398 SelectCode(N);
6399}
6400
6401// If the target supports the cmpb instruction, do the idiom recognition here.
6402// We don't do this as a DAG combine because we don't want to do it as nodes
6403// are being combined (because we might miss part of the eventual idiom). We
6404// don't want to do it during instruction selection because we want to reuse
6405// the logic for lowering the masking operations already part of the
6406// instruction selector.
6407SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
6408 SDLoc dl(N);
6409
6410 assert(N->getOpcode() == ISD::OR &&
6411 "Only OR nodes are supported for CMPB");
6412
6413 SDValue Res;
6414 if (!Subtarget->hasCMPB())
6415 return Res;
6416
6417 if (N->getValueType(ResNo: 0) != MVT::i32 &&
6418 N->getValueType(ResNo: 0) != MVT::i64)
6419 return Res;
6420
6421 EVT VT = N->getValueType(ResNo: 0);
6422
6423 SDValue RHS, LHS;
6424 bool BytesFound[8] = {false, false, false, false, false, false, false, false};
6425 uint64_t Mask = 0, Alt = 0;
6426
6427 auto IsByteSelectCC = [this](SDValue O, unsigned &b,
6428 uint64_t &Mask, uint64_t &Alt,
6429 SDValue &LHS, SDValue &RHS) {
6430 if (O.getOpcode() != ISD::SELECT_CC)
6431 return false;
6432 ISD::CondCode CC = cast<CondCodeSDNode>(Val: O.getOperand(i: 4))->get();
6433
6434 if (!isa<ConstantSDNode>(Val: O.getOperand(i: 2)) ||
6435 !isa<ConstantSDNode>(Val: O.getOperand(i: 3)))
6436 return false;
6437
6438 uint64_t PM = O.getConstantOperandVal(i: 2);
6439 uint64_t PAlt = O.getConstantOperandVal(i: 3);
6440 for (b = 0; b < 8; ++b) {
6441 uint64_t Mask = UINT64_C(0xFF) << (8*b);
6442 if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
6443 break;
6444 }
6445
6446 if (b == 8)
6447 return false;
6448 Mask |= PM;
6449 Alt |= PAlt;
6450
6451 if (!isa<ConstantSDNode>(Val: O.getOperand(i: 1)) ||
6452 O.getConstantOperandVal(i: 1) != 0) {
6453 SDValue Op0 = O.getOperand(i: 0), Op1 = O.getOperand(i: 1);
6454 if (Op0.getOpcode() == ISD::TRUNCATE)
6455 Op0 = Op0.getOperand(i: 0);
6456 if (Op1.getOpcode() == ISD::TRUNCATE)
6457 Op1 = Op1.getOperand(i: 0);
6458
6459 if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
6460 Op0.getOperand(i: 1) == Op1.getOperand(i: 1) && CC == ISD::SETEQ &&
6461 isa<ConstantSDNode>(Val: Op0.getOperand(i: 1))) {
6462
6463 unsigned Bits = Op0.getValueSizeInBits();
6464 if (b != Bits/8-1)
6465 return false;
6466 if (Op0.getConstantOperandVal(i: 1) != Bits-8)
6467 return false;
6468
6469 LHS = Op0.getOperand(i: 0);
6470 RHS = Op1.getOperand(i: 0);
6471 return true;
6472 }
6473
6474 // When we have small integers (i16 to be specific), the form present
6475 // post-legalization uses SETULT in the SELECT_CC for the
6476 // higher-order byte, depending on the fact that the
6477 // even-higher-order bytes are known to all be zero, for example:
6478 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
6479 // (so when the second byte is the same, because all higher-order
6480 // bits from bytes 3 and 4 are known to be zero, the result of the
6481 // xor can be at most 255)
6482 if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
6483 isa<ConstantSDNode>(Val: O.getOperand(i: 1))) {
6484
6485 uint64_t ULim = O.getConstantOperandVal(i: 1);
6486 if (ULim != (UINT64_C(1) << b*8))
6487 return false;
6488
6489 // Now we need to make sure that the upper bytes are known to be
6490 // zero.
6491 unsigned Bits = Op0.getValueSizeInBits();
6492 if (!CurDAG->MaskedValueIsZero(
6493 Op: Op0, Mask: APInt::getHighBitsSet(numBits: Bits, hiBitsSet: Bits - (b + 1) * 8)))
6494 return false;
6495
6496 LHS = Op0.getOperand(i: 0);
6497 RHS = Op0.getOperand(i: 1);
6498 return true;
6499 }
6500
6501 return false;
6502 }
6503
6504 if (CC != ISD::SETEQ)
6505 return false;
6506
6507 SDValue Op = O.getOperand(i: 0);
6508 if (Op.getOpcode() == ISD::AND) {
6509 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 1)))
6510 return false;
6511 if (Op.getConstantOperandVal(i: 1) != (UINT64_C(0xFF) << (8*b)))
6512 return false;
6513
6514 SDValue XOR = Op.getOperand(i: 0);
6515 if (XOR.getOpcode() == ISD::TRUNCATE)
6516 XOR = XOR.getOperand(i: 0);
6517 if (XOR.getOpcode() != ISD::XOR)
6518 return false;
6519
6520 LHS = XOR.getOperand(i: 0);
6521 RHS = XOR.getOperand(i: 1);
6522 return true;
6523 } else if (Op.getOpcode() == ISD::SRL) {
6524 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 1)))
6525 return false;
6526 unsigned Bits = Op.getValueSizeInBits();
6527 if (b != Bits/8-1)
6528 return false;
6529 if (Op.getConstantOperandVal(i: 1) != Bits-8)
6530 return false;
6531
6532 SDValue XOR = Op.getOperand(i: 0);
6533 if (XOR.getOpcode() == ISD::TRUNCATE)
6534 XOR = XOR.getOperand(i: 0);
6535 if (XOR.getOpcode() != ISD::XOR)
6536 return false;
6537
6538 LHS = XOR.getOperand(i: 0);
6539 RHS = XOR.getOperand(i: 1);
6540 return true;
6541 }
6542
6543 return false;
6544 };
6545
6546 SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
6547 while (!Queue.empty()) {
6548 SDValue V = Queue.pop_back_val();
6549
6550 for (const SDValue &O : V.getNode()->ops()) {
6551 unsigned b = 0;
6552 uint64_t M = 0, A = 0;
6553 SDValue OLHS, ORHS;
6554 if (O.getOpcode() == ISD::OR) {
6555 Queue.push_back(Elt: O);
6556 } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
6557 if (!LHS) {
6558 LHS = OLHS;
6559 RHS = ORHS;
6560 BytesFound[b] = true;
6561 Mask |= M;
6562 Alt |= A;
6563 } else if ((LHS == ORHS && RHS == OLHS) ||
6564 (RHS == ORHS && LHS == OLHS)) {
6565 BytesFound[b] = true;
6566 Mask |= M;
6567 Alt |= A;
6568 } else {
6569 return Res;
6570 }
6571 } else {
6572 return Res;
6573 }
6574 }
6575 }
6576
6577 unsigned LastB = 0, BCnt = 0;
6578 for (unsigned i = 0; i < 8; ++i)
6579 if (BytesFound[LastB]) {
6580 ++BCnt;
6581 LastB = i;
6582 }
6583
6584 if (!LastB || BCnt < 2)
6585 return Res;
6586
6587 // Because we'll be zero-extending the output anyway if don't have a specific
6588 // value for each input byte (via the Mask), we can 'anyext' the inputs.
6589 if (LHS.getValueType() != VT) {
6590 LHS = CurDAG->getAnyExtOrTrunc(Op: LHS, DL: dl, VT);
6591 RHS = CurDAG->getAnyExtOrTrunc(Op: RHS, DL: dl, VT);
6592 }
6593
6594 Res = CurDAG->getNode(Opcode: PPCISD::CMPB, DL: dl, VT, N1: LHS, N2: RHS);
6595
6596 bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
6597 if (NonTrivialMask && !Alt) {
6598 // Res = Mask & CMPB
6599 Res = CurDAG->getNode(Opcode: ISD::AND, DL: dl, VT, N1: Res,
6600 N2: CurDAG->getConstant(Val: Mask, DL: dl, VT));
6601 } else if (Alt) {
6602 // Res = (CMPB & Mask) | (~CMPB & Alt)
6603 // Which, as suggested here:
6604 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
6605 // can be written as:
6606 // Res = Alt ^ ((Alt ^ Mask) & CMPB)
6607 // useful because the (Alt ^ Mask) can be pre-computed.
6608 Res = CurDAG->getNode(Opcode: ISD::AND, DL: dl, VT, N1: Res,
6609 N2: CurDAG->getConstant(Val: Mask ^ Alt, DL: dl, VT));
6610 Res = CurDAG->getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Res,
6611 N2: CurDAG->getConstant(Val: Alt, DL: dl, VT));
6612 }
6613
6614 return Res;
6615}
6616
6617// When CR bit registers are enabled, an extension of an i1 variable to a i32
6618// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
6619// involves constant materialization of a 0 or a 1 or both. If the result of
6620// the extension is then operated upon by some operator that can be constant
6621// folded with a constant 0 or 1, and that constant can be materialized using
6622// only one instruction (like a zero or one), then we should fold in those
6623// operations with the select.
6624void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
6625 if (!Subtarget->useCRBits())
6626 return;
6627
6628 if (N->getOpcode() != ISD::ZERO_EXTEND &&
6629 N->getOpcode() != ISD::SIGN_EXTEND &&
6630 N->getOpcode() != ISD::ANY_EXTEND)
6631 return;
6632
6633 if (N->getOperand(Num: 0).getValueType() != MVT::i1)
6634 return;
6635
6636 if (!N->hasOneUse())
6637 return;
6638
6639 SDLoc dl(N);
6640 EVT VT = N->getValueType(ResNo: 0);
6641 SDValue Cond = N->getOperand(Num: 0);
6642 SDValue ConstTrue = CurDAG->getSignedConstant(
6643 Val: N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, DL: dl, VT);
6644 SDValue ConstFalse = CurDAG->getConstant(Val: 0, DL: dl, VT);
6645
6646 do {
6647 SDNode *User = *N->user_begin();
6648 if (User->getNumOperands() != 2)
6649 break;
6650
6651 auto TryFold = [this, N, User, dl](SDValue Val) {
6652 SDValue UserO0 = User->getOperand(Num: 0), UserO1 = User->getOperand(Num: 1);
6653 SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
6654 SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
6655
6656 return CurDAG->FoldConstantArithmetic(Opcode: User->getOpcode(), DL: dl,
6657 VT: User->getValueType(ResNo: 0), Ops: {O0, O1});
6658 };
6659
6660 // FIXME: When the semantics of the interaction between select and undef
6661 // are clearly defined, it may turn out to be unnecessary to break here.
6662 SDValue TrueRes = TryFold(ConstTrue);
6663 if (!TrueRes || TrueRes.isUndef())
6664 break;
6665 SDValue FalseRes = TryFold(ConstFalse);
6666 if (!FalseRes || FalseRes.isUndef())
6667 break;
6668
6669 // For us to materialize these using one instruction, we must be able to
6670 // represent them as signed 16-bit integers.
6671 uint64_t True = TrueRes->getAsZExtVal(), False = FalseRes->getAsZExtVal();
6672 if (!isInt<16>(x: True) || !isInt<16>(x: False))
6673 break;
6674
6675 // We can replace User with a new SELECT node, and try again to see if we
6676 // can fold the select with its user.
6677 Res = CurDAG->getSelect(DL: dl, VT: User->getValueType(ResNo: 0), Cond, LHS: TrueRes, RHS: FalseRes);
6678 N = User;
6679 ConstTrue = TrueRes;
6680 ConstFalse = FalseRes;
6681 } while (N->hasOneUse());
6682}
6683
6684void PPCDAGToDAGISel::PreprocessISelDAG() {
6685 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
6686
6687 bool MadeChange = false;
6688 while (Position != CurDAG->allnodes_begin()) {
6689 SDNode *N = &*--Position;
6690 if (N->use_empty())
6691 continue;
6692
6693 SDValue Res;
6694 switch (N->getOpcode()) {
6695 default: break;
6696 case ISD::OR:
6697 Res = combineToCMPB(N);
6698 break;
6699 }
6700
6701 if (!Res)
6702 foldBoolExts(Res, N);
6703
6704 if (Res) {
6705 LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
6706 LLVM_DEBUG(N->dump(CurDAG));
6707 LLVM_DEBUG(dbgs() << "\nNew: ");
6708 LLVM_DEBUG(Res.getNode()->dump(CurDAG));
6709 LLVM_DEBUG(dbgs() << "\n");
6710
6711 CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Res);
6712 MadeChange = true;
6713 }
6714 }
6715
6716 if (MadeChange)
6717 CurDAG->RemoveDeadNodes();
6718}
6719
6720/// PostprocessISelDAG - Perform some late peephole optimizations
6721/// on the DAG representation.
6722void PPCDAGToDAGISel::PostprocessISelDAG() {
6723 // Skip peepholes at -O0.
6724 if (TM.getOptLevel() == CodeGenOptLevel::None)
6725 return;
6726
6727 PeepholePPC64();
6728 PeepholeCROps();
6729 PeepholePPC64ZExt();
6730}
6731
6732// Check if all users of this node will become isel where the second operand
6733// is the constant zero. If this is so, and if we can negate the condition,
6734// then we can flip the true and false operands. This will allow the zero to
6735// be folded with the isel so that we don't need to materialize a register
6736// containing zero.
6737bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
6738 for (const SDNode *User : N->users()) {
6739 if (!User->isMachineOpcode())
6740 return false;
6741 if (User->getMachineOpcode() != PPC::SELECT_I4 &&
6742 User->getMachineOpcode() != PPC::SELECT_I8)
6743 return false;
6744
6745 SDNode *Op1 = User->getOperand(Num: 1).getNode();
6746 SDNode *Op2 = User->getOperand(Num: 2).getNode();
6747 // If we have a degenerate select with two equal operands, swapping will
6748 // not do anything, and we may run into an infinite loop.
6749 if (Op1 == Op2)
6750 return false;
6751
6752 if (!Op2->isMachineOpcode())
6753 return false;
6754
6755 if (Op2->getMachineOpcode() != PPC::LI &&
6756 Op2->getMachineOpcode() != PPC::LI8)
6757 return false;
6758
6759 if (!isNullConstant(V: Op2->getOperand(Num: 0)))
6760 return false;
6761 }
6762
6763 return true;
6764}
6765
6766void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
6767 SmallVector<SDNode *, 4> ToReplace;
6768 for (SDNode *User : N->users()) {
6769 assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
6770 User->getMachineOpcode() == PPC::SELECT_I8) &&
6771 "Must have all select users");
6772 ToReplace.push_back(Elt: User);
6773 }
6774
6775 for (SDNode *User : ToReplace) {
6776 SDNode *ResNode =
6777 CurDAG->getMachineNode(Opcode: User->getMachineOpcode(), dl: SDLoc(User),
6778 VT: User->getValueType(ResNo: 0), Op1: User->getOperand(Num: 0),
6779 Op2: User->getOperand(Num: 2),
6780 Op3: User->getOperand(Num: 1));
6781
6782 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
6783 LLVM_DEBUG(User->dump(CurDAG));
6784 LLVM_DEBUG(dbgs() << "\nNew: ");
6785 LLVM_DEBUG(ResNode->dump(CurDAG));
6786 LLVM_DEBUG(dbgs() << "\n");
6787
6788 ReplaceUses(F: User, T: ResNode);
6789 }
6790}
6791
6792void PPCDAGToDAGISel::PeepholeCROps() {
6793 bool IsModified;
6794 do {
6795 IsModified = false;
6796 for (SDNode &Node : CurDAG->allnodes()) {
6797 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Val: &Node);
6798 if (!MachineNode || MachineNode->use_empty())
6799 continue;
6800 SDNode *ResNode = MachineNode;
6801
6802 bool Op1Set = false, Op1Unset = false,
6803 Op1Not = false,
6804 Op2Set = false, Op2Unset = false,
6805 Op2Not = false;
6806
6807 unsigned Opcode = MachineNode->getMachineOpcode();
6808 switch (Opcode) {
6809 default: break;
6810 case PPC::CRAND:
6811 case PPC::CRNAND:
6812 case PPC::CROR:
6813 case PPC::CRXOR:
6814 case PPC::CRNOR:
6815 case PPC::CREQV:
6816 case PPC::CRANDC:
6817 case PPC::CRORC: {
6818 SDValue Op = MachineNode->getOperand(Num: 1);
6819 if (Op.isMachineOpcode()) {
6820 if (Op.getMachineOpcode() == PPC::CRSET)
6821 Op2Set = true;
6822 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6823 Op2Unset = true;
6824 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6825 Op.getOperand(i: 0) == Op.getOperand(i: 1)) ||
6826 Op.getMachineOpcode() == PPC::CRNOT)
6827 Op2Not = true;
6828 }
6829 [[fallthrough]];
6830 }
6831 case PPC::BC:
6832 case PPC::BCn:
6833 case PPC::SELECT_I4:
6834 case PPC::SELECT_I8:
6835 case PPC::SELECT_F4:
6836 case PPC::SELECT_F8:
6837 case PPC::SELECT_SPE:
6838 case PPC::SELECT_SPE4:
6839 case PPC::SELECT_VRRC:
6840 case PPC::SELECT_VSFRC:
6841 case PPC::SELECT_VSSRC:
6842 case PPC::SELECT_VSRC: {
6843 SDValue Op = MachineNode->getOperand(Num: 0);
6844 if (Op.isMachineOpcode()) {
6845 if (Op.getMachineOpcode() == PPC::CRSET)
6846 Op1Set = true;
6847 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6848 Op1Unset = true;
6849 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6850 Op.getOperand(i: 0) == Op.getOperand(i: 1)) ||
6851 Op.getMachineOpcode() == PPC::CRNOT)
6852 Op1Not = true;
6853 }
6854 }
6855 break;
6856 }
6857
6858 bool SelectSwap = false;
6859 switch (Opcode) {
6860 default: break;
6861 case PPC::CRAND:
6862 if (MachineNode->getOperand(Num: 0) == MachineNode->getOperand(Num: 1))
6863 // x & x = x
6864 ResNode = MachineNode->getOperand(Num: 0).getNode();
6865 else if (Op1Set)
6866 // 1 & y = y
6867 ResNode = MachineNode->getOperand(Num: 1).getNode();
6868 else if (Op2Set)
6869 // x & 1 = x
6870 ResNode = MachineNode->getOperand(Num: 0).getNode();
6871 else if (Op1Unset || Op2Unset)
6872 // x & 0 = 0 & y = 0
6873 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRUNSET, dl: SDLoc(MachineNode),
6874 VT: MVT::i1);
6875 else if (Op1Not)
6876 // ~x & y = andc(y, x)
6877 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRANDC, dl: SDLoc(MachineNode),
6878 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 1),
6879 Op2: MachineNode->getOperand(Num: 0).
6880 getOperand(i: 0));
6881 else if (Op2Not)
6882 // x & ~y = andc(x, y)
6883 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRANDC, dl: SDLoc(MachineNode),
6884 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
6885 Op2: MachineNode->getOperand(Num: 1).
6886 getOperand(i: 0));
6887 else if (AllUsersSelectZero(N: MachineNode)) {
6888 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRNAND, dl: SDLoc(MachineNode),
6889 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
6890 Op2: MachineNode->getOperand(Num: 1));
6891 SelectSwap = true;
6892 }
6893 break;
6894 case PPC::CRNAND:
6895 if (MachineNode->getOperand(Num: 0) == MachineNode->getOperand(Num: 1))
6896 // nand(x, x) -> nor(x, x)
6897 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRNOR, dl: SDLoc(MachineNode),
6898 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
6899 Op2: MachineNode->getOperand(Num: 0));
6900 else if (Op1Set)
6901 // nand(1, y) -> nor(y, y)
6902 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRNOR, dl: SDLoc(MachineNode),
6903 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 1),
6904 Op2: MachineNode->getOperand(Num: 1));
6905 else if (Op2Set)
6906 // nand(x, 1) -> nor(x, x)
6907 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRNOR, dl: SDLoc(MachineNode),
6908 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
6909 Op2: MachineNode->getOperand(Num: 0));
6910 else if (Op1Unset || Op2Unset)
6911 // nand(x, 0) = nand(0, y) = 1
6912 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRSET, dl: SDLoc(MachineNode),
6913 VT: MVT::i1);
6914 else if (Op1Not)
6915 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
6916 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRORC, dl: SDLoc(MachineNode),
6917 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0).
6918 getOperand(i: 0),
6919 Op2: MachineNode->getOperand(Num: 1));
6920 else if (Op2Not)
6921 // nand(x, ~y) = ~x | y = orc(y, x)
6922 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRORC, dl: SDLoc(MachineNode),
6923 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 1).
6924 getOperand(i: 0),
6925 Op2: MachineNode->getOperand(Num: 0));
6926 else if (AllUsersSelectZero(N: MachineNode)) {
6927 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRAND, dl: SDLoc(MachineNode),
6928 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
6929 Op2: MachineNode->getOperand(Num: 1));
6930 SelectSwap = true;
6931 }
6932 break;
6933 case PPC::CROR:
6934 if (MachineNode->getOperand(Num: 0) == MachineNode->getOperand(Num: 1))
6935 // x | x = x
6936 ResNode = MachineNode->getOperand(Num: 0).getNode();
6937 else if (Op1Set || Op2Set)
6938 // x | 1 = 1 | y = 1
6939 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRSET, dl: SDLoc(MachineNode),
6940 VT: MVT::i1);
6941 else if (Op1Unset)
6942 // 0 | y = y
6943 ResNode = MachineNode->getOperand(Num: 1).getNode();
6944 else if (Op2Unset)
6945 // x | 0 = x
6946 ResNode = MachineNode->getOperand(Num: 0).getNode();
6947 else if (Op1Not)
6948 // ~x | y = orc(y, x)
6949 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRORC, dl: SDLoc(MachineNode),
6950 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 1),
6951 Op2: MachineNode->getOperand(Num: 0).
6952 getOperand(i: 0));
6953 else if (Op2Not)
6954 // x | ~y = orc(x, y)
6955 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRORC, dl: SDLoc(MachineNode),
6956 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
6957 Op2: MachineNode->getOperand(Num: 1).
6958 getOperand(i: 0));
6959 else if (AllUsersSelectZero(N: MachineNode)) {
6960 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRNOR, dl: SDLoc(MachineNode),
6961 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
6962 Op2: MachineNode->getOperand(Num: 1));
6963 SelectSwap = true;
6964 }
6965 break;
6966 case PPC::CRXOR:
6967 if (MachineNode->getOperand(Num: 0) == MachineNode->getOperand(Num: 1))
6968 // xor(x, x) = 0
6969 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRUNSET, dl: SDLoc(MachineNode),
6970 VT: MVT::i1);
6971 else if (Op1Set)
6972 // xor(1, y) -> nor(y, y)
6973 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRNOR, dl: SDLoc(MachineNode),
6974 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 1),
6975 Op2: MachineNode->getOperand(Num: 1));
6976 else if (Op2Set)
6977 // xor(x, 1) -> nor(x, x)
6978 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRNOR, dl: SDLoc(MachineNode),
6979 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
6980 Op2: MachineNode->getOperand(Num: 0));
6981 else if (Op1Unset)
6982 // xor(0, y) = y
6983 ResNode = MachineNode->getOperand(Num: 1).getNode();
6984 else if (Op2Unset)
6985 // xor(x, 0) = x
6986 ResNode = MachineNode->getOperand(Num: 0).getNode();
6987 else if (Op1Not)
6988 // xor(~x, y) = eqv(x, y)
6989 ResNode = CurDAG->getMachineNode(Opcode: PPC::CREQV, dl: SDLoc(MachineNode),
6990 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0).
6991 getOperand(i: 0),
6992 Op2: MachineNode->getOperand(Num: 1));
6993 else if (Op2Not)
6994 // xor(x, ~y) = eqv(x, y)
6995 ResNode = CurDAG->getMachineNode(Opcode: PPC::CREQV, dl: SDLoc(MachineNode),
6996 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
6997 Op2: MachineNode->getOperand(Num: 1).
6998 getOperand(i: 0));
6999 else if (AllUsersSelectZero(N: MachineNode)) {
7000 ResNode = CurDAG->getMachineNode(Opcode: PPC::CREQV, dl: SDLoc(MachineNode),
7001 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
7002 Op2: MachineNode->getOperand(Num: 1));
7003 SelectSwap = true;
7004 }
7005 break;
7006 case PPC::CRNOR:
7007 if (Op1Set || Op2Set)
7008 // nor(1, y) -> 0
7009 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRUNSET, dl: SDLoc(MachineNode),
7010 VT: MVT::i1);
7011 else if (Op1Unset)
7012 // nor(0, y) = ~y -> nor(y, y)
7013 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRNOR, dl: SDLoc(MachineNode),
7014 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 1),
7015 Op2: MachineNode->getOperand(Num: 1));
7016 else if (Op2Unset)
7017 // nor(x, 0) = ~x
7018 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRNOR, dl: SDLoc(MachineNode),
7019 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
7020 Op2: MachineNode->getOperand(Num: 0));
7021 else if (Op1Not)
7022 // nor(~x, y) = andc(x, y)
7023 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRANDC, dl: SDLoc(MachineNode),
7024 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0).
7025 getOperand(i: 0),
7026 Op2: MachineNode->getOperand(Num: 1));
7027 else if (Op2Not)
7028 // nor(x, ~y) = andc(y, x)
7029 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRANDC, dl: SDLoc(MachineNode),
7030 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 1).
7031 getOperand(i: 0),
7032 Op2: MachineNode->getOperand(Num: 0));
7033 else if (AllUsersSelectZero(N: MachineNode)) {
7034 ResNode = CurDAG->getMachineNode(Opcode: PPC::CROR, dl: SDLoc(MachineNode),
7035 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
7036 Op2: MachineNode->getOperand(Num: 1));
7037 SelectSwap = true;
7038 }
7039 break;
7040 case PPC::CREQV:
7041 if (MachineNode->getOperand(Num: 0) == MachineNode->getOperand(Num: 1))
7042 // eqv(x, x) = 1
7043 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRSET, dl: SDLoc(MachineNode),
7044 VT: MVT::i1);
7045 else if (Op1Set)
7046 // eqv(1, y) = y
7047 ResNode = MachineNode->getOperand(Num: 1).getNode();
7048 else if (Op2Set)
7049 // eqv(x, 1) = x
7050 ResNode = MachineNode->getOperand(Num: 0).getNode();
7051 else if (Op1Unset)
7052 // eqv(0, y) = ~y -> nor(y, y)
7053 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRNOR, dl: SDLoc(MachineNode),
7054 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 1),
7055 Op2: MachineNode->getOperand(Num: 1));
7056 else if (Op2Unset)
7057 // eqv(x, 0) = ~x
7058 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRNOR, dl: SDLoc(MachineNode),
7059 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
7060 Op2: MachineNode->getOperand(Num: 0));
7061 else if (Op1Not)
7062 // eqv(~x, y) = xor(x, y)
7063 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRXOR, dl: SDLoc(MachineNode),
7064 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0).
7065 getOperand(i: 0),
7066 Op2: MachineNode->getOperand(Num: 1));
7067 else if (Op2Not)
7068 // eqv(x, ~y) = xor(x, y)
7069 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRXOR, dl: SDLoc(MachineNode),
7070 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
7071 Op2: MachineNode->getOperand(Num: 1).
7072 getOperand(i: 0));
7073 else if (AllUsersSelectZero(N: MachineNode)) {
7074 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRXOR, dl: SDLoc(MachineNode),
7075 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
7076 Op2: MachineNode->getOperand(Num: 1));
7077 SelectSwap = true;
7078 }
7079 break;
7080 case PPC::CRANDC:
7081 if (MachineNode->getOperand(Num: 0) == MachineNode->getOperand(Num: 1))
7082 // andc(x, x) = 0
7083 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRUNSET, dl: SDLoc(MachineNode),
7084 VT: MVT::i1);
7085 else if (Op1Set)
7086 // andc(1, y) = ~y
7087 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRNOR, dl: SDLoc(MachineNode),
7088 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 1),
7089 Op2: MachineNode->getOperand(Num: 1));
7090 else if (Op1Unset || Op2Set)
7091 // andc(0, y) = andc(x, 1) = 0
7092 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRUNSET, dl: SDLoc(MachineNode),
7093 VT: MVT::i1);
7094 else if (Op2Unset)
7095 // andc(x, 0) = x
7096 ResNode = MachineNode->getOperand(Num: 0).getNode();
7097 else if (Op1Not)
7098 // andc(~x, y) = ~(x | y) = nor(x, y)
7099 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRNOR, dl: SDLoc(MachineNode),
7100 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0).
7101 getOperand(i: 0),
7102 Op2: MachineNode->getOperand(Num: 1));
7103 else if (Op2Not)
7104 // andc(x, ~y) = x & y
7105 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRAND, dl: SDLoc(MachineNode),
7106 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
7107 Op2: MachineNode->getOperand(Num: 1).
7108 getOperand(i: 0));
7109 else if (AllUsersSelectZero(N: MachineNode)) {
7110 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRORC, dl: SDLoc(MachineNode),
7111 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 1),
7112 Op2: MachineNode->getOperand(Num: 0));
7113 SelectSwap = true;
7114 }
7115 break;
7116 case PPC::CRORC:
7117 if (MachineNode->getOperand(Num: 0) == MachineNode->getOperand(Num: 1))
7118 // orc(x, x) = 1
7119 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRSET, dl: SDLoc(MachineNode),
7120 VT: MVT::i1);
7121 else if (Op1Set || Op2Unset)
7122 // orc(1, y) = orc(x, 0) = 1
7123 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRSET, dl: SDLoc(MachineNode),
7124 VT: MVT::i1);
7125 else if (Op2Set)
7126 // orc(x, 1) = x
7127 ResNode = MachineNode->getOperand(Num: 0).getNode();
7128 else if (Op1Unset)
7129 // orc(0, y) = ~y
7130 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRNOR, dl: SDLoc(MachineNode),
7131 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 1),
7132 Op2: MachineNode->getOperand(Num: 1));
7133 else if (Op1Not)
7134 // orc(~x, y) = ~(x & y) = nand(x, y)
7135 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRNAND, dl: SDLoc(MachineNode),
7136 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0).
7137 getOperand(i: 0),
7138 Op2: MachineNode->getOperand(Num: 1));
7139 else if (Op2Not)
7140 // orc(x, ~y) = x | y
7141 ResNode = CurDAG->getMachineNode(Opcode: PPC::CROR, dl: SDLoc(MachineNode),
7142 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 0),
7143 Op2: MachineNode->getOperand(Num: 1).
7144 getOperand(i: 0));
7145 else if (AllUsersSelectZero(N: MachineNode)) {
7146 ResNode = CurDAG->getMachineNode(Opcode: PPC::CRANDC, dl: SDLoc(MachineNode),
7147 VT: MVT::i1, Op1: MachineNode->getOperand(Num: 1),
7148 Op2: MachineNode->getOperand(Num: 0));
7149 SelectSwap = true;
7150 }
7151 break;
7152 case PPC::SELECT_I4:
7153 case PPC::SELECT_I8:
7154 case PPC::SELECT_F4:
7155 case PPC::SELECT_F8:
7156 case PPC::SELECT_SPE:
7157 case PPC::SELECT_SPE4:
7158 case PPC::SELECT_VRRC:
7159 case PPC::SELECT_VSFRC:
7160 case PPC::SELECT_VSSRC:
7161 case PPC::SELECT_VSRC:
7162 if (Op1Set)
7163 ResNode = MachineNode->getOperand(Num: 1).getNode();
7164 else if (Op1Unset)
7165 ResNode = MachineNode->getOperand(Num: 2).getNode();
7166 else if (Op1Not)
7167 ResNode = CurDAG->getMachineNode(Opcode: MachineNode->getMachineOpcode(),
7168 dl: SDLoc(MachineNode),
7169 VT: MachineNode->getValueType(ResNo: 0),
7170 Op1: MachineNode->getOperand(Num: 0).
7171 getOperand(i: 0),
7172 Op2: MachineNode->getOperand(Num: 2),
7173 Op3: MachineNode->getOperand(Num: 1));
7174 break;
7175 case PPC::BC:
7176 case PPC::BCn:
7177 if (Op1Not)
7178 ResNode = CurDAG->getMachineNode(Opcode: Opcode == PPC::BC ? PPC::BCn :
7179 PPC::BC,
7180 dl: SDLoc(MachineNode),
7181 VT: MVT::Other,
7182 Op1: MachineNode->getOperand(Num: 0).
7183 getOperand(i: 0),
7184 Op2: MachineNode->getOperand(Num: 1),
7185 Op3: MachineNode->getOperand(Num: 2));
7186 // FIXME: Handle Op1Set, Op1Unset here too.
7187 break;
7188 }
7189
7190 // If we're inverting this node because it is used only by selects that
7191 // we'd like to swap, then swap the selects before the node replacement.
7192 if (SelectSwap)
7193 SwapAllSelectUsers(N: MachineNode);
7194
7195 if (ResNode != MachineNode) {
7196 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
7197 LLVM_DEBUG(MachineNode->dump(CurDAG));
7198 LLVM_DEBUG(dbgs() << "\nNew: ");
7199 LLVM_DEBUG(ResNode->dump(CurDAG));
7200 LLVM_DEBUG(dbgs() << "\n");
7201
7202 ReplaceUses(F: MachineNode, T: ResNode);
7203 IsModified = true;
7204 }
7205 }
7206 if (IsModified)
7207 CurDAG->RemoveDeadNodes();
7208 } while (IsModified);
7209}
7210
7211// Gather the set of 32-bit operations that are known to have their
7212// higher-order 32 bits zero, where ToPromote contains all such operations.
7213static bool PeepholePPC64ZExtGather(SDValue Op32,
7214 SmallPtrSetImpl<SDNode *> &ToPromote) {
7215 if (!Op32.isMachineOpcode())
7216 return false;
7217
7218 // First, check for the "frontier" instructions (those that will clear the
7219 // higher-order 32 bits.
7220
7221 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
7222 // around. If it does not, then these instructions will clear the
7223 // higher-order bits.
7224 if ((Op32.getMachineOpcode() == PPC::RLWINM ||
7225 Op32.getMachineOpcode() == PPC::RLWNM) &&
7226 Op32.getConstantOperandVal(i: 2) <= Op32.getConstantOperandVal(i: 3)) {
7227 ToPromote.insert(Ptr: Op32.getNode());
7228 return true;
7229 }
7230
7231 // SLW and SRW always clear the higher-order bits.
7232 if (Op32.getMachineOpcode() == PPC::SLW ||
7233 Op32.getMachineOpcode() == PPC::SRW) {
7234 ToPromote.insert(Ptr: Op32.getNode());
7235 return true;
7236 }
7237
7238 // For LI and LIS, we need the immediate to be positive (so that it is not
7239 // sign extended).
7240 if (Op32.getMachineOpcode() == PPC::LI ||
7241 Op32.getMachineOpcode() == PPC::LIS) {
7242 if (!isUInt<15>(x: Op32.getConstantOperandVal(i: 0)))
7243 return false;
7244
7245 ToPromote.insert(Ptr: Op32.getNode());
7246 return true;
7247 }
7248
7249 // LHBRX and LWBRX always clear the higher-order bits.
7250 if (Op32.getMachineOpcode() == PPC::LHBRX ||
7251 Op32.getMachineOpcode() == PPC::LWBRX) {
7252 ToPromote.insert(Ptr: Op32.getNode());
7253 return true;
7254 }
7255
7256 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
7257 if (Op32.getMachineOpcode() == PPC::CNTLZW ||
7258 Op32.getMachineOpcode() == PPC::CNTTZW) {
7259 ToPromote.insert(Ptr: Op32.getNode());
7260 return true;
7261 }
7262
7263 // Next, check for those instructions we can look through.
7264
7265 // Assuming the mask does not wrap around, then the higher-order bits are
7266 // taken directly from the first operand.
7267 if (Op32.getMachineOpcode() == PPC::RLWIMI &&
7268 Op32.getConstantOperandVal(i: 3) <= Op32.getConstantOperandVal(i: 4)) {
7269 SmallPtrSet<SDNode *, 16> ToPromote1;
7270 if (!PeepholePPC64ZExtGather(Op32: Op32.getOperand(i: 0), ToPromote&: ToPromote1))
7271 return false;
7272
7273 ToPromote.insert(Ptr: Op32.getNode());
7274 ToPromote.insert_range(R&: ToPromote1);
7275 return true;
7276 }
7277
7278 // For OR, the higher-order bits are zero if that is true for both operands.
7279 // For SELECT_I4, the same is true (but the relevant operand numbers are
7280 // shifted by 1).
7281 if (Op32.getMachineOpcode() == PPC::OR ||
7282 Op32.getMachineOpcode() == PPC::SELECT_I4) {
7283 unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
7284 SmallPtrSet<SDNode *, 16> ToPromote1;
7285 if (!PeepholePPC64ZExtGather(Op32: Op32.getOperand(i: B+0), ToPromote&: ToPromote1))
7286 return false;
7287 if (!PeepholePPC64ZExtGather(Op32: Op32.getOperand(i: B+1), ToPromote&: ToPromote1))
7288 return false;
7289
7290 ToPromote.insert(Ptr: Op32.getNode());
7291 ToPromote.insert_range(R&: ToPromote1);
7292 return true;
7293 }
7294
7295 // For ORI and ORIS, we need the higher-order bits of the first operand to be
7296 // zero, and also for the constant to be positive (so that it is not sign
7297 // extended).
7298 if (Op32.getMachineOpcode() == PPC::ORI ||
7299 Op32.getMachineOpcode() == PPC::ORIS) {
7300 SmallPtrSet<SDNode *, 16> ToPromote1;
7301 if (!PeepholePPC64ZExtGather(Op32: Op32.getOperand(i: 0), ToPromote&: ToPromote1))
7302 return false;
7303 if (!isUInt<15>(x: Op32.getConstantOperandVal(i: 1)))
7304 return false;
7305
7306 ToPromote.insert(Ptr: Op32.getNode());
7307 ToPromote.insert_range(R&: ToPromote1);
7308 return true;
7309 }
7310
7311 // The higher-order bits of AND are zero if that is true for at least one of
7312 // the operands.
7313 if (Op32.getMachineOpcode() == PPC::AND) {
7314 SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
7315 bool Op0OK =
7316 PeepholePPC64ZExtGather(Op32: Op32.getOperand(i: 0), ToPromote&: ToPromote1);
7317 bool Op1OK =
7318 PeepholePPC64ZExtGather(Op32: Op32.getOperand(i: 1), ToPromote&: ToPromote2);
7319 if (!Op0OK && !Op1OK)
7320 return false;
7321
7322 ToPromote.insert(Ptr: Op32.getNode());
7323
7324 if (Op0OK)
7325 ToPromote.insert_range(R&: ToPromote1);
7326
7327 if (Op1OK)
7328 ToPromote.insert_range(R&: ToPromote2);
7329
7330 return true;
7331 }
7332
7333 // For ANDI and ANDIS, the higher-order bits are zero if either that is true
7334 // of the first operand, or if the second operand is positive (so that it is
7335 // not sign extended).
7336 if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
7337 Op32.getMachineOpcode() == PPC::ANDIS_rec) {
7338 SmallPtrSet<SDNode *, 16> ToPromote1;
7339 bool Op0OK =
7340 PeepholePPC64ZExtGather(Op32: Op32.getOperand(i: 0), ToPromote&: ToPromote1);
7341 bool Op1OK = isUInt<15>(x: Op32.getConstantOperandVal(i: 1));
7342 if (!Op0OK && !Op1OK)
7343 return false;
7344
7345 ToPromote.insert(Ptr: Op32.getNode());
7346
7347 if (Op0OK)
7348 ToPromote.insert_range(R&: ToPromote1);
7349
7350 return true;
7351 }
7352
7353 return false;
7354}
7355
7356void PPCDAGToDAGISel::PeepholePPC64ZExt() {
7357 if (!Subtarget->isPPC64())
7358 return;
7359
7360 // When we zero-extend from i32 to i64, we use a pattern like this:
7361 // def : Pat<(i64 (zext i32:$in)),
7362 // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
7363 // 0, 32)>;
7364 // There are several 32-bit shift/rotate instructions, however, that will
7365 // clear the higher-order bits of their output, rendering the RLDICL
7366 // unnecessary. When that happens, we remove it here, and redefine the
7367 // relevant 32-bit operation to be a 64-bit operation.
7368
7369 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7370
7371 bool MadeChange = false;
7372 while (Position != CurDAG->allnodes_begin()) {
7373 SDNode *N = &*--Position;
7374 // Skip dead nodes and any non-machine opcodes.
7375 if (N->use_empty() || !N->isMachineOpcode())
7376 continue;
7377
7378 if (N->getMachineOpcode() != PPC::RLDICL)
7379 continue;
7380
7381 if (N->getConstantOperandVal(Num: 1) != 0 ||
7382 N->getConstantOperandVal(Num: 2) != 32)
7383 continue;
7384
7385 SDValue ISR = N->getOperand(Num: 0);
7386 if (!ISR.isMachineOpcode() ||
7387 ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
7388 continue;
7389
7390 if (!ISR.hasOneUse())
7391 continue;
7392
7393 if (ISR.getConstantOperandVal(i: 2) != PPC::sub_32)
7394 continue;
7395
7396 SDValue IDef = ISR.getOperand(i: 0);
7397 if (!IDef.isMachineOpcode() ||
7398 IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
7399 continue;
7400
7401 // We now know that we're looking at a canonical i32 -> i64 zext. See if we
7402 // can get rid of it.
7403
7404 SDValue Op32 = ISR->getOperand(Num: 1);
7405 if (!Op32.isMachineOpcode())
7406 continue;
7407
7408 // There are some 32-bit instructions that always clear the high-order 32
7409 // bits, there are also some instructions (like AND) that we can look
7410 // through.
7411 SmallPtrSet<SDNode *, 16> ToPromote;
7412 if (!PeepholePPC64ZExtGather(Op32, ToPromote))
7413 continue;
7414
7415 // If the ToPromote set contains nodes that have uses outside of the set
7416 // (except for the original INSERT_SUBREG), then abort the transformation.
7417 bool OutsideUse = false;
7418 for (SDNode *PN : ToPromote) {
7419 for (SDNode *UN : PN->users()) {
7420 if (!ToPromote.count(Ptr: UN) && UN != ISR.getNode()) {
7421 OutsideUse = true;
7422 break;
7423 }
7424 }
7425
7426 if (OutsideUse)
7427 break;
7428 }
7429 if (OutsideUse)
7430 continue;
7431
7432 MadeChange = true;
7433
7434 // We now know that this zero extension can be removed by promoting to
7435 // nodes in ToPromote to 64-bit operations, where for operations in the
7436 // frontier of the set, we need to insert INSERT_SUBREGs for their
7437 // operands.
7438 for (SDNode *PN : ToPromote) {
7439 unsigned NewOpcode;
7440 switch (PN->getMachineOpcode()) {
7441 default:
7442 llvm_unreachable("Don't know the 64-bit variant of this instruction");
7443 case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
7444 case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
7445 case PPC::SLW: NewOpcode = PPC::SLW8; break;
7446 case PPC::SRW: NewOpcode = PPC::SRW8; break;
7447 case PPC::LI: NewOpcode = PPC::LI8; break;
7448 case PPC::LIS: NewOpcode = PPC::LIS8; break;
7449 case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
7450 case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
7451 case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
7452 case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
7453 case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
7454 case PPC::OR: NewOpcode = PPC::OR8; break;
7455 case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
7456 case PPC::ORI: NewOpcode = PPC::ORI8; break;
7457 case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
7458 case PPC::AND: NewOpcode = PPC::AND8; break;
7459 case PPC::ANDI_rec:
7460 NewOpcode = PPC::ANDI8_rec;
7461 break;
7462 case PPC::ANDIS_rec:
7463 NewOpcode = PPC::ANDIS8_rec;
7464 break;
7465 }
7466
7467 // Note: During the replacement process, the nodes will be in an
7468 // inconsistent state (some instructions will have operands with values
7469 // of the wrong type). Once done, however, everything should be right
7470 // again.
7471
7472 SmallVector<SDValue, 4> Ops;
7473 for (const SDValue &V : PN->ops()) {
7474 if (!ToPromote.count(Ptr: V.getNode()) && V.getValueType() == MVT::i32 &&
7475 !isa<ConstantSDNode>(Val: V)) {
7476 SDValue ReplOpOps[] = { ISR.getOperand(i: 0), V, ISR.getOperand(i: 2) };
7477 SDNode *ReplOp =
7478 CurDAG->getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: SDLoc(V),
7479 VTs: ISR.getNode()->getVTList(), Ops: ReplOpOps);
7480 Ops.push_back(Elt: SDValue(ReplOp, 0));
7481 } else {
7482 Ops.push_back(Elt: V);
7483 }
7484 }
7485
7486 // Because all to-be-promoted nodes only have users that are other
7487 // promoted nodes (or the original INSERT_SUBREG), we can safely replace
7488 // the i32 result value type with i64.
7489
7490 SmallVector<EVT, 2> NewVTs;
7491 SDVTList VTs = PN->getVTList();
7492 for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
7493 if (VTs.VTs[i] == MVT::i32)
7494 NewVTs.push_back(Elt: MVT::i64);
7495 else
7496 NewVTs.push_back(Elt: VTs.VTs[i]);
7497
7498 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
7499 LLVM_DEBUG(PN->dump(CurDAG));
7500
7501 CurDAG->SelectNodeTo(N: PN, MachineOpc: NewOpcode, VTs: CurDAG->getVTList(VTs: NewVTs), Ops);
7502
7503 LLVM_DEBUG(dbgs() << "\nNew: ");
7504 LLVM_DEBUG(PN->dump(CurDAG));
7505 LLVM_DEBUG(dbgs() << "\n");
7506 }
7507
7508 // Now we replace the original zero extend and its associated INSERT_SUBREG
7509 // with the value feeding the INSERT_SUBREG (which has now been promoted to
7510 // return an i64).
7511
7512 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
7513 LLVM_DEBUG(N->dump(CurDAG));
7514 LLVM_DEBUG(dbgs() << "\nNew: ");
7515 LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
7516 LLVM_DEBUG(dbgs() << "\n");
7517
7518 ReplaceUses(F: N, T: Op32.getNode());
7519 }
7520
7521 if (MadeChange)
7522 CurDAG->RemoveDeadNodes();
7523}
7524
7525static bool isVSXSwap(SDValue N) {
7526 if (!N->isMachineOpcode())
7527 return false;
7528 unsigned Opc = N->getMachineOpcode();
7529
7530 // Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the immediate
7531 // operand is 2.
7532 if (Opc == PPC::XXPERMDIs) {
7533 return isa<ConstantSDNode>(Val: N->getOperand(Num: 1)) &&
7534 N->getConstantOperandVal(Num: 1) == 2;
7535 } else if (Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) {
7536 return N->getOperand(Num: 0) == N->getOperand(Num: 1) &&
7537 isa<ConstantSDNode>(Val: N->getOperand(Num: 2)) &&
7538 N->getConstantOperandVal(Num: 2) == 2;
7539 }
7540
7541 return false;
7542}
7543
7544// TODO: Make this complete and replace with a table-gen bit.
7545static bool isLaneInsensitive(SDValue N) {
7546 if (!N->isMachineOpcode())
7547 return false;
7548 unsigned Opc = N->getMachineOpcode();
7549
7550 switch (Opc) {
7551 default:
7552 return false;
7553 case PPC::VAVGSB:
7554 case PPC::VAVGUB:
7555 case PPC::VAVGSH:
7556 case PPC::VAVGUH:
7557 case PPC::VAVGSW:
7558 case PPC::VAVGUW:
7559 case PPC::VMAXFP:
7560 case PPC::VMAXSB:
7561 case PPC::VMAXUB:
7562 case PPC::VMAXSH:
7563 case PPC::VMAXUH:
7564 case PPC::VMAXSW:
7565 case PPC::VMAXUW:
7566 case PPC::VMINFP:
7567 case PPC::VMINSB:
7568 case PPC::VMINUB:
7569 case PPC::VMINSH:
7570 case PPC::VMINUH:
7571 case PPC::VMINSW:
7572 case PPC::VMINUW:
7573 case PPC::VADDFP:
7574 case PPC::VADDUBM:
7575 case PPC::VADDUHM:
7576 case PPC::VADDUWM:
7577 case PPC::VSUBFP:
7578 case PPC::VSUBUBM:
7579 case PPC::VSUBUHM:
7580 case PPC::VSUBUWM:
7581 case PPC::VAND:
7582 case PPC::VANDC:
7583 case PPC::VOR:
7584 case PPC::VORC:
7585 case PPC::VXOR:
7586 case PPC::VNOR:
7587 case PPC::VMULUWM:
7588 return true;
7589 }
7590}
7591
7592// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is
7593// lane-insensitive.
7594static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
7595 // Our desired xxswap might be source of COPY_TO_REGCLASS.
7596 // TODO: Can we put this a common method for DAG?
7597 auto SkipRCCopy = [](SDValue V) {
7598 while (V->isMachineOpcode() &&
7599 V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
7600 // All values in the chain should have single use.
7601 if (V->use_empty() || !V->user_begin()->isOnlyUserOf(N: V.getNode()))
7602 return SDValue();
7603 V = V->getOperand(Num: 0);
7604 }
7605 return V.hasOneUse() ? V : SDValue();
7606 };
7607
7608 SDValue VecOp = SkipRCCopy(N->getOperand(Num: 0));
7609 if (!VecOp || !isLaneInsensitive(N: VecOp))
7610 return;
7611
7612 SDValue LHS = SkipRCCopy(VecOp.getOperand(i: 0)),
7613 RHS = SkipRCCopy(VecOp.getOperand(i: 1));
7614 if (!LHS || !RHS || !isVSXSwap(N: LHS) || !isVSXSwap(N: RHS))
7615 return;
7616
7617 // These swaps may still have chain-uses here, count on dead code elimination
7618 // in following passes to remove them.
7619 DAG->ReplaceAllUsesOfValueWith(From: LHS, To: LHS.getOperand(i: 0));
7620 DAG->ReplaceAllUsesOfValueWith(From: RHS, To: RHS.getOperand(i: 0));
7621 DAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: N->getOperand(Num: 0));
7622}
7623
7624// Check if an SDValue has the 'aix-small-tls' global variable attribute.
7625static bool hasAIXSmallTLSAttr(SDValue Val) {
7626 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val))
7627 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Val: GA->getGlobal()))
7628 if (GV->hasAttribute(Kind: "aix-small-tls"))
7629 return true;
7630
7631 return false;
7632}
7633
7634// Is an ADDI eligible for folding for non-TOC-based local-[exec|dynamic]
7635// accesses?
7636static bool isEligibleToFoldADDIForFasterLocalAccesses(SelectionDAG *DAG,
7637 SDValue ADDIToFold) {
7638 // Check if ADDIToFold (the ADDI that we want to fold into local-exec
7639 // accesses), is truly an ADDI.
7640 if (!ADDIToFold.isMachineOpcode() ||
7641 (ADDIToFold.getMachineOpcode() != PPC::ADDI8))
7642 return false;
7643
7644 // Folding is only allowed for the AIX small-local-[exec|dynamic] TLS target
7645 // attribute or when the 'aix-small-tls' global variable attribute is present.
7646 const PPCSubtarget &Subtarget =
7647 DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
7648 SDValue TLSVarNode = ADDIToFold.getOperand(i: 1);
7649 if (!(Subtarget.hasAIXSmallLocalDynamicTLS() ||
7650 Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(Val: TLSVarNode)))
7651 return false;
7652
7653 // The second operand of the ADDIToFold should be the global TLS address
7654 // (the local-exec TLS variable). We only perform the folding if the TLS
7655 // variable is the second operand.
7656 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: TLSVarNode);
7657 if (!GA)
7658 return false;
7659
7660 if (DAG->getTarget().getTLSModel(GV: GA->getGlobal()) == TLSModel::LocalExec) {
7661 // The first operand of the ADDIToFold should be the thread pointer.
7662 // This transformation is only performed if the first operand of the
7663 // addi is the thread pointer.
7664 SDValue TPRegNode = ADDIToFold.getOperand(i: 0);
7665 RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(Val: TPRegNode.getNode());
7666 if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
7667 return false;
7668 }
7669
7670 // The local-[exec|dynamic] TLS variable should only have the
7671 // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flags, so this optimization is not
7672 // performed otherwise if the flag is not set.
7673 unsigned TargetFlags = GA->getTargetFlags();
7674 if (!(TargetFlags == PPCII::MO_TPREL_FLAG ||
7675 TargetFlags == PPCII::MO_TLSLD_FLAG))
7676 return false;
7677
7678 // If all conditions are satisfied, the ADDI is valid for folding.
7679 return true;
7680}
7681
7682// For non-TOC-based local-[exec|dynamic] access where an addi is feeding into
7683// another addi, fold this sequence into a single addi if possible. Before this
7684// optimization, the sequence appears as:
7685// addi rN, r13, sym@[le|ld]
7686// addi rM, rN, imm
7687// After this optimization, we can fold the two addi into a single one:
7688// addi rM, r13, sym@[le|ld] + imm
7689static void foldADDIForFasterLocalAccesses(SDNode *N, SelectionDAG *DAG) {
7690 if (N->getMachineOpcode() != PPC::ADDI8)
7691 return;
7692
7693 // InitialADDI is the addi feeding into N (also an addi), and the addi that
7694 // we want optimized out.
7695 SDValue InitialADDI = N->getOperand(Num: 0);
7696
7697 if (!isEligibleToFoldADDIForFasterLocalAccesses(DAG, ADDIToFold: InitialADDI))
7698 return;
7699
7700 // The second operand of the InitialADDI should be the global TLS address
7701 // (the local-[exec|dynamic] TLS variable), with the
7702 // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flag. This has been checked in
7703 // isEligibleToFoldADDIForFasterLocalAccesses().
7704 SDValue TLSVarNode = InitialADDI.getOperand(i: 1);
7705 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: TLSVarNode);
7706 assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
7707 "local-[exec|dynamic] accesses!");
7708 unsigned TargetFlags = GA->getTargetFlags();
7709
7710 // The second operand of the addi that we want to preserve will be an
7711 // immediate. We add this immediate, together with the address of the TLS
7712 // variable found in InitialADDI, in order to preserve the correct TLS address
7713 // information during assembly printing. The offset is likely to be non-zero
7714 // when we end up in this case.
7715 int Offset = N->getConstantOperandVal(Num: 1);
7716 TLSVarNode = DAG->getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc(GA), VT: MVT::i64,
7717 offset: Offset, TargetFlags);
7718
7719 (void)DAG->UpdateNodeOperands(N, Op1: InitialADDI.getOperand(i: 0), Op2: TLSVarNode);
7720 if (InitialADDI.getNode()->use_empty())
7721 DAG->RemoveDeadNode(N: InitialADDI.getNode());
7722}
7723
7724void PPCDAGToDAGISel::PeepholePPC64() {
7725 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7726
7727 while (Position != CurDAG->allnodes_begin()) {
7728 SDNode *N = &*--Position;
7729 // Skip dead nodes and any non-machine opcodes.
7730 if (N->use_empty() || !N->isMachineOpcode())
7731 continue;
7732
7733 if (isVSXSwap(N: SDValue(N, 0)))
7734 reduceVSXSwap(N, DAG: CurDAG);
7735
7736 // This optimization is performed for non-TOC-based local-[exec|dynamic]
7737 // accesses.
7738 foldADDIForFasterLocalAccesses(N, DAG: CurDAG);
7739
7740 unsigned FirstOp;
7741 unsigned StorageOpcode = N->getMachineOpcode();
7742 bool RequiresMod4Offset = false;
7743
7744 switch (StorageOpcode) {
7745 default: continue;
7746
7747 case PPC::LWA:
7748 case PPC::LD:
7749 case PPC::DFLOADf64:
7750 case PPC::DFLOADf32:
7751 RequiresMod4Offset = true;
7752 [[fallthrough]];
7753 case PPC::LBZ:
7754 case PPC::LBZ8:
7755 case PPC::LFD:
7756 case PPC::LFS:
7757 case PPC::LHA:
7758 case PPC::LHA8:
7759 case PPC::LHZ:
7760 case PPC::LHZ8:
7761 case PPC::LWZ:
7762 case PPC::LWZ8:
7763 FirstOp = 0;
7764 break;
7765
7766 case PPC::STD:
7767 case PPC::DFSTOREf64:
7768 case PPC::DFSTOREf32:
7769 RequiresMod4Offset = true;
7770 [[fallthrough]];
7771 case PPC::STB:
7772 case PPC::STB8:
7773 case PPC::STFD:
7774 case PPC::STFS:
7775 case PPC::STH:
7776 case PPC::STH8:
7777 case PPC::STW:
7778 case PPC::STW8:
7779 FirstOp = 1;
7780 break;
7781 }
7782
7783 // If this is a load or store with a zero offset, or within the alignment,
7784 // we may be able to fold an add-immediate into the memory operation.
7785 // The check against alignment is below, as it can't occur until we check
7786 // the arguments to N
7787 if (!isa<ConstantSDNode>(Val: N->getOperand(Num: FirstOp)))
7788 continue;
7789
7790 SDValue Base = N->getOperand(Num: FirstOp + 1);
7791 if (!Base.isMachineOpcode())
7792 continue;
7793
7794 unsigned Flags = 0;
7795 bool ReplaceFlags = true;
7796
7797 // When the feeding operation is an add-immediate of some sort,
7798 // determine whether we need to add relocation information to the
7799 // target flags on the immediate operand when we fold it into the
7800 // load instruction.
7801 //
7802 // For something like ADDItocL8, the relocation information is
7803 // inferred from the opcode; when we process it in the AsmPrinter,
7804 // we add the necessary relocation there. A load, though, can receive
7805 // relocation from various flavors of ADDIxxx, so we need to carry
7806 // the relocation information in the target flags.
7807 switch (Base.getMachineOpcode()) {
7808 default: continue;
7809
7810 case PPC::ADDI8:
7811 case PPC::ADDI:
7812 // In some cases (such as TLS) the relocation information
7813 // is already in place on the operand, so copying the operand
7814 // is sufficient.
7815 ReplaceFlags = false;
7816 break;
7817 case PPC::ADDIdtprelL:
7818 Flags = PPCII::MO_DTPREL_LO;
7819 break;
7820 case PPC::ADDItlsldL:
7821 Flags = PPCII::MO_TLSLD_LO;
7822 break;
7823 case PPC::ADDItocL8:
7824 // Skip the following peephole optimizations for ADDItocL8 on AIX which
7825 // is used for toc-data access.
7826 if (Subtarget->isAIXABI())
7827 continue;
7828 Flags = PPCII::MO_TOC_LO;
7829 break;
7830 }
7831
7832 SDValue ImmOpnd = Base.getOperand(i: 1);
7833
7834 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
7835 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
7836 // we might have needed different @ha relocation values for the offset
7837 // pointers).
7838 int MaxDisplacement = 7;
7839 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: ImmOpnd)) {
7840 const GlobalValue *GV = GA->getGlobal();
7841 Align Alignment = GV->getPointerAlignment(DL: CurDAG->getDataLayout());
7842 MaxDisplacement = std::min(a: (int)Alignment.value() - 1, b: MaxDisplacement);
7843 }
7844
7845 bool UpdateHBase = false;
7846 SDValue HBase = Base.getOperand(i: 0);
7847
7848 int Offset = N->getConstantOperandVal(Num: FirstOp);
7849 if (ReplaceFlags) {
7850 if (Offset < 0 || Offset > MaxDisplacement) {
7851 // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
7852 // one use, then we can do this for any offset, we just need to also
7853 // update the offset (i.e. the symbol addend) on the addis also.
7854 if (Base.getMachineOpcode() != PPC::ADDItocL8)
7855 continue;
7856
7857 if (!HBase.isMachineOpcode() ||
7858 HBase.getMachineOpcode() != PPC::ADDIStocHA8)
7859 continue;
7860
7861 if (!Base.hasOneUse() || !HBase.hasOneUse())
7862 continue;
7863
7864 SDValue HImmOpnd = HBase.getOperand(i: 1);
7865 if (HImmOpnd != ImmOpnd)
7866 continue;
7867
7868 UpdateHBase = true;
7869 }
7870 } else {
7871 // Global addresses can be folded, but only if they are sufficiently
7872 // aligned.
7873 if (RequiresMod4Offset) {
7874 if (GlobalAddressSDNode *GA =
7875 dyn_cast<GlobalAddressSDNode>(Val&: ImmOpnd)) {
7876 const GlobalValue *GV = GA->getGlobal();
7877 Align Alignment = GV->getPointerAlignment(DL: CurDAG->getDataLayout());
7878 if (Alignment < 4)
7879 continue;
7880 }
7881 }
7882
7883 // If we're directly folding the addend from an addi instruction, then:
7884 // 1. In general, the offset on the memory access must be zero.
7885 // 2. If the addend is a constant, then it can be combined with a
7886 // non-zero offset, but only if the result meets the encoding
7887 // requirements.
7888 if (auto *C = dyn_cast<ConstantSDNode>(Val&: ImmOpnd)) {
7889 Offset += C->getSExtValue();
7890
7891 if (RequiresMod4Offset && (Offset % 4) != 0)
7892 continue;
7893
7894 if (!isInt<16>(x: Offset))
7895 continue;
7896
7897 ImmOpnd = CurDAG->getSignedTargetConstant(Val: Offset, DL: SDLoc(ImmOpnd),
7898 VT: ImmOpnd.getValueType());
7899 } else if (Offset != 0) {
7900 // This optimization is performed for non-TOC-based local-[exec|dynamic]
7901 // accesses.
7902 if (isEligibleToFoldADDIForFasterLocalAccesses(DAG: CurDAG, ADDIToFold: Base)) {
7903 // Add the non-zero offset information into the load or store
7904 // instruction to be used for non-TOC-based local-[exec|dynamic]
7905 // accesses.
7906 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: ImmOpnd);
7907 assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
7908 "addi into local-[exec|dynamic] accesses!");
7909 ImmOpnd = CurDAG->getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc(GA),
7910 VT: MVT::i64, offset: Offset,
7911 TargetFlags: GA->getTargetFlags());
7912 } else
7913 continue;
7914 }
7915 }
7916
7917 // We found an opportunity. Reverse the operands from the add
7918 // immediate and substitute them into the load or store. If
7919 // needed, update the target flags for the immediate operand to
7920 // reflect the necessary relocation information.
7921 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
7922 LLVM_DEBUG(Base->dump(CurDAG));
7923 LLVM_DEBUG(dbgs() << "\nN: ");
7924 LLVM_DEBUG(N->dump(CurDAG));
7925 LLVM_DEBUG(dbgs() << "\n");
7926
7927 // If the relocation information isn't already present on the
7928 // immediate operand, add it now.
7929 if (ReplaceFlags) {
7930 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: ImmOpnd)) {
7931 SDLoc dl(GA);
7932 const GlobalValue *GV = GA->getGlobal();
7933 Align Alignment = GV->getPointerAlignment(DL: CurDAG->getDataLayout());
7934 // We can't perform this optimization for data whose alignment
7935 // is insufficient for the instruction encoding.
7936 if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
7937 LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
7938 continue;
7939 }
7940 ImmOpnd = CurDAG->getTargetGlobalAddress(GV, DL: dl, VT: MVT::i64, offset: Offset, TargetFlags: Flags);
7941 } else if (ConstantPoolSDNode *CP =
7942 dyn_cast<ConstantPoolSDNode>(Val&: ImmOpnd)) {
7943 const Constant *C = CP->getConstVal();
7944 ImmOpnd = CurDAG->getTargetConstantPool(C, VT: MVT::i64, Align: CP->getAlign(),
7945 Offset, TargetFlags: Flags);
7946 }
7947 }
7948
7949 if (FirstOp == 1) // Store
7950 (void)CurDAG->UpdateNodeOperands(N, Op1: N->getOperand(Num: 0), Op2: ImmOpnd,
7951 Op3: Base.getOperand(i: 0), Op4: N->getOperand(Num: 3));
7952 else // Load
7953 (void)CurDAG->UpdateNodeOperands(N, Op1: ImmOpnd, Op2: Base.getOperand(i: 0),
7954 Op3: N->getOperand(Num: 2));
7955
7956 if (UpdateHBase)
7957 (void)CurDAG->UpdateNodeOperands(N: HBase.getNode(), Op1: HBase.getOperand(i: 0),
7958 Op2: ImmOpnd);
7959
7960 // The add-immediate may now be dead, in which case remove it.
7961 if (Base.getNode()->use_empty())
7962 CurDAG->RemoveDeadNode(N: Base.getNode());
7963 }
7964}
7965
7966/// createPPCISelDag - This pass converts a legalized DAG into a
7967/// PowerPC-specific DAG, ready for instruction scheduling.
7968///
7969FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
7970 CodeGenOptLevel OptLevel) {
7971 return new PPCDAGToDAGISelLegacy(TM, OptLevel);
7972}
7973