1//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the PowerPC-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// PPCGenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "MCTargetDesc/PPCPredicates.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCISelLowering.h"
19#include "PPCMachineFunctionInfo.h"
20#include "PPCSubtarget.h"
21#include "llvm/CodeGen/CallingConvLower.h"
22#include "llvm/CodeGen/FastISel.h"
23#include "llvm/CodeGen/FunctionLoweringInfo.h"
24#include "llvm/CodeGen/MachineConstantPool.h"
25#include "llvm/CodeGen/MachineFrameInfo.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28#include "llvm/CodeGen/TargetLowering.h"
29#include "llvm/IR/CallingConv.h"
30#include "llvm/IR/GetElementPtrTypeIterator.h"
31#include "llvm/IR/GlobalVariable.h"
32#include "llvm/IR/Operator.h"
33#include "llvm/Target/TargetMachine.h"
34
35//===----------------------------------------------------------------------===//
36//
37// TBD:
38// fastLowerArguments: Handle simple cases.
39// PPCMaterializeGV: Handle TLS.
40// SelectCall: Handle function pointers.
41// SelectCall: Handle multi-register return values.
42// SelectCall: Optimize away nops for local calls.
43// processCallArgs: Handle bit-converted arguments.
44// finishCall: Handle multi-register return values.
45// PPCComputeAddress: Handle parameter references as FrameIndex's.
46// PPCEmitCmp: Handle immediate as operand 1.
47// SelectCall: Handle small byval arguments.
48// SelectIntrinsicCall: Implement.
49// SelectSelect: Implement.
50// Consider factoring isTypeLegal into the base class.
51// Implement switches and jump tables.
52//
53//===----------------------------------------------------------------------===//
54using namespace llvm;
55
56#define DEBUG_TYPE "ppcfastisel"
57
58namespace {
59
// A memory operand for fast-isel loads and stores: either a base register
// or a frame index, plus a signed 64-bit displacement.
struct Address {
  // Which flavor of base the union below holds.
  enum { RegBase, FrameIndexBase } BaseType = RegBase;

  union {
    unsigned Reg; // Valid when BaseType == RegBase.
    int FI;       // Valid when BaseType == FrameIndexBase.
  } Base;

  int64_t Offset = 0;

  // Start out as register base zero with no displacement.
  Address() { Base.Reg = 0; }
};
79
// Fast instruction selector for PowerPC. Handles the common, simple IR
// constructs directly; anything it rejects falls back to the normal
// SelectionDAG instruction selector.
class PPCFastISel final : public FastISel {

  const TargetMachine &TM;       // Owning target machine.
  const PPCSubtarget *Subtarget; // Subtarget feature queries (SPE, VSX, ...).
  PPCFunctionInfo *PPCFuncInfo;  // PPC-specific per-function state.
  const TargetInstrInfo &TII;    // Instruction descriptions for BuildMI.
  const TargetLowering &TLI;     // Type-legality and lowering queries.
  LLVMContext *Context;          // Context of the function being selected.

  public:
    explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
                         const TargetLibraryInfo *LibInfo)
        : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
          Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
          PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
          TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
          Context(&FuncInfo.Fn->getContext()) {}

  // Backend specific FastISel code.
  private:
    bool fastSelectInstruction(const Instruction *I) override;
    Register fastMaterializeConstant(const Constant *C) override;
    Register fastMaterializeAlloca(const AllocaInst *AI) override;
    bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                             const LoadInst *LI) override;
    bool fastLowerArguments() override;
    Register fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
    Register fastEmitInst_ri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, Register Op0,
                             uint64_t Imm);
    Register fastEmitInst_r(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC, Register Op0);
    Register fastEmitInst_rr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, Register Op0,
                             Register Op1);

    bool fastLowerCall(CallLoweringInfo &CLI) override;

  // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectIToFP(const Instruction *I, bool IsSigned);
    bool SelectFPToI(const Instruction *I, bool IsSigned);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);

  // Utility routines.
  private:
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool isValueAvailable(const Value *V) const;
    // True if RC is the VSX scalar double-precision register class.
    bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
      return RC->getID() == PPC::VSFRCRegClassID;
    }
    // True if RC is the VSX scalar single-precision register class.
    bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
      return RC->getID() == PPC::VSSRCRegClassID;
    }
    // Emit a COPY of SrcReg (optionally through SubReg, with operand flags
    // Flag) into a fresh virtual register of class ToRC and return it.
    Register copyRegToRegClass(const TargetRegisterClass *ToRC, Register SrcReg,
                               unsigned Flag = 0, unsigned SubReg = 0) {
      Register TmpReg = createResultReg(RC: ToRC);
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
              MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: TmpReg).addReg(RegNo: SrcReg, flags: Flag, SubReg);
      return TmpReg;
    }
    bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt,
                    Register DestReg, const PPC::Predicate Pred);
    bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                     const TargetRegisterClass *RC, bool IsZExt = true,
                     unsigned FP64LoadOpc = PPC::LFD);
    bool PPCEmitStore(MVT VT, Register SrcReg, Address &Addr);
    bool PPCComputeAddress(const Value *Obj, Address &Addr);
    void PPCSimplifyAddress(Address &Addr, bool &UseOffset, Register &IndexReg);
    bool PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, Register DestReg,
                       bool IsZExt);
    Register PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
    Register PPCMaterializeGV(const GlobalValue *GV, MVT VT);
    Register PPCMaterializeInt(const ConstantInt *CI, MVT VT,
                               bool UseSExt = true);
    Register PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
    Register PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC);
    Register PPCMoveToIntReg(const Instruction *I, MVT VT, Register SrcReg,
                             bool IsSigned);
    Register PPCMoveToFPReg(MVT VT, Register SrcReg, bool IsSigned);

  // Call handling routines.
  private:
    bool processCallArgs(SmallVectorImpl<Value *> &Args,
                         SmallVectorImpl<Register> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
                         unsigned &NumBytes, bool IsVarArg);
    bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);

  private:
  // Tablegen-generated fast-isel patterns (PPCGenFastISel.inc).
  #include "PPCGenFastISel.inc"

};
185
186} // end anonymous namespace
187
188static std::optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
189 switch (Pred) {
190 // These are not representable with any single compare.
191 case CmpInst::FCMP_FALSE:
192 case CmpInst::FCMP_TRUE:
193 // Major concern about the following 6 cases is NaN result. The comparison
194 // result consists of 4 bits, indicating lt, eq, gt and un (unordered),
195 // only one of which will be set. The result is generated by fcmpu
196 // instruction. However, bc instruction only inspects one of the first 3
197 // bits, so when un is set, bc instruction may jump to an undesired
198 // place.
199 //
200 // More specifically, if we expect an unordered comparison and un is set, we
201 // expect to always go to true branch; in such case UEQ, UGT and ULT still
202 // give false, which are undesired; but UNE, UGE, ULE happen to give true,
203 // since they are tested by inspecting !eq, !lt, !gt, respectively.
204 //
205 // Similarly, for ordered comparison, when un is set, we always expect the
206 // result to be false. In such case OGT, OLT and OEQ is good, since they are
207 // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
208 // and ONE are tested through !lt, !gt and !eq, and these are true.
209 case CmpInst::FCMP_UEQ:
210 case CmpInst::FCMP_UGT:
211 case CmpInst::FCMP_ULT:
212 case CmpInst::FCMP_OGE:
213 case CmpInst::FCMP_OLE:
214 case CmpInst::FCMP_ONE:
215 default:
216 return std::nullopt;
217
218 case CmpInst::FCMP_OEQ:
219 case CmpInst::ICMP_EQ:
220 return PPC::PRED_EQ;
221
222 case CmpInst::FCMP_OGT:
223 case CmpInst::ICMP_UGT:
224 case CmpInst::ICMP_SGT:
225 return PPC::PRED_GT;
226
227 case CmpInst::FCMP_UGE:
228 case CmpInst::ICMP_UGE:
229 case CmpInst::ICMP_SGE:
230 return PPC::PRED_GE;
231
232 case CmpInst::FCMP_OLT:
233 case CmpInst::ICMP_ULT:
234 case CmpInst::ICMP_SLT:
235 return PPC::PRED_LT;
236
237 case CmpInst::FCMP_ULE:
238 case CmpInst::ICMP_ULE:
239 case CmpInst::ICMP_SLE:
240 return PPC::PRED_LE;
241
242 case CmpInst::FCMP_UNE:
243 case CmpInst::ICMP_NE:
244 return PPC::PRED_NE;
245
246 case CmpInst::FCMP_ORD:
247 return PPC::PRED_NU;
248
249 case CmpInst::FCMP_UNO:
250 return PPC::PRED_UN;
251 }
252}
253
254// Determine whether the type Ty is simple enough to be handled by
255// fast-isel, and return its equivalent machine type in VT.
256// FIXME: Copied directly from ARM -- factor into base class?
257bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
258 EVT Evt = TLI.getValueType(DL, Ty, AllowUnknown: true);
259
260 // Only handle simple types.
261 if (Evt == MVT::Other || !Evt.isSimple()) return false;
262 VT = Evt.getSimpleVT();
263
264 // Handle all legal types, i.e. a register that will directly hold this
265 // value.
266 return TLI.isTypeLegal(VT);
267}
268
269// Determine whether the type Ty is simple enough to be handled by
270// fast-isel as a load target, and return its equivalent machine type in VT.
271bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
272 if (isTypeLegal(Ty, VT)) return true;
273
274 // If this is a type than can be sign or zero-extended to a basic operation
275 // go ahead and accept it now.
276 if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
277 return true;
278 }
279
280 return false;
281}
282
283bool PPCFastISel::isValueAvailable(const Value *V) const {
284 if (!isa<Instruction>(Val: V))
285 return true;
286
287 const auto *I = cast<Instruction>(Val: V);
288 return FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB;
289}
290
291// Given a value Obj, create an Address object Addr that represents its
292// address. Return false if we can't handle it.
// Given a value Obj, create an Address object Addr that represents its
// address. Return false if we can't handle it. On success Addr holds either
// a frame index or a base register (constrained away from X0), plus any
// constant displacement folded out of the addressing computation.
bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Val: Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(Val: static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Val: Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  // Fold the operation that feeds the pointer into the addressing mode
  // where possible.
  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast:
    // Look through bitcasts.
    return PPCComputeAddress(Obj: U->getOperand(i: 0), Addr);
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) ==
        TLI.getPointerTy(DL))
      return PPCComputeAddress(Obj: U->getOperand(i: 0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL))
      return PPCComputeAddress(Obj: U->getOperand(i: 0), Addr);
    break;
  case Instruction::GetElementPtr: {
    // Save the current state so we can undo a partially-folded GEP.
    Address SavedAddr = Addr;
    int64_t TmpOffset = Addr.Offset;

    // Iterate through the GEP folding the constants into offsets where
    // we can.
    gep_type_iterator GTI = gep_type_begin(GEP: U);
    for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
         II != IE; ++II, ++GTI) {
      const Value *Op = *II;
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // Struct indices are always constant; add the field's byte offset.
        const StructLayout *SL = DL.getStructLayout(Ty: STy);
        unsigned Idx = cast<ConstantInt>(Val: Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        // Array/sequential index: scale by the element stride.
        uint64_t S = GTI.getSequentialElementStride(DL);
        for (;;) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(GEP: U, Add: Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
              cast<ConstantInt>(Val: cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.Offset = TmpOffset;
    if (PPCComputeAddress(Obj: U->getOperand(i: 0), Addr)) return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

    unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    // A static alloca becomes a frame-index base directly.
    const AllocaInst *AI = cast<AllocaInst>(Val: Obj);
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(Val: AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.BaseType = Address::FrameIndexBase;
      Addr.Base.FI = SI->second;
      return true;
    }
    break;
  }
  }

  // FIXME: References to parameters fall through to the behavior
  // below. They should be able to reference a frame index since
  // they are stored to the stack, so we can get "ld rx, offset(r1)"
  // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
  // just contain the parameter. Try to handle this with a FI.

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0)
    Addr.Base.Reg = getRegForValue(V: Obj);

  // Prevent assignment of base register to X0, which is inappropriate
  // for loads and stores alike.
  if (Addr.Base.Reg != 0)
    MRI.setRegClass(Reg: Addr.Base.Reg, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);

  return Addr.Base.Reg != 0;
}
403
404// Fix up some addresses that can't be used directly. For example, if
405// an offset won't fit in an instruction field, we may need to move it
406// into an index register.
407void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
408 Register &IndexReg) {
409
410 // Check whether the offset fits in the instruction field.
411 if (!isInt<16>(x: Addr.Offset))
412 UseOffset = false;
413
414 // If this is a stack pointer and the offset needs to be simplified then
415 // put the alloca address into a register, set the base type back to
416 // register and continue. This should almost never happen.
417 if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
418 Register ResultReg = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
419 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDI8),
420 DestReg: ResultReg).addFrameIndex(Idx: Addr.Base.FI).addImm(Val: 0);
421 Addr.Base.Reg = ResultReg;
422 Addr.BaseType = Address::RegBase;
423 }
424
425 if (!UseOffset) {
426 IntegerType *OffsetTy = Type::getInt64Ty(C&: *Context);
427 const ConstantInt *Offset = ConstantInt::getSigned(Ty: OffsetTy, V: Addr.Offset);
428 IndexReg = PPCMaterializeInt(CI: Offset, VT: MVT::i64);
429 assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
430 }
431}
432
433// Emit a load instruction if possible, returning true if we succeeded,
434// otherwise false. See commentary below for how the register class of
435// the load is determined.
// Emit a load instruction if possible, returning true if we succeeded,
// otherwise false. See commentary below for how the register class of
// the load is determined. On success ResultReg holds the loaded value
// (it is created here if the caller passed it in as zero).
bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                              const TargetRegisterClass *RC,
                              bool IsZExt, unsigned FP64LoadOpc) {
  unsigned Opc;
  bool UseOffset = true;
  bool HasSPE = Subtarget->hasSPE();

  // If ResultReg is given, it determines the register class of the load.
  // Otherwise, RC is the register class to use. If the result of the
  // load isn't anticipated in this block, both may be zero, in which
  // case we must make a conservative guess. In particular, don't assign
  // R0 or X0 to the result register, as the result may be used in a load,
  // store, add-immediate, or isel that won't permit this. (Though
  // perhaps the spill and reload of live-exit values would handle this?)
  const TargetRegisterClass *UseRC =
    (ResultReg ? MRI.getRegClass(Reg: ResultReg) :
     (RC ? RC :
      (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
       (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
        (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
         &PPC::GPRC_and_GPRC_NOR0RegClass)))));

  // A 32-bit register class selects the 32-bit flavor of each opcode below.
  bool Is32BitInt = UseRC->hasSuperClassEq(RC: &PPC::GPRCRegClass);

  // Pick the D-form (reg + 16-bit displacement) opcode for the value type.
  switch (VT.SimpleTy) {
  default: // e.g., vector types not handled
    return false;
  case MVT::i8:
    Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
    break;
  case MVT::i16:
    // Zero-extend with lhz, sign-extend with lha.
    Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
                  : (Is32BitInt ? PPC::LHA : PPC::LHA8));
    break;
  case MVT::i32:
    Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
                  : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
    // lwa is a DS-form instruction: its displacement must be a multiple
    // of 4, so fall back to the indexed form otherwise.
    if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
      UseOffset = false;
    break;
  case MVT::i64:
    Opc = PPC::LD;
    assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
           "64-bit load with 32-bit target??");
    // ld is DS-form as well: displacement must be 4-byte aligned.
    UseOffset = ((Addr.Offset & 3) == 0);
    break;
  case MVT::f32:
    Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
    break;
  case MVT::f64:
    // Caller-selected f64 opcode (LFD by default, EVLDD for SPE).
    Opc = FP64LoadOpc;
    break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  Register IndexReg;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX load with an offset of 0, a VSX indexed load can
  // be used.
  bool IsVSSRC = isVSSRCRegClass(RC: UseRC);
  bool IsVSFRC = isVSFRCRegClass(RC: UseRC);
  bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
  bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
  if ((Is32VSXLoad || Is64VSXLoad) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  if (!ResultReg)
    ResultReg = createResultReg(RC: UseRC);

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI: Addr.Base.FI,
                                          Offset: Addr.Offset),
        F: MachineMemOperand::MOLoad, Size: MFI.getObjectSize(ObjectIdx: Addr.Base.FI),
        BaseAlignment: MFI.getObjectAlign(ObjectIdx: Addr.Base.FI));

    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
      .addImm(Val: Addr.Offset).addFrameIndex(Idx: Addr.Base.FI).addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
      .addImm(Val: Addr.Offset).addReg(RegNo: Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::LBZ: Opc = PPC::LBZX; break;
      case PPC::LBZ8: Opc = PPC::LBZX8; break;
      case PPC::LHZ: Opc = PPC::LHZX; break;
      case PPC::LHZ8: Opc = PPC::LHZX8; break;
      case PPC::LHA: Opc = PPC::LHAX; break;
      case PPC::LHA8: Opc = PPC::LHAX8; break;
      case PPC::LWZ: Opc = PPC::LWZX; break;
      case PPC::LWZ8: Opc = PPC::LWZX8; break;
      case PPC::LWA: Opc = PPC::LWAX; break;
      case PPC::LWA_32: Opc = PPC::LWAX_32; break;
      case PPC::LD: Opc = PPC::LDX; break;
      case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
      case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
      case PPC::EVLDD: Opc = PPC::EVLDDX; break;
      case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
    }

    auto MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc),
                       DestReg: ResultReg);

    // If we have an index register defined we use it in the load inst,
    // otherwise we use X0 as base, since it makes the vector instructions
    // compute the effective address as if the base contribution were zero,
    // regardless of the register's actual content.
    if (IndexReg)
      MIB.addReg(RegNo: Addr.Base.Reg).addReg(RegNo: IndexReg);
    else
      MIB.addReg(RegNo: PPC::ZERO8).addReg(RegNo: Addr.Base.Reg);
  }

  return true;
}
573
574// Attempt to fast-select a load instruction.
575bool PPCFastISel::SelectLoad(const Instruction *I) {
576 // FIXME: No atomic loads are supported.
577 if (cast<LoadInst>(Val: I)->isAtomic())
578 return false;
579
580 // Verify we have a legal type before going any further.
581 MVT VT;
582 if (!isLoadTypeLegal(Ty: I->getType(), VT))
583 return false;
584
585 // See if we can handle this address.
586 Address Addr;
587 if (!PPCComputeAddress(Obj: I->getOperand(i: 0), Addr))
588 return false;
589
590 // Look at the currently assigned register for this instruction
591 // to determine the required register class. This is necessary
592 // to constrain RA from using R0/X0 when this is not legal.
593 Register AssignedReg = FuncInfo.ValueMap[I];
594 const TargetRegisterClass *RC =
595 AssignedReg ? MRI.getRegClass(Reg: AssignedReg) : nullptr;
596
597 Register ResultReg = 0;
598 if (!PPCEmitLoad(VT, ResultReg, Addr, RC, IsZExt: true,
599 FP64LoadOpc: Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
600 return false;
601 updateValueMap(I, Reg: ResultReg);
602 return true;
603}
604
605// Emit a store instruction to store SrcReg at Addr.
// Emit a store instruction to store SrcReg at Addr. Returns true on
// success. The opcode is chosen from the value type and the register
// class of SrcReg; mirrors the structure of PPCEmitLoad above.
bool PPCFastISel::PPCEmitStore(MVT VT, Register SrcReg, Address &Addr) {
  assert(SrcReg && "Nothing to store!");
  unsigned Opc;
  bool UseOffset = true;

  // A 32-bit source register selects the 32-bit flavor of each opcode.
  const TargetRegisterClass *RC = MRI.getRegClass(Reg: SrcReg);
  bool Is32BitInt = RC->hasSuperClassEq(RC: &PPC::GPRCRegClass);

  // Pick the D-form (reg + 16-bit displacement) opcode for the value type.
  switch (VT.SimpleTy) {
  default: // e.g., vector types not handled
    return false;
  case MVT::i8:
    Opc = Is32BitInt ? PPC::STB : PPC::STB8;
    break;
  case MVT::i16:
    Opc = Is32BitInt ? PPC::STH : PPC::STH8;
    break;
  case MVT::i32:
    assert(Is32BitInt && "Not GPRC for i32??");
    Opc = PPC::STW;
    break;
  case MVT::i64:
    Opc = PPC::STD;
    // std is a DS-form instruction: its displacement must be a multiple
    // of 4, so fall back to the indexed form otherwise.
    UseOffset = ((Addr.Offset & 3) == 0);
    break;
  case MVT::f32:
    Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
    break;
  case MVT::f64:
    Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
    break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  Register IndexReg;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX store with an offset of 0, a VSX indexed store
  // can be used.
  bool IsVSSRC = isVSSRCRegClass(RC);
  bool IsVSFRC = isVSFRCRegClass(RC);
  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
  if ((Is32VSXStore || Is64VSXStore) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI: Addr.Base.FI,
                                          Offset: Addr.Offset),
        F: MachineMemOperand::MOStore, Size: MFI.getObjectSize(ObjectIdx: Addr.Base.FI),
        BaseAlignment: MFI.getObjectAlign(ObjectIdx: Addr.Base.FI));

    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
        .addReg(RegNo: SrcReg)
        .addImm(Val: Addr.Offset)
        .addFrameIndex(Idx: Addr.Base.FI)
        .addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore)
      return false;

    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
      .addReg(RegNo: SrcReg).addImm(Val: Addr.Offset).addReg(RegNo: Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::STB: Opc = PPC::STBX; break;
      case PPC::STH : Opc = PPC::STHX; break;
      case PPC::STW : Opc = PPC::STWX; break;
      case PPC::STB8: Opc = PPC::STBX8; break;
      case PPC::STH8: Opc = PPC::STHX8; break;
      case PPC::STW8: Opc = PPC::STWX8; break;
      case PPC::STD: Opc = PPC::STDX; break;
      case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
      case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
      case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
      case PPC::SPESTW: Opc = PPC::SPESTWX; break;
    }

    auto MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
        .addReg(RegNo: SrcReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base, since it makes the vector instructions
    // compute the effective address as if the base contribution were zero,
    // regardless of the register's actual content.
    if (IndexReg)
      MIB.addReg(RegNo: Addr.Base.Reg).addReg(RegNo: IndexReg);
    else
      MIB.addReg(RegNo: PPC::ZERO8).addReg(RegNo: Addr.Base.Reg);
  }

  return true;
}
719
720// Attempt to fast-select a store instruction.
721bool PPCFastISel::SelectStore(const Instruction *I) {
722 Value *Op0 = I->getOperand(i: 0);
723 Register SrcReg;
724
725 // FIXME: No atomics loads are supported.
726 if (cast<StoreInst>(Val: I)->isAtomic())
727 return false;
728
729 // Verify we have a legal type before going any further.
730 MVT VT;
731 if (!isLoadTypeLegal(Ty: Op0->getType(), VT))
732 return false;
733
734 // Get the value to be stored into a register.
735 SrcReg = getRegForValue(V: Op0);
736 if (!SrcReg)
737 return false;
738
739 // See if we can handle this address.
740 Address Addr;
741 if (!PPCComputeAddress(Obj: I->getOperand(i: 1), Addr))
742 return false;
743
744 if (!PPCEmitStore(VT, SrcReg, Addr))
745 return false;
746
747 return true;
748}
749
750// Attempt to fast-select a branch instruction.
// Attempt to fast-select a branch instruction. Handles a conditional
// branch fed by a compare in the same block, and a branch on a constant
// condition; anything else is left to SelectionDAG.
bool PPCFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(Val: I);
  MachineBasicBlock *BrBB = FuncInfo.MBB;
  MachineBasicBlock *TBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 0));
  MachineBasicBlock *FBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 1));

  // For now, just try the simplest case where it's fed by a compare.
  if (const CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition())) {
    if (isValueAvailable(V: CI)) {
      // Only predicates representable by a single compare/branch pair are
      // handled; see getComparePred.
      std::optional<PPC::Predicate> OptPPCPred =
          getComparePred(Pred: CI->getPredicate());
      if (!OptPPCPred)
        return false;

      PPC::Predicate PPCPred = *OptPPCPred;

      // Take advantage of fall-through opportunities: if the true block
      // immediately follows, branch on the inverted condition to the false
      // block instead.
      if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
        std::swap(a&: TBB, b&: FBB);
        PPCPred = PPC::InvertPredicate(Opcode: PPCPred);
      }

      Register CondReg = createResultReg(RC: &PPC::CRRCRegClass);

      if (!PPCEmitCmp(Src1Value: CI->getOperand(i_nocapture: 0), Src2Value: CI->getOperand(i_nocapture: 1), isZExt: CI->isUnsigned(),
                      DestReg: CondReg, Pred: PPCPred))
        return false;

      // With SPE the condition is baked into the compare opcode selected by
      // PPCEmitCmp, so the branch itself uses the generic PRED_SPE predicate.
      BuildMI(BB&: *BrBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::BCC))
          .addImm(Val: Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
          .addReg(RegNo: CondReg)
          .addMBB(MBB: TBB);
      finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
      return true;
    }
  } else if (const ConstantInt *CI =
                 dyn_cast<ConstantInt>(Val: BI->getCondition())) {
    // Constant condition: emit an unconditional branch to the taken side.
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    fastEmitBranch(MSucc: Target, DbgLoc: MIMD.getDL());
    return true;
  }

  // FIXME: ARM looks for a case where the block containing the compare
  // has been split from the block containing the branch. If this happens,
  // there is a vreg available containing the result of the compare. I'm
  // not sure we can do much, as we've lost the predicate information with
  // the compare instruction -- we have a 4-bit CR but don't know which bit
  // to test here.
  return false;
}
802
// Attempt to emit a compare of the two source values. Signed and unsigned
// comparisons are supported. Return false if we can't handle it.
// The result of the compare is written into DestReg (a CR field); Pred is
// only consulted for SPE, whose FP compares are predicate-specific.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
                             bool IsZExt, Register DestReg,
                             const PPC::Predicate Pred) {
  Type *Ty = SrcValue1->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, AllowUnknown: true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // When CR bits are in use, i1 values live in condition-register bits and
  // can't be fed to the integer compare instructions selected below.
  if (SrcVT == MVT::i1 && Subtarget->useCRBits())
    return false;

  // See if operand 2 is an immediate encodeable in the compare.
  // FIXME: Operands are not in canonical order at -O0, so an immediate
  // operand in position 1 is a lost opportunity for now. We are
  // similar to ARM in this regard.
  int64_t Imm = 0;
  bool UseImm = false;
  const bool HasSPE = Subtarget->hasSPE();

  // Only 16-bit integer constants can be represented in compares for
  // PowerPC. Others will be materialized into a register.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: SrcValue2)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      // Extend the constant with the signedness of the comparison so the
      // range check below matches the immediate field of the chosen opcode.
      Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() :
                       (int64_t)CIVal.getSExtValue();
      if ((IsZExt && isUInt<16>(x: Imm)) || (!IsZExt && isInt<16>(x: Imm)))
        UseImm = true;
    }
  }

  Register SrcReg1 = getRegForValue(V: SrcValue1);
  if (!SrcReg1)
    return false;

  // Only materialize the RHS into a register when it can't be encoded
  // as an immediate.
  Register SrcReg2;
  if (!UseImm) {
    SrcReg2 = getRegForValue(V: SrcValue2);
    if (!SrcReg2)
      return false;
  }

  unsigned CmpOpc;
  bool NeedsExt = false;

  // Register classes decide between VSX and classic FP compares below.
  // RC2 is null in the immediate case.
  auto RC1 = MRI.getRegClass(Reg: SrcReg1);
  auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(Reg: SrcReg2) : nullptr;

  switch (SrcVT.SimpleTy) {
    default: return false;
    case MVT::f32:
      if (HasSPE) {
        // SPE compares are predicate-specific; only EQ/LT/GT are available.
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFSCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFSCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFSCMPGT;
            break;
        }
      } else {
        CmpOpc = PPC::FCMPUS;
        // FCMPUS takes F4RC operands; copy out of the VSX class if needed.
        if (isVSSRCRegClass(RC: RC1))
          SrcReg1 = copyRegToRegClass(ToRC: &PPC::F4RCRegClass, SrcReg: SrcReg1);
        if (RC2 && isVSSRCRegClass(RC: RC2))
          SrcReg2 = copyRegToRegClass(ToRC: &PPC::F4RCRegClass, SrcReg: SrcReg2);
      }
      break;
    case MVT::f64:
      if (HasSPE) {
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFDCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFDCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFDCMPGT;
            break;
        }
      } else if (isVSFRCRegClass(RC: RC1) || (RC2 && isVSFRCRegClass(RC: RC2))) {
        // Use the VSX compare when either operand is already in a VSX class.
        CmpOpc = PPC::XSCMPUDP;
      } else {
        CmpOpc = PPC::FCMPUD;
      }
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      // Sub-word integers must be extended to 32 bits before comparing.
      NeedsExt = true;
      [[fallthrough]];
    case MVT::i32:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
      else
        CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
      break;
    case MVT::i64:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
      else
        CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
      break;
  }

  if (NeedsExt) {
    Register ExtReg = createResultReg(RC: &PPC::GPRCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg: SrcReg1, DestVT: MVT::i32, DestReg: ExtReg, IsZExt))
      return false;
    SrcReg1 = ExtReg;

    if (!UseImm) {
      // Deliberately shadows the outer ExtReg: a fresh register for the RHS.
      Register ExtReg = createResultReg(RC: &PPC::GPRCRegClass);
      if (!PPCEmitIntExt(SrcVT, SrcReg: SrcReg2, DestVT: MVT::i32, DestReg: ExtReg, IsZExt))
        return false;
      SrcReg2 = ExtReg;
    }
  }

  if (!UseImm)
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: CmpOpc), DestReg)
      .addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2);
  else
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: CmpOpc), DestReg)
      .addReg(RegNo: SrcReg1).addImm(Val: Imm);

  return true;
}
941
942// Attempt to fast-select a floating-point extend instruction.
943bool PPCFastISel::SelectFPExt(const Instruction *I) {
944 Value *Src = I->getOperand(i: 0);
945 EVT SrcVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
946 EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
947
948 if (SrcVT != MVT::f32 || DestVT != MVT::f64)
949 return false;
950
951 Register SrcReg = getRegForValue(V: Src);
952 if (!SrcReg)
953 return false;
954
955 // No code is generated for a FP extend.
956 updateValueMap(I, Reg: SrcReg);
957 return true;
958}
959
960// Attempt to fast-select a floating-point truncate instruction.
961bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
962 Value *Src = I->getOperand(i: 0);
963 EVT SrcVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
964 EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
965
966 if (SrcVT != MVT::f64 || DestVT != MVT::f32)
967 return false;
968
969 Register SrcReg = getRegForValue(V: Src);
970 if (!SrcReg)
971 return false;
972
973 // Round the result to single precision.
974 Register DestReg;
975 auto RC = MRI.getRegClass(Reg: SrcReg);
976 if (Subtarget->hasSPE()) {
977 DestReg = createResultReg(RC: &PPC::GPRCRegClass);
978 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::EFSCFD),
979 DestReg)
980 .addReg(RegNo: SrcReg);
981 } else if (Subtarget->hasP8Vector() && isVSFRCRegClass(RC)) {
982 DestReg = createResultReg(RC: &PPC::VSSRCRegClass);
983 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::XSRSP),
984 DestReg)
985 .addReg(RegNo: SrcReg);
986 } else {
987 SrcReg = copyRegToRegClass(ToRC: &PPC::F8RCRegClass, SrcReg);
988 DestReg = createResultReg(RC: &PPC::F4RCRegClass);
989 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
990 MCID: TII.get(Opcode: PPC::FRSP), DestReg)
991 .addReg(RegNo: SrcReg);
992 }
993
994 updateValueMap(I, Reg: DestReg);
995 return true;
996}
997
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// Returns the FPR holding the bits (or an invalid Register on failure).
// The move is done through an 8-byte stack slot (store from the GPR,
// reload into an FPR).
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
Register PPCFastISel::PPCMoveToFPReg(MVT SrcVT, Register SrcReg,
                                     bool IsSigned) {

  // If necessary, extend 32-bit int to 64-bit. Note that SrcVT is
  // deliberately left at MVT::i32 so the load-opcode selection below
  // still knows the original width.
  if (SrcVT == MVT::i32) {
    Register TmpReg = createResultReg(RC: &PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT: MVT::i32, SrcReg, DestVT: MVT::i64, DestReg: TmpReg, IsZExt: !IsSigned))
      return Register();
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);

  // Store the value from the GPR.
  if (!PPCEmitStore(VT: MVT::i64, SrcReg, Addr))
    return Register();

  // Load the integer value into an FPR. The kind of load used depends
  // on a number of conditions.
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      LoadOpc = PPC::LFIWZX;
      // On big-endian targets the low word of the stored doubleword is at
      // offset 4, so point the 4-byte integer load there.
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    } else if (Subtarget->hasLFIWAX()) {
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    }
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register ResultReg;
  if (!PPCEmitLoad(VT: MVT::f64, ResultReg, Addr, RC, IsZExt: !IsSigned, FP64LoadOpc: LoadOpc))
    return Register();

  return ResultReg;
}
1046
// Attempt to fast-select an integer-to-floating-point conversion.
// Handles i8/i16/i32/i64 sources and f32/f64 destinations; returns false
// to fall back to SelectionDAG for anything else.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(Ty: DstTy, VT&: DstVT))
    return false;

  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(i: 0);
  EVT SrcEVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  Register SrcReg = getRegForValue(V: Src);
  if (!SrcReg)
    return false;

  // Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs
  if (Subtarget->hasSPE()) {
    unsigned Opc;
    if (DstVT == MVT::f32)
      Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
    else
      Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;

    Register DestReg = createResultReg(RC: &PPC::SPERCRegClass);
    // Generate the convert.
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
      .addReg(RegNo: SrcReg);
    updateValueMap(I, Reg: DestReg);
    return true;
  }

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !Subtarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float. Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding. If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
    return false;

  // Extend the input if necessary. Sub-word sources are widened to i64 so
  // the 64-bit FCFID* forms below see a full doubleword.
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    Register TmpReg = createResultReg(RC: &PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT: MVT::i64, DestReg: TmpReg, IsZExt: !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR (via a stack slot; see PPCMoveToFPReg).
  Register FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (!FPReg)
    return false;

  // Determine the opcode for the conversion.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register DestReg = createResultReg(RC);
  unsigned Opc;

  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
    .addReg(RegNo: FPReg);

  updateValueMap(I, Reg: DestReg);
  return true;
}
1134
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// The move goes through an 8-byte stack slot: store the FPR, reload
// into a GPR of the class expected by I's eventual result register.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
Register PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
                                      Register SrcReg, bool IsSigned) {
  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  // Note that if have STFIWX available, we could use a 4-byte stack
  // slot for i32, but this being fast-isel we'll just go with the
  // easiest code gen possible.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);

  // Store the value from the FPR.
  if (!PPCEmitStore(VT: MVT::f64, SrcReg, Addr))
    return Register();

  // Reload it into a GPR. If we want an i32 on big endian, modify the
  // address to have a 4-byte offset so we load from the right place.
  if (VT == MVT::i32)
    Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. A null RC lets the
  // load emitter pick a default class.
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(Reg: AssignedReg) : nullptr;

  Register ResultReg;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, IsZExt: !IsSigned))
    return Register();

  return ResultReg;
}
1171
// Attempt to fast-select a floating-point-to-integer conversion.
// Handles f32/f64 sources and i32/i64 destinations; the convert happens
// in FPRs/VSRs (or GPRs on SPE) and the result is then moved to a GPR.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
  MVT DstVT, SrcVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(Ty: DstTy, VT&: DstVT))
    return false;

  if (DstVT != MVT::i32 && DstVT != MVT::i64)
    return false;

  // If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
  if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
      !Subtarget->hasSPE())
    return false;

  Value *Src = I->getOperand(i: 0);
  Type *SrcTy = Src->getType();
  if (!isTypeLegal(Ty: SrcTy, VT&: SrcVT))
    return false;

  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return false;

  Register SrcReg = getRegForValue(V: Src);
  if (!SrcReg)
    return false;

  // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
  // meaningless copy to get the register class right.
  const TargetRegisterClass *InRC = MRI.getRegClass(Reg: SrcReg);
  if (InRC == &PPC::F4RCRegClass)
    SrcReg = copyRegToRegClass(ToRC: &PPC::F8RCRegClass, SrcReg);
  else if (InRC == &PPC::VSSRCRegClass)
    SrcReg = copyRegToRegClass(ToRC: &PPC::VSFRCRegClass, SrcReg);

  // Determine the opcode for the conversion, which takes place
  // entirely within FPRs or VSRs.
  Register DestReg;
  unsigned Opc;
  auto RC = MRI.getRegClass(Reg: SrcReg);

  if (Subtarget->hasSPE()) {
    // SPE converts straight to a GPR; no FPR-to-GPR move is needed later.
    DestReg = createResultReg(RC: &PPC::GPRCRegClass);
    if (IsSigned)
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
    else
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
  } else if (isVSFRCRegClass(RC)) {
    DestReg = createResultReg(RC: &PPC::VSFRCRegClass);
    if (DstVT == MVT::i32)
      Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
    else
      Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
  } else {
    DestReg = createResultReg(RC: &PPC::F8RCRegClass);
    if (DstVT == MVT::i32)
      if (IsSigned)
        Opc = PPC::FCTIWZ;
      else
        // Without FPCVT, fall back to the signed 64-bit truncating convert;
        // presumably its low 32 bits give the unsigned i32 result for
        // in-range inputs -- NOTE(review): confirm against ISA semantics.
        Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
    else
      Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
  }

  // Generate the convert.
  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
    .addReg(RegNo: SrcReg);

  // Now move the integer value from a float register to an integer register.
  Register IntReg = Subtarget->hasSPE()
                        ? DestReg
                        : PPCMoveToIntReg(I, VT: DstVT, SrcReg: DestReg, IsSigned);

  if (!IntReg)
    return false;

  updateValueMap(I, Reg: IntReg);
  return true;
}
1253
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically. Only i8/i16 ADD/OR/SUB reach here (the legal
// types are handled by the tablegen'd selector).
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. If there is no register,
  // make a conservative choice (don't assign R0).
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(Reg: AssignedReg) :
     &PPC::GPRC_and_GPRC_NOR0RegClass);
  bool IsGPRC = RC->hasSuperClassEq(RC: &PPC::GPRCRegClass);

  // Pick the 32-bit or 64-bit form to match the result register class.
  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
      break;
    case ISD::OR:
      Opc = IsGPRC ? PPC::OR : PPC::OR8;
      break;
    case ISD::SUB:
      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
      break;
  }

  // NOTE(review): RC is always non-null here (the ternary above has a
  // non-null fallback), so the "RC ? RC :" guard looks dead -- confirm.
  Register ResultReg = createResultReg(RC: RC ? RC : &PPC::G8RCRegClass);
  Register SrcReg1 = getRegForValue(V: I->getOperand(i: 0));
  if (!SrcReg1)
    return false;

  // Handle case of small immediate operand.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: I->getOperand(i: 1))) {
    const APInt &CIVal = ConstInt->getValue();
    int Imm = (int)CIVal.getSExtValue();
    bool UseImm = true;
    if (isInt<16>(x: Imm)) {
      switch (Opc) {
        default:
          llvm_unreachable("Missing case!");
        case PPC::ADD4:
          Opc = PPC::ADDI;
          // ADDI treats R0 as zero, so keep the source out of R0.
          MRI.setRegClass(Reg: SrcReg1, RC: &PPC::GPRC_and_GPRC_NOR0RegClass);
          break;
        case PPC::ADD8:
          Opc = PPC::ADDI8;
          MRI.setRegClass(Reg: SrcReg1, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
          break;
        case PPC::OR:
          Opc = PPC::ORI;
          break;
        case PPC::OR8:
          Opc = PPC::ORI8;
          break;
        case PPC::SUBF:
          // sub x, imm becomes add x, -imm, except -(-32768) overflows i16.
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI;
            MRI.setRegClass(Reg: SrcReg1, RC: &PPC::GPRC_and_GPRC_NOR0RegClass);
            Imm = -Imm;
          }
          break;
        case PPC::SUBF8:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI8;
            MRI.setRegClass(Reg: SrcReg1, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
            Imm = -Imm;
          }
          break;
      }

      if (UseImm) {
        BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc),
                DestReg: ResultReg)
            .addReg(RegNo: SrcReg1)
            .addImm(Val: Imm);
        updateValueMap(I, Reg: ResultReg);
        return true;
      }
    }
  }

  // Reg-reg case.
  Register SrcReg2 = getRegForValue(V: I->getOperand(i: 1));
  if (!SrcReg2)
    return false;

  // Reverse operands for subtract-from: SUBF computes RB - RA.
  if (ISDOpcode == ISD::SUB)
    std::swap(a&: SrcReg1, b&: SrcReg2);

  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
    .addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2);
  updateValueMap(I, Reg: ResultReg);
  return true;
}
1360
// Handle arguments to a call that we're attempting to fast-select.
// Return false if the arguments are too complex for us at the moment.
// On success, RegArgs holds the physical registers the arguments were
// copied into and NumBytes the stack space reserved (CALLSEQ_START has
// been emitted).
bool PPCFastISel::processCallArgs(SmallVectorImpl<Value *> &Args,
                                  SmallVectorImpl<Register> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC, unsigned &NumBytes,
                                  bool IsVarArg) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align(8));

  CCInfo.AnalyzeCallOperands(ArgVTs, Flags&: ArgFlags, Fn: CC_PPC64_ELF_FIS);

  // Bail out if we can't handle any of the arguments.
  for (const CCValAssign &VA : ArgLocs) {
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Skip vector arguments for now, as well as long double and
    // uint128_t, and anything that isn't passed in a register.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
        !VA.isRegLoc() || VA.needsCustom())
      return false;

    // Skip bit-converted arguments for now.
    if (VA.getLocInfo() == CCValAssign::BCvt)
      return false;
  }

  // Get a count of how many bytes are to be pushed onto the stack.
  NumBytes = CCInfo.getStackSize();

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if its varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed. As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
  NumBytes = std::max(a: NumBytes, b: LinkageSize + 64);

  // Issue CALLSEQ_START.
  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
          MCID: TII.get(Opcode: TII.getCallFrameSetupOpcode()))
    .addImm(Val: NumBytes).addImm(Val: 0);

  // Prepare to assign register arguments. Every argument uses up a
  // GPR protocol register even if it's passed in a floating-point
  // register (unless we're using the fast calling convention).
  // Assumes PPC::X3..X10 and PPC::F1..F13 are numbered consecutively
  // in the register enum -- TODO confirm against PPCRegisterInfo.td.
  unsigned NextGPR = PPC::X3;
  unsigned NextFPR = PPC::F1;

  // Process arguments.
  for (const CCValAssign &VA : ArgLocs) {
    Register Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Handle argument promotion and bitcasts.
    switch (VA.getLocInfo()) {
      default:
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::SExt: {
        MVT DestVT = VA.getLocVT();
        const TargetRegisterClass *RC =
          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
        Register TmpReg = createResultReg(RC);
        if (!PPCEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, DestReg: TmpReg, /*IsZExt*/false))
          llvm_unreachable("Failed to emit a sext!");
        ArgVT = DestVT;
        Arg = TmpReg;
        break;
      }
      case CCValAssign::AExt:
      case CCValAssign::ZExt: {
        MVT DestVT = VA.getLocVT();
        const TargetRegisterClass *RC =
          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
        Register TmpReg = createResultReg(RC);
        if (!PPCEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, DestReg: TmpReg, /*IsZExt*/true))
          llvm_unreachable("Failed to emit a zext!");
        ArgVT = DestVT;
        Arg = TmpReg;
        break;
      }
      case CCValAssign::BCvt: {
        // FIXME: Not yet handled.
        llvm_unreachable("Should have bailed before getting here!");
        break;
      }
    }

    // Copy this argument to the appropriate register. FP arguments also
    // consume a GPR slot (except under the fast calling convention).
    unsigned ArgReg;
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
      ArgReg = NextFPR++;
      if (CC != CallingConv::Fast)
        ++NextGPR;
    } else
      ArgReg = NextGPR++;

    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ArgReg).addReg(RegNo: Arg);
    RegArgs.push_back(Elt: ArgReg);
  }

  return true;
}
1473
1474// For a call that we've determined we can fast-select, finish the
1475// call sequence and generate a copy to obtain the return value (if any).
1476bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1477 CallingConv::ID CC = CLI.CallConv;
1478
1479 // Issue CallSEQ_END.
1480 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1481 MCID: TII.get(Opcode: TII.getCallFrameDestroyOpcode()))
1482 .addImm(Val: NumBytes).addImm(Val: 0);
1483
1484 // Next, generate a copy to obtain the return value.
1485 // FIXME: No multi-register return values yet, though I don't foresee
1486 // any real difficulties there.
1487 if (RetVT != MVT::isVoid) {
1488 SmallVector<CCValAssign, 16> RVLocs;
1489 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1490 CCInfo.AnalyzeCallResult(VT: RetVT, Fn: RetCC_PPC64_ELF_FIS);
1491 CCValAssign &VA = RVLocs[0];
1492 assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
1493 assert(VA.isRegLoc() && "Can only return in registers!");
1494
1495 MVT DestVT = VA.getValVT();
1496 MVT CopyVT = DestVT;
1497
1498 // Ints smaller than a register still arrive in a full 64-bit
1499 // register, so make sure we recognize this.
1500 if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
1501 CopyVT = MVT::i64;
1502
1503 Register SourcePhysReg = VA.getLocReg();
1504 Register ResultReg;
1505
1506 if (RetVT == CopyVT) {
1507 const TargetRegisterClass *CpyRC = TLI.getRegClassFor(VT: CopyVT);
1508 ResultReg = copyRegToRegClass(ToRC: CpyRC, SrcReg: SourcePhysReg);
1509
1510 // If necessary, round the floating result to single precision.
1511 } else if (CopyVT == MVT::f64) {
1512 ResultReg = createResultReg(RC: TLI.getRegClassFor(VT: RetVT));
1513 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::FRSP),
1514 DestReg: ResultReg).addReg(RegNo: SourcePhysReg);
1515
1516 // If only the low half of a general register is needed, generate
1517 // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
1518 // used along the fast-isel path (not lowered), and downstream logic
1519 // also doesn't like a direct subreg copy on a physical reg.)
1520 } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
1521 // Convert physical register from G8RC to GPRC.
1522 SourcePhysReg = (SourcePhysReg - PPC::X0) + PPC::R0;
1523 ResultReg = copyRegToRegClass(ToRC: &PPC::GPRCRegClass, SrcReg: SourcePhysReg);
1524 }
1525
1526 assert(ResultReg && "ResultReg unset!");
1527 CLI.InRegs.push_back(Elt: SourcePhysReg);
1528 CLI.ResultReg = ResultReg;
1529 CLI.NumResultRegs = 1;
1530 }
1531
1532 return true;
1533}
1534
// Attempt to fast-select a call. Bails out (returning false, so SDISel
// takes over) for tail calls, long calls, varargs, PC-relative callees,
// function pointers (except patchpoints), unsupported return types, and
// anything with more than 8 register-passed arguments.
bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC = CLI.CallConv;
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  const MCSymbol *Symbol = CLI.Symbol;

  if (!Callee && !Symbol)
    return false;

  // Allow SelectionDAG isel to handle tail calls and long calls.
  if (IsTailCall || Subtarget->useLongCalls())
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // If this is a PC-Rel function, let SDISel handle the call.
  if (Subtarget->isUsingPCRelativeCalls())
    return false;

  // Handle simple calls for now, with legal return types and
  // those that can be extended.
  Type *RetTy = CLI.RetTy;
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(Ty: RetTy, VT&: RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8)
    return false;
  else if (RetVT == MVT::i1 && Subtarget->useCRBits())
    // We can't handle boolean returns when CR bits are in use.
    return false;

  // FIXME: No multi-register return values yet. For any other return
  // type, run the convention analysis and reject multi-location results.
  if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
      RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
      RetVT != MVT::f64) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(VT: RetVT, Fn: RetCC_PPC64_ELF_FIS);
    if (RVLocs.size() > 1)
      return false;
  }

  // Bail early if more than 8 arguments, as we only currently
  // handle arguments passed in registers.
  unsigned NumArgs = CLI.OutVals.size();
  if (NumArgs > 8)
    return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<Register, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;

  Args.reserve(N: NumArgs);
  ArgRegs.reserve(N: NumArgs);
  ArgVTs.reserve(N: NumArgs);
  ArgFlags.reserve(N: NumArgs);

  for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
    // Only handle easy calls for now. It would be reasonably easy
    // to handle <= 8-byte structures passed ByVal in registers, but we
    // have to ensure they are right-justified in the register.
    ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
    if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
      return false;

    Value *ArgValue = CLI.OutVals[i];
    Type *ArgTy = ArgValue->getType();
    MVT ArgVT;
    if (!isTypeLegal(Ty: ArgTy, VT&: ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
      return false;

    // FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP
    // types, which is passed through vector register. Skip these types and
    // fallback to default SelectionDAG based selection.
    if (ArgVT.isVector() || ArgVT == MVT::f128)
      return false;

    Register Arg = getRegForValue(V: ArgValue);
    if (!Arg)
      return false;

    Args.push_back(Elt: ArgValue);
    ArgRegs.push_back(Elt: Arg);
    ArgVTs.push_back(Elt: ArgVT);
    ArgFlags.push_back(Elt: Flags);
  }

  // Process the arguments: emits CALLSEQ_START and the copies into the
  // protocol registers, and computes the reserved stack size.
  SmallVector<unsigned, 8> RegArgs;
  unsigned NumBytes;

  if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, IsVarArg))
    return false;

  MachineInstrBuilder MIB;
  // FIXME: No handling for function pointers yet. This requires
  // implementing the function descriptor (OPD) setup.
  const GlobalValue *GV = dyn_cast<GlobalValue>(Val: Callee);
  if (!GV) {
    // patchpoints are a special case; they always dispatch to a pointer value.
    // However, we don't actually want to generate the indirect call sequence
    // here (that will be generated, as necessary, during asm printing), and
    // the call we generate here will be erased by FastISel::selectPatchpoint,
    // so don't try very hard...
    if (CLI.IsPatchPoint)
      MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::NOP));
    else
      return false;
  } else {
    // Build direct call with NOP for TOC restore.
    // FIXME: We can and should optimize away the NOP for local calls.
    MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                  MCID: TII.get(Opcode: PPC::BL8_NOP));
    // Add callee.
    MIB.addGlobalAddress(GV);
  }

  // Add implicit physical register uses to the call.
  for (unsigned Reg : RegArgs)
    MIB.addReg(RegNo: Reg, flags: RegState::Implicit);

  // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
  // into the call.
  PPCFuncInfo->setUsesTOCBasePtr();
  MIB.addReg(RegNo: PPC::X2, flags: RegState::Implicit);

  // Add a register mask with the call-preserved registers. Proper
  // defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return finishCall(RetVT, CLI, NumBytes);
}
1677
// Attempt to fast-select a return instruction. Copies the (single) return
// value into its convention-assigned physical register, then emits BLR8
// with the return registers as implicit uses.
bool PPCFastISel::SelectRet(const Instruction *I) {

  if (!FuncInfo.CanLowerReturn)
    return false;

  const ReturnInst *Ret = cast<ReturnInst>(Val: I);
  const Function &F = *I->getParent()->getParent();

  // Build a list of return value registers.
  SmallVector<Register, 4> RetRegs;
  CallingConv::ID CC = F.getCallingConv();

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, ReturnType: F.getReturnType(), attr: F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
    CCInfo.AnalyzeReturn(Outs, Fn: RetCC_PPC64_ELF_FIS);
    const Value *RV = Ret->getOperand(i_nocapture: 0);

    // FIXME: Only one output register for now.
    if (ValLocs.size() > 1)
      return false;

    // Special case for returning a constant integer of any size - materialize
    // the constant as an i64 and copy it to the return register.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: RV)) {
      CCValAssign &VA = ValLocs[0];

      Register RetReg = VA.getLocReg();
      // We still need to worry about properly extending the sign. For example,
      // we could have only a single bit or a constant that needs zero
      // extension rather than sign extension. Make sure we pass the return
      // value extension property to integer materialization.
      Register SrcReg =
        PPCMaterializeInt(CI, VT: MVT::i64, UseSExt: VA.getLocInfo() != CCValAssign::ZExt);

      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
              MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: RetReg).addReg(RegNo: SrcReg);

      RetRegs.push_back(Elt: RetReg);

    } else {
      Register Reg = getRegForValue(V: RV);

      if (!Reg)
        return false;

      // Copy the result values into the output registers.
      for (unsigned i = 0; i < ValLocs.size(); ++i) {

        CCValAssign &VA = ValLocs[i];
        assert(VA.isRegLoc() && "Can only return in registers!");
        RetRegs.push_back(Elt: VA.getLocReg());
        // Assumes multi-value results occupy consecutively numbered
        // virtual registers starting at Reg -- TODO confirm (only one
        // location is possible today given the size check above).
        Register SrcReg = Reg + VA.getValNo();

        EVT RVEVT = TLI.getValueType(DL, Ty: RV->getType());
        if (!RVEVT.isSimple())
          return false;
        MVT RVVT = RVEVT.getSimpleVT();
        MVT DestVT = VA.getLocVT();

        if (RVVT != DestVT && RVVT != MVT::i8 &&
            RVVT != MVT::i16 && RVVT != MVT::i32)
          return false;

        // Extend sub-word values to the full location width as the
        // convention requires.
        if (RVVT != DestVT) {
          switch (VA.getLocInfo()) {
            default:
              llvm_unreachable("Unknown loc info!");
            case CCValAssign::Full:
              llvm_unreachable("Full value assign but types don't match?");
            case CCValAssign::AExt:
            case CCValAssign::ZExt: {
              const TargetRegisterClass *RC =
                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
              Register TmpReg = createResultReg(RC);
              if (!PPCEmitIntExt(SrcVT: RVVT, SrcReg, DestVT, DestReg: TmpReg, IsZExt: true))
                return false;
              SrcReg = TmpReg;
              break;
            }
            case CCValAssign::SExt: {
              const TargetRegisterClass *RC =
                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
              Register TmpReg = createResultReg(RC);
              if (!PPCEmitIntExt(SrcVT: RVVT, SrcReg, DestVT, DestReg: TmpReg, IsZExt: false))
                return false;
              SrcReg = TmpReg;
              break;
            }
          }
        }

        BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: RetRegs[i])
            .addReg(RegNo: SrcReg);
      }
    }
  }

  MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                                    MCID: TII.get(Opcode: PPC::BLR8));

  for (Register Reg : RetRegs)
    MIB.addReg(RegNo: Reg, flags: RegState::Implicit);

  return true;
}
1790
// Attempt to emit an integer extend of SrcReg into DestReg. Both
// signed and zero extensions are supported. Return false if we
// can't handle it.
bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
                                Register DestReg, bool IsZExt) {
  // Only i32 and i64 destinations are supported.
  if (DestVT != MVT::i32 && DestVT != MVT::i64)
    return false;
  // Only i8, i16, and i32 sources are supported.
  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
    return false;

  // Signed extensions use EXTSB, EXTSH, EXTSW.
  if (!IsZExt) {
    unsigned Opc;
    if (SrcVT == MVT::i8)
      Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
    else if (SrcVT == MVT::i16)
      Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
    else {
      // i32 source with i32 destination would be a no-op, so it must be i64.
      assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
      Opc = PPC::EXTSW_32_64;
    }
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
      .addReg(RegNo: SrcReg);

  // Unsigned 32-bit extensions use RLWINM.
  } else if (DestVT == MVT::i32) {
    unsigned MB;
    if (SrcVT == MVT::i8)
      // Mask begins at bit 24 so only the low 8 bits survive.
      MB = 24;
    else {
      assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
      // Mask begins at bit 16 so only the low 16 bits survive.
      MB = 16;
    }
    // Rotate by 0 and mask bits MB..31, i.e. clear everything above SrcVT.
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::RLWINM),
            DestReg)
      .addReg(RegNo: SrcReg).addImm(/*SH=*/Val: 0).addImm(Val: MB).addImm(/*ME=*/Val: 31);

  // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
  } else {
    unsigned MB;
    if (SrcVT == MVT::i8)
      MB = 56;      // keep the low 8 of 64 bits
    else if (SrcVT == MVT::i16)
      MB = 48;      // keep the low 16 bits
    else
      MB = 32;      // keep the low 32 bits
    // Rotate by 0 and clear the high MB bits.
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: PPC::RLDICL_32_64), DestReg)
      .addReg(RegNo: SrcReg).addImm(/*SH=*/Val: 0).addImm(Val: MB);
  }

  return true;
}
1844
// Attempt to fast-select an indirect branch instruction.
bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
  // The branch target address must already be materialized in a register.
  Register AddrReg = getRegForValue(V: I->getOperand(i: 0));
  if (!AddrReg)
    return false;

  // Move the target address into the count register, then branch through it.
  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::MTCTR8))
    .addReg(RegNo: AddrReg);
  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::BCTR8));

  // Record every possible destination as a CFG successor of this block.
  const IndirectBrInst *IB = cast<IndirectBrInst>(Val: I);
  for (const BasicBlock *SuccBB : IB->successors())
    FuncInfo.MBB->addSuccessor(Succ: FuncInfo.getMBB(BB: SuccBB));

  return true;
}
1861
// Attempt to fast-select an integer truncate instruction.
bool PPCFastISel::SelectTrunc(const Instruction *I) {
  Value *Src = I->getOperand(i: 0);
  EVT SrcVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
  EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);

  // Only handle truncates from i64/i32/i16...
  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
    return false;

  // ...down to i32/i16/i8.
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  Register SrcReg = getRegForValue(V: Src);
  if (!SrcReg)
    return false;

  // Truncation is a no-op on the register contents; the only interesting
  // case is when we need to switch register classes (i64 lives in G8RC,
  // narrower types in GPRC), handled via a sub_32 subregister copy.
  if (SrcVT == MVT::i64)
    SrcReg = copyRegToRegClass(ToRC: &PPC::GPRCRegClass, SrcReg, Flag: 0, SubReg: PPC::sub_32);

  // Reuse the (possibly copied) source register as the result.
  updateValueMap(I, Reg: SrcReg);
  return true;
}
1885
// Attempt to fast-select an integer extend instruction.
bool PPCFastISel::SelectIntExt(const Instruction *I) {
  Type *DestTy = I->getType();
  Value *Src = I->getOperand(i: 0);
  Type *SrcTy = Src->getType();

  // SExt vs. ZExt is determined by the IR instruction kind.
  bool IsZExt = isa<ZExtInst>(Val: I);
  Register SrcReg = getRegForValue(V: Src);
  if (!SrcReg) return false;

  // Both types must lower to simple MVTs for PPCEmitIntExt to handle them.
  EVT SrcEVT, DestEVT;
  SrcEVT = TLI.getValueType(DL, Ty: SrcTy, AllowUnknown: true);
  DestEVT = TLI.getValueType(DL, Ty: DestTy, AllowUnknown: true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  // If we know the register class needed for the result of this
  // instruction, use it. Otherwise pick the register class of the
  // correct size that does not contain X0/R0, since we don't know
  // whether downstream uses permit that assignment.
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(Reg: AssignedReg) :
     (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
      &PPC::GPRC_and_GPRC_NOR0RegClass));
  Register ResultReg = createResultReg(RC);

  // Emit the actual extension; bail if the type combination is unsupported.
  if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, DestReg: ResultReg, IsZExt))
    return false;

  updateValueMap(I, Reg: ResultReg);
  return true;
}
1924
// Attempt to fast-select an instruction that wasn't handled by
// the table-generated machinery. Returns true if the instruction
// was selected; false falls back to SelectionDAG.
bool PPCFastISel::fastSelectInstruction(const Instruction *I) {

  // Dispatch on IR opcode to the hand-written selectors below.
  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::IndirectBr:
      return SelectIndirectBr(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectIToFP(I, /*IsSigned*/ true);
    case Instruction::UIToFP:
      return SelectIToFP(I, /*IsSigned*/ false);
    case Instruction::FPToSI:
      return SelectFPToI(I, /*IsSigned*/ true);
    case Instruction::FPToUI:
      return SelectFPToI(I, /*IsSigned*/ false);
    case Instruction::Add:
      return SelectBinaryIntOp(I, ISDOpcode: ISD::ADD);
    case Instruction::Or:
      return SelectBinaryIntOp(I, ISDOpcode: ISD::OR);
    case Instruction::Sub:
      return SelectBinaryIntOp(I, ISDOpcode: ISD::SUB);
    case Instruction::Ret:
      return SelectRet(I);
    case Instruction::Trunc:
      return SelectTrunc(I);
    case Instruction::ZExt:
    case Instruction::SExt:
      return SelectIntExt(I);
    // Here add other flavors of Instruction::XXX that automated
    // cases don't catch. For example, switches are terminators
    // that aren't yet handled.
    default:
      break;
  }
  return false;
}
1971
// Materialize a floating-point constant into a register, and return
// the register number (or zero if we failed to handle it).
Register PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
  // If this is a PC-Rel function, let SDISel handle constant pool.
  if (Subtarget->isUsingPCRelativeCalls())
    return Register();

  // No plans to handle long double here.
  if (VT != MVT::f32 && VT != MVT::f64)
    return Register();

  // All FP constants are loaded from the constant pool.
  Align Alignment = DL.getPrefTypeAlign(Ty: CFP->getType());
  unsigned Idx = MCP.getConstantPoolIndex(C: cast<Constant>(Val: CFP), Alignment);
  // SPE targets keep FP values in GPR/SPE register classes rather than FPRs.
  const bool HasSPE = Subtarget->hasSPE();
  const TargetRegisterClass *RC;
  if (HasSPE)
    RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
  else
    RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);

  Register DestReg = createResultReg(RC);
  CodeModel::Model CModel = TM.getCodeModel();

  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
      PtrInfo: MachinePointerInfo::getConstantPool(MF&: *FuncInfo.MF),
      F: MachineMemOperand::MOLoad, Size: (VT == MVT::f32) ? 4 : 8, BaseAlignment: Alignment);

  // Pick the load opcode for the value width (SPE vs. classic FP loads).
  unsigned Opc;

  if (HasSPE)
    Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
  else
    Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);

  // Scratch register for the TOC-relative address computation; must avoid
  // X0 because it reads as zero in the base-register position.
  Register TmpReg = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);

  PPCFuncInfo->setUsesTOCBasePtr();
  // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
  if (CModel == CodeModel::Small) {
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::LDtocCPT),
            DestReg: TmpReg)
      .addConstantPoolIndex(Idx).addReg(RegNo: PPC::X2);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
      .addImm(Val: 0).addReg(RegNo: TmpReg).addMemOperand(MMO);
  } else {
    // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDIStocHA8),
            DestReg: TmpReg).addReg(RegNo: PPC::X2).addConstantPoolIndex(Idx);
    // But for large code model, we must generate a LDtocL followed
    // by the LF[SD].
    if (CModel == CodeModel::Large) {
      Register TmpReg2 = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::LDtocL),
              DestReg: TmpReg2).addConstantPoolIndex(Idx).addReg(RegNo: TmpReg);
      // NOTE(review): unlike the other two paths, this load carries no
      // MachineMemOperand — presumably harmless (only pessimizes alias
      // analysis), but worth confirming upstream intent.
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
          .addImm(Val: 0)
          .addReg(RegNo: TmpReg2);
    } else
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
          .addConstantPoolIndex(Idx, Offset: 0, TargetFlags: PPCII::MO_TOC_LO)
          .addReg(RegNo: TmpReg)
          .addMemOperand(MMO);
  }

  return DestReg;
}
2039
// Materialize the address of a global value into a register, and return
// the register number (or zero if we failed to handle it).
Register PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
  // If this is a PC-Rel function, let SDISel handle GV materialization.
  if (Subtarget->isUsingPCRelativeCalls())
    return Register();

  // Addresses are always 64-bit here (this FastISel is 64-bit only).
  assert(VT == MVT::i64 && "Non-address!");
  const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
  Register DestReg = createResultReg(RC);

  // Global values may be plain old object addresses, TLS object
  // addresses, constant pool entries, or jump tables. How we generate
  // code for these may depend on small, medium, or large code model.
  CodeModel::Model CModel = TM.getCodeModel();

  // FIXME: Jump tables are not yet required because fast-isel doesn't
  // handle switches; if that changes, we need them as well. For now,
  // what follows assumes everything's a generic (or TLS) global address.

  // FIXME: We don't yet handle the complexity of TLS.
  if (GV->isThreadLocal())
    return Register();

  PPCFuncInfo->setUsesTOCBasePtr();
  // AIX "toc-data" globals live directly in the TOC and can be addressed
  // with a simple add from X2 instead of a TOC load.
  bool IsAIXTocData = TM.getTargetTriple().isOSAIX() &&
                      isa<GlobalVariable>(Val: GV) &&
                      cast<GlobalVariable>(Val: GV)->hasAttribute(Kind: "toc-data");

  // For small code model, generate a simple TOC load.
  if (CModel == CodeModel::Small) {
    auto MIB = BuildMI(
        BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
        MCID: IsAIXTocData ? TII.get(Opcode: PPC::ADDItoc8) : TII.get(Opcode: PPC::LDtoc), DestReg);
    // ADDItoc8 and LDtoc take their operands in opposite order.
    if (IsAIXTocData)
      MIB.addReg(RegNo: PPC::X2).addGlobalAddress(GV);
    else
      MIB.addGlobalAddress(GV).addReg(RegNo: PPC::X2);
  } else {
    // If the address is an externally defined symbol, a symbol with common
    // or externally available linkage, a non-local function address, or a
    // jump table address (not yet needed), or if we are generating code
    // for large code model, we generate:
    //       LDtocL(GV, ADDIStocHA8(%x2, GV))
    // Otherwise we generate:
    //       ADDItocL8(ADDIStocHA8(%x2, GV), GV)
    // Either way, start with the ADDIStocHA8:
    Register HighPartReg = createResultReg(RC);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDIStocHA8),
            DestReg: HighPartReg).addReg(RegNo: PPC::X2).addGlobalAddress(GV);

    if (Subtarget->isGVIndirectSymbol(GV)) {
      assert(!IsAIXTocData && "TOC data should always be direct.");
      // Indirect: load the address from the TOC entry.
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::LDtocL),
              DestReg).addGlobalAddress(GV).addReg(RegNo: HighPartReg);
    } else {
      // Otherwise generate the ADDItocL8.
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDItocL8),
              DestReg)
          .addReg(RegNo: HighPartReg)
          .addGlobalAddress(GV);
    }
  }

  return DestReg;
}
2106
// Materialize a 32-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
Register PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  // Split the constant into 16-bit halves.
  unsigned Lo = Imm & 0xFFFF;
  unsigned Hi = (Imm >> 16) & 0xFFFF;

  Register ResultReg = createResultReg(RC);
  // GPRC means 32-bit opcodes (LI/LIS/ORI); otherwise use the 64-bit forms.
  bool IsGPRC = RC->hasSuperClassEq(RC: &PPC::GPRCRegClass);

  // A constant that fits the signed 16-bit immediate needs only one LI.
  if (isInt<16>(x: Imm))
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: IsGPRC ? PPC::LI : PPC::LI8), DestReg: ResultReg)
      .addImm(Val: Imm);
  else if (Lo) {
    // Both Lo and Hi have nonzero bits: LIS for the high half, then ORI
    // (which zero-extends its immediate) to merge in the low half.
    Register TmpReg = createResultReg(RC);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: IsGPRC ? PPC::LIS : PPC::LIS8), DestReg: TmpReg)
      .addImm(Val: Hi);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: IsGPRC ? PPC::ORI : PPC::ORI8), DestReg: ResultReg)
      .addReg(RegNo: TmpReg).addImm(Val: Lo);
  } else
    // Just Hi bits.
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: IsGPRC ? PPC::LIS : PPC::LIS8), DestReg: ResultReg)
      .addImm(Val: Hi);

  return ResultReg;
}
2138
// Materialize a 64-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
//
// Strategy: get the constant (or its significant top part) into 32 bits,
// materialize that with PPCMaterialize32BitInt, shift it into place with
// RLDICR if needed, then OR in any remaining low bits with ORIS8/ORI8.
Register PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  // Remainder holds low bits not covered by the shifted 32-bit part;
  // Shift is how far the materialized part must be shifted left.
  unsigned Remainder = 0;
  unsigned Shift = 0;

  // If the value doesn't fit in 32 bits, see if we can shift it
  // so that it fits in 32 bits.
  if (!isInt<32>(x: Imm)) {
    // Strip trailing zeros first; if the shifted value fits, no low
    // bits remain to be OR'd back.
    Shift = llvm::countr_zero<uint64_t>(Val: Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    if (isInt<32>(x: ImmSh))
      Imm = ImmSh;
    else {
      // Otherwise materialize the high 32 bits and remember the low 32
      // (Remainder) to OR in afterwards.
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
  // (if not shifted).
  Register TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
  if (!Shift)
    return TmpReg1;

  // If upper 32 bits were not zero, we've built them and need to shift
  // them into place.
  Register TmpReg2;
  if (Imm) {
    TmpReg2 = createResultReg(RC);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::RLDICR),
            DestReg: TmpReg2).addReg(RegNo: TmpReg1).addImm(Val: Shift).addImm(Val: 63 - Shift);
  } else
    TmpReg2 = TmpReg1;

  // OR in the remaining low bits, 16 at a time (ORIS8 = bits 16-31,
  // ORI8 = bits 0-15), skipping halves that are zero.
  Register TmpReg3;
  unsigned Hi, Lo;
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    TmpReg3 = createResultReg(RC);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ORIS8),
            DestReg: TmpReg3).addReg(RegNo: TmpReg2).addImm(Val: Hi);
  } else
    TmpReg3 = TmpReg2;

  if ((Lo = Remainder & 0xFFFF)) {
    Register ResultReg = createResultReg(RC);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ORI8),
            DestReg: ResultReg).addReg(RegNo: TmpReg3).addImm(Val: Lo);
    return ResultReg;
  }

  return TmpReg3;
}
2195
// Materialize an integer constant into a register, and return
// the register number (or zero if we failed to handle it).
// UseSExt selects sign extension vs. zero extension of the constant
// to the register width.
Register PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
                                        bool UseSExt) {
  // If we're using CR bit registers for i1 values, handle that as a special
  // case first.
  if (VT == MVT::i1 && Subtarget->useCRBits()) {
    Register ImmReg = createResultReg(RC: &PPC::CRBITRCRegClass);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: CI->isZero() ? PPC::CRUNSET : PPC::CRSET), DestReg: ImmReg);
    return ImmReg;
  }

  // Only integer types up to 64 bits are supported.
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
      VT != MVT::i1)
    return Register();

  const TargetRegisterClass *RC =
      ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
  int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();

  // If the constant is in range, use a load-immediate.
  // Since LI will sign extend the constant we need to make sure that for
  // our zeroext constants that the sign extended constant fits into 16-bits -
  // a range of 0..0x7fff.
  if (isInt<16>(x: Imm)) {
    unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
    Register ImmReg = createResultReg(RC);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ImmReg)
      .addImm(Val: Imm);
    return ImmReg;
  }

  // Construct the constant piecewise.
  if (VT == MVT::i64)
    return PPCMaterialize64BitInt(Imm, RC);
  else if (VT == MVT::i32)
    return PPCMaterialize32BitInt(Imm, RC);

  // i16/i8/i1 constants always fit in 16 bits, so this is unreachable
  // in practice; kept as a conservative fallback.
  return Register();
}
2237
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it). FastISel entry point
// that dispatches on the constant's kind (FP / global / integer).
Register PPCFastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, Ty: C->getType(), AllowUnknown: true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return Register();
  MVT VT = CEVT.getSimpleVT();

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C))
    return PPCMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C))
    return PPCMaterializeGV(GV, VT);
  else if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: C))
    // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
    // assumes that constant PHI operands will be zero extended, and failure to
    // match that assumption will cause problems if we sign extend here but
    // some user of a PHI is in a block for which we fall back to full SDAG
    // instruction selection.
    return PPCMaterializeInt(CI, VT, UseSExt: false);

  // Other constant kinds (aggregates, expressions, ...) are not handled.
  return Register();
}
2262
2263// Materialize the address created by an alloca into a register, and
2264// return the register number (or zero if we failed to handle it).
2265Register PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2266 DenseMap<const AllocaInst *, int>::iterator SI =
2267 FuncInfo.StaticAllocaMap.find(Val: AI);
2268
2269 // Don't handle dynamic allocas.
2270 if (SI == FuncInfo.StaticAllocaMap.end())
2271 return Register();
2272
2273 MVT VT;
2274 if (!isLoadTypeLegal(Ty: AI->getType(), VT))
2275 return Register();
2276
2277 if (SI != FuncInfo.StaticAllocaMap.end()) {
2278 Register ResultReg = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
2279 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDI8),
2280 DestReg: ResultReg).addFrameIndex(Idx: SI->second).addImm(Val: 0);
2281 return ResultReg;
2282 }
2283
2284 return Register();
2285}
2286
// Fold loads into extends when possible.
// FIXME: We can have multiple redundant extend/trunc instructions
// following a load. The folding only picks up one. Extend this
// to check subsequent instructions for the same pattern and remove
// them. Thus ResultReg should be the def reg for the last redundant
// instruction in a chain, and all intervening instructions can be
// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
// to add ELF64-NOT: rldicl to the appropriate tests when this works.
bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(Ty: LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend: the extend MI is only
  // foldable if its effect is subsumed by an extending load of type VT.
  bool IsZExt = false;
  switch(MI->getOpcode()) {
    default:
      return false;

    case PPC::RLDICL:
    case PPC::RLDICL_32_64: {
      IsZExt = true;
      // The rotate-and-clear-left keeps bits MB..63; a zero-extending
      // load of VT already clears at least that many high bits.
      unsigned MB = MI->getOperand(i: 3).getImm();
      if ((VT == MVT::i8 && MB <= 56) ||
          (VT == MVT::i16 && MB <= 48) ||
          (VT == MVT::i32 && MB <= 32))
        break;
      return false;
    }

    case PPC::RLWINM:
    case PPC::RLWINM8: {
      IsZExt = true;
      // Same idea for the 32-bit rotate-and-mask (bits MB..31 kept).
      unsigned MB = MI->getOperand(i: 3).getImm();
      if ((VT == MVT::i8 && MB <= 24) ||
          (VT == MVT::i16 && MB <= 16))
        break;
      return false;
    }

    case PPC::EXTSB:
    case PPC::EXTSB8:
    case PPC::EXTSB8_32_64:
      /* There is no sign-extending load-byte instruction. */
      return false;

    case PPC::EXTSH:
    case PPC::EXTSH8:
    case PPC::EXTSH8_32_64: {
      // LHA[X] sign-extends halfwords; also subsumes a byte-sized load.
      if (VT != MVT::i16 && VT != MVT::i8)
        return false;
      break;
    }

    case PPC::EXTSW:
    case PPC::EXTSW_32:
    case PPC::EXTSW_32_64: {
      // LWA[X] sign-extends words; also subsumes narrower loads.
      if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
        return false;
      break;
    }
  }

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(Obj: LI->getOperand(i_nocapture: 0), Addr))
    return false;

  // Re-emit the load directly into the extend's result register, then
  // delete the now-redundant extend instruction.
  Register ResultReg = MI->getOperand(i: 0).getReg();

  if (!PPCEmitLoad(VT, ResultReg, Addr, RC: nullptr, IsZExt,
                   FP64LoadOpc: Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
    return false;

  MachineBasicBlock::iterator I(MI);
  removeDeadCode(I, E: std::next(x: I));
  return true;
}
2367
2368// Attempt to lower call arguments in a faster way than done by
2369// the selection DAG code.
2370bool PPCFastISel::fastLowerArguments() {
2371 // Defer to normal argument lowering for now. It's reasonably
2372 // efficient. Consider doing something like ARM to handle the
2373 // case where all args fit in registers, no varargs, no float
2374 // or vector args.
2375 return false;
2376}
2377
// Handle materializing integer constants into a register. This is not
// automatically generated for PowerPC, so must be explicitly created here.
// Returns the register holding Imm, or zero on failure.
Register PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {

  // Only ISD::Constant is handled by this hook.
  if (Opc != ISD::Constant)
    return Register();

  // If we're using CR bit registers for i1 values, handle that as a special
  // case first.
  if (VT == MVT::i1 && Subtarget->useCRBits()) {
    Register ImmReg = createResultReg(RC: &PPC::CRBITRCRegClass);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: Imm == 0 ? PPC::CRUNSET : PPC::CRSET), DestReg: ImmReg);
    return ImmReg;
  }

  // Only integer types up to 64 bits are supported.
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
      VT != MVT::i1)
    return Register();

  // 64-bit values live in G8RC, everything narrower in GPRC.
  const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
                                   &PPC::GPRCRegClass);
  if (VT == MVT::i64)
    return PPCMaterialize64BitInt(Imm, RC);
  else
    return PPCMaterialize32BitInt(Imm, RC);
}
2405
2406// Override for ADDI and ADDI8 to set the correct register class
2407// on RHS operand 0. The automatic infrastructure naively assumes
2408// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2409// for these cases. At the moment, none of the other automatically
2410// generated RI instructions require special treatment. However, once
2411// SelectSelect is implemented, "isel" requires similar handling.
2412//
2413// Also be conservative about the output register class. Avoid
2414// assigning R0 or X0 to the output register for GPRC and G8RC
2415// register classes, as any such result could be used in ADDI, etc.,
2416// where those regs have another meaning.
2417Register PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2418 const TargetRegisterClass *RC,
2419 Register Op0, uint64_t Imm) {
2420 if (MachineInstOpcode == PPC::ADDI)
2421 MRI.setRegClass(Reg: Op0, RC: &PPC::GPRC_and_GPRC_NOR0RegClass);
2422 else if (MachineInstOpcode == PPC::ADDI8)
2423 MRI.setRegClass(Reg: Op0, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
2424
2425 const TargetRegisterClass *UseRC =
2426 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2427 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2428
2429 return FastISel::fastEmitInst_ri(MachineInstOpcode, RC: UseRC, Op0, Imm);
2430}
2431
2432// Override for instructions with one register operand to avoid use of
2433// R0/X0. The automatic infrastructure isn't aware of the context so
2434// we must be conservative.
2435Register PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2436 const TargetRegisterClass *RC,
2437 Register Op0) {
2438 const TargetRegisterClass *UseRC =
2439 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2440 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2441
2442 return FastISel::fastEmitInst_r(MachineInstOpcode, RC: UseRC, Op0);
2443}
2444
2445// Override for instructions with two register operands to avoid use
2446// of R0/X0. The automatic infrastructure isn't aware of the context
2447// so we must be conservative.
2448Register PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2449 const TargetRegisterClass *RC,
2450 Register Op0, Register Op1) {
2451 const TargetRegisterClass *UseRC =
2452 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2453 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2454
2455 return FastISel::fastEmitInst_rr(MachineInstOpcode, RC: UseRC, Op0, Op1);
2456}
2457
2458namespace llvm {
2459 // Create the fast instruction selector for PowerPC64 ELF.
2460 FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
2461 const TargetLibraryInfo *LibInfo) {
2462 // Only available on 64-bit for now.
2463 const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2464 if (Subtarget.isPPC64())
2465 return new PPCFastISel(FuncInfo, LibInfo);
2466 return nullptr;
2467 }
2468}
2469