//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the PowerPC-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// PPCGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCCallingConv.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSelectionDAGInfo.h"
#include "PPCSubtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Operator.h"
#include "llvm/Target/TargetMachine.h"

//===----------------------------------------------------------------------===//
//
// TBD:
//   fastLowerArguments: Handle simple cases.
//   PPCMaterializeGV: Handle TLS.
//   SelectCall: Handle function pointers.
//   SelectCall: Handle multi-register return values.
//   SelectCall: Optimize away nops for local calls.
//   processCallArgs: Handle bit-converted arguments.
//   finishCall: Handle multi-register return values.
//   PPCComputeAddress: Handle parameter references as FrameIndex's.
//   PPCEmitCmp: Handle immediate as operand 1.
//   SelectCall: Handle small byval arguments.
//   SelectIntrinsicCall: Implement.
//   SelectSelect: Implement.
//   Consider factoring isTypeLegal into the base class.
//   Implement switches and jump tables.
//
//===----------------------------------------------------------------------===//
using namespace llvm;

#define DEBUG_TYPE "ppcfastisel"

namespace {

struct Address {
  enum {
    RegBase,
    FrameIndexBase
  } BaseType;

  union {
    unsigned Reg;
    int FI;
  } Base;

  int64_t Offset;

  // Innocuous defaults for our address.
  Address()
    : BaseType(RegBase), Offset(0) {
    Base.Reg = 0;
  }
};

class PPCFastISel final : public FastISel {

  const TargetMachine &TM;
  const PPCSubtarget *Subtarget;
  PPCFunctionInfo *PPCFuncInfo;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  LLVMContext *Context;

  public:
    explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
                         const TargetLibraryInfo *LibInfo,
                         const LibcallLoweringInfo *LibcallLowering)
        : FastISel(FuncInfo, LibInfo, LibcallLowering),
          TM(FuncInfo.MF->getTarget()),
          Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
          PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
          TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
          Context(&FuncInfo.Fn->getContext()) {}

  // Backend specific FastISel code.
  private:
    bool fastSelectInstruction(const Instruction *I) override;
    Register fastMaterializeConstant(const Constant *C) override;
    Register fastMaterializeAlloca(const AllocaInst *AI) override;
    bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                             const LoadInst *LI) override;
    bool fastLowerArguments() override;
    Register fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
    Register fastEmitInst_ri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, Register Op0,
                             uint64_t Imm);
    Register fastEmitInst_r(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC, Register Op0);
    Register fastEmitInst_rr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, Register Op0,
                             Register Op1);

    bool fastLowerCall(CallLoweringInfo &CLI) override;

  // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectIToFP(const Instruction *I, bool IsSigned);
    bool SelectFPToI(const Instruction *I, bool IsSigned);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);

  // Utility routines.
  private:
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool isValueAvailable(const Value *V) const;
    bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
      return RC->getID() == PPC::VSFRCRegClassID;
    }
    bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
      return RC->getID() == PPC::VSSRCRegClassID;
    }
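    // Copy SrcReg into a fresh virtual register of class ToRC and return the
    // new register, optionally applying register-state flags and a
    // subregister index to the source operand.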
    Register copyRegToRegClass(const TargetRegisterClass *ToRC,
                               Register SrcReg, RegState Flag = {},
                               unsigned SubReg = 0) {
      Register TmpReg = createResultReg(ToRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), TmpReg)
          .addReg(SrcReg, Flag, SubReg);
      return TmpReg;
    }
    bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
                    bool isZExt, Register DestReg, const PPC::Predicate Pred);
    bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                     const TargetRegisterClass *RC, bool IsZExt = true,
                     unsigned FP64LoadOpc = PPC::LFD);
    bool PPCEmitStore(MVT VT, Register SrcReg, Address &Addr);
    bool PPCComputeAddress(const Value *Obj, Address &Addr);
    void PPCSimplifyAddress(Address &Addr, bool &UseOffset,
                            Register &IndexReg);
    bool PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
                       Register DestReg, bool IsZExt);
    Register PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
    Register PPCMaterializeGV(const GlobalValue *GV, MVT VT);
    Register PPCMaterializeInt(const ConstantInt *CI, MVT VT,
                               bool UseSExt = true);
    Register PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
    Register PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC);
    Register PPCMoveToIntReg(const Instruction *I, MVT VT, Register SrcReg,
                             bool IsSigned);
    Register PPCMoveToFPReg(MVT VT, Register SrcReg, bool IsSigned);

  // Call handling routines.
  private:
    bool processCallArgs(SmallVectorImpl<Value *> &Args,
                         SmallVectorImpl<Register> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC, unsigned &NumBytes,
                         bool IsVarArg);
    bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);

  private:
  #include "PPCGenFastISel.inc"

};

} // end anonymous namespace

static std::optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // These are not representable with any single compare.
    case CmpInst::FCMP_FALSE:
    case CmpInst::FCMP_TRUE:
    // The major concern with the following six cases is the NaN result. The
    // comparison result consists of 4 bits, indicating lt, eq, gt and un
    // (unordered), exactly one of which will be set. The result is generated
    // by the fcmpu instruction. However, the bc instruction only inspects
    // one of the first 3 bits, so when un is set, bc may jump to an
    // undesired place.
    //
    // More specifically, if we expect an unordered comparison and un is set,
    // we expect to always go to the true branch; in that case UEQ, UGT and
    // ULT still give false, which is undesired; but UNE, UGE and ULE happen
    // to give true, since they are tested by inspecting !eq, !lt and !gt,
    // respectively.
    //
    // Similarly, for an ordered comparison, when un is set we always expect
    // the result to be false. In that case OGT, OLT and OEQ are fine, since
    // they actually test gt, lt and eq respectively, which are false. OGE,
    // OLE and ONE are tested through !lt, !gt and !eq, and those are true.
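    //
    // For example, a NaN operand to "fcmp ueq double %a, %b" sets only the
    // un bit; a bc testing the eq bit sees 0 and falls through, missing the
    // branch that an unordered-equal comparison should take.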
    case CmpInst::FCMP_UEQ:
    case CmpInst::FCMP_UGT:
    case CmpInst::FCMP_ULT:
    case CmpInst::FCMP_OGE:
    case CmpInst::FCMP_OLE:
    case CmpInst::FCMP_ONE:
    default:
      return std::nullopt;

    case CmpInst::FCMP_OEQ:
    case CmpInst::ICMP_EQ:
      return PPC::PRED_EQ;

    case CmpInst::FCMP_OGT:
    case CmpInst::ICMP_UGT:
    case CmpInst::ICMP_SGT:
      return PPC::PRED_GT;

    case CmpInst::FCMP_UGE:
    case CmpInst::ICMP_UGE:
    case CmpInst::ICMP_SGE:
      return PPC::PRED_GE;

    case CmpInst::FCMP_OLT:
    case CmpInst::ICMP_ULT:
    case CmpInst::ICMP_SLT:
      return PPC::PRED_LT;

    case CmpInst::FCMP_ULE:
    case CmpInst::ICMP_ULE:
    case CmpInst::ICMP_SLE:
      return PPC::PRED_LE;

    case CmpInst::FCMP_UNE:
    case CmpInst::ICMP_NE:
      return PPC::PRED_NE;

    case CmpInst::FCMP_ORD:
      return PPC::PRED_NU;

    case CmpInst::FCMP_UNO:
      return PPC::PRED_UN;
  }
}

// Determine whether the type Ty is simple enough to be handled by
// fast-isel, and return its equivalent machine type in VT.
// FIXME: Copied directly from ARM -- factor into base class?
bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT Evt = TLI.getValueType(DL, Ty, true);

  // Only handle simple types.
  if (Evt == MVT::Other || !Evt.isSimple()) return false;
  VT = Evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

// Determine whether the type Ty is simple enough to be handled by
// fast-isel as a load target, and return its equivalent machine type in VT.
bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign- or zero-extended to a basic
  // operation, go ahead and accept it now.
  if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
    return true;
  }

  return false;
}

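// Return true if V's virtual register can be used from the current block:
// either V is not an instruction (e.g., a constant or an argument), or it is
// defined in the block currently being selected.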
bool PPCFastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
}

// Given a value Obj, create an Address object Addr that represents its
// address. Return false if we can't handle it.
bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
    default:
      break;
    case Instruction::BitCast:
      // Look through bitcasts.
      return PPCComputeAddress(U->getOperand(0), Addr);
    case Instruction::IntToPtr:
      // Look past no-op inttoptrs.
      if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
          TLI.getPointerTy(DL))
        return PPCComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::PtrToInt:
      // Look past no-op ptrtoints.
      if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
        return PPCComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::GetElementPtr: {
      Address SavedAddr = Addr;
      int64_t TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
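      // For example, "getelementptr inbounds %struct.S, ptr %p, i64 0, i32 2"
      // contributes the constant byte offset of field 2 from the struct
      // layout, so such a GEP can often collapse entirely into Addr.Offset.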
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
           II != IE; ++II, ++GTI) {
        const Value *Op = *II;
        if (StructType *STy = GTI.getStructTypeOrNull()) {
          const StructLayout *SL = DL.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          uint64_t S = GTI.getSequentialElementStride(DL);
          for (;;) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (canFoldAddIntoGEP(U, Op)) {
              // A compatible add with a constant operand. Fold the constant.
              ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (PPCComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // FIXME: References to parameters fall through to the behavior
  // below. They should be able to reference a frame index since
  // they are stored to the stack, so we can get "ld rx, offset(r1)"
  // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
  // just contain the parameter. Try to handle this with a FI.

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0)
    Addr.Base.Reg = getRegForValue(Obj);

  // Prevent assignment of base register to X0, which is inappropriate
  // for loads and stores alike.
  if (Addr.Base.Reg != 0)
    MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);

  return Addr.Base.Reg != 0;
}

// Fix up some addresses that can't be used directly. For example, if
// an offset won't fit in an instruction field, we may need to move it
// into an index register.
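// For example, an offset of 40000 does not fit in the signed 16-bit
// displacement field of a D-form "lwz", so the offset is materialized into a
// register and an X-form (indexed) instruction such as "lwzx" is used
// instead.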
void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
                                     Register &IndexReg) {

  // Check whether the offset fits in the instruction field.
  if (!isInt<16>(Addr.Offset))
    UseOffset = false;

  // If this is a frame-index base and the offset needs to be simplified,
  // put the alloca address into a register, set the base type back to
  // register, and continue. This should almost never happen.
  if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
    Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
            ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  if (!UseOffset) {
    IntegerType *OffsetTy = Type::getInt64Ty(*Context);
    const ConstantInt *Offset = ConstantInt::getSigned(OffsetTy, Addr.Offset);
    IndexReg = PPCMaterializeInt(Offset, MVT::i64);
    assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
  }
}

// Emit a load instruction if possible, returning true if we succeeded,
// otherwise false. See commentary below for how the register class of
// the load is determined.
bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                              const TargetRegisterClass *RC,
                              bool IsZExt, unsigned FP64LoadOpc) {
  unsigned Opc;
  bool UseOffset = true;
  bool HasSPE = Subtarget->hasSPE();

  // If ResultReg is given, it determines the register class of the load.
  // Otherwise, RC is the register class to use. If the result of the
  // load isn't anticipated in this block, both may be zero, in which
  // case we must make a conservative guess. In particular, don't assign
  // R0 or X0 to the result register, as the result may be used in a load,
  // store, add-immediate, or isel that won't permit this. (Though
  // perhaps the spill and reload of live-exit values would handle this?)
  const TargetRegisterClass *UseRC =
    (ResultReg ? MRI.getRegClass(ResultReg) :
     (RC ? RC :
      (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
       (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
        (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
         &PPC::GPRC_and_GPRC_NOR0RegClass)))));

  bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);

  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
      break;
    case MVT::i16:
      Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
                    : (Is32BitInt ? PPC::LHA : PPC::LHA8));
      break;
    case MVT::i32:
      Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
                    : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
      if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
        UseOffset = false;
      break;
    case MVT::i64:
      Opc = PPC::LD;
      assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
             "64-bit load with 32-bit target??");
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
      break;
    case MVT::f64:
      Opc = FP64LoadOpc;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  Register IndexReg;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX load with an offset of 0, a VSX indexed load
  // can be used.
  bool IsVSSRC = isVSSRCRegClass(UseRC);
  bool IsVSFRC = isVSFRCRegClass(UseRC);
  bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
  bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
  if ((Is32VSXLoad || Is64VSXLoad) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  if (!ResultReg)
    ResultReg = createResultReg(UseRC);

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlign(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default:          llvm_unreachable("Unexpected opcode!");
      case PPC::LBZ:    Opc = PPC::LBZX;    break;
      case PPC::LBZ8:   Opc = PPC::LBZX8;   break;
      case PPC::LHZ:    Opc = PPC::LHZX;    break;
      case PPC::LHZ8:   Opc = PPC::LHZX8;   break;
      case PPC::LHA:    Opc = PPC::LHAX;    break;
      case PPC::LHA8:   Opc = PPC::LHAX8;   break;
      case PPC::LWZ:    Opc = PPC::LWZX;    break;
      case PPC::LWZ8:   Opc = PPC::LWZX8;   break;
      case PPC::LWA:    Opc = PPC::LWAX;    break;
      case PPC::LWA_32: Opc = PPC::LWAX_32; break;
      case PPC::LD:     Opc = PPC::LDX;     break;
      case PPC::LFS:    Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
      case PPC::LFD:    Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX;  break;
      case PPC::EVLDD:  Opc = PPC::EVLDDX;  break;
      case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
                       ResultReg);

    // If an index register was materialized, use it together with the base;
    // otherwise use X0 as the base, which makes the instruction use a zero
    // base in the effective-address computation regardless of the register's
    // contents.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}

// Attempt to fast-select a load instruction.
bool PPCFastISel::SelectLoad(const Instruction *I) {
  // FIXME: No atomic loads are supported.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(I->getOperand(0), Addr))
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. This is necessary
  // to constrain RA from using R0/X0 when this is not legal.
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;

  Register ResultReg = 0;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
                   Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
    return false;
  updateValueMap(I, ResultReg);
  return true;
}

// Emit a store instruction to store SrcReg at Addr.
bool PPCFastISel::PPCEmitStore(MVT VT, Register SrcReg, Address &Addr) {
  assert(SrcReg && "Nothing to store!");
  unsigned Opc;
  bool UseOffset = true;

  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::STB : PPC::STB8;
      break;
    case MVT::i16:
      Opc = Is32BitInt ? PPC::STH : PPC::STH8;
      break;
    case MVT::i32:
      assert(Is32BitInt && "Not GPRC for i32??");
      Opc = PPC::STW;
      break;
    case MVT::i64:
      Opc = PPC::STD;
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
      break;
    case MVT::f64:
      Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  Register IndexReg;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX store with an offset of 0, a VSX indexed store
  // can be used.
  bool IsVSSRC = isVSSRCRegClass(RC);
  bool IsVSFRC = isVSFRCRegClass(RC);
  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
  if ((Is32VSXStore || Is64VSXStore) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlign(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(SrcReg)
        .addImm(Addr.Offset)
        .addFrameIndex(Addr.Base.FI)
        .addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore)
      return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
      .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default:          llvm_unreachable("Unexpected opcode!");
      case PPC::STB:    Opc = PPC::STBX;    break;
      case PPC::STH:    Opc = PPC::STHX;    break;
      case PPC::STW:    Opc = PPC::STWX;    break;
      case PPC::STB8:   Opc = PPC::STBX8;   break;
      case PPC::STH8:   Opc = PPC::STHX8;   break;
      case PPC::STW8:   Opc = PPC::STWX8;   break;
      case PPC::STD:    Opc = PPC::STDX;    break;
      case PPC::STFS:   Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
      case PPC::STFD:   Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX;  break;
      case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
      case PPC::SPESTW: Opc = PPC::SPESTWX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
                   .addReg(SrcReg);

    // If an index register was materialized, use it together with the base;
    // otherwise use X0 as the base, which makes the instruction use a zero
    // base in the effective-address computation regardless of the register's
    // contents.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}

// Attempt to fast-select a store instruction.
bool PPCFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  Register SrcReg;

  // FIXME: No atomic stores are supported.
  if (cast<StoreInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(Op0->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (!SrcReg)
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!PPCEmitStore(VT, SrcReg, Addr))
    return false;

  return true;
}

// Attempt to fast-select a branch instruction.
bool PPCFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *BrBB = FuncInfo.MBB;
  MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
  MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));

  // For now, just try the simplest case where it's fed by a compare.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (isValueAvailable(CI)) {
      std::optional<PPC::Predicate> OptPPCPred =
          getComparePred(CI->getPredicate());
      if (!OptPPCPred)
        return false;

      PPC::Predicate PPCPred = *OptPPCPred;

      // Take advantage of fall-through opportunities.
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        PPCPred = PPC::InvertPredicate(PPCPred);
      }

      Register CondReg = createResultReg(&PPC::CRRCRegClass);

      if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
                      CondReg, PPCPred))
        return false;

      BuildMI(*BrBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCC))
          .addImm(Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
          .addReg(CondReg)
          .addMBB(TBB);
      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  } else if (const ConstantInt *CI =
                 dyn_cast<ConstantInt>(BI->getCondition())) {
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    fastEmitBranch(Target, MIMD.getDL());
    return true;
  }

  // FIXME: ARM looks for a case where the block containing the compare
  // has been split from the block containing the branch. If this happens,
  // there is a vreg available containing the result of the compare. I'm
  // not sure we can do much, as we've lost the predicate information with
  // the compare instruction -- we have a 4-bit CR but don't know which bit
  // to test here.
  return false;
}

// Attempt to emit a compare of the two source values. Signed and unsigned
// comparisons are supported. Return false if we can't handle it.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
                             bool IsZExt, Register DestReg,
                             const PPC::Predicate Pred) {
  Type *Ty = SrcValue1->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  if (SrcVT == MVT::i1 && Subtarget->useCRBits())
    return false;

  // See if operand 2 is an immediate encodeable in the compare.
  // FIXME: Operands are not in canonical order at -O0, so an immediate
  // operand in position 1 is a lost opportunity for now. We are
  // similar to ARM in this regard.
  int64_t Imm = 0;
  bool UseImm = false;
  const bool HasSPE = Subtarget->hasSPE();

  // Only 16-bit integer constants can be represented in compares for
  // PowerPC. Others will be materialized into a register.
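  // For example, "icmp eq i32 %x, 42" can use "cmpwi cr0, rX, 42", while a
  // constant such as 70000 is out of range and must first be materialized
  // into a register for a reg-reg "cmpw".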
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() :
                       (int64_t)CIVal.getSExtValue();
      if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
        UseImm = true;
    }
  }

  Register SrcReg1 = getRegForValue(SrcValue1);
  if (!SrcReg1)
    return false;

  Register SrcReg2;
  if (!UseImm) {
    SrcReg2 = getRegForValue(SrcValue2);
    if (!SrcReg2)
      return false;
  }

  unsigned CmpOpc;
  bool NeedsExt = false;

  auto RC1 = MRI.getRegClass(SrcReg1);
  auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr;

  switch (SrcVT.SimpleTy) {
    default: return false;
    case MVT::f32:
      if (HasSPE) {
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFSCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFSCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFSCMPGT;
            break;
        }
      } else {
        CmpOpc = PPC::FCMPUS;
        if (isVSSRCRegClass(RC1))
          SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
        if (RC2 && isVSSRCRegClass(RC2))
          SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
      }
      break;
    case MVT::f64:
      if (HasSPE) {
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFDCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFDCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFDCMPGT;
            break;
        }
      } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) {
        CmpOpc = PPC::XSCMPUDP;
      } else {
        CmpOpc = PPC::FCMPUD;
      }
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      NeedsExt = true;
      [[fallthrough]];
    case MVT::i32:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
      else
        CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
      break;
    case MVT::i64:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
      else
        CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
      break;
  }

  if (NeedsExt) {
    Register ExtReg = createResultReg(&PPC::GPRCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
      return false;
    SrcReg1 = ExtReg;

    if (!UseImm) {
      Register ExtReg = createResultReg(&PPC::GPRCRegClass);
      if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
        return false;
      SrcReg2 = ExtReg;
    }
  }

  if (!UseImm)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addReg(SrcReg2);
  else
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addImm(Imm);

  return true;
}

// Attempt to fast-select a floating-point extend instruction.
bool PPCFastISel::SelectFPExt(const Instruction *I) {
  Value *Src = I->getOperand(0);
  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  if (SrcVT != MVT::f32 || DestVT != MVT::f64)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // No code is generated for a FP extend.
  updateValueMap(I, SrcReg);
  return true;
}

// Attempt to fast-select a floating-point truncate instruction.
bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
  Value *Src = I->getOperand(0);
  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  if (SrcVT != MVT::f64 || DestVT != MVT::f32)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // Round the result to single precision.
  Register DestReg;
  auto RC = MRI.getRegClass(SrcReg);
  if (Subtarget->hasSPE()) {
    DestReg = createResultReg(&PPC::GPRCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::EFSCFD),
            DestReg)
        .addReg(SrcReg);
  } else if (Subtarget->hasP8Vector() && isVSFRCRegClass(RC)) {
    DestReg = createResultReg(&PPC::VSSRCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::XSRSP),
            DestReg)
        .addReg(SrcReg);
  } else {
    SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
    DestReg = createResultReg(&PPC::F4RCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(PPC::FRSP), DestReg)
        .addReg(SrcReg);
  }

  updateValueMap(I, DestReg);
  return true;
}

// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. It could use a 4-byte
// stack slot and a 4-byte store/load sequence, or just sign-extend the
// 4-byte case to 8 bytes, which produces tighter code but wastes stack space.
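// Typical sequence for a signed i32 source when LFIWAX is available: extend
// to i64, "std" to an 8-byte stack slot, "lfiwax" from the 4-byte subword,
// then convert with fcfid in the caller.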
Register PPCFastISel::PPCMoveToFPReg(MVT SrcVT, Register SrcReg,
                                     bool IsSigned) {

  // If necessary, extend 32-bit int to 64-bit.
  if (SrcVT == MVT::i32) {
    Register TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return Register();
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);

  // Store the value from the GPR.
  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
    return Register();

  // Load the integer value into an FPR. The kind of load used depends
  // on a number of conditions.
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      LoadOpc = PPC::LFIWZX;
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    } else if (Subtarget->hasLFIWAX()) {
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    }
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register ResultReg;
  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
    return Register();

  return ResultReg;
}

// Attempt to fast-select an integer-to-floating-point conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs.
  if (Subtarget->hasSPE()) {
    unsigned Opc;
    if (DstVT == MVT::f32)
      Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
    else
      Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;

    Register DestReg = createResultReg(&PPC::SPERCRegClass);
    // Generate the convert.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
        .addReg(SrcReg);
    updateValueMap(I, DestReg);
    return true;
  }

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !Subtarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float. Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding. If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
    return false;

  // Extend the input if necessary.
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    Register TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR.
  Register FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (!FPReg)
    return false;

  // Determine the opcode for the conversion.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register DestReg = createResultReg(RC);
  unsigned Opc;

  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
      .addReg(FPReg);

  updateValueMap(I, DestReg);
  return true;
}

// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
Register PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
                                      Register SrcReg, bool IsSigned) {
  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  // Note that if we have STFIWX available, we could use a 4-byte stack
  // slot for i32, but this being fast-isel we'll just go with the
  // easiest code gen possible.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);

  // Store the value from the FPR.
  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
    return Register();

  // Reload it into a GPR. If we want an i32 on big endian, modify the
  // address to have a 4-byte offset so we load from the right place.
  if (VT == MVT::i32)
    Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;

  // Look at the currently assigned register for this instruction
  // to determine the required register class.
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;

  Register ResultReg;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
    return Register();

  return ResultReg;
}

// Attempt to fast-select a floating-point-to-integer conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
  MVT DstVT, SrcVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  if (DstVT != MVT::i32 && DstVT != MVT::i64)
    return false;

  // If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
  if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
      !Subtarget->hasSPE())
    return false;

  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();
  if (!isTypeLegal(SrcTy, SrcVT))
    return false;

  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
  // meaningless copy to get the register class right.
  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
  if (InRC == &PPC::F4RCRegClass)
    SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
  else if (InRC == &PPC::VSSRCRegClass)
    SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);

  // Determine the opcode for the conversion, which takes place
  // entirely within FPRs or VSRs.
  Register DestReg;
  unsigned Opc;
  auto RC = MRI.getRegClass(SrcReg);

  if (Subtarget->hasSPE()) {
    DestReg = createResultReg(&PPC::GPRCRegClass);
    if (IsSigned)
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
    else
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
  } else if (isVSFRCRegClass(RC)) {
    DestReg = createResultReg(&PPC::VSFRCRegClass);
    if (DstVT == MVT::i32)
      Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
    else
      Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
  } else {
    DestReg = createResultReg(&PPC::F8RCRegClass);
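    // Note: without FPCVT there is no fctiwuz, so an unsigned i32 result is
    // produced with fctidz; the 64-bit signed convert yields the correct low
    // 32 bits for any value that fits in an unsigned 32-bit integer.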
    if (DstVT == MVT::i32)
      if (IsSigned)
        Opc = PPC::FCTIWZ;
      else
        Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
    else
      Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
  }

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
      .addReg(SrcReg);

  // Now move the integer value from a float register to an integer register.
  Register IntReg = Subtarget->hasSPE()
                        ? DestReg
                        : PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);

  if (!IntReg)
    return false;

  updateValueMap(I, IntReg);
  return true;
}

// Attempt to fast-select a binary integer operation that isn't already
// handled automatically.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. If there is no register,
  // make a conservative choice (don't assign R0).
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(AssignedReg) :
     &PPC::GPRC_and_GPRC_NOR0RegClass);
  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
      break;
    case ISD::OR:
      Opc = IsGPRC ? PPC::OR : PPC::OR8;
      break;
    case ISD::SUB:
      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
      break;
  }

  Register ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
  Register SrcReg1 = getRegForValue(I->getOperand(0));
  if (!SrcReg1)
    return false;

  // Handle case of small immediate operand.
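  // Note that "subf d, a, b" computes b - a, so a subtract with a small
  // constant subtrahend becomes an addi with the negated immediate; -32768
  // is the one 16-bit value whose negation does not itself fit in 16 bits.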
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
    const APInt &CIVal = ConstInt->getValue();
    int Imm = (int)CIVal.getSExtValue();
    bool UseImm = true;
    if (isInt<16>(Imm)) {
      switch (Opc) {
        default:
          llvm_unreachable("Missing case!");
        case PPC::ADD4:
          Opc = PPC::ADDI;
          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
          break;
        case PPC::ADD8:
          Opc = PPC::ADDI8;
          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
          break;
        case PPC::OR:
          Opc = PPC::ORI;
          break;
        case PPC::OR8:
          Opc = PPC::ORI8;
          break;
        case PPC::SUBF:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI;
            MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
            Imm = -Imm;
          }
          break;
        case PPC::SUBF8:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI8;
            MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
            Imm = -Imm;
          }
          break;
      }

      if (UseImm) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
                ResultReg)
            .addReg(SrcReg1)
            .addImm(Imm);
        updateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  // Reg-reg case.
  Register SrcReg2 = getRegForValue(I->getOperand(1));
  if (!SrcReg2)
    return false;

  // Reverse operands for subtract-from.
  if (ISDOpcode == ISD::SUB)
    std::swap(SrcReg1, SrcReg2);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addReg(SrcReg1).addReg(SrcReg2);
  updateValueMap(I, ResultReg);
  return true;
}

// Handle arguments to a call that we're attempting to fast-select.
// Return false if the arguments are too complex for us at the moment.
bool PPCFastISel::processCallArgs(SmallVectorImpl<Value *> &Args,
                                  SmallVectorImpl<Register> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC, unsigned &NumBytes,
                                  bool IsVarArg) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, Align(8));

  SmallVector<Type *, 16> ArgTys;
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, ArgTys, CC_PPC64_ELF_FIS);

  // Bail out if we can't handle any of the arguments.
  for (const CCValAssign &VA : ArgLocs) {
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Skip vector arguments for now, as well as long double and
    // uint128_t, and anything that isn't passed in a register.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
        !VA.isRegLoc() || VA.needsCustom())
      return false;

    // Skip bit-converted arguments for now.
    if (VA.getLocInfo() == CCValAssign::BCvt)
      return false;
  }

  // Get a count of how many bytes are to be pushed onto the stack.
  NumBytes = CCInfo.getStackSize();

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is a
  // varargs function. Because we cannot tell whether this is needed on the
  // caller side, we have to conservatively assume that it is. As such, make
  // sure we have at least enough stack space for the caller to store the 8
  // GPRs.
  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
  NumBytes = std::max(NumBytes, LinkageSize + 64);

  // Issue CALLSEQ_START.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
          TII.get(TII.getCallFrameSetupOpcode()))
      .addImm(NumBytes).addImm(0);

  // Prepare to assign register arguments. Every argument uses up a
  // GPR protocol register even if it's passed in a floating-point
  // register (unless we're using the fast calling convention).
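  // For example, in f(double, i64) the double goes in F1 but still consumes
  // the X3 slot, so the i64 lands in X4, mirroring the parameter save area
  // layout of the 64-bit ELF ABIs.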
  unsigned NextGPR = PPC::X3;
  unsigned NextFPR = PPC::F1;

  // Process arguments.
  for (const CCValAssign &VA : ArgLocs) {
    Register Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Handle argument promotion and bitcasts.
    switch (VA.getLocInfo()) {
      default:
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::SExt: {
        MVT DestVT = VA.getLocVT();
        const TargetRegisterClass *RC =
          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
        Register TmpReg = createResultReg(RC);
        if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
          llvm_unreachable("Failed to emit a sext!");
        ArgVT = DestVT;
        Arg = TmpReg;
        break;
      }
      case CCValAssign::AExt:
      case CCValAssign::ZExt: {
        MVT DestVT = VA.getLocVT();
        const TargetRegisterClass *RC =
          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
        Register TmpReg = createResultReg(RC);
        if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
          llvm_unreachable("Failed to emit a zext!");
        ArgVT = DestVT;
        Arg = TmpReg;
        break;
      }
      case CCValAssign::BCvt: {
        // FIXME: Not yet handled.
        llvm_unreachable("Should have bailed before getting here!");
        break;
      }
    }

    // Copy this argument to the appropriate register.
    unsigned ArgReg;
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
      ArgReg = NextFPR++;
      if (CC != CallingConv::Fast)
        ++NextGPR;
    } else
      ArgReg = NextGPR++;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
    RegArgs.push_back(ArgReg);
  }

  return true;
}

// For a call that we've determined we can fast-select, finish the
// call sequence and generate a copy to obtain the return value (if any).
bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI,
                             unsigned &NumBytes) {
  CallingConv::ID CC = CLI.CallConv;

  // Issue CALLSEQ_END.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
          TII.get(TII.getCallFrameDestroyOpcode()))
      .addImm(NumBytes).addImm(0);

  // Next, generate a copy to obtain the return value.
  // FIXME: No multi-register return values yet, though I don't foresee
  // any real difficulties there.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CLI.RetTy, RetCC_PPC64_ELF_FIS);
    CCValAssign &VA = RVLocs[0];
    assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
    assert(VA.isRegLoc() && "Can only return in registers!");

    MVT DestVT = VA.getValVT();
    MVT CopyVT = DestVT;

    // Ints smaller than a register still arrive in a full 64-bit
    // register, so make sure we recognize this.
    if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
      CopyVT = MVT::i64;

    Register SourcePhysReg = VA.getLocReg();
    Register ResultReg;

    if (RetVT == CopyVT) {
      const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
      ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg);

    // If necessary, round the floating result to single precision.
    } else if (CopyVT == MVT::f64) {
      ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::FRSP),
              ResultReg).addReg(SourcePhysReg);

    // If only the low half of a general register is needed, generate
    // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
    // used along the fast-isel path (not lowered), and downstream logic
    // also doesn't like a direct subreg copy on a physical reg.)
    } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
      // Convert the physical register from G8RC to GPRC.
      SourcePhysReg = (SourcePhysReg - PPC::X0) + PPC::R0;
      ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
    }

    assert(ResultReg && "ResultReg unset!");
    CLI.InRegs.push_back(SourcePhysReg);
    CLI.ResultReg = ResultReg;
    CLI.NumResultRegs = 1;
  }

  return true;
}

bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC = CLI.CallConv;
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  const MCSymbol *Symbol = CLI.Symbol;

  if (!Callee && !Symbol)
    return false;

  // Allow SelectionDAG isel to handle tail calls and long calls.
  if (IsTailCall || Subtarget->useLongCalls())
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // If this is a PC-Rel function, let SDISel handle the call.
  if (Subtarget->isUsingPCRelativeCalls())
    return false;

  // Handle simple calls for now, with legal return types and
  // those that can be extended.
  Type *RetTy = CLI.RetTy;
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8)
    return false;
  else if (RetVT == MVT::i1 && Subtarget->useCRBits())
    // We can't handle boolean returns when CR bits are in use.
    return false;

  // FIXME: No multi-register return values yet.
  if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
      RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
      RetVT != MVT::f64) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, RetTy, RetCC_PPC64_ELF_FIS);
    if (RVLocs.size() > 1)
      return false;
  }

  // Bail out early if there are more than 8 arguments, as we currently
  // only handle arguments passed in registers.
  unsigned NumArgs = CLI.OutVals.size();
  if (NumArgs > 8)
    return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<Register, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;

  Args.reserve(NumArgs);
  ArgRegs.reserve(NumArgs);
  ArgVTs.reserve(NumArgs);
  ArgFlags.reserve(NumArgs);

  for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
    // Only handle easy calls for now. It would be reasonably easy
    // to handle <= 8-byte structures passed ByVal in registers, but we
    // have to ensure they are right-justified in the register.
    ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
    if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
      return false;

    Value *ArgValue = CLI.OutVals[i];
    Type *ArgTy = ArgValue->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
      return false;

    // FIXME: FastISel cannot handle non-simple types yet, including 128-bit
    // FP types, which are passed in vector registers. Skip these types and
    // fall back to the default SelectionDAG-based selection.
    if (ArgVT.isVector() || ArgVT == MVT::f128)
      return false;

    Register Arg = getRegForValue(ArgValue);
    if (!Arg)
      return false;

    Args.push_back(ArgValue);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Process the arguments.
  SmallVector<unsigned, 8> RegArgs;
  unsigned NumBytes;

  if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, IsVarArg))
    return false;

  MachineInstrBuilder MIB;
  // FIXME: No handling for function pointers yet. This requires
  // implementing the function descriptor (OPD) setup.
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV) {
    // Patchpoints are a special case; they always dispatch to a pointer
    // value. However, we don't actually want to generate the indirect call
    // sequence here (that will be generated, as necessary, during asm
    // printing), and the call we generate here will be erased by
    // FastISel::selectPatchpoint, so don't try very hard...
    if (CLI.IsPatchPoint)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::NOP));
    else
      return false;
  } else {
    // Build a direct call with a NOP for the TOC restore.
    // FIXME: We can and should optimize away the NOP for local calls.
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                  TII.get(PPC::BL8_NOP));
    // Add callee.
    MIB.addGlobalAddress(GV);
  }

  // Add implicit physical register uses to the call.
  for (unsigned Reg : RegArgs)
    MIB.addReg(Reg, RegState::Implicit);

  // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
  // into the call.
  PPCFuncInfo->setUsesTOCBasePtr();
  MIB.addReg(PPC::X2, RegState::Implicit);

  // Add a register mask with the call-preserved registers. Proper
  // defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return finishCall(RetVT, CLI, NumBytes);
}

// Attempt to fast-select a return instruction.
bool PPCFastISel::SelectRet(const Instruction *I) {

  if (!FuncInfo.CanLowerReturn)
    return false;

  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  // Build a list of return value registers.
  SmallVector<Register, 4> RetRegs;
  CallingConv::ID CC = F.getCallingConv();

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
    CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
    const Value *RV = Ret->getOperand(0);

    // FIXME: Only one output register for now.
    if (ValLocs.size() > 1)
      return false;

    // Special case for returning a constant integer of any size - materialize
    // the constant as an i64 and copy it to the return register.
    if (isa<ConstantInt>(RV) && RV->getType()->isIntegerTy()) {
      const ConstantInt *CI = cast<ConstantInt>(RV);
      CCValAssign &VA = ValLocs[0];

      Register RetReg = VA.getLocReg();
      // We still need to worry about properly extending the sign. For
      // example, we could have only a single bit or a constant that needs
      // zero extension rather than sign extension. Make sure we pass the
      // return value extension property to integer materialization.
      Register SrcReg =
          PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);

      RetRegs.push_back(RetReg);

    } else {
      Register Reg = getRegForValue(RV);

      if (!Reg)
        return false;

      // Copy the result values into the output registers.
      for (unsigned i = 0; i < ValLocs.size(); ++i) {
        CCValAssign &VA = ValLocs[i];
        assert(VA.isRegLoc() && "Can only return in registers!");
        RetRegs.push_back(VA.getLocReg());
        Register SrcReg = Reg + VA.getValNo();

        EVT RVEVT = TLI.getValueType(DL, RV->getType());
        if (!RVEVT.isSimple())
          return false;
        MVT RVVT = RVEVT.getSimpleVT();
        MVT DestVT = VA.getLocVT();

        if (RVVT != DestVT && RVVT != MVT::i8 &&
            RVVT != MVT::i16 && RVVT != MVT::i32)
          return false;

        if (RVVT != DestVT) {
          switch (VA.getLocInfo()) {
          default:
            llvm_unreachable("Unknown loc info!");
          case CCValAssign::Full:
            llvm_unreachable("Full value assign but types don't match?");
          case CCValAssign::AExt:
          case CCValAssign::ZExt: {
            const TargetRegisterClass *RC =
                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
            Register TmpReg = createResultReg(RC);
            if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
              return false;
            SrcReg = TmpReg;
            break;
          }
          case CCValAssign::SExt: {
            const TargetRegisterClass *RC =
                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
            Register TmpReg = createResultReg(RC);
            if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
              return false;
            SrcReg = TmpReg;
            break;
          }
          }
        }

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(TargetOpcode::COPY), RetRegs[i])
            .addReg(SrcReg);
      }
    }
  }

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(PPC::BLR8));

  for (Register Reg : RetRegs)
    MIB.addReg(Reg, RegState::Implicit);

  return true;
}

// Attempt to emit an integer extend of SrcReg into DestReg. Both
// signed and zero extensions are supported. Return false if we
// can't handle it.
bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
                                Register DestReg, bool IsZExt) {
  if (DestVT != MVT::i32 && DestVT != MVT::i64)
    return false;
  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
    return false;

  // Signed extensions use EXTSB, EXTSH, EXTSW.
  if (!IsZExt) {
    unsigned Opc;
    if (SrcVT == MVT::i8)
      Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
    else if (SrcVT == MVT::i16)
      Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
    else {
      assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
      Opc = PPC::EXTSW_32_64;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
        .addReg(SrcReg);

  // Unsigned 32-bit extensions use RLWINM.
  } else if (DestVT == MVT::i32) {
    unsigned MB;
    if (SrcVT == MVT::i8)
      MB = 24;
    else {
      assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
      MB = 16;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLWINM),
            DestReg)
        .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
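    // (The mask runs from bit MB through bit 31, so e.g. a zero extend
    // from i8 emits RLWINM DestReg, SrcReg, 0, 24, 31, which computes
    // DestReg = SrcReg & 0xFF.)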

  // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
  } else {
    unsigned MB;
    if (SrcVT == MVT::i8)
      MB = 56;
    else if (SrcVT == MVT::i16)
      MB = 48;
    else
      MB = 32;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(PPC::RLDICL_32_64), DestReg)
        .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
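    // (With SH=0 this simply clears the high MB bits, so e.g. a zero extend
    // from i16 computes DestReg = SrcReg & 0xFFFF.)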
  }

  return true;
}

// Attempt to fast-select an indirect branch instruction.
bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
  Register AddrReg = getRegForValue(I->getOperand(0));
  if (!AddrReg)
    return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::MTCTR8))
      .addReg(AddrReg);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCTR8));

  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
  for (const BasicBlock *SuccBB : IB->successors())
    FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(SuccBB));

  return true;
}

// Attempt to fast-select an integer truncate instruction.
bool PPCFastISel::SelectTrunc(const Instruction *I) {
  Value *Src = I->getOperand(0);
  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
    return false;

  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  Register SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // The only interesting case is when we need to switch register classes.
  if (SrcVT == MVT::i64)
    SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, {}, PPC::sub_32);
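  // (No instruction is needed: e.g. a trunc from i64 to i32 is just a copy
  // of the source's sub_32 half into a GPRC virtual register.)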

  updateValueMap(I, SrcReg);
  return true;
}

// Attempt to fast-select an integer extend instruction.
bool PPCFastISel::SelectIntExt(const Instruction *I) {
  Type *DestTy = I->getType();
  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();

  bool IsZExt = isa<ZExtInst>(I);
  Register SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  // If we know the register class needed for the result of this
  // instruction, use it. Otherwise pick the register class of the
  // correct size that does not contain X0/R0, since we don't know
  // whether downstream uses permit that assignment.
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
      (AssignedReg ? MRI.getRegClass(AssignedReg) :
       (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
                             &PPC::GPRC_and_GPRC_NOR0RegClass));
  Register ResultReg = createResultReg(RC);

  if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

// Attempt to fast-select an instruction that wasn't handled by
// the table-generated machinery.
bool PPCFastISel::fastSelectInstruction(const Instruction *I) {

  switch (I->getOpcode()) {
  case Instruction::Load:
    return SelectLoad(I);
  case Instruction::Store:
    return SelectStore(I);
  case Instruction::Br:
    return SelectBranch(I);
  case Instruction::IndirectBr:
    return SelectIndirectBr(I);
  case Instruction::FPExt:
    return SelectFPExt(I);
  case Instruction::FPTrunc:
    return SelectFPTrunc(I);
  case Instruction::SIToFP:
    return SelectIToFP(I, /*IsSigned=*/true);
  case Instruction::UIToFP:
    return SelectIToFP(I, /*IsSigned=*/false);
  case Instruction::FPToSI:
    return SelectFPToI(I, /*IsSigned=*/true);
  case Instruction::FPToUI:
    return SelectFPToI(I, /*IsSigned=*/false);
  case Instruction::Add:
    return SelectBinaryIntOp(I, ISD::ADD);
  case Instruction::Or:
    return SelectBinaryIntOp(I, ISD::OR);
  case Instruction::Sub:
    return SelectBinaryIntOp(I, ISD::SUB);
  case Instruction::Ret:
    return SelectRet(I);
  case Instruction::Trunc:
    return SelectTrunc(I);
  case Instruction::ZExt:
  case Instruction::SExt:
    return SelectIntExt(I);
  // Here add other flavors of Instruction::XXX that automated
  // cases don't catch. For example, switches are terminators
  // that aren't yet handled.
  default:
    break;
  }
  return false;
}

// Materialize a floating-point constant into a register, and return
// the register number (or zero if we failed to handle it).
Register PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
  // If this is a PC-Rel function, let SDISel handle the constant pool.
  if (Subtarget->isUsingPCRelativeCalls())
    return Register();

  // No plans to handle long double here.
  if (VT != MVT::f32 && VT != MVT::f64)
    return Register();

  // All FP constants are loaded from the constant pool.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  const bool HasSPE = Subtarget->hasSPE();
  const TargetRegisterClass *RC;
  if (HasSPE)
    RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
  else
    RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);

  Register DestReg = createResultReg(RC);
  CodeModel::Model CModel = TM.getCodeModel();

  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
      MachinePointerInfo::getConstantPool(*FuncInfo.MF),
      MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Alignment);

  unsigned Opc;

  if (HasSPE)
    Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
  else
    Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);

  Register TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);

  PPCFuncInfo->setUsesTOCBasePtr();
  // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
  if (CModel == CodeModel::Small) {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocCPT),
            TmpReg)
        .addConstantPoolIndex(Idx).addReg(PPC::X2);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
        .addImm(0).addReg(TmpReg).addMemOperand(MMO);
  } else {
    // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
            TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
    // But for large code model, we must generate a LDtocL followed
    // by the LF[SD].
    if (CModel == CodeModel::Large) {
      Register TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
              TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
          .addImm(0)
          .addReg(TmpReg2);
    } else
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
          .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
          .addReg(TmpReg)
          .addMemOperand(MMO);
  }
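
  // Roughly, the sequences emitted above are (shown for f64; f32 uses LFS):
  //   small:  LDtocCPT Tmp, Idx(X2);     LFD Dest, 0(Tmp)
  //   medium: ADDIStocHA8 Tmp, X2, Idx;  LFD Dest, Idx@toc@l(Tmp)
  //   large:  ADDIStocHA8 Tmp, X2, Idx;  LDtocL Tmp2, Idx(Tmp);
  //           LFD Dest, 0(Tmp2)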

  return DestReg;
}

// Materialize the address of a global value into a register, and return
// the register number (or zero if we failed to handle it).
Register PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
  // If this is a PC-Rel function, let SDISel handle GV materialization.
  if (Subtarget->isUsingPCRelativeCalls())
    return Register();

  assert(VT == MVT::i64 && "Non-address!");
  const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
  Register DestReg = createResultReg(RC);

  // Global values may be plain old object addresses, TLS object
  // addresses, constant pool entries, or jump tables. How we generate
  // code for these may depend on small, medium, or large code model.
  CodeModel::Model CModel = TM.getCodeModel();

  // FIXME: Jump tables are not yet required because fast-isel doesn't
  // handle switches; if that changes, we need them as well. For now,
  // what follows assumes everything's a generic (or TLS) global address.

  // FIXME: We don't yet handle the complexity of TLS.
  if (GV->isThreadLocal())
    return Register();

  PPCFuncInfo->setUsesTOCBasePtr();
  bool IsAIXTocData = TM.getTargetTriple().isOSAIX() &&
                      isa<GlobalVariable>(GV) &&
                      cast<GlobalVariable>(GV)->hasAttribute("toc-data");

  // For small code model, generate a simple TOC load.
  if (CModel == CodeModel::Small) {
    auto MIB = BuildMI(
        *FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
        IsAIXTocData ? TII.get(PPC::ADDItoc8) : TII.get(PPC::LDtoc), DestReg);
    if (IsAIXTocData)
      MIB.addReg(PPC::X2).addGlobalAddress(GV);
    else
      MIB.addGlobalAddress(GV).addReg(PPC::X2);
  } else {
    // If the address is an externally defined symbol, a symbol with common
    // or externally available linkage, a non-local function address, or a
    // jump table address (not yet needed), or if we are generating code
    // for large code model, we generate:
    //   LDtocL(GV, ADDIStocHA8(%x2, GV))
    // Otherwise we generate:
    //   ADDItocL8(ADDIStocHA8(%x2, GV), GV)
    // Either way, start with the ADDIStocHA8:
    Register HighPartReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
            HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);

    if (Subtarget->isGVIndirectSymbol(GV)) {
      assert(!IsAIXTocData && "TOC data should always be direct.");
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
              DestReg).addGlobalAddress(GV).addReg(HighPartReg);
    } else {
      // Otherwise generate the ADDItocL8.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDItocL8),
              DestReg)
          .addReg(HighPartReg)
          .addGlobalAddress(GV);
    }
  }

  return DestReg;
}

// Materialize a 32-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
Register PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  unsigned Lo = Imm & 0xFFFF;
  unsigned Hi = (Imm >> 16) & 0xFFFF;

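  // For illustration: Imm = 0x12345678 fails isInt<16>, so below we emit
  // LIS TmpReg, 0x1234 followed by ORI ResultReg, TmpReg, 0x5678 (or the
  // 8-suffixed forms when RC is a 64-bit register class).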
  Register ResultReg = createResultReg(RC);
  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  if (isInt<16>(Imm))
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
        .addImm(Imm);
  else if (Lo) {
    // Both Lo and Hi have nonzero bits.
    Register TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
        .addImm(Hi);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
        .addReg(TmpReg).addImm(Lo);
  } else
    // Just Hi bits.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
        .addImm(Hi);

  return ResultReg;
}

// Materialize a 64-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
Register PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  unsigned Remainder = 0;
  unsigned Shift = 0;

  // If the value doesn't fit in 32 bits, see if we can shift it
  // so that it fits in 32 bits.
  if (!isInt<32>(Imm)) {
    Shift = llvm::countr_zero<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    if (isInt<32>(ImmSh))
      Imm = ImmSh;
    else {
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }
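
  // A worked example of the hard case: for Imm = 0x123456789ABCDEF0 neither
  // the value nor its shifted form fits in 32 bits, so we keep the high word
  // 0x12345678 in Imm, stash the low word 0x9ABCDEF0 in Remainder, build the
  // high word below (LIS8/ORI8), shift it left 32 with RLDICR, and OR in the
  // remainder with ORIS8/ORI8.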

  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
  // (if not shifted).
  Register TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
  if (!Shift)
    return TmpReg1;

  // If upper 32 bits were not zero, we've built them and need to shift
  // them into place.
  Register TmpReg2;
  if (Imm) {
    TmpReg2 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLDICR),
            TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
  } else
    TmpReg2 = TmpReg1;

  Register TmpReg3;
  unsigned Hi, Lo;
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    TmpReg3 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORIS8),
            TmpReg3).addReg(TmpReg2).addImm(Hi);
  } else
    TmpReg3 = TmpReg2;

  if ((Lo = Remainder & 0xFFFF)) {
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORI8),
            ResultReg).addReg(TmpReg3).addImm(Lo);
    return ResultReg;
  }

  return TmpReg3;
}

// Materialize an integer constant into a register, and return
// the register number (or zero if we failed to handle it).
Register PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
                                        bool UseSExt) {
  // If we're using CR bit registers for i1 values, handle that as a special
  // case first.
  if (VT == MVT::i1 && Subtarget->useCRBits()) {
    Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
    return ImmReg;
  }

  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
      VT != MVT::i1)
    return Register();

  const TargetRegisterClass *RC =
      ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
  int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();

  // If the constant is in range, use a load-immediate.
  // Since LI will sign extend the constant we need to make sure that for
  // our zeroext constants the sign-extended constant fits into 16 bits -
  // a range of 0..0x7fff.
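  // For instance, a zeroext constant 0x8000 requested as an i64 zero-extends
  // to 32768, which fails isInt<16> (LI would sign-extend it to
  // 0xFFFFFFFFFFFF8000), so it falls through to the piecewise path below.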
  if (isInt<16>(Imm)) {
    unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
    Register ImmReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ImmReg)
        .addImm(Imm);
    return ImmReg;
  }

  // Construct the constant piecewise.
  if (VT == MVT::i64)
    return PPCMaterialize64BitInt(Imm, RC);
  else if (VT == MVT::i32)
    return PPCMaterialize32BitInt(Imm, RC);

  return Register();
}

// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
Register PPCFastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return Register();
  MVT VT = CEVT.getSimpleVT();

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return PPCMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return PPCMaterializeGV(GV, VT);
  else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
    // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
    // assumes that constant PHI operands will be zero extended, and failure to
    // match that assumption will cause problems if we sign extend here but
    // some user of a PHI is in a block for which we fall back to full SDAG
    // instruction selection.
    return PPCMaterializeInt(CI, VT, false);

  return Register();
}

// Materialize the address created by an alloca into a register, and
// return the register number (or zero if we failed to handle it).
Register PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  // Don't handle dynamic allocas.
  if (SI == FuncInfo.StaticAllocaMap.end())
    return Register();

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT))
    return Register();

  Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
          ResultReg).addFrameIndex(SI->second).addImm(0);
  return ResultReg;
}

// Fold loads into extends when possible.
// FIXME: We can have multiple redundant extend/trunc instructions
// following a load. The folding only picks up one. Extend this
// to check subsequent instructions for the same pattern and remove
// them. Thus ResultReg should be the def reg for the last redundant
// instruction in a chain, and all intervening instructions can be
// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
// to add ELF64-NOT: rldicl to the appropriate tests when this works.
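// As an example of the single-instruction case handled here: a load of an i8
// feeding RLDICL Dest, Src, 0, 56 (a zero extend to i64) can be replaced by a
// zero-extending byte load (LBZ) that defines Dest directly.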
bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.
  bool IsZExt = false;
  switch (MI->getOpcode()) {
  default:
    return false;

  case PPC::RLDICL:
  case PPC::RLDICL_32_64: {
    IsZExt = true;
    unsigned MB = MI->getOperand(3).getImm();
    if ((VT == MVT::i8 && MB <= 56) ||
        (VT == MVT::i16 && MB <= 48) ||
        (VT == MVT::i32 && MB <= 32))
      break;
    return false;
  }

  case PPC::RLWINM:
  case PPC::RLWINM8: {
    IsZExt = true;
    unsigned MB = MI->getOperand(3).getImm();
    if ((VT == MVT::i8 && MB <= 24) ||
        (VT == MVT::i16 && MB <= 16))
      break;
    return false;
  }

  case PPC::EXTSB:
  case PPC::EXTSB8:
  case PPC::EXTSB8_32_64:
    // There is no sign-extending load-byte instruction.
    return false;

  case PPC::EXTSH:
  case PPC::EXTSH8:
  case PPC::EXTSH8_32_64: {
    if (VT != MVT::i16 && VT != MVT::i8)
      return false;
    break;
  }

  case PPC::EXTSW:
  case PPC::EXTSW_32:
  case PPC::EXTSW_32_64: {
    if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
      return false;
    break;
  }
  }

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(LI->getOperand(0), Addr))
    return false;

  Register ResultReg = MI->getOperand(0).getReg();

  if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
                   Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
    return false;

  MachineBasicBlock::iterator I(MI);
  removeDeadCode(I, std::next(I));
  return true;
}

// Attempt to lower call arguments in a faster way than done by
// the selection DAG code.
bool PPCFastISel::fastLowerArguments() {
  // Defer to normal argument lowering for now. It's reasonably
  // efficient. Consider doing something like ARM to handle the
  // case where all args fit in registers, no varargs, no float
  // or vector args.
  return false;
}

// Handle materializing integer constants into a register. This is not
// automatically generated for PowerPC, so must be explicitly created here.
Register PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
  if (Opc != ISD::Constant)
    return Register();

  // If we're using CR bit registers for i1 values, handle that as a special
  // case first.
  if (VT == MVT::i1 && Subtarget->useCRBits()) {
    Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
    return ImmReg;
  }

  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
      VT != MVT::i1)
    return Register();

  const TargetRegisterClass *RC =
      ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
  if (VT == MVT::i64)
    return PPCMaterialize64BitInt(Imm, RC);
  else
    return PPCMaterialize32BitInt(Imm, RC);
}

// Override for ADDI and ADDI8 to set the correct register class
// on RHS operand 0. The automatic infrastructure naively assumes
// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
// for these cases. At the moment, none of the other automatically
// generated RI instructions require special treatment. However, once
// SelectSelect is implemented, "isel" requires similar handling.
//
// Also be conservative about the output register class. Avoid
// assigning R0 or X0 to the output register for GPRC and G8RC
// register classes, as any such result could be used in ADDI, etc.,
// where those regs have another meaning.
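// (Recall that ADDI reads an RA operand of R0/X0 as the literal value 0, so
// an input accidentally allocated to R0 would silently compute the wrong
// result; hence the NOR0/NOX0 classes.)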
Register PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      Register Op0, uint64_t Imm) {
  if (MachineInstOpcode == PPC::ADDI)
    MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
  else if (MachineInstOpcode == PPC::ADDI8)
    MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);

  const TargetRegisterClass *UseRC =
      (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
       (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));

  return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC, Op0, Imm);
}

// Override for instructions with one register operand to avoid use of
// R0/X0. The automatic infrastructure isn't aware of the context so
// we must be conservative.
Register PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     Register Op0) {
  const TargetRegisterClass *UseRC =
      (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
       (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));

  return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0);
}

// Override for instructions with two register operands to avoid use
// of R0/X0. The automatic infrastructure isn't aware of the context
// so we must be conservative.
Register PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      Register Op0, Register Op1) {
  const TargetRegisterClass *UseRC =
      (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
       (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));

  return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op1);
}

namespace llvm {
// Create the fast instruction selector for PowerPC64 ELF.
FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
                              const TargetLibraryInfo *LibInfo,
                              const LibcallLoweringInfo *LibcallLowering) {
  // Only available on 64-bit for now.
  const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
  if (Subtarget.isPPC64())
    return new PPCFastISel(FuncInfo, LibInfo, LibcallLowering);
  return nullptr;
}
} // end namespace llvm