1 | //===- PPCInstructionSelector.cpp --------------------------------*- C++ -*-==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// This file implements the targeting of the InstructionSelector class for |
10 | /// PowerPC. |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "PPC.h" |
14 | #include "PPCInstrInfo.h" |
15 | #include "PPCMachineFunctionInfo.h" |
16 | #include "PPCRegisterBankInfo.h" |
17 | #include "PPCSubtarget.h" |
18 | #include "PPCTargetMachine.h" |
19 | #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" |
20 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
21 | #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" |
22 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
23 | #include "llvm/CodeGen/MachineConstantPool.h" |
24 | #include "llvm/CodeGen/MachineFunction.h" |
25 | #include "llvm/IR/IntrinsicsPowerPC.h" |
26 | #include "llvm/Support/Debug.h" |
27 | |
28 | #define DEBUG_TYPE "ppc-gisel" |
29 | |
30 | using namespace llvm; |
31 | |
32 | namespace { |
33 | |
34 | #define GET_GLOBALISEL_PREDICATE_BITSET |
35 | #include "PPCGenGlobalISel.inc" |
36 | #undef GET_GLOBALISEL_PREDICATE_BITSET |
37 | |
38 | class PPCInstructionSelector : public InstructionSelector { |
39 | public: |
40 | PPCInstructionSelector(const PPCTargetMachine &TM, const PPCSubtarget &STI, |
41 | const PPCRegisterBankInfo &RBI); |
42 | |
43 | bool select(MachineInstr &I) override; |
44 | static const char *getName() { return DEBUG_TYPE; } |
45 | |
46 | private: |
47 | /// tblgen generated 'select' implementation that is used as the initial |
48 | /// selector for the patterns that do not require complex C++. |
49 | bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; |
50 | |
51 | bool selectFPToInt(MachineInstr &I, MachineBasicBlock &MBB, |
52 | MachineRegisterInfo &MRI) const; |
53 | bool selectIntToFP(MachineInstr &I, MachineBasicBlock &MBB, |
54 | MachineRegisterInfo &MRI) const; |
55 | |
56 | bool selectZExt(MachineInstr &I, MachineBasicBlock &MBB, |
57 | MachineRegisterInfo &MRI) const; |
58 | bool selectConstantPool(MachineInstr &I, MachineBasicBlock &MBB, |
59 | MachineRegisterInfo &MRI) const; |
60 | |
61 | std::optional<bool> selectI64ImmDirect(MachineInstr &I, |
62 | MachineBasicBlock &MBB, |
63 | MachineRegisterInfo &MRI, Register Reg, |
64 | uint64_t Imm) const; |
65 | bool selectI64Imm(MachineInstr &I, MachineBasicBlock &MBB, |
66 | MachineRegisterInfo &MRI) const; |
67 | |
68 | const PPCTargetMachine &TM; |
69 | const PPCSubtarget &STI; |
70 | const PPCInstrInfo &TII; |
71 | const PPCRegisterInfo &TRI; |
72 | const PPCRegisterBankInfo &RBI; |
73 | |
74 | #define GET_GLOBALISEL_PREDICATES_DECL |
75 | #include "PPCGenGlobalISel.inc" |
76 | #undef GET_GLOBALISEL_PREDICATES_DECL |
77 | |
78 | #define GET_GLOBALISEL_TEMPORARIES_DECL |
79 | #include "PPCGenGlobalISel.inc" |
80 | #undef GET_GLOBALISEL_TEMPORARIES_DECL |
81 | }; |
82 | |
83 | } // end anonymous namespace |
84 | |
85 | #define GET_GLOBALISEL_IMPL |
86 | #include "PPCGenGlobalISel.inc" |
87 | #undef GET_GLOBALISEL_IMPL |
88 | |
/// Construct the selector: bind the target machine, subtarget and register
/// bank info, and cache the instruction and register info obtained from
/// \p STI.
PPCInstructionSelector::PPCInstructionSelector(const PPCTargetMachine &TM,
                                               const PPCSubtarget &STI,
                                               const PPCRegisterBankInfo &RBI)
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
      RBI(RBI),
// The generated fragments below continue this member-initializer list (note
// the trailing comma above), so they must stay in this position and order.
#define GET_GLOBALISEL_PREDICATES_INIT
#include "PPCGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "PPCGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
102 | |
103 | static const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank *RB) { |
104 | if (RB->getID() == PPC::GPRRegBankID) { |
105 | if (Ty.getSizeInBits() == 64) |
106 | return &PPC::G8RCRegClass; |
107 | if (Ty.getSizeInBits() <= 32) |
108 | return &PPC::GPRCRegClass; |
109 | } |
110 | if (RB->getID() == PPC::FPRRegBankID) { |
111 | if (Ty.getSizeInBits() == 32) |
112 | return &PPC::F4RCRegClass; |
113 | if (Ty.getSizeInBits() == 64) |
114 | return &PPC::F8RCRegClass; |
115 | } |
116 | if (RB->getID() == PPC::VECRegBankID) { |
117 | if (Ty.getSizeInBits() == 128) |
118 | return &PPC::VSRCRegClass; |
119 | } |
120 | if (RB->getID() == PPC::CRRegBankID) { |
121 | if (Ty.getSizeInBits() == 1) |
122 | return &PPC::CRBITRCRegClass; |
123 | if (Ty.getSizeInBits() == 4) |
124 | return &PPC::CRRCRegClass; |
125 | } |
126 | |
127 | llvm_unreachable("Unknown RegBank!" ); |
128 | } |
129 | |
130 | static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, |
131 | MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, |
132 | const RegisterBankInfo &RBI) { |
133 | Register DstReg = I.getOperand(i: 0).getReg(); |
134 | |
135 | if (DstReg.isPhysical()) |
136 | return true; |
137 | |
138 | const RegisterBank *DstRegBank = RBI.getRegBank(Reg: DstReg, MRI, TRI); |
139 | const TargetRegisterClass *DstRC = |
140 | getRegClass(Ty: MRI.getType(Reg: DstReg), RB: DstRegBank); |
141 | |
142 | // No need to constrain SrcReg. It will get constrained when we hit another of |
143 | // its use or its defs. |
144 | // Copies do not have constraints. |
145 | if (!RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) { |
146 | LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) |
147 | << " operand\n" ); |
148 | return false; |
149 | } |
150 | |
151 | return true; |
152 | } |
153 | |
154 | static unsigned selectLoadStoreOp(unsigned GenericOpc, unsigned RegBankID, |
155 | unsigned OpSize) { |
156 | const bool IsStore = GenericOpc == TargetOpcode::G_STORE; |
157 | switch (RegBankID) { |
158 | case PPC::GPRRegBankID: |
159 | switch (OpSize) { |
160 | case 32: |
161 | return IsStore ? PPC::STW : PPC::LWZ; |
162 | case 64: |
163 | return IsStore ? PPC::STD : PPC::LD; |
164 | default: |
165 | llvm_unreachable("Unexpected size!" ); |
166 | } |
167 | break; |
168 | case PPC::FPRRegBankID: |
169 | switch (OpSize) { |
170 | case 32: |
171 | return IsStore ? PPC::STFS : PPC::LFS; |
172 | case 64: |
173 | return IsStore ? PPC::STFD : PPC::LFD; |
174 | default: |
175 | llvm_unreachable("Unexpected size!" ); |
176 | } |
177 | break; |
178 | default: |
179 | llvm_unreachable("Unexpected register bank!" ); |
180 | } |
181 | return GenericOpc; |
182 | } |
183 | |
184 | bool PPCInstructionSelector::selectIntToFP(MachineInstr &I, |
185 | MachineBasicBlock &MBB, |
186 | MachineRegisterInfo &MRI) const { |
187 | if (!STI.hasDirectMove() || !STI.isPPC64() || !STI.hasFPCVT()) |
188 | return false; |
189 | |
190 | const DebugLoc &DbgLoc = I.getDebugLoc(); |
191 | const Register DstReg = I.getOperand(i: 0).getReg(); |
192 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
193 | |
194 | Register MoveReg = MRI.createVirtualRegister(RegClass: &PPC::VSFRCRegClass); |
195 | |
196 | // For now, only handle the case for 64 bit integer. |
197 | BuildMI(BB&: MBB, I, MIMD: DbgLoc, MCID: TII.get(Opcode: PPC::MTVSRD), DestReg: MoveReg).addReg(RegNo: SrcReg); |
198 | |
199 | bool IsSingle = MRI.getType(Reg: DstReg).getSizeInBits() == 32; |
200 | bool IsSigned = I.getOpcode() == TargetOpcode::G_SITOFP; |
201 | unsigned ConvOp = IsSingle ? (IsSigned ? PPC::XSCVSXDSP : PPC::XSCVUXDSP) |
202 | : (IsSigned ? PPC::XSCVSXDDP : PPC::XSCVUXDDP); |
203 | |
204 | MachineInstr *MI = |
205 | BuildMI(BB&: MBB, I, MIMD: DbgLoc, MCID: TII.get(Opcode: ConvOp), DestReg: DstReg).addReg(RegNo: MoveReg); |
206 | |
207 | I.eraseFromParent(); |
208 | return constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI); |
209 | } |
210 | |
211 | bool PPCInstructionSelector::selectFPToInt(MachineInstr &I, |
212 | MachineBasicBlock &MBB, |
213 | MachineRegisterInfo &MRI) const { |
214 | if (!STI.hasDirectMove() || !STI.isPPC64() || !STI.hasFPCVT()) |
215 | return false; |
216 | |
217 | const DebugLoc &DbgLoc = I.getDebugLoc(); |
218 | const Register DstReg = I.getOperand(i: 0).getReg(); |
219 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
220 | |
221 | Register CopyReg = MRI.createVirtualRegister(RegClass: &PPC::VSFRCRegClass); |
222 | BuildMI(BB&: MBB, I, MIMD: DbgLoc, MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: CopyReg).addReg(RegNo: SrcReg); |
223 | |
224 | Register ConvReg = MRI.createVirtualRegister(RegClass: &PPC::VSFRCRegClass); |
225 | |
226 | bool IsSigned = I.getOpcode() == TargetOpcode::G_FPTOSI; |
227 | |
228 | // single-precision is stored as double-precision on PPC in registers, so |
229 | // always use double-precision convertions. |
230 | unsigned ConvOp = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS; |
231 | |
232 | BuildMI(BB&: MBB, I, MIMD: DbgLoc, MCID: TII.get(Opcode: ConvOp), DestReg: ConvReg).addReg(RegNo: CopyReg); |
233 | |
234 | MachineInstr *MI = |
235 | BuildMI(BB&: MBB, I, MIMD: DbgLoc, MCID: TII.get(Opcode: PPC::MFVSRD), DestReg: DstReg).addReg(RegNo: ConvReg); |
236 | |
237 | I.eraseFromParent(); |
238 | return constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI); |
239 | } |
240 | |
241 | bool PPCInstructionSelector::selectZExt(MachineInstr &I, MachineBasicBlock &MBB, |
242 | MachineRegisterInfo &MRI) const { |
243 | const Register DstReg = I.getOperand(i: 0).getReg(); |
244 | const LLT DstTy = MRI.getType(Reg: DstReg); |
245 | const RegisterBank *DstRegBank = RBI.getRegBank(Reg: DstReg, MRI, TRI); |
246 | |
247 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
248 | |
249 | assert(DstTy.getSizeInBits() == 64 && "Unexpected dest size!" ); |
250 | assert(MRI.getType(SrcReg).getSizeInBits() == 32 && "Unexpected src size!" ); |
251 | |
252 | Register ImpDefReg = |
253 | MRI.createVirtualRegister(RegClass: getRegClass(Ty: DstTy, RB: DstRegBank)); |
254 | BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: TargetOpcode::IMPLICIT_DEF), |
255 | DestReg: ImpDefReg); |
256 | |
257 | Register NewDefReg = |
258 | MRI.createVirtualRegister(RegClass: getRegClass(Ty: DstTy, RB: DstRegBank)); |
259 | BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: TargetOpcode::INSERT_SUBREG), |
260 | DestReg: NewDefReg) |
261 | .addReg(RegNo: ImpDefReg) |
262 | .addReg(RegNo: SrcReg) |
263 | .addImm(Val: PPC::sub_32); |
264 | |
265 | MachineInstr *MI = |
266 | BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::RLDICL), DestReg: DstReg) |
267 | .addReg(RegNo: NewDefReg) |
268 | .addImm(Val: 0) |
269 | .addImm(Val: 32); |
270 | |
271 | I.eraseFromParent(); |
272 | return constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI); |
273 | } |
274 | |
275 | // For any 32 < Num < 64, check if the Imm contains at least Num consecutive |
276 | // zeros and return the number of bits by the left of these consecutive zeros. |
277 | static uint32_t findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) { |
278 | uint32_t HiTZ = llvm::countr_zero<uint32_t>(Val: Hi_32(Value: Imm)); |
279 | uint32_t LoLZ = llvm::countl_zero<uint32_t>(Val: Lo_32(Value: Imm)); |
280 | if ((HiTZ + LoLZ) >= Num) |
281 | return (32 + HiTZ); |
282 | return 0; |
283 | } |
284 | |
285 | // Direct materialization of 64-bit constants by enumerated patterns. |
286 | // Similar to PPCISelDAGToDAG::selectI64ImmDirect(). |
287 | std::optional<bool> PPCInstructionSelector::selectI64ImmDirect(MachineInstr &I, |
288 | MachineBasicBlock &MBB, |
289 | MachineRegisterInfo &MRI, |
290 | Register Reg, |
291 | uint64_t Imm) const { |
292 | unsigned TZ = llvm::countr_zero<uint64_t>(Val: Imm); |
293 | unsigned LZ = llvm::countl_zero<uint64_t>(Val: Imm); |
294 | unsigned TO = llvm::countr_one<uint64_t>(Value: Imm); |
295 | unsigned LO = llvm::countl_one<uint64_t>(Value: Imm); |
296 | uint32_t Hi32 = Hi_32(Value: Imm); |
297 | uint32_t Lo32 = Lo_32(Value: Imm); |
298 | uint32_t Shift = 0; |
299 | |
300 | // Following patterns use 1 instructions to materialize the Imm. |
301 | |
302 | // 1-1) Patterns : {zeros}{15-bit valve} |
303 | // {ones}{15-bit valve} |
304 | if (isInt<16>(x: Imm)) |
305 | return BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::LI8), DestReg: Reg) |
306 | .addImm(Val: Imm) |
307 | .constrainAllUses(TII, TRI, RBI); |
308 | // 1-2) Patterns : {zeros}{15-bit valve}{16 zeros} |
309 | // {ones}{15-bit valve}{16 zeros} |
310 | if (TZ > 15 && (LZ > 32 || LO > 32)) |
311 | return BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::LIS8), DestReg: Reg) |
312 | .addImm(Val: (Imm >> 16) & 0xffff) |
313 | .constrainAllUses(TII, TRI, RBI); |
314 | |
315 | // Following patterns use 2 instructions to materialize the Imm. |
316 | |
317 | assert(LZ < 64 && "Unexpected leading zeros here." ); |
318 | // Count of ones follwing the leading zeros. |
319 | unsigned FO = llvm::countl_one<uint64_t>(Value: Imm << LZ); |
320 | // 2-1) Patterns : {zeros}{31-bit value} |
321 | // {ones}{31-bit value} |
322 | if (isInt<32>(x: Imm)) { |
323 | uint64_t ImmHi16 = (Imm >> 16) & 0xffff; |
324 | unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; |
325 | Register TmpReg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
326 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode), DestReg: TmpReg) |
327 | .addImm(Val: (Imm >> 16) & 0xffff) |
328 | .constrainAllUses(TII, TRI, RBI)) |
329 | return false; |
330 | return BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::ORI8), DestReg: Reg) |
331 | .addReg(RegNo: TmpReg, flags: RegState::Kill) |
332 | .addImm(Val: Imm & 0xffff) |
333 | .constrainAllUses(TII, TRI, RBI); |
334 | } |
335 | // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros} |
336 | // {zeros}{15-bit value}{zeros} |
337 | // {zeros}{ones}{15-bit value} |
338 | // {ones}{15-bit value}{zeros} |
339 | // We can take advantage of LI's sign-extension semantics to generate leading |
340 | // ones, and then use RLDIC to mask off the ones in both sides after rotation. |
341 | if ((LZ + FO + TZ) > 48) { |
342 | Register TmpReg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
343 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::LI8), DestReg: TmpReg) |
344 | .addImm(Val: (Imm >> TZ) & 0xffff) |
345 | .constrainAllUses(TII, TRI, RBI)) |
346 | return false; |
347 | return BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::RLDIC), DestReg: Reg) |
348 | .addReg(RegNo: TmpReg, flags: RegState::Kill) |
349 | .addImm(Val: TZ) |
350 | .addImm(Val: LZ) |
351 | .constrainAllUses(TII, TRI, RBI); |
352 | } |
353 | // 2-3) Pattern : {zeros}{15-bit value}{ones} |
354 | // Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value, |
355 | // therefore we can take advantage of LI's sign-extension semantics, and then |
356 | // mask them off after rotation. |
357 | // |
358 | // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+ |
359 | // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1| |
360 | // +------------------------+ +------------------------+ |
361 | // 63 0 63 0 |
362 | // Imm (Imm >> (48 - LZ) & 0xffff) |
363 | // +----sext-----|--16-bit--+ +clear-|-----------------+ |
364 | // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111| |
365 | // +------------------------+ +------------------------+ |
366 | // 63 0 63 0 |
367 | // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ |
368 | if ((LZ + TO) > 48) { |
369 | // Since the immediates with (LZ > 32) have been handled by previous |
370 | // patterns, here we have (LZ <= 32) to make sure we will not shift right |
371 | // the Imm by a negative value. |
372 | assert(LZ <= 32 && "Unexpected shift value." ); |
373 | Register TmpReg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
374 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::LI8), DestReg: TmpReg) |
375 | .addImm(Val: Imm >> (48 - LZ) & 0xffff) |
376 | .constrainAllUses(TII, TRI, RBI)) |
377 | return false; |
378 | return BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::RLDICL), DestReg: Reg) |
379 | .addReg(RegNo: TmpReg, flags: RegState::Kill) |
380 | .addImm(Val: 48 - LZ) |
381 | .addImm(Val: LZ) |
382 | .constrainAllUses(TII, TRI, RBI); |
383 | } |
384 | // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones} |
385 | // {ones}{15-bit value}{ones} |
386 | // We can take advantage of LI's sign-extension semantics to generate leading |
387 | // ones, and then use RLDICL to mask off the ones in left sides (if required) |
388 | // after rotation. |
389 | // |
390 | // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+ |
391 | // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb| |
392 | // +------------------------+ +------------------------+ |
393 | // 63 0 63 0 |
394 | // Imm (Imm >> TO) & 0xffff |
395 | // +----sext-----|--16-bit--+ +LZ|---------------------+ |
396 | // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111| |
397 | // +------------------------+ +------------------------+ |
398 | // 63 0 63 0 |
399 | // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ |
400 | if ((LZ + FO + TO) > 48) { |
401 | Register TmpReg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
402 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::LI8), DestReg: TmpReg) |
403 | .addImm(Val: (Imm >> TO) & 0xffff) |
404 | .constrainAllUses(TII, TRI, RBI)) |
405 | return false; |
406 | return BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::RLDICL), DestReg: Reg) |
407 | .addReg(RegNo: TmpReg, flags: RegState::Kill) |
408 | .addImm(Val: TO) |
409 | .addImm(Val: LZ) |
410 | .constrainAllUses(TII, TRI, RBI); |
411 | } |
412 | // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value} |
413 | // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit |
414 | // value, we can use LI for Lo16 without generating leading ones then add the |
415 | // Hi16(in Lo32). |
416 | if (LZ == 32 && ((Lo32 & 0x8000) == 0)) { |
417 | Register TmpReg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
418 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::LI8), DestReg: TmpReg) |
419 | .addImm(Val: Lo32 & 0xffff) |
420 | .constrainAllUses(TII, TRI, RBI)) |
421 | return false; |
422 | return BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::ORIS8), DestReg: Reg) |
423 | .addReg(RegNo: TmpReg, flags: RegState::Kill) |
424 | .addImm(Val: Lo32 >> 16) |
425 | .constrainAllUses(TII, TRI, RBI); |
426 | } |
427 | // 2-6) Patterns : {******}{49 zeros}{******} |
428 | // {******}{49 ones}{******} |
429 | // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15 |
430 | // bits remain on both sides. Rotate right the Imm to construct an int<16> |
431 | // value, use LI for int<16> value and then use RLDICL without mask to rotate |
432 | // it back. |
433 | // |
434 | // 1) findContiguousZerosAtLeast(Imm, 49) |
435 | // +------|--zeros-|------+ +---ones--||---15 bit--+ |
436 | // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb| |
437 | // +----------------------+ +----------------------+ |
438 | // 63 0 63 0 |
439 | // |
440 | // 2) findContiguousZerosAtLeast(~Imm, 49) |
441 | // +------|--ones--|------+ +---ones--||---15 bit--+ |
442 | // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb| |
443 | // +----------------------+ +----------------------+ |
444 | // 63 0 63 0 |
445 | if ((Shift = findContiguousZerosAtLeast(Imm, Num: 49)) || |
446 | (Shift = findContiguousZerosAtLeast(Imm: ~Imm, Num: 49))) { |
447 | uint64_t RotImm = APInt(64, Imm).rotr(rotateAmt: Shift).getZExtValue(); |
448 | Register TmpReg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
449 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::LI8), DestReg: TmpReg) |
450 | .addImm(Val: RotImm & 0xffff) |
451 | .constrainAllUses(TII, TRI, RBI)) |
452 | return false; |
453 | return BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::RLDICL), DestReg: Reg) |
454 | .addReg(RegNo: TmpReg, flags: RegState::Kill) |
455 | .addImm(Val: Shift) |
456 | .addImm(Val: 0) |
457 | .constrainAllUses(TII, TRI, RBI); |
458 | } |
459 | |
460 | // Following patterns use 3 instructions to materialize the Imm. |
461 | |
462 | // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros} |
463 | // {zeros}{31-bit value}{zeros} |
464 | // {zeros}{ones}{31-bit value} |
465 | // {ones}{31-bit value}{zeros} |
466 | // We can take advantage of LIS's sign-extension semantics to generate leading |
467 | // ones, add the remaining bits with ORI, and then use RLDIC to mask off the |
468 | // ones in both sides after rotation. |
469 | if ((LZ + FO + TZ) > 32) { |
470 | uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff; |
471 | unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; |
472 | Register TmpReg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
473 | Register Tmp2Reg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
474 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode), DestReg: TmpReg) |
475 | .addImm(Val: ImmHi16) |
476 | .constrainAllUses(TII, TRI, RBI)) |
477 | return false; |
478 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::ORI8), DestReg: Tmp2Reg) |
479 | .addReg(RegNo: TmpReg, flags: RegState::Kill) |
480 | .addImm(Val: (Imm >> TZ) & 0xffff) |
481 | .constrainAllUses(TII, TRI, RBI)) |
482 | return false; |
483 | return BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::RLDIC), DestReg: Reg) |
484 | .addReg(RegNo: Tmp2Reg, flags: RegState::Kill) |
485 | .addImm(Val: TZ) |
486 | .addImm(Val: LZ) |
487 | .constrainAllUses(TII, TRI, RBI); |
488 | } |
489 | // 3-2) Pattern : {zeros}{31-bit value}{ones} |
490 | // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits |
491 | // value, therefore we can take advantage of LIS's sign-extension semantics, |
492 | // add the remaining bits with ORI, and then mask them off after rotation. |
493 | // This is similar to Pattern 2-3, please refer to the diagram there. |
494 | if ((LZ + TO) > 32) { |
495 | // Since the immediates with (LZ > 32) have been handled by previous |
496 | // patterns, here we have (LZ <= 32) to make sure we will not shift right |
497 | // the Imm by a negative value. |
498 | assert(LZ <= 32 && "Unexpected shift value." ); |
499 | Register TmpReg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
500 | Register Tmp2Reg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
501 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::LIS8), DestReg: TmpReg) |
502 | .addImm(Val: (Imm >> (48 - LZ)) & 0xffff) |
503 | .constrainAllUses(TII, TRI, RBI)) |
504 | return false; |
505 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::ORI8), DestReg: Tmp2Reg) |
506 | .addReg(RegNo: TmpReg, flags: RegState::Kill) |
507 | .addImm(Val: (Imm >> (32 - LZ)) & 0xffff) |
508 | .constrainAllUses(TII, TRI, RBI)) |
509 | return false; |
510 | return BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::RLDICL), DestReg: Reg) |
511 | .addReg(RegNo: Tmp2Reg, flags: RegState::Kill) |
512 | .addImm(Val: 32 - LZ) |
513 | .addImm(Val: LZ) |
514 | .constrainAllUses(TII, TRI, RBI); |
515 | } |
516 | // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones} |
517 | // {ones}{31-bit value}{ones} |
518 | // We can take advantage of LIS's sign-extension semantics to generate leading |
519 | // ones, add the remaining bits with ORI, and then use RLDICL to mask off the |
520 | // ones in left sides (if required) after rotation. |
521 | // This is similar to Pattern 2-4, please refer to the diagram there. |
522 | if ((LZ + FO + TO) > 32) { |
523 | Register TmpReg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
524 | Register Tmp2Reg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
525 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::LIS8), DestReg: TmpReg) |
526 | .addImm(Val: (Imm >> (TO + 16)) & 0xffff) |
527 | .constrainAllUses(TII, TRI, RBI)) |
528 | return false; |
529 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::ORI8), DestReg: Tmp2Reg) |
530 | .addReg(RegNo: TmpReg, flags: RegState::Kill) |
531 | .addImm(Val: (Imm >> TO) & 0xffff) |
532 | .constrainAllUses(TII, TRI, RBI)) |
533 | return false; |
534 | return BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::RLDICL), DestReg: Reg) |
535 | .addReg(RegNo: Tmp2Reg, flags: RegState::Kill) |
536 | .addImm(Val: TO) |
537 | .addImm(Val: LZ) |
538 | .constrainAllUses(TII, TRI, RBI); |
539 | } |
540 | // 3-4) Patterns : High word == Low word |
541 | if (Hi32 == Lo32) { |
542 | // Handle the first 32 bits. |
543 | uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff; |
544 | unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; |
545 | Register TmpReg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
546 | Register Tmp2Reg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
547 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode), DestReg: TmpReg) |
548 | .addImm(Val: ImmHi16) |
549 | .constrainAllUses(TII, TRI, RBI)) |
550 | return false; |
551 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::ORI8), DestReg: Tmp2Reg) |
552 | .addReg(RegNo: TmpReg, flags: RegState::Kill) |
553 | .addImm(Val: Lo32 & 0xffff) |
554 | .constrainAllUses(TII, TRI, RBI)) |
555 | return false; |
556 | return BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::RLDIMI), DestReg: Reg) |
557 | .addReg(RegNo: Tmp2Reg) |
558 | .addReg(RegNo: Tmp2Reg, flags: RegState::Kill) |
559 | .addImm(Val: 32) |
560 | .addImm(Val: 0) |
561 | .constrainAllUses(TII, TRI, RBI); |
562 | } |
563 | // 3-5) Patterns : {******}{33 zeros}{******} |
564 | // {******}{33 ones}{******} |
565 | // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31 |
566 | // bits remain on both sides. Rotate right the Imm to construct an int<32> |
567 | // value, use LIS + ORI for int<32> value and then use RLDICL without mask to |
568 | // rotate it back. |
569 | // This is similar to Pattern 2-6, please refer to the diagram there. |
570 | if ((Shift = findContiguousZerosAtLeast(Imm, Num: 33)) || |
571 | (Shift = findContiguousZerosAtLeast(Imm: ~Imm, Num: 33))) { |
572 | uint64_t RotImm = APInt(64, Imm).rotr(rotateAmt: Shift).getZExtValue(); |
573 | uint64_t ImmHi16 = (RotImm >> 16) & 0xffff; |
574 | unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8; |
575 | Register TmpReg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
576 | Register Tmp2Reg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
577 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode), DestReg: TmpReg) |
578 | .addImm(Val: ImmHi16) |
579 | .constrainAllUses(TII, TRI, RBI)) |
580 | return false; |
581 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::ORI8), DestReg: Tmp2Reg) |
582 | .addReg(RegNo: TmpReg, flags: RegState::Kill) |
583 | .addImm(Val: RotImm & 0xffff) |
584 | .constrainAllUses(TII, TRI, RBI)) |
585 | return false; |
586 | return BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::RLDICL), DestReg: Reg) |
587 | .addReg(RegNo: Tmp2Reg, flags: RegState::Kill) |
588 | .addImm(Val: Shift) |
589 | .addImm(Val: 0) |
590 | .constrainAllUses(TII, TRI, RBI); |
591 | } |
592 | |
593 | // If we end up here then no instructions were inserted. |
594 | return std::nullopt; |
595 | } |
596 | |
597 | // Derived from PPCISelDAGToDAG::selectI64Imm(). |
598 | // TODO: Add support for prefixed instructions. |
599 | bool PPCInstructionSelector::selectI64Imm(MachineInstr &I, |
600 | MachineBasicBlock &MBB, |
601 | MachineRegisterInfo &MRI) const { |
602 | assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Unexpected G code" ); |
603 | |
604 | Register DstReg = I.getOperand(i: 0).getReg(); |
605 | int64_t Imm = I.getOperand(i: 1).getCImm()->getValue().getZExtValue(); |
606 | // No more than 3 instructions are used if we can select the i64 immediate |
607 | // directly. |
608 | if (std::optional<bool> Res = selectI64ImmDirect(I, MBB, MRI, Reg: DstReg, Imm)) { |
609 | I.eraseFromParent(); |
610 | return *Res; |
611 | } |
612 | |
613 | // Calculate the last bits as required. |
614 | uint32_t Hi16 = (Lo_32(Value: Imm) >> 16) & 0xffff; |
615 | uint32_t Lo16 = Lo_32(Value: Imm) & 0xffff; |
616 | |
617 | Register Reg = |
618 | (Hi16 || Lo16) ? MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass) : DstReg; |
619 | |
620 | // Handle the upper 32 bit value. |
621 | std::optional<bool> Res = |
622 | selectI64ImmDirect(I, MBB, MRI, Reg, Imm: Imm & 0xffffffff00000000); |
623 | if (!Res || !*Res) |
624 | return false; |
625 | |
626 | // Add in the last bits as required. |
627 | if (Hi16) { |
628 | Register TmpReg = |
629 | Lo16 ? MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass) : DstReg; |
630 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::ORIS8), DestReg: TmpReg) |
631 | .addReg(RegNo: Reg, flags: RegState::Kill) |
632 | .addImm(Val: Hi16) |
633 | .constrainAllUses(TII, TRI, RBI)) |
634 | return false; |
635 | Reg = TmpReg; |
636 | } |
637 | if (Lo16) { |
638 | if (!BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: PPC::ORI8), DestReg: DstReg) |
639 | .addReg(RegNo: Reg, flags: RegState::Kill) |
640 | .addImm(Val: Lo16) |
641 | .constrainAllUses(TII, TRI, RBI)) |
642 | return false; |
643 | } |
644 | I.eraseFromParent(); |
645 | return true; |
646 | } |
647 | |
648 | bool PPCInstructionSelector::selectConstantPool( |
649 | MachineInstr &I, MachineBasicBlock &MBB, MachineRegisterInfo &MRI) const { |
650 | const DebugLoc &DbgLoc = I.getDebugLoc(); |
651 | MachineFunction *MF = MBB.getParent(); |
652 | |
653 | // TODO: handle 32-bit. |
654 | // TODO: Enabling floating point constant pool selection on AIX requires |
655 | // global isel on big endian target enabled first. |
656 | // See CallLowering::enableBigEndian(). |
657 | if (!STI.isPPC64() || !STI.isLittleEndian()) |
658 | return false; |
659 | |
660 | MF->getInfo<PPCFunctionInfo>()->setUsesTOCBasePtr(); |
661 | |
662 | const Register DstReg = I.getOperand(i: 0).getReg(); |
663 | unsigned CPI = I.getOperand(i: 1).getIndex(); |
664 | |
665 | // Address stored in the TOC entry. This is related to code model and the ABI |
666 | // we are currently using. For now we only handle 64-bit Linux LE. PowerPC |
667 | // only supports small, medium and large code model. |
668 | const CodeModel::Model CModel = TM.getCodeModel(); |
669 | assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && |
670 | "PowerPC doesn't support tiny or kernel code models." ); |
671 | |
672 | const MCRegister TOCReg = STI.getTOCPointerRegister(); |
673 | MachineMemOperand *MMO = MF->getMachineMemOperand( |
674 | PtrInfo: MachinePointerInfo::getGOT(MF&: *MF), f: MachineMemOperand::MOLoad, |
675 | MemTy: MRI.getType(Reg: DstReg), base_alignment: MF->getDataLayout().getPointerABIAlignment(AS: 0)); |
676 | |
677 | MachineInstr *MI = nullptr; |
678 | // For now we only handle 64-bit Linux. |
679 | if (CModel == CodeModel::Small) { |
680 | // For small code model, generate LDtocCPT(CPI, X2). |
681 | MI = BuildMI(BB&: MBB, I, MIMD: DbgLoc, MCID: TII.get(Opcode: PPC::LDtocCPT), DestReg: DstReg) |
682 | .addConstantPoolIndex(Idx: CPI) |
683 | .addReg(RegNo: TOCReg) |
684 | .addMemOperand(MMO); |
685 | } else { |
686 | Register HaAddrReg = MRI.createVirtualRegister(RegClass: &PPC::G8RCRegClass); |
687 | BuildMI(BB&: MBB, I, MIMD: DbgLoc, MCID: TII.get(Opcode: PPC::ADDIStocHA8), DestReg: HaAddrReg) |
688 | .addReg(RegNo: TOCReg) |
689 | .addConstantPoolIndex(Idx: CPI); |
690 | |
691 | if (CModel == CodeModel::Large) |
692 | // For large code model, generate LDtocL(CPI, ADDIStocHA8(X2, CPI)) |
693 | MI = BuildMI(BB&: MBB, I, MIMD: DbgLoc, MCID: TII.get(Opcode: PPC::LDtocL), DestReg: DstReg) |
694 | .addConstantPoolIndex(Idx: CPI) |
695 | .addReg(RegNo: HaAddrReg) |
696 | .addMemOperand(MMO); |
697 | else |
698 | // For medium code model, generate ADDItocL8(CPI, ADDIStocHA8(X2, CPI)) |
699 | MI = BuildMI(BB&: MBB, I, MIMD: DbgLoc, MCID: TII.get(Opcode: PPC::ADDItocL8), DestReg: DstReg) |
700 | .addReg(RegNo: HaAddrReg) |
701 | .addConstantPoolIndex(Idx: CPI); |
702 | } |
703 | |
704 | I.eraseFromParent(); |
705 | return constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI); |
706 | } |
707 | |
708 | bool PPCInstructionSelector::select(MachineInstr &I) { |
709 | auto &MBB = *I.getParent(); |
710 | auto &MF = *MBB.getParent(); |
711 | auto &MRI = MF.getRegInfo(); |
712 | |
713 | if (!isPreISelGenericOpcode(Opcode: I.getOpcode())) { |
714 | if (I.isCopy()) |
715 | return selectCopy(I, TII, MRI, TRI, RBI); |
716 | |
717 | return true; |
718 | } |
719 | |
720 | if (selectImpl(I, CoverageInfo&: *CoverageInfo)) |
721 | return true; |
722 | |
723 | unsigned Opcode = I.getOpcode(); |
724 | |
725 | switch (Opcode) { |
726 | default: |
727 | return false; |
728 | case TargetOpcode::G_LOAD: |
729 | case TargetOpcode::G_STORE: { |
730 | GLoadStore &LdSt = cast<GLoadStore>(Val&: I); |
731 | LLT PtrTy = MRI.getType(Reg: LdSt.getPointerReg()); |
732 | |
733 | if (PtrTy != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) { |
734 | LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy |
735 | << ", expected: " << LLT::pointer(0, 64) << '\n'); |
736 | return false; |
737 | } |
738 | |
739 | auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { |
740 | const unsigned NewOpc = selectLoadStoreOp( |
741 | GenericOpc: I.getOpcode(), RegBankID: RBI.getRegBank(Reg: LdSt.getReg(Idx: 0), MRI, TRI)->getID(), |
742 | OpSize: LdSt.getMemSizeInBits().getValue()); |
743 | |
744 | if (NewOpc == I.getOpcode()) |
745 | return nullptr; |
746 | |
747 | // For now, simply use DForm with load/store addr as base and 0 as imm. |
748 | // FIXME: optimize load/store with some specific address patterns. |
749 | I.setDesc(TII.get(Opcode: NewOpc)); |
750 | Register AddrReg = I.getOperand(i: 1).getReg(); |
751 | bool IsKill = I.getOperand(i: 1).isKill(); |
752 | I.getOperand(i: 1).ChangeToImmediate(ImmVal: 0); |
753 | I.addOperand(MF&: *I.getParent()->getParent(), |
754 | Op: MachineOperand::CreateReg(Reg: AddrReg, /* isDef */ false, |
755 | /* isImp */ false, isKill: IsKill)); |
756 | return &I; |
757 | }; |
758 | |
759 | MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); |
760 | if (!LoadStore) |
761 | return false; |
762 | |
763 | return constrainSelectedInstRegOperands(I&: *LoadStore, TII, TRI, RBI); |
764 | } |
765 | case TargetOpcode::G_SITOFP: |
766 | case TargetOpcode::G_UITOFP: |
767 | return selectIntToFP(I, MBB, MRI); |
768 | case TargetOpcode::G_FPTOSI: |
769 | case TargetOpcode::G_FPTOUI: |
770 | return selectFPToInt(I, MBB, MRI); |
771 | // G_SEXT will be selected in tb-gen pattern. |
772 | case TargetOpcode::G_ZEXT: |
773 | return selectZExt(I, MBB, MRI); |
774 | case TargetOpcode::G_CONSTANT: |
775 | return selectI64Imm(I, MBB, MRI); |
776 | case TargetOpcode::G_CONSTANT_POOL: |
777 | return selectConstantPool(I, MBB, MRI); |
778 | } |
779 | return false; |
780 | } |
781 | |
782 | namespace llvm { |
783 | InstructionSelector * |
784 | createPPCInstructionSelector(const PPCTargetMachine &TM, |
785 | const PPCSubtarget &Subtarget, |
786 | const PPCRegisterBankInfo &RBI) { |
787 | return new PPCInstructionSelector(TM, Subtarget, RBI); |
788 | } |
789 | } // end namespace llvm |
790 | |