//===- X86InstructionSelector.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// X86.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86RegisterBankInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <tuple>

#define DEBUG_TYPE "X86-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class X86InstructionSelector : public InstructionSelector {
public:
  X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI,
                         const X86RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // TODO: remove once this is supported by tblgen-erated instruction
  // selection.
  unsigned getLoadStoreOp(const LLT &Ty, const RegisterBank &RB, unsigned Opc,
                          Align Alignment) const;
  // TODO: remove once p0<->i32/i64 matching is available.
  unsigned getPtrLoadStoreOp(const LLT &Ty, const RegisterBank &RB,
                             unsigned Opc) const;

  bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF) const;
  bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI,
                             MachineFunction &MF) const;
  bool selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF) const;
  bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI,
                      MachineFunction &MF) const;
  bool selectTruncOrPtrToInt(MachineInstr &I, MachineRegisterInfo &MRI,
                             MachineFunction &MF) const;
  bool selectZext(MachineInstr &I, MachineRegisterInfo &MRI,
                  MachineFunction &MF) const;
  bool selectAnyext(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI,
                 MachineFunction &MF) const;
  bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI,
                  MachineFunction &MF) const;
  bool selectUAddSub(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
                           MachineFunction &MF);
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF);
  bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectCondBranch(MachineInstr &I, MachineRegisterInfo &MRI,
                        MachineFunction &MF) const;
  bool selectTurnIntoCOPY(MachineInstr &I, MachineRegisterInfo &MRI,
                          const Register DstReg,
                          const TargetRegisterClass *DstRC,
                          const Register SrcReg,
                          const TargetRegisterClass *SrcRC) const;
  bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMulDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
                       MachineFunction &MF) const;
  bool selectSelect(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;

  ComplexRendererFns selectAddr(MachineOperand &Root) const;

  // Emit an insert-subreg copy and insert it before MachineInstr &I.
  bool emitInsertSubreg(Register DstReg, Register SrcReg, MachineInstr &I,
                        MachineRegisterInfo &MRI, MachineFunction &MF) const;
  // Emit an extract-subreg copy and insert it before MachineInstr &I.
  bool emitExtractSubreg(Register DstReg, Register SrcReg, MachineInstr &I,
                         MachineRegisterInfo &MRI, MachineFunction &MF) const;

  const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const;
  const TargetRegisterClass *getRegClass(LLT Ty, Register Reg,
                                         MachineRegisterInfo &MRI) const;

  const X86TargetMachine &TM;
  const X86Subtarget &STI;
  const X86InstrInfo &TII;
  const X86RegisterInfo &TRI;
  const X86RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM,
                                               const X86Subtarget &STI,
                                               const X86RegisterBankInfo &RBI)
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
      RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
const TargetRegisterClass *
X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
  if (RB.getID() == X86::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 8)
      return &X86::GR8RegClass;
    if (Ty.getSizeInBits() == 16)
      return &X86::GR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &X86::GR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &X86::GR64RegClass;
  }
  if (RB.getID() == X86::VECRRegBankID) {
    if (Ty.getSizeInBits() == 16)
      return STI.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return STI.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return STI.hasAVX512() ? &X86::VR128XRegClass : &X86::VR128RegClass;
    if (Ty.getSizeInBits() == 256)
      return STI.hasAVX512() ? &X86::VR256XRegClass : &X86::VR256RegClass;
    if (Ty.getSizeInBits() == 512)
      return &X86::VR512RegClass;
  }

  if (RB.getID() == X86::PSRRegBankID) {
    if (Ty.getSizeInBits() == 80)
      return &X86::RFP80RegClass;
    if (Ty.getSizeInBits() == 64)
      return &X86::RFP64RegClass;
    if (Ty.getSizeInBits() == 32)
      return &X86::RFP32RegClass;
  }

  llvm_unreachable("Unknown RegBank!");
}

const TargetRegisterClass *
X86InstructionSelector::getRegClass(LLT Ty, Register Reg,
                                    MachineRegisterInfo &MRI) const {
  const RegisterBank &RegBank = *RBI.getRegBank(Reg, MRI, TRI);
  return getRegClass(Ty, RegBank);
}

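/// Return the sub-register index that embeds the given GR8/GR16/GR32 class in
/// a wider GPR (sub_8bit, sub_16bit, or sub_32bit), or X86::NoSubRegister if
/// RC is not one of those classes.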
static unsigned getSubRegIndex(const TargetRegisterClass *RC) {
  unsigned SubIdx = X86::NoSubRegister;
  if (RC == &X86::GR32RegClass) {
    SubIdx = X86::sub_32bit;
  } else if (RC == &X86::GR16RegClass) {
    SubIdx = X86::sub_16bit;
  } else if (RC == &X86::GR8RegClass) {
    SubIdx = X86::sub_8bit;
  }

  return SubIdx;
}

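/// Return the narrowest general-purpose register class that contains the
/// given physical register.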
static const TargetRegisterClass *getRegClassFromGRPhysReg(Register Reg) {
  assert(Reg.isPhysical());
  if (X86::GR64RegClass.contains(Reg))
    return &X86::GR64RegClass;
  if (X86::GR32RegClass.contains(Reg))
    return &X86::GR32RegClass;
  if (X86::GR16RegClass.contains(Reg))
    return &X86::GR16RegClass;
  if (X86::GR8RegClass.contains(Reg))
    return &X86::GR8RegClass;

  llvm_unreachable("Unknown RegClass for PhysReg!");
}

// FIXME: We need some sort of API in RBI/TRI to allow generic code to
// constrain operands of simple instructions given a TargetRegisterClass
// and LLT
bool X86InstructionSelector::selectDebugInstr(MachineInstr &I,
                                              MachineRegisterInfo &MRI) const {
  for (MachineOperand &MO : I.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isPhysical())
      continue;
    LLT Ty = MRI.getType(Reg);
    const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
    const TargetRegisterClass *RC =
        dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
    if (!RC) {
      const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
      RC = getRegClass(Ty, RB);
      if (!RC) {
        LLVM_DEBUG(
            dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
        break;
      }
    }
    RBI.constrainGenericRegister(Reg, *RC, MRI);
  }

  return true;
}

// Set X86 Opcode and constrain DestReg.
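// For example (illustrative MIR), a widening copy into a physical register
// produced by ABI lowering, such as
//   $rax = COPY %0:gpr(s32)
// is rewritten by selectCopy below to go through SUBREG_TO_REG:
//   %1:gr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32bit
//   $rax = COPY %1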
bool X86InstructionSelector::selectCopy(MachineInstr &I,
                                        MachineRegisterInfo &MRI) const {
  Register DstReg = I.getOperand(0).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

  Register SrcReg = I.getOperand(1).getReg();
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  if (DstReg.isPhysical()) {
    assert(I.isCopy() && "Generic operators do not allow physical registers");

    if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID &&
        DstRegBank.getID() == X86::GPRRegBankID) {

      const TargetRegisterClass *SrcRC =
          getRegClass(MRI.getType(SrcReg), SrcRegBank);
      const TargetRegisterClass *DstRC = getRegClassFromGRPhysReg(DstReg);

      if (SrcRC != DstRC) {
        // This case can be generated by ABI lowering; perform an anyext.
        Register ExtSrc = MRI.createVirtualRegister(DstRC);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG))
            .addDef(ExtSrc)
            .addImm(0)
            .addReg(SrcReg)
            .addImm(getSubRegIndex(SrcRC));

        I.getOperand(1).setReg(ExtSrc);
      }
    }

    return true;
  }

  assert((!SrcReg.isPhysical() || I.isCopy()) &&
         "No phys reg on generic operators");
  assert((DstSize == SrcSize ||
          // Copies are a means to set up initial types; the number of
          // bits may not exactly match.
          (SrcReg.isPhysical() &&
           DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) &&
         "Copy with different width?!");

  const TargetRegisterClass *DstRC =
      getRegClass(MRI.getType(DstReg), DstRegBank);

  if (SrcRegBank.getID() == X86::GPRRegBankID &&
      DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize &&
      SrcReg.isPhysical()) {
    // Switch to a subregister of the physical register to perform the
    // truncate.

    const TargetRegisterClass *SrcRC = getRegClassFromGRPhysReg(SrcReg);

    if (DstRC != SrcRC) {
      I.getOperand(1).setSubReg(getSubRegIndex(DstRC));
      I.getOperand(1).substPhysReg(SrcReg, TRI);
    }
  }

  // No need to constrain SrcReg. It will get constrained when
  // we hit another of its uses or its defs.
  // Copies do not have constraints.
  const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg);
  if (!OldRC || !DstRC->hasSubClassEq(OldRC)) {
    if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }
  }
  I.setDesc(TII.get(X86::COPY));
  return true;
}

bool X86InstructionSelector::select(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  if (!isPreISelGenericOpcode(Opcode) && !I.isPreISelOpcode()) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return false;

    if (I.isCopy())
      return selectCopy(I, MRI);

    if (I.isDebugInstr())
      return selectDebugInstr(I, MRI);

    return true;
  }

  assert(I.getNumOperands() == I.getNumExplicitOperands() &&
         "Generic instruction has unexpected implicit operands\n");

  if (selectImpl(I, *CoverageInfo))
    return true;

  LLVM_DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs()));

  // TODO: This should be implemented by tblgen.
  switch (I.getOpcode()) {
  default:
    return false;
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_LOAD:
    return selectLoadStoreOp(I, MRI, MF);
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_FRAME_INDEX:
    return selectFrameIndexOrGep(I, MRI, MF);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectGlobalValue(I, MRI, MF);
  case TargetOpcode::G_CONSTANT:
    return selectConstant(I, MRI, MF);
  case TargetOpcode::G_FCONSTANT:
    return materializeFP(I, MRI, MF);
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC:
    return selectTruncOrPtrToInt(I, MRI, MF);
  case TargetOpcode::G_INTTOPTR:
    return selectCopy(I, MRI);
  case TargetOpcode::G_ZEXT:
    return selectZext(I, MRI, MF);
  case TargetOpcode::G_ANYEXT:
    return selectAnyext(I, MRI, MF);
  case TargetOpcode::G_ICMP:
    return selectCmp(I, MRI, MF);
  case TargetOpcode::G_FCMP:
    return selectFCmp(I, MRI, MF);
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_USUBO:
    return selectUAddSub(I, MRI, MF);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectUnmergeValues(I, MRI, MF);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectMergeValues(I, MRI, MF);
  case TargetOpcode::G_EXTRACT:
    return selectExtract(I, MRI, MF);
  case TargetOpcode::G_INSERT:
    return selectInsert(I, MRI, MF);
  case TargetOpcode::G_BRCOND:
    return selectCondBranch(I, MRI, MF);
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_PHI:
    return selectImplicitDefOrPHI(I, MRI);
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SMULH:
  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
    return selectMulDivRem(I, MRI, MF);
  case TargetOpcode::G_SELECT:
    return selectSelect(I, MRI, MF);
  }

  return false;
}

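/// Map a G_LOAD/G_STORE of a pointer in the GPR bank onto the plain integer
/// moves MOV32rm/mr or MOV64rm/mr; any other input is returned unchanged.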
unsigned X86InstructionSelector::getPtrLoadStoreOp(const LLT &Ty,
                                                   const RegisterBank &RB,
                                                   unsigned Opc) const {
  assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) &&
         "Only G_STORE and G_LOAD are expected for selection");
  if (Ty.isPointer() && X86::GPRRegBankID == RB.getID()) {
    bool IsLoad = (Opc == TargetOpcode::G_LOAD);
    switch (Ty.getSizeInBits()) {
    default:
      break;
    case 32:
      return IsLoad ? X86::MOV32rm : X86::MOV32mr;
    case 64:
      return IsLoad ? X86::MOV64rm : X86::MOV64mr;
    }
  }
  return Opc;
}

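/// Pick the concrete X86 load/store opcode for a scalar or vector of the
/// given type, register bank, and alignment (aligned vector accesses use the
/// MOVAPS family, unaligned ones MOVUPS). Returns Opc unchanged if there is
/// no mapping, which callers treat as selection failure.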
unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
                                                const RegisterBank &RB,
                                                unsigned Opc,
                                                Align Alignment) const {
  bool Isload = (Opc == TargetOpcode::G_LOAD);
  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (Ty == LLT::scalar(8)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV8rm : X86::MOV8mr;
  } else if (Ty == LLT::scalar(16)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV16rm : X86::MOV16mr;
  } else if (Ty == LLT::scalar(32)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV32rm : X86::MOV32mr;
    if (X86::VECRRegBankID == RB.getID())
      return Isload ? (HasAVX512 ? X86::VMOVSSZrm_alt
                       : HasAVX  ? X86::VMOVSSrm_alt
                                 : X86::MOVSSrm_alt)
                    : (HasAVX512 ? X86::VMOVSSZmr
                       : HasAVX  ? X86::VMOVSSmr
                                 : X86::MOVSSmr);
    if (X86::PSRRegBankID == RB.getID())
      return Isload ? X86::LD_Fp32m : X86::ST_Fp32m;
  } else if (Ty == LLT::scalar(64)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV64rm : X86::MOV64mr;
    if (X86::VECRRegBankID == RB.getID())
      return Isload ? (HasAVX512 ? X86::VMOVSDZrm_alt
                       : HasAVX  ? X86::VMOVSDrm_alt
                                 : X86::MOVSDrm_alt)
                    : (HasAVX512 ? X86::VMOVSDZmr
                       : HasAVX  ? X86::VMOVSDmr
                                 : X86::MOVSDmr);
    if (X86::PSRRegBankID == RB.getID())
      return Isload ? X86::LD_Fp64m : X86::ST_Fp64m;
  } else if (Ty == LLT::scalar(80)) {
    return Isload ? X86::LD_Fp80m : X86::ST_FpP80m;
  } else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
    if (Alignment >= Align(16))
      return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
                       : HasAVX512
                           ? X86::VMOVAPSZ128rm_NOVLX
                           : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm)
                    : (HasVLX ? X86::VMOVAPSZ128mr
                       : HasAVX512
                           ? X86::VMOVAPSZ128mr_NOVLX
                           : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
    else
      return Isload ? (HasVLX ? X86::VMOVUPSZ128rm
                       : HasAVX512
                           ? X86::VMOVUPSZ128rm_NOVLX
                           : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm)
                    : (HasVLX ? X86::VMOVUPSZ128mr
                       : HasAVX512
                           ? X86::VMOVUPSZ128mr_NOVLX
                           : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
  } else if (Ty.isVector() && Ty.getSizeInBits() == 256) {
    if (Alignment >= Align(32))
      return Isload ? (HasVLX ? X86::VMOVAPSZ256rm
                       : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
                                   : X86::VMOVAPSYrm)
                    : (HasVLX ? X86::VMOVAPSZ256mr
                       : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX
                                   : X86::VMOVAPSYmr);
    else
      return Isload ? (HasVLX ? X86::VMOVUPSZ256rm
                       : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX
                                   : X86::VMOVUPSYrm)
                    : (HasVLX ? X86::VMOVUPSZ256mr
                       : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
                                   : X86::VMOVUPSYmr);
  } else if (Ty.isVector() && Ty.getSizeInBits() == 512) {
    if (Alignment >= Align(64))
      return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
    else
      return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
  }
  return Opc;
}

// Fill in an address from the given instruction.
static bool X86SelectAddress(MachineInstr &I, const X86TargetMachine &TM,
                             const MachineRegisterInfo &MRI,
                             const X86Subtarget &STI, X86AddressMode &AM) {
  assert(I.getOperand(0).isReg() && "unsupported operand.");
  assert(MRI.getType(I.getOperand(0).getReg()).isPointer() &&
         "unsupported type.");

  switch (I.getOpcode()) {
  default:
    break;
  case TargetOpcode::G_FRAME_INDEX:
    AM.Base.FrameIndex = I.getOperand(1).getIndex();
    AM.BaseType = X86AddressMode::FrameIndexBase;
    return true;
  case TargetOpcode::G_PTR_ADD: {
    if (auto COff = getIConstantVRegSExtVal(I.getOperand(2).getReg(), MRI)) {
      int64_t Imm = *COff;
      if (isInt<32>(Imm)) { // Check for displacement overflow.
        AM.Disp = static_cast<int32_t>(Imm);
        AM.Base.Reg = I.getOperand(1).getReg();
        return true;
      }
    }
    break;
  }
  case TargetOpcode::G_GLOBAL_VALUE: {
    auto GV = I.getOperand(1).getGlobal();
    if (GV->isThreadLocal()) {
      return false; // TODO: we don't support TLS yet.
    }
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;
    AM.GV = GV;
    AM.GVOpFlags = STI.classifyGlobalReference(GV);

    // TODO: The ABI requires an extra load, which is not supported yet.
    if (isGlobalStubReference(AM.GVOpFlags))
      return false;

    // TODO: This reference is relative to the PIC base, which is not
    // supported yet.
    if (isGlobalRelativeToPICBase(AM.GVOpFlags))
      return false;

    if (STI.isPICStyleRIPRel()) {
      // Use rip-relative addressing.
      assert(AM.Base.Reg == 0 && AM.IndexReg == 0 &&
             "RIP-relative addresses can't have additional register operands");
      AM.Base.Reg = X86::RIP;
    }
    return true;
  }
  case TargetOpcode::G_CONSTANT_POOL: {
    // TODO: Need a separate move for the Large code model.
    if (TM.getCodeModel() == CodeModel::Large)
      return false;

    AM.GVOpFlags = STI.classifyLocalReference(nullptr);
    if (AM.GVOpFlags == X86II::MO_GOTOFF)
      AM.Base.Reg = STI.getInstrInfo()->getGlobalBaseReg(I.getMF());
    else if (STI.is64Bit())
      AM.Base.Reg = X86::RIP;
    AM.CP = true;
    AM.Disp = I.getOperand(1).getIndex();
    return true;
  }
  }
  // Default behavior.
  AM.Base.Reg = I.getOperand(0).getReg();
  return true;
}

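// Illustrative example of what selectLoadStoreOp (below) produces for a
// pointer load on x86-64:
//   %2:gpr(p0) = G_LOAD %1(p0) :: (load (p0))
// is selected to a plain integer load with a full address operand,
//   %2:gr64 = MOV64rm %1, 1, $noreg, 0, $noreg :: (load (p0))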
bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  unsigned Opc = I.getOpcode();

  assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) &&
         "Only G_STORE and G_LOAD are expected for selection");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);
  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

  assert(I.hasOneMemOperand());
  auto &MemOp = **I.memoperands_begin();
  if (MemOp.isAtomic()) {
    // Note: for unordered operations, we rely on the fact the appropriate MMO
    // is already on the instruction we're mutating, and thus we don't need to
    // make any changes. So long as we select an opcode which is capable of
    // loading or storing the appropriate size atomically, the rest of the
    // backend is required to respect the MMO state.
    if (!MemOp.isUnordered()) {
      LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n");
      return false;
    }
    if (MemOp.getAlign() < Ty.getSizeInBits() / 8) {
      LLVM_DEBUG(dbgs() << "Unaligned atomics not supported yet\n");
      return false;
    }
  }

  unsigned NewOpc = getPtrLoadStoreOp(Ty, RB, Opc);
  if (NewOpc == Opc)
    return false;

  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);
  MachineInstr *Ptr = MRI.getVRegDef(I.getOperand(1).getReg());

  X86AddressMode AM;
  if (!X86SelectAddress(*Ptr, TM, MRI, STI, AM))
    return false;

  if (Opc == TargetOpcode::G_LOAD) {
    I.removeOperand(1);
    addFullAddress(MIB, AM);
  } else {
    // G_STORE (VAL, Addr), X86Store instruction (Addr, VAL)
    I.removeOperand(1);
    I.removeOperand(0);
    addFullAddress(MIB, AM).addUse(DefReg);
  }
  bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  I.addImplicitDefUseOperands(MF);
  return Constrained;
}

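/// Return the LEA opcode matching the pointer width: LEA64r for 64-bit
/// pointers, and LEA32r (or LEA64_32r on x32) for 32-bit pointers.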
static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
  if (Ty == LLT::pointer(0, 64))
    return X86::LEA64r;
  else if (Ty == LLT::pointer(0, 32))
    return STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r;
  else
    llvm_unreachable("Can't get LEA opcode. Unsupported type.");
}

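// Illustrative: a G_PTR_ADD with a non-constant offset,
//   %2:gpr(p0) = G_PTR_ADD %0, %1(s64)
// is selected below as an address computation,
//   %2:gr64 = LEA64r %0, 1, %1, 0, $noreg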
bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I,
                                                   MachineRegisterInfo &MRI,
                                                   MachineFunction &MF) const {
  unsigned Opc = I.getOpcode();

  assert((Opc == TargetOpcode::G_FRAME_INDEX ||
          Opc == TargetOpcode::G_PTR_ADD) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);

  // Use LEA to calculate frame index and GEP.
  unsigned NewOpc = getLeaOP(Ty, STI);
  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);

  if (Opc == TargetOpcode::G_FRAME_INDEX) {
    addOffset(MIB, 0);
  } else {
    MachineOperand &InxOp = I.getOperand(2);
    I.addOperand(InxOp);        // set IndexReg
    InxOp.ChangeToImmediate(1); // set Scale
    MIB.addImm(0).addReg(0);
  }

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::selectGlobalValue(MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) &&
         "unexpected instruction");

  X86AddressMode AM;
  if (!X86SelectAddress(I, TM, MRI, STI, AM))
    return false;

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);
  unsigned NewOpc = getLeaOP(Ty, STI);

  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);

  I.removeOperand(1);
  addFullAddress(MIB, AM);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::selectConstant(MachineInstr &I,
                                            MachineRegisterInfo &MRI,
                                            MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_CONSTANT) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);

  if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID)
    return false;

  uint64_t Val = 0;
  if (I.getOperand(1).isCImm()) {
    Val = I.getOperand(1).getCImm()->getZExtValue();
    I.getOperand(1).ChangeToImmediate(Val);
  } else if (I.getOperand(1).isImm()) {
    Val = I.getOperand(1).getImm();
  } else
    llvm_unreachable("Unsupported operand type.");

  unsigned NewOpc;
  switch (Ty.getSizeInBits()) {
  case 8:
    NewOpc = X86::MOV8ri;
    break;
  case 16:
    NewOpc = X86::MOV16ri;
    break;
  case 32:
    NewOpc = X86::MOV32ri;
    break;
  case 64:
    // TODO: in case isUInt<32>(Val), X86::MOV32ri can be used
    if (isInt<32>(Val))
      NewOpc = X86::MOV64ri32;
    else
      NewOpc = X86::MOV64ri;
    break;
  default:
    llvm_unreachable("Can't select G_CONSTANT, unsupported type.");
  }

  I.setDesc(TII.get(NewOpc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

// Helper function for selectTruncOrPtrToInt and selectAnyext.
// Returns true if DstRC lives on a floating register class and
// SrcRC lives on a 128-bit vector class.
static bool canTurnIntoCOPY(const TargetRegisterClass *DstRC,
                            const TargetRegisterClass *SrcRC) {
  return (DstRC == &X86::FR32RegClass || DstRC == &X86::FR32XRegClass ||
          DstRC == &X86::FR64RegClass || DstRC == &X86::FR64XRegClass) &&
         (SrcRC == &X86::VR128RegClass || SrcRC == &X86::VR128XRegClass);
}

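/// Constrain both registers to the given classes and rewrite I into a plain
/// COPY between them.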
bool X86InstructionSelector::selectTurnIntoCOPY(
    MachineInstr &I, MachineRegisterInfo &MRI, const Register DstReg,
    const TargetRegisterClass *DstRC, const Register SrcReg,
    const TargetRegisterClass *SrcRC) const {

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(X86::COPY));
  return true;
}

bool X86InstructionSelector::selectTruncOrPtrToInt(MachineInstr &I,
                                                   MachineRegisterInfo &MRI,
                                                   MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_TRUNC ||
          I.getOpcode() == TargetOpcode::G_PTRTOINT) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

  if (DstRB.getID() != SrcRB.getID()) {
    LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode())
                      << " input/output on different banks\n");
    return false;
  }

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

  if (!DstRC || !SrcRC)
    return false;

  // If this is a truncation of a value that lives in a vector class and goes
  // into a floating-point class, just replace it with a copy, as we are able
  // to select it as a regular move.
  if (canTurnIntoCOPY(DstRC, SrcRC))
    return selectTurnIntoCOPY(I, MRI, DstReg, DstRC, SrcReg, SrcRC);

  if (DstRB.getID() != X86::GPRRegBankID)
    return false;

  unsigned SubIdx;
  if (DstRC == SrcRC) {
    // Nothing to be done.
    SubIdx = X86::NoSubRegister;
  } else if (DstRC == &X86::GR32RegClass) {
    SubIdx = X86::sub_32bit;
  } else if (DstRC == &X86::GR16RegClass) {
    SubIdx = X86::sub_16bit;
  } else if (DstRC == &X86::GR8RegClass) {
    SubIdx = X86::sub_8bit;
  } else {
    return false;
  }

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << "\n");
    return false;
  }

  I.getOperand(1).setSubReg(SubIdx);

  I.setDesc(TII.get(X86::COPY));
  return true;
}

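// Illustrative: selectZext (below) turns a G_ZEXT of an s1 into a widening
// INSERT_SUBREG plus a masking AND, e.g. for %1:gpr(s32) = G_ZEXT %0(s1):
//   %t0:gr32 = IMPLICIT_DEF
//   %t1:gr32 = INSERT_SUBREG %t0, %0, %subreg.sub_8bit
//   %1:gr32 = AND32ri %t1, 1, implicit-def $eflags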
bool X86InstructionSelector::selectZext(MachineInstr &I,
                                        MachineRegisterInfo &MRI,
                                        MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ZEXT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(16)) &&
         "8=>16 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(32)) &&
         "8=>32 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(32)) &&
         "16=>32 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(64)) &&
         "8=>64 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(64)) &&
         "16=>64 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(32) && DstTy == LLT::scalar(64)) &&
         "32=>64 Zext is handled by tablegen");

  if (SrcTy != LLT::scalar(1))
    return false;

  unsigned AndOpc;
  if (DstTy == LLT::scalar(8))
    AndOpc = X86::AND8ri;
  else if (DstTy == LLT::scalar(16))
    AndOpc = X86::AND16ri;
  else if (DstTy == LLT::scalar(32))
    AndOpc = X86::AND32ri;
  else if (DstTy == LLT::scalar(64))
    AndOpc = X86::AND64ri32;
  else
    return false;

  Register DefReg = SrcReg;
  if (DstTy != LLT::scalar(8)) {
    Register ImpDefReg =
        MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
    BuildMI(*I.getParent(), I, I.getDebugLoc(),
            TII.get(TargetOpcode::IMPLICIT_DEF), ImpDefReg);

    DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
    BuildMI(*I.getParent(), I, I.getDebugLoc(),
            TII.get(TargetOpcode::INSERT_SUBREG), DefReg)
        .addReg(ImpDefReg)
        .addReg(SrcReg)
        .addImm(X86::sub_8bit);
  }

  MachineInstr &AndInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg)
           .addReg(DefReg)
           .addImm(1);

  constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectAnyext(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ANYEXT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

  assert(DstRB.getID() == SrcRB.getID() &&
         "G_ANYEXT input/output on different banks\n");

  assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
         "G_ANYEXT incorrect operand size");

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

  // If this is an ANY_EXT of a value that lives in a floating-point class
  // and goes into a vector class, just replace it with a copy, as we are
  // able to select it as a regular move.
  if (canTurnIntoCOPY(SrcRC, DstRC))
    return selectTurnIntoCOPY(I, MRI, SrcReg, SrcRC, DstReg, DstRC);

  if (DstRB.getID() != X86::GPRRegBankID)
    return false;

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  if (SrcRC == DstRC) {
    I.setDesc(TII.get(X86::COPY));
    return true;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(),
          TII.get(TargetOpcode::SUBREG_TO_REG))
      .addDef(DstReg)
      .addImm(0)
      .addReg(SrcReg)
      .addImm(getSubRegIndex(SrcRC));

  I.eraseFromParent();
  return true;
}

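// Illustrative: selectCmp (below) lowers a scalar compare such as
//   %c:gpr(s1) = G_ICMP intpred(slt), %a(s32), %b(s32)
// into a flag-setting compare plus a flag read:
//   CMP32rr %a, %b, implicit-def $eflags
//   %c:gr8 = SETCCr X86::COND_L, implicit $eflags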
bool X86InstructionSelector::selectCmp(MachineInstr &I,
                                       MachineRegisterInfo &MRI,
                                       MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ICMP) && "unexpected instruction");

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(
      (CmpInst::Predicate)I.getOperand(1).getPredicate());

  Register LHS = I.getOperand(2).getReg();
  Register RHS = I.getOperand(3).getReg();

  if (SwapArgs)
    std::swap(LHS, RHS);

  unsigned OpCmp;
  LLT Ty = MRI.getType(LHS);

  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 8:
    OpCmp = X86::CMP8rr;
    break;
  case 16:
    OpCmp = X86::CMP16rr;
    break;
  case 32:
    OpCmp = X86::CMP32rr;
    break;
  case 64:
    OpCmp = X86::CMP64rr;
    break;
  }

  MachineInstr &CmpInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
           .addReg(LHS)
           .addReg(RHS);

  MachineInstr &SetInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr),
               I.getOperand(0).getReg())
           .addImm(CC);

  constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  constrainSelectedInstRegOperands(SetInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectFCmp(MachineInstr &I,
                                        MachineRegisterInfo &MRI,
                                        MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction");

  Register LhsReg = I.getOperand(2).getReg();
  Register RhsReg = I.getOperand(3).getReg();
  CmpInst::Predicate Predicate =
      (CmpInst::Predicate)I.getOperand(1).getPredicate();

  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
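  // UCOMIS* reports "unordered" as ZF=PF=CF=1, so OEQ is ZF==1 && PF==0
  // (SETE AND SETNP) and UNE is ZF==0 || PF==1 (SETNE OR SETP).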
  static const uint16_t SETFOpcTable[2][3] = {
      {X86::COND_E, X86::COND_NP, X86::AND8rr},
      {X86::COND_NE, X86::COND_P, X86::OR8rr}};
  const uint16_t *SETFOpc = nullptr;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_OEQ:
    SETFOpc = &SETFOpcTable[0][0];
    break;
  case CmpInst::FCMP_UNE:
    SETFOpc = &SETFOpcTable[1][0];
    break;
  }

  assert((LhsReg.isVirtual() && RhsReg.isVirtual()) &&
         "Both arguments of FCMP need to be virtual!");
  auto *LhsBank = RBI.getRegBank(LhsReg, MRI, TRI);
  [[maybe_unused]] auto *RhsBank = RBI.getRegBank(RhsReg, MRI, TRI);
  assert((LhsBank == RhsBank) &&
         "Both banks assigned to FCMP arguments need to be same!");

  // Compute the opcode for the CMP instruction.
  unsigned OpCmp;
  LLT Ty = MRI.getType(LhsReg);
  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 32:
    OpCmp = LhsBank->getID() == X86::PSRRegBankID ? X86::UCOM_FpIr32
                                                  : X86::UCOMISSrr;
    break;
  case 64:
    OpCmp = LhsBank->getID() == X86::PSRRegBankID ? X86::UCOM_FpIr64
                                                  : X86::UCOMISDrr;
    break;
  case 80:
    OpCmp = X86::UCOM_FpIr80;
    break;
  }

  Register ResultReg = I.getOperand(0).getReg();
  RBI.constrainGenericRegister(
      ResultReg,
      *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI);
  if (SETFOpc) {
    MachineInstr &CmpInst =
        *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
             .addReg(LhsReg)
             .addReg(RhsReg);

    Register FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass);
    Register FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass);
    MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(X86::SETCCr), FlagReg1)
                              .addImm(SETFOpc[0]);
    MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(X86::SETCCr), FlagReg2)
                              .addImm(SETFOpc[1]);
    MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(SETFOpc[2]), ResultReg)
                              .addReg(FlagReg1)
                              .addReg(FlagReg2);
    constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set1, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set2, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set3, TII, TRI, RBI);

    I.eraseFromParent();
    return true;
  }

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

  if (SwapArgs)
    std::swap(LhsReg, RhsReg);

  // Emit a compare of LHS/RHS.
  MachineInstr &CmpInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
           .addReg(LhsReg)
           .addReg(RhsReg);

  MachineInstr &Set =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr),
               ResultReg)
           .addImm(CC);
  constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  constrainSelectedInstRegOperands(Set, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

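// Illustrative: selectUAddSub (below) maps
//   %d(s32), %cout(s1) = G_UADDO %a, %b
// onto the flag-setting ALU form plus a flag copy:
//   %d:gr32 = ADD32rr %a, %b, implicit-def $eflags
//   %cout = COPY $eflags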
bool X86InstructionSelector::selectUAddSub(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_UADDE ||
          I.getOpcode() == TargetOpcode::G_UADDO ||
          I.getOpcode() == TargetOpcode::G_USUBE ||
          I.getOpcode() == TargetOpcode::G_USUBO) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register CarryOutReg = I.getOperand(1).getReg();
  const Register Op0Reg = I.getOperand(2).getReg();
  const Register Op1Reg = I.getOperand(3).getReg();
  bool IsSub = I.getOpcode() == TargetOpcode::G_USUBE ||
               I.getOpcode() == TargetOpcode::G_USUBO;
  bool HasCarryIn = I.getOpcode() == TargetOpcode::G_UADDE ||
                    I.getOpcode() == TargetOpcode::G_USUBE;

  const LLT DstTy = MRI.getType(DstReg);
  assert(DstTy.isScalar() && "selectUAddSub only supported for scalar types");

  // TODO: Handle immediate argument variants?
  unsigned OpADC, OpADD, OpSBB, OpSUB;
  switch (DstTy.getSizeInBits()) {
  case 8:
    OpADC = X86::ADC8rr;
    OpADD = X86::ADD8rr;
    OpSBB = X86::SBB8rr;
    OpSUB = X86::SUB8rr;
    break;
  case 16:
    OpADC = X86::ADC16rr;
    OpADD = X86::ADD16rr;
    OpSBB = X86::SBB16rr;
    OpSUB = X86::SUB16rr;
    break;
  case 32:
    OpADC = X86::ADC32rr;
    OpADD = X86::ADD32rr;
    OpSBB = X86::SBB32rr;
    OpSUB = X86::SUB32rr;
    break;
  case 64:
    OpADC = X86::ADC64rr;
    OpADD = X86::ADD64rr;
    OpSBB = X86::SBB64rr;
    OpSUB = X86::SUB64rr;
    break;
  default:
    llvm_unreachable("selectUAddSub unsupported type.");
  }

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);

  unsigned Opcode = IsSub ? OpSUB : OpADD;

  // G_UADDE/G_USUBE - find the CarryIn def instruction.
  if (HasCarryIn) {
    Register CarryInReg = I.getOperand(4).getReg();
    MachineInstr *Def = MRI.getVRegDef(CarryInReg);
    while (Def->getOpcode() == TargetOpcode::G_TRUNC) {
      CarryInReg = Def->getOperand(1).getReg();
      Def = MRI.getVRegDef(CarryInReg);
    }

    // TODO: handle more CF-generating instructions.
    if (Def->getOpcode() == TargetOpcode::G_UADDE ||
        Def->getOpcode() == TargetOpcode::G_UADDO ||
        Def->getOpcode() == TargetOpcode::G_USUBE ||
        Def->getOpcode() == TargetOpcode::G_USUBO) {
      // The carry was set by the previous ADD/SUB.
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY),
              X86::EFLAGS)
          .addReg(CarryInReg);

      if (!RBI.constrainGenericRegister(CarryInReg, *DstRC, MRI))
        return false;

      Opcode = IsSub ? OpSBB : OpADC;
    } else if (auto val = getIConstantVRegVal(CarryInReg, MRI)) {
      // The carry-in is a constant; only 0 is supported.
      if (*val != 0)
        return false;

      Opcode = IsSub ? OpSUB : OpADD;
    } else
      return false;
  }

  MachineInstr &Inst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg)
           .addReg(Op0Reg)
           .addReg(Op1Reg);

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg)
      .addReg(X86::EFLAGS);

  if (!constrainSelectedInstRegOperands(Inst, TII, TRI, RBI) ||
      !RBI.constrainGenericRegister(CarryOutReg, *DstRC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

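// Illustrative: selectExtract (below) turns an upper-half extract,
//   %d(<4 x s32>) = G_EXTRACT %s(<8 x s32>), 128
// into VEXTRACTF128rri %s, 1 (on AVX without VLX); the bit offset is scaled
// down to a 128-bit lane index.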
bool X86InstructionSelector::selectExtract(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_EXTRACT) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  int64_t Index = I.getOperand(2).getImm();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  // For now, handle vector types only.
  if (!DstTy.isVector())
    return false;

  if (Index % DstTy.getSizeInBits() != 0)
    return false; // Not a subvector extract.

  if (Index == 0) {
    // Replace by an extract-subreg copy.
    if (!emitExtractSubreg(DstReg, SrcReg, I, MRI, MF))
      return false;

    I.eraseFromParent();
    return true;
  }

  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (SrcTy.getSizeInBits() == 256 && DstTy.getSizeInBits() == 128) {
    if (HasVLX)
      I.setDesc(TII.get(X86::VEXTRACTF32X4Z256rri));
    else if (HasAVX)
      I.setDesc(TII.get(X86::VEXTRACTF128rri));
    else
      return false;
  } else if (SrcTy.getSizeInBits() == 512 && HasAVX512) {
    if (DstTy.getSizeInBits() == 128)
      I.setDesc(TII.get(X86::VEXTRACTF32X4Zrri));
    else if (DstTy.getSizeInBits() == 256)
      I.setDesc(TII.get(X86::VEXTRACTF64X4Zrri));
    else
      return false;
  } else
    return false;

  // Convert to X86 VEXTRACT immediate.
  Index = Index / DstTy.getSizeInBits();
  I.getOperand(2).setImm(Index);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::emitExtractSubreg(Register DstReg, Register SrcReg,
                                               MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  unsigned SubIdx = X86::NoSubRegister;

  if (!DstTy.isVector() || !SrcTy.isVector())
    return false;

  assert(SrcTy.getSizeInBits() > DstTy.getSizeInBits() &&
         "Incorrect Src/Dst register size");

  if (DstTy.getSizeInBits() == 128)
    SubIdx = X86::sub_xmm;
  else if (DstTy.getSizeInBits() == 256)
    SubIdx = X86::sub_ymm;
  else
    return false;

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n");
    return false;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), DstReg)
      .addReg(SrcReg, 0, SubIdx);

  return true;
}

bool X86InstructionSelector::emitInsertSubreg(Register DstReg, Register SrcReg,
                                              MachineInstr &I,
                                              MachineRegisterInfo &MRI,
                                              MachineFunction &MF) const {
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  unsigned SubIdx = X86::NoSubRegister;

  // TODO: support scalar types
  if (!DstTy.isVector() || !SrcTy.isVector())
    return false;

  assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() &&
         "Incorrect Src/Dst register size");

  if (SrcTy.getSizeInBits() == 128)
    SubIdx = X86::sub_xmm;
  else if (SrcTy.getSizeInBits() == 256)
    SubIdx = X86::sub_ymm;
  else
    return false;

  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n");
    return false;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY))
      .addReg(DstReg, RegState::DefineNoRead, SubIdx)
      .addReg(SrcReg);

  return true;
}

1396 | bool X86InstructionSelector::selectInsert(MachineInstr &I, |
1397 | MachineRegisterInfo &MRI, |
1398 | MachineFunction &MF) const { |
1399 | assert((I.getOpcode() == TargetOpcode::G_INSERT) && "unexpected instruction" ); |
1400 | |
1401 | const Register DstReg = I.getOperand(i: 0).getReg(); |
1402 | const Register SrcReg = I.getOperand(i: 1).getReg(); |
1403 | const Register InsertReg = I.getOperand(i: 2).getReg(); |
1404 | int64_t Index = I.getOperand(i: 3).getImm(); |
1405 | |
1406 | const LLT DstTy = MRI.getType(Reg: DstReg); |
1407 | const LLT InsertRegTy = MRI.getType(Reg: InsertReg); |
1408 | |
1409 | // Meanwile handle vector type only. |
1410 | if (!DstTy.isVector()) |
1411 | return false; |
1412 | |
1413 | if (Index % InsertRegTy.getSizeInBits() != 0) |
1414 | return false; // Not insert subvector. |
1415 | |
1416 | if (Index == 0 && MRI.getVRegDef(Reg: SrcReg)->isImplicitDef()) { |
1417 | // Replace by subreg copy. |
1418 | if (!emitInsertSubreg(DstReg, SrcReg: InsertReg, I, MRI, MF)) |
1419 | return false; |
1420 | |
1421 | I.eraseFromParent(); |
1422 | return true; |
1423 | } |

  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) {
    if (HasVLX)
      I.setDesc(TII.get(X86::VINSERTF32X4Z256rri));
    else if (HasAVX)
      I.setDesc(TII.get(X86::VINSERTF128rri));
    else
      return false;
  } else if (DstTy.getSizeInBits() == 512 && HasAVX512) {
    if (InsertRegTy.getSizeInBits() == 128)
      I.setDesc(TII.get(X86::VINSERTF32X4Zrri));
    else if (InsertRegTy.getSizeInBits() == 256)
      I.setDesc(TII.get(X86::VINSERTF64X4Zrri));
    else
      return false;
  } else
    return false;

  // Convert to X86 VINSERT immediate.
  Index = Index / InsertRegTy.getSizeInBits();
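  // E.g., inserting the upper 128-bit half of a 256-bit vector comes in
  // with Index == 128 and a 128-bit insert type, yielding immediate 1
  // (as in "vinsertf128 $1"): the VINSERT* immediates count
  // subvector-sized lanes, not bits.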

  I.getOperand(3).setImm(Index);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::selectUnmergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
  assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) &&
         "unexpected instruction");

  // Split to extracts.
  unsigned NumDefs = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumDefs).getReg();
  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();

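  // E.g., unmerging a 256-bit source into two 128-bit defs emits G_EXTRACT
  // at bit offsets 0 and 128; each extract is selected immediately via the
  // recursive select() call below.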
  for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
    MachineInstr &ExtrInst =
        *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                 TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg())
             .addReg(SrcReg)
             .addImm(Idx * DefSize);

    if (!select(ExtrInst))
      return false;
  }

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
  assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES ||
          I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) &&
         "unexpected instruction");

  // Split to inserts.
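  // E.g., a G_CONCAT_VECTORS of two 128-bit sources into a 256-bit
  // destination becomes a subregister copy of the first source into a
  // fresh 256-bit register, then a G_INSERT of the second source at bit
  // offset 128, each selected recursively below.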
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg0 = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg0);
  unsigned SrcSize = SrcTy.getSizeInBits();

  const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);

  // For the first source, use a subregister copy.
  Register DefReg = MRI.createGenericVirtualRegister(DstTy);
  MRI.setRegBank(DefReg, RegBank);
  if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF))
    return false;

  for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) {
    Register Tmp = MRI.createGenericVirtualRegister(DstTy);
    MRI.setRegBank(Tmp, RegBank);

    MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                        TII.get(TargetOpcode::G_INSERT), Tmp)
                                    .addReg(DefReg)
                                    .addReg(I.getOperand(Idx).getReg())
                                    .addImm((Idx - 1) * SrcSize);

    DefReg = Tmp;

    if (!select(InsertInst))
      return false;
  }

  MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::COPY), DstReg)
                                .addReg(DefReg);

  if (!select(CopyInst))
    return false;

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectCondBranch(MachineInstr &I,
                                              MachineRegisterInfo &MRI,
                                              MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_BRCOND) && "unexpected instruction");

  const Register CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

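  // Lowered as a bit test plus a conditional jump, e.g.:
  //   testb $1, %cl
  //   jne   .LBB0_1
  // i.e. branch when bit 0 of the condition register is set.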
  MachineInstr &TestInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TEST8ri))
           .addReg(CondReg)
           .addImm(1);
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::JCC_1))
      .addMBB(DestMBB)
      .addImm(X86::COND_NE);

  constrainSelectedInstRegOperands(TestInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::materializeFP(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_FCONSTANT) &&
         "unexpected instruction");

  // Can't handle alternate code models yet.
  CodeModel::Model CM = TM.getCodeModel();
  if (CM != CodeModel::Small && CM != CodeModel::Large)
    return false;

  const Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  // Create the load from the constant pool.
  const ConstantFP *CFP = I.getOperand(1).getFPImm();
  const auto &DL = MF.getDataLayout();
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
  const DebugLoc &DbgLoc = I.getDebugLoc();

  unsigned Opc =
      getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment);

  unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment);
  MachineInstr *LoadInst = nullptr;
  unsigned char OpFlag = STI.classifyLocalReference(nullptr);

  if (CM == CodeModel::Large && STI.is64Bit()) {
    // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
    // they cannot be folded into immediate fields.
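    //
    // E.g., a double constant is then typically materialized as
    //   movabsq $.LCPI0_0, %rax
    //   movsd   (%rax), %xmm0
    // with the actual load opcode chosen by getLoadStoreOp above.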

    Register AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass);
    BuildMI(*I.getParent(), I, DbgLoc, TII.get(X86::MOV64ri), AddrReg)
        .addConstantPoolIndex(CPI, 0, OpFlag);

    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
        LLT::pointer(0, DL.getPointerSizeInBits()), Alignment);

    LoadInst =
        addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
                     AddrReg)
            .addMemOperand(MMO);

  } else if (CM == CodeModel::Small || !STI.is64Bit()) {
    // Handle the case when globals fit in our immediate field.
    // This is true for X86-32 always and X86-64 when in -mcmodel=small mode.

    // x86-32 PIC requires a PIC base register for constant pools.
    unsigned PICBase = 0;
    if (OpFlag == X86II::MO_PIC_BASE_OFFSET || OpFlag == X86II::MO_GOTOFF) {
      // PICBase can be allocated by TII.getGlobalBaseReg(&MF).
      // In DAG ISel, the code that initializes it is generated by the CGBR
      // pass.
      return false; // TODO: support this mode.
    } else if (STI.is64Bit() && TM.getCodeModel() == CodeModel::Small)
      PICBase = X86::RIP;

    LoadInst = addConstantPoolReference(
        BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), CPI, PICBase,
        OpFlag);
  } else
    return false;

  constrainSelectedInstRegOperands(*LoadInst, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectImplicitDefOrPHI(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert((I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
          I.getOpcode() == TargetOpcode::G_PHI) &&
         "unexpected instruction");

  Register DstReg = I.getOperand(0).getReg();

  if (!MRI.getRegClassOrNull(DstReg)) {
    const LLT DstTy = MRI.getType(DstReg);
    const TargetRegisterClass *RC = getRegClass(DstTy, DstReg, MRI);

    if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }
  }

  if (I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
    I.setDesc(TII.get(X86::IMPLICIT_DEF));
  else
    I.setDesc(TII.get(X86::PHI));

  return true;
}

bool X86InstructionSelector::selectMulDivRem(MachineInstr &I,
                                             MachineRegisterInfo &MRI,
                                             MachineFunction &MF) const {
  // The implementation of this function is adapted from X86FastISel.
  assert((I.getOpcode() == TargetOpcode::G_MUL ||
          I.getOpcode() == TargetOpcode::G_SMULH ||
          I.getOpcode() == TargetOpcode::G_UMULH ||
          I.getOpcode() == TargetOpcode::G_SDIV ||
          I.getOpcode() == TargetOpcode::G_SREM ||
          I.getOpcode() == TargetOpcode::G_UDIV ||
          I.getOpcode() == TargetOpcode::G_UREM) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register Op1Reg = I.getOperand(1).getReg();
  const Register Op2Reg = I.getOperand(2).getReg();

  const LLT RegTy = MRI.getType(DstReg);
  assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
         "Arguments and return value types must match");

  const RegisterBank *RegRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (!RegRB || RegRB->getID() != X86::GPRRegBankID)
    return false;

  const static unsigned NumTypes = 4; // i8, i16, i32, i64
  const static unsigned NumOps = 7;   // SDiv/SRem/UDiv/URem/Mul/SMulH/UMulH
  const static bool S = true;         // IsSigned
  const static bool U = false;        // !IsSigned
  const static unsigned Copy = TargetOpcode::COPY;

  // For the X86 IDIV instruction, in most cases the dividend
  // (numerator) must be in a specific register pair highreg:lowreg,
  // producing the quotient in lowreg and the remainder in highreg.
  // For most data types, to set up the instruction, the dividend is
  // copied into lowreg, and lowreg is sign-extended into highreg. The
  // exception is i8, where the dividend is defined as a single register rather
  // than a register pair, and we therefore directly sign-extend the dividend
  // into lowreg, instead of copying, and ignore the highreg.
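  //
  // E.g., a 32-bit G_SDIV becomes:
  //   COPY    $eax, %op1
  //   CDQ                  ; sign-extend EAX into EDX
  //   IDIV32r %op2         ; quotient in EAX, remainder in EDX
  //   COPY    %dst, $eax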
  const static struct MulDivRemEntry {
    // The following portion depends only on the data type.
    unsigned SizeInBits;
    unsigned LowInReg;  // low part of the register pair
    unsigned HighInReg; // high part of the register pair
    // The following portion depends on both the data type and the operation.
    struct MulDivRemResult {
      unsigned OpMulDivRem;  // The specific MUL/DIV opcode to use.
      unsigned OpSignExtend; // Opcode for sign-extending lowreg into
                             // highreg, or copying a zero into highreg.
      unsigned OpCopy;       // Opcode for copying dividend into lowreg, or
                             // zero/sign-extending into lowreg for i8.
      unsigned ResultReg;    // Register containing the desired result.
      bool IsOpSigned;       // Whether to use signed or unsigned form.
    } ResultTable[NumOps];
  } OpTable[NumTypes] = {
      {8,
       X86::AX,
       0,
       {
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S}, // SDiv
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U},  // UDiv
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U},  // URem
           {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AL, S}, // Mul
           {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SMulH
           {X86::MUL8r, 0, X86::MOVZX16rr8, X86::AH, U},  // UMulH
       }}, // i8
      {16,
       X86::AX,
       X86::DX,
       {
           {X86::IDIV16r, X86::CWD, Copy, X86::AX, S},     // SDiv
           {X86::IDIV16r, X86::CWD, Copy, X86::DX, S},     // SRem
           {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U},  // UDiv
           {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U},  // URem
           {X86::IMUL16r, X86::MOV32r0, Copy, X86::AX, S}, // Mul
           {X86::IMUL16r, X86::MOV32r0, Copy, X86::DX, S}, // SMulH
           {X86::MUL16r, X86::MOV32r0, Copy, X86::DX, U},  // UMulH
       }}, // i16
      {32,
       X86::EAX,
       X86::EDX,
       {
           {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S},     // SDiv
           {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S},     // SRem
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U},  // UDiv
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U},  // URem
           {X86::IMUL32r, X86::MOV32r0, Copy, X86::EAX, S}, // Mul
           {X86::IMUL32r, X86::MOV32r0, Copy, X86::EDX, S}, // SMulH
           {X86::MUL32r, X86::MOV32r0, Copy, X86::EDX, U},  // UMulH
       }}, // i32
      {64,
       X86::RAX,
       X86::RDX,
       {
           {X86::IDIV64r, X86::CQO, Copy, X86::RAX, S},     // SDiv
           {X86::IDIV64r, X86::CQO, Copy, X86::RDX, S},     // SRem
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U},  // UDiv
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U},  // URem
           {X86::IMUL64r, X86::MOV32r0, Copy, X86::RAX, S}, // Mul
           {X86::IMUL64r, X86::MOV32r0, Copy, X86::RDX, S}, // SMulH
           {X86::MUL64r, X86::MOV32r0, Copy, X86::RDX, U},  // UMulH
       }}, // i64
  };

1748 | |
1749 | auto OpEntryIt = llvm::find_if(Range: OpTable, P: [RegTy](const MulDivRemEntry &El) { |
1750 | return El.SizeInBits == RegTy.getSizeInBits(); |
1751 | }); |
1752 | if (OpEntryIt == std::end(arr: OpTable)) |
1753 | return false; |
1754 | |
1755 | unsigned OpIndex; |
1756 | switch (I.getOpcode()) { |
1757 | default: |
1758 | llvm_unreachable("Unexpected mul/div/rem opcode" ); |
1759 | case TargetOpcode::G_SDIV: |
1760 | OpIndex = 0; |
1761 | break; |
1762 | case TargetOpcode::G_SREM: |
1763 | OpIndex = 1; |
1764 | break; |
1765 | case TargetOpcode::G_UDIV: |
1766 | OpIndex = 2; |
1767 | break; |
1768 | case TargetOpcode::G_UREM: |
1769 | OpIndex = 3; |
1770 | break; |
1771 | case TargetOpcode::G_MUL: |
1772 | OpIndex = 4; |
1773 | break; |
1774 | case TargetOpcode::G_SMULH: |
1775 | OpIndex = 5; |
1776 | break; |
1777 | case TargetOpcode::G_UMULH: |
1778 | OpIndex = 6; |
1779 | break; |
1780 | } |
  const MulDivRemEntry &TypeEntry = *OpEntryIt;
  const MulDivRemEntry::MulDivRemResult &OpEntry =
      TypeEntry.ResultTable[OpIndex];

  const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
  if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // Move op1 into low-order input register.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy),
          TypeEntry.LowInReg)
      .addReg(Op1Reg);

  // Zero-extend or sign-extend into high-order input register.
  if (OpEntry.OpSignExtend) {
    if (OpEntry.IsOpSigned)
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(OpEntry.OpSignExtend));
    else {
      Register Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass);
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0),
              Zero32);

      // Copy the zero into the appropriate sub/super/identical physical
      // register. Unfortunately the operations needed are not uniform enough
      // to fit neatly into the table above.
      if (RegTy.getSizeInBits() == 16) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32, 0, X86::sub_16bit);
      } else if (RegTy.getSizeInBits() == 32) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32);
      } else if (RegTy.getSizeInBits() == 64) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
            .addImm(0)
            .addReg(Zero32)
            .addImm(X86::sub_32bit);
      }
    }
  }

  // Generate the DIV/IDIV/MUL/IMUL instruction.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpMulDivRem))
      .addReg(Op2Reg);

  // For i8 remainder, we can't reference AH directly, as we'll end
  // up with bogus copies like %r9b = COPY %ah. Reference AX
  // instead, to prevent AH references in a REX-prefixed instruction.
  //
  // The current assumption of the fast register allocator is that isel
  // won't generate explicit references to the GR8_NOREX registers. If
  // the allocator and/or the backend get enhanced to be more robust in
  // that regard, this can be, and should be, removed.
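  //
  // E.g., instead of "COPY %dst, $ah" we emit:
  //   COPY    %tmp16, $ax
  //   SHR16ri %res16, %tmp16, 8   ; remainder now in the low byte
  //   COPY    %dst, %res16.sub_8bit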
  if (OpEntry.ResultReg == X86::AH && STI.is64Bit()) {
    Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
        .addReg(X86::AX);

    // Shift AX right by 8 bits instead of using AH.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SHR16ri),
            ResultSuperReg)
        .addReg(SourceSuperReg)
        .addImm(8);

    // Now reference the 8-bit subreg of the result.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
            DstReg)
        .addReg(ResultSuperReg, 0, X86::sub_8bit);
  } else {
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
            DstReg)
        .addReg(OpEntry.ResultReg);
  }
  I.eraseFromParent();

  return true;
}

bool X86InstructionSelector::selectSelect(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  GSelect &Sel = cast<GSelect>(I);
  Register DstReg = Sel.getReg(0);
  BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(X86::TEST32rr))
      .addReg(Sel.getCondReg())
      .addReg(Sel.getCondReg());
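
  // COND_E fires when the TEST above sees a zero condition register, so the
  // CMOV below picks the false operand exactly when the condition is false,
  // and the true operand otherwise.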
  unsigned OpCmp;
  LLT Ty = MRI.getType(DstReg);
  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 8:
    OpCmp = X86::CMOV_GR8;
    break;
  case 16:
    OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
    break;
  case 32:
    OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
    break;
  case 64:
    assert(STI.is64Bit() && STI.canUseCMOV());
    OpCmp = X86::CMOV64rr;
    break;
  }
  BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
      .addReg(Sel.getTrueReg())
      .addReg(Sel.getFalseReg())
      .addImm(X86::COND_E);

  const TargetRegisterClass *DstRC = getRegClass(Ty, DstReg, MRI);
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain CMOV\n");
    return false;
  }

  Sel.eraseFromParent();
  return true;
}

InstructionSelector::ComplexRendererFns
X86InstructionSelector::selectAddr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineIRBuilder MIRBuilder(*MI);

  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
  MachineInstr *Ptr = MRI.getVRegDef(Root.getReg());
  X86AddressMode AM;
  X86SelectAddress(*Ptr, TM, MRI, STI, AM);

  if (AM.IndexReg)
    return std::nullopt;

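  // Render the five x86 memory operands (base, scale, index, displacement,
  // segment), i.e. the [Base + Scale*Index + Disp] addressing form; index
  // and segment are left as zero here since AM.IndexReg was rejected above.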
  return {// Base
          {[=](MachineInstrBuilder &MIB) {
             if (AM.BaseType == X86AddressMode::RegBase)
               MIB.addUse(AM.Base.Reg);
             else {
               assert(AM.BaseType == X86AddressMode::FrameIndexBase &&
                      "Unknown type of address base");
               MIB.addFrameIndex(AM.Base.FrameIndex);
             }
           },
           // Scale
           [=](MachineInstrBuilder &MIB) { MIB.addImm(AM.Scale); },
           // Index
           [=](MachineInstrBuilder &MIB) { MIB.addUse(0); },
           // Disp
           [=](MachineInstrBuilder &MIB) {
             if (AM.GV)
               MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
             else if (AM.CP)
               MIB.addConstantPoolIndex(AM.Disp, 0, AM.GVOpFlags);
             else
               MIB.addImm(AM.Disp);
           },
           // Segment
           [=](MachineInstrBuilder &MIB) { MIB.addUse(0); }}};
}

InstructionSelector *
llvm::createX86InstructionSelector(const X86TargetMachine &TM,
                                   const X86Subtarget &Subtarget,
                                   const X86RegisterBankInfo &RBI) {
  return new X86InstructionSelector(TM, Subtarget, RBI);
}