1 | //===-- SystemZElimCompare.cpp - Eliminate comparison instructions --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass: |
10 | // (1) tries to remove compares if CC already contains the required information |
11 | // (2) fuses compares and branches into COMPARE AND BRANCH instructions |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "SystemZ.h" |
16 | #include "SystemZInstrInfo.h" |
17 | #include "SystemZTargetMachine.h" |
18 | #include "llvm/ADT/SmallVector.h" |
19 | #include "llvm/ADT/Statistic.h" |
20 | #include "llvm/ADT/StringRef.h" |
21 | #include "llvm/CodeGen/LiveRegUnits.h" |
22 | #include "llvm/CodeGen/MachineBasicBlock.h" |
23 | #include "llvm/CodeGen/MachineFunction.h" |
24 | #include "llvm/CodeGen/MachineFunctionPass.h" |
25 | #include "llvm/CodeGen/MachineInstr.h" |
26 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
27 | #include "llvm/CodeGen/MachineOperand.h" |
28 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
29 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
30 | #include "llvm/MC/MCInstrDesc.h" |
31 | #include <cassert> |
32 | #include <cstdint> |
33 | |
34 | using namespace llvm; |
35 | |
36 | #define DEBUG_TYPE "systemz-elim-compare" |
37 | |
38 | STATISTIC(BranchOnCounts, "Number of branch-on-count instructions" ); |
39 | STATISTIC(LoadAndTraps, "Number of load-and-trap instructions" ); |
40 | STATISTIC(EliminatedComparisons, "Number of eliminated comparisons" ); |
41 | STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions" ); |
42 | |
43 | namespace { |
44 | |
// Summarises how a particular register is referenced by one or more
// instructions.
struct Reference {
  // Set if the register is defined in some form, either directly or via a
  // sub- or super-register.
  bool Def = false;
  // Set if the register is used in some form, either directly or via a
  // sub- or super-register.
  bool Use = false;

  Reference() = default;

  // Accumulate the references recorded in Other into this object.
  Reference &operator|=(const Reference &Other) {
    Def = Def || Other.Def;
    Use = Use || Other.Use;
    return *this;
  }

  // True if at least one reference (def or use) has been recorded.
  explicit operator bool() const { return Use || Def; }
};
63 | |
64 | class SystemZElimCompare : public MachineFunctionPass { |
65 | public: |
66 | static char ID; |
67 | |
68 | SystemZElimCompare() : MachineFunctionPass(ID) {} |
69 | |
70 | bool processBlock(MachineBasicBlock &MBB); |
71 | bool runOnMachineFunction(MachineFunction &F) override; |
72 | |
73 | MachineFunctionProperties getRequiredProperties() const override { |
74 | return MachineFunctionProperties().setNoVRegs(); |
75 | } |
76 | |
77 | private: |
78 | Reference getRegReferences(MachineInstr &MI, unsigned Reg); |
79 | bool convertToBRCT(MachineInstr &MI, MachineInstr &Compare, |
80 | SmallVectorImpl<MachineInstr *> &CCUsers); |
81 | bool convertToLoadAndTrap(MachineInstr &MI, MachineInstr &Compare, |
82 | SmallVectorImpl<MachineInstr *> &CCUsers); |
83 | bool convertToLoadAndTest(MachineInstr &MI, MachineInstr &Compare, |
84 | SmallVectorImpl<MachineInstr *> &CCUsers); |
85 | bool convertToLogical(MachineInstr &MI, MachineInstr &Compare, |
86 | SmallVectorImpl<MachineInstr *> &CCUsers); |
87 | bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare, |
88 | SmallVectorImpl<MachineInstr *> &CCUsers, |
89 | unsigned ConvOpc = 0); |
90 | bool optimizeCompareZero(MachineInstr &Compare, |
91 | SmallVectorImpl<MachineInstr *> &CCUsers); |
92 | bool fuseCompareOperations(MachineInstr &Compare, |
93 | SmallVectorImpl<MachineInstr *> &CCUsers); |
94 | |
95 | const SystemZInstrInfo *TII = nullptr; |
96 | const TargetRegisterInfo *TRI = nullptr; |
97 | }; |
98 | |
99 | char SystemZElimCompare::ID = 0; |
100 | |
101 | } // end anonymous namespace |
102 | |
103 | INITIALIZE_PASS(SystemZElimCompare, DEBUG_TYPE, |
104 | "SystemZ Comparison Elimination" , false, false) |
105 | |
106 | // Returns true if MI is an instruction whose output equals the value in Reg. |
107 | static bool preservesValueOf(MachineInstr &MI, unsigned Reg) { |
108 | switch (MI.getOpcode()) { |
109 | case SystemZ::LR: |
110 | case SystemZ::LGR: |
111 | case SystemZ::LGFR: |
112 | case SystemZ::LTR: |
113 | case SystemZ::LTGR: |
114 | case SystemZ::LTGFR: |
115 | if (MI.getOperand(i: 1).getReg() == Reg) |
116 | return true; |
117 | } |
118 | |
119 | return false; |
120 | } |
121 | |
122 | // Return true if any CC result of MI would (perhaps after conversion) |
123 | // reflect the value of Reg. |
124 | static bool resultTests(MachineInstr &MI, unsigned Reg) { |
125 | if (MI.getNumOperands() > 0 && MI.getOperand(i: 0).isReg() && |
126 | MI.getOperand(i: 0).isDef() && MI.getOperand(i: 0).getReg() == Reg) |
127 | return true; |
128 | |
129 | return (preservesValueOf(MI, Reg)); |
130 | } |
131 | |
132 | // Describe the references to Reg or any of its aliases in MI. |
133 | Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) { |
134 | Reference Ref; |
135 | if (MI.isDebugInstr()) |
136 | return Ref; |
137 | |
138 | for (const MachineOperand &MO : MI.operands()) { |
139 | if (MO.isReg()) { |
140 | if (Register MOReg = MO.getReg()) { |
141 | if (TRI->regsOverlap(RegA: MOReg, RegB: Reg)) { |
142 | if (MO.isUse()) |
143 | Ref.Use = true; |
144 | else if (MO.isDef()) |
145 | Ref.Def = true; |
146 | } |
147 | } |
148 | } |
149 | } |
150 | return Ref; |
151 | } |
152 | |
153 | // Return true if this is a load and test which can be optimized the |
154 | // same way as compare instruction. |
155 | static bool isLoadAndTestAsCmp(MachineInstr &MI) { |
156 | // If we during isel used a load-and-test as a compare with 0, the |
157 | // def operand is dead. |
158 | return (MI.getOpcode() == SystemZ::LTEBR || |
159 | MI.getOpcode() == SystemZ::LTDBR || |
160 | MI.getOpcode() == SystemZ::LTXBR) && |
161 | MI.getOperand(i: 0).isDead(); |
162 | } |
163 | |
164 | // Return the source register of Compare, which is the unknown value |
165 | // being tested. |
166 | static unsigned getCompareSourceReg(MachineInstr &Compare) { |
167 | unsigned reg = 0; |
168 | if (Compare.isCompare()) |
169 | reg = Compare.getOperand(i: 0).getReg(); |
170 | else if (isLoadAndTestAsCmp(MI&: Compare)) |
171 | reg = Compare.getOperand(i: 1).getReg(); |
172 | assert(reg); |
173 | |
174 | return reg; |
175 | } |
176 | |
177 | // Compare compares the result of MI against zero. If MI is an addition |
178 | // of -1 and if CCUsers is a single branch on nonzero, eliminate the addition |
179 | // and convert the branch to a BRCT(G) or BRCTH. Return true on success. |
180 | bool SystemZElimCompare::convertToBRCT( |
181 | MachineInstr &MI, MachineInstr &Compare, |
182 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
183 | // Check whether we have an addition of -1. |
184 | unsigned Opcode = MI.getOpcode(); |
185 | unsigned BRCT; |
186 | if (Opcode == SystemZ::AHI) |
187 | BRCT = SystemZ::BRCT; |
188 | else if (Opcode == SystemZ::AGHI) |
189 | BRCT = SystemZ::BRCTG; |
190 | else if (Opcode == SystemZ::AIH) |
191 | BRCT = SystemZ::BRCTH; |
192 | else |
193 | return false; |
194 | if (MI.getOperand(i: 2).getImm() != -1) |
195 | return false; |
196 | |
197 | // Check whether we have a single JLH. |
198 | if (CCUsers.size() != 1) |
199 | return false; |
200 | MachineInstr *Branch = CCUsers[0]; |
201 | if (Branch->getOpcode() != SystemZ::BRC || |
202 | Branch->getOperand(i: 0).getImm() != SystemZ::CCMASK_ICMP || |
203 | Branch->getOperand(i: 1).getImm() != SystemZ::CCMASK_CMP_NE) |
204 | return false; |
205 | |
206 | // We already know that there are no references to the register between |
207 | // MI and Compare. Make sure that there are also no references between |
208 | // Compare and Branch. |
209 | unsigned SrcReg = getCompareSourceReg(Compare); |
210 | MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; |
211 | for (++MBBI; MBBI != MBBE; ++MBBI) |
212 | if (getRegReferences(MI&: *MBBI, Reg: SrcReg)) |
213 | return false; |
214 | |
215 | // The transformation is OK. Rebuild Branch as a BRCT(G) or BRCTH. |
216 | MachineOperand Target(Branch->getOperand(i: 2)); |
217 | while (Branch->getNumOperands()) |
218 | Branch->removeOperand(OpNo: 0); |
219 | Branch->setDesc(TII->get(Opcode: BRCT)); |
220 | MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); |
221 | MIB.add(MO: MI.getOperand(i: 0)).add(MO: MI.getOperand(i: 1)).add(MO: Target); |
222 | // Add a CC def to BRCT(G), since we may have to split them again if the |
223 | // branch displacement overflows. BRCTH has a 32-bit displacement, so |
224 | // this is not necessary there. |
225 | if (BRCT != SystemZ::BRCTH) |
226 | MIB.addReg(RegNo: SystemZ::CC, flags: RegState::ImplicitDefine | RegState::Dead); |
227 | // The debug instr tracking for the counter now used by BRCT needs to be |
228 | // updated. |
229 | MI.getParent()->getParent()->substituteDebugValuesForInst(Old: MI, New&: *MIB); |
230 | MI.eraseFromParent(); |
231 | return true; |
232 | } |
233 | |
234 | // Compare compares the result of MI against zero. If MI is a suitable load |
235 | // instruction and if CCUsers is a single conditional trap on zero, eliminate |
236 | // the load and convert the branch to a load-and-trap. Return true on success. |
237 | bool SystemZElimCompare::convertToLoadAndTrap( |
238 | MachineInstr &MI, MachineInstr &Compare, |
239 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
240 | unsigned LATOpcode = TII->getLoadAndTrap(Opcode: MI.getOpcode()); |
241 | if (!LATOpcode) |
242 | return false; |
243 | |
244 | // Check whether we have a single CondTrap that traps on zero. |
245 | if (CCUsers.size() != 1) |
246 | return false; |
247 | MachineInstr *Branch = CCUsers[0]; |
248 | if (Branch->getOpcode() != SystemZ::CondTrap || |
249 | Branch->getOperand(i: 0).getImm() != SystemZ::CCMASK_ICMP || |
250 | Branch->getOperand(i: 1).getImm() != SystemZ::CCMASK_CMP_EQ) |
251 | return false; |
252 | |
253 | // We already know that there are no references to the register between |
254 | // MI and Compare. Make sure that there are also no references between |
255 | // Compare and Branch. |
256 | unsigned SrcReg = getCompareSourceReg(Compare); |
257 | MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; |
258 | for (++MBBI; MBBI != MBBE; ++MBBI) |
259 | if (getRegReferences(MI&: *MBBI, Reg: SrcReg)) |
260 | return false; |
261 | |
262 | // The transformation is OK. Rebuild Branch as a load-and-trap. |
263 | while (Branch->getNumOperands()) |
264 | Branch->removeOperand(OpNo: 0); |
265 | Branch->setDesc(TII->get(Opcode: LATOpcode)); |
266 | MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) |
267 | .add(MO: MI.getOperand(i: 0)) |
268 | .add(MO: MI.getOperand(i: 1)) |
269 | .add(MO: MI.getOperand(i: 2)) |
270 | .add(MO: MI.getOperand(i: 3)); |
271 | // The debug instr tracking for the load target now used by the load-and-trap |
272 | // needs to be updated. |
273 | MI.getParent()->getParent()->substituteDebugValuesForInst(Old: MI, New&: *Branch); |
274 | MI.eraseFromParent(); |
275 | return true; |
276 | } |
277 | |
278 | // If MI is a load instruction, try to convert it into a LOAD AND TEST. |
279 | // Return true on success. |
280 | bool SystemZElimCompare::convertToLoadAndTest( |
281 | MachineInstr &MI, MachineInstr &Compare, |
282 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
283 | |
284 | // Try to adjust CC masks for the LOAD AND TEST opcode that could replace MI. |
285 | unsigned Opcode = TII->getLoadAndTest(Opcode: MI.getOpcode()); |
286 | if (!Opcode || !adjustCCMasksForInstr(MI, Compare, CCUsers, ConvOpc: Opcode)) |
287 | return false; |
288 | |
289 | // Rebuild to get the CC operand in the right place. |
290 | auto MIB = BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode)); |
291 | for (const auto &MO : MI.operands()) |
292 | MIB.add(MO); |
293 | MIB.setMemRefs(MI.memoperands()); |
294 | // The debug instr tracking for the load target now needs to be updated |
295 | // because the load has moved to a new instruction |
296 | MI.getParent()->getParent()->substituteDebugValuesForInst(Old: MI, New&: *MIB); |
297 | MI.eraseFromParent(); |
298 | |
299 | // Mark instruction as not raising an FP exception if applicable. We already |
300 | // verified earlier that this move is valid. |
301 | if (!Compare.mayRaiseFPException()) |
302 | MIB.setMIFlag(MachineInstr::MIFlag::NoFPExcept); |
303 | |
304 | return true; |
305 | } |
306 | |
307 | // See if MI is an instruction with an equivalent "logical" opcode that can |
308 | // be used and replace MI. This is useful for EQ/NE comparisons where the |
309 | // "nsw" flag is missing since the "logical" opcode always sets CC to reflect |
310 | // the result being zero or non-zero. |
311 | bool SystemZElimCompare::convertToLogical( |
312 | MachineInstr &MI, MachineInstr &Compare, |
313 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
314 | |
315 | unsigned ConvOpc = 0; |
316 | switch (MI.getOpcode()) { |
317 | case SystemZ::AR: ConvOpc = SystemZ::ALR; break; |
318 | case SystemZ::ARK: ConvOpc = SystemZ::ALRK; break; |
319 | case SystemZ::AGR: ConvOpc = SystemZ::ALGR; break; |
320 | case SystemZ::AGRK: ConvOpc = SystemZ::ALGRK; break; |
321 | case SystemZ::A: ConvOpc = SystemZ::AL; break; |
322 | case SystemZ::AY: ConvOpc = SystemZ::ALY; break; |
323 | case SystemZ::AG: ConvOpc = SystemZ::ALG; break; |
324 | default: break; |
325 | } |
326 | if (!ConvOpc || !adjustCCMasksForInstr(MI, Compare, CCUsers, ConvOpc)) |
327 | return false; |
328 | |
329 | // Operands should be identical, so just change the opcode and remove the |
330 | // dead flag on CC. |
331 | MI.setDesc(TII->get(Opcode: ConvOpc)); |
332 | MI.clearRegisterDeads(Reg: SystemZ::CC); |
333 | return true; |
334 | } |
335 | |
336 | #ifndef NDEBUG |
337 | static bool isAddWithImmediate(unsigned Opcode) { |
338 | switch(Opcode) { |
339 | case SystemZ::AHI: |
340 | case SystemZ::AHIK: |
341 | case SystemZ::AGHI: |
342 | case SystemZ::AGHIK: |
343 | case SystemZ::AFI: |
344 | case SystemZ::AIH: |
345 | case SystemZ::AGFI: |
346 | return true; |
347 | default: break; |
348 | } |
349 | return false; |
350 | } |
351 | #endif |
352 | |
353 | // The CC users in CCUsers are testing the result of a comparison of some |
354 | // value X against zero and we know that any CC value produced by MI would |
355 | // also reflect the value of X. ConvOpc may be used to pass the transfomed |
356 | // opcode MI will have if this succeeds. Try to adjust CCUsers so that they |
357 | // test the result of MI directly, returning true on success. Leave |
358 | // everything unchanged on failure. |
359 | bool SystemZElimCompare::adjustCCMasksForInstr( |
360 | MachineInstr &MI, MachineInstr &Compare, |
361 | SmallVectorImpl<MachineInstr *> &CCUsers, |
362 | unsigned ConvOpc) { |
363 | unsigned CompareFlags = Compare.getDesc().TSFlags; |
364 | unsigned CompareCCValues = SystemZII::getCCValues(Flags: CompareFlags); |
365 | int Opcode = (ConvOpc ? ConvOpc : MI.getOpcode()); |
366 | const MCInstrDesc &Desc = TII->get(Opcode); |
367 | unsigned MIFlags = Desc.TSFlags; |
368 | |
369 | // If Compare may raise an FP exception, we can only eliminate it |
370 | // if MI itself would have already raised the exception. |
371 | if (Compare.mayRaiseFPException()) { |
372 | // If the caller will change MI to use ConvOpc, only test whether |
373 | // ConvOpc is suitable; it is on the caller to set the MI flag. |
374 | if (ConvOpc && !Desc.mayRaiseFPException()) |
375 | return false; |
376 | // If the caller will not change MI, we test the MI flag here. |
377 | if (!ConvOpc && !MI.mayRaiseFPException()) |
378 | return false; |
379 | } |
380 | |
381 | // See which compare-style condition codes are available. |
382 | unsigned CCValues = SystemZII::getCCValues(Flags: MIFlags); |
383 | unsigned ReusableCCMask = CCValues; |
384 | // For unsigned comparisons with zero, only equality makes sense. |
385 | if (CompareFlags & SystemZII::IsLogical) |
386 | ReusableCCMask &= SystemZ::CCMASK_CMP_EQ; |
387 | unsigned OFImplies = 0; |
388 | bool LogicalMI = false; |
389 | bool MIEquivalentToCmp = false; |
390 | if (MI.getFlag(Flag: MachineInstr::NoSWrap) && |
391 | (MIFlags & SystemZII::CCIfNoSignedWrap)) { |
392 | // If MI has the NSW flag set in combination with the |
393 | // SystemZII::CCIfNoSignedWrap flag, all CCValues are valid. |
394 | } |
395 | else if ((MIFlags & SystemZII::CCIfNoSignedWrap) && |
396 | MI.getOperand(i: 2).isImm()) { |
397 | // Signed addition of immediate. If adding a positive immediate |
398 | // overflows, the result must be less than zero. If adding a negative |
399 | // immediate overflows, the result must be larger than zero (except in |
400 | // the special case of adding the minimum value of the result range, in |
401 | // which case we cannot predict whether the result is larger than or |
402 | // equal to zero). |
403 | assert(isAddWithImmediate(Opcode) && "Expected an add with immediate." ); |
404 | assert(!MI.mayLoadOrStore() && "Expected an immediate term." ); |
405 | int64_t RHS = MI.getOperand(i: 2).getImm(); |
406 | if (SystemZ::GRX32BitRegClass.contains(Reg: MI.getOperand(i: 0).getReg()) && |
407 | RHS == INT32_MIN) |
408 | return false; |
409 | OFImplies = (RHS > 0 ? SystemZ::CCMASK_CMP_LT : SystemZ::CCMASK_CMP_GT); |
410 | } |
411 | else if ((MIFlags & SystemZII::IsLogical) && CCValues) { |
412 | // Use CCMASK_CMP_EQ to match with CCUsers. On success CCMask:s will be |
413 | // converted to CCMASK_LOGICAL_ZERO or CCMASK_LOGICAL_NONZERO. |
414 | LogicalMI = true; |
415 | ReusableCCMask = SystemZ::CCMASK_CMP_EQ; |
416 | } |
417 | else { |
418 | ReusableCCMask &= SystemZII::getCompareZeroCCMask(Flags: MIFlags); |
419 | assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues" ); |
420 | MIEquivalentToCmp = |
421 | ReusableCCMask == CCValues && CCValues == CompareCCValues; |
422 | } |
423 | if (ReusableCCMask == 0) |
424 | return false; |
425 | |
426 | if (!MIEquivalentToCmp) { |
427 | // Now check whether these flags are enough for all users. |
428 | SmallVector<MachineOperand *, 4> AlterMasks; |
429 | for (MachineInstr *CCUserMI : CCUsers) { |
430 | // Fail if this isn't a use of CC that we understand. |
431 | unsigned Flags = CCUserMI->getDesc().TSFlags; |
432 | unsigned FirstOpNum; |
433 | if (Flags & SystemZII::CCMaskFirst) |
434 | FirstOpNum = 0; |
435 | else if (Flags & SystemZII::CCMaskLast) |
436 | FirstOpNum = CCUserMI->getNumExplicitOperands() - 2; |
437 | else |
438 | return false; |
439 | |
440 | // Check whether the instruction predicate treats all CC values |
441 | // outside of ReusableCCMask in the same way. In that case it |
442 | // doesn't matter what those CC values mean. |
443 | unsigned CCValid = CCUserMI->getOperand(i: FirstOpNum).getImm(); |
444 | unsigned CCMask = CCUserMI->getOperand(i: FirstOpNum + 1).getImm(); |
445 | assert(CCValid == CompareCCValues && (CCMask & ~CCValid) == 0 && |
446 | "Corrupt CC operands of CCUser." ); |
447 | unsigned OutValid = ~ReusableCCMask & CCValid; |
448 | unsigned OutMask = ~ReusableCCMask & CCMask; |
449 | if (OutMask != 0 && OutMask != OutValid) |
450 | return false; |
451 | |
452 | AlterMasks.push_back(Elt: &CCUserMI->getOperand(i: FirstOpNum)); |
453 | AlterMasks.push_back(Elt: &CCUserMI->getOperand(i: FirstOpNum + 1)); |
454 | } |
455 | |
456 | // All users are OK. Adjust the masks for MI. |
457 | for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { |
458 | AlterMasks[I]->setImm(CCValues); |
459 | unsigned CCMask = AlterMasks[I + 1]->getImm(); |
460 | if (LogicalMI) { |
461 | // Translate the CCMask into its "logical" value. |
462 | CCMask = (CCMask == SystemZ::CCMASK_CMP_EQ ? |
463 | SystemZ::CCMASK_LOGICAL_ZERO : SystemZ::CCMASK_LOGICAL_NONZERO); |
464 | CCMask &= CCValues; // Logical subtracts never set CC=0. |
465 | } else { |
466 | if (CCMask & ~ReusableCCMask) |
467 | CCMask = (CCMask & ReusableCCMask) | (CCValues & ~ReusableCCMask); |
468 | CCMask |= (CCMask & OFImplies) ? SystemZ::CCMASK_ARITH_OVERFLOW : 0; |
469 | } |
470 | AlterMasks[I + 1]->setImm(CCMask); |
471 | } |
472 | } |
473 | |
474 | // CC is now live after MI. |
475 | if (!ConvOpc) |
476 | MI.clearRegisterDeads(Reg: SystemZ::CC); |
477 | |
478 | // Check if MI lies before Compare. |
479 | bool BeforeCmp = false; |
480 | MachineBasicBlock::iterator MBBI = MI, MBBE = MI.getParent()->end(); |
481 | for (++MBBI; MBBI != MBBE; ++MBBI) |
482 | if (MBBI == Compare) { |
483 | BeforeCmp = true; |
484 | break; |
485 | } |
486 | |
487 | // Clear any intervening kills of CC. |
488 | if (BeforeCmp) { |
489 | MachineBasicBlock::iterator MBBI = MI, MBBE = Compare; |
490 | for (++MBBI; MBBI != MBBE; ++MBBI) |
491 | MBBI->clearRegisterKills(Reg: SystemZ::CC, RegInfo: TRI); |
492 | } |
493 | |
494 | return true; |
495 | } |
496 | |
497 | // Return true if Compare is a comparison against zero. |
498 | static bool isCompareZero(MachineInstr &Compare) { |
499 | if (isLoadAndTestAsCmp(MI&: Compare)) |
500 | return true; |
501 | return Compare.getNumExplicitOperands() == 2 && |
502 | Compare.getOperand(i: 1).isImm() && Compare.getOperand(i: 1).getImm() == 0; |
503 | } |
504 | |
505 | // Try to optimize cases where comparison instruction Compare is testing |
506 | // a value against zero. Return true on success and if Compare should be |
507 | // deleted as dead. CCUsers is the list of instructions that use the CC |
508 | // value produced by Compare. |
509 | bool SystemZElimCompare::optimizeCompareZero( |
510 | MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { |
511 | if (!isCompareZero(Compare)) |
512 | return false; |
513 | |
514 | // Search back for CC results that are based on the first operand. |
515 | unsigned SrcReg = getCompareSourceReg(Compare); |
516 | MachineBasicBlock &MBB = *Compare.getParent(); |
517 | Reference CCRefs; |
518 | Reference SrcRefs; |
519 | for (MachineBasicBlock::reverse_iterator MBBI = |
520 | std::next(x: MachineBasicBlock::reverse_iterator(&Compare)), |
521 | MBBE = MBB.rend(); MBBI != MBBE;) { |
522 | MachineInstr &MI = *MBBI++; |
523 | if (resultTests(MI, Reg: SrcReg)) { |
524 | // Try to remove both MI and Compare by converting a branch to BRCT(G). |
525 | // or a load-and-trap instruction. We don't care in this case whether |
526 | // CC is modified between MI and Compare. |
527 | if (!CCRefs.Use && !SrcRefs) { |
528 | if (convertToBRCT(MI, Compare, CCUsers)) { |
529 | BranchOnCounts += 1; |
530 | return true; |
531 | } |
532 | if (convertToLoadAndTrap(MI, Compare, CCUsers)) { |
533 | LoadAndTraps += 1; |
534 | return true; |
535 | } |
536 | } |
537 | // Try to eliminate Compare by reusing a CC result from MI. |
538 | if ((!CCRefs && convertToLoadAndTest(MI, Compare, CCUsers)) || |
539 | (!CCRefs.Def && |
540 | (adjustCCMasksForInstr(MI, Compare, CCUsers) || |
541 | convertToLogical(MI, Compare, CCUsers)))) { |
542 | EliminatedComparisons += 1; |
543 | return true; |
544 | } |
545 | } |
546 | SrcRefs |= getRegReferences(MI, Reg: SrcReg); |
547 | if (SrcRefs.Def) |
548 | break; |
549 | CCRefs |= getRegReferences(MI, Reg: SystemZ::CC); |
550 | if (CCRefs.Use && CCRefs.Def) |
551 | break; |
552 | // Eliminating a Compare that may raise an FP exception will move |
553 | // raising the exception to some earlier MI. We cannot do this if |
554 | // there is anything in between that might change exception flags. |
555 | if (Compare.mayRaiseFPException() && |
556 | (MI.isCall() || MI.hasUnmodeledSideEffects())) |
557 | break; |
558 | } |
559 | |
560 | // Also do a forward search to handle cases where an instruction after the |
561 | // compare can be converted, like |
562 | // CGHI %r0d, 0; %r1d = LGR %r0d => LTGR %r1d, %r0d |
563 | auto MIRange = llvm::make_range( |
564 | x: std::next(x: MachineBasicBlock::iterator(&Compare)), y: MBB.end()); |
565 | for (MachineInstr &MI : llvm::make_early_inc_range(Range&: MIRange)) { |
566 | if (preservesValueOf(MI, Reg: SrcReg)) { |
567 | // Try to eliminate Compare by reusing a CC result from MI. |
568 | if (convertToLoadAndTest(MI, Compare, CCUsers)) { |
569 | EliminatedComparisons += 1; |
570 | return true; |
571 | } |
572 | } |
573 | if (getRegReferences(MI, Reg: SrcReg).Def) |
574 | return false; |
575 | if (getRegReferences(MI, Reg: SystemZ::CC)) |
576 | return false; |
577 | } |
578 | |
579 | return false; |
580 | } |
581 | |
582 | // Try to fuse comparison instruction Compare into a later branch. |
583 | // Return true on success and if Compare is therefore redundant. |
584 | bool SystemZElimCompare::fuseCompareOperations( |
585 | MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { |
586 | // See whether we have a single branch with which to fuse. |
587 | if (CCUsers.size() != 1) |
588 | return false; |
589 | MachineInstr *Branch = CCUsers[0]; |
590 | SystemZII::FusedCompareType Type; |
591 | switch (Branch->getOpcode()) { |
592 | case SystemZ::BRC: |
593 | Type = SystemZII::CompareAndBranch; |
594 | break; |
595 | case SystemZ::CondReturn: |
596 | Type = SystemZII::CompareAndReturn; |
597 | break; |
598 | case SystemZ::CallBCR: |
599 | Type = SystemZII::CompareAndSibcall; |
600 | break; |
601 | case SystemZ::CondTrap: |
602 | Type = SystemZII::CompareAndTrap; |
603 | break; |
604 | default: |
605 | return false; |
606 | } |
607 | |
608 | // See whether we have a comparison that can be fused. |
609 | unsigned FusedOpcode = |
610 | TII->getFusedCompare(Opcode: Compare.getOpcode(), Type, MI: &Compare); |
611 | if (!FusedOpcode) |
612 | return false; |
613 | |
614 | // Make sure that the operands are available at the branch. |
615 | // SrcReg2 is the register if the source operand is a register, |
616 | // 0 if the source operand is immediate, and the base register |
617 | // if the source operand is memory (index is not supported). |
618 | Register SrcReg = Compare.getOperand(i: 0).getReg(); |
619 | Register SrcReg2 = |
620 | Compare.getOperand(i: 1).isReg() ? Compare.getOperand(i: 1).getReg() : Register(); |
621 | MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; |
622 | for (++MBBI; MBBI != MBBE; ++MBBI) |
623 | if (MBBI->modifiesRegister(Reg: SrcReg, TRI) || |
624 | (SrcReg2 && MBBI->modifiesRegister(Reg: SrcReg2, TRI))) |
625 | return false; |
626 | |
627 | // Read the branch mask, target (if applicable), regmask (if applicable). |
628 | MachineOperand CCMask(MBBI->getOperand(i: 1)); |
629 | assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 && |
630 | "Invalid condition-code mask for integer comparison" ); |
631 | // This is only valid for CompareAndBranch and CompareAndSibcall. |
632 | MachineOperand Target(MBBI->getOperand( |
633 | i: (Type == SystemZII::CompareAndBranch || |
634 | Type == SystemZII::CompareAndSibcall) ? 2 : 0)); |
635 | const uint32_t *RegMask; |
636 | if (Type == SystemZII::CompareAndSibcall) |
637 | RegMask = MBBI->getOperand(i: 3).getRegMask(); |
638 | |
639 | // Clear out all current operands. |
640 | int CCUse = MBBI->findRegisterUseOperandIdx(Reg: SystemZ::CC, TRI, isKill: false); |
641 | assert(CCUse >= 0 && "BRC/BCR must use CC" ); |
642 | Branch->removeOperand(OpNo: CCUse); |
643 | // Remove regmask (sibcall). |
644 | if (Type == SystemZII::CompareAndSibcall) |
645 | Branch->removeOperand(OpNo: 3); |
646 | // Remove target (branch or sibcall). |
647 | if (Type == SystemZII::CompareAndBranch || |
648 | Type == SystemZII::CompareAndSibcall) |
649 | Branch->removeOperand(OpNo: 2); |
650 | Branch->removeOperand(OpNo: 1); |
651 | Branch->removeOperand(OpNo: 0); |
652 | |
653 | // Rebuild Branch as a fused compare and branch. |
654 | // SrcNOps is the number of MI operands of the compare instruction |
655 | // that we need to copy over. |
656 | unsigned SrcNOps = 2; |
657 | if (FusedOpcode == SystemZ::CLT || FusedOpcode == SystemZ::CLGT) |
658 | SrcNOps = 3; |
659 | Branch->setDesc(TII->get(Opcode: FusedOpcode)); |
660 | MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); |
661 | for (unsigned I = 0; I < SrcNOps; I++) |
662 | MIB.add(MO: Compare.getOperand(i: I)); |
663 | MIB.add(MO: CCMask); |
664 | |
665 | if (Type == SystemZII::CompareAndBranch) { |
666 | // Only conditional branches define CC, as they may be converted back |
667 | // to a non-fused branch because of a long displacement. Conditional |
668 | // returns don't have that problem. |
669 | MIB.add(MO: Target).addReg(RegNo: SystemZ::CC, |
670 | flags: RegState::ImplicitDefine | RegState::Dead); |
671 | } |
672 | |
673 | if (Type == SystemZII::CompareAndSibcall) { |
674 | MIB.add(MO: Target); |
675 | MIB.addRegMask(Mask: RegMask); |
676 | } |
677 | |
678 | // Clear any intervening kills of SrcReg and SrcReg2. |
679 | MBBI = Compare; |
680 | for (++MBBI; MBBI != MBBE; ++MBBI) { |
681 | MBBI->clearRegisterKills(Reg: SrcReg, RegInfo: TRI); |
682 | if (SrcReg2) |
683 | MBBI->clearRegisterKills(Reg: SrcReg2, RegInfo: TRI); |
684 | } |
685 | FusedComparisons += 1; |
686 | return true; |
687 | } |
688 | |
689 | // Process all comparison instructions in MBB. Return true if something |
690 | // changed. |
691 | bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { |
692 | bool Changed = false; |
693 | |
694 | // Walk backwards through the block looking for comparisons, recording |
695 | // all CC users as we go. The subroutines can delete Compare and |
696 | // instructions before it. |
697 | LiveRegUnits LiveRegs(*TRI); |
698 | LiveRegs.addLiveOuts(MBB); |
699 | bool CompleteCCUsers = LiveRegs.available(Reg: SystemZ::CC); |
700 | SmallVector<MachineInstr *, 4> CCUsers; |
701 | MachineBasicBlock::iterator MBBI = MBB.end(); |
702 | while (MBBI != MBB.begin()) { |
703 | MachineInstr &MI = *--MBBI; |
704 | if (CompleteCCUsers && (MI.isCompare() || isLoadAndTestAsCmp(MI)) && |
705 | (optimizeCompareZero(Compare&: MI, CCUsers) || |
706 | fuseCompareOperations(Compare&: MI, CCUsers))) { |
707 | ++MBBI; |
708 | MI.eraseFromParent(); |
709 | Changed = true; |
710 | CCUsers.clear(); |
711 | continue; |
712 | } |
713 | |
714 | if (MI.definesRegister(Reg: SystemZ::CC, /*TRI=*/nullptr)) { |
715 | CCUsers.clear(); |
716 | CompleteCCUsers = true; |
717 | } |
718 | if (MI.readsRegister(Reg: SystemZ::CC, /*TRI=*/nullptr) && CompleteCCUsers) |
719 | CCUsers.push_back(Elt: &MI); |
720 | } |
721 | return Changed; |
722 | } |
723 | |
724 | bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) { |
725 | if (skipFunction(F: F.getFunction())) |
726 | return false; |
727 | |
728 | TII = F.getSubtarget<SystemZSubtarget>().getInstrInfo(); |
729 | TRI = &TII->getRegisterInfo(); |
730 | |
731 | bool Changed = false; |
732 | for (auto &MBB : F) |
733 | Changed |= processBlock(MBB); |
734 | |
735 | return Changed; |
736 | } |
737 | |
738 | FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) { |
739 | return new SystemZElimCompare(); |
740 | } |
741 | |