1 | //===-- SystemZElimCompare.cpp - Eliminate comparison instructions --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass: |
10 | // (1) tries to remove compares if CC already contains the required information |
11 | // (2) fuses compares and branches into COMPARE AND BRANCH instructions |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "SystemZ.h" |
16 | #include "SystemZInstrInfo.h" |
17 | #include "SystemZTargetMachine.h" |
18 | #include "llvm/ADT/SmallVector.h" |
19 | #include "llvm/ADT/Statistic.h" |
20 | #include "llvm/ADT/StringRef.h" |
21 | #include "llvm/CodeGen/LiveRegUnits.h" |
22 | #include "llvm/CodeGen/MachineBasicBlock.h" |
23 | #include "llvm/CodeGen/MachineFunction.h" |
24 | #include "llvm/CodeGen/MachineFunctionPass.h" |
25 | #include "llvm/CodeGen/MachineInstr.h" |
26 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
27 | #include "llvm/CodeGen/MachineOperand.h" |
28 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
29 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
30 | #include "llvm/MC/MCInstrDesc.h" |
31 | #include <cassert> |
32 | #include <cstdint> |
33 | |
34 | using namespace llvm; |
35 | |
// Name under which this pass is registered (see INITIALIZE_PASS below).
#define DEBUG_TYPE "systemz-elim-compare"

// Pass statistics. Each counter is bumped once per successful transformation
// of the corresponding kind in optimizeCompareZero() / fuseCompareOperations().
STATISTIC(BranchOnCounts, "Number of branch-on-count instructions" );
STATISTIC(LoadAndTraps, "Number of load-and-trap instructions" );
STATISTIC(EliminatedComparisons, "Number of eliminated comparisons" );
STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions" );
42 | |
43 | namespace { |
44 | |
// Summary of how a particular register is referenced by one or more
// instructions. Summaries can be accumulated with operator|=.
struct Reference {
  // Set when the register is defined (Def) or used (Use) in some form,
  // either directly or via a sub- or super-register.
  bool Def = false;
  bool Use = false;

  Reference() = default;

  // Merge the references recorded in Other into this summary.
  Reference &operator|=(const Reference &Other) {
    Def = Def || Other.Def;
    Use = Use || Other.Use;
    return *this;
  }

  // True if any reference (definition or use) has been recorded.
  explicit operator bool() const { return Use || Def; }
};
63 | |
64 | class SystemZElimCompare : public MachineFunctionPass { |
65 | public: |
66 | static char ID; |
67 | |
68 | SystemZElimCompare() : MachineFunctionPass(ID) { |
69 | initializeSystemZElimComparePass(*PassRegistry::getPassRegistry()); |
70 | } |
71 | |
72 | bool processBlock(MachineBasicBlock &MBB); |
73 | bool runOnMachineFunction(MachineFunction &F) override; |
74 | |
75 | MachineFunctionProperties getRequiredProperties() const override { |
76 | return MachineFunctionProperties().set( |
77 | MachineFunctionProperties::Property::NoVRegs); |
78 | } |
79 | |
80 | private: |
81 | Reference getRegReferences(MachineInstr &MI, unsigned Reg); |
82 | bool convertToBRCT(MachineInstr &MI, MachineInstr &Compare, |
83 | SmallVectorImpl<MachineInstr *> &CCUsers); |
84 | bool convertToLoadAndTrap(MachineInstr &MI, MachineInstr &Compare, |
85 | SmallVectorImpl<MachineInstr *> &CCUsers); |
86 | bool convertToLoadAndTest(MachineInstr &MI, MachineInstr &Compare, |
87 | SmallVectorImpl<MachineInstr *> &CCUsers); |
88 | bool convertToLogical(MachineInstr &MI, MachineInstr &Compare, |
89 | SmallVectorImpl<MachineInstr *> &CCUsers); |
90 | bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare, |
91 | SmallVectorImpl<MachineInstr *> &CCUsers, |
92 | unsigned ConvOpc = 0); |
93 | bool optimizeCompareZero(MachineInstr &Compare, |
94 | SmallVectorImpl<MachineInstr *> &CCUsers); |
95 | bool fuseCompareOperations(MachineInstr &Compare, |
96 | SmallVectorImpl<MachineInstr *> &CCUsers); |
97 | |
98 | const SystemZInstrInfo *TII = nullptr; |
99 | const TargetRegisterInfo *TRI = nullptr; |
100 | }; |
101 | |
102 | char SystemZElimCompare::ID = 0; |
103 | |
104 | } // end anonymous namespace |
105 | |
// Register the pass under the DEBUG_TYPE argument string with a
// human-readable description.
INITIALIZE_PASS(SystemZElimCompare, DEBUG_TYPE,
"SystemZ Comparison Elimination" , false, false)
108 | |
109 | // Returns true if MI is an instruction whose output equals the value in Reg. |
110 | static bool preservesValueOf(MachineInstr &MI, unsigned Reg) { |
111 | switch (MI.getOpcode()) { |
112 | case SystemZ::LR: |
113 | case SystemZ::LGR: |
114 | case SystemZ::LGFR: |
115 | case SystemZ::LTR: |
116 | case SystemZ::LTGR: |
117 | case SystemZ::LTGFR: |
118 | if (MI.getOperand(i: 1).getReg() == Reg) |
119 | return true; |
120 | } |
121 | |
122 | return false; |
123 | } |
124 | |
125 | // Return true if any CC result of MI would (perhaps after conversion) |
126 | // reflect the value of Reg. |
127 | static bool resultTests(MachineInstr &MI, unsigned Reg) { |
128 | if (MI.getNumOperands() > 0 && MI.getOperand(i: 0).isReg() && |
129 | MI.getOperand(i: 0).isDef() && MI.getOperand(i: 0).getReg() == Reg) |
130 | return true; |
131 | |
132 | return (preservesValueOf(MI, Reg)); |
133 | } |
134 | |
135 | // Describe the references to Reg or any of its aliases in MI. |
136 | Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) { |
137 | Reference Ref; |
138 | if (MI.isDebugInstr()) |
139 | return Ref; |
140 | |
141 | for (const MachineOperand &MO : MI.operands()) { |
142 | if (MO.isReg()) { |
143 | if (Register MOReg = MO.getReg()) { |
144 | if (TRI->regsOverlap(RegA: MOReg, RegB: Reg)) { |
145 | if (MO.isUse()) |
146 | Ref.Use = true; |
147 | else if (MO.isDef()) |
148 | Ref.Def = true; |
149 | } |
150 | } |
151 | } |
152 | } |
153 | return Ref; |
154 | } |
155 | |
156 | // Return true if this is a load and test which can be optimized the |
157 | // same way as compare instruction. |
158 | static bool isLoadAndTestAsCmp(MachineInstr &MI) { |
159 | // If we during isel used a load-and-test as a compare with 0, the |
160 | // def operand is dead. |
161 | return (MI.getOpcode() == SystemZ::LTEBR || |
162 | MI.getOpcode() == SystemZ::LTDBR || |
163 | MI.getOpcode() == SystemZ::LTXBR) && |
164 | MI.getOperand(i: 0).isDead(); |
165 | } |
166 | |
167 | // Return the source register of Compare, which is the unknown value |
168 | // being tested. |
169 | static unsigned getCompareSourceReg(MachineInstr &Compare) { |
170 | unsigned reg = 0; |
171 | if (Compare.isCompare()) |
172 | reg = Compare.getOperand(i: 0).getReg(); |
173 | else if (isLoadAndTestAsCmp(MI&: Compare)) |
174 | reg = Compare.getOperand(i: 1).getReg(); |
175 | assert(reg); |
176 | |
177 | return reg; |
178 | } |
179 | |
180 | // Compare compares the result of MI against zero. If MI is an addition |
181 | // of -1 and if CCUsers is a single branch on nonzero, eliminate the addition |
182 | // and convert the branch to a BRCT(G) or BRCTH. Return true on success. |
183 | bool SystemZElimCompare::convertToBRCT( |
184 | MachineInstr &MI, MachineInstr &Compare, |
185 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
186 | // Check whether we have an addition of -1. |
187 | unsigned Opcode = MI.getOpcode(); |
188 | unsigned BRCT; |
189 | if (Opcode == SystemZ::AHI) |
190 | BRCT = SystemZ::BRCT; |
191 | else if (Opcode == SystemZ::AGHI) |
192 | BRCT = SystemZ::BRCTG; |
193 | else if (Opcode == SystemZ::AIH) |
194 | BRCT = SystemZ::BRCTH; |
195 | else |
196 | return false; |
197 | if (MI.getOperand(i: 2).getImm() != -1) |
198 | return false; |
199 | |
200 | // Check whether we have a single JLH. |
201 | if (CCUsers.size() != 1) |
202 | return false; |
203 | MachineInstr *Branch = CCUsers[0]; |
204 | if (Branch->getOpcode() != SystemZ::BRC || |
205 | Branch->getOperand(i: 0).getImm() != SystemZ::CCMASK_ICMP || |
206 | Branch->getOperand(i: 1).getImm() != SystemZ::CCMASK_CMP_NE) |
207 | return false; |
208 | |
209 | // We already know that there are no references to the register between |
210 | // MI and Compare. Make sure that there are also no references between |
211 | // Compare and Branch. |
212 | unsigned SrcReg = getCompareSourceReg(Compare); |
213 | MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; |
214 | for (++MBBI; MBBI != MBBE; ++MBBI) |
215 | if (getRegReferences(MI&: *MBBI, Reg: SrcReg)) |
216 | return false; |
217 | |
218 | // The transformation is OK. Rebuild Branch as a BRCT(G) or BRCTH. |
219 | MachineOperand Target(Branch->getOperand(i: 2)); |
220 | while (Branch->getNumOperands()) |
221 | Branch->removeOperand(OpNo: 0); |
222 | Branch->setDesc(TII->get(Opcode: BRCT)); |
223 | MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); |
224 | MIB.add(MO: MI.getOperand(i: 0)).add(MO: MI.getOperand(i: 1)).add(MO: Target); |
225 | // Add a CC def to BRCT(G), since we may have to split them again if the |
226 | // branch displacement overflows. BRCTH has a 32-bit displacement, so |
227 | // this is not necessary there. |
228 | if (BRCT != SystemZ::BRCTH) |
229 | MIB.addReg(RegNo: SystemZ::CC, flags: RegState::ImplicitDefine | RegState::Dead); |
230 | MI.eraseFromParent(); |
231 | return true; |
232 | } |
233 | |
234 | // Compare compares the result of MI against zero. If MI is a suitable load |
235 | // instruction and if CCUsers is a single conditional trap on zero, eliminate |
236 | // the load and convert the branch to a load-and-trap. Return true on success. |
237 | bool SystemZElimCompare::convertToLoadAndTrap( |
238 | MachineInstr &MI, MachineInstr &Compare, |
239 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
240 | unsigned LATOpcode = TII->getLoadAndTrap(Opcode: MI.getOpcode()); |
241 | if (!LATOpcode) |
242 | return false; |
243 | |
244 | // Check whether we have a single CondTrap that traps on zero. |
245 | if (CCUsers.size() != 1) |
246 | return false; |
247 | MachineInstr *Branch = CCUsers[0]; |
248 | if (Branch->getOpcode() != SystemZ::CondTrap || |
249 | Branch->getOperand(i: 0).getImm() != SystemZ::CCMASK_ICMP || |
250 | Branch->getOperand(i: 1).getImm() != SystemZ::CCMASK_CMP_EQ) |
251 | return false; |
252 | |
253 | // We already know that there are no references to the register between |
254 | // MI and Compare. Make sure that there are also no references between |
255 | // Compare and Branch. |
256 | unsigned SrcReg = getCompareSourceReg(Compare); |
257 | MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; |
258 | for (++MBBI; MBBI != MBBE; ++MBBI) |
259 | if (getRegReferences(MI&: *MBBI, Reg: SrcReg)) |
260 | return false; |
261 | |
262 | // The transformation is OK. Rebuild Branch as a load-and-trap. |
263 | while (Branch->getNumOperands()) |
264 | Branch->removeOperand(OpNo: 0); |
265 | Branch->setDesc(TII->get(Opcode: LATOpcode)); |
266 | MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) |
267 | .add(MO: MI.getOperand(i: 0)) |
268 | .add(MO: MI.getOperand(i: 1)) |
269 | .add(MO: MI.getOperand(i: 2)) |
270 | .add(MO: MI.getOperand(i: 3)); |
271 | MI.eraseFromParent(); |
272 | return true; |
273 | } |
274 | |
275 | // If MI is a load instruction, try to convert it into a LOAD AND TEST. |
276 | // Return true on success. |
277 | bool SystemZElimCompare::convertToLoadAndTest( |
278 | MachineInstr &MI, MachineInstr &Compare, |
279 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
280 | |
281 | // Try to adjust CC masks for the LOAD AND TEST opcode that could replace MI. |
282 | unsigned Opcode = TII->getLoadAndTest(Opcode: MI.getOpcode()); |
283 | if (!Opcode || !adjustCCMasksForInstr(MI, Compare, CCUsers, ConvOpc: Opcode)) |
284 | return false; |
285 | |
286 | // Rebuild to get the CC operand in the right place. |
287 | auto MIB = BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode)); |
288 | for (const auto &MO : MI.operands()) |
289 | MIB.add(MO); |
290 | MIB.setMemRefs(MI.memoperands()); |
291 | MI.eraseFromParent(); |
292 | |
293 | // Mark instruction as not raising an FP exception if applicable. We already |
294 | // verified earlier that this move is valid. |
295 | if (!Compare.mayRaiseFPException()) |
296 | MIB.setMIFlag(MachineInstr::MIFlag::NoFPExcept); |
297 | |
298 | return true; |
299 | } |
300 | |
301 | // See if MI is an instruction with an equivalent "logical" opcode that can |
302 | // be used and replace MI. This is useful for EQ/NE comparisons where the |
303 | // "nsw" flag is missing since the "logical" opcode always sets CC to reflect |
304 | // the result being zero or non-zero. |
305 | bool SystemZElimCompare::convertToLogical( |
306 | MachineInstr &MI, MachineInstr &Compare, |
307 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
308 | |
309 | unsigned ConvOpc = 0; |
310 | switch (MI.getOpcode()) { |
311 | case SystemZ::AR: ConvOpc = SystemZ::ALR; break; |
312 | case SystemZ::ARK: ConvOpc = SystemZ::ALRK; break; |
313 | case SystemZ::AGR: ConvOpc = SystemZ::ALGR; break; |
314 | case SystemZ::AGRK: ConvOpc = SystemZ::ALGRK; break; |
315 | case SystemZ::A: ConvOpc = SystemZ::AL; break; |
316 | case SystemZ::AY: ConvOpc = SystemZ::ALY; break; |
317 | case SystemZ::AG: ConvOpc = SystemZ::ALG; break; |
318 | default: break; |
319 | } |
320 | if (!ConvOpc || !adjustCCMasksForInstr(MI, Compare, CCUsers, ConvOpc)) |
321 | return false; |
322 | |
323 | // Operands should be identical, so just change the opcode and remove the |
324 | // dead flag on CC. |
325 | MI.setDesc(TII->get(Opcode: ConvOpc)); |
326 | MI.clearRegisterDeads(Reg: SystemZ::CC); |
327 | return true; |
328 | } |
329 | |
330 | #ifndef NDEBUG |
331 | static bool isAddWithImmediate(unsigned Opcode) { |
332 | switch(Opcode) { |
333 | case SystemZ::AHI: |
334 | case SystemZ::AHIK: |
335 | case SystemZ::AGHI: |
336 | case SystemZ::AGHIK: |
337 | case SystemZ::AFI: |
338 | case SystemZ::AIH: |
339 | case SystemZ::AGFI: |
340 | return true; |
341 | default: break; |
342 | } |
343 | return false; |
344 | } |
345 | #endif |
346 | |
347 | // The CC users in CCUsers are testing the result of a comparison of some |
348 | // value X against zero and we know that any CC value produced by MI would |
349 | // also reflect the value of X. ConvOpc may be used to pass the transfomed |
350 | // opcode MI will have if this succeeds. Try to adjust CCUsers so that they |
351 | // test the result of MI directly, returning true on success. Leave |
352 | // everything unchanged on failure. |
353 | bool SystemZElimCompare::adjustCCMasksForInstr( |
354 | MachineInstr &MI, MachineInstr &Compare, |
355 | SmallVectorImpl<MachineInstr *> &CCUsers, |
356 | unsigned ConvOpc) { |
357 | unsigned CompareFlags = Compare.getDesc().TSFlags; |
358 | unsigned CompareCCValues = SystemZII::getCCValues(Flags: CompareFlags); |
359 | int Opcode = (ConvOpc ? ConvOpc : MI.getOpcode()); |
360 | const MCInstrDesc &Desc = TII->get(Opcode); |
361 | unsigned MIFlags = Desc.TSFlags; |
362 | |
363 | // If Compare may raise an FP exception, we can only eliminate it |
364 | // if MI itself would have already raised the exception. |
365 | if (Compare.mayRaiseFPException()) { |
366 | // If the caller will change MI to use ConvOpc, only test whether |
367 | // ConvOpc is suitable; it is on the caller to set the MI flag. |
368 | if (ConvOpc && !Desc.mayRaiseFPException()) |
369 | return false; |
370 | // If the caller will not change MI, we test the MI flag here. |
371 | if (!ConvOpc && !MI.mayRaiseFPException()) |
372 | return false; |
373 | } |
374 | |
375 | // See which compare-style condition codes are available. |
376 | unsigned CCValues = SystemZII::getCCValues(Flags: MIFlags); |
377 | unsigned ReusableCCMask = CCValues; |
378 | // For unsigned comparisons with zero, only equality makes sense. |
379 | if (CompareFlags & SystemZII::IsLogical) |
380 | ReusableCCMask &= SystemZ::CCMASK_CMP_EQ; |
381 | unsigned OFImplies = 0; |
382 | bool LogicalMI = false; |
383 | bool MIEquivalentToCmp = false; |
384 | if (MI.getFlag(Flag: MachineInstr::NoSWrap) && |
385 | (MIFlags & SystemZII::CCIfNoSignedWrap)) { |
386 | // If MI has the NSW flag set in combination with the |
387 | // SystemZII::CCIfNoSignedWrap flag, all CCValues are valid. |
388 | } |
389 | else if ((MIFlags & SystemZII::CCIfNoSignedWrap) && |
390 | MI.getOperand(i: 2).isImm()) { |
391 | // Signed addition of immediate. If adding a positive immediate |
392 | // overflows, the result must be less than zero. If adding a negative |
393 | // immediate overflows, the result must be larger than zero (except in |
394 | // the special case of adding the minimum value of the result range, in |
395 | // which case we cannot predict whether the result is larger than or |
396 | // equal to zero). |
397 | assert(isAddWithImmediate(Opcode) && "Expected an add with immediate." ); |
398 | assert(!MI.mayLoadOrStore() && "Expected an immediate term." ); |
399 | int64_t RHS = MI.getOperand(i: 2).getImm(); |
400 | if (SystemZ::GRX32BitRegClass.contains(Reg: MI.getOperand(i: 0).getReg()) && |
401 | RHS == INT32_MIN) |
402 | return false; |
403 | OFImplies = (RHS > 0 ? SystemZ::CCMASK_CMP_LT : SystemZ::CCMASK_CMP_GT); |
404 | } |
405 | else if ((MIFlags & SystemZII::IsLogical) && CCValues) { |
406 | // Use CCMASK_CMP_EQ to match with CCUsers. On success CCMask:s will be |
407 | // converted to CCMASK_LOGICAL_ZERO or CCMASK_LOGICAL_NONZERO. |
408 | LogicalMI = true; |
409 | ReusableCCMask = SystemZ::CCMASK_CMP_EQ; |
410 | } |
411 | else { |
412 | ReusableCCMask &= SystemZII::getCompareZeroCCMask(Flags: MIFlags); |
413 | assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues" ); |
414 | MIEquivalentToCmp = |
415 | ReusableCCMask == CCValues && CCValues == CompareCCValues; |
416 | } |
417 | if (ReusableCCMask == 0) |
418 | return false; |
419 | |
420 | if (!MIEquivalentToCmp) { |
421 | // Now check whether these flags are enough for all users. |
422 | SmallVector<MachineOperand *, 4> AlterMasks; |
423 | for (MachineInstr *CCUserMI : CCUsers) { |
424 | // Fail if this isn't a use of CC that we understand. |
425 | unsigned Flags = CCUserMI->getDesc().TSFlags; |
426 | unsigned FirstOpNum; |
427 | if (Flags & SystemZII::CCMaskFirst) |
428 | FirstOpNum = 0; |
429 | else if (Flags & SystemZII::CCMaskLast) |
430 | FirstOpNum = CCUserMI->getNumExplicitOperands() - 2; |
431 | else |
432 | return false; |
433 | |
434 | // Check whether the instruction predicate treats all CC values |
435 | // outside of ReusableCCMask in the same way. In that case it |
436 | // doesn't matter what those CC values mean. |
437 | unsigned CCValid = CCUserMI->getOperand(i: FirstOpNum).getImm(); |
438 | unsigned CCMask = CCUserMI->getOperand(i: FirstOpNum + 1).getImm(); |
439 | assert(CCValid == CompareCCValues && (CCMask & ~CCValid) == 0 && |
440 | "Corrupt CC operands of CCUser." ); |
441 | unsigned OutValid = ~ReusableCCMask & CCValid; |
442 | unsigned OutMask = ~ReusableCCMask & CCMask; |
443 | if (OutMask != 0 && OutMask != OutValid) |
444 | return false; |
445 | |
446 | AlterMasks.push_back(Elt: &CCUserMI->getOperand(i: FirstOpNum)); |
447 | AlterMasks.push_back(Elt: &CCUserMI->getOperand(i: FirstOpNum + 1)); |
448 | } |
449 | |
450 | // All users are OK. Adjust the masks for MI. |
451 | for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { |
452 | AlterMasks[I]->setImm(CCValues); |
453 | unsigned CCMask = AlterMasks[I + 1]->getImm(); |
454 | if (LogicalMI) { |
455 | // Translate the CCMask into its "logical" value. |
456 | CCMask = (CCMask == SystemZ::CCMASK_CMP_EQ ? |
457 | SystemZ::CCMASK_LOGICAL_ZERO : SystemZ::CCMASK_LOGICAL_NONZERO); |
458 | CCMask &= CCValues; // Logical subtracts never set CC=0. |
459 | } else { |
460 | if (CCMask & ~ReusableCCMask) |
461 | CCMask = (CCMask & ReusableCCMask) | (CCValues & ~ReusableCCMask); |
462 | CCMask |= (CCMask & OFImplies) ? SystemZ::CCMASK_ARITH_OVERFLOW : 0; |
463 | } |
464 | AlterMasks[I + 1]->setImm(CCMask); |
465 | } |
466 | } |
467 | |
468 | // CC is now live after MI. |
469 | if (!ConvOpc) |
470 | MI.clearRegisterDeads(Reg: SystemZ::CC); |
471 | |
472 | // Check if MI lies before Compare. |
473 | bool BeforeCmp = false; |
474 | MachineBasicBlock::iterator MBBI = MI, MBBE = MI.getParent()->end(); |
475 | for (++MBBI; MBBI != MBBE; ++MBBI) |
476 | if (MBBI == Compare) { |
477 | BeforeCmp = true; |
478 | break; |
479 | } |
480 | |
481 | // Clear any intervening kills of CC. |
482 | if (BeforeCmp) { |
483 | MachineBasicBlock::iterator MBBI = MI, MBBE = Compare; |
484 | for (++MBBI; MBBI != MBBE; ++MBBI) |
485 | MBBI->clearRegisterKills(Reg: SystemZ::CC, RegInfo: TRI); |
486 | } |
487 | |
488 | return true; |
489 | } |
490 | |
491 | // Return true if Compare is a comparison against zero. |
492 | static bool isCompareZero(MachineInstr &Compare) { |
493 | if (isLoadAndTestAsCmp(MI&: Compare)) |
494 | return true; |
495 | return Compare.getNumExplicitOperands() == 2 && |
496 | Compare.getOperand(i: 1).isImm() && Compare.getOperand(i: 1).getImm() == 0; |
497 | } |
498 | |
499 | // Try to optimize cases where comparison instruction Compare is testing |
500 | // a value against zero. Return true on success and if Compare should be |
501 | // deleted as dead. CCUsers is the list of instructions that use the CC |
502 | // value produced by Compare. |
503 | bool SystemZElimCompare::optimizeCompareZero( |
504 | MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { |
505 | if (!isCompareZero(Compare)) |
506 | return false; |
507 | |
508 | // Search back for CC results that are based on the first operand. |
509 | unsigned SrcReg = getCompareSourceReg(Compare); |
510 | MachineBasicBlock &MBB = *Compare.getParent(); |
511 | Reference CCRefs; |
512 | Reference SrcRefs; |
513 | for (MachineBasicBlock::reverse_iterator MBBI = |
514 | std::next(x: MachineBasicBlock::reverse_iterator(&Compare)), |
515 | MBBE = MBB.rend(); MBBI != MBBE;) { |
516 | MachineInstr &MI = *MBBI++; |
517 | if (resultTests(MI, Reg: SrcReg)) { |
518 | // Try to remove both MI and Compare by converting a branch to BRCT(G). |
519 | // or a load-and-trap instruction. We don't care in this case whether |
520 | // CC is modified between MI and Compare. |
521 | if (!CCRefs.Use && !SrcRefs) { |
522 | if (convertToBRCT(MI, Compare, CCUsers)) { |
523 | BranchOnCounts += 1; |
524 | return true; |
525 | } |
526 | if (convertToLoadAndTrap(MI, Compare, CCUsers)) { |
527 | LoadAndTraps += 1; |
528 | return true; |
529 | } |
530 | } |
531 | // Try to eliminate Compare by reusing a CC result from MI. |
532 | if ((!CCRefs && convertToLoadAndTest(MI, Compare, CCUsers)) || |
533 | (!CCRefs.Def && |
534 | (adjustCCMasksForInstr(MI, Compare, CCUsers) || |
535 | convertToLogical(MI, Compare, CCUsers)))) { |
536 | EliminatedComparisons += 1; |
537 | return true; |
538 | } |
539 | } |
540 | SrcRefs |= getRegReferences(MI, Reg: SrcReg); |
541 | if (SrcRefs.Def) |
542 | break; |
543 | CCRefs |= getRegReferences(MI, Reg: SystemZ::CC); |
544 | if (CCRefs.Use && CCRefs.Def) |
545 | break; |
546 | // Eliminating a Compare that may raise an FP exception will move |
547 | // raising the exception to some earlier MI. We cannot do this if |
548 | // there is anything in between that might change exception flags. |
549 | if (Compare.mayRaiseFPException() && |
550 | (MI.isCall() || MI.hasUnmodeledSideEffects())) |
551 | break; |
552 | } |
553 | |
554 | // Also do a forward search to handle cases where an instruction after the |
555 | // compare can be converted, like |
556 | // CGHI %r0d, 0; %r1d = LGR %r0d => LTGR %r1d, %r0d |
557 | auto MIRange = llvm::make_range( |
558 | x: std::next(x: MachineBasicBlock::iterator(&Compare)), y: MBB.end()); |
559 | for (MachineInstr &MI : llvm::make_early_inc_range(Range&: MIRange)) { |
560 | if (preservesValueOf(MI, Reg: SrcReg)) { |
561 | // Try to eliminate Compare by reusing a CC result from MI. |
562 | if (convertToLoadAndTest(MI, Compare, CCUsers)) { |
563 | EliminatedComparisons += 1; |
564 | return true; |
565 | } |
566 | } |
567 | if (getRegReferences(MI, Reg: SrcReg).Def) |
568 | return false; |
569 | if (getRegReferences(MI, Reg: SystemZ::CC)) |
570 | return false; |
571 | } |
572 | |
573 | return false; |
574 | } |
575 | |
576 | // Try to fuse comparison instruction Compare into a later branch. |
577 | // Return true on success and if Compare is therefore redundant. |
578 | bool SystemZElimCompare::fuseCompareOperations( |
579 | MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { |
580 | // See whether we have a single branch with which to fuse. |
581 | if (CCUsers.size() != 1) |
582 | return false; |
583 | MachineInstr *Branch = CCUsers[0]; |
584 | SystemZII::FusedCompareType Type; |
585 | switch (Branch->getOpcode()) { |
586 | case SystemZ::BRC: |
587 | Type = SystemZII::CompareAndBranch; |
588 | break; |
589 | case SystemZ::CondReturn: |
590 | Type = SystemZII::CompareAndReturn; |
591 | break; |
592 | case SystemZ::CallBCR: |
593 | Type = SystemZII::CompareAndSibcall; |
594 | break; |
595 | case SystemZ::CondTrap: |
596 | Type = SystemZII::CompareAndTrap; |
597 | break; |
598 | default: |
599 | return false; |
600 | } |
601 | |
602 | // See whether we have a comparison that can be fused. |
603 | unsigned FusedOpcode = |
604 | TII->getFusedCompare(Opcode: Compare.getOpcode(), Type, MI: &Compare); |
605 | if (!FusedOpcode) |
606 | return false; |
607 | |
608 | // Make sure that the operands are available at the branch. |
609 | // SrcReg2 is the register if the source operand is a register, |
610 | // 0 if the source operand is immediate, and the base register |
611 | // if the source operand is memory (index is not supported). |
612 | Register SrcReg = Compare.getOperand(i: 0).getReg(); |
613 | Register SrcReg2 = |
614 | Compare.getOperand(i: 1).isReg() ? Compare.getOperand(i: 1).getReg() : Register(); |
615 | MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; |
616 | for (++MBBI; MBBI != MBBE; ++MBBI) |
617 | if (MBBI->modifiesRegister(Reg: SrcReg, TRI) || |
618 | (SrcReg2 && MBBI->modifiesRegister(Reg: SrcReg2, TRI))) |
619 | return false; |
620 | |
621 | // Read the branch mask, target (if applicable), regmask (if applicable). |
622 | MachineOperand CCMask(MBBI->getOperand(i: 1)); |
623 | assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 && |
624 | "Invalid condition-code mask for integer comparison" ); |
625 | // This is only valid for CompareAndBranch and CompareAndSibcall. |
626 | MachineOperand Target(MBBI->getOperand( |
627 | i: (Type == SystemZII::CompareAndBranch || |
628 | Type == SystemZII::CompareAndSibcall) ? 2 : 0)); |
629 | const uint32_t *RegMask; |
630 | if (Type == SystemZII::CompareAndSibcall) |
631 | RegMask = MBBI->getOperand(i: 3).getRegMask(); |
632 | |
633 | // Clear out all current operands. |
634 | int CCUse = MBBI->findRegisterUseOperandIdx(Reg: SystemZ::CC, TRI, isKill: false); |
635 | assert(CCUse >= 0 && "BRC/BCR must use CC" ); |
636 | Branch->removeOperand(OpNo: CCUse); |
637 | // Remove regmask (sibcall). |
638 | if (Type == SystemZII::CompareAndSibcall) |
639 | Branch->removeOperand(OpNo: 3); |
640 | // Remove target (branch or sibcall). |
641 | if (Type == SystemZII::CompareAndBranch || |
642 | Type == SystemZII::CompareAndSibcall) |
643 | Branch->removeOperand(OpNo: 2); |
644 | Branch->removeOperand(OpNo: 1); |
645 | Branch->removeOperand(OpNo: 0); |
646 | |
647 | // Rebuild Branch as a fused compare and branch. |
648 | // SrcNOps is the number of MI operands of the compare instruction |
649 | // that we need to copy over. |
650 | unsigned SrcNOps = 2; |
651 | if (FusedOpcode == SystemZ::CLT || FusedOpcode == SystemZ::CLGT) |
652 | SrcNOps = 3; |
653 | Branch->setDesc(TII->get(Opcode: FusedOpcode)); |
654 | MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); |
655 | for (unsigned I = 0; I < SrcNOps; I++) |
656 | MIB.add(MO: Compare.getOperand(i: I)); |
657 | MIB.add(MO: CCMask); |
658 | |
659 | if (Type == SystemZII::CompareAndBranch) { |
660 | // Only conditional branches define CC, as they may be converted back |
661 | // to a non-fused branch because of a long displacement. Conditional |
662 | // returns don't have that problem. |
663 | MIB.add(MO: Target).addReg(RegNo: SystemZ::CC, |
664 | flags: RegState::ImplicitDefine | RegState::Dead); |
665 | } |
666 | |
667 | if (Type == SystemZII::CompareAndSibcall) { |
668 | MIB.add(MO: Target); |
669 | MIB.addRegMask(Mask: RegMask); |
670 | } |
671 | |
672 | // Clear any intervening kills of SrcReg and SrcReg2. |
673 | MBBI = Compare; |
674 | for (++MBBI; MBBI != MBBE; ++MBBI) { |
675 | MBBI->clearRegisterKills(Reg: SrcReg, RegInfo: TRI); |
676 | if (SrcReg2) |
677 | MBBI->clearRegisterKills(Reg: SrcReg2, RegInfo: TRI); |
678 | } |
679 | FusedComparisons += 1; |
680 | return true; |
681 | } |
682 | |
683 | // Process all comparison instructions in MBB. Return true if something |
684 | // changed. |
685 | bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { |
686 | bool Changed = false; |
687 | |
688 | // Walk backwards through the block looking for comparisons, recording |
689 | // all CC users as we go. The subroutines can delete Compare and |
690 | // instructions before it. |
691 | LiveRegUnits LiveRegs(*TRI); |
692 | LiveRegs.addLiveOuts(MBB); |
693 | bool CompleteCCUsers = LiveRegs.available(Reg: SystemZ::CC); |
694 | SmallVector<MachineInstr *, 4> CCUsers; |
695 | MachineBasicBlock::iterator MBBI = MBB.end(); |
696 | while (MBBI != MBB.begin()) { |
697 | MachineInstr &MI = *--MBBI; |
698 | if (CompleteCCUsers && (MI.isCompare() || isLoadAndTestAsCmp(MI)) && |
699 | (optimizeCompareZero(Compare&: MI, CCUsers) || |
700 | fuseCompareOperations(Compare&: MI, CCUsers))) { |
701 | ++MBBI; |
702 | MI.eraseFromParent(); |
703 | Changed = true; |
704 | CCUsers.clear(); |
705 | continue; |
706 | } |
707 | |
708 | if (MI.definesRegister(Reg: SystemZ::CC, /*TRI=*/nullptr)) { |
709 | CCUsers.clear(); |
710 | CompleteCCUsers = true; |
711 | } |
712 | if (MI.readsRegister(Reg: SystemZ::CC, /*TRI=*/nullptr) && CompleteCCUsers) |
713 | CCUsers.push_back(Elt: &MI); |
714 | } |
715 | return Changed; |
716 | } |
717 | |
718 | bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) { |
719 | if (skipFunction(F: F.getFunction())) |
720 | return false; |
721 | |
722 | TII = F.getSubtarget<SystemZSubtarget>().getInstrInfo(); |
723 | TRI = &TII->getRegisterInfo(); |
724 | |
725 | bool Changed = false; |
726 | for (auto &MBB : F) |
727 | Changed |= processBlock(MBB); |
728 | |
729 | return Changed; |
730 | } |
731 | |
732 | FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) { |
733 | return new SystemZElimCompare(); |
734 | } |
735 | |