1 | //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass looks for instructions that can be replaced by a Test Data Class |
10 | // instruction, and replaces them when profitable. |
11 | // |
12 | // Roughly, the following rules are recognized: |
13 | // |
14 | // 1: fcmp pred X, 0 -> tdc X, mask |
15 | // 2: fcmp pred X, +-inf -> tdc X, mask |
16 | // 3: fcmp pred X, +-minnorm -> tdc X, mask |
17 | // 4: tdc (fabs X), mask -> tdc X, newmask |
18 | // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit] |
19 | // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask |
20 | // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask |
21 | // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2) |
22 | // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2) |
23 | // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2) |
24 | // |
25 | // The pass works in 4 steps: |
26 | // |
27 | // 1. All fcmp and icmp instructions in a function are checked for a match |
28 | // with rules 1-3 and 5-7. Their TDC equivalents are stored in |
29 | // the ConvertedInsts mapping. If the operand of a fcmp instruction is |
30 | // a fabs, it's also folded according to rule 4. |
31 | // 2. All and/or/xor i1 instructions whose both operands have been already |
32 | // mapped are mapped according to rules 8-10. LogicOpsWorklist is used |
33 | // as a queue of instructions to check. |
34 | // 3. All mapped instructions that are considered worthy of conversion (ie. |
35 | // replacing them will actually simplify the final code) are replaced |
36 | // with a call to the s390.tdc intrinsic. |
37 | // 4. All intermediate results of replaced instructions are removed if unused. |
38 | // |
39 | // Instructions that match rules 1-3 are considered unworthy of conversion |
40 | // on their own (since a comparison instruction is superior), but are mapped |
41 | // in the hopes of folding the result using rules 4 and 8-10 (likely removing |
42 | // the original comparison in the process). |
43 | // |
44 | //===----------------------------------------------------------------------===// |
45 | |
46 | #include "SystemZ.h" |
47 | #include "SystemZSubtarget.h" |
48 | #include "llvm/ADT/MapVector.h" |
49 | #include "llvm/CodeGen/TargetPassConfig.h" |
50 | #include "llvm/IR/Constants.h" |
51 | #include "llvm/IR/IRBuilder.h" |
52 | #include "llvm/IR/InstIterator.h" |
53 | #include "llvm/IR/Instructions.h" |
54 | #include "llvm/IR/IntrinsicInst.h" |
55 | #include "llvm/IR/IntrinsicsS390.h" |
56 | #include "llvm/IR/LegacyPassManager.h" |
57 | #include "llvm/IR/Module.h" |
58 | #include "llvm/Target/TargetMachine.h" |
59 | #include <deque> |
60 | #include <set> |
61 | |
62 | using namespace llvm; |
63 | |
64 | namespace { |
65 | |
66 | class SystemZTDCPass : public FunctionPass { |
67 | public: |
68 | static char ID; |
69 | SystemZTDCPass() : FunctionPass(ID) { |
70 | initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry()); |
71 | } |
72 | |
73 | bool runOnFunction(Function &F) override; |
74 | |
75 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
76 | AU.addRequired<TargetPassConfig>(); |
77 | } |
78 | |
79 | private: |
80 | // Maps seen instructions that can be mapped to a TDC, values are |
81 | // (TDC operand, TDC mask, worthy flag) triples. |
82 | MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts; |
83 | // The queue of and/or/xor i1 instructions to be potentially folded. |
84 | std::vector<BinaryOperator *> LogicOpsWorklist; |
85 | // Instructions matched while folding, to be removed at the end if unused. |
86 | std::set<Instruction *> PossibleJunk; |
87 | |
88 | // Tries to convert a fcmp instruction. |
89 | void convertFCmp(CmpInst &I); |
90 | |
91 | // Tries to convert an icmp instruction. |
92 | void convertICmp(CmpInst &I); |
93 | |
94 | // Tries to convert an i1 and/or/xor instruction, whose both operands |
95 | // have been already converted. |
96 | void convertLogicOp(BinaryOperator &I); |
97 | |
98 | // Marks an instruction as converted - adds it to ConvertedInsts and adds |
99 | // any and/or/xor i1 users to the queue. |
100 | void converted(Instruction *I, Value *V, int Mask, bool Worthy) { |
101 | ConvertedInsts[I] = std::make_tuple(args&: V, args&: Mask, args&: Worthy); |
102 | auto &M = *I->getFunction()->getParent(); |
103 | auto &Ctx = M.getContext(); |
104 | for (auto *U : I->users()) { |
105 | auto *LI = dyn_cast<BinaryOperator>(Val: U); |
106 | if (LI && LI->getType() == Type::getInt1Ty(C&: Ctx) && |
107 | (LI->getOpcode() == Instruction::And || |
108 | LI->getOpcode() == Instruction::Or || |
109 | LI->getOpcode() == Instruction::Xor)) { |
110 | LogicOpsWorklist.push_back(x: LI); |
111 | } |
112 | } |
113 | } |
114 | }; |
115 | |
116 | } // end anonymous namespace |
117 | |
118 | char SystemZTDCPass::ID = 0; |
119 | INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc" , |
120 | "SystemZ Test Data Class optimization" , false, false) |
121 | |
122 | FunctionPass *llvm::createSystemZTDCPass() { |
123 | return new SystemZTDCPass(); |
124 | } |
125 | |
126 | void SystemZTDCPass::convertFCmp(CmpInst &I) { |
127 | Value *Op0 = I.getOperand(i_nocapture: 0); |
128 | auto *Const = dyn_cast<ConstantFP>(Val: I.getOperand(i_nocapture: 1)); |
129 | auto Pred = I.getPredicate(); |
130 | // Only comparisons with consts are interesting. |
131 | if (!Const) |
132 | return; |
133 | // Compute the smallest normal number (and its negation). |
134 | auto &Sem = Op0->getType()->getFltSemantics(); |
135 | APFloat Smallest = APFloat::getSmallestNormalized(Sem); |
136 | APFloat NegSmallest = Smallest; |
137 | NegSmallest.changeSign(); |
138 | // Check if Const is one of our recognized consts. |
139 | int WhichConst; |
140 | if (Const->isZero()) { |
141 | // All comparisons with 0 can be converted. |
142 | WhichConst = 0; |
143 | } else if (Const->isInfinity()) { |
144 | // Likewise for infinities. |
145 | WhichConst = Const->isNegative() ? 2 : 1; |
146 | } else if (Const->isExactlyValue(V: Smallest)) { |
147 | // For Smallest, we cannot do EQ separately from GT. |
148 | if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE && |
149 | (Pred & CmpInst::FCMP_OGE) != 0) |
150 | return; |
151 | WhichConst = 3; |
152 | } else if (Const->isExactlyValue(V: NegSmallest)) { |
153 | // Likewise for NegSmallest, we cannot do EQ separately from LT. |
154 | if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE && |
155 | (Pred & CmpInst::FCMP_OLE) != 0) |
156 | return; |
157 | WhichConst = 4; |
158 | } else { |
159 | // Not one of our special constants. |
160 | return; |
161 | } |
162 | // Partial masks to use for EQ, GT, LT, UN comparisons, respectively. |
163 | static const int Masks[][4] = { |
164 | { // 0 |
165 | SystemZ::TDCMASK_ZERO, // eq |
166 | SystemZ::TDCMASK_POSITIVE, // gt |
167 | SystemZ::TDCMASK_NEGATIVE, // lt |
168 | SystemZ::TDCMASK_NAN, // un |
169 | }, |
170 | { // inf |
171 | SystemZ::TDCMASK_INFINITY_PLUS, // eq |
172 | 0, // gt |
173 | (SystemZ::TDCMASK_ZERO | |
174 | SystemZ::TDCMASK_NEGATIVE | |
175 | SystemZ::TDCMASK_NORMAL_PLUS | |
176 | SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt |
177 | SystemZ::TDCMASK_NAN, // un |
178 | }, |
179 | { // -inf |
180 | SystemZ::TDCMASK_INFINITY_MINUS, // eq |
181 | (SystemZ::TDCMASK_ZERO | |
182 | SystemZ::TDCMASK_POSITIVE | |
183 | SystemZ::TDCMASK_NORMAL_MINUS | |
184 | SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt |
185 | 0, // lt |
186 | SystemZ::TDCMASK_NAN, // un |
187 | }, |
188 | { // minnorm |
189 | 0, // eq (unsupported) |
190 | (SystemZ::TDCMASK_NORMAL_PLUS | |
191 | SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge) |
192 | (SystemZ::TDCMASK_ZERO | |
193 | SystemZ::TDCMASK_NEGATIVE | |
194 | SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt |
195 | SystemZ::TDCMASK_NAN, // un |
196 | }, |
197 | { // -minnorm |
198 | 0, // eq (unsupported) |
199 | (SystemZ::TDCMASK_ZERO | |
200 | SystemZ::TDCMASK_POSITIVE | |
201 | SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt |
202 | (SystemZ::TDCMASK_NORMAL_MINUS | |
203 | SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le) |
204 | SystemZ::TDCMASK_NAN, // un |
205 | } |
206 | }; |
207 | // Construct the mask as a combination of the partial masks. |
208 | int Mask = 0; |
209 | if (Pred & CmpInst::FCMP_OEQ) |
210 | Mask |= Masks[WhichConst][0]; |
211 | if (Pred & CmpInst::FCMP_OGT) |
212 | Mask |= Masks[WhichConst][1]; |
213 | if (Pred & CmpInst::FCMP_OLT) |
214 | Mask |= Masks[WhichConst][2]; |
215 | if (Pred & CmpInst::FCMP_UNO) |
216 | Mask |= Masks[WhichConst][3]; |
217 | // A lone fcmp is unworthy of tdc conversion on its own, but may become |
218 | // worthy if combined with fabs. |
219 | bool Worthy = false; |
220 | if (CallInst *CI = dyn_cast<CallInst>(Val: Op0)) { |
221 | Function *F = CI->getCalledFunction(); |
222 | if (F && F->getIntrinsicID() == Intrinsic::fabs) { |
223 | // Fold with fabs - adjust the mask appropriately. |
224 | Mask &= SystemZ::TDCMASK_PLUS; |
225 | Mask |= Mask >> 1; |
226 | Op0 = CI->getArgOperand(i: 0); |
227 | // A combination of fcmp with fabs is a win, unless the constant |
228 | // involved is 0 (which is handled by later passes). |
229 | Worthy = WhichConst != 0; |
230 | PossibleJunk.insert(x: CI); |
231 | } |
232 | } |
233 | converted(I: &I, V: Op0, Mask, Worthy); |
234 | } |
235 | |
236 | void SystemZTDCPass::convertICmp(CmpInst &I) { |
237 | Value *Op0 = I.getOperand(i_nocapture: 0); |
238 | auto *Const = dyn_cast<ConstantInt>(Val: I.getOperand(i_nocapture: 1)); |
239 | auto Pred = I.getPredicate(); |
240 | // All our icmp rules involve comparisons with consts. |
241 | if (!Const) |
242 | return; |
243 | if (auto *Cast = dyn_cast<BitCastInst>(Val: Op0)) { |
244 | // Check for icmp+bitcast used for signbit. |
245 | if (!Cast->getSrcTy()->isFloatTy() && |
246 | !Cast->getSrcTy()->isDoubleTy() && |
247 | !Cast->getSrcTy()->isFP128Ty()) |
248 | return; |
249 | Value *V = Cast->getOperand(i_nocapture: 0); |
250 | int Mask; |
251 | if (Pred == CmpInst::ICMP_SLT && Const->isZero()) { |
252 | // icmp slt (bitcast X), 0 - set if sign bit true |
253 | Mask = SystemZ::TDCMASK_MINUS; |
254 | } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) { |
255 | // icmp sgt (bitcast X), -1 - set if sign bit false |
256 | Mask = SystemZ::TDCMASK_PLUS; |
257 | } else { |
258 | // Not a sign bit check. |
259 | return; |
260 | } |
261 | PossibleJunk.insert(x: Cast); |
262 | converted(I: &I, V, Mask, Worthy: true); |
263 | } else if (auto *CI = dyn_cast<CallInst>(Val: Op0)) { |
264 | // Check if this is a pre-existing call of our tdc intrinsic. |
265 | Function *F = CI->getCalledFunction(); |
266 | if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc) |
267 | return; |
268 | if (!Const->isZero()) |
269 | return; |
270 | Value *V = CI->getArgOperand(i: 0); |
271 | auto *MaskC = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 1)); |
272 | // Bail if the mask is not a constant. |
273 | if (!MaskC) |
274 | return; |
275 | int Mask = MaskC->getZExtValue(); |
276 | Mask &= SystemZ::TDCMASK_ALL; |
277 | if (Pred == CmpInst::ICMP_NE) { |
278 | // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC |
279 | } else if (Pred == CmpInst::ICMP_EQ) { |
280 | // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask |
281 | Mask ^= SystemZ::TDCMASK_ALL; |
282 | } else { |
283 | // An unknown comparison - ignore. |
284 | return; |
285 | } |
286 | PossibleJunk.insert(x: CI); |
287 | converted(I: &I, V, Mask, Worthy: false); |
288 | } |
289 | } |
290 | |
291 | void SystemZTDCPass::convertLogicOp(BinaryOperator &I) { |
292 | Value *Op0, *Op1; |
293 | int Mask0, Mask1; |
294 | bool Worthy0, Worthy1; |
295 | std::tie(args&: Op0, args&: Mask0, args&: Worthy0) = ConvertedInsts[cast<Instruction>(Val: I.getOperand(i_nocapture: 0))]; |
296 | std::tie(args&: Op1, args&: Mask1, args&: Worthy1) = ConvertedInsts[cast<Instruction>(Val: I.getOperand(i_nocapture: 1))]; |
297 | if (Op0 != Op1) |
298 | return; |
299 | int Mask; |
300 | switch (I.getOpcode()) { |
301 | case Instruction::And: |
302 | Mask = Mask0 & Mask1; |
303 | break; |
304 | case Instruction::Or: |
305 | Mask = Mask0 | Mask1; |
306 | break; |
307 | case Instruction::Xor: |
308 | Mask = Mask0 ^ Mask1; |
309 | break; |
310 | default: |
311 | llvm_unreachable("Unknown op in convertLogicOp" ); |
312 | } |
313 | converted(I: &I, V: Op0, Mask, Worthy: true); |
314 | } |
315 | |
316 | bool SystemZTDCPass::runOnFunction(Function &F) { |
317 | auto &TPC = getAnalysis<TargetPassConfig>(); |
318 | if (TPC.getTM<TargetMachine>() |
319 | .getSubtarget<SystemZSubtarget>(F) |
320 | .hasSoftFloat()) |
321 | return false; |
322 | |
323 | ConvertedInsts.clear(); |
324 | LogicOpsWorklist.clear(); |
325 | PossibleJunk.clear(); |
326 | |
327 | // Look for icmp+fcmp instructions. |
328 | for (auto &I : instructions(F)) { |
329 | if (I.getOpcode() == Instruction::FCmp) |
330 | convertFCmp(I&: cast<CmpInst>(Val&: I)); |
331 | else if (I.getOpcode() == Instruction::ICmp) |
332 | convertICmp(I&: cast<CmpInst>(Val&: I)); |
333 | } |
334 | |
335 | // If none found, bail already. |
336 | if (ConvertedInsts.empty()) |
337 | return false; |
338 | |
339 | // Process the queue of logic instructions. |
340 | while (!LogicOpsWorklist.empty()) { |
341 | BinaryOperator *Op = LogicOpsWorklist.back(); |
342 | LogicOpsWorklist.pop_back(); |
343 | // If both operands mapped, and the instruction itself not yet mapped, |
344 | // convert it. |
345 | if (ConvertedInsts.count(Key: dyn_cast<Instruction>(Val: Op->getOperand(i_nocapture: 0))) && |
346 | ConvertedInsts.count(Key: dyn_cast<Instruction>(Val: Op->getOperand(i_nocapture: 1))) && |
347 | !ConvertedInsts.count(Key: Op)) |
348 | convertLogicOp(I&: *Op); |
349 | } |
350 | |
351 | // Time to actually replace the instructions. Do it in the reverse order |
352 | // of finding them, since there's a good chance the earlier ones will be |
353 | // unused (due to being folded into later ones). |
354 | Module &M = *F.getParent(); |
355 | auto &Ctx = M.getContext(); |
356 | Value *Zero32 = ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx), V: 0); |
357 | bool MadeChange = false; |
358 | for (auto &It : reverse(C&: ConvertedInsts)) { |
359 | Instruction *I = It.first; |
360 | Value *V; |
361 | int Mask; |
362 | bool Worthy; |
363 | std::tie(args&: V, args&: Mask, args&: Worthy) = It.second; |
364 | if (!I->user_empty()) { |
365 | // If used and unworthy of conversion, skip it. |
366 | if (!Worthy) |
367 | continue; |
368 | // Call the intrinsic, compare result with 0. |
369 | Function *TDCFunc = |
370 | Intrinsic::getDeclaration(M: &M, id: Intrinsic::s390_tdc, Tys: V->getType()); |
371 | IRBuilder<> IRB(I); |
372 | Value *MaskVal = ConstantInt::get(Ty: Type::getInt64Ty(C&: Ctx), V: Mask); |
373 | Instruction *TDC = IRB.CreateCall(Callee: TDCFunc, Args: {V, MaskVal}); |
374 | Value *ICmp = IRB.CreateICmp(P: CmpInst::ICMP_NE, LHS: TDC, RHS: Zero32); |
375 | I->replaceAllUsesWith(V: ICmp); |
376 | } |
377 | // If unused, or used and converted, remove it. |
378 | I->eraseFromParent(); |
379 | MadeChange = true; |
380 | } |
381 | |
382 | if (!MadeChange) |
383 | return false; |
384 | |
385 | // We've actually done something - now clear misc accumulated junk (fabs, |
386 | // bitcast). |
387 | for (auto *I : PossibleJunk) |
388 | if (I->user_empty()) |
389 | I->eraseFromParent(); |
390 | |
391 | return true; |
392 | } |
393 | |