1//===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass looks for instructions that can be replaced by a Test Data Class
10// instruction, and replaces them when profitable.
11//
12// Roughly, the following rules are recognized:
13//
14// 1: fcmp pred X, 0 -> tdc X, mask
15// 2: fcmp pred X, +-inf -> tdc X, mask
16// 3: fcmp pred X, +-minnorm -> tdc X, mask
17// 4: tdc (fabs X), mask -> tdc X, newmask
18// 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit]
19// 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
20// 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
21// 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
22// 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
23// 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
24//
25// The pass works in 4 steps:
26//
27// 1. All fcmp and icmp instructions in a function are checked for a match
28// with rules 1-3 and 5-7. Their TDC equivalents are stored in
29// the ConvertedInsts mapping. If the operand of a fcmp instruction is
30// a fabs, it's also folded according to rule 4.
31// 2. All and/or/xor i1 instructions whose both operands have been already
32// mapped are mapped according to rules 8-10. LogicOpsWorklist is used
33// as a queue of instructions to check.
34// 3. All mapped instructions that are considered worthy of conversion (ie.
35// replacing them will actually simplify the final code) are replaced
36// with a call to the s390.tdc intrinsic.
37// 4. All intermediate results of replaced instructions are removed if unused.
38//
39// Instructions that match rules 1-3 are considered unworthy of conversion
40// on their own (since a comparison instruction is superior), but are mapped
41// in the hopes of folding the result using rules 4 and 8-10 (likely removing
42// the original comparison in the process).
43//
44//===----------------------------------------------------------------------===//
45
46#include "SystemZ.h"
47#include "SystemZSubtarget.h"
48#include "llvm/ADT/MapVector.h"
49#include "llvm/CodeGen/TargetPassConfig.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/IRBuilder.h"
52#include "llvm/IR/InstIterator.h"
53#include "llvm/IR/Instructions.h"
54#include "llvm/IR/IntrinsicInst.h"
55#include "llvm/IR/IntrinsicsS390.h"
56#include "llvm/IR/LegacyPassManager.h"
57#include "llvm/IR/Module.h"
58#include "llvm/Target/TargetMachine.h"
59#include <deque>
60#include <set>
61
62using namespace llvm;
63
64namespace {
65
66class SystemZTDCPass : public FunctionPass {
67public:
68 static char ID;
69 SystemZTDCPass() : FunctionPass(ID) {
70 initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry());
71 }
72
73 bool runOnFunction(Function &F) override;
74
75 void getAnalysisUsage(AnalysisUsage &AU) const override {
76 AU.addRequired<TargetPassConfig>();
77 }
78
79private:
80 // Maps seen instructions that can be mapped to a TDC, values are
81 // (TDC operand, TDC mask, worthy flag) triples.
82 MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts;
83 // The queue of and/or/xor i1 instructions to be potentially folded.
84 std::vector<BinaryOperator *> LogicOpsWorklist;
85 // Instructions matched while folding, to be removed at the end if unused.
86 std::set<Instruction *> PossibleJunk;
87
88 // Tries to convert a fcmp instruction.
89 void convertFCmp(CmpInst &I);
90
91 // Tries to convert an icmp instruction.
92 void convertICmp(CmpInst &I);
93
94 // Tries to convert an i1 and/or/xor instruction, whose both operands
95 // have been already converted.
96 void convertLogicOp(BinaryOperator &I);
97
98 // Marks an instruction as converted - adds it to ConvertedInsts and adds
99 // any and/or/xor i1 users to the queue.
100 void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
101 ConvertedInsts[I] = std::make_tuple(args&: V, args&: Mask, args&: Worthy);
102 auto &M = *I->getFunction()->getParent();
103 auto &Ctx = M.getContext();
104 for (auto *U : I->users()) {
105 auto *LI = dyn_cast<BinaryOperator>(Val: U);
106 if (LI && LI->getType() == Type::getInt1Ty(C&: Ctx) &&
107 (LI->getOpcode() == Instruction::And ||
108 LI->getOpcode() == Instruction::Or ||
109 LI->getOpcode() == Instruction::Xor)) {
110 LogicOpsWorklist.push_back(x: LI);
111 }
112 }
113 }
114};
115
116} // end anonymous namespace
117
118char SystemZTDCPass::ID = 0;
119INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
120 "SystemZ Test Data Class optimization", false, false)
121
122FunctionPass *llvm::createSystemZTDCPass() {
123 return new SystemZTDCPass();
124}
125
126void SystemZTDCPass::convertFCmp(CmpInst &I) {
127 Value *Op0 = I.getOperand(i_nocapture: 0);
128 auto *Const = dyn_cast<ConstantFP>(Val: I.getOperand(i_nocapture: 1));
129 auto Pred = I.getPredicate();
130 // Only comparisons with consts are interesting.
131 if (!Const)
132 return;
133 // Compute the smallest normal number (and its negation).
134 auto &Sem = Op0->getType()->getFltSemantics();
135 APFloat Smallest = APFloat::getSmallestNormalized(Sem);
136 APFloat NegSmallest = Smallest;
137 NegSmallest.changeSign();
138 // Check if Const is one of our recognized consts.
139 int WhichConst;
140 if (Const->isZero()) {
141 // All comparisons with 0 can be converted.
142 WhichConst = 0;
143 } else if (Const->isInfinity()) {
144 // Likewise for infinities.
145 WhichConst = Const->isNegative() ? 2 : 1;
146 } else if (Const->isExactlyValue(V: Smallest)) {
147 // For Smallest, we cannot do EQ separately from GT.
148 if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
149 (Pred & CmpInst::FCMP_OGE) != 0)
150 return;
151 WhichConst = 3;
152 } else if (Const->isExactlyValue(V: NegSmallest)) {
153 // Likewise for NegSmallest, we cannot do EQ separately from LT.
154 if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
155 (Pred & CmpInst::FCMP_OLE) != 0)
156 return;
157 WhichConst = 4;
158 } else {
159 // Not one of our special constants.
160 return;
161 }
162 // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
163 static const int Masks[][4] = {
164 { // 0
165 SystemZ::TDCMASK_ZERO, // eq
166 SystemZ::TDCMASK_POSITIVE, // gt
167 SystemZ::TDCMASK_NEGATIVE, // lt
168 SystemZ::TDCMASK_NAN, // un
169 },
170 { // inf
171 SystemZ::TDCMASK_INFINITY_PLUS, // eq
172 0, // gt
173 (SystemZ::TDCMASK_ZERO |
174 SystemZ::TDCMASK_NEGATIVE |
175 SystemZ::TDCMASK_NORMAL_PLUS |
176 SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt
177 SystemZ::TDCMASK_NAN, // un
178 },
179 { // -inf
180 SystemZ::TDCMASK_INFINITY_MINUS, // eq
181 (SystemZ::TDCMASK_ZERO |
182 SystemZ::TDCMASK_POSITIVE |
183 SystemZ::TDCMASK_NORMAL_MINUS |
184 SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
185 0, // lt
186 SystemZ::TDCMASK_NAN, // un
187 },
188 { // minnorm
189 0, // eq (unsupported)
190 (SystemZ::TDCMASK_NORMAL_PLUS |
191 SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge)
192 (SystemZ::TDCMASK_ZERO |
193 SystemZ::TDCMASK_NEGATIVE |
194 SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt
195 SystemZ::TDCMASK_NAN, // un
196 },
197 { // -minnorm
198 0, // eq (unsupported)
199 (SystemZ::TDCMASK_ZERO |
200 SystemZ::TDCMASK_POSITIVE |
201 SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt
202 (SystemZ::TDCMASK_NORMAL_MINUS |
203 SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le)
204 SystemZ::TDCMASK_NAN, // un
205 }
206 };
207 // Construct the mask as a combination of the partial masks.
208 int Mask = 0;
209 if (Pred & CmpInst::FCMP_OEQ)
210 Mask |= Masks[WhichConst][0];
211 if (Pred & CmpInst::FCMP_OGT)
212 Mask |= Masks[WhichConst][1];
213 if (Pred & CmpInst::FCMP_OLT)
214 Mask |= Masks[WhichConst][2];
215 if (Pred & CmpInst::FCMP_UNO)
216 Mask |= Masks[WhichConst][3];
217 // A lone fcmp is unworthy of tdc conversion on its own, but may become
218 // worthy if combined with fabs.
219 bool Worthy = false;
220 if (CallInst *CI = dyn_cast<CallInst>(Val: Op0)) {
221 Function *F = CI->getCalledFunction();
222 if (F && F->getIntrinsicID() == Intrinsic::fabs) {
223 // Fold with fabs - adjust the mask appropriately.
224 Mask &= SystemZ::TDCMASK_PLUS;
225 Mask |= Mask >> 1;
226 Op0 = CI->getArgOperand(i: 0);
227 // A combination of fcmp with fabs is a win, unless the constant
228 // involved is 0 (which is handled by later passes).
229 Worthy = WhichConst != 0;
230 PossibleJunk.insert(x: CI);
231 }
232 }
233 converted(I: &I, V: Op0, Mask, Worthy);
234}
235
236void SystemZTDCPass::convertICmp(CmpInst &I) {
237 Value *Op0 = I.getOperand(i_nocapture: 0);
238 auto *Const = dyn_cast<ConstantInt>(Val: I.getOperand(i_nocapture: 1));
239 auto Pred = I.getPredicate();
240 // All our icmp rules involve comparisons with consts.
241 if (!Const)
242 return;
243 if (auto *Cast = dyn_cast<BitCastInst>(Val: Op0)) {
244 // Check for icmp+bitcast used for signbit.
245 if (!Cast->getSrcTy()->isFloatTy() &&
246 !Cast->getSrcTy()->isDoubleTy() &&
247 !Cast->getSrcTy()->isFP128Ty())
248 return;
249 Value *V = Cast->getOperand(i_nocapture: 0);
250 int Mask;
251 if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
252 // icmp slt (bitcast X), 0 - set if sign bit true
253 Mask = SystemZ::TDCMASK_MINUS;
254 } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
255 // icmp sgt (bitcast X), -1 - set if sign bit false
256 Mask = SystemZ::TDCMASK_PLUS;
257 } else {
258 // Not a sign bit check.
259 return;
260 }
261 PossibleJunk.insert(x: Cast);
262 converted(I: &I, V, Mask, Worthy: true);
263 } else if (auto *CI = dyn_cast<CallInst>(Val: Op0)) {
264 // Check if this is a pre-existing call of our tdc intrinsic.
265 Function *F = CI->getCalledFunction();
266 if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
267 return;
268 if (!Const->isZero())
269 return;
270 Value *V = CI->getArgOperand(i: 0);
271 auto *MaskC = dyn_cast<ConstantInt>(Val: CI->getArgOperand(i: 1));
272 // Bail if the mask is not a constant.
273 if (!MaskC)
274 return;
275 int Mask = MaskC->getZExtValue();
276 Mask &= SystemZ::TDCMASK_ALL;
277 if (Pred == CmpInst::ICMP_NE) {
278 // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
279 } else if (Pred == CmpInst::ICMP_EQ) {
280 // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
281 Mask ^= SystemZ::TDCMASK_ALL;
282 } else {
283 // An unknown comparison - ignore.
284 return;
285 }
286 PossibleJunk.insert(x: CI);
287 converted(I: &I, V, Mask, Worthy: false);
288 }
289}
290
291void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
292 Value *Op0, *Op1;
293 int Mask0, Mask1;
294 bool Worthy0, Worthy1;
295 std::tie(args&: Op0, args&: Mask0, args&: Worthy0) = ConvertedInsts[cast<Instruction>(Val: I.getOperand(i_nocapture: 0))];
296 std::tie(args&: Op1, args&: Mask1, args&: Worthy1) = ConvertedInsts[cast<Instruction>(Val: I.getOperand(i_nocapture: 1))];
297 if (Op0 != Op1)
298 return;
299 int Mask;
300 switch (I.getOpcode()) {
301 case Instruction::And:
302 Mask = Mask0 & Mask1;
303 break;
304 case Instruction::Or:
305 Mask = Mask0 | Mask1;
306 break;
307 case Instruction::Xor:
308 Mask = Mask0 ^ Mask1;
309 break;
310 default:
311 llvm_unreachable("Unknown op in convertLogicOp");
312 }
313 converted(I: &I, V: Op0, Mask, Worthy: true);
314}
315
316bool SystemZTDCPass::runOnFunction(Function &F) {
317 auto &TPC = getAnalysis<TargetPassConfig>();
318 if (TPC.getTM<TargetMachine>()
319 .getSubtarget<SystemZSubtarget>(F)
320 .hasSoftFloat())
321 return false;
322
323 ConvertedInsts.clear();
324 LogicOpsWorklist.clear();
325 PossibleJunk.clear();
326
327 // Look for icmp+fcmp instructions.
328 for (auto &I : instructions(F)) {
329 if (I.getOpcode() == Instruction::FCmp)
330 convertFCmp(I&: cast<CmpInst>(Val&: I));
331 else if (I.getOpcode() == Instruction::ICmp)
332 convertICmp(I&: cast<CmpInst>(Val&: I));
333 }
334
335 // If none found, bail already.
336 if (ConvertedInsts.empty())
337 return false;
338
339 // Process the queue of logic instructions.
340 while (!LogicOpsWorklist.empty()) {
341 BinaryOperator *Op = LogicOpsWorklist.back();
342 LogicOpsWorklist.pop_back();
343 // If both operands mapped, and the instruction itself not yet mapped,
344 // convert it.
345 if (ConvertedInsts.count(Key: dyn_cast<Instruction>(Val: Op->getOperand(i_nocapture: 0))) &&
346 ConvertedInsts.count(Key: dyn_cast<Instruction>(Val: Op->getOperand(i_nocapture: 1))) &&
347 !ConvertedInsts.count(Key: Op))
348 convertLogicOp(I&: *Op);
349 }
350
351 // Time to actually replace the instructions. Do it in the reverse order
352 // of finding them, since there's a good chance the earlier ones will be
353 // unused (due to being folded into later ones).
354 Module &M = *F.getParent();
355 auto &Ctx = M.getContext();
356 Value *Zero32 = ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx), V: 0);
357 bool MadeChange = false;
358 for (auto &It : reverse(C&: ConvertedInsts)) {
359 Instruction *I = It.first;
360 Value *V;
361 int Mask;
362 bool Worthy;
363 std::tie(args&: V, args&: Mask, args&: Worthy) = It.second;
364 if (!I->user_empty()) {
365 // If used and unworthy of conversion, skip it.
366 if (!Worthy)
367 continue;
368 // Call the intrinsic, compare result with 0.
369 Function *TDCFunc =
370 Intrinsic::getDeclaration(M: &M, id: Intrinsic::s390_tdc, Tys: V->getType());
371 IRBuilder<> IRB(I);
372 Value *MaskVal = ConstantInt::get(Ty: Type::getInt64Ty(C&: Ctx), V: Mask);
373 Instruction *TDC = IRB.CreateCall(Callee: TDCFunc, Args: {V, MaskVal});
374 Value *ICmp = IRB.CreateICmp(P: CmpInst::ICMP_NE, LHS: TDC, RHS: Zero32);
375 I->replaceAllUsesWith(V: ICmp);
376 }
377 // If unused, or used and converted, remove it.
378 I->eraseFromParent();
379 MadeChange = true;
380 }
381
382 if (!MadeChange)
383 return false;
384
385 // We've actually done something - now clear misc accumulated junk (fabs,
386 // bitcast).
387 for (auto *I : PossibleJunk)
388 if (I->user_empty())
389 I->eraseFromParent();
390
391 return true;
392}
393