1 | //===- PoisonChecking.cpp - -----------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Implements a transform pass which instruments IR such that poison semantics |
10 | // are made explicit. That is, it provides a (possibly partial) executable |
11 | // semantics for every instruction w.r.t. poison as specified in the LLVM |
12 | // LangRef. There are obvious parallels to the sanitizer tools, but this pass |
13 | // is focused purely on the semantics of LLVM IR, not any particular source |
14 | // language. If you're looking for something to see if your C/C++ contains |
15 | // UB, this is not it. |
16 | // |
17 | // The rewritten semantics of each instruction will include the following |
18 | // components: |
19 | // |
20 | // 1) The original instruction, unmodified. |
21 | // 2) A propagation rule which translates dynamic information about the poison |
22 | // state of each input to whether the dynamic output of the instruction |
23 | // produces poison. |
24 | // 3) A creation rule which validates any poison producing flags on the |
25 | // instruction itself (e.g. checks for overflow on nsw). |
26 | // 4) A check rule which traps (to a handler function) if this instruction must |
27 | // execute undefined behavior given the poison state of it's inputs. |
28 | // |
29 | // This is a must analysis based transform; that is, the resulting code may |
30 | // produce a false negative result (not report UB when actually exists |
31 | // according to the LangRef spec), but should never produce a false positive |
32 | // (report UB where it doesn't exist). |
33 | // |
34 | // Use cases for this pass include: |
35 | // - Understanding (and testing!) the implications of the definition of poison |
36 | // from the LangRef. |
37 | // - Validating the output of a IR fuzzer to ensure that all programs produced |
38 | // are well defined on the specific input used. |
39 | // - Finding/confirming poison specific miscompiles by checking the poison |
40 | // status of an input/IR pair is the same before and after an optimization |
41 | // transform. |
42 | // - Checking that a bugpoint reduction does not introduce UB which didn't |
43 | // exist in the original program being reduced. |
44 | // |
45 | // The major sources of inaccuracy are currently: |
46 | // - Most validation rules not yet implemented for instructions with poison |
47 | // relavant flags. At the moment, only nsw/nuw on add/sub are supported. |
48 | // - UB which is control dependent on a branch on poison is not yet |
49 | // reported. Currently, only data flow dependence is modeled. |
50 | // - Poison which is propagated through memory is not modeled. As such, |
51 | // storing poison to memory and then reloading it will cause a false negative |
52 | // as we consider the reloaded value to not be poisoned. |
53 | // - Poison propagation across function boundaries is not modeled. At the |
54 | // moment, all arguments and return values are assumed not to be poison. |
55 | // - Undef is not modeled. In particular, the optimizer's freedom to pick |
56 | // concrete values for undef bits so as to maximize potential for producing |
57 | // poison is not modeled. |
58 | // |
59 | //===----------------------------------------------------------------------===// |
60 | |
61 | #include "llvm/Transforms/Instrumentation/PoisonChecking.h" |
62 | #include "llvm/ADT/DenseMap.h" |
63 | #include "llvm/Analysis/ValueTracking.h" |
64 | #include "llvm/IR/IRBuilder.h" |
65 | #include "llvm/IR/Module.h" |
66 | #include "llvm/Support/CommandLine.h" |
67 | |
68 | using namespace llvm; |
69 | |
70 | #define DEBUG_TYPE "poison-checking" |
71 | |
72 | static cl::opt<bool> |
73 | LocalCheck("poison-checking-function-local" , |
74 | cl::init(Val: false), |
75 | cl::desc("Check that returns are non-poison (for testing)" )); |
76 | |
77 | |
78 | static bool isConstantFalse(Value* V) { |
79 | assert(V->getType()->isIntegerTy(1)); |
80 | if (auto *CI = dyn_cast<ConstantInt>(Val: V)) |
81 | return CI->isZero(); |
82 | return false; |
83 | } |
84 | |
85 | static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) { |
86 | if (Ops.size() == 0) |
87 | return B.getFalse(); |
88 | unsigned i = 0; |
89 | for (; i < Ops.size() && isConstantFalse(V: Ops[i]); i++) {} |
90 | if (i == Ops.size()) |
91 | return B.getFalse(); |
92 | Value *Accum = Ops[i++]; |
93 | for (Value *Op : llvm::drop_begin(RangeOrContainer&: Ops, N: i)) |
94 | if (!isConstantFalse(V: Op)) |
95 | Accum = B.CreateOr(LHS: Accum, RHS: Op); |
96 | return Accum; |
97 | } |
98 | |
99 | static void generateCreationChecksForBinOp(Instruction &I, |
100 | SmallVectorImpl<Value*> &Checks) { |
101 | assert(isa<BinaryOperator>(I)); |
102 | |
103 | IRBuilder<> B(&I); |
104 | Value *LHS = I.getOperand(i: 0); |
105 | Value *RHS = I.getOperand(i: 1); |
106 | switch (I.getOpcode()) { |
107 | default: |
108 | return; |
109 | case Instruction::Add: { |
110 | if (I.hasNoSignedWrap()) { |
111 | auto *OverflowOp = |
112 | B.CreateBinaryIntrinsic(ID: Intrinsic::sadd_with_overflow, LHS, RHS); |
113 | Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1)); |
114 | } |
115 | if (I.hasNoUnsignedWrap()) { |
116 | auto *OverflowOp = |
117 | B.CreateBinaryIntrinsic(ID: Intrinsic::uadd_with_overflow, LHS, RHS); |
118 | Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1)); |
119 | } |
120 | break; |
121 | } |
122 | case Instruction::Sub: { |
123 | if (I.hasNoSignedWrap()) { |
124 | auto *OverflowOp = |
125 | B.CreateBinaryIntrinsic(ID: Intrinsic::ssub_with_overflow, LHS, RHS); |
126 | Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1)); |
127 | } |
128 | if (I.hasNoUnsignedWrap()) { |
129 | auto *OverflowOp = |
130 | B.CreateBinaryIntrinsic(ID: Intrinsic::usub_with_overflow, LHS, RHS); |
131 | Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1)); |
132 | } |
133 | break; |
134 | } |
135 | case Instruction::Mul: { |
136 | if (I.hasNoSignedWrap()) { |
137 | auto *OverflowOp = |
138 | B.CreateBinaryIntrinsic(ID: Intrinsic::smul_with_overflow, LHS, RHS); |
139 | Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1)); |
140 | } |
141 | if (I.hasNoUnsignedWrap()) { |
142 | auto *OverflowOp = |
143 | B.CreateBinaryIntrinsic(ID: Intrinsic::umul_with_overflow, LHS, RHS); |
144 | Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: 1)); |
145 | } |
146 | break; |
147 | } |
148 | case Instruction::UDiv: { |
149 | if (I.isExact()) { |
150 | auto *Check = |
151 | B.CreateICmp(P: ICmpInst::ICMP_NE, LHS: B.CreateURem(LHS, RHS), |
152 | RHS: ConstantInt::get(Ty: LHS->getType(), V: 0)); |
153 | Checks.push_back(Elt: Check); |
154 | } |
155 | break; |
156 | } |
157 | case Instruction::SDiv: { |
158 | if (I.isExact()) { |
159 | auto *Check = |
160 | B.CreateICmp(P: ICmpInst::ICMP_NE, LHS: B.CreateSRem(LHS, RHS), |
161 | RHS: ConstantInt::get(Ty: LHS->getType(), V: 0)); |
162 | Checks.push_back(Elt: Check); |
163 | } |
164 | break; |
165 | } |
166 | case Instruction::AShr: |
167 | case Instruction::LShr: |
168 | case Instruction::Shl: { |
169 | Value *ShiftCheck = |
170 | B.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: RHS, |
171 | RHS: ConstantInt::get(Ty: RHS->getType(), |
172 | V: LHS->getType()->getScalarSizeInBits())); |
173 | Checks.push_back(Elt: ShiftCheck); |
174 | break; |
175 | } |
176 | }; |
177 | } |
178 | |
179 | /// Given an instruction which can produce poison on non-poison inputs |
180 | /// (i.e. canCreatePoison returns true), generate runtime checks to produce |
181 | /// boolean indicators of when poison would result. |
182 | static void generateCreationChecks(Instruction &I, |
183 | SmallVectorImpl<Value*> &Checks) { |
184 | IRBuilder<> B(&I); |
185 | if (isa<BinaryOperator>(Val: I) && !I.getType()->isVectorTy()) |
186 | generateCreationChecksForBinOp(I, Checks); |
187 | |
188 | // Handle non-binops separately |
189 | switch (I.getOpcode()) { |
190 | default: |
191 | // Note there are a couple of missing cases here, once implemented, this |
192 | // should become an llvm_unreachable. |
193 | break; |
194 | case Instruction::ExtractElement: { |
195 | Value *Vec = I.getOperand(i: 0); |
196 | auto *VecVTy = dyn_cast<FixedVectorType>(Val: Vec->getType()); |
197 | if (!VecVTy) |
198 | break; |
199 | Value *Idx = I.getOperand(i: 1); |
200 | unsigned NumElts = VecVTy->getNumElements(); |
201 | Value *Check = |
202 | B.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Idx, |
203 | RHS: ConstantInt::get(Ty: Idx->getType(), V: NumElts)); |
204 | Checks.push_back(Elt: Check); |
205 | break; |
206 | } |
207 | case Instruction::InsertElement: { |
208 | Value *Vec = I.getOperand(i: 0); |
209 | auto *VecVTy = dyn_cast<FixedVectorType>(Val: Vec->getType()); |
210 | if (!VecVTy) |
211 | break; |
212 | Value *Idx = I.getOperand(i: 2); |
213 | unsigned NumElts = VecVTy->getNumElements(); |
214 | Value *Check = |
215 | B.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Idx, |
216 | RHS: ConstantInt::get(Ty: Idx->getType(), V: NumElts)); |
217 | Checks.push_back(Elt: Check); |
218 | break; |
219 | } |
220 | }; |
221 | } |
222 | |
223 | static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) { |
224 | auto Itr = ValToPoison.find(Val: V); |
225 | if (Itr != ValToPoison.end()) |
226 | return Itr->second; |
227 | if (isa<Constant>(Val: V)) { |
228 | return ConstantInt::getFalse(Context&: V->getContext()); |
229 | } |
230 | // Return false for unknwon values - this implements a non-strict mode where |
231 | // unhandled IR constructs are simply considered to never produce poison. At |
232 | // some point in the future, we probably want a "strict mode" for testing if |
233 | // nothing else. |
234 | return ConstantInt::getFalse(Context&: V->getContext()); |
235 | } |
236 | |
237 | static void CreateAssert(IRBuilder<> &B, Value *Cond) { |
238 | assert(Cond->getType()->isIntegerTy(1)); |
239 | if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) |
240 | if (CI->isAllOnesValue()) |
241 | return; |
242 | |
243 | Module *M = B.GetInsertBlock()->getModule(); |
244 | M->getOrInsertFunction(Name: "__poison_checker_assert" , |
245 | RetTy: Type::getVoidTy(C&: M->getContext()), |
246 | Args: Type::getInt1Ty(C&: M->getContext())); |
247 | Function *TrapFunc = M->getFunction(Name: "__poison_checker_assert" ); |
248 | B.CreateCall(Callee: TrapFunc, Args: Cond); |
249 | } |
250 | |
251 | static void CreateAssertNot(IRBuilder<> &B, Value *Cond) { |
252 | assert(Cond->getType()->isIntegerTy(1)); |
253 | CreateAssert(B, Cond: B.CreateNot(V: Cond)); |
254 | } |
255 | |
256 | static bool rewrite(Function &F) { |
257 | auto * const Int1Ty = Type::getInt1Ty(C&: F.getContext()); |
258 | |
259 | DenseMap<Value *, Value *> ValToPoison; |
260 | |
261 | for (BasicBlock &BB : F) |
262 | for (auto I = BB.begin(); isa<PHINode>(Val: &*I); I++) { |
263 | auto *OldPHI = cast<PHINode>(Val: &*I); |
264 | auto *NewPHI = PHINode::Create(Ty: Int1Ty, NumReservedValues: OldPHI->getNumIncomingValues()); |
265 | for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) |
266 | NewPHI->addIncoming(V: UndefValue::get(T: Int1Ty), |
267 | BB: OldPHI->getIncomingBlock(i)); |
268 | NewPHI->insertBefore(InsertPos: OldPHI); |
269 | ValToPoison[OldPHI] = NewPHI; |
270 | } |
271 | |
272 | for (BasicBlock &BB : F) |
273 | for (Instruction &I : BB) { |
274 | if (isa<PHINode>(Val: I)) continue; |
275 | |
276 | IRBuilder<> B(cast<Instruction>(Val: &I)); |
277 | |
278 | // Note: There are many more sources of documented UB, but this pass only |
279 | // attempts to find UB triggered by propagation of poison. |
280 | SmallVector<const Value *, 4> NonPoisonOps; |
281 | SmallPtrSet<const Value *, 4> SeenNonPoisonOps; |
282 | getGuaranteedNonPoisonOps(I: &I, Ops&: NonPoisonOps); |
283 | for (const Value *Op : NonPoisonOps) |
284 | if (SeenNonPoisonOps.insert(Ptr: Op).second) |
285 | CreateAssertNot(B, |
286 | Cond: getPoisonFor(ValToPoison, V: const_cast<Value *>(Op))); |
287 | |
288 | if (LocalCheck) |
289 | if (auto *RI = dyn_cast<ReturnInst>(Val: &I)) |
290 | if (RI->getNumOperands() != 0) { |
291 | Value *Op = RI->getOperand(i_nocapture: 0); |
292 | CreateAssertNot(B, Cond: getPoisonFor(ValToPoison, V: Op)); |
293 | } |
294 | |
295 | SmallVector<Value*, 4> Checks; |
296 | for (const Use &U : I.operands()) { |
297 | if (ValToPoison.count(Val: U) && propagatesPoison(PoisonOp: U)) |
298 | Checks.push_back(Elt: getPoisonFor(ValToPoison, V: U)); |
299 | } |
300 | |
301 | if (canCreatePoison(Op: cast<Operator>(Val: &I))) |
302 | generateCreationChecks(I, Checks); |
303 | ValToPoison[&I] = buildOrChain(B, Ops: Checks); |
304 | } |
305 | |
306 | for (BasicBlock &BB : F) |
307 | for (auto I = BB.begin(); isa<PHINode>(Val: &*I); I++) { |
308 | auto *OldPHI = cast<PHINode>(Val: &*I); |
309 | if (!ValToPoison.count(Val: OldPHI)) |
310 | continue; // skip the newly inserted phis |
311 | auto *NewPHI = cast<PHINode>(Val: ValToPoison[OldPHI]); |
312 | for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) { |
313 | auto *OldVal = OldPHI->getIncomingValue(i); |
314 | NewPHI->setIncomingValue(i, V: getPoisonFor(ValToPoison, V: OldVal)); |
315 | } |
316 | } |
317 | return true; |
318 | } |
319 | |
320 | |
321 | PreservedAnalyses PoisonCheckingPass::run(Module &M, |
322 | ModuleAnalysisManager &AM) { |
323 | bool Changed = false; |
324 | for (auto &F : M) |
325 | Changed |= rewrite(F); |
326 | |
327 | return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); |
328 | } |
329 | |
330 | PreservedAnalyses PoisonCheckingPass::run(Function &F, |
331 | FunctionAnalysisManager &AM) { |
332 | return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all(); |
333 | } |
334 | |
335 | /* Major TODO Items: |
336 | - Control dependent poison UB |
337 | - Strict mode - (i.e. must analyze every operand) |
338 | - Poison through memory |
339 | - Function ABIs |
340 | - Full coverage of intrinsics, etc.. (ouch) |
341 | |
342 | Instructions w/Unclear Semantics: |
343 | - shufflevector - It would seem reasonable for an out of bounds mask element |
344 | to produce poison, but the LangRef does not state. |
345 | - all binary ops w/vector operands - The likely interpretation would be that |
346 | any element overflowing should produce poison for the entire result, but |
347 | the LangRef does not state. |
348 | - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems |
349 | strange that only certian flags should be documented as producing poison. |
350 | |
351 | Cases of clear poison semantics not yet implemented: |
352 | - Exact flags on ashr/lshr produce poison |
353 | - NSW/NUW flags on shl produce poison |
354 | - Inbounds flag on getelementptr produce poison |
355 | - fptosi/fptoui (out of bounds input) produce poison |
356 | - Scalable vector types for insertelement/extractelement |
357 | - Floating point binary ops w/fmf nnan/noinfs flags produce poison |
358 | */ |
359 | |