PoisonChecking.cpp source code [llvm_projects/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp]

1	//===- PoisonChecking.cpp - -----------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Implements a transform pass which instruments IR such that poison semantics
10	// are made explicit. That is, it provides a (possibly partial) executable
11	// semantics for every instruction w.r.t. poison as specified in the LLVM
12	// LangRef. There are obvious parallels to the sanitizer tools, but this pass
13	// is focused purely on the semantics of LLVM IR, not any particular source
14	// language. If you're looking for something to see if your C/C++ contains
15	// UB, this is not it.
16	//
17	// The rewritten semantics of each instruction will include the following
18	// components:
19	//
20	// 1) The original instruction, unmodified.
21	// 2) A propagation rule which translates dynamic information about the poison
22	// state of each input to whether the dynamic output of the instruction
23	// produces poison.
24	// 3) A creation rule which validates any poison producing flags on the
25	// instruction itself (e.g. checks for overflow on nsw).
26	// 4) A check rule which traps (to a handler function) if this instruction must
27	// execute undefined behavior given the poison state of it's inputs.
28	//
29	// This is a must analysis based transform; that is, the resulting code may
30	// produce a false negative result (not report UB when actually exists
31	// according to the LangRef spec), but should never produce a false positive
32	// (report UB where it doesn't exist).
33	//
34	// Use cases for this pass include:
35	// - Understanding (and testing!) the implications of the definition of poison
36	// from the LangRef.
37	// - Validating the output of a IR fuzzer to ensure that all programs produced
38	// are well defined on the specific input used.
39	// - Finding/confirming poison specific miscompiles by checking the poison
40	// status of an input/IR pair is the same before and after an optimization
41	// transform.
42	// - Checking that a bugpoint reduction does not introduce UB which didn't
43	// exist in the original program being reduced.
44	//
45	// The major sources of inaccuracy are currently:
46	// - Most validation rules not yet implemented for instructions with poison
47	// relavant flags. At the moment, only nsw/nuw on add/sub are supported.
48	// - UB which is control dependent on a branch on poison is not yet
49	// reported. Currently, only data flow dependence is modeled.
50	// - Poison which is propagated through memory is not modeled. As such,
51	// storing poison to memory and then reloading it will cause a false negative
52	// as we consider the reloaded value to not be poisoned.
53	// - Poison propagation across function boundaries is not modeled. At the
54	// moment, all arguments and return values are assumed not to be poison.
55	// - Undef is not modeled. In particular, the optimizer's freedom to pick
56	// concrete values for undef bits so as to maximize potential for producing
57	// poison is not modeled.
58	//
59	//===----------------------------------------------------------------------===//
60
61	#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
62	#include "llvm/ADT/DenseMap.h"
63	#include "llvm/Analysis/ValueTracking.h"
64	#include "llvm/IR/IRBuilder.h"
65	#include "llvm/IR/Module.h"
66	#include "llvm/Support/CommandLine.h"
67
68	using namespace llvm;
69
70	#define DEBUG_TYPE "poison-checking"
71
72	static cl::opt<bool>
73	LocalCheck("poison-checking-function-local",
74	cl::init(Val: false),
75	cl::desc ("Check that returns are non-poison (for testing)"));
76
77
78	static bool isConstantFalse(Value* V) {
79	assert(V->getType()->isIntegerTy(`1`));
80	if (auto *CI = dyn_cast<ConstantInt>(Val: V))
81	return CI->isZero();
82	return false;
83	}
84
85	static Value buildOrChain(IRBuilder<> &B, ArrayRef<Value> Ops) {
86	if (Ops.size() == `0`)
87	return B.getFalse();
88	unsigned i = `0`;
89	for (; i < Ops.size() && isConstantFalse(V: Ops [i]); i++) {}
90	if (i == Ops.size())
91	return B.getFalse();
92	Value *Accum = Ops [i++];
93	for (Value *Op : llvm::drop_begin(RangeOrContainer&: Ops, N: i))
94	if (!isConstantFalse(V: Op))
95	Accum = B.CreateOr(LHS: Accum, RHS: Op);
96	return Accum;
97	}
98
99	static void generateCreationChecksForBinOp(Instruction &I,
100	SmallVectorImpl<Value*> &Checks) {
101	assert(isa<BinaryOperator>(I));
102
103	IRBuilder<> B(&I);
104	Value *LHS = I.getOperand(i: `0`);
105	Value *RHS = I.getOperand(i: `1`);
106	switch (I.getOpcode()) {
107	default:
108	return;
109	case Instruction::Add: {
110	if (I.hasNoSignedWrap()) {
111	auto *OverflowOp =
112	B.CreateBinaryIntrinsic(ID: Intrinsic::sadd_with_overflow, LHS, RHS);
113	Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: `1`));
114	}
115	if (I.hasNoUnsignedWrap()) {
116	auto *OverflowOp =
117	B.CreateBinaryIntrinsic(ID: Intrinsic::uadd_with_overflow, LHS, RHS);
118	Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: `1`));
119	}
120	break;
121	}
122	case Instruction::Sub: {
123	if (I.hasNoSignedWrap()) {
124	auto *OverflowOp =
125	B.CreateBinaryIntrinsic(ID: Intrinsic::ssub_with_overflow, LHS, RHS);
126	Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: `1`));
127	}
128	if (I.hasNoUnsignedWrap()) {
129	auto *OverflowOp =
130	B.CreateBinaryIntrinsic(ID: Intrinsic::usub_with_overflow, LHS, RHS);
131	Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: `1`));
132	}
133	break;
134	}
135	case Instruction::Mul: {
136	if (I.hasNoSignedWrap()) {
137	auto *OverflowOp =
138	B.CreateBinaryIntrinsic(ID: Intrinsic::smul_with_overflow, LHS, RHS);
139	Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: `1`));
140	}
141	if (I.hasNoUnsignedWrap()) {
142	auto *OverflowOp =
143	B.CreateBinaryIntrinsic(ID: Intrinsic::umul_with_overflow, LHS, RHS);
144	Checks.push_back(Elt: B.CreateExtractValue(Agg: OverflowOp, Idxs: `1`));
145	}
146	break;
147	}
148	case Instruction::UDiv: {
149	if (I.isExact()) {
150	auto *Check =
151	B.CreateICmp(P: ICmpInst::ICMP_NE, LHS: B.CreateURem(LHS, RHS),
152	RHS: ConstantInt::get(Ty: LHS->getType(), V: `0`));
153	Checks.push_back(Elt: Check);
154	}
155	break;
156	}
157	case Instruction::SDiv: {
158	if (I.isExact()) {
159	auto *Check =
160	B.CreateICmp(P: ICmpInst::ICMP_NE, LHS: B.CreateSRem(LHS, RHS),
161	RHS: ConstantInt::get(Ty: LHS->getType(), V: `0`));
162	Checks.push_back(Elt: Check);
163	}
164	break;
165	}
166	case Instruction::AShr:
167	case Instruction::LShr:
168	case Instruction::Shl: {
169	Value *ShiftCheck =
170	B.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: RHS,
171	RHS: ConstantInt::get(Ty: RHS->getType(),
172	V: LHS->getType()->getScalarSizeInBits()));
173	Checks.push_back(Elt: ShiftCheck);
174	break;
175	}
176	};
177	}
178
179	/// Given an instruction which can produce poison on non-poison inputs
180	/// (i.e. canCreatePoison returns true), generate runtime checks to produce
181	/// boolean indicators of when poison would result.
182	static void generateCreationChecks(Instruction &I,
183	SmallVectorImpl<Value*> &Checks) {
184	IRBuilder<> B(&I);
185	if (isa<BinaryOperator>(Val: I) && !I.getType()->isVectorTy())
186	generateCreationChecksForBinOp(I, Checks);
187
188	// Handle non-binops separately
189	switch (I.getOpcode()) {
190	default:
191	// Note there are a couple of missing cases here, once implemented, this
192	// should become an llvm_unreachable.
193	break;
194	case Instruction::ExtractElement: {
195	Value *Vec = I.getOperand(i: `0`);
196	auto *VecVTy = dyn_cast<FixedVectorType>(Val: Vec->getType());
197	if (!VecVTy)
198	break;
199	Value *Idx = I.getOperand(i: `1`);
200	unsigned NumElts = VecVTy->getNumElements();
201	Value *Check =
202	B.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Idx,
203	RHS: ConstantInt::get(Ty: Idx->getType(), V: NumElts));
204	Checks.push_back(Elt: Check);
205	break;
206	}
207	case Instruction::InsertElement: {
208	Value *Vec = I.getOperand(i: `0`);
209	auto *VecVTy = dyn_cast<FixedVectorType>(Val: Vec->getType());
210	if (!VecVTy)
211	break;
212	Value *Idx = I.getOperand(i: `2`);
213	unsigned NumElts = VecVTy->getNumElements();
214	Value *Check =
215	B.CreateICmp(P: ICmpInst::ICMP_UGE, LHS: Idx,
216	RHS: ConstantInt::get(Ty: Idx->getType(), V: NumElts));
217	Checks.push_back(Elt: Check);
218	break;
219	}
220	};
221	}
222
223	static Value getPoisonFor(DenseMap<Value , Value > &ValToPoison, Value V) {
224	auto Itr = ValToPoison.find(Val: V);
225	if (Itr != ValToPoison.end())
226	return Itr ->second;
227	if (isa<Constant>(Val: V)) {
228	return ConstantInt::getFalse(Context&: V->getContext());
229	}
230	// Return false for unknwon values - this implements a non-strict mode where
231	// unhandled IR constructs are simply considered to never produce poison. At
232	// some point in the future, we probably want a "strict mode" for testing if
233	// nothing else.
234	return ConstantInt::getFalse(Context&: V->getContext());
235	}
236
237	static void CreateAssert(IRBuilder<> &B, Value *Cond) {
238	assert(Cond->getType()->isIntegerTy(`1`));
239	if (auto *CI = dyn_cast<ConstantInt>(Val: Cond))
240	if (CI->isAllOnesValue())
241	return;
242
243	Module *M = B.GetInsertBlock()->getModule();
244	M->getOrInsertFunction(Name: "__poison_checker_assert",
245	RetTy: Type::getVoidTy(C&: M->getContext()),
246	Args: Type::getInt1Ty(C&: M->getContext()));
247	Function *TrapFunc = M->getFunction(Name: "__poison_checker_assert");
248	B.CreateCall(Callee: TrapFunc, Args: Cond);
249	}
250
251	static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
252	assert(Cond->getType()->isIntegerTy(`1`));
253	CreateAssert(B, Cond: B.CreateNot(V: Cond));
254	}
255
256	static bool rewrite(Function &F) {
257	auto * const Int1Ty = Type::getInt1Ty(C&: F.getContext());
258
259	DenseMap<Value , Value > ValToPoison;
260
261	for (BasicBlock &BB : F)
262	for (auto I = BB.begin(); isa<PHINode>(Val: &*I); I ++) {
263	auto OldPHI = cast<PHINode>(Val: &I);
264	auto *NewPHI = PHINode::Create(Ty: Int1Ty, NumReservedValues: OldPHI->getNumIncomingValues());
265	for (unsigned i = `0`; i < OldPHI->getNumIncomingValues(); i++)
266	NewPHI->addIncoming(V: UndefValue::get(T: Int1Ty),
267	BB: OldPHI->getIncomingBlock(i));
268	NewPHI->insertBefore(InsertPos: OldPHI);
269	ValToPoison [OldPHI] = NewPHI;
270	}
271
272	for (BasicBlock &BB : F)
273	for (Instruction &I : BB) {
274	if (isa<PHINode>(Val: I)) continue;
275
276	IRBuilder<> B(cast<Instruction>(Val: &I));
277
278	// Note: There are many more sources of documented UB, but this pass only
279	// attempts to find UB triggered by propagation of poison.
280	SmallVector<const Value *, `4`> NonPoisonOps;
281	SmallPtrSet<const Value *, `4`> SeenNonPoisonOps;
282	getGuaranteedNonPoisonOps(I: &I, Ops&: NonPoisonOps);
283	for (const Value *Op : NonPoisonOps)
284	if (SeenNonPoisonOps.insert(Ptr: Op).second)
285	CreateAssertNot(B,
286	Cond: getPoisonFor(ValToPoison, V: const_cast<Value *>(Op)));
287
288	if (LocalCheck)
289	if (auto *RI = dyn_cast<ReturnInst>(Val: &I))
290	if (RI->getNumOperands() != `0`) {
291	Value *Op = RI->getOperand(i_nocapture: `0`);
292	CreateAssertNot(B, Cond: getPoisonFor(ValToPoison, V: Op));
293	}
294
295	SmallVector<Value*, `4`> Checks;
296	for (const Use &U : I.operands()) {
297	if (ValToPoison.count(Val: U) && propagatesPoison(PoisonOp: U))
298	Checks.push_back(Elt: getPoisonFor(ValToPoison, V: U));
299	}
300
301	if (canCreatePoison(Op: cast<Operator>(Val: &I)))
302	generateCreationChecks(I, Checks);
303	ValToPoison [&I] = buildOrChain(B, Ops: Checks);
304	}
305
306	for (BasicBlock &BB : F)
307	for (auto I = BB.begin(); isa<PHINode>(Val: &*I); I ++) {
308	auto OldPHI = cast<PHINode>(Val: &I);
309	if (!ValToPoison.count(Val: OldPHI))
310	continue; // skip the newly inserted phis
311	auto *NewPHI = cast<PHINode>(Val: ValToPoison [OldPHI]);
312	for (unsigned i = `0`; i < OldPHI->getNumIncomingValues(); i++) {
313	auto *OldVal = OldPHI->getIncomingValue(i);
314	NewPHI->setIncomingValue(i, V: getPoisonFor(ValToPoison, V: OldVal));
315	}
316	}
317	return true;
318	}
319
320
321	PreservedAnalyses PoisonCheckingPass::run(Module &M,
322	ModuleAnalysisManager &AM) {
323	bool Changed = false;
324	for (auto &F : M)
325	Changed \|= rewrite(F);
326
327	return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
328	}
329
330	PreservedAnalyses PoisonCheckingPass::run(Function &F,
331	FunctionAnalysisManager &AM) {
332	return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
333	}
334
335	/ Major TODO Items:*
336	- Control dependent poison UB
337	- Strict mode - (i.e. must analyze every operand)
338	- Poison through memory
339	- Function ABIs
340	- Full coverage of intrinsics, etc.. (ouch)
341
342	Instructions w/Unclear Semantics:
343	- shufflevector - It would seem reasonable for an out of bounds mask element
344	to produce poison, but the LangRef does not state.
345	- all binary ops w/vector operands - The likely interpretation would be that
346	any element overflowing should produce poison for the entire result, but
347	the LangRef does not state.
348	- Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems
349	strange that only certian flags should be documented as producing poison.
350
351	Cases of clear poison semantics not yet implemented:
352	- Exact flags on ashr/lshr produce poison
353	- NSW/NUW flags on shl produce poison
354	- Inbounds flag on getelementptr produce poison
355	- fptosi/fptoui (out of bounds input) produce poison
356	- Scalable vector types for insertelement/extractelement
357	- Floating point binary ops w/fmf nnan/noinfs flags produce poison
358	*/
359

Browse the source code of llvm_projects/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp