1//===- Interpreter.cpp - Interpreter Loop for llubi -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the evaluation loop for each kind of instruction.
10//
11//===----------------------------------------------------------------------===//
12
13#include "Context.h"
14#include "Value.h"
15#include "llvm/IR/GetElementPtrTypeIterator.h"
16#include "llvm/IR/InlineAsm.h"
17#include "llvm/IR/InstVisitor.h"
18#include "llvm/IR/Operator.h"
19#include "llvm/Support/Allocator.h"
20
21namespace llvm::ubi {
22
/// Lifecycle states of a call frame. The interpreter drives each frame
/// through these states; the transitions listed per state are the only
/// valid ones.
enum class FrameState {
  // It is about to enter the function.
  // Valid transition:
  // -> Running
  Entry,
  // It is executing instructions inside the function.
  // Valid transitions:
  // -> Pending (on call)
  // -> Exit (on return)
  Running,
  // It is about to enter a callee or handle return value from the callee.
  // Valid transitions:
  // -> Running (after returning from callee)
  Pending,
  // It is about to return the control to the caller.
  // Terminal state: the frame is popped afterwards.
  Exit,
};
40
41/// Context for a function call.
42/// This struct maintains the state during the execution of a function,
43/// including the control flow, values of executed instructions, and stack
44/// objects.
struct Frame {
  // Function being executed by this frame.
  Function &Func;
  // Caller's frame, or null for the entry frame.
  Frame *LastFrame;
  // Call site that created this frame (null for the entry frame).
  CallBase *CallSite;
  // Argument values passed by the caller, indexed by argument number.
  ArrayRef<AnyValue> Args;
  // Caller-owned slot that receives this function's return value.
  AnyValue &RetVal;

  // Library-function information specialized for this function.
  TargetLibraryInfo TLI;
  // Currently executing basic block.
  BasicBlock *BB;
  // Program counter: the next instruction to execute within BB.
  BasicBlock::iterator PC;
  FrameState State = FrameState::Entry;
  // Stack objects allocated in this frame. They will be automatically freed
  // when the function returns.
  SmallVector<IntrusiveRefCntPtr<MemoryObject>> Allocas;
  // Values of arguments and executed instructions in this function.
  DenseMap<Value *, AnyValue> ValueMap;

  // Reserved for in-flight subroutines.
  Function *ResolvedCallee = nullptr;
  SmallVector<AnyValue> CalleeArgs;
  AnyValue CalleeRetVal;

  /// Create a frame positioned at the entry block of \p F, with \p Args
  /// bound to the formal arguments. Vararg functions may receive more
  /// arguments than formals.
  Frame(Function &F, CallBase *CallSite, Frame *LastFrame,
        ArrayRef<AnyValue> Args, AnyValue &RetVal,
        const TargetLibraryInfoImpl &TLIImpl)
      : Func(F), LastFrame(LastFrame), CallSite(CallSite), Args(Args),
        RetVal(RetVal), TLI(TLIImpl, &F) {
    assert((Args.size() == F.arg_size() ||
            (F.isVarArg() && Args.size() >= F.arg_size())) &&
           "Expected enough arguments to call the function.");
    BB = &Func.getEntryBlock();
    PC = BB->begin();
    // Seed the value map so getValue() can look up arguments uniformly.
    for (Argument &Arg : F.args())
      ValueMap[&Arg] = Args[Arg.getArgNo()];
  }
};
81
82static AnyValue addNoWrap(const APInt &LHS, const APInt &RHS, bool HasNSW,
83 bool HasNUW) {
84 APInt Res = LHS + RHS;
85 if (HasNUW && Res.ult(RHS))
86 return AnyValue::poison();
87 if (HasNSW && LHS.isNonNegative() == RHS.isNonNegative() &&
88 LHS.isNonNegative() != Res.isNonNegative())
89 return AnyValue::poison();
90 return Res;
91}
92
93static AnyValue subNoWrap(const APInt &LHS, const APInt &RHS, bool HasNSW,
94 bool HasNUW) {
95 APInt Res = LHS - RHS;
96 if (HasNUW && Res.ugt(RHS: LHS))
97 return AnyValue::poison();
98 if (HasNSW && LHS.isNonNegative() != RHS.isNonNegative() &&
99 LHS.isNonNegative() != Res.isNonNegative())
100 return AnyValue::poison();
101 return Res;
102}
103
104static AnyValue mulNoWrap(const APInt &LHS, const APInt &RHS, bool HasNSW,
105 bool HasNUW) {
106 bool Overflow = false;
107 APInt Res = LHS.smul_ov(RHS, Overflow);
108 if (HasNSW && Overflow)
109 return AnyValue::poison();
110 if (HasNUW) {
111 (void)LHS.umul_ov(RHS, Overflow);
112 if (Overflow)
113 return AnyValue::poison();
114 }
115 return Res;
116}
117
118/// Instruction executor using the visitor pattern.
119/// Unlike the Context class that manages the global state,
120/// InstExecutor only maintains the state for call frames.
121class InstExecutor : public InstVisitor<InstExecutor, void> {
122 Context &Ctx;
123 const DataLayout &DL;
124 EventHandler &Handler;
125 std::list<Frame> CallStack;
126 // Used to indicate whether the interpreter should continue execution.
127 bool Status;
128 Frame *CurrentFrame = nullptr;
129 AnyValue None;
130
131 void reportImmediateUB(StringRef Msg) {
132 // Check if we have already reported an immediate UB.
133 if (!Status)
134 return;
135 Status = false;
136 // TODO: Provide stack trace information.
137 Handler.onImmediateUB(Msg);
138 }
139
140 void reportError(StringRef Msg) {
141 // Check if we have already reported an error message.
142 if (!Status)
143 return;
144 Status = false;
145 Handler.onError(Msg);
146 }
147
148 const AnyValue &getValue(Value *V) {
149 if (auto *C = dyn_cast<Constant>(Val: V))
150 return Ctx.getConstantValue(C);
151 return CurrentFrame->ValueMap.at(Val: V);
152 }
153
154 void setResult(Instruction &I, AnyValue V) {
155 if (Status)
156 Status &= Handler.onInstructionExecuted(I, Result: V);
157 CurrentFrame->ValueMap.insert_or_assign(Key: &I, Val: std::move(V));
158 }
159
160 AnyValue computeUnOp(Type *Ty, const AnyValue &Operand,
161 function_ref<AnyValue(const AnyValue &)> ScalarFn) {
162 if (Ty->isVectorTy()) {
163 auto &OperandVec = Operand.asAggregate();
164 std::vector<AnyValue> ResVec;
165 ResVec.reserve(n: OperandVec.size());
166 for (const auto &Scalar : OperandVec)
167 ResVec.push_back(x: ScalarFn(Scalar));
168 return std::move(ResVec);
169 }
170 return ScalarFn(Operand);
171 }
172
173 void visitUnOp(Instruction &I,
174 function_ref<AnyValue(const AnyValue &)> ScalarFn) {
175 setResult(I, V: computeUnOp(Ty: I.getType(), Operand: getValue(V: I.getOperand(i: 0)), ScalarFn));
176 }
177
178 void visitIntUnOp(Instruction &I,
179 function_ref<AnyValue(const APInt &)> ScalarFn) {
180 visitUnOp(I, ScalarFn: [&](const AnyValue &Operand) -> AnyValue {
181 if (Operand.isPoison())
182 return AnyValue::poison();
183 return ScalarFn(Operand.asInteger());
184 });
185 }
186
187 AnyValue computeBinOp(
188 Type *Ty, const AnyValue &LHS, const AnyValue &RHS,
189 function_ref<AnyValue(const AnyValue &, const AnyValue &)> ScalarFn) {
190 if (Ty->isVectorTy()) {
191 auto &LHSVec = LHS.asAggregate();
192 auto &RHSVec = RHS.asAggregate();
193 std::vector<AnyValue> ResVec;
194 ResVec.reserve(n: LHSVec.size());
195 for (const auto &[ScalarLHS, ScalarRHS] : zip(t: LHSVec, u: RHSVec))
196 ResVec.push_back(x: ScalarFn(ScalarLHS, ScalarRHS));
197 return std::move(ResVec);
198 }
199 return ScalarFn(LHS, RHS);
200 }
201
202 void visitBinOp(
203 Instruction &I,
204 function_ref<AnyValue(const AnyValue &, const AnyValue &)> ScalarFn) {
205 setResult(I, V: computeBinOp(Ty: I.getType(), LHS: getValue(V: I.getOperand(i: 0)),
206 RHS: getValue(V: I.getOperand(i: 1)), ScalarFn));
207 }
208
209 void
210 visitIntBinOp(Instruction &I,
211 function_ref<AnyValue(const APInt &, const APInt &)> ScalarFn) {
212 visitBinOp(I, ScalarFn: [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
213 if (LHS.isPoison() || RHS.isPoison())
214 return AnyValue::poison();
215 return ScalarFn(LHS.asInteger(), RHS.asInteger());
216 });
217 }
218
  /// Transfer control to \p DestBB and execute all PHI nodes at its head.
  /// Aborts execution if the handler vetoes the jump.
  void jumpTo(Instruction &Terminator, BasicBlock *DestBB) {
    if (!Handler.onBBJump(I&: Terminator, To&: *DestBB)) {
      Status = false;
      return;
    }
    BasicBlock *From = CurrentFrame->BB;
    CurrentFrame->BB = DestBB;
    CurrentFrame->PC = DestBB->begin();
    // Update PHI nodes in batch to avoid the interference between PHI nodes.
    // We need to store the incoming values into a temporary buffer.
    // Otherwise, the incoming value may be overwritten before it is
    // used by other PHI nodes.
    SmallVector<std::pair<PHINode *, AnyValue>> IncomingValues;
    PHINode *PHI = nullptr;
    while ((PHI = dyn_cast<PHINode>(Val&: CurrentFrame->PC))) {
      Value *Incoming = PHI->getIncomingValueForBlock(BB: From);
      // TODO: handle fast-math flags.
      IncomingValues.emplace_back(Args&: PHI, Args: getValue(V: Incoming));
      ++CurrentFrame->PC;
    }
    // Commit all PHI results only after every incoming value has been read.
    for (auto &[K, V] : IncomingValues)
      setResult(I&: *K, V: std::move(V));
  }
242
243 /// Helper function to determine whether an inline asm is a no-op, which is
244 /// used to implement black_box style optimization blockers.
245 bool isNoopInlineAsm(Value *V, Type *RetTy) {
246 if (auto *Asm = dyn_cast<InlineAsm>(Val: V))
247 return Asm->getAsmString().empty() && RetTy->isVoidTy();
248 return false;
249 }
250
  /// Add a byte offset to a pointer, honoring GEP no-wrap flags.
  /// Returns poison when a flag is violated; otherwise a pointer with the
  /// original provenance and the new address. Also folds \p Offset into
  /// \p AccumulatedOffset (used across multi-index GEPs) under the same
  /// flags.
  AnyValue computePtrAdd(const Pointer &Ptr, const APInt &Offset,
                         GEPNoWrapFlags Flags, AnyValue &AccumulatedOffset) {
    // A zero offset can never wrap and never changes the pointer.
    if (Offset.isZero())
      return Ptr;
    // Arithmetic happens in the index-width slice of the address.
    APInt IndexBits = Ptr.address().trunc(width: Offset.getBitWidth());
    auto NewIndex = addNoWrap(LHS: IndexBits, RHS: Offset, /*HasNSW=*/false,
                              HasNUW: Flags.hasNoUnsignedWrap());
    if (NewIndex.isPoison())
      return AnyValue::poison();
    if (Flags.hasNoUnsignedSignedWrap()) {
      // The successive addition of the current address, truncated to the
      // pointer index type and interpreted as an unsigned number, and each
      // offset, interpreted as a signed number, does not wrap the pointer index
      // type.
      if (Offset.isNonNegative() ? NewIndex.asInteger().ult(RHS: IndexBits)
                                 : NewIndex.asInteger().ugt(RHS: IndexBits))
        return AnyValue::poison();
    }
    // Splice the updated index bits back into the full-width address.
    APInt NewAddr = Ptr.address();
    NewAddr.insertBits(SubBits: NewIndex.asInteger(), bitPosition: 0);

    // inbounds requires the result to stay within the underlying object.
    auto *MO = Ptr.getMemoryObject();
    if (Flags.isInBounds() && (!MO || !MO->inBounds(NewAddr)))
      return AnyValue::poison();

    // Track the running offset with the same wrap semantics; once it goes
    // poison it stays poison.
    if (!AccumulatedOffset.isPoison()) {
      AccumulatedOffset =
          addNoWrap(LHS: AccumulatedOffset.asInteger(), RHS: Offset,
                    HasNSW: Flags.hasNoUnsignedSignedWrap(), HasNUW: Flags.hasNoUnsignedWrap());
      if (AccumulatedOffset.isPoison())
        return AnyValue::poison();
    }

    // Should not expose provenance here even if the new address doesn't point
    // to the original object.
    return Ptr.getWithNewAddr(NewAddr);
  }
288
  /// Poison-propagating wrapper around the Pointer overload above.
  AnyValue computePtrAdd(const AnyValue &Ptr, const APInt &Offset,
                         GEPNoWrapFlags Flags, AnyValue &AccumulatedOffset) {
    if (Ptr.isPoison())
      return AnyValue::poison();
    return computePtrAdd(Ptr: Ptr.asPointer(), Offset, Flags, AccumulatedOffset);
  }

  /// Add Index * Scale bytes to \p Ptr. The scaling multiplication obeys
  /// the GEP no-wrap flags (nusw -> nsw, nuw -> nuw) before the pointer
  /// addition is performed.
  AnyValue computeScaledPtrAdd(const AnyValue &Ptr, const AnyValue &Index,
                               const APInt &Scale, GEPNoWrapFlags Flags,
                               AnyValue &AccumulatedOffset) {
    if (Ptr.isPoison() || Index.isPoison())
      return AnyValue::poison();
    assert(Ptr.isPointer() && Index.isInteger() && "Unexpected type.");
    // Fast path: an unscaled index needs no overflow-checked multiply.
    if (Scale.isOne())
      return computePtrAdd(Ptr, Offset: Index.asInteger(), Flags, AccumulatedOffset);
    auto ScaledOffset =
        mulNoWrap(LHS: Index.asInteger(), RHS: Scale, HasNSW: Flags.hasNoUnsignedSignedWrap(),
                  HasNUW: Flags.hasNoUnsignedWrap());
    if (ScaledOffset.isPoison())
      return AnyValue::poison();
    return computePtrAdd(Ptr, Offset: ScaledOffset.asInteger(), Flags,
                         AccumulatedOffset);
  }
312
313 AnyValue canonicalizeIndex(const AnyValue &Idx, unsigned IndexBitWidth,
314 GEPNoWrapFlags Flags) {
315 if (Idx.isPoison())
316 return AnyValue::poison();
317 auto &IdxInt = Idx.asInteger();
318 if (IdxInt.getBitWidth() == IndexBitWidth)
319 return Idx;
320 if (IdxInt.getBitWidth() > IndexBitWidth) {
321 if (Flags.hasNoUnsignedSignedWrap() &&
322 !IdxInt.isSignedIntN(N: IndexBitWidth))
323 return AnyValue::poison();
324
325 if (Flags.hasNoUnsignedWrap() && !IdxInt.isIntN(N: IndexBitWidth))
326 return AnyValue::poison();
327
328 return IdxInt.trunc(width: IndexBitWidth);
329 }
330 return IdxInt.sext(width: IndexBitWidth);
331 }
332
333public:
  /// Set up the interpreter to run \p F with \p Args.
  /// \p RetVal receives the return value once execution finishes.
  InstExecutor(Context &C, EventHandler &H, Function &F,
               ArrayRef<AnyValue> Args, AnyValue &RetVal)
      : Ctx(C), DL(Ctx.getDataLayout()), Handler(H), Status(true) {
    // Push the initial frame; it has no call site and no caller frame.
    CallStack.emplace_back(args&: F, /*CallSite=*/args: nullptr, /*LastFrame=*/args: nullptr, args&: Args,
                           args&: RetVal, args: Ctx.getTLIImpl());
  }
340
341 void visitReturnInst(ReturnInst &RI) {
342 if (auto *RV = RI.getReturnValue())
343 CurrentFrame->RetVal = getValue(V: RV);
344 CurrentFrame->State = FrameState::Exit;
345 Status &= Handler.onInstructionExecuted(I&: RI, Result: None);
346 }
347
348 void visitBranchInst(BranchInst &BI) {
349 if (BI.isConditional()) {
350 switch (getValue(V: BI.getCondition()).asBoolean()) {
351 case BooleanKind::True:
352 jumpTo(Terminator&: BI, DestBB: BI.getSuccessor(i: 0));
353 return;
354 case BooleanKind::False:
355 jumpTo(Terminator&: BI, DestBB: BI.getSuccessor(i: 1));
356 return;
357 case BooleanKind::Poison:
358 reportImmediateUB(Msg: "Branch on poison condition.");
359 return;
360 }
361 }
362 jumpTo(Terminator&: BI, DestBB: BI.getSuccessor(i: 0));
363 }
364
365 void visitSwitchInst(SwitchInst &SI) {
366 auto &Cond = getValue(V: SI.getCondition());
367 if (Cond.isPoison()) {
368 reportImmediateUB(Msg: "Switch on poison condition.");
369 return;
370 }
371 for (auto &Case : SI.cases()) {
372 if (Case.getCaseValue()->getValue() == Cond.asInteger()) {
373 jumpTo(Terminator&: SI, DestBB: Case.getCaseSuccessor());
374 return;
375 }
376 }
377 jumpTo(Terminator&: SI, DestBB: SI.getDefaultDest());
378 }
379
380 void visitUnreachableInst(UnreachableInst &) {
381 reportImmediateUB(Msg: "Unreachable code.");
382 }
383
384 void visitCallBrInst(CallBrInst &CI) {
385 if (isNoopInlineAsm(V: CI.getCalledOperand(), RetTy: CI.getType())) {
386 jumpTo(Terminator&: CI, DestBB: CI.getDefaultDest());
387 return;
388 }
389
390 Handler.onUnrecognizedInstruction(I&: CI);
391 Status = false;
392 }
393
394 void visitIndirectBrInst(IndirectBrInst &IBI) {
395 auto &Target = getValue(V: IBI.getAddress());
396 if (Target.isPoison()) {
397 reportImmediateUB(Msg: "Indirect branch on poison.");
398 return;
399 }
400 if (BasicBlock *DestBB = Ctx.getTargetBlock(Ptr: Target.asPointer())) {
401 if (any_of(Range: IBI.successors(),
402 P: [DestBB](BasicBlock *Succ) { return Succ == DestBB; }))
403 jumpTo(Terminator&: IBI, DestBB);
404 else
405 reportImmediateUB(Msg: "Indirect branch on unlisted target BB.");
406
407 return;
408 }
409 reportImmediateUB(Msg: "Indirect branch on invalid target BB.");
410 }
411
  /// Finish a call: consume the callee's return value, store it as the
  /// result of the call site, and resume the caller (normal destination
  /// for invokes, next instruction otherwise).
  void returnFromCallee() {
    // TODO: handle retval attributes (Attributes from known callee should be
    // applied if available).
    // TODO: handle metadata
    auto &CB = cast<CallBase>(Val&: *CurrentFrame->PC);
    CurrentFrame->CalleeArgs.clear();
    AnyValue &RetVal = CurrentFrame->CalleeRetVal;
    setResult(I&: CB, V: std::move(RetVal));

    if (auto *II = dyn_cast<InvokeInst>(Val: &CB))
      jumpTo(Terminator&: *II, DestBB: II->getNormalDest());
    else if (CurrentFrame->State == FrameState::Pending)
      // Only advance when this frame actually suspended for a callee frame.
      // NOTE(review): inline-handled calls (intrinsics/asm) reach here in
      // the Running state — presumably their PC advances elsewhere in the
      // driver loop; confirm against the interpreter main loop.
      ++CurrentFrame->PC;
  }
426
  /// Execute a recognized intrinsic inline and return its result.
  /// Unrecognized intrinsics abort execution.
  AnyValue callIntrinsic(CallBase &CB) {
    Intrinsic::ID IID = CB.getIntrinsicID();
    switch (IID) {
    case Intrinsic::assume:
      switch (getValue(V: CB.getArgOperand(i: 0)).asBoolean()) {
      case BooleanKind::True:
        break;
      case BooleanKind::False:
      case BooleanKind::Poison:
        // A violated (or poison) assumption is immediate UB.
        reportImmediateUB(Msg: "Assume on false or poison condition.");
        break;
      }
      // TODO: handle llvm.assume with operand bundles
      return AnyValue();
    default:
      Handler.onUnrecognizedInstruction(I&: CB);
      Status = false;
      return AnyValue();
    }
  }
447
  /// Execute a call to a declared (body-less) function.
  /// Currently a stub: even when the TLI lookup recognizes a library
  /// function, it is still reported as unrecognized and execution stops —
  /// only the nobuiltin/TLI plumbing is wired up so far.
  AnyValue callLibFunc(CallBase &CB, Function *ResolvedCallee) {
    LibFunc LF;
    // Respect nobuiltin attributes on call site.
    if (CB.isNoBuiltin() ||
        !CurrentFrame->TLI.getLibFunc(FDecl: *ResolvedCallee, F&: LF)) {
      Handler.onUnrecognizedInstruction(I&: CB);
      Status = false;
      return AnyValue();
    }

    // No library functions are implemented yet; fall through to failure.
    Handler.onUnrecognizedInstruction(I&: CB);
    Status = false;
    return AnyValue();
  }
462
  /// Execute a call site: resolve the callee (direct, no-op asm, or an
  /// indirect call through a function pointer), then either handle it
  /// inline (intrinsics, declarations) or suspend this frame and push a
  /// new frame for a defined callee.
  void enterCall(CallBase &CB) {
    Function *Callee = CB.getCalledFunction();
    // TODO: handle parameter attributes (Attributes from known callee should be
    // applied if available).
    // TODO: handle byval/initializes
    // Evaluate arguments into the frame's scratch buffer; it is cleared by
    // returnFromCallee.
    auto &CalleeArgs = CurrentFrame->CalleeArgs;
    assert(CalleeArgs.empty() &&
           "Forgot to call returnFromCallee before entering a new call.");
    for (Value *Arg : CB.args())
      CalleeArgs.push_back(Elt: getValue(V: Arg));

    if (!Callee) {
      Value *CalledOperand = CB.getCalledOperand();
      // black_box-style empty inline asm: treat as a no-op call.
      if (isNoopInlineAsm(V: CalledOperand, RetTy: CB.getType())) {
        CurrentFrame->ResolvedCallee = nullptr;
        returnFromCallee();
        return;
      }

      // Any other inline asm is unsupported.
      if (isa<InlineAsm>(Val: CalledOperand)) {
        Handler.onUnrecognizedInstruction(I&: CB);
        Status = false;
        return;
      }

      // Indirect call: the pointer must be non-poison, map to a real
      // function, and match the call-site signature exactly.
      auto &CalleeVal = getValue(V: CalledOperand);
      if (CalleeVal.isPoison()) {
        reportImmediateUB(Msg: "Indirect call through poison function pointer.");
        return;
      }
      Callee = Ctx.getTargetFunction(Ptr: CalleeVal.asPointer());
      if (!Callee) {
        reportImmediateUB(Msg: "Indirect call through invalid function pointer.");
        return;
      }
      if (Callee->getFunctionType() != CB.getFunctionType()) {
        reportImmediateUB(Msg: "Indirect call through a function pointer with "
                               "mismatched signature.");
        return;
      }
    }

    assert(Callee && "Expected a resolved callee function.");
    assert(
        Callee->getFunctionType() == CB.getFunctionType() &&
        "Expected the callee function type to match the call site signature.");
    CurrentFrame->ResolvedCallee = Callee;
    if (Callee->isIntrinsic()) {
      // Intrinsics are executed inline without pushing a frame.
      CurrentFrame->CalleeRetVal = callIntrinsic(CB);
      returnFromCallee();
      return;
    } else if (Callee->isDeclaration()) {
      // Body-less functions go through the library-call path.
      CurrentFrame->CalleeRetVal = callLibFunc(CB, ResolvedCallee: Callee);
      returnFromCallee();
      return;
    } else {
      // Defined callee: enforce the stack-depth limit, then suspend this
      // frame and push a frame for the callee.
      uint32_t MaxStackDepth = Ctx.getMaxStackDepth();
      if (MaxStackDepth && CallStack.size() >= MaxStackDepth) {
        reportError(Msg: "Maximum stack depth exceeded.");
        return;
      }
      assert(!Callee->empty() && "Expected a defined function.");
      // Suspend the current frame and push the callee frame onto the stack.
      ArrayRef<AnyValue> Args = CurrentFrame->CalleeArgs;
      AnyValue &RetVal = CurrentFrame->CalleeRetVal;
      CurrentFrame->State = FrameState::Pending;
      CallStack.emplace_back(args&: *Callee, args: &CB, args&: CurrentFrame, args&: Args, args&: RetVal,
                             args: Ctx.getTLIImpl());
    }
  }
533
534 void visitCallInst(CallInst &CI) { enterCall(CB&: CI); }
535
536 void visitInvokeInst(InvokeInst &II) {
537 // TODO: handle exceptions
538 enterCall(CB&: II);
539 }
540
541 void visitAdd(BinaryOperator &I) {
542 visitIntBinOp(I, ScalarFn: [&](const APInt &LHS, const APInt &RHS) {
543 return addNoWrap(LHS, RHS, HasNSW: I.hasNoSignedWrap(), HasNUW: I.hasNoUnsignedWrap());
544 });
545 }
546
547 void visitSub(BinaryOperator &I) {
548 visitIntBinOp(I, ScalarFn: [&](const APInt &LHS, const APInt &RHS) {
549 return subNoWrap(LHS, RHS, HasNSW: I.hasNoSignedWrap(), HasNUW: I.hasNoUnsignedWrap());
550 });
551 }
552
553 void visitMul(BinaryOperator &I) {
554 visitIntBinOp(I, ScalarFn: [&](const APInt &LHS, const APInt &RHS) {
555 return mulNoWrap(LHS, RHS, HasNSW: I.hasNoSignedWrap(), HasNUW: I.hasNoUnsignedWrap());
556 });
557 }
558
  /// sdiv with LLVM UB semantics.
  /// Division by zero and INT_MIN / -1 are immediate UB. A poison divisor
  /// is refinable to 0 (hence UB); a poison dividend is UB only when the
  /// divisor is -1 (refinable to INT_MIN), otherwise the result is poison.
  /// With the exact flag, a nonzero remainder makes the result poison.
  void visitSDiv(BinaryOperator &I) {
    visitBinOp(I, ScalarFn: [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
      // Priority: Immediate UB > poison > normal value
      if (RHS.isPoison()) {
        reportImmediateUB(Msg: "Division by zero (refine RHS to 0).");
        return AnyValue::poison();
      }
      const APInt &RHSVal = RHS.asInteger();
      if (RHSVal.isZero()) {
        reportImmediateUB(Msg: "Division by zero.");
        return AnyValue::poison();
      }
      if (LHS.isPoison()) {
        if (RHSVal.isAllOnes())
          reportImmediateUB(
              Msg: "Signed division overflow (refine LHS to INT_MIN).");
        return AnyValue::poison();
      }
      const APInt &LHSVal = LHS.asInteger();
      if (LHSVal.isMinSignedValue() && RHSVal.isAllOnes()) {
        reportImmediateUB(Msg: "Signed division overflow.");
        return AnyValue::poison();
      }

      if (I.isExact()) {
        // exact: any remainder makes the quotient poison.
        APInt Q, R;
        APInt::sdivrem(LHS: LHSVal, RHS: RHSVal, Quotient&: Q, Remainder&: R);
        if (!R.isZero())
          return AnyValue::poison();
        return Q;
      } else {
        return LHSVal.sdiv(RHS: RHSVal);
      }
    });
  }

  /// srem: shares the sdiv UB cases (division by zero, INT_MIN % -1).
  void visitSRem(BinaryOperator &I) {
    visitBinOp(I, ScalarFn: [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
      // Priority: Immediate UB > poison > normal value
      if (RHS.isPoison()) {
        reportImmediateUB(Msg: "Division by zero (refine RHS to 0).");
        return AnyValue::poison();
      }
      const APInt &RHSVal = RHS.asInteger();
      if (RHSVal.isZero()) {
        reportImmediateUB(Msg: "Division by zero.");
        return AnyValue::poison();
      }
      if (LHS.isPoison()) {
        if (RHSVal.isAllOnes())
          reportImmediateUB(
              Msg: "Signed division overflow (refine LHS to INT_MIN).");
        return AnyValue::poison();
      }
      const APInt &LHSVal = LHS.asInteger();
      if (LHSVal.isMinSignedValue() && RHSVal.isAllOnes()) {
        reportImmediateUB(Msg: "Signed division overflow.");
        return AnyValue::poison();
      }

      return LHSVal.srem(RHS: RHSVal);
    });
  }

  /// udiv: division by zero (or a poison divisor, refinable to 0) is
  /// immediate UB; a poison dividend yields poison. With the exact flag,
  /// a nonzero remainder makes the result poison.
  void visitUDiv(BinaryOperator &I) {
    visitBinOp(I, ScalarFn: [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
      // Priority: Immediate UB > poison > normal value
      if (RHS.isPoison()) {
        reportImmediateUB(Msg: "Division by zero (refine RHS to 0).");
        return AnyValue::poison();
      }
      const APInt &RHSVal = RHS.asInteger();
      if (RHSVal.isZero()) {
        reportImmediateUB(Msg: "Division by zero.");
        return AnyValue::poison();
      }
      if (LHS.isPoison())
        return AnyValue::poison();
      const APInt &LHSVal = LHS.asInteger();

      if (I.isExact()) {
        // exact: any remainder makes the quotient poison.
        APInt Q, R;
        APInt::udivrem(LHS: LHSVal, RHS: RHSVal, Quotient&: Q, Remainder&: R);
        if (!R.isZero())
          return AnyValue::poison();
        return Q;
      } else {
        return LHSVal.udiv(RHS: RHSVal);
      }
    });
  }

  /// urem: same UB cases as udiv; otherwise the unsigned remainder.
  void visitURem(BinaryOperator &I) {
    visitBinOp(I, ScalarFn: [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
      // Priority: Immediate UB > poison > normal value
      if (RHS.isPoison()) {
        reportImmediateUB(Msg: "Division by zero (refine RHS to 0).");
        return AnyValue::poison();
      }
      const APInt &RHSVal = RHS.asInteger();
      if (RHSVal.isZero()) {
        reportImmediateUB(Msg: "Division by zero.");
        return AnyValue::poison();
      }
      if (LHS.isPoison())
        return AnyValue::poison();
      const APInt &LHSVal = LHS.asInteger();
      return LHSVal.urem(RHS: RHSVal);
    });
  }
669
670 void visitTruncInst(TruncInst &Trunc) {
671 visitIntUnOp(I&: Trunc, ScalarFn: [&](const APInt &Operand) -> AnyValue {
672 unsigned DestBW = Trunc.getType()->getScalarSizeInBits();
673 if (Trunc.hasNoSignedWrap() && Operand.getSignificantBits() > DestBW)
674 return AnyValue::poison();
675 if (Trunc.hasNoUnsignedWrap() && Operand.getActiveBits() > DestBW)
676 return AnyValue::poison();
677 return Operand.trunc(width: DestBW);
678 });
679 }
680
681 void visitZExtInst(ZExtInst &ZExt) {
682 visitIntUnOp(I&: ZExt, ScalarFn: [&](const APInt &Operand) -> AnyValue {
683 uint32_t DestBW = ZExt.getDestTy()->getScalarSizeInBits();
684 if (ZExt.hasNonNeg() && Operand.isNegative())
685 return AnyValue::poison();
686 return Operand.zext(width: DestBW);
687 });
688 }
689
690 void visitSExtInst(SExtInst &SExt) {
691 visitIntUnOp(I&: SExt, ScalarFn: [&](const APInt &Operand) -> AnyValue {
692 uint32_t DestBW = SExt.getDestTy()->getScalarSizeInBits();
693 return Operand.sext(width: DestBW);
694 });
695 }
696
697 void visitAnd(BinaryOperator &I) {
698 visitIntBinOp(I, ScalarFn: [](const APInt &LHS, const APInt &RHS) -> AnyValue {
699 return LHS & RHS;
700 });
701 }
702
703 void visitXor(BinaryOperator &I) {
704 visitIntBinOp(I, ScalarFn: [](const APInt &LHS, const APInt &RHS) -> AnyValue {
705 return LHS ^ RHS;
706 });
707 }
708
709 void visitOr(BinaryOperator &I) {
710 visitIntBinOp(I, ScalarFn: [&](const APInt &LHS, const APInt &RHS) -> AnyValue {
711 if (cast<PossiblyDisjointInst>(Val&: I).isDisjoint() && LHS.intersects(RHS))
712 return AnyValue::poison();
713 return LHS | RHS;
714 });
715 }
716
  /// shl: a shift amount >= bit width is poison; nsw/nuw additionally make
  /// the result poison when significant bits are shifted out.
  void visitShl(BinaryOperator &I) {
    visitIntBinOp(I, ScalarFn: [&](const APInt &LHS, const APInt &RHS) -> AnyValue {
      if (RHS.uge(RHS: LHS.getBitWidth()))
        return AnyValue::poison();
      // nsw: shifting out any bit that differs from the sign bit changes
      // the signed value.
      if (I.hasNoSignedWrap() && RHS.uge(RHS: LHS.getNumSignBits()))
        return AnyValue::poison();
      // nuw: shifting out any set bit changes the unsigned value.
      if (I.hasNoUnsignedWrap() && RHS.ugt(RHS: LHS.countl_zero()))
        return AnyValue::poison();
      return LHS.shl(ShiftAmt: RHS);
    });
  }

  /// lshr: amount >= bit width is poison; with the exact flag, shifting
  /// out a set bit (amount > trailing zeros) is poison too.
  void visitLShr(BinaryOperator &I) {
    visitIntBinOp(I, ScalarFn: [&](const APInt &LHS, const APInt &RHS) -> AnyValue {
      if (RHS.uge(RHS: cast<PossiblyExactOperator>(Val&: I).isExact()
                      ? LHS.countr_zero() + 1
                      : LHS.getBitWidth()))
        return AnyValue::poison();
      return LHS.lshr(ShiftAmt: RHS);
    });
  }

  /// ashr: same poison rules as lshr, arithmetic (sign-filling) shift.
  void visitAShr(BinaryOperator &I) {
    visitIntBinOp(I, ScalarFn: [&](const APInt &LHS, const APInt &RHS) -> AnyValue {
      if (RHS.uge(RHS: cast<PossiblyExactOperator>(Val&: I).isExact()
                      ? LHS.countr_zero() + 1
                      : LHS.getBitWidth()))
        return AnyValue::poison();
      return LHS.ashr(ShiftAmt: RHS);
    });
  }
748
749 void visitICmpInst(ICmpInst &I) {
750 visitBinOp(I, ScalarFn: [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue {
751 if (LHS.isPoison() || RHS.isPoison())
752 return AnyValue::poison();
753 // TODO: handle pointer comparison.
754 const APInt &LHSVal = LHS.asInteger();
755 const APInt &RHSVal = RHS.asInteger();
756 if (I.hasSameSign() && LHSVal.isNonNegative() != RHSVal.isNonNegative())
757 return AnyValue::poison();
758 return AnyValue::boolean(
759 Val: ICmpInst::compare(LHS: LHSVal, RHS: RHSVal, Pred: I.getPredicate()));
760 });
761 }
762
  /// select: a scalar (i1) condition picks one arm — a poison condition
  /// poisons the whole result (not UB, unlike br). A vector condition
  /// selects lane-wise.
  void visitSelect(SelectInst &SI) {
    // TODO: handle fast-math flags.
    if (SI.getCondition()->getType()->isIntegerTy(Bitwidth: 1)) {
      switch (getValue(V: SI.getCondition()).asBoolean()) {
      case BooleanKind::True:
        setResult(I&: SI, V: getValue(V: SI.getTrueValue()));
        return;
      case BooleanKind::False:
        setResult(I&: SI, V: getValue(V: SI.getFalseValue()));
        return;
      case BooleanKind::Poison:
        setResult(I&: SI, V: AnyValue::getPoisonValue(Ctx, Ty: SI.getType()));
        return;
      }
    }

    // Vector select: each lane picks from the corresponding lane of the
    // chosen operand; a poison condition lane yields a poison result lane.
    auto &Cond = getValue(V: SI.getCondition()).asAggregate();
    auto &TV = getValue(V: SI.getTrueValue()).asAggregate();
    auto &FV = getValue(V: SI.getFalseValue()).asAggregate();
    std::vector<AnyValue> Res;
    size_t Len = Cond.size();
    Res.reserve(n: Len);
    for (uint32_t I = 0; I != Len; ++I) {
      switch (Cond[I].asBoolean()) {
      case BooleanKind::True:
        Res.push_back(x: TV[I]);
        break;
      case BooleanKind::False:
        Res.push_back(x: FV[I]);
        break;
      case BooleanKind::Poison:
        Res.push_back(x: AnyValue::poison());
        break;
      }
    }
    setResult(I&: SI, V: std::move(Res));
  }
800
  /// alloca: allocate a stack object owned by the current frame.
  /// A poison array size or a size overflowing uint64_t is immediate UB;
  /// allocation failure is an interpreter error, not UB.
  void visitAllocaInst(AllocaInst &AI) {
    uint64_t AllocSize =
        DL.getTypeAllocSize(Ty: AI.getAllocatedType()).getFixedValue();
    if (AI.isArrayAllocation()) {
      auto &Size = getValue(V: AI.getArraySize());
      if (Size.isPoison()) {
        reportImmediateUB(Msg: "Alloca with poison array size.");
        return;
      }
      if (Size.asInteger().getActiveBits() > 64) {
        reportImmediateUB(
            Msg: "Alloca with large array size that overflows uint64_t.");
        return;
      }
      // Total size = element size * array count; saturating multiply
      // detects uint64_t overflow.
      bool Overflowed = false;
      AllocSize = SaturatingMultiply(X: AllocSize, Y: Size.asInteger().getZExtValue(),
                                     ResultOverflowed: &Overflowed);
      if (Overflowed) {
        reportImmediateUB(
            Msg: "Alloca with allocation size that overflows uint64_t.");
        return;
      }
    }
    // FIXME: If it is used by llvm.lifetime.start, it should be initially dead.
    auto Obj = Ctx.allocate(Size: AllocSize, Align: AI.getPointerAlignment(DL).value(),
                            Name: AI.getName(), AS: AI.getAddressSpace(),
                            InitKind: MemInitKind::Uninitialized);
    if (!Obj) {
      reportError(Msg: "Insufficient stack space.");
      return;
    }
    // Track for automatic deallocation when this frame exits.
    CurrentFrame->Allocas.push_back(Elt: Obj);
    setResult(I&: AI, V: Ctx.deriveFromMemoryObject(Obj));
  }
835
  /// getelementptr: fold every index into the base pointer, honoring the
  /// GEP no-wrap flags at each step. Handles scalar and vector GEPs,
  /// splatting the base/offset on first encountering a vector index.
  void visitGetElementPtrInst(GetElementPtrInst &GEP) {
    uint32_t IndexBitWidth =
        DL.getIndexSizeInBits(AS: GEP.getType()->getPointerAddressSpace());
    GEPNoWrapFlags Flags = GEP.getNoWrapFlags();
    AnyValue Res = getValue(V: GEP.getPointerOperand());
    // Running byte offset, checked for wrap across all indices.
    AnyValue AccumulatedOffset = APInt(IndexBitWidth, 0);
    if (Res.isAggregate())
      AccumulatedOffset =
          AnyValue::getVectorSplat(Scalar: AccumulatedOffset, NumElements: Res.asAggregate().size());
    // Add Index * Scale to Res, splatting scalars to vectors as needed so
    // that Res/Index/AccumulatedOffset always agree in shape.
    auto ApplyScaledOffset = [&](const AnyValue &Index, const APInt &Scale) {
      if (Index.isAggregate() && !Res.isAggregate()) {
        Res = AnyValue::getVectorSplat(Scalar: Res, NumElements: Index.asAggregate().size());
        AccumulatedOffset = AnyValue::getVectorSplat(
            Scalar: AccumulatedOffset, NumElements: Index.asAggregate().size());
      }
      if (Index.isAggregate() && Res.isAggregate()) {
        // Vector index: combine lane-wise.
        for (auto &&[ResElem, IndexElem, OffsetElem] :
             zip(t&: Res.asAggregate(), u: Index.asAggregate(),
                 args&: AccumulatedOffset.asAggregate()))
          ResElem = computeScaledPtrAdd(
              Ptr: ResElem, Index: canonicalizeIndex(Idx: IndexElem, IndexBitWidth, Flags),
              Scale, Flags, AccumulatedOffset&: OffsetElem);
      } else {
        AnyValue CanonicalIndex =
            canonicalizeIndex(Idx: Index, IndexBitWidth, Flags);
        if (Res.isAggregate()) {
          // Scalar index applied to every lane of a vector of pointers.
          for (auto &&[ResElem, OffsetElem] :
               zip(t&: Res.asAggregate(), u&: AccumulatedOffset.asAggregate()))
            ResElem = computeScaledPtrAdd(Ptr: ResElem, Index: CanonicalIndex, Scale, Flags,
                                          AccumulatedOffset&: OffsetElem);
        } else {
          Res = computeScaledPtrAdd(Ptr: Res, Index: CanonicalIndex, Scale, Flags,
                                    AccumulatedOffset);
        }
      }
    };

    for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
         GTI != GTE; ++GTI) {
      Value *V = GTI.getOperand();

      // Fast path for zero offsets.
      if (auto *CI = dyn_cast<ConstantInt>(Val: V)) {
        if (CI->isZero())
          continue;
      }
      if (isa<ConstantAggregateZero>(Val: V))
        continue;

      // Handle a struct index, which adds its field offset to the pointer.
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        unsigned ElementIdx = cast<ConstantInt>(Val: V)->getZExtValue();
        const StructLayout *SL = DL.getStructLayout(Ty: STy);
        // Element offset is in bytes.
        ApplyScaledOffset(
            APInt(IndexBitWidth, SL->getElementOffset(Idx: ElementIdx)),
            APInt(IndexBitWidth, 1));
        continue;
      }

      // Truncate if type size exceeds index space.
      // TODO: Should be documented in LangRef: GEPs with nowrap flags should
      // return poison when the type size exceeds index space.
      TypeSize Offset = GTI.getSequentialElementStride(DL);
      APInt Scale(IndexBitWidth,
                  Offset.isScalable()
                      ? Offset.getKnownMinValue() * Ctx.getVScale()
                      : Offset.getFixedValue(),
                  /*isSigned=*/false, /*implicitTrunc=*/true);
      if (!Scale.isZero())
        ApplyScaledOffset(getValue(V), Scale);
    }

    setResult(I&: GEP, V: std::move(Res));
  }
911
912 void visitIntToPtr(IntToPtrInst &I) {
913 return visitUnOp(I, ScalarFn: [&](const AnyValue &V) -> AnyValue {
914 if (V.isPoison())
915 return AnyValue::poison();
916 // TODO: expose provenance
917 // TODO: check metadata
918 return Pointer(V.asInteger().zextOrTrunc(
919 width: DL.getPointerSizeInBits(AS: I.getType()->getPointerAddressSpace())));
920 });
921 }
922
923 void visitInstruction(Instruction &I) {
924 Handler.onUnrecognizedInstruction(I);
925 Status = false;
926 }
927
928 void visitExtractValueInst(ExtractValueInst &EVI) {
929 auto &Res = getValue(V: EVI.getAggregateOperand());
930 const AnyValue *Pos = &Res;
931 for (unsigned Idx : EVI.indices())
932 Pos = &Pos->asAggregate()[Idx];
933 setResult(I&: EVI, V: *Pos);
934 }
935
936 void visitInsertValueInst(InsertValueInst &IVI) {
937 AnyValue Res = getValue(V: IVI.getAggregateOperand());
938 AnyValue *Pos = &Res;
939 for (unsigned Idx : IVI.indices())
940 Pos = &Pos->asAggregate()[Idx];
941 *Pos = getValue(V: IVI.getInsertedValueOperand());
942 setResult(I&: IVI, V: std::move(Res));
943 }
944
945 void visitInsertElementInst(InsertElementInst &IEI) {
946 auto Res = getValue(V: IEI.getOperand(i_nocapture: 0));
947 auto &ResVec = Res.asAggregate();
948 auto &Idx = getValue(V: IEI.getOperand(i_nocapture: 2));
949 if (Idx.isPoison() || Idx.asInteger().uge(RHS: ResVec.size())) {
950 setResult(I&: IEI, V: AnyValue::getPoisonValue(Ctx, Ty: IEI.getType()));
951 return;
952 }
953 ResVec[Idx.asInteger().getZExtValue()] = getValue(V: IEI.getOperand(i_nocapture: 1));
954 setResult(I&: IEI, V: std::move(Res));
955 }
956
957 void visitExtractElementInst(ExtractElementInst &EEI) {
958 auto &SrcVec = getValue(V: EEI.getOperand(i_nocapture: 0)).asAggregate();
959 auto &Idx = getValue(V: EEI.getOperand(i_nocapture: 1));
960 if (Idx.isPoison() || Idx.asInteger().uge(RHS: SrcVec.size())) {
961 setResult(I&: EEI, V: AnyValue::getPoisonValue(Ctx, Ty: EEI.getType()));
962 return;
963 }
964 setResult(I&: EEI, V: SrcVec[Idx.asInteger().getZExtValue()]);
965 }
966
967 void visitShuffleVectorInst(ShuffleVectorInst &SVI) {
968 auto &LHSVec = getValue(V: SVI.getOperand(i_nocapture: 0)).asAggregate();
969 auto &RHSVec = getValue(V: SVI.getOperand(i_nocapture: 1)).asAggregate();
970 uint32_t Size = cast<VectorType>(Val: SVI.getOperand(i_nocapture: 0)->getType())
971 ->getElementCount()
972 .getKnownMinValue();
973 std::vector<AnyValue> Res;
974 uint32_t DstLen = Ctx.getEVL(EC: SVI.getType()->getElementCount());
975 Res.reserve(n: DstLen);
976 uint32_t Stride = SVI.getShuffleMask().size();
977 // For scalable vectors, we need to repeat the shuffle mask until we fill
978 // the destination vector.
979 for (uint32_t Off = 0; Off != DstLen; Off += Stride) {
980 for (int Idx : SVI.getShuffleMask()) {
981 if (Idx == PoisonMaskElem)
982 Res.push_back(x: AnyValue::poison());
983 else if (Idx < static_cast<int>(Size))
984 Res.push_back(x: LHSVec[Idx]);
985 else
986 Res.push_back(x: RHSVec[Idx - Size]);
987 }
988 }
989 setResult(I&: SVI, V: std::move(Res));
990 }
991
992 /// This function implements the main interpreter loop.
993 /// It handles function calls in a non-recursive manner to avoid stack
994 /// overflows.
  /// This function implements the main interpreter loop.
  /// It handles function calls in a non-recursive manner to avoid stack
  /// overflows.
  bool runMainLoop() {
    // Global step budget shared across all frames; 0 disables the limit.
    uint32_t MaxSteps = Ctx.getMaxSteps();
    uint32_t Steps = 0;
    // Outer loop: schedule the top-most frame until the stack drains or an
    // error clears Status.
    while (Status && !CallStack.empty()) {
      Frame &Top = CallStack.back();
      CurrentFrame = &Top;
      if (Top.State == FrameState::Entry) {
        // First time this frame is scheduled: announce function entry.
        Handler.onFunctionEntry(F&: Top.Func, Args: Top.Args, CallSite: Top.CallSite);
      } else {
        // Resuming after a callee frame finished: consume its return value.
        assert(Top.State == FrameState::Pending &&
               "Expected to return from a callee.");
        returnFromCallee();
      }

      Top.State = FrameState::Running;
      // Interpreter loop inside a function
      while (Status) {
        assert(Top.State == FrameState::Running &&
               "Expected to be in running state.");
        if (MaxSteps != 0 && Steps >= MaxSteps) {
          // NOTE(review): this relies on reportError clearing Status so the
          // outer loop also terminates — confirm, otherwise the state-machine
          // asserts below would fire.
          reportError(Msg: "Exceeded maximum number of execution steps.");
          break;
        }
        ++Steps;

        Instruction &I = *Top.PC;
        visit(I: &I);
        if (!Status)
          break;

        // A function call or return has occurred.
        // We need to exit the inner loop and switch to a different frame.
        if (Top.State != FrameState::Running)
          break;

        // Otherwise, move to the next instruction if it is not a terminator.
        // For terminators, the PC is updated in the visit* method.
        if (!I.isTerminator())
          ++Top.PC;
      }

      if (!Status)
        break;

      if (Top.State == FrameState::Exit) {
        assert((Top.Func.getReturnType()->isVoidTy() || !Top.RetVal.isNone()) &&
               "Expected return value to be set on function exit.");
        Handler.onFunctionExit(F&: Top.Func, RetVal: Top.RetVal);
        // Free stack objects allocated in this frame.
        for (auto &Obj : Top.Allocas)
          Ctx.free(Address: Obj->getAddress());
        CallStack.pop_back();
      } else {
        // The visited call instruction pushed a callee frame; the next outer
        // iteration runs the callee, and this frame resumes afterwards via
        // the Pending branch above.
        assert(Top.State == FrameState::Pending &&
               "Expected to enter a callee.");
      }
    }
    return Status;
  }
1054};
1055
1056bool Context::runFunction(Function &F, ArrayRef<AnyValue> Args,
1057 AnyValue &RetVal, EventHandler &Handler) {
1058 InstExecutor Executor(*this, Handler, F, Args, RetVal);
1059 return Executor.runMainLoop();
1060}
1061
1062} // namespace llvm::ubi
1063