1//===-- NumericalStabilitySanitizer.cpp -----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the instrumentation pass for the numerical sanitizer.
10// Conceptually the pass injects shadow computations using higher precision
11// types and inserts consistency checks. For details see the paper
12// https://arxiv.org/abs/2102.12782.
13//
14//===----------------------------------------------------------------------===//
15
16#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
17
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/SmallString.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/ADT/StringExtras.h"
23#include "llvm/Analysis/TargetLibraryInfo.h"
24#include "llvm/Analysis/ValueTracking.h"
25#include "llvm/IR/DataLayout.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/IntrinsicInst.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/IR/MDBuilder.h"
32#include "llvm/IR/Metadata.h"
33#include "llvm/IR/Module.h"
34#include "llvm/IR/Type.h"
35#include "llvm/InitializePasses.h"
36#include "llvm/Support/CommandLine.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/MathExtras.h"
39#include "llvm/Support/Regex.h"
40#include "llvm/Support/raw_ostream.h"
41#include "llvm/Transforms/Instrumentation.h"
42#include "llvm/Transforms/Utils/BasicBlockUtils.h"
43#include "llvm/Transforms/Utils/EscapeEnumerator.h"
44#include "llvm/Transforms/Utils/Local.h"
45#include "llvm/Transforms/Utils/ModuleUtils.h"
46
47#include <cstdint>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "nsan"
52
53STATISTIC(NumInstrumentedFTLoads,
54 "Number of instrumented floating-point loads");
55
56STATISTIC(NumInstrumentedFTCalls,
57 "Number of instrumented floating-point calls");
58STATISTIC(NumInstrumentedFTRets,
59 "Number of instrumented floating-point returns");
60STATISTIC(NumInstrumentedFTStores,
61 "Number of instrumented floating-point stores");
62STATISTIC(NumInstrumentedNonFTStores,
63 "Number of instrumented non floating-point stores");
64STATISTIC(
65 NumInstrumentedNonFTMemcpyStores,
66 "Number of instrumented non floating-point stores with memcpy semantics");
67STATISTIC(NumInstrumentedFCmp, "Number of instrumented fcmps");
68
69// Using smaller shadow types types can help improve speed. For example, `dlq`
70// is 3x slower to 5x faster in opt mode and 2-6x faster in dbg mode compared to
71// `dqq`.
72static cl::opt<std::string> ClShadowMapping(
73 "nsan-shadow-type-mapping", cl::init(Val: "dqq"),
74 cl::desc("One shadow type id for each of `float`, `double`, `long double`. "
75 "`d`,`l`,`q`,`e` mean double, x86_fp80, fp128 (quad) and "
76 "ppc_fp128 (extended double) respectively. The default is to "
77 "shadow `float` as `double`, and `double` and `x86_fp80` as "
78 "`fp128`"),
79 cl::Hidden);
80
81static cl::opt<bool>
82 ClInstrumentFCmp("nsan-instrument-fcmp", cl::init(Val: true),
83 cl::desc("Instrument floating-point comparisons"),
84 cl::Hidden);
85
86static cl::opt<std::string> ClCheckFunctionsFilter(
87 "check-functions-filter",
88 cl::desc("Only emit checks for arguments of functions "
89 "whose names match the given regular expression"),
90 cl::value_desc("regex"));
91
92static cl::opt<bool> ClTruncateFCmpEq(
93 "nsan-truncate-fcmp-eq", cl::init(Val: true),
94 cl::desc(
95 "This flag controls the behaviour of fcmp equality comparisons."
96 "For equality comparisons such as `x == 0.0f`, we can perform the "
97 "shadow check in the shadow (`x_shadow == 0.0) == (x == 0.0f)`) or app "
98 " domain (`(trunc(x_shadow) == 0.0f) == (x == 0.0f)`). This helps "
99 "catch the case when `x_shadow` is accurate enough (and therefore "
100 "close enough to zero) so that `trunc(x_shadow)` is zero even though "
101 "both `x` and `x_shadow` are not"),
102 cl::Hidden);
103
104// When there is external, uninstrumented code writing to memory, the shadow
105// memory can get out of sync with the application memory. Enabling this flag
106// emits consistency checks for loads to catch this situation.
107// When everything is instrumented, this is not strictly necessary because any
108// load should have a corresponding store, but can help debug cases when the
109// framework did a bad job at tracking shadow memory modifications by failing on
110// load rather than store.
111// TODO: provide a way to resume computations from the FT value when the load
112// is inconsistent. This ensures that further computations are not polluted.
113static cl::opt<bool> ClCheckLoads("nsan-check-loads",
114 cl::desc("Check floating-point load"),
115 cl::Hidden);
116
117static cl::opt<bool> ClCheckStores("nsan-check-stores", cl::init(Val: true),
118 cl::desc("Check floating-point stores"),
119 cl::Hidden);
120
121static cl::opt<bool> ClCheckRet("nsan-check-ret", cl::init(Val: true),
122 cl::desc("Check floating-point return values"),
123 cl::Hidden);
124
125// LLVM may store constant floats as bitcasted ints.
126// It's not really necessary to shadow such stores,
127// if the shadow value is unknown the framework will re-extend it on load
128// anyway. Moreover, because of size collisions (e.g. bf16 vs f16) it is
129// impossible to determine the floating-point type based on the size.
130// However, for debugging purposes it can be useful to model such stores.
131static cl::opt<bool> ClPropagateNonFTConstStoresAsFT(
132 "nsan-propagate-non-ft-const-stores-as-ft",
133 cl::desc(
134 "Propagate non floating-point const stores as floating point values."
135 "For debugging purposes only"),
136 cl::Hidden);
137
138constexpr StringLiteral kNsanModuleCtorName("nsan.module_ctor");
139constexpr StringLiteral kNsanInitName("__nsan_init");
140
141// The following values must be kept in sync with the runtime.
142constexpr int kShadowScale = 2;
143constexpr int kMaxVectorWidth = 8;
144constexpr int kMaxNumArgs = 128;
145constexpr int kMaxShadowTypeSizeBytes = 16; // fp128
146
147namespace {
148
149// Defines the characteristics (type id, type, and floating-point semantics)
150// attached for all possible shadow types.
151class ShadowTypeConfig {
152public:
153 static std::unique_ptr<ShadowTypeConfig> fromNsanTypeId(char TypeId);
154
155 // The LLVM Type corresponding to the shadow type.
156 virtual Type *getType(LLVMContext &Context) const = 0;
157
158 // The nsan type id of the shadow type (`d`, `l`, `q`, ...).
159 virtual char getNsanTypeId() const = 0;
160
161 virtual ~ShadowTypeConfig() = default;
162};
163
164template <char NsanTypeId>
165class ShadowTypeConfigImpl : public ShadowTypeConfig {
166public:
167 char getNsanTypeId() const override { return NsanTypeId; }
168 static constexpr const char kNsanTypeId = NsanTypeId;
169};
170
171// `double` (`d`) shadow type.
172class F64ShadowConfig : public ShadowTypeConfigImpl<'d'> {
173 Type *getType(LLVMContext &Context) const override {
174 return Type::getDoubleTy(C&: Context);
175 }
176};
177
178// `x86_fp80` (`l`) shadow type: X86 long double.
179class F80ShadowConfig : public ShadowTypeConfigImpl<'l'> {
180 Type *getType(LLVMContext &Context) const override {
181 return Type::getX86_FP80Ty(C&: Context);
182 }
183};
184
185// `fp128` (`q`) shadow type.
186class F128ShadowConfig : public ShadowTypeConfigImpl<'q'> {
187 Type *getType(LLVMContext &Context) const override {
188 return Type::getFP128Ty(C&: Context);
189 }
190};
191
192// `ppc_fp128` (`e`) shadow type: IBM extended double with 106 bits of mantissa.
193class PPC128ShadowConfig : public ShadowTypeConfigImpl<'e'> {
194 Type *getType(LLVMContext &Context) const override {
195 return Type::getPPC_FP128Ty(C&: Context);
196 }
197};
198
199// Creates a ShadowTypeConfig given its type id.
200std::unique_ptr<ShadowTypeConfig>
201ShadowTypeConfig::fromNsanTypeId(const char TypeId) {
202 switch (TypeId) {
203 case F64ShadowConfig::kNsanTypeId:
204 return std::make_unique<F64ShadowConfig>();
205 case F80ShadowConfig::kNsanTypeId:
206 return std::make_unique<F80ShadowConfig>();
207 case F128ShadowConfig::kNsanTypeId:
208 return std::make_unique<F128ShadowConfig>();
209 case PPC128ShadowConfig::kNsanTypeId:
210 return std::make_unique<PPC128ShadowConfig>();
211 }
212 report_fatal_error(reason: "nsan: invalid shadow type id '" + Twine(TypeId) + "'");
213}
214
// Application floating-point value types that can be shadowed. The
// enumerators index arrays, which is why this is a plain `enum` rather than
// an `enum class`.
enum FTValueType {
  kFloat = 0,
  kDouble = 1,
  kLongDouble = 2,
  kNumValueTypes = 3, // Count of the value types above; used as array size.
};
218
219// If `FT` corresponds to a primitive FTValueType, return it.
220static std::optional<FTValueType> ftValueTypeFromType(Type *FT) {
221 if (FT->isFloatTy())
222 return kFloat;
223 if (FT->isDoubleTy())
224 return kDouble;
225 if (FT->isX86_FP80Ty())
226 return kLongDouble;
227 return {};
228}
229
230// Returns the LLVM type for an FTValueType.
231static Type *typeFromFTValueType(FTValueType VT, LLVMContext &Context) {
232 switch (VT) {
233 case kFloat:
234 return Type::getFloatTy(C&: Context);
235 case kDouble:
236 return Type::getDoubleTy(C&: Context);
237 case kLongDouble:
238 return Type::getX86_FP80Ty(C&: Context);
239 case kNumValueTypes:
240 return nullptr;
241 }
242 llvm_unreachable("Unhandled FTValueType enum");
243}
244
245// Returns the type name for an FTValueType.
246static const char *typeNameFromFTValueType(FTValueType VT) {
247 switch (VT) {
248 case kFloat:
249 return "float";
250 case kDouble:
251 return "double";
252 case kLongDouble:
253 return "longdouble";
254 case kNumValueTypes:
255 return nullptr;
256 }
257 llvm_unreachable("Unhandled FTValueType enum");
258}
259
260// A specific mapping configuration of application type to shadow type for nsan
261// (see -nsan-shadow-mapping flag).
262class MappingConfig {
263public:
264 explicit MappingConfig(LLVMContext &C) : Context(C) {
265 if (ClShadowMapping.size() != 3)
266 report_fatal_error(reason: "Invalid nsan mapping: " + Twine(ClShadowMapping));
267 unsigned ShadowTypeSizeBits[kNumValueTypes];
268 for (int VT = 0; VT < kNumValueTypes; ++VT) {
269 auto Config = ShadowTypeConfig::fromNsanTypeId(TypeId: ClShadowMapping[VT]);
270 if (!Config)
271 report_fatal_error(reason: "Failed to get ShadowTypeConfig for " +
272 Twine(ClShadowMapping[VT]));
273 const unsigned AppTypeSize =
274 typeFromFTValueType(VT: static_cast<FTValueType>(VT), Context)
275 ->getScalarSizeInBits();
276 const unsigned ShadowTypeSize =
277 Config->getType(Context)->getScalarSizeInBits();
278 // Check that the shadow type size is at most kShadowScale times the
279 // application type size, so that shadow memory compoutations are valid.
280 if (ShadowTypeSize > kShadowScale * AppTypeSize)
281 report_fatal_error(reason: "Invalid nsan mapping f" + Twine(AppTypeSize) +
282 "->f" + Twine(ShadowTypeSize) +
283 ": The shadow type size should be at most " +
284 Twine(kShadowScale) +
285 " times the application type size");
286 ShadowTypeSizeBits[VT] = ShadowTypeSize;
287 Configs[VT] = std::move(Config);
288 }
289
290 // Check that the mapping is monotonous. This is required because if one
291 // does an fpextend of `float->long double` in application code, nsan is
292 // going to do an fpextend of `shadow(float) -> shadow(long double)` in
293 // shadow code. This will fail in `qql` mode, since nsan would be
294 // fpextending `f128->long`, which is invalid.
295 // TODO: Relax this.
296 if (ShadowTypeSizeBits[kFloat] > ShadowTypeSizeBits[kDouble] ||
297 ShadowTypeSizeBits[kDouble] > ShadowTypeSizeBits[kLongDouble])
298 report_fatal_error(reason: "Invalid nsan mapping: { float->f" +
299 Twine(ShadowTypeSizeBits[kFloat]) + "; double->f" +
300 Twine(ShadowTypeSizeBits[kDouble]) +
301 "; long double->f" +
302 Twine(ShadowTypeSizeBits[kLongDouble]) + " }");
303 }
304
305 const ShadowTypeConfig &byValueType(FTValueType VT) const {
306 assert(VT < FTValueType::kNumValueTypes && "invalid value type");
307 return *Configs[VT];
308 }
309
310 // Returns the extended shadow type for a given application type.
311 Type *getExtendedFPType(Type *FT) const {
312 if (const auto VT = ftValueTypeFromType(FT))
313 return Configs[*VT]->getType(Context);
314 if (FT->isVectorTy()) {
315 auto *VecTy = cast<VectorType>(Val: FT);
316 // TODO: add support for scalable vector types.
317 if (VecTy->isScalableTy())
318 return nullptr;
319 Type *ExtendedScalar = getExtendedFPType(FT: VecTy->getElementType());
320 return ExtendedScalar
321 ? VectorType::get(ElementType: ExtendedScalar, EC: VecTy->getElementCount())
322 : nullptr;
323 }
324 return nullptr;
325 }
326
327private:
328 LLVMContext &Context;
329 std::unique_ptr<ShadowTypeConfig> Configs[FTValueType::kNumValueTypes];
330};
331
332// The memory extents of a type specifies how many elements of a given
333// FTValueType needs to be stored when storing this type.
334struct MemoryExtents {
335 FTValueType ValueType;
336 uint64_t NumElts;
337};
338
339static MemoryExtents getMemoryExtentsOrDie(Type *FT) {
340 if (const auto VT = ftValueTypeFromType(FT))
341 return {.ValueType: *VT, .NumElts: 1};
342 if (auto *VecTy = dyn_cast<VectorType>(Val: FT)) {
343 const auto ScalarExtents = getMemoryExtentsOrDie(FT: VecTy->getElementType());
344 return {.ValueType: ScalarExtents.ValueType,
345 .NumElts: ScalarExtents.NumElts * VecTy->getElementCount().getFixedValue()};
346 }
347 llvm_unreachable("invalid value type");
348}
349
350// The location of a check. Passed as parameters to runtime checking functions.
351class CheckLoc {
352public:
353 // Creates a location that references an application memory location.
354 static CheckLoc makeStore(Value *Address) {
355 CheckLoc Result(kStore);
356 Result.Address = Address;
357 return Result;
358 }
359 static CheckLoc makeLoad(Value *Address) {
360 CheckLoc Result(kLoad);
361 Result.Address = Address;
362 return Result;
363 }
364
365 // Creates a location that references an argument, given by id.
366 static CheckLoc makeArg(int ArgId) {
367 CheckLoc Result(kArg);
368 Result.ArgId = ArgId;
369 return Result;
370 }
371
372 // Creates a location that references the return value of a function.
373 static CheckLoc makeRet() { return CheckLoc(kRet); }
374
375 // Creates a location that references a vector insert.
376 static CheckLoc makeInsert() { return CheckLoc(kInsert); }
377
378 // Returns the CheckType of location this refers to, as an integer-typed LLVM
379 // IR value.
380 Value *getType(LLVMContext &C) const {
381 return ConstantInt::get(Ty: Type::getInt32Ty(C), V: static_cast<int>(CheckTy));
382 }
383
384 // Returns a CheckType-specific value representing details of the location
385 // (e.g. application address for loads or stores), as an `IntptrTy`-typed LLVM
386 // IR value.
387 Value *getValue(Type *IntptrTy, IRBuilder<> &Builder) const {
388 switch (CheckTy) {
389 case kUnknown:
390 llvm_unreachable("unknown type");
391 case kRet:
392 case kInsert:
393 return ConstantInt::get(Ty: IntptrTy, V: 0);
394 case kArg:
395 return ConstantInt::get(Ty: IntptrTy, V: ArgId);
396 case kLoad:
397 case kStore:
398 return Builder.CreatePtrToInt(V: Address, DestTy: IntptrTy);
399 }
400 llvm_unreachable("Unhandled CheckType enum");
401 }
402
403private:
404 // Must be kept in sync with the runtime,
405 // see compiler-rt/lib/nsan/nsan_stats.h
406 enum CheckType {
407 kUnknown = 0,
408 kRet,
409 kArg,
410 kLoad,
411 kStore,
412 kInsert,
413 };
414 explicit CheckLoc(CheckType CheckTy) : CheckTy(CheckTy) {}
415
416 Value *Address = nullptr;
417 const CheckType CheckTy;
418 int ArgId = -1;
419};
420
421// A map of LLVM IR values to shadow LLVM IR values.
422class ValueToShadowMap {
423public:
424 explicit ValueToShadowMap(const MappingConfig &Config) : Config(Config) {}
425
426 ValueToShadowMap(const ValueToShadowMap &) = delete;
427 ValueToShadowMap &operator=(const ValueToShadowMap &) = delete;
428
429 // Sets the shadow value for a value. Asserts that the value does not already
430 // have a value.
431 void setShadow(Value &V, Value &Shadow) {
432 [[maybe_unused]] const bool Inserted = Map.try_emplace(Key: &V, Args: &Shadow).second;
433 LLVM_DEBUG({
434 if (!Inserted) {
435 if (auto *I = dyn_cast<Instruction>(&V))
436 errs() << I->getFunction()->getName() << ": ";
437 errs() << "duplicate shadow (" << &V << "): ";
438 V.dump();
439 }
440 });
441 assert(Inserted && "duplicate shadow");
442 }
443
444 // Returns true if the value already has a shadow (including if the value is a
445 // constant). If true, calling getShadow() is valid.
446 bool hasShadow(Value *V) const {
447 return isa<Constant>(Val: V) || (Map.find(Val: V) != Map.end());
448 }
449
450 // Returns the shadow value for a given value. Asserts that the value has
451 // a shadow value. Lazily creates shadows for constant values.
452 Value *getShadow(Value *V) const {
453 if (Constant *C = dyn_cast<Constant>(Val: V))
454 return getShadowConstant(C);
455 return Map.find(Val: V)->second;
456 }
457
458 bool empty() const { return Map.empty(); }
459
460private:
461 // Extends a constant application value to its shadow counterpart.
462 APFloat extendConstantFP(APFloat CV, const fltSemantics &To) const {
463 bool LosesInfo = false;
464 CV.convert(ToSemantics: To, RM: APFloatBase::rmTowardZero, losesInfo: &LosesInfo);
465 return CV;
466 }
467
468 // Returns the shadow constant for the given application constant.
469 Constant *getShadowConstant(Constant *C) const {
470 if (UndefValue *U = dyn_cast<UndefValue>(Val: C)) {
471 return UndefValue::get(T: Config.getExtendedFPType(FT: U->getType()));
472 }
473 if (ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C)) {
474 // Floating-point constants.
475 Type *Ty = Config.getExtendedFPType(FT: CFP->getType());
476 return ConstantFP::get(
477 Ty, V: extendConstantFP(CV: CFP->getValueAPF(), To: Ty->getFltSemantics()));
478 }
479 // Vector, array, or aggregate constants.
480 if (C->getType()->isVectorTy()) {
481 SmallVector<Constant *, 8> Elements;
482 for (int I = 0, E = cast<VectorType>(Val: C->getType())
483 ->getElementCount()
484 .getFixedValue();
485 I < E; ++I)
486 Elements.push_back(Elt: getShadowConstant(C: C->getAggregateElement(Elt: I)));
487 return ConstantVector::get(V: Elements);
488 }
489 llvm_unreachable("unimplemented");
490 }
491
492 const MappingConfig &Config;
493 DenseMap<Value *, Value *> Map;
494};
495
496/// Instantiating NumericalStabilitySanitizer inserts the nsan runtime library
497/// API function declarations into the module if they don't exist already.
498/// Instantiating ensures the __nsan_init function is in the list of global
499/// constructors for the module.
500class NumericalStabilitySanitizer {
501public:
502 NumericalStabilitySanitizer(Module &M);
503 bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
504
505private:
506 bool instrumentMemIntrinsic(MemIntrinsic *MI);
507 void maybeAddSuffixForNsanInterface(CallBase *CI);
508 bool addrPointsToConstantData(Value *Addr);
509 void maybeCreateShadowValue(Instruction &Root, const TargetLibraryInfo &TLI,
510 ValueToShadowMap &Map);
511 Value *createShadowValueWithOperandsAvailable(Instruction &Inst,
512 const TargetLibraryInfo &TLI,
513 const ValueToShadowMap &Map);
514 PHINode *maybeCreateShadowPhi(PHINode &Phi, const TargetLibraryInfo &TLI);
515 void createShadowArguments(Function &F, const TargetLibraryInfo &TLI,
516 ValueToShadowMap &Map);
517
518 void populateShadowStack(CallBase &CI, const TargetLibraryInfo &TLI,
519 const ValueToShadowMap &Map);
520
521 void propagateShadowValues(Instruction &Inst, const TargetLibraryInfo &TLI,
522 const ValueToShadowMap &Map);
523 Value *emitCheck(Value *V, Value *ShadowV, IRBuilder<> &Builder,
524 CheckLoc Loc);
525 Value *emitCheckInternal(Value *V, Value *ShadowV, IRBuilder<> &Builder,
526 CheckLoc Loc);
527 void emitFCmpCheck(FCmpInst &FCmp, const ValueToShadowMap &Map);
528
529 // Value creation handlers.
530 Value *handleLoad(LoadInst &Load, Type *VT, Type *ExtendedVT);
531 Value *handleCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
532 const TargetLibraryInfo &TLI,
533 const ValueToShadowMap &Map, IRBuilder<> &Builder);
534 Value *maybeHandleKnownCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
535 const TargetLibraryInfo &TLI,
536 const ValueToShadowMap &Map,
537 IRBuilder<> &Builder);
538 Value *handleTrunc(const FPTruncInst &Trunc, Type *VT, Type *ExtendedVT,
539 const ValueToShadowMap &Map, IRBuilder<> &Builder);
540 Value *handleExt(const FPExtInst &Ext, Type *VT, Type *ExtendedVT,
541 const ValueToShadowMap &Map, IRBuilder<> &Builder);
542
543 // Value propagation handlers.
544 void propagateFTStore(StoreInst &Store, Type *VT, Type *ExtendedVT,
545 const ValueToShadowMap &Map);
546 void propagateNonFTStore(StoreInst &Store, Type *VT,
547 const ValueToShadowMap &Map);
548
549 const DataLayout &DL;
550 LLVMContext &Context;
551 MappingConfig Config;
552 IntegerType *IntptrTy = nullptr;
553 FunctionCallee NsanGetShadowPtrForStore[FTValueType::kNumValueTypes] = {};
554 FunctionCallee NsanGetShadowPtrForLoad[FTValueType::kNumValueTypes] = {};
555 FunctionCallee NsanCheckValue[FTValueType::kNumValueTypes] = {};
556 FunctionCallee NsanFCmpFail[FTValueType::kNumValueTypes] = {};
557 FunctionCallee NsanCopyValues;
558 FunctionCallee NsanSetValueUnknown;
559 FunctionCallee NsanGetRawShadowTypePtr;
560 FunctionCallee NsanGetRawShadowPtr;
561 GlobalValue *NsanShadowRetTag = nullptr;
562
563 Type *NsanShadowRetType = nullptr;
564 GlobalValue *NsanShadowRetPtr = nullptr;
565
566 GlobalValue *NsanShadowArgsTag = nullptr;
567
568 Type *NsanShadowArgsType = nullptr;
569 GlobalValue *NsanShadowArgsPtr = nullptr;
570
571 std::optional<Regex> CheckFunctionsFilter;
572};
573} // end anonymous namespace
574
575PreservedAnalyses
576NumericalStabilitySanitizerPass::run(Module &M, ModuleAnalysisManager &MAM) {
577 getOrCreateSanitizerCtorAndInitFunctions(
578 M, CtorName: kNsanModuleCtorName, InitName: kNsanInitName, /*InitArgTypes=*/{},
579 /*InitArgs=*/{},
580 // This callback is invoked when the functions are created the first
581 // time. Hook them into the global ctors list in that case:
582 FunctionsCreatedCallback: [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, F: Ctor, Priority: 0); });
583
584 NumericalStabilitySanitizer Nsan(M);
585 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
586 for (Function &F : M)
587 Nsan.sanitizeFunction(F, TLI: FAM.getResult<TargetLibraryAnalysis>(IR&: F));
588
589 return PreservedAnalyses::none();
590}
591
592static GlobalValue *createThreadLocalGV(const char *Name, Module &M, Type *Ty) {
593 return dyn_cast<GlobalValue>(Val: M.getOrInsertGlobal(Name, Ty, CreateGlobalCallback: [&M, Ty, Name] {
594 return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
595 nullptr, Name, nullptr,
596 GlobalVariable::InitialExecTLSModel);
597 }));
598}
599
600NumericalStabilitySanitizer::NumericalStabilitySanitizer(Module &M)
601 : DL(M.getDataLayout()), Context(M.getContext()), Config(Context) {
602 IntptrTy = DL.getIntPtrType(C&: Context);
603 Type *PtrTy = PointerType::getUnqual(C&: Context);
604 Type *Int32Ty = Type::getInt32Ty(C&: Context);
605 Type *Int1Ty = Type::getInt1Ty(C&: Context);
606 Type *VoidTy = Type::getVoidTy(C&: Context);
607
608 AttributeList Attr;
609 Attr = Attr.addFnAttribute(C&: Context, Kind: Attribute::NoUnwind);
610 // Initialize the runtime values (functions and global variables).
611 for (int I = 0; I < kNumValueTypes; ++I) {
612 const FTValueType VT = static_cast<FTValueType>(I);
613 const char *VTName = typeNameFromFTValueType(VT);
614 Type *VTTy = typeFromFTValueType(VT, Context);
615
616 // Load/store.
617 const std::string GetterPrefix =
618 std::string("__nsan_get_shadow_ptr_for_") + VTName;
619 NsanGetShadowPtrForStore[VT] = M.getOrInsertFunction(
620 Name: GetterPrefix + "_store", AttributeList: Attr, RetTy: PtrTy, Args: PtrTy, Args: IntptrTy);
621 NsanGetShadowPtrForLoad[VT] = M.getOrInsertFunction(
622 Name: GetterPrefix + "_load", AttributeList: Attr, RetTy: PtrTy, Args: PtrTy, Args: IntptrTy);
623
624 // Check.
625 const auto &ShadowConfig = Config.byValueType(VT);
626 Type *ShadowTy = ShadowConfig.getType(Context);
627 NsanCheckValue[VT] =
628 M.getOrInsertFunction(Name: std::string("__nsan_internal_check_") + VTName +
629 "_" + ShadowConfig.getNsanTypeId(),
630 AttributeList: Attr, RetTy: Int32Ty, Args: VTTy, Args: ShadowTy, Args: Int32Ty, Args: IntptrTy);
631 NsanFCmpFail[VT] = M.getOrInsertFunction(
632 Name: std::string("__nsan_fcmp_fail_") + VTName + "_" +
633 ShadowConfig.getNsanTypeId(),
634 AttributeList: Attr, RetTy: VoidTy, Args: VTTy, Args: VTTy, Args: ShadowTy, Args: ShadowTy, Args: Int32Ty, Args: Int1Ty, Args: Int1Ty);
635 }
636
637 NsanCopyValues = M.getOrInsertFunction(Name: "__nsan_copy_values", AttributeList: Attr, RetTy: VoidTy,
638 Args: PtrTy, Args: PtrTy, Args: IntptrTy);
639 NsanSetValueUnknown = M.getOrInsertFunction(Name: "__nsan_set_value_unknown", AttributeList: Attr,
640 RetTy: VoidTy, Args: PtrTy, Args: IntptrTy);
641
642 // TODO: Add attributes nofree, nosync, readnone, readonly,
643 NsanGetRawShadowTypePtr = M.getOrInsertFunction(
644 Name: "__nsan_internal_get_raw_shadow_type_ptr", AttributeList: Attr, RetTy: PtrTy, Args: PtrTy);
645 NsanGetRawShadowPtr = M.getOrInsertFunction(
646 Name: "__nsan_internal_get_raw_shadow_ptr", AttributeList: Attr, RetTy: PtrTy, Args: PtrTy);
647
648 NsanShadowRetTag = createThreadLocalGV(Name: "__nsan_shadow_ret_tag", M, Ty: IntptrTy);
649
650 NsanShadowRetType = ArrayType::get(ElementType: Type::getInt8Ty(C&: Context),
651 NumElements: kMaxVectorWidth * kMaxShadowTypeSizeBytes);
652 NsanShadowRetPtr =
653 createThreadLocalGV(Name: "__nsan_shadow_ret_ptr", M, Ty: NsanShadowRetType);
654
655 NsanShadowArgsTag =
656 createThreadLocalGV(Name: "__nsan_shadow_args_tag", M, Ty: IntptrTy);
657
658 NsanShadowArgsType =
659 ArrayType::get(ElementType: Type::getInt8Ty(C&: Context),
660 NumElements: kMaxVectorWidth * kMaxNumArgs * kMaxShadowTypeSizeBytes);
661
662 NsanShadowArgsPtr =
663 createThreadLocalGV(Name: "__nsan_shadow_args_ptr", M, Ty: NsanShadowArgsType);
664
665 if (!ClCheckFunctionsFilter.empty()) {
666 Regex R = Regex(ClCheckFunctionsFilter);
667 std::string RegexError;
668 assert(R.isValid(RegexError));
669 CheckFunctionsFilter = std::move(R);
670 }
671}
672
673// Returns true if the given LLVM Value points to constant data (typically, a
674// global variable reference).
675bool NumericalStabilitySanitizer::addrPointsToConstantData(Value *Addr) {
676 // If this is a GEP, just analyze its pointer operand.
677 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: Addr))
678 Addr = GEP->getPointerOperand();
679
680 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Val: Addr))
681 return GV->isConstant();
682 return false;
683}
684
685// This instruments the function entry to create shadow arguments.
686// Pseudocode:
687// if (this_fn_ptr == __nsan_shadow_args_tag) {
688// s(arg0) = LOAD<sizeof(arg0)>(__nsan_shadow_args);
689// s(arg1) = LOAD<sizeof(arg1)>(__nsan_shadow_args + sizeof(arg0));
690// ...
691// __nsan_shadow_args_tag = 0;
692// } else {
693// s(arg0) = fext(arg0);
694// s(arg1) = fext(arg1);
695// ...
696// }
697void NumericalStabilitySanitizer::createShadowArguments(
698 Function &F, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
699 assert(!F.getIntrinsicID() && "found a definition of an intrinsic");
700
701 // Do not bother if there are no FP args.
702 if (all_of(Range: F.args(), P: [this](const Argument &Arg) {
703 return Config.getExtendedFPType(FT: Arg.getType()) == nullptr;
704 }))
705 return;
706
707 IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHI());
708 // The function has shadow args if the shadow args tag matches the function
709 // address.
710 Value *HasShadowArgs = Builder.CreateICmpEQ(
711 LHS: Builder.CreateLoad(Ty: IntptrTy, Ptr: NsanShadowArgsTag, /*isVolatile=*/false),
712 RHS: Builder.CreatePtrToInt(V: &F, DestTy: IntptrTy));
713
714 unsigned ShadowArgsOffsetBytes = 0;
715 for (Argument &Arg : F.args()) {
716 Type *VT = Arg.getType();
717 Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
718 if (ExtendedVT == nullptr)
719 continue; // Not an FT value.
720 Value *L = Builder.CreateAlignedLoad(
721 Ty: ExtendedVT,
722 Ptr: Builder.CreateConstGEP2_64(Ty: NsanShadowArgsType, Ptr: NsanShadowArgsPtr, Idx0: 0,
723 Idx1: ShadowArgsOffsetBytes),
724 Align: Align(1), /*isVolatile=*/false);
725 Value *Shadow = Builder.CreateSelect(C: HasShadowArgs, True: L,
726 False: Builder.CreateFPExt(V: &Arg, DestTy: ExtendedVT));
727 Map.setShadow(V&: Arg, Shadow&: *Shadow);
728 TypeSize SlotSize = DL.getTypeStoreSize(Ty: ExtendedVT);
729 assert(!SlotSize.isScalable() && "unsupported");
730 ShadowArgsOffsetBytes += SlotSize;
731 }
732 Builder.CreateStore(Val: ConstantInt::get(Ty: IntptrTy, V: 0), Ptr: NsanShadowArgsTag);
733}
734
735// Returns true if the instrumentation should emit code to check arguments
736// before a function call.
737static bool shouldCheckArgs(CallBase &CI, const TargetLibraryInfo &TLI,
738 const std::optional<Regex> &CheckFunctionsFilter) {
739
740 Function *Fn = CI.getCalledFunction();
741
742 if (CheckFunctionsFilter) {
743 // Skip checking args of indirect calls.
744 if (Fn == nullptr)
745 return false;
746 if (CheckFunctionsFilter->match(String: Fn->getName()))
747 return true;
748 return false;
749 }
750
751 if (Fn == nullptr)
752 return true; // Always check args of indirect calls.
753
754 // Never check nsan functions, the user called them for a reason.
755 if (Fn->getName().starts_with(Prefix: "__nsan_"))
756 return false;
757
758 const auto ID = Fn->getIntrinsicID();
759 LibFunc LFunc = LibFunc::NumLibFuncs;
760 // Always check args of unknown functions.
761 if (ID == Intrinsic::ID() && !TLI.getLibFunc(FDecl: *Fn, F&: LFunc))
762 return true;
763
764 // Do not check args of an `fabs` call that is used for a comparison.
765 // This is typically used for `fabs(a-b) < tolerance`, where what matters is
766 // the result of the comparison, which is already caught be the fcmp checks.
767 if (ID == Intrinsic::fabs || LFunc == LibFunc_fabsf ||
768 LFunc == LibFunc_fabs || LFunc == LibFunc_fabsl)
769 for (const auto &U : CI.users())
770 if (isa<CmpInst>(Val: U))
771 return false;
772
773 return true; // Default is check.
774}
775
776// Populates the shadow call stack (which contains shadow values for every
777// floating-point parameter to the function).
778void NumericalStabilitySanitizer::populateShadowStack(
779 CallBase &CI, const TargetLibraryInfo &TLI, const ValueToShadowMap &Map) {
780 // Do not create a shadow stack for inline asm.
781 if (CI.isInlineAsm())
782 return;
783
784 // Do not bother if there are no FP args.
785 if (all_of(Range: CI.operands(), P: [this](const Value *Arg) {
786 return Config.getExtendedFPType(FT: Arg->getType()) == nullptr;
787 }))
788 return;
789
790 IRBuilder<> Builder(&CI);
791 SmallVector<Value *, 8> ArgShadows;
792 const bool ShouldCheckArgs = shouldCheckArgs(CI, TLI, CheckFunctionsFilter);
793 for (auto [ArgIdx, Arg] : enumerate(First: CI.operands())) {
794 if (Config.getExtendedFPType(FT: Arg->getType()) == nullptr)
795 continue; // Not an FT value.
796 Value *ArgShadow = Map.getShadow(V: Arg);
797 ArgShadows.push_back(Elt: ShouldCheckArgs ? emitCheck(V: Arg, ShadowV: ArgShadow, Builder,
798 Loc: CheckLoc::makeArg(ArgId: ArgIdx))
799 : ArgShadow);
800 }
801
802 // Do not create shadow stacks for intrinsics/known lib funcs.
803 if (Function *Fn = CI.getCalledFunction()) {
804 LibFunc LFunc;
805 if (Fn->isIntrinsic() || TLI.getLibFunc(FDecl: *Fn, F&: LFunc))
806 return;
807 }
808
809 // Set the shadow stack tag.
810 Builder.CreateStore(Val: CI.getCalledOperand(), Ptr: NsanShadowArgsTag);
811 TypeSize ShadowArgsOffsetBytes = TypeSize::getFixed(ExactSize: 0);
812
813 unsigned ShadowArgId = 0;
814 for (const Value *Arg : CI.operands()) {
815 Type *VT = Arg->getType();
816 Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
817 if (ExtendedVT == nullptr)
818 continue; // Not an FT value.
819 Builder.CreateAlignedStore(
820 Val: ArgShadows[ShadowArgId++],
821 Ptr: Builder.CreateConstGEP2_64(Ty: NsanShadowArgsType, Ptr: NsanShadowArgsPtr, Idx0: 0,
822 Idx1: ShadowArgsOffsetBytes),
823 Align: Align(1), /*isVolatile=*/false);
824 TypeSize SlotSize = DL.getTypeStoreSize(Ty: ExtendedVT);
825 assert(!SlotSize.isScalable() && "unsupported");
826 ShadowArgsOffsetBytes += SlotSize;
827 }
828}
829
// Outcome of a check emitted by emitCheckInternal(): indicates whether
// computation should continue with the shadow value or resume by
// re-fextending the original value.
// The numeric values are part of the contract with the runtime.
enum class ContinuationType { // Keep in sync with runtime.
  ContinueWithShadow = 0,
  ResumeFromValue = 1,
};
837
838Value *NumericalStabilitySanitizer::emitCheckInternal(Value *V, Value *ShadowV,
839 IRBuilder<> &Builder,
840 CheckLoc Loc) {
841 // Do not emit checks for constant values, this is redundant.
842 if (isa<Constant>(Val: V))
843 return ConstantInt::get(
844 Ty: Builder.getInt32Ty(),
845 V: static_cast<int>(ContinuationType::ContinueWithShadow));
846
847 Type *Ty = V->getType();
848 if (const auto VT = ftValueTypeFromType(FT: Ty))
849 return Builder.CreateCall(
850 Callee: NsanCheckValue[*VT],
851 Args: {V, ShadowV, Loc.getType(C&: Context), Loc.getValue(IntptrTy, Builder)});
852
853 if (Ty->isVectorTy()) {
854 auto *VecTy = cast<VectorType>(Val: Ty);
855 // We currently skip scalable vector types in MappingConfig,
856 // thus we should not encounter any such types here.
857 assert(!VecTy->isScalableTy() &&
858 "Scalable vector types are not supported yet");
859 Value *CheckResult = nullptr;
860 for (int I = 0, E = VecTy->getElementCount().getFixedValue(); I < E; ++I) {
861 // We resume if any element resumes. Another option would be to create a
862 // vector shuffle with the array of ContinueWithShadow, but that is too
863 // complex.
864 Value *ExtractV = Builder.CreateExtractElement(Vec: V, Idx: I);
865 Value *ExtractShadowV = Builder.CreateExtractElement(Vec: ShadowV, Idx: I);
866 Value *ComponentCheckResult =
867 emitCheckInternal(V: ExtractV, ShadowV: ExtractShadowV, Builder, Loc);
868 CheckResult = CheckResult
869 ? Builder.CreateOr(LHS: CheckResult, RHS: ComponentCheckResult)
870 : ComponentCheckResult;
871 }
872 return CheckResult;
873 }
874 if (Ty->isArrayTy()) {
875 Value *CheckResult = nullptr;
876 for (auto I : seq(Size: Ty->getArrayNumElements())) {
877 Value *ExtractV = Builder.CreateExtractElement(Vec: V, Idx: I);
878 Value *ExtractShadowV = Builder.CreateExtractElement(Vec: ShadowV, Idx: I);
879 Value *ComponentCheckResult =
880 emitCheckInternal(V: ExtractV, ShadowV: ExtractShadowV, Builder, Loc);
881 CheckResult = CheckResult
882 ? Builder.CreateOr(LHS: CheckResult, RHS: ComponentCheckResult)
883 : ComponentCheckResult;
884 }
885 return CheckResult;
886 }
887 if (Ty->isStructTy()) {
888 Value *CheckResult = nullptr;
889 for (auto I : seq(Size: Ty->getStructNumElements())) {
890 if (Config.getExtendedFPType(FT: Ty->getStructElementType(N: I)) == nullptr)
891 continue; // Only check FT values.
892 Value *ExtractV = Builder.CreateExtractValue(Agg: V, Idxs: I);
893 Value *ExtractShadowV = Builder.CreateExtractElement(Vec: ShadowV, Idx: I);
894 Value *ComponentCheckResult =
895 emitCheckInternal(V: ExtractV, ShadowV: ExtractShadowV, Builder, Loc);
896 CheckResult = CheckResult
897 ? Builder.CreateOr(LHS: CheckResult, RHS: ComponentCheckResult)
898 : ComponentCheckResult;
899 }
900 if (!CheckResult)
901 return ConstantInt::get(
902 Ty: Builder.getInt32Ty(),
903 V: static_cast<int>(ContinuationType::ContinueWithShadow));
904 return CheckResult;
905 }
906
907 llvm_unreachable("not implemented");
908}
909
910// Inserts a runtime check of V against its shadow value ShadowV.
911// We check values whenever they escape: on return, call, stores, and
912// insertvalue.
913// Returns the shadow value that should be used to continue the computations,
914// depending on the answer from the runtime.
915// TODO: Should we check on select ? phi ?
916Value *NumericalStabilitySanitizer::emitCheck(Value *V, Value *ShadowV,
917 IRBuilder<> &Builder,
918 CheckLoc Loc) {
919 // Do not emit checks for constant values, this is redundant.
920 if (isa<Constant>(Val: V))
921 return ShadowV;
922
923 if (Instruction *Inst = dyn_cast<Instruction>(Val: V)) {
924 Function *F = Inst->getFunction();
925 if (CheckFunctionsFilter && !CheckFunctionsFilter->match(String: F->getName())) {
926 return ShadowV;
927 }
928 }
929
930 Value *CheckResult = emitCheckInternal(V, ShadowV, Builder, Loc);
931 Value *ICmpEQ = Builder.CreateICmpEQ(
932 LHS: CheckResult,
933 RHS: ConstantInt::get(Ty: Builder.getInt32Ty(),
934 V: static_cast<int>(ContinuationType::ResumeFromValue)));
935 return Builder.CreateSelect(
936 C: ICmpEQ, True: Builder.CreateFPExt(V, DestTy: Config.getExtendedFPType(FT: V->getType())),
937 False: ShadowV);
938}
939
940// Inserts a check that fcmp on shadow values are consistent with that on base
941// values.
942void NumericalStabilitySanitizer::emitFCmpCheck(FCmpInst &FCmp,
943 const ValueToShadowMap &Map) {
944 if (!ClInstrumentFCmp)
945 return;
946
947 Function *F = FCmp.getFunction();
948 if (CheckFunctionsFilter && !CheckFunctionsFilter->match(String: F->getName()))
949 return;
950
951 Value *LHS = FCmp.getOperand(i_nocapture: 0);
952 if (Config.getExtendedFPType(FT: LHS->getType()) == nullptr)
953 return;
954 Value *RHS = FCmp.getOperand(i_nocapture: 1);
955
956 // Split the basic block. On mismatch, we'll jump to the new basic block with
957 // a call to the runtime for error reporting.
958 BasicBlock *FCmpBB = FCmp.getParent();
959 BasicBlock *NextBB = FCmpBB->splitBasicBlock(I: FCmp.getNextNode());
960 // Remove the newly created terminator unconditional branch.
961 FCmpBB->back().eraseFromParent();
962 BasicBlock *FailBB =
963 BasicBlock::Create(Context, Name: "", Parent: FCmpBB->getParent(), InsertBefore: NextBB);
964
965 // Create the shadow fcmp and comparison between the fcmps.
966 IRBuilder<> FCmpBuilder(FCmpBB);
967 FCmpBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
968 Value *ShadowLHS = Map.getShadow(V: LHS);
969 Value *ShadowRHS = Map.getShadow(V: RHS);
970 // See comment on ClTruncateFCmpEq.
971 if (FCmp.isEquality() && ClTruncateFCmpEq) {
972 Type *Ty = ShadowLHS->getType();
973 ShadowLHS = FCmpBuilder.CreateFPExt(
974 V: FCmpBuilder.CreateFPTrunc(V: ShadowLHS, DestTy: LHS->getType()), DestTy: Ty);
975 ShadowRHS = FCmpBuilder.CreateFPExt(
976 V: FCmpBuilder.CreateFPTrunc(V: ShadowRHS, DestTy: RHS->getType()), DestTy: Ty);
977 }
978 Value *ShadowFCmp =
979 FCmpBuilder.CreateFCmp(P: FCmp.getPredicate(), LHS: ShadowLHS, RHS: ShadowRHS);
980 Value *OriginalAndShadowFcmpMatch =
981 FCmpBuilder.CreateICmpEQ(LHS: &FCmp, RHS: ShadowFCmp);
982
983 if (OriginalAndShadowFcmpMatch->getType()->isVectorTy()) {
984 // If we have a vector type, `OriginalAndShadowFcmpMatch` is a vector of i1,
985 // where an element is true if the corresponding elements in original and
986 // shadow are the same. We want all elements to be 1.
987 OriginalAndShadowFcmpMatch =
988 FCmpBuilder.CreateAndReduce(Src: OriginalAndShadowFcmpMatch);
989 }
990
991 // Use MDBuilder(*C).createLikelyBranchWeights() because "match" is the common
992 // case.
993 FCmpBuilder.CreateCondBr(Cond: OriginalAndShadowFcmpMatch, True: NextBB, False: FailBB,
994 BranchWeights: MDBuilder(Context).createLikelyBranchWeights());
995
996 // Fill in FailBB.
997 IRBuilder<> FailBuilder(FailBB);
998 FailBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
999
1000 const auto EmitFailCall = [this, &FCmp, &FCmpBuilder,
1001 &FailBuilder](Value *L, Value *R, Value *ShadowL,
1002 Value *ShadowR, Value *Result,
1003 Value *ShadowResult) {
1004 Type *FT = L->getType();
1005 FunctionCallee *Callee = nullptr;
1006 if (FT->isFloatTy()) {
1007 Callee = &(NsanFCmpFail[kFloat]);
1008 } else if (FT->isDoubleTy()) {
1009 Callee = &(NsanFCmpFail[kDouble]);
1010 } else if (FT->isX86_FP80Ty()) {
1011 // TODO: make NsanFCmpFailLongDouble work.
1012 Callee = &(NsanFCmpFail[kDouble]);
1013 L = FailBuilder.CreateFPTrunc(V: L, DestTy: Type::getDoubleTy(C&: Context));
1014 R = FailBuilder.CreateFPTrunc(V: L, DestTy: Type::getDoubleTy(C&: Context));
1015 } else {
1016 llvm_unreachable("not implemented");
1017 }
1018 FailBuilder.CreateCall(Callee: *Callee, Args: {L, R, ShadowL, ShadowR,
1019 ConstantInt::get(Ty: FCmpBuilder.getInt32Ty(),
1020 V: FCmp.getPredicate()),
1021 Result, ShadowResult});
1022 };
1023 if (LHS->getType()->isVectorTy()) {
1024 for (int I = 0, E = cast<VectorType>(Val: LHS->getType())
1025 ->getElementCount()
1026 .getFixedValue();
1027 I < E; ++I) {
1028 Value *ExtractLHS = FailBuilder.CreateExtractElement(Vec: LHS, Idx: I);
1029 Value *ExtractRHS = FailBuilder.CreateExtractElement(Vec: RHS, Idx: I);
1030 Value *ExtractShaodwLHS = FailBuilder.CreateExtractElement(Vec: ShadowLHS, Idx: I);
1031 Value *ExtractShaodwRHS = FailBuilder.CreateExtractElement(Vec: ShadowRHS, Idx: I);
1032 Value *ExtractFCmp = FailBuilder.CreateExtractElement(Vec: &FCmp, Idx: I);
1033 Value *ExtractShadowFCmp =
1034 FailBuilder.CreateExtractElement(Vec: ShadowFCmp, Idx: I);
1035 EmitFailCall(ExtractLHS, ExtractRHS, ExtractShaodwLHS, ExtractShaodwRHS,
1036 ExtractFCmp, ExtractShadowFCmp);
1037 }
1038 } else {
1039 EmitFailCall(LHS, RHS, ShadowLHS, ShadowRHS, &FCmp, ShadowFCmp);
1040 }
1041 FailBuilder.CreateBr(Dest: NextBB);
1042
1043 ++NumInstrumentedFCmp;
1044}
1045
1046// Creates a shadow phi value for any phi that defines a value of FT type.
1047PHINode *NumericalStabilitySanitizer::maybeCreateShadowPhi(
1048 PHINode &Phi, const TargetLibraryInfo &TLI) {
1049 Type *VT = Phi.getType();
1050 Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
1051 if (ExtendedVT == nullptr)
1052 return nullptr; // Not an FT value.
1053 // The phi operands are shadow values and are not available when the phi is
1054 // created. They will be populated in a final phase, once all shadow values
1055 // have been created.
1056 PHINode *Shadow = PHINode::Create(Ty: ExtendedVT, NumReservedValues: Phi.getNumIncomingValues());
1057 Shadow->insertAfter(InsertPos: &Phi);
1058 return Shadow;
1059}
1060
1061Value *NumericalStabilitySanitizer::handleLoad(LoadInst &Load, Type *VT,
1062 Type *ExtendedVT) {
1063 IRBuilder<> Builder(Load.getNextNode());
1064 Builder.SetCurrentDebugLocation(Load.getDebugLoc());
1065 if (addrPointsToConstantData(Addr: Load.getPointerOperand())) {
1066 // No need to look into the shadow memory, the value is a constant. Just
1067 // convert from FT to 2FT.
1068 return Builder.CreateFPExt(V: &Load, DestTy: ExtendedVT);
1069 }
1070
1071 // if (%shadowptr == &)
1072 // %shadow = fpext %v
1073 // else
1074 // %shadow = load (ptrcast %shadow_ptr))
1075 // Considered options here:
1076 // - Have `NsanGetShadowPtrForLoad` return a fixed address
1077 // &__nsan_unknown_value_shadow_address that is valid to load from, and
1078 // use a select. This has the advantage that the generated IR is simpler.
1079 // - Have `NsanGetShadowPtrForLoad` return nullptr. Because `select` does
1080 // not short-circuit, dereferencing the returned pointer is no longer an
1081 // option, have to split and create a separate basic block. This has the
1082 // advantage of being easier to debug because it crashes if we ever mess
1083 // up.
1084
1085 const auto Extents = getMemoryExtentsOrDie(FT: VT);
1086 Value *ShadowPtr = Builder.CreateCall(
1087 Callee: NsanGetShadowPtrForLoad[Extents.ValueType],
1088 Args: {Load.getPointerOperand(), ConstantInt::get(Ty: IntptrTy, V: Extents.NumElts)});
1089 ++NumInstrumentedFTLoads;
1090
1091 // Split the basic block.
1092 BasicBlock *LoadBB = Load.getParent();
1093 BasicBlock *NextBB = LoadBB->splitBasicBlock(I: Builder.GetInsertPoint());
1094 // Create the two options for creating the shadow value.
1095 BasicBlock *ShadowLoadBB =
1096 BasicBlock::Create(Context, Name: "", Parent: LoadBB->getParent(), InsertBefore: NextBB);
1097 BasicBlock *FExtBB =
1098 BasicBlock::Create(Context, Name: "", Parent: LoadBB->getParent(), InsertBefore: NextBB);
1099
1100 // Replace the newly created terminator unconditional branch by a conditional
1101 // branch to one of the options.
1102 {
1103 LoadBB->back().eraseFromParent();
1104 IRBuilder<> LoadBBBuilder(LoadBB); // The old builder has been invalidated.
1105 LoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1106 LoadBBBuilder.CreateCondBr(Cond: LoadBBBuilder.CreateIsNull(Arg: ShadowPtr), True: FExtBB,
1107 False: ShadowLoadBB);
1108 }
1109
1110 // Fill in ShadowLoadBB.
1111 IRBuilder<> ShadowLoadBBBuilder(ShadowLoadBB);
1112 ShadowLoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1113 Value *ShadowLoad = ShadowLoadBBBuilder.CreateAlignedLoad(
1114 Ty: ExtendedVT, Ptr: ShadowPtr, Align: Align(1), isVolatile: Load.isVolatile());
1115 if (ClCheckLoads) {
1116 ShadowLoad = emitCheck(V: &Load, ShadowV: ShadowLoad, Builder&: ShadowLoadBBBuilder,
1117 Loc: CheckLoc::makeLoad(Address: Load.getPointerOperand()));
1118 }
1119 ShadowLoadBBBuilder.CreateBr(Dest: NextBB);
1120
1121 // Fill in FExtBB.
1122 IRBuilder<> FExtBBBuilder(FExtBB);
1123 FExtBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1124 Value *FExt = FExtBBBuilder.CreateFPExt(V: &Load, DestTy: ExtendedVT);
1125 FExtBBBuilder.CreateBr(Dest: NextBB);
1126
1127 // The shadow value come from any of the options.
1128 IRBuilder<> NextBBBuilder(&*NextBB->begin());
1129 NextBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1130 PHINode *ShadowPhi = NextBBBuilder.CreatePHI(Ty: ExtendedVT, NumReservedValues: 2);
1131 ShadowPhi->addIncoming(V: ShadowLoad, BB: ShadowLoadBB);
1132 ShadowPhi->addIncoming(V: FExt, BB: FExtBB);
1133 return ShadowPhi;
1134}
1135
1136Value *NumericalStabilitySanitizer::handleTrunc(const FPTruncInst &Trunc,
1137 Type *VT, Type *ExtendedVT,
1138 const ValueToShadowMap &Map,
1139 IRBuilder<> &Builder) {
1140 Value *OrigSource = Trunc.getOperand(i_nocapture: 0);
1141 Type *OrigSourceTy = OrigSource->getType();
1142 Type *ExtendedSourceTy = Config.getExtendedFPType(FT: OrigSourceTy);
1143
1144 // When truncating:
1145 // - (A) If the source has a shadow, we truncate from the shadow, else we
1146 // truncate from the original source.
1147 // - (B) If the shadow of the source is larger than the shadow of the dest,
1148 // we still need a truncate. Else, the shadow of the source is the same
1149 // type as the shadow of the dest (because mappings are non-decreasing), so
1150 // we don't need to emit a truncate.
1151 // Examples,
1152 // with a mapping of {f32->f64;f64->f80;f80->f128}
1153 // fptrunc double %1 to float -> fptrunc x86_fp80 s(%1) to double
1154 // fptrunc x86_fp80 %1 to float -> fptrunc fp128 s(%1) to double
1155 // fptrunc fp128 %1 to float -> fptrunc fp128 %1 to double
1156 // fptrunc x86_fp80 %1 to double -> x86_fp80 s(%1)
1157 // fptrunc fp128 %1 to double -> fptrunc fp128 %1 to x86_fp80
1158 // fptrunc fp128 %1 to x86_fp80 -> fp128 %1
1159 // with a mapping of {f32->f64;f64->f128;f80->f128}
1160 // fptrunc double %1 to float -> fptrunc fp128 s(%1) to double
1161 // fptrunc x86_fp80 %1 to float -> fptrunc fp128 s(%1) to double
1162 // fptrunc fp128 %1 to float -> fptrunc fp128 %1 to double
1163 // fptrunc x86_fp80 %1 to double -> fp128 %1
1164 // fptrunc fp128 %1 to double -> fp128 %1
1165 // fptrunc fp128 %1 to x86_fp80 -> fp128 %1
1166 // with a mapping of {f32->f32;f64->f32;f80->f64}
1167 // fptrunc double %1 to float -> float s(%1)
1168 // fptrunc x86_fp80 %1 to float -> fptrunc double s(%1) to float
1169 // fptrunc fp128 %1 to float -> fptrunc fp128 %1 to float
1170 // fptrunc x86_fp80 %1 to double -> fptrunc double s(%1) to float
1171 // fptrunc fp128 %1 to double -> fptrunc fp128 %1 to float
1172 // fptrunc fp128 %1 to x86_fp80 -> fptrunc fp128 %1 to double
1173
1174 // See (A) above.
1175 Value *Source = ExtendedSourceTy ? Map.getShadow(V: OrigSource) : OrigSource;
1176 Type *SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
1177 // See (B) above.
1178 if (SourceTy == ExtendedVT)
1179 return Source;
1180
1181 return Builder.CreateFPTrunc(V: Source, DestTy: ExtendedVT);
1182}
1183
1184Value *NumericalStabilitySanitizer::handleExt(const FPExtInst &Ext, Type *VT,
1185 Type *ExtendedVT,
1186 const ValueToShadowMap &Map,
1187 IRBuilder<> &Builder) {
1188 Value *OrigSource = Ext.getOperand(i_nocapture: 0);
1189 Type *OrigSourceTy = OrigSource->getType();
1190 Type *ExtendedSourceTy = Config.getExtendedFPType(FT: OrigSourceTy);
1191 // When extending:
1192 // - (A) If the source has a shadow, we extend from the shadow, else we
1193 // extend from the original source.
1194 // - (B) If the shadow of the dest is larger than the shadow of the source,
1195 // we still need an extend. Else, the shadow of the source is the same
1196 // type as the shadow of the dest (because mappings are non-decreasing), so
1197 // we don't need to emit an extend.
1198 // Examples,
1199 // with a mapping of {f32->f64;f64->f80;f80->f128}
1200 // fpext half %1 to float -> fpext half %1 to double
1201 // fpext half %1 to double -> fpext half %1 to x86_fp80
1202 // fpext half %1 to x86_fp80 -> fpext half %1 to fp128
1203 // fpext float %1 to double -> double s(%1)
1204 // fpext float %1 to x86_fp80 -> fpext double s(%1) to fp128
1205 // fpext double %1 to x86_fp80 -> fpext x86_fp80 s(%1) to fp128
1206 // with a mapping of {f32->f64;f64->f128;f80->f128}
1207 // fpext half %1 to float -> fpext half %1 to double
1208 // fpext half %1 to double -> fpext half %1 to fp128
1209 // fpext half %1 to x86_fp80 -> fpext half %1 to fp128
1210 // fpext float %1 to double -> fpext double s(%1) to fp128
1211 // fpext float %1 to x86_fp80 -> fpext double s(%1) to fp128
1212 // fpext double %1 to x86_fp80 -> fp128 s(%1)
1213 // with a mapping of {f32->f32;f64->f32;f80->f64}
1214 // fpext half %1 to float -> fpext half %1 to float
1215 // fpext half %1 to double -> fpext half %1 to float
1216 // fpext half %1 to x86_fp80 -> fpext half %1 to double
1217 // fpext float %1 to double -> s(%1)
1218 // fpext float %1 to x86_fp80 -> fpext float s(%1) to double
1219 // fpext double %1 to x86_fp80 -> fpext float s(%1) to double
1220
1221 // See (A) above.
1222 Value *Source = ExtendedSourceTy ? Map.getShadow(V: OrigSource) : OrigSource;
1223 Type *SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
1224 // See (B) above.
1225 if (SourceTy == ExtendedVT)
1226 return Source;
1227
1228 return Builder.CreateFPExt(V: Source, DestTy: ExtendedVT);
1229}
1230
1231namespace {
1232// TODO: This should be tablegen-ed.
1233struct KnownIntrinsic {
1234 struct WidenedIntrinsic {
1235 const char *NarrowName;
1236 Intrinsic::ID ID; // wide id.
1237 using FnTypeFactory = FunctionType *(*)(LLVMContext &);
1238 FnTypeFactory MakeFnTy;
1239 };
1240
1241 static const char *get(LibFunc LFunc);
1242
1243 // Given an intrinsic with an `FT` argument, try to find a wider intrinsic
1244 // that applies the same operation on the shadow argument.
1245 // Options are:
1246 // - pass in the ID and full function type,
1247 // - pass in the name, which includes the function type through mangling.
1248 static const WidenedIntrinsic *widen(StringRef Name);
1249
1250private:
1251 struct LFEntry {
1252 LibFunc LFunc;
1253 const char *IntrinsicName;
1254 };
1255 static const LFEntry kLibfuncIntrinsics[];
1256
1257 static const WidenedIntrinsic kWidenedIntrinsics[];
1258};
1259} // namespace
1260
1261static FunctionType *makeDoubleDouble(LLVMContext &C) {
1262 return FunctionType::get(Result: Type::getDoubleTy(C), Params: {Type::getDoubleTy(C)}, isVarArg: false);
1263}
1264
1265static FunctionType *makeX86FP80X86FP80(LLVMContext &C) {
1266 return FunctionType::get(Result: Type::getX86_FP80Ty(C), Params: {Type::getX86_FP80Ty(C)},
1267 isVarArg: false);
1268}
1269
1270static FunctionType *makeDoubleDoubleI32(LLVMContext &C) {
1271 return FunctionType::get(Result: Type::getDoubleTy(C),
1272 Params: {Type::getDoubleTy(C), Type::getInt32Ty(C)}, isVarArg: false);
1273}
1274
1275static FunctionType *makeX86FP80X86FP80I32(LLVMContext &C) {
1276 return FunctionType::get(Result: Type::getX86_FP80Ty(C),
1277 Params: {Type::getX86_FP80Ty(C), Type::getInt32Ty(C)},
1278 isVarArg: false);
1279}
1280
1281static FunctionType *makeDoubleDoubleDouble(LLVMContext &C) {
1282 return FunctionType::get(Result: Type::getDoubleTy(C),
1283 Params: {Type::getDoubleTy(C), Type::getDoubleTy(C)}, isVarArg: false);
1284}
1285
1286static FunctionType *makeX86FP80X86FP80X86FP80(LLVMContext &C) {
1287 return FunctionType::get(Result: Type::getX86_FP80Ty(C),
1288 Params: {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)},
1289 isVarArg: false);
1290}
1291
1292static FunctionType *makeDoubleDoubleDoubleDouble(LLVMContext &C) {
1293 return FunctionType::get(
1294 Result: Type::getDoubleTy(C),
1295 Params: {Type::getDoubleTy(C), Type::getDoubleTy(C), Type::getDoubleTy(C)},
1296 isVarArg: false);
1297}
1298
1299static FunctionType *makeX86FP80X86FP80X86FP80X86FP80(LLVMContext &C) {
1300 return FunctionType::get(
1301 Result: Type::getX86_FP80Ty(C),
1302 Params: {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)},
1303 isVarArg: false);
1304}
1305
1306const KnownIntrinsic::WidenedIntrinsic KnownIntrinsic::kWidenedIntrinsics[] = {
1307 // TODO: Right now we ignore vector intrinsics.
1308 // This is hard because we have to model the semantics of the intrinsics,
1309 // e.g. llvm.x86.sse2.min.sd means extract first element, min, insert back.
1310 // Intrinsics that take any non-vector FT types:
1311 // NOTE: Right now because of
1312 // https://github.com/llvm/llvm-project/issues/44744
1313 // for f128 we need to use makeX86FP80X86FP80 (go to a lower precision and
1314 // come back).
1315 {.NarrowName: "llvm.sqrt.f32", .ID: Intrinsic::sqrt, .MakeFnTy: makeDoubleDouble},
1316 {.NarrowName: "llvm.sqrt.f64", .ID: Intrinsic::sqrt, .MakeFnTy: makeX86FP80X86FP80},
1317 {.NarrowName: "llvm.sqrt.f80", .ID: Intrinsic::sqrt, .MakeFnTy: makeX86FP80X86FP80},
1318 {.NarrowName: "llvm.powi.f32", .ID: Intrinsic::powi, .MakeFnTy: makeDoubleDoubleI32},
1319 {.NarrowName: "llvm.powi.f64", .ID: Intrinsic::powi, .MakeFnTy: makeX86FP80X86FP80I32},
1320 {.NarrowName: "llvm.powi.f80", .ID: Intrinsic::powi, .MakeFnTy: makeX86FP80X86FP80I32},
1321 {.NarrowName: "llvm.sin.f32", .ID: Intrinsic::sin, .MakeFnTy: makeDoubleDouble},
1322 {.NarrowName: "llvm.sin.f64", .ID: Intrinsic::sin, .MakeFnTy: makeX86FP80X86FP80},
1323 {.NarrowName: "llvm.sin.f80", .ID: Intrinsic::sin, .MakeFnTy: makeX86FP80X86FP80},
1324 {.NarrowName: "llvm.cos.f32", .ID: Intrinsic::cos, .MakeFnTy: makeDoubleDouble},
1325 {.NarrowName: "llvm.cos.f64", .ID: Intrinsic::cos, .MakeFnTy: makeX86FP80X86FP80},
1326 {.NarrowName: "llvm.cos.f80", .ID: Intrinsic::cos, .MakeFnTy: makeX86FP80X86FP80},
1327 {.NarrowName: "llvm.pow.f32", .ID: Intrinsic::pow, .MakeFnTy: makeDoubleDoubleDouble},
1328 {.NarrowName: "llvm.pow.f64", .ID: Intrinsic::pow, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1329 {.NarrowName: "llvm.pow.f80", .ID: Intrinsic::pow, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1330 {.NarrowName: "llvm.exp.f32", .ID: Intrinsic::exp, .MakeFnTy: makeDoubleDouble},
1331 {.NarrowName: "llvm.exp.f64", .ID: Intrinsic::exp, .MakeFnTy: makeX86FP80X86FP80},
1332 {.NarrowName: "llvm.exp.f80", .ID: Intrinsic::exp, .MakeFnTy: makeX86FP80X86FP80},
1333 {.NarrowName: "llvm.exp2.f32", .ID: Intrinsic::exp2, .MakeFnTy: makeDoubleDouble},
1334 {.NarrowName: "llvm.exp2.f64", .ID: Intrinsic::exp2, .MakeFnTy: makeX86FP80X86FP80},
1335 {.NarrowName: "llvm.exp2.f80", .ID: Intrinsic::exp2, .MakeFnTy: makeX86FP80X86FP80},
1336 {.NarrowName: "llvm.log.f32", .ID: Intrinsic::log, .MakeFnTy: makeDoubleDouble},
1337 {.NarrowName: "llvm.log.f64", .ID: Intrinsic::log, .MakeFnTy: makeX86FP80X86FP80},
1338 {.NarrowName: "llvm.log.f80", .ID: Intrinsic::log, .MakeFnTy: makeX86FP80X86FP80},
1339 {.NarrowName: "llvm.log10.f32", .ID: Intrinsic::log10, .MakeFnTy: makeDoubleDouble},
1340 {.NarrowName: "llvm.log10.f64", .ID: Intrinsic::log10, .MakeFnTy: makeX86FP80X86FP80},
1341 {.NarrowName: "llvm.log10.f80", .ID: Intrinsic::log10, .MakeFnTy: makeX86FP80X86FP80},
1342 {.NarrowName: "llvm.log2.f32", .ID: Intrinsic::log2, .MakeFnTy: makeDoubleDouble},
1343 {.NarrowName: "llvm.log2.f64", .ID: Intrinsic::log2, .MakeFnTy: makeX86FP80X86FP80},
1344 {.NarrowName: "llvm.log2.f80", .ID: Intrinsic::log2, .MakeFnTy: makeX86FP80X86FP80},
1345 {.NarrowName: "llvm.fma.f32", .ID: Intrinsic::fma, .MakeFnTy: makeDoubleDoubleDoubleDouble},
1346
1347 {.NarrowName: "llvm.fmuladd.f32", .ID: Intrinsic::fmuladd, .MakeFnTy: makeDoubleDoubleDoubleDouble},
1348
1349 {.NarrowName: "llvm.fma.f64", .ID: Intrinsic::fma, .MakeFnTy: makeX86FP80X86FP80X86FP80X86FP80},
1350
1351 {.NarrowName: "llvm.fmuladd.f64", .ID: Intrinsic::fma, .MakeFnTy: makeX86FP80X86FP80X86FP80X86FP80},
1352
1353 {.NarrowName: "llvm.fma.f80", .ID: Intrinsic::fma, .MakeFnTy: makeX86FP80X86FP80X86FP80X86FP80},
1354 {.NarrowName: "llvm.fabs.f32", .ID: Intrinsic::fabs, .MakeFnTy: makeDoubleDouble},
1355 {.NarrowName: "llvm.fabs.f64", .ID: Intrinsic::fabs, .MakeFnTy: makeX86FP80X86FP80},
1356 {.NarrowName: "llvm.fabs.f80", .ID: Intrinsic::fabs, .MakeFnTy: makeX86FP80X86FP80},
1357 {.NarrowName: "llvm.minnum.f32", .ID: Intrinsic::minnum, .MakeFnTy: makeDoubleDoubleDouble},
1358 {.NarrowName: "llvm.minnum.f64", .ID: Intrinsic::minnum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1359 {.NarrowName: "llvm.minnum.f80", .ID: Intrinsic::minnum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1360 {.NarrowName: "llvm.maxnum.f32", .ID: Intrinsic::maxnum, .MakeFnTy: makeDoubleDoubleDouble},
1361 {.NarrowName: "llvm.maxnum.f64", .ID: Intrinsic::maxnum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1362 {.NarrowName: "llvm.maxnum.f80", .ID: Intrinsic::maxnum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1363 {.NarrowName: "llvm.minimum.f32", .ID: Intrinsic::minimum, .MakeFnTy: makeDoubleDoubleDouble},
1364 {.NarrowName: "llvm.minimum.f64", .ID: Intrinsic::minimum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1365 {.NarrowName: "llvm.minimum.f80", .ID: Intrinsic::minimum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1366 {.NarrowName: "llvm.maximum.f32", .ID: Intrinsic::maximum, .MakeFnTy: makeDoubleDoubleDouble},
1367 {.NarrowName: "llvm.maximum.f64", .ID: Intrinsic::maximum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1368 {.NarrowName: "llvm.maximum.f80", .ID: Intrinsic::maximum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1369 {.NarrowName: "llvm.copysign.f32", .ID: Intrinsic::copysign, .MakeFnTy: makeDoubleDoubleDouble},
1370 {.NarrowName: "llvm.copysign.f64", .ID: Intrinsic::copysign, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1371 {.NarrowName: "llvm.copysign.f80", .ID: Intrinsic::copysign, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1372 {.NarrowName: "llvm.floor.f32", .ID: Intrinsic::floor, .MakeFnTy: makeDoubleDouble},
1373 {.NarrowName: "llvm.floor.f64", .ID: Intrinsic::floor, .MakeFnTy: makeX86FP80X86FP80},
1374 {.NarrowName: "llvm.floor.f80", .ID: Intrinsic::floor, .MakeFnTy: makeX86FP80X86FP80},
1375 {.NarrowName: "llvm.ceil.f32", .ID: Intrinsic::ceil, .MakeFnTy: makeDoubleDouble},
1376 {.NarrowName: "llvm.ceil.f64", .ID: Intrinsic::ceil, .MakeFnTy: makeX86FP80X86FP80},
1377 {.NarrowName: "llvm.ceil.f80", .ID: Intrinsic::ceil, .MakeFnTy: makeX86FP80X86FP80},
1378 {.NarrowName: "llvm.trunc.f32", .ID: Intrinsic::trunc, .MakeFnTy: makeDoubleDouble},
1379 {.NarrowName: "llvm.trunc.f64", .ID: Intrinsic::trunc, .MakeFnTy: makeX86FP80X86FP80},
1380 {.NarrowName: "llvm.trunc.f80", .ID: Intrinsic::trunc, .MakeFnTy: makeX86FP80X86FP80},
1381 {.NarrowName: "llvm.rint.f32", .ID: Intrinsic::rint, .MakeFnTy: makeDoubleDouble},
1382 {.NarrowName: "llvm.rint.f64", .ID: Intrinsic::rint, .MakeFnTy: makeX86FP80X86FP80},
1383 {.NarrowName: "llvm.rint.f80", .ID: Intrinsic::rint, .MakeFnTy: makeX86FP80X86FP80},
1384 {.NarrowName: "llvm.nearbyint.f32", .ID: Intrinsic::nearbyint, .MakeFnTy: makeDoubleDouble},
1385 {.NarrowName: "llvm.nearbyint.f64", .ID: Intrinsic::nearbyint, .MakeFnTy: makeX86FP80X86FP80},
1386 {.NarrowName: "llvm.nearbyin80f64", .ID: Intrinsic::nearbyint, .MakeFnTy: makeX86FP80X86FP80},
1387 {.NarrowName: "llvm.round.f32", .ID: Intrinsic::round, .MakeFnTy: makeDoubleDouble},
1388 {.NarrowName: "llvm.round.f64", .ID: Intrinsic::round, .MakeFnTy: makeX86FP80X86FP80},
1389 {.NarrowName: "llvm.round.f80", .ID: Intrinsic::round, .MakeFnTy: makeX86FP80X86FP80},
1390 {.NarrowName: "llvm.lround.f32", .ID: Intrinsic::lround, .MakeFnTy: makeDoubleDouble},
1391 {.NarrowName: "llvm.lround.f64", .ID: Intrinsic::lround, .MakeFnTy: makeX86FP80X86FP80},
1392 {.NarrowName: "llvm.lround.f80", .ID: Intrinsic::lround, .MakeFnTy: makeX86FP80X86FP80},
1393 {.NarrowName: "llvm.llround.f32", .ID: Intrinsic::llround, .MakeFnTy: makeDoubleDouble},
1394 {.NarrowName: "llvm.llround.f64", .ID: Intrinsic::llround, .MakeFnTy: makeX86FP80X86FP80},
1395 {.NarrowName: "llvm.llround.f80", .ID: Intrinsic::llround, .MakeFnTy: makeX86FP80X86FP80},
1396 {.NarrowName: "llvm.lrint.f32", .ID: Intrinsic::lrint, .MakeFnTy: makeDoubleDouble},
1397 {.NarrowName: "llvm.lrint.f64", .ID: Intrinsic::lrint, .MakeFnTy: makeX86FP80X86FP80},
1398 {.NarrowName: "llvm.lrint.f80", .ID: Intrinsic::lrint, .MakeFnTy: makeX86FP80X86FP80},
1399 {.NarrowName: "llvm.llrint.f32", .ID: Intrinsic::llrint, .MakeFnTy: makeDoubleDouble},
1400 {.NarrowName: "llvm.llrint.f64", .ID: Intrinsic::llrint, .MakeFnTy: makeX86FP80X86FP80},
1401 {.NarrowName: "llvm.llrint.f80", .ID: Intrinsic::llrint, .MakeFnTy: makeX86FP80X86FP80},
1402};
1403
1404const KnownIntrinsic::LFEntry KnownIntrinsic::kLibfuncIntrinsics[] = {
1405 {.LFunc: LibFunc_sqrtf, .IntrinsicName: "llvm.sqrt.f32"},
1406 {.LFunc: LibFunc_sqrt, .IntrinsicName: "llvm.sqrt.f64"},
1407 {.LFunc: LibFunc_sqrtl, .IntrinsicName: "llvm.sqrt.f80"},
1408 {.LFunc: LibFunc_sinf, .IntrinsicName: "llvm.sin.f32"},
1409 {.LFunc: LibFunc_sin, .IntrinsicName: "llvm.sin.f64"},
1410 {.LFunc: LibFunc_sinl, .IntrinsicName: "llvm.sin.f80"},
1411 {.LFunc: LibFunc_cosf, .IntrinsicName: "llvm.cos.f32"},
1412 {.LFunc: LibFunc_cos, .IntrinsicName: "llvm.cos.f64"},
1413 {.LFunc: LibFunc_cosl, .IntrinsicName: "llvm.cos.f80"},
1414 {.LFunc: LibFunc_powf, .IntrinsicName: "llvm.pow.f32"},
1415 {.LFunc: LibFunc_pow, .IntrinsicName: "llvm.pow.f64"},
1416 {.LFunc: LibFunc_powl, .IntrinsicName: "llvm.pow.f80"},
1417 {.LFunc: LibFunc_expf, .IntrinsicName: "llvm.exp.f32"},
1418 {.LFunc: LibFunc_exp, .IntrinsicName: "llvm.exp.f64"},
1419 {.LFunc: LibFunc_expl, .IntrinsicName: "llvm.exp.f80"},
1420 {.LFunc: LibFunc_exp2f, .IntrinsicName: "llvm.exp2.f32"},
1421 {.LFunc: LibFunc_exp2, .IntrinsicName: "llvm.exp2.f64"},
1422 {.LFunc: LibFunc_exp2l, .IntrinsicName: "llvm.exp2.f80"},
1423 {.LFunc: LibFunc_logf, .IntrinsicName: "llvm.log.f32"},
1424 {.LFunc: LibFunc_log, .IntrinsicName: "llvm.log.f64"},
1425 {.LFunc: LibFunc_logl, .IntrinsicName: "llvm.log.f80"},
1426 {.LFunc: LibFunc_log10f, .IntrinsicName: "llvm.log10.f32"},
1427 {.LFunc: LibFunc_log10, .IntrinsicName: "llvm.log10.f64"},
1428 {.LFunc: LibFunc_log10l, .IntrinsicName: "llvm.log10.f80"},
1429 {.LFunc: LibFunc_log2f, .IntrinsicName: "llvm.log2.f32"},
1430 {.LFunc: LibFunc_log2, .IntrinsicName: "llvm.log2.f64"},
1431 {.LFunc: LibFunc_log2l, .IntrinsicName: "llvm.log2.f80"},
1432 {.LFunc: LibFunc_fabsf, .IntrinsicName: "llvm.fabs.f32"},
1433 {.LFunc: LibFunc_fabs, .IntrinsicName: "llvm.fabs.f64"},
1434 {.LFunc: LibFunc_fabsl, .IntrinsicName: "llvm.fabs.f80"},
1435 {.LFunc: LibFunc_copysignf, .IntrinsicName: "llvm.copysign.f32"},
1436 {.LFunc: LibFunc_copysign, .IntrinsicName: "llvm.copysign.f64"},
1437 {.LFunc: LibFunc_copysignl, .IntrinsicName: "llvm.copysign.f80"},
1438 {.LFunc: LibFunc_floorf, .IntrinsicName: "llvm.floor.f32"},
1439 {.LFunc: LibFunc_floor, .IntrinsicName: "llvm.floor.f64"},
1440 {.LFunc: LibFunc_floorl, .IntrinsicName: "llvm.floor.f80"},
1441 {.LFunc: LibFunc_fmaxf, .IntrinsicName: "llvm.maxnum.f32"},
1442 {.LFunc: LibFunc_fmax, .IntrinsicName: "llvm.maxnum.f64"},
1443 {.LFunc: LibFunc_fmaxl, .IntrinsicName: "llvm.maxnum.f80"},
1444 {.LFunc: LibFunc_fminf, .IntrinsicName: "llvm.minnum.f32"},
1445 {.LFunc: LibFunc_fmin, .IntrinsicName: "llvm.minnum.f64"},
1446 {.LFunc: LibFunc_fminl, .IntrinsicName: "llvm.minnum.f80"},
1447 {.LFunc: LibFunc_ceilf, .IntrinsicName: "llvm.ceil.f32"},
1448 {.LFunc: LibFunc_ceil, .IntrinsicName: "llvm.ceil.f64"},
1449 {.LFunc: LibFunc_ceill, .IntrinsicName: "llvm.ceil.f80"},
1450 {.LFunc: LibFunc_truncf, .IntrinsicName: "llvm.trunc.f32"},
1451 {.LFunc: LibFunc_trunc, .IntrinsicName: "llvm.trunc.f64"},
1452 {.LFunc: LibFunc_truncl, .IntrinsicName: "llvm.trunc.f80"},
1453 {.LFunc: LibFunc_rintf, .IntrinsicName: "llvm.rint.f32"},
1454 {.LFunc: LibFunc_rint, .IntrinsicName: "llvm.rint.f64"},
1455 {.LFunc: LibFunc_rintl, .IntrinsicName: "llvm.rint.f80"},
1456 {.LFunc: LibFunc_nearbyintf, .IntrinsicName: "llvm.nearbyint.f32"},
1457 {.LFunc: LibFunc_nearbyint, .IntrinsicName: "llvm.nearbyint.f64"},
1458 {.LFunc: LibFunc_nearbyintl, .IntrinsicName: "llvm.nearbyint.f80"},
1459 {.LFunc: LibFunc_roundf, .IntrinsicName: "llvm.round.f32"},
1460 {.LFunc: LibFunc_round, .IntrinsicName: "llvm.round.f64"},
1461 {.LFunc: LibFunc_roundl, .IntrinsicName: "llvm.round.f80"},
1462};
1463
1464const char *KnownIntrinsic::get(LibFunc LFunc) {
1465 for (const auto &E : kLibfuncIntrinsics) {
1466 if (E.LFunc == LFunc)
1467 return E.IntrinsicName;
1468 }
1469 return nullptr;
1470}
1471
1472const KnownIntrinsic::WidenedIntrinsic *KnownIntrinsic::widen(StringRef Name) {
1473 for (const auto &E : kWidenedIntrinsics) {
1474 if (E.NarrowName == Name)
1475 return &E;
1476 }
1477 return nullptr;
1478}
1479
1480// Returns the name of the LLVM intrinsic corresponding to the given function.
1481static const char *getIntrinsicFromLibfunc(Function &Fn, Type *VT,
1482 const TargetLibraryInfo &TLI) {
1483 LibFunc LFunc;
1484 if (!TLI.getLibFunc(FDecl: Fn, F&: LFunc))
1485 return nullptr;
1486
1487 if (const char *Name = KnownIntrinsic::get(LFunc))
1488 return Name;
1489
1490 LLVM_DEBUG(errs() << "TODO: LibFunc: " << TLI.getName(LFunc) << "\n");
1491 return nullptr;
1492}
1493
1494// Try to handle a known function call.
1495Value *NumericalStabilitySanitizer::maybeHandleKnownCallBase(
1496 CallBase &Call, Type *VT, Type *ExtendedVT, const TargetLibraryInfo &TLI,
1497 const ValueToShadowMap &Map, IRBuilder<> &Builder) {
1498 Function *Fn = Call.getCalledFunction();
1499 if (Fn == nullptr)
1500 return nullptr;
1501
1502 Intrinsic::ID WidenedId = Intrinsic::ID();
1503 FunctionType *WidenedFnTy = nullptr;
1504 if (const auto ID = Fn->getIntrinsicID()) {
1505 const auto *Widened = KnownIntrinsic::widen(Name: Fn->getName());
1506 if (Widened) {
1507 WidenedId = Widened->ID;
1508 WidenedFnTy = Widened->MakeFnTy(Context);
1509 } else {
1510 // If we don't know how to widen the intrinsic, we have no choice but to
1511 // call the non-wide version on a truncated shadow and extend again
1512 // afterwards.
1513 WidenedId = ID;
1514 WidenedFnTy = Fn->getFunctionType();
1515 }
1516 } else if (const char *Name = getIntrinsicFromLibfunc(Fn&: *Fn, VT, TLI)) {
1517 // We might have a call to a library function that we can replace with a
1518 // wider Intrinsic.
1519 const auto *Widened = KnownIntrinsic::widen(Name);
1520 assert(Widened && "make sure KnownIntrinsic entries are consistent");
1521 WidenedId = Widened->ID;
1522 WidenedFnTy = Widened->MakeFnTy(Context);
1523 } else {
1524 // This is not a known library function or intrinsic.
1525 return nullptr;
1526 }
1527
1528 // Check that the widened intrinsic is valid.
1529 SmallVector<Intrinsic::IITDescriptor, 8> Table;
1530 getIntrinsicInfoTableEntries(id: WidenedId, T&: Table);
1531 SmallVector<Type *, 4> ArgTys;
1532 ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
1533 [[maybe_unused]] Intrinsic::MatchIntrinsicTypesResult MatchResult =
1534 Intrinsic::matchIntrinsicSignature(FTy: WidenedFnTy, Infos&: TableRef, ArgTys);
1535 assert(MatchResult == Intrinsic::MatchIntrinsicTypes_Match &&
1536 "invalid widened intrinsic");
1537 // For known intrinsic functions, we create a second call to the same
1538 // intrinsic with a different type.
1539 SmallVector<Value *, 4> Args;
1540 // The last operand is the intrinsic itself, skip it.
1541 for (unsigned I = 0, E = Call.getNumOperands() - 1; I < E; ++I) {
1542 Value *Arg = Call.getOperand(i_nocapture: I);
1543 Type *OrigArgTy = Arg->getType();
1544 Type *IntrinsicArgTy = WidenedFnTy->getParamType(i: I);
1545 if (OrigArgTy == IntrinsicArgTy) {
1546 Args.push_back(Elt: Arg); // The arg is passed as is.
1547 continue;
1548 }
1549 Type *ShadowArgTy = Config.getExtendedFPType(FT: Arg->getType());
1550 assert(ShadowArgTy &&
1551 "don't know how to get the shadow value for a non-FT");
1552 Value *Shadow = Map.getShadow(V: Arg);
1553 if (ShadowArgTy == IntrinsicArgTy) {
1554 // The shadow is the right type for the intrinsic.
1555 assert(Shadow->getType() == ShadowArgTy);
1556 Args.push_back(Elt: Shadow);
1557 continue;
1558 }
1559 // There is no intrinsic with his level of precision, truncate the shadow.
1560 Args.push_back(Elt: Builder.CreateFPTrunc(V: Shadow, DestTy: IntrinsicArgTy));
1561 }
1562 Value *IntrinsicCall = Builder.CreateIntrinsic(ID: WidenedId, Types: ArgTys, Args);
1563 return WidenedFnTy->getReturnType() == ExtendedVT
1564 ? IntrinsicCall
1565 : Builder.CreateFPExt(V: IntrinsicCall, DestTy: ExtendedVT);
1566}
1567
1568// Handle a CallBase, i.e. a function call, an inline asm sequence, or an
1569// invoke.
1570Value *NumericalStabilitySanitizer::handleCallBase(CallBase &Call, Type *VT,
1571 Type *ExtendedVT,
1572 const TargetLibraryInfo &TLI,
1573 const ValueToShadowMap &Map,
1574 IRBuilder<> &Builder) {
1575 // We cannot look inside inline asm, just expand the result again.
1576 if (Call.isInlineAsm())
1577 return Builder.CreateFPExt(V: &Call, DestTy: ExtendedVT);
1578
1579 // Intrinsics and library functions (e.g. sin, exp) are handled
1580 // specifically, because we know their semantics and can do better than
1581 // blindly calling them (e.g. compute the sinus in the actual shadow domain).
1582 if (Value *V =
1583 maybeHandleKnownCallBase(Call, VT, ExtendedVT, TLI, Map, Builder))
1584 return V;
1585
1586 // If the return tag matches that of the called function, read the extended
1587 // return value from the shadow ret ptr. Else, just extend the return value.
1588 Value *L =
1589 Builder.CreateLoad(Ty: IntptrTy, Ptr: NsanShadowRetTag, /*isVolatile=*/false);
1590 Value *HasShadowRet = Builder.CreateICmpEQ(
1591 LHS: L, RHS: Builder.CreatePtrToInt(V: Call.getCalledOperand(), DestTy: IntptrTy));
1592
1593 Value *ShadowRetVal = Builder.CreateLoad(
1594 Ty: ExtendedVT,
1595 Ptr: Builder.CreateConstGEP2_64(Ty: NsanShadowRetType, Ptr: NsanShadowRetPtr, Idx0: 0, Idx1: 0),
1596 /*isVolatile=*/false);
1597 Value *Shadow = Builder.CreateSelect(C: HasShadowRet, True: ShadowRetVal,
1598 False: Builder.CreateFPExt(V: &Call, DestTy: ExtendedVT));
1599 ++NumInstrumentedFTCalls;
1600 return Shadow;
1601}
1602
1603// Creates a shadow value for the given FT value. At that point all operands are
1604// guaranteed to be available.
1605Value *NumericalStabilitySanitizer::createShadowValueWithOperandsAvailable(
1606 Instruction &Inst, const TargetLibraryInfo &TLI,
1607 const ValueToShadowMap &Map) {
1608 Type *VT = Inst.getType();
1609 Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
1610 assert(ExtendedVT != nullptr && "trying to create a shadow for a non-FT");
1611
1612 if (auto *Load = dyn_cast<LoadInst>(Val: &Inst))
1613 return handleLoad(Load&: *Load, VT, ExtendedVT);
1614
1615 if (auto *Call = dyn_cast<CallInst>(Val: &Inst)) {
1616 // Insert after the call.
1617 BasicBlock::iterator It(Inst);
1618 IRBuilder<> Builder(Call->getParent(), ++It);
1619 Builder.SetCurrentDebugLocation(Call->getDebugLoc());
1620 return handleCallBase(Call&: *Call, VT, ExtendedVT, TLI, Map, Builder);
1621 }
1622
1623 if (auto *Invoke = dyn_cast<InvokeInst>(Val: &Inst)) {
1624 // The Invoke terminates the basic block, create a new basic block in
1625 // between the successful invoke and the next block.
1626 BasicBlock *InvokeBB = Invoke->getParent();
1627 BasicBlock *NextBB = Invoke->getNormalDest();
1628 BasicBlock *NewBB =
1629 BasicBlock::Create(Context, Name: "", Parent: NextBB->getParent(), InsertBefore: NextBB);
1630 Inst.replaceSuccessorWith(OldBB: NextBB, NewBB);
1631
1632 IRBuilder<> Builder(NewBB);
1633 Builder.SetCurrentDebugLocation(Invoke->getDebugLoc());
1634 Value *Shadow = handleCallBase(Call&: *Invoke, VT, ExtendedVT, TLI, Map, Builder);
1635 Builder.CreateBr(Dest: NextBB);
1636 NewBB->replaceSuccessorsPhiUsesWith(Old: InvokeBB, New: NewBB);
1637 return Shadow;
1638 }
1639
1640 IRBuilder<> Builder(Inst.getNextNode());
1641 Builder.SetCurrentDebugLocation(Inst.getDebugLoc());
1642
1643 if (auto *Trunc = dyn_cast<FPTruncInst>(Val: &Inst))
1644 return handleTrunc(Trunc: *Trunc, VT, ExtendedVT, Map, Builder);
1645 if (auto *Ext = dyn_cast<FPExtInst>(Val: &Inst))
1646 return handleExt(Ext: *Ext, VT, ExtendedVT, Map, Builder);
1647
1648 if (auto *UnaryOp = dyn_cast<UnaryOperator>(Val: &Inst))
1649 return Builder.CreateUnOp(Opc: UnaryOp->getOpcode(),
1650 V: Map.getShadow(V: UnaryOp->getOperand(i_nocapture: 0)));
1651
1652 if (auto *BinOp = dyn_cast<BinaryOperator>(Val: &Inst))
1653 return Builder.CreateBinOp(Opc: BinOp->getOpcode(),
1654 LHS: Map.getShadow(V: BinOp->getOperand(i_nocapture: 0)),
1655 RHS: Map.getShadow(V: BinOp->getOperand(i_nocapture: 1)));
1656
1657 if (isa<UIToFPInst>(Val: &Inst) || isa<SIToFPInst>(Val: &Inst)) {
1658 auto *Cast = dyn_cast<CastInst>(Val: &Inst);
1659 return Builder.CreateCast(Op: Cast->getOpcode(), V: Cast->getOperand(i_nocapture: 0),
1660 DestTy: ExtendedVT);
1661 }
1662
1663 if (auto *S = dyn_cast<SelectInst>(Val: &Inst))
1664 return Builder.CreateSelect(C: S->getCondition(),
1665 True: Map.getShadow(V: S->getTrueValue()),
1666 False: Map.getShadow(V: S->getFalseValue()));
1667
1668 if (auto *Extract = dyn_cast<ExtractElementInst>(Val: &Inst))
1669 return Builder.CreateExtractElement(
1670 Vec: Map.getShadow(V: Extract->getVectorOperand()), Idx: Extract->getIndexOperand());
1671
1672 if (auto *Insert = dyn_cast<InsertElementInst>(Val: &Inst))
1673 return Builder.CreateInsertElement(Vec: Map.getShadow(V: Insert->getOperand(i_nocapture: 0)),
1674 NewElt: Map.getShadow(V: Insert->getOperand(i_nocapture: 1)),
1675 Idx: Insert->getOperand(i_nocapture: 2));
1676
1677 if (auto *Shuffle = dyn_cast<ShuffleVectorInst>(Val: &Inst))
1678 return Builder.CreateShuffleVector(V1: Map.getShadow(V: Shuffle->getOperand(i_nocapture: 0)),
1679 V2: Map.getShadow(V: Shuffle->getOperand(i_nocapture: 1)),
1680 Mask: Shuffle->getShuffleMask());
1681 // TODO: We could make aggregate object first class citizens. For now we
1682 // just extend the extracted value.
1683 if (auto *Extract = dyn_cast<ExtractValueInst>(Val: &Inst))
1684 return Builder.CreateFPExt(V: Extract, DestTy: ExtendedVT);
1685
1686 if (auto *BC = dyn_cast<BitCastInst>(Val: &Inst))
1687 return Builder.CreateFPExt(V: BC, DestTy: ExtendedVT);
1688
1689 report_fatal_error(reason: "Unimplemented support for " +
1690 Twine(Inst.getOpcodeName()));
1691}
1692
1693// Creates a shadow value for an instruction that defines a value of FT type.
1694// FT operands that do not already have shadow values are created recursively.
1695// The DFS is guaranteed to not loop as phis and arguments already have
1696// shadows.
1697void NumericalStabilitySanitizer::maybeCreateShadowValue(
1698 Instruction &Root, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
1699 Type *VT = Root.getType();
1700 Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
1701 if (ExtendedVT == nullptr)
1702 return; // Not an FT value.
1703
1704 if (Map.hasShadow(V: &Root))
1705 return; // Shadow already exists.
1706
1707 assert(!isa<PHINode>(Root) && "phi nodes should already have shadows");
1708
1709 std::vector<Instruction *> DfsStack(1, &Root);
1710 while (!DfsStack.empty()) {
1711 // Ensure that all operands to the instruction have shadows before
1712 // proceeding.
1713 Instruction *I = DfsStack.back();
1714 // The shadow for the instruction might have been created deeper in the DFS,
1715 // see `forward_use_with_two_uses` test.
1716 if (Map.hasShadow(V: I)) {
1717 DfsStack.pop_back();
1718 continue;
1719 }
1720
1721 bool MissingShadow = false;
1722 for (Value *Op : I->operands()) {
1723 Type *VT = Op->getType();
1724 if (!Config.getExtendedFPType(FT: VT))
1725 continue; // Not an FT value.
1726 if (Map.hasShadow(V: Op))
1727 continue; // Shadow is already available.
1728 MissingShadow = true;
1729 DfsStack.push_back(x: cast<Instruction>(Val: Op));
1730 }
1731 if (MissingShadow)
1732 continue; // Process operands and come back to this instruction later.
1733
1734 // All operands have shadows. Create a shadow for the current value.
1735 Value *Shadow = createShadowValueWithOperandsAvailable(Inst&: *I, TLI, Map);
1736 Map.setShadow(V&: *I, Shadow&: *Shadow);
1737 DfsStack.pop_back();
1738 }
1739}
1740
1741// A floating-point store needs its value and type written to shadow memory.
1742void NumericalStabilitySanitizer::propagateFTStore(
1743 StoreInst &Store, Type *VT, Type *ExtendedVT, const ValueToShadowMap &Map) {
1744 Value *StoredValue = Store.getValueOperand();
1745 IRBuilder<> Builder(&Store);
1746 Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1747 const auto Extents = getMemoryExtentsOrDie(FT: VT);
1748 Value *ShadowPtr = Builder.CreateCall(
1749 Callee: NsanGetShadowPtrForStore[Extents.ValueType],
1750 Args: {Store.getPointerOperand(), ConstantInt::get(Ty: IntptrTy, V: Extents.NumElts)});
1751
1752 Value *StoredShadow = Map.getShadow(V: StoredValue);
1753 if (!Store.getParent()->getParent()->hasOptNone()) {
1754 // Only check stores when optimizing, because non-optimized code generates
1755 // too many stores to the stack, creating false positives.
1756 if (ClCheckStores) {
1757 StoredShadow = emitCheck(V: StoredValue, ShadowV: StoredShadow, Builder,
1758 Loc: CheckLoc::makeStore(Address: Store.getPointerOperand()));
1759 ++NumInstrumentedFTStores;
1760 }
1761 }
1762
1763 Builder.CreateAlignedStore(Val: StoredShadow, Ptr: ShadowPtr, Align: Align(1),
1764 isVolatile: Store.isVolatile());
1765}
1766
1767// A non-ft store needs to invalidate shadow memory. Exceptions are:
1768// - memory transfers of floating-point data through other pointer types (llvm
1769// optimization passes transform `*(float*)a = *(float*)b` into
1770// `*(i32*)a = *(i32*)b` ). These have the same semantics as memcpy.
1771// - Writes of FT-sized constants. LLVM likes to do float stores as bitcasted
1772// ints. Note that this is not really necessary because if the value is
1773// unknown the framework will re-extend it on load anyway. It just felt
1774// easier to debug tests with vectors of FTs.
1775void NumericalStabilitySanitizer::propagateNonFTStore(
1776 StoreInst &Store, Type *VT, const ValueToShadowMap &Map) {
1777 Value *PtrOp = Store.getPointerOperand();
1778 IRBuilder<> Builder(Store.getNextNode());
1779 Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1780 Value *Dst = PtrOp;
1781 TypeSize SlotSize = DL.getTypeStoreSize(Ty: VT);
1782 assert(!SlotSize.isScalable() && "unsupported");
1783 const auto LoadSizeBytes = SlotSize.getFixedValue();
1784 Value *ValueSize = Constant::getIntegerValue(
1785 Ty: IntptrTy, V: APInt(IntptrTy->getPrimitiveSizeInBits(), LoadSizeBytes));
1786
1787 ++NumInstrumentedNonFTStores;
1788 Value *StoredValue = Store.getValueOperand();
1789 if (LoadInst *Load = dyn_cast<LoadInst>(Val: StoredValue)) {
1790 // TODO: Handle the case when the value is from a phi.
1791 // This is a memory transfer with memcpy semantics. Copy the type and
1792 // value from the source. Note that we cannot use __nsan_copy_values()
1793 // here, because that will not work when there is a write to memory in
1794 // between the load and the store, e.g. in the case of a swap.
1795 Type *ShadowTypeIntTy = Type::getIntNTy(C&: Context, N: 8 * LoadSizeBytes);
1796 Type *ShadowValueIntTy =
1797 Type::getIntNTy(C&: Context, N: 8 * kShadowScale * LoadSizeBytes);
1798 IRBuilder<> LoadBuilder(Load->getNextNode());
1799 Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1800 Value *LoadSrc = Load->getPointerOperand();
1801 // Read the shadow type and value at load time. The type has the same size
1802 // as the FT value, the value has twice its size.
1803 // TODO: cache them to avoid re-creating them when a load is used by
1804 // several stores. Maybe create them like the FT shadows when a load is
1805 // encountered.
1806 Value *RawShadowType = LoadBuilder.CreateAlignedLoad(
1807 Ty: ShadowTypeIntTy,
1808 Ptr: LoadBuilder.CreateCall(Callee: NsanGetRawShadowTypePtr, Args: {LoadSrc}), Align: Align(1),
1809 /*isVolatile=*/false);
1810 Value *RawShadowValue = LoadBuilder.CreateAlignedLoad(
1811 Ty: ShadowValueIntTy,
1812 Ptr: LoadBuilder.CreateCall(Callee: NsanGetRawShadowPtr, Args: {LoadSrc}), Align: Align(1),
1813 /*isVolatile=*/false);
1814
1815 // Write back the shadow type and value at store time.
1816 Builder.CreateAlignedStore(
1817 Val: RawShadowType, Ptr: Builder.CreateCall(Callee: NsanGetRawShadowTypePtr, Args: {Dst}),
1818 Align: Align(1),
1819 /*isVolatile=*/false);
1820 Builder.CreateAlignedStore(Val: RawShadowValue,
1821 Ptr: Builder.CreateCall(Callee: NsanGetRawShadowPtr, Args: {Dst}),
1822 Align: Align(1),
1823 /*isVolatile=*/false);
1824
1825 ++NumInstrumentedNonFTMemcpyStores;
1826 return;
1827 }
1828 // ClPropagateNonFTConstStoresAsFT is by default false.
1829 if (Constant *C; ClPropagateNonFTConstStoresAsFT &&
1830 (C = dyn_cast<Constant>(Val: StoredValue))) {
1831 // This might be a fp constant stored as an int. Bitcast and store if it has
1832 // appropriate size.
1833 Type *BitcastTy = nullptr; // The FT type to bitcast to.
1834 if (auto *CInt = dyn_cast<ConstantInt>(Val: C)) {
1835 switch (CInt->getType()->getScalarSizeInBits()) {
1836 case 32:
1837 BitcastTy = Type::getFloatTy(C&: Context);
1838 break;
1839 case 64:
1840 BitcastTy = Type::getDoubleTy(C&: Context);
1841 break;
1842 case 80:
1843 BitcastTy = Type::getX86_FP80Ty(C&: Context);
1844 break;
1845 default:
1846 break;
1847 }
1848 } else if (auto *CDV = dyn_cast<ConstantDataVector>(Val: C)) {
1849 const int NumElements =
1850 cast<VectorType>(Val: CDV->getType())->getElementCount().getFixedValue();
1851 switch (CDV->getType()->getScalarSizeInBits()) {
1852 case 32:
1853 BitcastTy =
1854 VectorType::get(ElementType: Type::getFloatTy(C&: Context), NumElements, Scalable: false);
1855 break;
1856 case 64:
1857 BitcastTy =
1858 VectorType::get(ElementType: Type::getDoubleTy(C&: Context), NumElements, Scalable: false);
1859 break;
1860 case 80:
1861 BitcastTy =
1862 VectorType::get(ElementType: Type::getX86_FP80Ty(C&: Context), NumElements, Scalable: false);
1863 break;
1864 default:
1865 break;
1866 }
1867 }
1868 if (BitcastTy) {
1869 const MemoryExtents Extents = getMemoryExtentsOrDie(FT: BitcastTy);
1870 Value *ShadowPtr = Builder.CreateCall(
1871 Callee: NsanGetShadowPtrForStore[Extents.ValueType],
1872 Args: {PtrOp, ConstantInt::get(Ty: IntptrTy, V: Extents.NumElts)});
1873 // Bitcast the integer value to the appropriate FT type and extend to 2FT.
1874 Type *ExtVT = Config.getExtendedFPType(FT: BitcastTy);
1875 Value *Shadow =
1876 Builder.CreateFPExt(V: Builder.CreateBitCast(V: C, DestTy: BitcastTy), DestTy: ExtVT);
1877 Builder.CreateAlignedStore(Val: Shadow, Ptr: ShadowPtr, Align: Align(1),
1878 isVolatile: Store.isVolatile());
1879 return;
1880 }
1881 }
1882 // All other stores just reset the shadow value to unknown.
1883 Builder.CreateCall(Callee: NsanSetValueUnknown, Args: {Dst, ValueSize});
1884}
1885
1886void NumericalStabilitySanitizer::propagateShadowValues(
1887 Instruction &Inst, const TargetLibraryInfo &TLI,
1888 const ValueToShadowMap &Map) {
1889 if (auto *Store = dyn_cast<StoreInst>(Val: &Inst)) {
1890 Value *StoredValue = Store->getValueOperand();
1891 Type *VT = StoredValue->getType();
1892 Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
1893 if (ExtendedVT == nullptr)
1894 return propagateNonFTStore(Store&: *Store, VT, Map);
1895 return propagateFTStore(Store&: *Store, VT, ExtendedVT, Map);
1896 }
1897
1898 if (auto *FCmp = dyn_cast<FCmpInst>(Val: &Inst)) {
1899 emitFCmpCheck(FCmp&: *FCmp, Map);
1900 return;
1901 }
1902
1903 if (auto *CB = dyn_cast<CallBase>(Val: &Inst)) {
1904 maybeAddSuffixForNsanInterface(CI: CB);
1905 if (CallInst *CI = dyn_cast<CallInst>(Val: &Inst))
1906 maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI: &TLI);
1907 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Val: &Inst)) {
1908 instrumentMemIntrinsic(MI);
1909 return;
1910 }
1911 populateShadowStack(CI&: *CB, TLI, Map);
1912 return;
1913 }
1914
1915 if (auto *RetInst = dyn_cast<ReturnInst>(Val: &Inst)) {
1916 if (!ClCheckRet)
1917 return;
1918
1919 Value *RV = RetInst->getReturnValue();
1920 if (RV == nullptr)
1921 return; // This is a `ret void`.
1922 Type *VT = RV->getType();
1923 Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
1924 if (ExtendedVT == nullptr)
1925 return; // Not an FT ret.
1926 Value *RVShadow = Map.getShadow(V: RV);
1927 IRBuilder<> Builder(RetInst);
1928
1929 RVShadow = emitCheck(V: RV, ShadowV: RVShadow, Builder, Loc: CheckLoc::makeRet());
1930 ++NumInstrumentedFTRets;
1931 // Store tag.
1932 Value *FnAddr =
1933 Builder.CreatePtrToInt(V: Inst.getParent()->getParent(), DestTy: IntptrTy);
1934 Builder.CreateStore(Val: FnAddr, Ptr: NsanShadowRetTag);
1935 // Store value.
1936 Value *ShadowRetValPtr =
1937 Builder.CreateConstGEP2_64(Ty: NsanShadowRetType, Ptr: NsanShadowRetPtr, Idx0: 0, Idx1: 0);
1938 Builder.CreateStore(Val: RVShadow, Ptr: ShadowRetValPtr);
1939 return;
1940 }
1941
1942 if (InsertValueInst *Insert = dyn_cast<InsertValueInst>(Val: &Inst)) {
1943 Value *V = Insert->getOperand(i_nocapture: 1);
1944 Type *VT = V->getType();
1945 Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
1946 if (ExtendedVT == nullptr)
1947 return;
1948 IRBuilder<> Builder(Insert);
1949 emitCheck(V, ShadowV: Map.getShadow(V), Builder, Loc: CheckLoc::makeInsert());
1950 return;
1951 }
1952}
1953
1954// Moves fast math flags from the function to individual instructions, and
1955// removes the attribute from the function.
1956// TODO: Make this controllable with a flag.
1957static void moveFastMathFlags(Function &F,
1958 std::vector<Instruction *> &Instructions) {
1959 FastMathFlags FMF;
1960#define MOVE_FLAG(attr, setter) \
1961 if (F.getFnAttribute(attr).getValueAsString() == "true") { \
1962 F.removeFnAttr(attr); \
1963 FMF.set##setter(); \
1964 }
1965 MOVE_FLAG("unsafe-fp-math", Fast)
1966 MOVE_FLAG("no-infs-fp-math", NoInfs)
1967 MOVE_FLAG("no-nans-fp-math", NoNaNs)
1968 MOVE_FLAG("no-signed-zeros-fp-math", NoSignedZeros)
1969#undef MOVE_FLAG
1970
1971 for (Instruction *I : Instructions)
1972 if (isa<FPMathOperator>(Val: I))
1973 I->setFastMathFlags(FMF);
1974}
1975
1976bool NumericalStabilitySanitizer::sanitizeFunction(
1977 Function &F, const TargetLibraryInfo &TLI) {
1978 if (!F.hasFnAttribute(Kind: Attribute::SanitizeNumericalStability))
1979 return false;
1980
1981 // This is required to prevent instrumenting call to __nsan_init from within
1982 // the module constructor.
1983 if (F.getName() == kNsanModuleCtorName)
1984 return false;
1985 SmallVector<Instruction *, 8> AllLoadsAndStores;
1986 SmallVector<Instruction *, 8> LocalLoadsAndStores;
1987
1988 // The instrumentation maintains:
1989 // - for each IR value `v` of floating-point (or vector floating-point) type
1990 // FT, a shadow IR value `s(v)` with twice the precision 2FT (e.g.
1991 // double for float and f128 for double).
1992 // - A shadow memory, which stores `s(v)` for any `v` that has been stored,
1993 // along with a shadow memory tag, which stores whether the value in the
1994 // corresponding shadow memory is valid. Note that this might be
1995 // incorrect if a non-instrumented function stores to memory, or if
1996 // memory is stored to through a char pointer.
1997 // - A shadow stack, which holds `s(v)` for any floating-point argument `v`
1998 // of a call to an instrumented function. This allows
1999 // instrumented functions to retrieve the shadow values for their
2000 // arguments.
2001 // Because instrumented functions can be called from non-instrumented
2002 // functions, the stack needs to include a tag so that the instrumented
2003 // function knows whether shadow values are available for their
2004 // parameters (i.e. whether is was called by an instrumented function).
2005 // When shadow arguments are not available, they have to be recreated by
2006 // extending the precision of the non-shadow arguments to the non-shadow
2007 // value. Non-instrumented functions do not modify (or even know about) the
2008 // shadow stack. The shadow stack pointer is __nsan_shadow_args. The shadow
2009 // stack tag is __nsan_shadow_args_tag. The tag is any unique identifier
2010 // for the function (we use the address of the function). Both variables
2011 // are thread local.
2012 // Example:
2013 // calls shadow stack tag shadow stack
2014 // =======================================================================
2015 // non_instrumented_1() 0 0
2016 // |
2017 // v
2018 // instrumented_2(float a) 0 0
2019 // |
2020 // v
2021 // instrumented_3(float b, double c) &instrumented_3 s(b),s(c)
2022 // |
2023 // v
2024 // instrumented_4(float d) &instrumented_4 s(d)
2025 // |
2026 // v
2027 // non_instrumented_5(float e) &non_instrumented_5 s(e)
2028 // |
2029 // v
2030 // instrumented_6(float f) &non_instrumented_5 s(e)
2031 //
2032 // On entry, instrumented_2 checks whether the tag corresponds to its
2033 // function ptr.
2034 // Note that functions reset the tag to 0 after reading shadow parameters.
2035 // This ensures that the function does not erroneously read invalid data if
2036 // called twice in the same stack, once from an instrumented function and
2037 // once from an uninstrumented one. For example, in the following example,
2038 // resetting the tag in (A) ensures that (B) does not reuse the same the
2039 // shadow arguments (which would be incorrect).
2040 // instrumented_1(float a)
2041 // |
2042 // v
2043 // instrumented_2(float b) (A)
2044 // |
2045 // v
2046 // non_instrumented_3()
2047 // |
2048 // v
2049 // instrumented_2(float b) (B)
2050 //
2051 // - A shadow return slot. Any function that returns a floating-point value
2052 // places a shadow return value in __nsan_shadow_ret_val. Again, because
2053 // we might be calling non-instrumented functions, this value is guarded
2054 // by __nsan_shadow_ret_tag marker indicating which instrumented function
2055 // placed the value in __nsan_shadow_ret_val, so that the caller can check
2056 // that this corresponds to the callee. Both variables are thread local.
2057 //
2058 // For example, in the following example, the instrumentation in
2059 // `instrumented_1` rejects the shadow return value from `instrumented_3`
2060 // because is is not tagged as expected (`&instrumented_3` instead of
2061 // `non_instrumented_2`):
2062 //
2063 // instrumented_1()
2064 // |
2065 // v
2066 // float non_instrumented_2()
2067 // |
2068 // v
2069 // float instrumented_3()
2070 //
2071 // Calls of known math functions (sin, cos, exp, ...) are duplicated to call
2072 // their overload on the shadow type.
2073
2074 // Collect all instructions before processing, as creating shadow values
2075 // creates new instructions inside the function.
2076 std::vector<Instruction *> OriginalInstructions;
2077 for (BasicBlock &BB : F)
2078 for (Instruction &Inst : BB)
2079 OriginalInstructions.emplace_back(args: &Inst);
2080
2081 moveFastMathFlags(F, Instructions&: OriginalInstructions);
2082 ValueToShadowMap ValueToShadow(Config);
2083
2084 // In the first pass, we create shadow values for all FT function arguments
2085 // and all phis. This ensures that the DFS of the next pass does not have
2086 // any loops.
2087 std::vector<PHINode *> OriginalPhis;
2088 createShadowArguments(F, TLI, Map&: ValueToShadow);
2089 for (Instruction *I : OriginalInstructions) {
2090 if (PHINode *Phi = dyn_cast<PHINode>(Val: I)) {
2091 if (PHINode *Shadow = maybeCreateShadowPhi(Phi&: *Phi, TLI)) {
2092 OriginalPhis.push_back(x: Phi);
2093 ValueToShadow.setShadow(V&: *Phi, Shadow&: *Shadow);
2094 }
2095 }
2096 }
2097
2098 // Create shadow values for all instructions creating FT values.
2099 for (Instruction *I : OriginalInstructions)
2100 maybeCreateShadowValue(Root&: *I, TLI, Map&: ValueToShadow);
2101
2102 // Propagate shadow values across stores, calls and rets.
2103 for (Instruction *I : OriginalInstructions)
2104 propagateShadowValues(Inst&: *I, TLI, Map: ValueToShadow);
2105
2106 // The last pass populates shadow phis with shadow values.
2107 for (PHINode *Phi : OriginalPhis) {
2108 PHINode *ShadowPhi = dyn_cast<PHINode>(Val: ValueToShadow.getShadow(V: Phi));
2109 for (unsigned I : seq(Size: Phi->getNumOperands())) {
2110 Value *V = Phi->getOperand(i_nocapture: I);
2111 Value *Shadow = ValueToShadow.getShadow(V);
2112 BasicBlock *IncomingBB = Phi->getIncomingBlock(i: I);
2113 // For some instructions (e.g. invoke), we create the shadow in a separate
2114 // block, different from the block where the original value is created.
2115 // In that case, the shadow phi might need to refer to this block instead
2116 // of the original block.
2117 // Note that this can only happen for instructions as constant shadows are
2118 // always created in the same block.
2119 ShadowPhi->addIncoming(V: Shadow, BB: IncomingBB);
2120 }
2121 }
2122
2123 return !ValueToShadow.empty();
2124}
2125
2126// Instrument the memory intrinsics so that they properly modify the shadow
2127// memory.
2128bool NumericalStabilitySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
2129 IRBuilder<> Builder(MI);
2130 if (auto *M = dyn_cast<MemSetInst>(Val: MI)) {
2131 Builder.CreateCall(
2132 Callee: NsanSetValueUnknown,
2133 Args: {/*Address=*/M->getArgOperand(i: 0),
2134 /*Size=*/Builder.CreateIntCast(V: M->getArgOperand(i: 2), DestTy: IntptrTy, isSigned: false)});
2135 } else if (auto *M = dyn_cast<MemTransferInst>(Val: MI)) {
2136 Builder.CreateCall(
2137 Callee: NsanCopyValues,
2138 Args: {/*Destination=*/M->getArgOperand(i: 0),
2139 /*Source=*/M->getArgOperand(i: 1),
2140 /*Size=*/Builder.CreateIntCast(V: M->getArgOperand(i: 2), DestTy: IntptrTy, isSigned: false)});
2141 }
2142 return false;
2143}
2144
2145void NumericalStabilitySanitizer::maybeAddSuffixForNsanInterface(CallBase *CI) {
2146 Function *Fn = CI->getCalledFunction();
2147 if (Fn == nullptr)
2148 return;
2149
2150 if (!Fn->getName().starts_with(Prefix: "__nsan_"))
2151 return;
2152
2153 if (Fn->getName() == "__nsan_dump_shadow_mem") {
2154 assert(CI->arg_size() == 4 &&
2155 "invalid prototype for __nsan_dump_shadow_mem");
2156 // __nsan_dump_shadow_mem requires an extra parameter with the dynamic
2157 // configuration:
2158 // (shadow_type_id_for_long_double << 16) | (shadow_type_id_for_double << 8)
2159 // | shadow_type_id_for_double
2160 const uint64_t shadow_value_type_ids =
2161 (static_cast<size_t>(Config.byValueType(VT: kLongDouble).getNsanTypeId())
2162 << 16) |
2163 (static_cast<size_t>(Config.byValueType(VT: kDouble).getNsanTypeId())
2164 << 8) |
2165 static_cast<size_t>(Config.byValueType(VT: kFloat).getNsanTypeId());
2166 CI->setArgOperand(i: 3, v: ConstantInt::get(Ty: IntptrTy, V: shadow_value_type_ids));
2167 }
2168}
2169