NumericalStabilitySanitizer.cpp source code [llvm_projects/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp]

1	//===-- NumericalStabilitySanitizer.cpp -----------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file contains the instrumentation pass for the numerical sanitizer.
10	// Conceptually the pass injects shadow computations using higher precision
11	// types and inserts consistency checks. For details see the paper
12	// https://arxiv.org/abs/2102.12782.
13	//
14	//===----------------------------------------------------------------------===//
15
16	#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
17
18	#include "llvm/ADT/DenseMap.h"
19	#include "llvm/ADT/SmallString.h"
20	#include "llvm/ADT/SmallVector.h"
21	#include "llvm/ADT/Statistic.h"
22	#include "llvm/ADT/StringExtras.h"
23	#include "llvm/Analysis/TargetLibraryInfo.h"
24	#include "llvm/Analysis/ValueTracking.h"
25	#include "llvm/IR/DataLayout.h"
26	#include "llvm/IR/Function.h"
27	#include "llvm/IR/IRBuilder.h"
28	#include "llvm/IR/IntrinsicInst.h"
29	#include "llvm/IR/Intrinsics.h"
30	#include "llvm/IR/LLVMContext.h"
31	#include "llvm/IR/MDBuilder.h"
32	#include "llvm/IR/Metadata.h"
33	#include "llvm/IR/Module.h"
34	#include "llvm/IR/Type.h"
35	#include "llvm/InitializePasses.h"
36	#include "llvm/Support/CommandLine.h"
37	#include "llvm/Support/Debug.h"
38	#include "llvm/Support/MathExtras.h"
39	#include "llvm/Support/Regex.h"
40	#include "llvm/Support/raw_ostream.h"
41	#include "llvm/Transforms/Instrumentation.h"
42	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
43	#include "llvm/Transforms/Utils/EscapeEnumerator.h"
44	#include "llvm/Transforms/Utils/Local.h"
45	#include "llvm/Transforms/Utils/ModuleUtils.h"
46
47	#include <cstdint>
48
49	using namespace llvm;
50
51	#define DEBUG_TYPE "nsan"
52
53	STATISTIC(NumInstrumentedFTLoads,
54	"Number of instrumented floating-point loads");
55
56	STATISTIC(NumInstrumentedFTCalls,
57	"Number of instrumented floating-point calls");
58	STATISTIC(NumInstrumentedFTRets,
59	"Number of instrumented floating-point returns");
60	STATISTIC(NumInstrumentedFTStores,
61	"Number of instrumented floating-point stores");
62	STATISTIC(NumInstrumentedNonFTStores,
63	"Number of instrumented non floating-point stores");
64	STATISTIC(
65	NumInstrumentedNonFTMemcpyStores,
66	"Number of instrumented non floating-point stores with memcpy semantics");
67	STATISTIC(NumInstrumentedFCmp, "Number of instrumented fcmps");
68
69	// Using smaller shadow types types can help improve speed. For example, `dlq`
70	// is 3x slower to 5x faster in opt mode and 2-6x faster in dbg mode compared to
71	// `dqq`.
72	static cl::opt<std::string> ClShadowMapping(
73	"nsan-shadow-type-mapping", cl::init(Val: "dqq"),
74	cl::desc ("One shadow type id for each of `float`, `double`, `long double`. "
75	"`d`,`l`,`q`,`e` mean double, x86_fp80, fp128 (quad) and "
76	"ppc_fp128 (extended double) respectively. The default is to "
77	"shadow `float` as `double`, and `double` and `x86_fp80` as "
78	"`fp128`"),
79	cl::Hidden);
80
81	static cl::opt<bool>
82	ClInstrumentFCmp("nsan-instrument-fcmp", cl::init(Val: true),
83	cl::desc ("Instrument floating-point comparisons"),
84	cl::Hidden);
85
86	static cl::opt<std::string> ClCheckFunctionsFilter(
87	"check-functions-filter",
88	cl::desc ("Only emit checks for arguments of functions "
89	"whose names match the given regular expression"),
90	cl::value_desc ("regex"));
91
92	static cl::opt<bool> ClTruncateFCmpEq(
93	"nsan-truncate-fcmp-eq", cl::init(Val: true),
94	cl::desc (
95	"This flag controls the behaviour of fcmp equality comparisons."
96	"For equality comparisons such as `x == 0.0f`, we can perform the "
97	"shadow check in the shadow (`x_shadow == 0.0) == (x == 0.0f)`) or app "
98	" domain (`(trunc(x_shadow) == 0.0f) == (x == 0.0f)`). This helps "
99	"catch the case when `x_shadow` is accurate enough (and therefore "
100	"close enough to zero) so that `trunc(x_shadow)` is zero even though "
101	"both `x` and `x_shadow` are not"),
102	cl::Hidden);
103
104	// When there is external, uninstrumented code writing to memory, the shadow
105	// memory can get out of sync with the application memory. Enabling this flag
106	// emits consistency checks for loads to catch this situation.
107	// When everything is instrumented, this is not strictly necessary because any
108	// load should have a corresponding store, but can help debug cases when the
109	// framework did a bad job at tracking shadow memory modifications by failing on
110	// load rather than store.
111	// TODO: provide a way to resume computations from the FT value when the load
112	// is inconsistent. This ensures that further computations are not polluted.
113	static cl::opt<bool> ClCheckLoads("nsan-check-loads",
114	cl::desc ("Check floating-point load"),
115	cl::Hidden);
116
117	static cl::opt<bool> ClCheckStores("nsan-check-stores", cl::init(Val: true),
118	cl::desc ("Check floating-point stores"),
119	cl::Hidden);
120
121	static cl::opt<bool> ClCheckRet("nsan-check-ret", cl::init(Val: true),
122	cl::desc ("Check floating-point return values"),
123	cl::Hidden);
124
125	// LLVM may store constant floats as bitcasted ints.
126	// It's not really necessary to shadow such stores,
127	// if the shadow value is unknown the framework will re-extend it on load
128	// anyway. Moreover, because of size collisions (e.g. bf16 vs f16) it is
129	// impossible to determine the floating-point type based on the size.
130	// However, for debugging purposes it can be useful to model such stores.
131	static cl::opt<bool> ClPropagateNonFTConstStoresAsFT(
132	"nsan-propagate-non-ft-const-stores-as-ft",
133	cl::desc (
134	"Propagate non floating-point const stores as floating point values."
135	"For debugging purposes only"),
136	cl::Hidden);
137
138	constexpr StringLiteral kNsanModuleCtorName("nsan.module_ctor");
139	constexpr StringLiteral kNsanInitName("__nsan_init");
140
// The following values must be kept in sync with the runtime.
// Maximum ratio between a shadow type's size and its application type's size.
constexpr int kShadowScale = 2;
// Used, together with the constants below, to size the shadow return and
// shadow argument buffers.
constexpr int kMaxVectorWidth = 8;
constexpr int kMaxNumArgs = 128;
constexpr int kMaxShadowTypeSizeBytes = 16; // fp128
146
147	namespace {
148
149	// Defines the characteristics (type id, type, and floating-point semantics)
150	// attached for all possible shadow types.
151	class ShadowTypeConfig {
152	public:
153	static std::unique_ptr<ShadowTypeConfig> fromNsanTypeId(char TypeId);
154
155	// The LLVM Type corresponding to the shadow type.
156	virtual Type getType(LLVMContext &Context) const* = `0`;
157
158	// The nsan type id of the shadow type (`d`, `l`, `q`, ...).
159	virtual char getNsanTypeId() const = `0`;
160
161	virtual ~ShadowTypeConfig() = default;
162	};
163
164	template <char NsanTypeId>
165	class ShadowTypeConfigImpl : public ShadowTypeConfig {
166	public:
167	char getNsanTypeId() const override { return NsanTypeId; }
168	static constexpr const char kNsanTypeId = NsanTypeId;
169	};
170
171	// `double` (`d`) shadow type.
172	class F64ShadowConfig : public ShadowTypeConfigImpl<`'d'`> {
173	Type getType(LLVMContext &Context) const* override {
174	return Type::getDoubleTy(C&: Context);
175	}
176	};
177
178	// `x86_fp80` (`l`) shadow type: X86 long double.
179	class F80ShadowConfig : public ShadowTypeConfigImpl<`'l'`> {
180	Type getType(LLVMContext &Context) const* override {
181	return Type::getX86_FP80Ty(C&: Context);
182	}
183	};
184
185	// `fp128` (`q`) shadow type.
186	class F128ShadowConfig : public ShadowTypeConfigImpl<`'q'`> {
187	Type getType(LLVMContext &Context) const* override {
188	return Type::getFP128Ty(C&: Context);
189	}
190	};
191
192	// `ppc_fp128` (`e`) shadow type: IBM extended double with 106 bits of mantissa.
193	class PPC128ShadowConfig : public ShadowTypeConfigImpl<`'e'`> {
194	Type getType(LLVMContext &Context) const* override {
195	return Type::getPPC_FP128Ty(C&: Context);
196	}
197	};
198
199	// Creates a ShadowTypeConfig given its type id.
200	std::unique_ptr<ShadowTypeConfig>
201	ShadowTypeConfig::fromNsanTypeId(const char TypeId) {
202	switch (TypeId) {
203	case F64ShadowConfig::kNsanTypeId:
204	return std::make_unique<F64ShadowConfig>();
205	case F80ShadowConfig::kNsanTypeId:
206	return std::make_unique<F80ShadowConfig>();
207	case F128ShadowConfig::kNsanTypeId:
208	return std::make_unique<F128ShadowConfig>();
209	case PPC128ShadowConfig::kNsanTypeId:
210	return std::make_unique<PPC128ShadowConfig>();
211	}
212	report_fatal_error(reason: "nsan: invalid shadow type id '" + Twine (TypeId) + "'");
213	}
214
// The application floating-point types that can be shadowed. The enumerators
// double as array indices, which is why this is a plain `enum` rather than an
// `enum class`; kNumValueTypes is the number of real entries.
enum FTValueType {
  kFloat,
  kDouble,
  kLongDouble,
  kNumValueTypes,
};
218
219	// If `FT` corresponds to a primitive FTValueType, return it.
220	static std::optional<FTValueType> ftValueTypeFromType(Type *FT) {
221	if (FT->isFloatTy())
222	return kFloat;
223	if (FT->isDoubleTy())
224	return kDouble;
225	if (FT->isX86_FP80Ty())
226	return kLongDouble;
227	return {};
228	}
229
230	// Returns the LLVM type for an FTValueType.
231	static Type *typeFromFTValueType(FTValueType VT, LLVMContext &Context) {
232	switch (VT) {
233	case kFloat:
234	return Type::getFloatTy(C&: Context);
235	case kDouble:
236	return Type::getDoubleTy(C&: Context);
237	case kLongDouble:
238	return Type::getX86_FP80Ty(C&: Context);
239	case kNumValueTypes:
240	return nullptr;
241	}
242	llvm_unreachable("Unhandled FTValueType enum");
243	}
244
245	// Returns the type name for an FTValueType.
246	static const char *typeNameFromFTValueType(FTValueType VT) {
247	switch (VT) {
248	case kFloat:
249	return "float";
250	case kDouble:
251	return "double";
252	case kLongDouble:
253	return "longdouble";
254	case kNumValueTypes:
255	return nullptr;
256	}
257	llvm_unreachable("Unhandled FTValueType enum");
258	}
259
260	// A specific mapping configuration of application type to shadow type for nsan
261	// (see -nsan-shadow-mapping flag).
262	class MappingConfig {
263	public:
264	explicit MappingConfig(LLVMContext &C) : Context(C) {
265	if (ClShadowMapping.size() != `3`)
266	report_fatal_error(reason: "Invalid nsan mapping: " + Twine (ClShadowMapping));
267	unsigned ShadowTypeSizeBits[kNumValueTypes];
268	for (int VT = `0`; VT < kNumValueTypes; ++VT) {
269	auto Config = ShadowTypeConfig::fromNsanTypeId(TypeId: ClShadowMapping [VT]);
270	if (!Config)
271	report_fatal_error(reason: "Failed to get ShadowTypeConfig for " +
272	Twine (ClShadowMapping [VT]));
273	const unsigned AppTypeSize =
274	typeFromFTValueType(VT: static_cast<FTValueType>(VT), Context)
275	->getScalarSizeInBits();
276	const unsigned ShadowTypeSize =
277	Config ->getType(Context)->getScalarSizeInBits();
278	// Check that the shadow type size is at most kShadowScale times the
279	// application type size, so that shadow memory compoutations are valid.
280	if (ShadowTypeSize > kShadowScale * AppTypeSize)
281	report_fatal_error(reason: "Invalid nsan mapping f" + Twine (AppTypeSize) +
282	"->f" + Twine (ShadowTypeSize) +
283	": The shadow type size should be at most " +
284	Twine (kShadowScale) +
285	" times the application type size");
286	ShadowTypeSizeBits[VT] = ShadowTypeSize;
287	Configs[VT] = std::move(Config);
288	}
289
290	// Check that the mapping is monotonous. This is required because if one
291	// does an fpextend of `float->long double` in application code, nsan is
292	// going to do an fpextend of `shadow(float) -> shadow(long double)` in
293	// shadow code. This will fail in `qql` mode, since nsan would be
294	// fpextending `f128->long`, which is invalid.
295	// TODO: Relax this.
296	if (ShadowTypeSizeBits[kFloat] > ShadowTypeSizeBits[kDouble] \|\|
297	ShadowTypeSizeBits[kDouble] > ShadowTypeSizeBits[kLongDouble])
298	report_fatal_error(reason: "Invalid nsan mapping: { float->f" +
299	Twine (ShadowTypeSizeBits[kFloat]) + "; double->f" +
300	Twine (ShadowTypeSizeBits[kDouble]) +
301	"; long double->f" +
302	Twine (ShadowTypeSizeBits[kLongDouble]) + " }");
303	}
304
305	const ShadowTypeConfig &byValueType(FTValueType VT) const {
306	assert(VT < FTValueType::kNumValueTypes && "invalid value type");
307	return *Configs[VT];
308	}
309
310	// Returns the extended shadow type for a given application type.
311	Type getExtendedFPType(Type FT) const {
312	if (const auto VT = ftValueTypeFromType(FT))
313	return Configs[*VT]->getType(Context);
314	if (FT->isVectorTy()) {
315	auto *VecTy = cast<VectorType>(Val: FT);
316	// TODO: add support for scalable vector types.
317	if (VecTy->isScalableTy())
318	return nullptr;
319	Type *ExtendedScalar = getExtendedFPType(FT: VecTy->getElementType());
320	return ExtendedScalar
321	? VectorType::get(ElementType: ExtendedScalar, EC: VecTy->getElementCount())
322	: nullptr;
323	}
324	return nullptr;
325	}
326
327	private:
328	LLVMContext &Context;
329	std::unique_ptr<ShadowTypeConfig> Configs[FTValueType::kNumValueTypes];
330	};
331
332	// The memory extents of a type specifies how many elements of a given
333	// FTValueType needs to be stored when storing this type.
334	struct MemoryExtents {
335	FTValueType ValueType;
336	uint64_t NumElts;
337	};
338
339	static MemoryExtents getMemoryExtentsOrDie(Type *FT) {
340	if (const auto VT = ftValueTypeFromType(FT))
341	return {.ValueType: *VT, .NumElts: `1`};
342	if (auto *VecTy = dyn_cast<VectorType>(Val: FT)) {
343	const auto ScalarExtents = getMemoryExtentsOrDie(FT: VecTy->getElementType());
344	return {.ValueType: ScalarExtents.ValueType,
345	.NumElts: ScalarExtents.NumElts * VecTy->getElementCount().getFixedValue()};
346	}
347	llvm_unreachable("invalid value type");
348	}
349
350	// The location of a check. Passed as parameters to runtime checking functions.
351	class CheckLoc {
352	public:
353	// Creates a location that references an application memory location.
354	static CheckLoc makeStore(Value *Address) {
355	CheckLoc Result(kStore);
356	Result.Address = Address;
357	return Result;
358	}
359	static CheckLoc makeLoad(Value *Address) {
360	CheckLoc Result(kLoad);
361	Result.Address = Address;
362	return Result;
363	}
364
365	// Creates a location that references an argument, given by id.
366	static CheckLoc makeArg(int ArgId) {
367	CheckLoc Result(kArg);
368	Result.ArgId = ArgId;
369	return Result;
370	}
371
372	// Creates a location that references the return value of a function.
373	static CheckLoc makeRet() { return CheckLoc (kRet); }
374
375	// Creates a location that references a vector insert.
376	static CheckLoc makeInsert() { return CheckLoc (kInsert); }
377
378	// Returns the CheckType of location this refers to, as an integer-typed LLVM
379	// IR value.
380	Value getType(LLVMContext &C) const* {
381	return ConstantInt::get(Ty: Type::getInt32Ty(C), V: static_cast<int>(CheckTy));
382	}
383
384	// Returns a CheckType-specific value representing details of the location
385	// (e.g. application address for loads or stores), as an `IntptrTy`-typed LLVM
386	// IR value.
387	Value getValue(Type IntptrTy, IRBuilder<> &Builder) const {
388	switch (CheckTy) {
389	case kUnknown:
390	llvm_unreachable("unknown type");
391	case kRet:
392	case kInsert:
393	return ConstantInt::get(Ty: IntptrTy, V: `0`);
394	case kArg:
395	return ConstantInt::get(Ty: IntptrTy, V: ArgId);
396	case kLoad:
397	case kStore:
398	return Builder.CreatePtrToInt(V: Address, DestTy: IntptrTy);
399	}
400	llvm_unreachable("Unhandled CheckType enum");
401	}
402
403	private:
404	// Must be kept in sync with the runtime,
405	// see compiler-rt/lib/nsan/nsan_stats.h
406	enum CheckType {
407	kUnknown = `0`,
408	kRet,
409	kArg,
410	kLoad,
411	kStore,
412	kInsert,
413	};
414	explicit CheckLoc(CheckType CheckTy) : CheckTy(CheckTy) {}
415
416	Value Address = nullptr*;
417	const CheckType CheckTy;
418	int ArgId = -`1`;
419	};
420
421	// A map of LLVM IR values to shadow LLVM IR values.
422	class ValueToShadowMap {
423	public:
424	explicit ValueToShadowMap(const MappingConfig &Config) : Config(Config) {}
425
426	ValueToShadowMap(const ValueToShadowMap &) = delete;
427	ValueToShadowMap &operator=(const ValueToShadowMap &) = delete;
428
429	// Sets the shadow value for a value. Asserts that the value does not already
430	// have a value.
431	void setShadow(Value &V, Value &Shadow) {
432	[[maybe_unused]] const bool Inserted = Map.try_emplace(Key: &V, Args: &Shadow).second;
433	LLVM_DEBUG({
434	if (!Inserted) {
435	if (auto *I = dyn_cast<Instruction>(&V))
436	errs() << I->getFunction()->getName() << ": ";
437	errs() << "duplicate shadow (" << &V << "): ";
438	V.dump();
439	}
440	});
441	assert(Inserted && "duplicate shadow");
442	}
443
444	// Returns true if the value already has a shadow (including if the value is a
445	// constant). If true, calling getShadow() is valid.
446	bool hasShadow(Value V) const* {
447	return isa<Constant>(Val: V) \|\| (Map.find(Val: V) != Map.end());
448	}
449
450	// Returns the shadow value for a given value. Asserts that the value has
451	// a shadow value. Lazily creates shadows for constant values.
452	Value getShadow(Value V) const {
453	if (Constant *C = dyn_cast<Constant>(Val: V))
454	return getShadowConstant(C);
455	return Map.find(Val: V)->second;
456	}
457
458	bool empty() const { return Map.empty(); }
459
460	private:
461	// Extends a constant application value to its shadow counterpart.
462	APFloat extendConstantFP(APFloat CV, const fltSemantics &To) const {
463	bool LosesInfo = false;
464	CV.convert(ToSemantics: To, RM: APFloatBase::rmTowardZero, losesInfo: &LosesInfo);
465	return CV;
466	}
467
468	// Returns the shadow constant for the given application constant.
469	Constant getShadowConstant(Constant C) const {
470	if (UndefValue *U = dyn_cast<UndefValue>(Val: C)) {
471	return UndefValue::get(T: Config.getExtendedFPType(FT: U->getType()));
472	}
473	if (ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C)) {
474	// Floating-point constants.
475	Type *Ty = Config.getExtendedFPType(FT: CFP->getType());
476	return ConstantFP::get(
477	Ty, V: extendConstantFP(CV: CFP->getValueAPF(), To: Ty->getFltSemantics()));
478	}
479	// Vector, array, or aggregate constants.
480	if (C->getType()->isVectorTy()) {
481	SmallVector<Constant *, `8`> Elements;
482	for (int I = `0`, E = cast<VectorType>(Val: C->getType())
483	->getElementCount()
484	.getFixedValue();
485	I < E; ++I)
486	Elements.push_back(Elt: getShadowConstant(C: C->getAggregateElement(Elt: I)));
487	return ConstantVector::get(V: Elements);
488	}
489	llvm_unreachable("unimplemented");
490	}
491
492	const MappingConfig &Config;
493	DenseMap<Value , Value > Map;
494	};
495
496	/// Instantiating NumericalStabilitySanitizer inserts the nsan runtime library
497	/// API function declarations into the module if they don't exist already.
498	/// Instantiating ensures the __nsan_init function is in the list of global
499	/// constructors for the module.
500	class NumericalStabilitySanitizer {
501	public:
502	NumericalStabilitySanitizer(Module &M);
503	bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
504
505	private:
506	bool instrumentMemIntrinsic(MemIntrinsic *MI);
507	void maybeAddSuffixForNsanInterface(CallBase *CI);
508	bool addrPointsToConstantData(Value *Addr);
509	void maybeCreateShadowValue(Instruction &Root, const TargetLibraryInfo &TLI,
510	ValueToShadowMap &Map);
511	Value *createShadowValueWithOperandsAvailable(Instruction &Inst,
512	const TargetLibraryInfo &TLI,
513	const ValueToShadowMap &Map);
514	PHINode maybeCreateShadowPhi(PHINode &Phi, const* TargetLibraryInfo &TLI);
515	void createShadowArguments(Function &F, const TargetLibraryInfo &TLI,
516	ValueToShadowMap &Map);
517
518	void populateShadowStack(CallBase &CI, const TargetLibraryInfo &TLI,
519	const ValueToShadowMap &Map);
520
521	void propagateShadowValues(Instruction &Inst, const TargetLibraryInfo &TLI,
522	const ValueToShadowMap &Map);
523	Value emitCheck(Value V, Value *ShadowV, IRBuilder<> &Builder,
524	CheckLoc Loc);
525	Value emitCheckInternal(Value V, Value *ShadowV, IRBuilder<> &Builder,
526	CheckLoc Loc);
527	void emitFCmpCheck(FCmpInst &FCmp, const ValueToShadowMap &Map);
528
529	// Value creation handlers.
530	Value handleLoad(LoadInst &Load, Type VT, Type *ExtendedVT);
531	Value handleCallBase(CallBase &Call, Type VT, Type *ExtendedVT,
532	const TargetLibraryInfo &TLI,
533	const ValueToShadowMap &Map, IRBuilder<> &Builder);
534	Value maybeHandleKnownCallBase(CallBase &Call, Type VT, Type *ExtendedVT,
535	const TargetLibraryInfo &TLI,
536	const ValueToShadowMap &Map,
537	IRBuilder<> &Builder);
538	Value handleTrunc(const* FPTruncInst &Trunc, Type VT, Type ExtendedVT,
539	const ValueToShadowMap &Map, IRBuilder<> &Builder);
540	Value handleExt(const* FPExtInst &Ext, Type VT, Type ExtendedVT,
541	const ValueToShadowMap &Map, IRBuilder<> &Builder);
542
543	// Value propagation handlers.
544	void propagateFTStore(StoreInst &Store, Type VT, Type ExtendedVT,
545	const ValueToShadowMap &Map);
546	void propagateNonFTStore(StoreInst &Store, Type *VT,
547	const ValueToShadowMap &Map);
548
549	const DataLayout &DL;
550	LLVMContext &Context;
551	MappingConfig Config;
552	IntegerType IntptrTy = nullptr*;
553	FunctionCallee NsanGetShadowPtrForStore[FTValueType::kNumValueTypes] = {};
554	FunctionCallee NsanGetShadowPtrForLoad[FTValueType::kNumValueTypes] = {};
555	FunctionCallee NsanCheckValue[FTValueType::kNumValueTypes] = {};
556	FunctionCallee NsanFCmpFail[FTValueType::kNumValueTypes] = {};
557	FunctionCallee NsanCopyValues;
558	FunctionCallee NsanSetValueUnknown;
559	FunctionCallee NsanGetRawShadowTypePtr;
560	FunctionCallee NsanGetRawShadowPtr;
561	GlobalValue NsanShadowRetTag = nullptr*;
562
563	Type NsanShadowRetType = nullptr*;
564	GlobalValue NsanShadowRetPtr = nullptr*;
565
566	GlobalValue NsanShadowArgsTag = nullptr*;
567
568	Type NsanShadowArgsType = nullptr*;
569	GlobalValue NsanShadowArgsPtr = nullptr*;
570
571	std::optional<Regex> CheckFunctionsFilter;
572	};
573	} // end anonymous namespace
574
575	PreservedAnalyses
576	NumericalStabilitySanitizerPass::run(Module &M, ModuleAnalysisManager &MAM) {
577	getOrCreateSanitizerCtorAndInitFunctions(
578	M, CtorName: kNsanModuleCtorName, InitName: kNsanInitName, /InitArgTypes=/{},
579	/InitArgs=/{},
580	// This callback is invoked when the functions are created the first
581	// time. Hook them into the global ctors list in that case:
582	FunctionsCreatedCallback: [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, F: Ctor, Priority: `0`); });
583
584	NumericalStabilitySanitizer Nsan(M);
585	auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
586	for (Function &F : M)
587	Nsan.sanitizeFunction(F, TLI: FAM.getResult<TargetLibraryAnalysis>(IR&: F));
588
589	return PreservedAnalyses::none();
590	}
591
592	static GlobalValue createThreadLocalGV(const* char Name, Module &M, Type Ty) {
593	return dyn_cast<GlobalValue>(Val: M.getOrInsertGlobal(Name, Ty, CreateGlobalCallback: [&M, Ty, Name] {
594	return new GlobalVariable (M, Ty, false, GlobalVariable::ExternalLinkage,
595	nullptr, Name, nullptr,
596	GlobalVariable::InitialExecTLSModel);
597	}));
598	}
599
600	NumericalStabilitySanitizer::NumericalStabilitySanitizer(Module &M)
601	: DL(M.getDataLayout()), Context(M.getContext()), Config (Context) {
602	IntptrTy = DL.getIntPtrType(C&: Context);
603	Type *PtrTy = PointerType::getUnqual(C&: Context);
604	Type *Int32Ty = Type::getInt32Ty(C&: Context);
605	Type *Int1Ty = Type::getInt1Ty(C&: Context);
606	Type *VoidTy = Type::getVoidTy(C&: Context);
607
608	AttributeList Attr;
609	Attr = Attr.addFnAttribute(C&: Context, Kind: Attribute::NoUnwind);
610	// Initialize the runtime values (functions and global variables).
611	for (int I = `0`; I < kNumValueTypes; ++I) {
612	const FTValueType VT = static_cast<FTValueType>(I);
613	const char *VTName = typeNameFromFTValueType(VT);
614	Type *VTTy = typeFromFTValueType(VT, Context);
615
616	// Load/store.
617	const std::string GetterPrefix =
618	std::string ("__nsan_get_shadow_ptr_for_") + VTName;
619	NsanGetShadowPtrForStore[VT] = M.getOrInsertFunction(
620	Name: GetterPrefix + "_store", AttributeList: Attr, RetTy: PtrTy, Args: PtrTy, Args: IntptrTy);
621	NsanGetShadowPtrForLoad[VT] = M.getOrInsertFunction(
622	Name: GetterPrefix + "_load", AttributeList: Attr, RetTy: PtrTy, Args: PtrTy, Args: IntptrTy);
623
624	// Check.
625	const auto &ShadowConfig = Config.byValueType(VT);
626	Type *ShadowTy = ShadowConfig.getType(Context);
627	NsanCheckValue[VT] =
628	M.getOrInsertFunction(Name: std::string ("__nsan_internal_check_") + VTName +
629	"_" + ShadowConfig.getNsanTypeId(),
630	AttributeList: Attr, RetTy: Int32Ty, Args: VTTy, Args: ShadowTy, Args: Int32Ty, Args: IntptrTy);
631	NsanFCmpFail[VT] = M.getOrInsertFunction(
632	Name: std::string ("__nsan_fcmp_fail_") + VTName + "_" +
633	ShadowConfig.getNsanTypeId(),
634	AttributeList: Attr, RetTy: VoidTy, Args: VTTy, Args: VTTy, Args: ShadowTy, Args: ShadowTy, Args: Int32Ty, Args: Int1Ty, Args: Int1Ty);
635	}
636
637	NsanCopyValues = M.getOrInsertFunction(Name: "__nsan_copy_values", AttributeList: Attr, RetTy: VoidTy,
638	Args: PtrTy, Args: PtrTy, Args: IntptrTy);
639	NsanSetValueUnknown = M.getOrInsertFunction(Name: "__nsan_set_value_unknown", AttributeList: Attr,
640	RetTy: VoidTy, Args: PtrTy, Args: IntptrTy);
641
642	// TODO: Add attributes nofree, nosync, readnone, readonly,
643	NsanGetRawShadowTypePtr = M.getOrInsertFunction(
644	Name: "__nsan_internal_get_raw_shadow_type_ptr", AttributeList: Attr, RetTy: PtrTy, Args: PtrTy);
645	NsanGetRawShadowPtr = M.getOrInsertFunction(
646	Name: "__nsan_internal_get_raw_shadow_ptr", AttributeList: Attr, RetTy: PtrTy, Args: PtrTy);
647
648	NsanShadowRetTag = createThreadLocalGV(Name: "__nsan_shadow_ret_tag", M, Ty: IntptrTy);
649
650	NsanShadowRetType = ArrayType::get(ElementType: Type::getInt8Ty(C&: Context),
651	NumElements: kMaxVectorWidth * kMaxShadowTypeSizeBytes);
652	NsanShadowRetPtr =
653	createThreadLocalGV(Name: "__nsan_shadow_ret_ptr", M, Ty: NsanShadowRetType);
654
655	NsanShadowArgsTag =
656	createThreadLocalGV(Name: "__nsan_shadow_args_tag", M, Ty: IntptrTy);
657
658	NsanShadowArgsType =
659	ArrayType::get(ElementType: Type::getInt8Ty(C&: Context),
660	NumElements: kMaxVectorWidth * kMaxNumArgs * kMaxShadowTypeSizeBytes);
661
662	NsanShadowArgsPtr =
663	createThreadLocalGV(Name: "__nsan_shadow_args_ptr", M, Ty: NsanShadowArgsType);
664
665	if (!ClCheckFunctionsFilter.empty()) {
666	Regex R = Regex (ClCheckFunctionsFilter);
667	std::string RegexError;
668	assert(R.isValid(RegexError));
669	CheckFunctionsFilter = std::move(R);
670	}
671	}
672
673	// Returns true if the given LLVM Value points to constant data (typically, a
674	// global variable reference).
675	bool NumericalStabilitySanitizer::addrPointsToConstantData(Value *Addr) {
676	// If this is a GEP, just analyze its pointer operand.
677	if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: Addr))
678	Addr = GEP->getPointerOperand();
679
680	if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Val: Addr))
681	return GV->isConstant();
682	return false;
683	}
684
685	// This instruments the function entry to create shadow arguments.
686	// Pseudocode:
687	// if (this_fn_ptr == __nsan_shadow_args_tag) {
688	// s(arg0) = LOAD<sizeof(arg0)>(__nsan_shadow_args);
689	// s(arg1) = LOAD<sizeof(arg1)>(__nsan_shadow_args + sizeof(arg0));
690	// ...
691	// __nsan_shadow_args_tag = 0;
692	// } else {
693	// s(arg0) = fext(arg0);
694	// s(arg1) = fext(arg1);
695	// ...
696	// }
697	void NumericalStabilitySanitizer::createShadowArguments(
698	Function &F, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
699	assert(!F.getIntrinsicID() && "found a definition of an intrinsic");
700
701	// Do not bother if there are no FP args.
702	if (all_of(Range: F.args(), P: [this](const Argument &Arg) {
703	return Config.getExtendedFPType(FT: Arg.getType()) == nullptr;
704	}))
705	return;
706
707	IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHI());
708	// The function has shadow args if the shadow args tag matches the function
709	// address.
710	Value *HasShadowArgs = Builder.CreateICmpEQ(
711	LHS: Builder.CreateLoad(Ty: IntptrTy, Ptr: NsanShadowArgsTag, /isVolatile=/false),
712	RHS: Builder.CreatePtrToInt(V: &F, DestTy: IntptrTy));
713
714	unsigned ShadowArgsOffsetBytes = `0`;
715	for (Argument &Arg : F.args()) {
716	Type *VT = Arg.getType();
717	Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
718	if (ExtendedVT == nullptr)
719	continue; // Not an FT value.
720	Value *L = Builder.CreateAlignedLoad(
721	Ty: ExtendedVT,
722	Ptr: Builder.CreateConstGEP2_64(Ty: NsanShadowArgsType, Ptr: NsanShadowArgsPtr, Idx0: `0`,
723	Idx1: ShadowArgsOffsetBytes),
724	Align: Align (`1`), /isVolatile=/false);
725	Value *Shadow = Builder.CreateSelect(C: HasShadowArgs, True: L,
726	False: Builder.CreateFPExt(V: &Arg, DestTy: ExtendedVT));
727	Map.setShadow(V&: Arg, Shadow&: *Shadow);
728	TypeSize SlotSize = DL.getTypeStoreSize(Ty: ExtendedVT);
729	assert(!SlotSize.isScalable() && "unsupported");
730	ShadowArgsOffsetBytes += SlotSize;
731	}
732	Builder.CreateStore(Val: ConstantInt::get(Ty: IntptrTy, V: `0`), Ptr: NsanShadowArgsTag);
733	}
734
735	// Returns true if the instrumentation should emit code to check arguments
736	// before a function call.
737	static bool shouldCheckArgs(CallBase &CI, const TargetLibraryInfo &TLI,
738	const std::optional<Regex> &CheckFunctionsFilter) {
739
740	Function *Fn = CI.getCalledFunction();
741
742	if (CheckFunctionsFilter) {
743	// Skip checking args of indirect calls.
744	if (Fn == nullptr)
745	return false;
746	if (CheckFunctionsFilter ->match(String: Fn->getName()))
747	return true;
748	return false;
749	}
750
751	if (Fn == nullptr)
752	return true; // Always check args of indirect calls.
753
754	// Never check nsan functions, the user called them for a reason.
755	if (Fn->getName().starts_with(Prefix: "__nsan_"))
756	return false;
757
758	const auto ID = Fn->getIntrinsicID();
759	LibFunc LFunc = LibFunc::NumLibFuncs;
760	// Always check args of unknown functions.
761	if (ID == Intrinsic::ID() && !TLI.getLibFunc(FDecl: *Fn, F&: LFunc))
762	return true;
763
764	// Do not check args of an `fabs` call that is used for a comparison.
765	// This is typically used for `fabs(a-b) < tolerance`, where what matters is
766	// the result of the comparison, which is already caught be the fcmp checks.
767	if (ID == Intrinsic::fabs \|\| LFunc == LibFunc_fabsf \|\|
768	LFunc == LibFunc_fabs \|\| LFunc == LibFunc_fabsl)
769	for (const auto &U : CI.users())
770	if (isa<CmpInst>(Val: U))
771	return false;
772
773	return true; // Default is check.
774	}
775
776	// Populates the shadow call stack (which contains shadow values for every
777	// floating-point parameter to the function).
778	void NumericalStabilitySanitizer::populateShadowStack(
779	CallBase &CI, const TargetLibraryInfo &TLI, const ValueToShadowMap &Map) {
780	// Do not create a shadow stack for inline asm.
781	if (CI.isInlineAsm())
782	return;
783
784	// Do not bother if there are no FP args.
785	if (all_of(Range: CI.operands(), P: [this](const Value *Arg) {
786	return Config.getExtendedFPType(FT: Arg->getType()) == nullptr;
787	}))
788	return;
789
790	IRBuilder<> Builder(&CI);
791	SmallVector<Value *, `8`> ArgShadows;
792	const bool ShouldCheckArgs = shouldCheckArgs(CI, TLI, CheckFunctionsFilter);
793	for (auto [ArgIdx, Arg] : enumerate(First: CI.operands())) {
794	if (Config.getExtendedFPType(FT: Arg ->getType()) == nullptr)
795	continue; // Not an FT value.
796	Value *ArgShadow = Map.getShadow(V: Arg);
797	ArgShadows.push_back(Elt: ShouldCheckArgs ? emitCheck(V: Arg, ShadowV: ArgShadow, Builder,
798	Loc: CheckLoc::makeArg(ArgId: ArgIdx))
799	: ArgShadow);
800	}
801
802	// Do not create shadow stacks for intrinsics/known lib funcs.
803	if (Function *Fn = CI.getCalledFunction()) {
804	LibFunc LFunc;
805	if (Fn->isIntrinsic() \|\| TLI.getLibFunc(FDecl: *Fn, F&: LFunc))
806	return;
807	}
808
809	// Set the shadow stack tag.
810	Builder.CreateStore(Val: CI.getCalledOperand(), Ptr: NsanShadowArgsTag);
811	TypeSize ShadowArgsOffsetBytes = TypeSize::getFixed(ExactSize: `0`);
812
813	unsigned ShadowArgId = `0`;
814	for (const Value *Arg : CI.operands()) {
815	Type *VT = Arg->getType();
816	Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
817	if (ExtendedVT == nullptr)
818	continue; // Not an FT value.
819	Builder.CreateAlignedStore(
820	Val: ArgShadows [ShadowArgId++],
821	Ptr: Builder.CreateConstGEP2_64(Ty: NsanShadowArgsType, Ptr: NsanShadowArgsPtr, Idx0: `0`,
822	Idx1: ShadowArgsOffsetBytes),
823	Align: Align (`1`), /isVolatile=/false);
824	TypeSize SlotSize = DL.getTypeStoreSize(Ty: ExtendedVT);
825	assert(!SlotSize.isScalable() && "unsupported");
826	ShadowArgsOffsetBytes += SlotSize;
827	}
828	}
829
// Answer produced by the internal part of emitCheck() (emitCheckInternal()):
// it tells whether the computation should go on with the shadow value or
// resume from the original value, re-extended to the shadow type.
// The numeric values are shared with the runtime: keep them in sync.
enum class ContinuationType {
  ContinueWithShadow = 0,
  ResumeFromValue = 1,
};
837
838	Value NumericalStabilitySanitizer::emitCheckInternal(Value V, Value *ShadowV,
839	IRBuilder<> &Builder,
840	CheckLoc Loc) {
841	// Do not emit checks for constant values, this is redundant.
842	if (isa<Constant>(Val: V))
843	return ConstantInt::get(
844	Ty: Builder.getInt32Ty(),
845	V: static_cast<int>(ContinuationType::ContinueWithShadow));
846
847	Type *Ty = V->getType();
848	if (const auto VT = ftValueTypeFromType(FT: Ty))
849	return Builder.CreateCall(
850	Callee: NsanCheckValue[*VT],
851	Args: {V, ShadowV, Loc.getType(C&: Context), Loc.getValue(IntptrTy, Builder)});
852
853	if (Ty->isVectorTy()) {
854	auto *VecTy = cast<VectorType>(Val: Ty);
855	// We currently skip scalable vector types in MappingConfig,
856	// thus we should not encounter any such types here.
857	assert(!VecTy->isScalableTy() &&
858	"Scalable vector types are not supported yet");
859	Value CheckResult = nullptr*;
860	for (int I = `0`, E = VecTy->getElementCount().getFixedValue(); I < E; ++I) {
861	// We resume if any element resumes. Another option would be to create a
862	// vector shuffle with the array of ContinueWithShadow, but that is too
863	// complex.
864	Value *ExtractV = Builder.CreateExtractElement(Vec: V, Idx: I);
865	Value *ExtractShadowV = Builder.CreateExtractElement(Vec: ShadowV, Idx: I);
866	Value *ComponentCheckResult =
867	emitCheckInternal(V: ExtractV, ShadowV: ExtractShadowV, Builder, Loc);
868	CheckResult = CheckResult
869	? Builder.CreateOr(LHS: CheckResult, RHS: ComponentCheckResult)
870	: ComponentCheckResult;
871	}
872	return CheckResult;
873	}
874	if (Ty->isArrayTy()) {
875	Value CheckResult = nullptr*;
876	for (auto I : seq(Size: Ty->getArrayNumElements())) {
877	Value *ExtractV = Builder.CreateExtractElement(Vec: V, Idx: I);
878	Value *ExtractShadowV = Builder.CreateExtractElement(Vec: ShadowV, Idx: I);
879	Value *ComponentCheckResult =
880	emitCheckInternal(V: ExtractV, ShadowV: ExtractShadowV, Builder, Loc);
881	CheckResult = CheckResult
882	? Builder.CreateOr(LHS: CheckResult, RHS: ComponentCheckResult)
883	: ComponentCheckResult;
884	}
885	return CheckResult;
886	}
887	if (Ty->isStructTy()) {
888	Value CheckResult = nullptr*;
889	for (auto I : seq(Size: Ty->getStructNumElements())) {
890	if (Config.getExtendedFPType(FT: Ty->getStructElementType(N: I)) == nullptr)
891	continue; // Only check FT values.
892	Value *ExtractV = Builder.CreateExtractValue(Agg: V, Idxs: I);
893	Value *ExtractShadowV = Builder.CreateExtractElement(Vec: ShadowV, Idx: I);
894	Value *ComponentCheckResult =
895	emitCheckInternal(V: ExtractV, ShadowV: ExtractShadowV, Builder, Loc);
896	CheckResult = CheckResult
897	? Builder.CreateOr(LHS: CheckResult, RHS: ComponentCheckResult)
898	: ComponentCheckResult;
899	}
900	if (!CheckResult)
901	return ConstantInt::get(
902	Ty: Builder.getInt32Ty(),
903	V: static_cast<int>(ContinuationType::ContinueWithShadow));
904	return CheckResult;
905	}
906
907	llvm_unreachable("not implemented");
908	}
909
910	// Inserts a runtime check of V against its shadow value ShadowV.
911	// We check values whenever they escape: on return, call, stores, and
912	// insertvalue.
913	// Returns the shadow value that should be used to continue the computations,
914	// depending on the answer from the runtime.
915	// TODO: Should we check on select ? phi ?
916	Value NumericalStabilitySanitizer::emitCheck(Value V, Value *ShadowV,
917	IRBuilder<> &Builder,
918	CheckLoc Loc) {
919	// Do not emit checks for constant values, this is redundant.
920	if (isa<Constant>(Val: V))
921	return ShadowV;
922
923	if (Instruction *Inst = dyn_cast<Instruction>(Val: V)) {
924	Function *F = Inst->getFunction();
925	if (CheckFunctionsFilter && !CheckFunctionsFilter ->match(String: F->getName())) {
926	return ShadowV;
927	}
928	}
929
930	Value *CheckResult = emitCheckInternal(V, ShadowV, Builder, Loc);
931	Value *ICmpEQ = Builder.CreateICmpEQ(
932	LHS: CheckResult,
933	RHS: ConstantInt::get(Ty: Builder.getInt32Ty(),
934	V: static_cast<int>(ContinuationType::ResumeFromValue)));
935	return Builder.CreateSelect(
936	C: ICmpEQ, True: Builder.CreateFPExt(V, DestTy: Config.getExtendedFPType(FT: V->getType())),
937	False: ShadowV);
938	}
939
940	// Inserts a check that fcmp on shadow values are consistent with that on base
941	// values.
942	void NumericalStabilitySanitizer::emitFCmpCheck(FCmpInst &FCmp,
943	const ValueToShadowMap &Map) {
944	if (!ClInstrumentFCmp)
945	return;
946
947	Function *F = FCmp.getFunction();
948	if (CheckFunctionsFilter && !CheckFunctionsFilter ->match(String: F->getName()))
949	return;
950
951	Value *LHS = FCmp.getOperand(i_nocapture: `0`);
952	if (Config.getExtendedFPType(FT: LHS->getType()) == nullptr)
953	return;
954	Value *RHS = FCmp.getOperand(i_nocapture: `1`);
955
956	// Split the basic block. On mismatch, we'll jump to the new basic block with
957	// a call to the runtime for error reporting.
958	BasicBlock *FCmpBB = FCmp.getParent();
959	BasicBlock *NextBB = FCmpBB->splitBasicBlock(I: FCmp.getNextNode());
960	// Remove the newly created terminator unconditional branch.
961	FCmpBB->back().eraseFromParent();
962	BasicBlock *FailBB =
963	BasicBlock::Create(Context, Name: "", Parent: FCmpBB->getParent(), InsertBefore: NextBB);
964
965	// Create the shadow fcmp and comparison between the fcmps.
966	IRBuilder<> FCmpBuilder(FCmpBB);
967	FCmpBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
968	Value *ShadowLHS = Map.getShadow(V: LHS);
969	Value *ShadowRHS = Map.getShadow(V: RHS);
970	// See comment on ClTruncateFCmpEq.
971	if (FCmp.isEquality() && ClTruncateFCmpEq) {
972	Type *Ty = ShadowLHS->getType();
973	ShadowLHS = FCmpBuilder.CreateFPExt(
974	V: FCmpBuilder.CreateFPTrunc(V: ShadowLHS, DestTy: LHS->getType()), DestTy: Ty);
975	ShadowRHS = FCmpBuilder.CreateFPExt(
976	V: FCmpBuilder.CreateFPTrunc(V: ShadowRHS, DestTy: RHS->getType()), DestTy: Ty);
977	}
978	Value *ShadowFCmp =
979	FCmpBuilder.CreateFCmp(P: FCmp.getPredicate(), LHS: ShadowLHS, RHS: ShadowRHS);
980	Value *OriginalAndShadowFcmpMatch =
981	FCmpBuilder.CreateICmpEQ(LHS: &FCmp, RHS: ShadowFCmp);
982
983	if (OriginalAndShadowFcmpMatch->getType()->isVectorTy()) {
984	// If we have a vector type, `OriginalAndShadowFcmpMatch` is a vector of i1,
985	// where an element is true if the corresponding elements in original and
986	// shadow are the same. We want all elements to be 1.
987	OriginalAndShadowFcmpMatch =
988	FCmpBuilder.CreateAndReduce(Src: OriginalAndShadowFcmpMatch);
989	}
990
991	// Use MDBuilder(C).createLikelyBranchWeights() because "match" is the common*
992	// case.
993	FCmpBuilder.CreateCondBr(Cond: OriginalAndShadowFcmpMatch, True: NextBB, False: FailBB,
994	BranchWeights: MDBuilder (Context).createLikelyBranchWeights());
995
996	// Fill in FailBB.
997	IRBuilder<> FailBuilder(FailBB);
998	FailBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
999
1000	const auto EmitFailCall = [this, &FCmp, &FCmpBuilder,
1001	&FailBuilder](Value L, Value R, Value *ShadowL,
1002	Value ShadowR, Value Result,
1003	Value *ShadowResult) {
1004	Type *FT = L->getType();
1005	FunctionCallee Callee = nullptr*;
1006	if (FT->isFloatTy()) {
1007	Callee = &(NsanFCmpFail[kFloat]);
1008	} else if (FT->isDoubleTy()) {
1009	Callee = &(NsanFCmpFail[kDouble]);
1010	} else if (FT->isX86_FP80Ty()) {
1011	// TODO: make NsanFCmpFailLongDouble work.
1012	Callee = &(NsanFCmpFail[kDouble]);
1013	L = FailBuilder.CreateFPTrunc(V: L, DestTy: Type::getDoubleTy(C&: Context));
1014	R = FailBuilder.CreateFPTrunc(V: L, DestTy: Type::getDoubleTy(C&: Context));
1015	} else {
1016	llvm_unreachable("not implemented");
1017	}
1018	FailBuilder.CreateCall(Callee: *Callee, Args: {L, R, ShadowL, ShadowR,
1019	ConstantInt::get(Ty: FCmpBuilder.getInt32Ty(),
1020	V: FCmp.getPredicate()),
1021	Result, ShadowResult});
1022	};
1023	if (LHS->getType()->isVectorTy()) {
1024	for (int I = `0`, E = cast<VectorType>(Val: LHS->getType())
1025	->getElementCount()
1026	.getFixedValue();
1027	I < E; ++I) {
1028	Value *ExtractLHS = FailBuilder.CreateExtractElement(Vec: LHS, Idx: I);
1029	Value *ExtractRHS = FailBuilder.CreateExtractElement(Vec: RHS, Idx: I);
1030	Value *ExtractShaodwLHS = FailBuilder.CreateExtractElement(Vec: ShadowLHS, Idx: I);
1031	Value *ExtractShaodwRHS = FailBuilder.CreateExtractElement(Vec: ShadowRHS, Idx: I);
1032	Value *ExtractFCmp = FailBuilder.CreateExtractElement(Vec: &FCmp, Idx: I);
1033	Value *ExtractShadowFCmp =
1034	FailBuilder.CreateExtractElement(Vec: ShadowFCmp, Idx: I);
1035	EmitFailCall (ExtractLHS, ExtractRHS, ExtractShaodwLHS, ExtractShaodwRHS,
1036	ExtractFCmp, ExtractShadowFCmp);
1037	}
1038	} else {
1039	EmitFailCall (LHS, RHS, ShadowLHS, ShadowRHS, &FCmp, ShadowFCmp);
1040	}
1041	FailBuilder.CreateBr(Dest: NextBB);
1042
1043	++NumInstrumentedFCmp;
1044	}
1045
1046	// Creates a shadow phi value for any phi that defines a value of FT type.
1047	PHINode *NumericalStabilitySanitizer::maybeCreateShadowPhi(
1048	PHINode &Phi, const TargetLibraryInfo &TLI) {
1049	Type *VT = Phi.getType();
1050	Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
1051	if (ExtendedVT == nullptr)
1052	return nullptr; // Not an FT value.
1053	// The phi operands are shadow values and are not available when the phi is
1054	// created. They will be populated in a final phase, once all shadow values
1055	// have been created.
1056	PHINode *Shadow = PHINode::Create(Ty: ExtendedVT, NumReservedValues: Phi.getNumIncomingValues());
1057	Shadow->insertAfter(InsertPos: &Phi);
1058	return Shadow;
1059	}
1060
1061	Value NumericalStabilitySanitizer::handleLoad(LoadInst &Load, Type VT,
1062	Type *ExtendedVT) {
1063	IRBuilder<> Builder(Load.getNextNode());
1064	Builder.SetCurrentDebugLocation(Load.getDebugLoc());
1065	if (addrPointsToConstantData(Addr: Load.getPointerOperand())) {
1066	// No need to look into the shadow memory, the value is a constant. Just
1067	// convert from FT to 2FT.
1068	return Builder.CreateFPExt(V: &Load, DestTy: ExtendedVT);
1069	}
1070
1071	// if (%shadowptr == &)
1072	// %shadow = fpext %v
1073	// else
1074	// %shadow = load (ptrcast %shadow_ptr))
1075	// Considered options here:
1076	// - Have `NsanGetShadowPtrForLoad` return a fixed address
1077	// &__nsan_unknown_value_shadow_address that is valid to load from, and
1078	// use a select. This has the advantage that the generated IR is simpler.
1079	// - Have `NsanGetShadowPtrForLoad` return nullptr. Because `select` does
1080	// not short-circuit, dereferencing the returned pointer is no longer an
1081	// option, have to split and create a separate basic block. This has the
1082	// advantage of being easier to debug because it crashes if we ever mess
1083	// up.
1084
1085	const auto Extents = getMemoryExtentsOrDie(FT: VT);
1086	Value *ShadowPtr = Builder.CreateCall(
1087	Callee: NsanGetShadowPtrForLoad[Extents.ValueType],
1088	Args: {Load.getPointerOperand(), ConstantInt::get(Ty: IntptrTy, V: Extents.NumElts)});
1089	++NumInstrumentedFTLoads;
1090
1091	// Split the basic block.
1092	BasicBlock *LoadBB = Load.getParent();
1093	BasicBlock *NextBB = LoadBB->splitBasicBlock(I: Builder.GetInsertPoint());
1094	// Create the two options for creating the shadow value.
1095	BasicBlock *ShadowLoadBB =
1096	BasicBlock::Create(Context, Name: "", Parent: LoadBB->getParent(), InsertBefore: NextBB);
1097	BasicBlock *FExtBB =
1098	BasicBlock::Create(Context, Name: "", Parent: LoadBB->getParent(), InsertBefore: NextBB);
1099
1100	// Replace the newly created terminator unconditional branch by a conditional
1101	// branch to one of the options.
1102	{
1103	LoadBB->back().eraseFromParent();
1104	IRBuilder<> LoadBBBuilder(LoadBB); // The old builder has been invalidated.
1105	LoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1106	LoadBBBuilder.CreateCondBr(Cond: LoadBBBuilder.CreateIsNull(Arg: ShadowPtr), True: FExtBB,
1107	False: ShadowLoadBB);
1108	}
1109
1110	// Fill in ShadowLoadBB.
1111	IRBuilder<> ShadowLoadBBBuilder(ShadowLoadBB);
1112	ShadowLoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1113	Value *ShadowLoad = ShadowLoadBBBuilder.CreateAlignedLoad(
1114	Ty: ExtendedVT, Ptr: ShadowPtr, Align: Align (`1`), isVolatile: Load.isVolatile());
1115	if (ClCheckLoads) {
1116	ShadowLoad = emitCheck(V: &Load, ShadowV: ShadowLoad, Builder&: ShadowLoadBBBuilder,
1117	Loc: CheckLoc::makeLoad(Address: Load.getPointerOperand()));
1118	}
1119	ShadowLoadBBBuilder.CreateBr(Dest: NextBB);
1120
1121	// Fill in FExtBB.
1122	IRBuilder<> FExtBBBuilder(FExtBB);
1123	FExtBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1124	Value *FExt = FExtBBBuilder.CreateFPExt(V: &Load, DestTy: ExtendedVT);
1125	FExtBBBuilder.CreateBr(Dest: NextBB);
1126
1127	// The shadow value come from any of the options.
1128	IRBuilder<> NextBBBuilder(&*NextBB->begin());
1129	NextBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1130	PHINode *ShadowPhi = NextBBBuilder.CreatePHI(Ty: ExtendedVT, NumReservedValues: `2`);
1131	ShadowPhi->addIncoming(V: ShadowLoad, BB: ShadowLoadBB);
1132	ShadowPhi->addIncoming(V: FExt, BB: FExtBB);
1133	return ShadowPhi;
1134	}
1135
1136	Value NumericalStabilitySanitizer::handleTrunc(const* FPTruncInst &Trunc,
1137	Type VT, Type ExtendedVT,
1138	const ValueToShadowMap &Map,
1139	IRBuilder<> &Builder) {
1140	Value *OrigSource = Trunc.getOperand(i_nocapture: `0`);
1141	Type *OrigSourceTy = OrigSource->getType();
1142	Type *ExtendedSourceTy = Config.getExtendedFPType(FT: OrigSourceTy);
1143
1144	// When truncating:
1145	// - (A) If the source has a shadow, we truncate from the shadow, else we
1146	// truncate from the original source.
1147	// - (B) If the shadow of the source is larger than the shadow of the dest,
1148	// we still need a truncate. Else, the shadow of the source is the same
1149	// type as the shadow of the dest (because mappings are non-decreasing), so
1150	// we don't need to emit a truncate.
1151	// Examples,
1152	// with a mapping of {f32->f64;f64->f80;f80->f128}
1153	// fptrunc double %1 to float -> fptrunc x86_fp80 s(%1) to double
1154	// fptrunc x86_fp80 %1 to float -> fptrunc fp128 s(%1) to double
1155	// fptrunc fp128 %1 to float -> fptrunc fp128 %1 to double
1156	// fptrunc x86_fp80 %1 to double -> x86_fp80 s(%1)
1157	// fptrunc fp128 %1 to double -> fptrunc fp128 %1 to x86_fp80
1158	// fptrunc fp128 %1 to x86_fp80 -> fp128 %1
1159	// with a mapping of {f32->f64;f64->f128;f80->f128}
1160	// fptrunc double %1 to float -> fptrunc fp128 s(%1) to double
1161	// fptrunc x86_fp80 %1 to float -> fptrunc fp128 s(%1) to double
1162	// fptrunc fp128 %1 to float -> fptrunc fp128 %1 to double
1163	// fptrunc x86_fp80 %1 to double -> fp128 %1
1164	// fptrunc fp128 %1 to double -> fp128 %1
1165	// fptrunc fp128 %1 to x86_fp80 -> fp128 %1
1166	// with a mapping of {f32->f32;f64->f32;f80->f64}
1167	// fptrunc double %1 to float -> float s(%1)
1168	// fptrunc x86_fp80 %1 to float -> fptrunc double s(%1) to float
1169	// fptrunc fp128 %1 to float -> fptrunc fp128 %1 to float
1170	// fptrunc x86_fp80 %1 to double -> fptrunc double s(%1) to float
1171	// fptrunc fp128 %1 to double -> fptrunc fp128 %1 to float
1172	// fptrunc fp128 %1 to x86_fp80 -> fptrunc fp128 %1 to double
1173
1174	// See (A) above.
1175	Value *Source = ExtendedSourceTy ? Map.getShadow(V: OrigSource) : OrigSource;
1176	Type *SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
1177	// See (B) above.
1178	if (SourceTy == ExtendedVT)
1179	return Source;
1180
1181	return Builder.CreateFPTrunc(V: Source, DestTy: ExtendedVT);
1182	}
1183
1184	Value NumericalStabilitySanitizer::handleExt(const* FPExtInst &Ext, Type *VT,
1185	Type *ExtendedVT,
1186	const ValueToShadowMap &Map,
1187	IRBuilder<> &Builder) {
1188	Value *OrigSource = Ext.getOperand(i_nocapture: `0`);
1189	Type *OrigSourceTy = OrigSource->getType();
1190	Type *ExtendedSourceTy = Config.getExtendedFPType(FT: OrigSourceTy);
1191	// When extending:
1192	// - (A) If the source has a shadow, we extend from the shadow, else we
1193	// extend from the original source.
1194	// - (B) If the shadow of the dest is larger than the shadow of the source,
1195	// we still need an extend. Else, the shadow of the source is the same
1196	// type as the shadow of the dest (because mappings are non-decreasing), so
1197	// we don't need to emit an extend.
1198	// Examples,
1199	// with a mapping of {f32->f64;f64->f80;f80->f128}
1200	// fpext half %1 to float -> fpext half %1 to double
1201	// fpext half %1 to double -> fpext half %1 to x86_fp80
1202	// fpext half %1 to x86_fp80 -> fpext half %1 to fp128
1203	// fpext float %1 to double -> double s(%1)
1204	// fpext float %1 to x86_fp80 -> fpext double s(%1) to fp128
1205	// fpext double %1 to x86_fp80 -> fpext x86_fp80 s(%1) to fp128
1206	// with a mapping of {f32->f64;f64->f128;f80->f128}
1207	// fpext half %1 to float -> fpext half %1 to double
1208	// fpext half %1 to double -> fpext half %1 to fp128
1209	// fpext half %1 to x86_fp80 -> fpext half %1 to fp128
1210	// fpext float %1 to double -> fpext double s(%1) to fp128
1211	// fpext float %1 to x86_fp80 -> fpext double s(%1) to fp128
1212	// fpext double %1 to x86_fp80 -> fp128 s(%1)
1213	// with a mapping of {f32->f32;f64->f32;f80->f64}
1214	// fpext half %1 to float -> fpext half %1 to float
1215	// fpext half %1 to double -> fpext half %1 to float
1216	// fpext half %1 to x86_fp80 -> fpext half %1 to double
1217	// fpext float %1 to double -> s(%1)
1218	// fpext float %1 to x86_fp80 -> fpext float s(%1) to double
1219	// fpext double %1 to x86_fp80 -> fpext float s(%1) to double
1220
1221	// See (A) above.
1222	Value *Source = ExtendedSourceTy ? Map.getShadow(V: OrigSource) : OrigSource;
1223	Type *SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
1224	// See (B) above.
1225	if (SourceTy == ExtendedVT)
1226	return Source;
1227
1228	return Builder.CreateFPExt(V: Source, DestTy: ExtendedVT);
1229	}
1230
1231	namespace {
1232	// TODO: This should be tablegen-ed.
1233	struct KnownIntrinsic {
1234	struct WidenedIntrinsic {
1235	const char *NarrowName;
1236	Intrinsic::ID ID; // wide id.
1237	using FnTypeFactory = FunctionType ()(LLVMContext &);
1238	FnTypeFactory MakeFnTy;
1239	};
1240
1241	static const char *get(LibFunc LFunc);
1242
1243	// Given an intrinsic with an `FT` argument, try to find a wider intrinsic
1244	// that applies the same operation on the shadow argument.
1245	// Options are:
1246	// - pass in the ID and full function type,
1247	// - pass in the name, which includes the function type through mangling.
1248	static const WidenedIntrinsic *widen(StringRef Name);
1249
1250	private:
1251	struct LFEntry {
1252	LibFunc LFunc;
1253	const char *IntrinsicName;
1254	};
1255	static const LFEntry kLibfuncIntrinsics[];
1256
1257	static const WidenedIntrinsic kWidenedIntrinsics[];
1258	};
1259	} // namespace
1260
1261	static FunctionType *makeDoubleDouble(LLVMContext &C) {
1262	return FunctionType::get(Result: Type::getDoubleTy(C), Params: {Type::getDoubleTy(C)}, isVarArg: false);
1263	}
1264
1265	static FunctionType *makeX86FP80X86FP80(LLVMContext &C) {
1266	return FunctionType::get(Result: Type::getX86_FP80Ty(C), Params: {Type::getX86_FP80Ty(C)},
1267	isVarArg: false);
1268	}
1269
1270	static FunctionType *makeDoubleDoubleI32(LLVMContext &C) {
1271	return FunctionType::get(Result: Type::getDoubleTy(C),
1272	Params: {Type::getDoubleTy(C), Type::getInt32Ty(C)}, isVarArg: false);
1273	}
1274
1275	static FunctionType *makeX86FP80X86FP80I32(LLVMContext &C) {
1276	return FunctionType::get(Result: Type::getX86_FP80Ty(C),
1277	Params: {Type::getX86_FP80Ty(C), Type::getInt32Ty(C)},
1278	isVarArg: false);
1279	}
1280
1281	static FunctionType *makeDoubleDoubleDouble(LLVMContext &C) {
1282	return FunctionType::get(Result: Type::getDoubleTy(C),
1283	Params: {Type::getDoubleTy(C), Type::getDoubleTy(C)}, isVarArg: false);
1284	}
1285
1286	static FunctionType *makeX86FP80X86FP80X86FP80(LLVMContext &C) {
1287	return FunctionType::get(Result: Type::getX86_FP80Ty(C),
1288	Params: {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)},
1289	isVarArg: false);
1290	}
1291
1292	static FunctionType *makeDoubleDoubleDoubleDouble(LLVMContext &C) {
1293	return FunctionType::get(
1294	Result: Type::getDoubleTy(C),
1295	Params: {Type::getDoubleTy(C), Type::getDoubleTy(C), Type::getDoubleTy(C)},
1296	isVarArg: false);
1297	}
1298
1299	static FunctionType *makeX86FP80X86FP80X86FP80X86FP80(LLVMContext &C) {
1300	return FunctionType::get(
1301	Result: Type::getX86_FP80Ty(C),
1302	Params: {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)},
1303	isVarArg: false);
1304	}
1305
1306	const KnownIntrinsic::WidenedIntrinsic KnownIntrinsic::kWidenedIntrinsics[] = {
1307	// TODO: Right now we ignore vector intrinsics.
1308	// This is hard because we have to model the semantics of the intrinsics,
1309	// e.g. llvm.x86.sse2.min.sd means extract first element, min, insert back.
1310	// Intrinsics that take any non-vector FT types:
1311	// NOTE: Right now because of
1312	// https://github.com/llvm/llvm-project/issues/44744
1313	// for f128 we need to use makeX86FP80X86FP80 (go to a lower precision and
1314	// come back).
1315	{.NarrowName: "llvm.sqrt.f32", .ID: Intrinsic::sqrt, .MakeFnTy: makeDoubleDouble},
1316	{.NarrowName: "llvm.sqrt.f64", .ID: Intrinsic::sqrt, .MakeFnTy: makeX86FP80X86FP80},
1317	{.NarrowName: "llvm.sqrt.f80", .ID: Intrinsic::sqrt, .MakeFnTy: makeX86FP80X86FP80},
1318	{.NarrowName: "llvm.powi.f32", .ID: Intrinsic::powi, .MakeFnTy: makeDoubleDoubleI32},
1319	{.NarrowName: "llvm.powi.f64", .ID: Intrinsic::powi, .MakeFnTy: makeX86FP80X86FP80I32},
1320	{.NarrowName: "llvm.powi.f80", .ID: Intrinsic::powi, .MakeFnTy: makeX86FP80X86FP80I32},
1321	{.NarrowName: "llvm.sin.f32", .ID: Intrinsic::sin, .MakeFnTy: makeDoubleDouble},
1322	{.NarrowName: "llvm.sin.f64", .ID: Intrinsic::sin, .MakeFnTy: makeX86FP80X86FP80},
1323	{.NarrowName: "llvm.sin.f80", .ID: Intrinsic::sin, .MakeFnTy: makeX86FP80X86FP80},
1324	{.NarrowName: "llvm.cos.f32", .ID: Intrinsic::cos, .MakeFnTy: makeDoubleDouble},
1325	{.NarrowName: "llvm.cos.f64", .ID: Intrinsic::cos, .MakeFnTy: makeX86FP80X86FP80},
1326	{.NarrowName: "llvm.cos.f80", .ID: Intrinsic::cos, .MakeFnTy: makeX86FP80X86FP80},
1327	{.NarrowName: "llvm.pow.f32", .ID: Intrinsic::pow, .MakeFnTy: makeDoubleDoubleDouble},
1328	{.NarrowName: "llvm.pow.f64", .ID: Intrinsic::pow, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1329	{.NarrowName: "llvm.pow.f80", .ID: Intrinsic::pow, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1330	{.NarrowName: "llvm.exp.f32", .ID: Intrinsic::exp, .MakeFnTy: makeDoubleDouble},
1331	{.NarrowName: "llvm.exp.f64", .ID: Intrinsic::exp, .MakeFnTy: makeX86FP80X86FP80},
1332	{.NarrowName: "llvm.exp.f80", .ID: Intrinsic::exp, .MakeFnTy: makeX86FP80X86FP80},
1333	{.NarrowName: "llvm.exp2.f32", .ID: Intrinsic::exp2, .MakeFnTy: makeDoubleDouble},
1334	{.NarrowName: "llvm.exp2.f64", .ID: Intrinsic::exp2, .MakeFnTy: makeX86FP80X86FP80},
1335	{.NarrowName: "llvm.exp2.f80", .ID: Intrinsic::exp2, .MakeFnTy: makeX86FP80X86FP80},
1336	{.NarrowName: "llvm.log.f32", .ID: Intrinsic::log, .MakeFnTy: makeDoubleDouble},
1337	{.NarrowName: "llvm.log.f64", .ID: Intrinsic::log, .MakeFnTy: makeX86FP80X86FP80},
1338	{.NarrowName: "llvm.log.f80", .ID: Intrinsic::log, .MakeFnTy: makeX86FP80X86FP80},
1339	{.NarrowName: "llvm.log10.f32", .ID: Intrinsic::log10, .MakeFnTy: makeDoubleDouble},
1340	{.NarrowName: "llvm.log10.f64", .ID: Intrinsic::log10, .MakeFnTy: makeX86FP80X86FP80},
1341	{.NarrowName: "llvm.log10.f80", .ID: Intrinsic::log10, .MakeFnTy: makeX86FP80X86FP80},
1342	{.NarrowName: "llvm.log2.f32", .ID: Intrinsic::log2, .MakeFnTy: makeDoubleDouble},
1343	{.NarrowName: "llvm.log2.f64", .ID: Intrinsic::log2, .MakeFnTy: makeX86FP80X86FP80},
1344	{.NarrowName: "llvm.log2.f80", .ID: Intrinsic::log2, .MakeFnTy: makeX86FP80X86FP80},
1345	{.NarrowName: "llvm.fma.f32", .ID: Intrinsic::fma, .MakeFnTy: makeDoubleDoubleDoubleDouble},
1346
1347	{.NarrowName: "llvm.fmuladd.f32", .ID: Intrinsic::fmuladd, .MakeFnTy: makeDoubleDoubleDoubleDouble},
1348
1349	{.NarrowName: "llvm.fma.f64", .ID: Intrinsic::fma, .MakeFnTy: makeX86FP80X86FP80X86FP80X86FP80},
1350
1351	{.NarrowName: "llvm.fmuladd.f64", .ID: Intrinsic::fma, .MakeFnTy: makeX86FP80X86FP80X86FP80X86FP80},
1352
1353	{.NarrowName: "llvm.fma.f80", .ID: Intrinsic::fma, .MakeFnTy: makeX86FP80X86FP80X86FP80X86FP80},
1354	{.NarrowName: "llvm.fabs.f32", .ID: Intrinsic::fabs, .MakeFnTy: makeDoubleDouble},
1355	{.NarrowName: "llvm.fabs.f64", .ID: Intrinsic::fabs, .MakeFnTy: makeX86FP80X86FP80},
1356	{.NarrowName: "llvm.fabs.f80", .ID: Intrinsic::fabs, .MakeFnTy: makeX86FP80X86FP80},
1357	{.NarrowName: "llvm.minnum.f32", .ID: Intrinsic::minnum, .MakeFnTy: makeDoubleDoubleDouble},
1358	{.NarrowName: "llvm.minnum.f64", .ID: Intrinsic::minnum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1359	{.NarrowName: "llvm.minnum.f80", .ID: Intrinsic::minnum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1360	{.NarrowName: "llvm.maxnum.f32", .ID: Intrinsic::maxnum, .MakeFnTy: makeDoubleDoubleDouble},
1361	{.NarrowName: "llvm.maxnum.f64", .ID: Intrinsic::maxnum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1362	{.NarrowName: "llvm.maxnum.f80", .ID: Intrinsic::maxnum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1363	{.NarrowName: "llvm.minimum.f32", .ID: Intrinsic::minimum, .MakeFnTy: makeDoubleDoubleDouble},
1364	{.NarrowName: "llvm.minimum.f64", .ID: Intrinsic::minimum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1365	{.NarrowName: "llvm.minimum.f80", .ID: Intrinsic::minimum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1366	{.NarrowName: "llvm.maximum.f32", .ID: Intrinsic::maximum, .MakeFnTy: makeDoubleDoubleDouble},
1367	{.NarrowName: "llvm.maximum.f64", .ID: Intrinsic::maximum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1368	{.NarrowName: "llvm.maximum.f80", .ID: Intrinsic::maximum, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1369	{.NarrowName: "llvm.copysign.f32", .ID: Intrinsic::copysign, .MakeFnTy: makeDoubleDoubleDouble},
1370	{.NarrowName: "llvm.copysign.f64", .ID: Intrinsic::copysign, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1371	{.NarrowName: "llvm.copysign.f80", .ID: Intrinsic::copysign, .MakeFnTy: makeX86FP80X86FP80X86FP80},
1372	{.NarrowName: "llvm.floor.f32", .ID: Intrinsic::floor, .MakeFnTy: makeDoubleDouble},
1373	{.NarrowName: "llvm.floor.f64", .ID: Intrinsic::floor, .MakeFnTy: makeX86FP80X86FP80},
1374	{.NarrowName: "llvm.floor.f80", .ID: Intrinsic::floor, .MakeFnTy: makeX86FP80X86FP80},
1375	{.NarrowName: "llvm.ceil.f32", .ID: Intrinsic::ceil, .MakeFnTy: makeDoubleDouble},
1376	{.NarrowName: "llvm.ceil.f64", .ID: Intrinsic::ceil, .MakeFnTy: makeX86FP80X86FP80},
1377	{.NarrowName: "llvm.ceil.f80", .ID: Intrinsic::ceil, .MakeFnTy: makeX86FP80X86FP80},
1378	{.NarrowName: "llvm.trunc.f32", .ID: Intrinsic::trunc, .MakeFnTy: makeDoubleDouble},
1379	{.NarrowName: "llvm.trunc.f64", .ID: Intrinsic::trunc, .MakeFnTy: makeX86FP80X86FP80},
1380	{.NarrowName: "llvm.trunc.f80", .ID: Intrinsic::trunc, .MakeFnTy: makeX86FP80X86FP80},
1381	{.NarrowName: "llvm.rint.f32", .ID: Intrinsic::rint, .MakeFnTy: makeDoubleDouble},
1382	{.NarrowName: "llvm.rint.f64", .ID: Intrinsic::rint, .MakeFnTy: makeX86FP80X86FP80},
1383	{.NarrowName: "llvm.rint.f80", .ID: Intrinsic::rint, .MakeFnTy: makeX86FP80X86FP80},
1384	{.NarrowName: "llvm.nearbyint.f32", .ID: Intrinsic::nearbyint, .MakeFnTy: makeDoubleDouble},
1385	{.NarrowName: "llvm.nearbyint.f64", .ID: Intrinsic::nearbyint, .MakeFnTy: makeX86FP80X86FP80},
1386	{.NarrowName: "llvm.nearbyin80f64", .ID: Intrinsic::nearbyint, .MakeFnTy: makeX86FP80X86FP80},
1387	{.NarrowName: "llvm.round.f32", .ID: Intrinsic::round, .MakeFnTy: makeDoubleDouble},
1388	{.NarrowName: "llvm.round.f64", .ID: Intrinsic::round, .MakeFnTy: makeX86FP80X86FP80},
1389	{.NarrowName: "llvm.round.f80", .ID: Intrinsic::round, .MakeFnTy: makeX86FP80X86FP80},
1390	{.NarrowName: "llvm.lround.f32", .ID: Intrinsic::lround, .MakeFnTy: makeDoubleDouble},
1391	{.NarrowName: "llvm.lround.f64", .ID: Intrinsic::lround, .MakeFnTy: makeX86FP80X86FP80},
1392	{.NarrowName: "llvm.lround.f80", .ID: Intrinsic::lround, .MakeFnTy: makeX86FP80X86FP80},
1393	{.NarrowName: "llvm.llround.f32", .ID: Intrinsic::llround, .MakeFnTy: makeDoubleDouble},
1394	{.NarrowName: "llvm.llround.f64", .ID: Intrinsic::llround, .MakeFnTy: makeX86FP80X86FP80},
1395	{.NarrowName: "llvm.llround.f80", .ID: Intrinsic::llround, .MakeFnTy: makeX86FP80X86FP80},
1396	{.NarrowName: "llvm.lrint.f32", .ID: Intrinsic::lrint, .MakeFnTy: makeDoubleDouble},
1397	{.NarrowName: "llvm.lrint.f64", .ID: Intrinsic::lrint, .MakeFnTy: makeX86FP80X86FP80},
1398	{.NarrowName: "llvm.lrint.f80", .ID: Intrinsic::lrint, .MakeFnTy: makeX86FP80X86FP80},
1399	{.NarrowName: "llvm.llrint.f32", .ID: Intrinsic::llrint, .MakeFnTy: makeDoubleDouble},
1400	{.NarrowName: "llvm.llrint.f64", .ID: Intrinsic::llrint, .MakeFnTy: makeX86FP80X86FP80},
1401	{.NarrowName: "llvm.llrint.f80", .ID: Intrinsic::llrint, .MakeFnTy: makeX86FP80X86FP80},
1402	};
1403
1404	const KnownIntrinsic::LFEntry KnownIntrinsic::kLibfuncIntrinsics[] = {
1405	{.LFunc: LibFunc_sqrtf, .IntrinsicName: "llvm.sqrt.f32"},
1406	{.LFunc: LibFunc_sqrt, .IntrinsicName: "llvm.sqrt.f64"},
1407	{.LFunc: LibFunc_sqrtl, .IntrinsicName: "llvm.sqrt.f80"},
1408	{.LFunc: LibFunc_sinf, .IntrinsicName: "llvm.sin.f32"},
1409	{.LFunc: LibFunc_sin, .IntrinsicName: "llvm.sin.f64"},
1410	{.LFunc: LibFunc_sinl, .IntrinsicName: "llvm.sin.f80"},
1411	{.LFunc: LibFunc_cosf, .IntrinsicName: "llvm.cos.f32"},
1412	{.LFunc: LibFunc_cos, .IntrinsicName: "llvm.cos.f64"},
1413	{.LFunc: LibFunc_cosl, .IntrinsicName: "llvm.cos.f80"},
1414	{.LFunc: LibFunc_powf, .IntrinsicName: "llvm.pow.f32"},
1415	{.LFunc: LibFunc_pow, .IntrinsicName: "llvm.pow.f64"},
1416	{.LFunc: LibFunc_powl, .IntrinsicName: "llvm.pow.f80"},
1417	{.LFunc: LibFunc_expf, .IntrinsicName: "llvm.exp.f32"},
1418	{.LFunc: LibFunc_exp, .IntrinsicName: "llvm.exp.f64"},
1419	{.LFunc: LibFunc_expl, .IntrinsicName: "llvm.exp.f80"},
1420	{.LFunc: LibFunc_exp2f, .IntrinsicName: "llvm.exp2.f32"},
1421	{.LFunc: LibFunc_exp2, .IntrinsicName: "llvm.exp2.f64"},
1422	{.LFunc: LibFunc_exp2l, .IntrinsicName: "llvm.exp2.f80"},
1423	{.LFunc: LibFunc_logf, .IntrinsicName: "llvm.log.f32"},
1424	{.LFunc: LibFunc_log, .IntrinsicName: "llvm.log.f64"},
1425	{.LFunc: LibFunc_logl, .IntrinsicName: "llvm.log.f80"},
1426	{.LFunc: LibFunc_log10f, .IntrinsicName: "llvm.log10.f32"},
1427	{.LFunc: LibFunc_log10, .IntrinsicName: "llvm.log10.f64"},
1428	{.LFunc: LibFunc_log10l, .IntrinsicName: "llvm.log10.f80"},
1429	{.LFunc: LibFunc_log2f, .IntrinsicName: "llvm.log2.f32"},
1430	{.LFunc: LibFunc_log2, .IntrinsicName: "llvm.log2.f64"},
1431	{.LFunc: LibFunc_log2l, .IntrinsicName: "llvm.log2.f80"},
1432	{.LFunc: LibFunc_fabsf, .IntrinsicName: "llvm.fabs.f32"},
1433	{.LFunc: LibFunc_fabs, .IntrinsicName: "llvm.fabs.f64"},
1434	{.LFunc: LibFunc_fabsl, .IntrinsicName: "llvm.fabs.f80"},
1435	{.LFunc: LibFunc_copysignf, .IntrinsicName: "llvm.copysign.f32"},
1436	{.LFunc: LibFunc_copysign, .IntrinsicName: "llvm.copysign.f64"},
1437	{.LFunc: LibFunc_copysignl, .IntrinsicName: "llvm.copysign.f80"},
1438	{.LFunc: LibFunc_floorf, .IntrinsicName: "llvm.floor.f32"},
1439	{.LFunc: LibFunc_floor, .IntrinsicName: "llvm.floor.f64"},
1440	{.LFunc: LibFunc_floorl, .IntrinsicName: "llvm.floor.f80"},
1441	{.LFunc: LibFunc_fmaxf, .IntrinsicName: "llvm.maxnum.f32"},
1442	{.LFunc: LibFunc_fmax, .IntrinsicName: "llvm.maxnum.f64"},
1443	{.LFunc: LibFunc_fmaxl, .IntrinsicName: "llvm.maxnum.f80"},
1444	{.LFunc: LibFunc_fminf, .IntrinsicName: "llvm.minnum.f32"},
1445	{.LFunc: LibFunc_fmin, .IntrinsicName: "llvm.minnum.f64"},
1446	{.LFunc: LibFunc_fminl, .IntrinsicName: "llvm.minnum.f80"},
1447	{.LFunc: LibFunc_ceilf, .IntrinsicName: "llvm.ceil.f32"},
1448	{.LFunc: LibFunc_ceil, .IntrinsicName: "llvm.ceil.f64"},
1449	{.LFunc: LibFunc_ceill, .IntrinsicName: "llvm.ceil.f80"},
1450	{.LFunc: LibFunc_truncf, .IntrinsicName: "llvm.trunc.f32"},
1451	{.LFunc: LibFunc_trunc, .IntrinsicName: "llvm.trunc.f64"},
1452	{.LFunc: LibFunc_truncl, .IntrinsicName: "llvm.trunc.f80"},
1453	{.LFunc: LibFunc_rintf, .IntrinsicName: "llvm.rint.f32"},
1454	{.LFunc: LibFunc_rint, .IntrinsicName: "llvm.rint.f64"},
1455	{.LFunc: LibFunc_rintl, .IntrinsicName: "llvm.rint.f80"},
1456	{.LFunc: LibFunc_nearbyintf, .IntrinsicName: "llvm.nearbyint.f32"},
1457	{.LFunc: LibFunc_nearbyint, .IntrinsicName: "llvm.nearbyint.f64"},
1458	{.LFunc: LibFunc_nearbyintl, .IntrinsicName: "llvm.nearbyint.f80"},
1459	{.LFunc: LibFunc_roundf, .IntrinsicName: "llvm.round.f32"},
1460	{.LFunc: LibFunc_round, .IntrinsicName: "llvm.round.f64"},
1461	{.LFunc: LibFunc_roundl, .IntrinsicName: "llvm.round.f80"},
1462	};
1463
1464	const char *KnownIntrinsic::get(LibFunc LFunc) {
1465	for (const auto &E : kLibfuncIntrinsics) {
1466	if (E.LFunc == LFunc)
1467	return E.IntrinsicName;
1468	}
1469	return nullptr;
1470	}
1471
1472	const KnownIntrinsic::WidenedIntrinsic *KnownIntrinsic::widen(StringRef Name) {
1473	for (const auto &E : kWidenedIntrinsics) {
1474	if (E.NarrowName == Name)
1475	return &E;
1476	}
1477	return nullptr;
1478	}
1479
1480	// Returns the name of the LLVM intrinsic corresponding to the given function.
1481	static const char getIntrinsicFromLibfunc(Function &Fn, Type VT,
1482	const TargetLibraryInfo &TLI) {
1483	LibFunc LFunc;
1484	if (!TLI.getLibFunc(FDecl: Fn, F&: LFunc))
1485	return nullptr;
1486
1487	if (const char *Name = KnownIntrinsic::get(LFunc))
1488	return Name;
1489
1490	LLVM_DEBUG(errs() << "TODO: LibFunc: " << TLI.getName(LFunc) << "\n");
1491	return nullptr;
1492	}
1493
1494	// Try to handle a known function call.
1495	Value *NumericalStabilitySanitizer::maybeHandleKnownCallBase(
1496	CallBase &Call, Type VT, Type ExtendedVT, const TargetLibraryInfo &TLI,
1497	const ValueToShadowMap &Map, IRBuilder<> &Builder) {
1498	Function *Fn = Call.getCalledFunction();
1499	if (Fn == nullptr)
1500	return nullptr;
1501
1502	Intrinsic::ID WidenedId = Intrinsic::ID();
1503	FunctionType WidenedFnTy = nullptr*;
1504	if (const auto ID = Fn->getIntrinsicID()) {
1505	const auto *Widened = KnownIntrinsic::widen(Name: Fn->getName());
1506	if (Widened) {
1507	WidenedId = Widened->ID;
1508	WidenedFnTy = Widened->MakeFnTy(Context);
1509	} else {
1510	// If we don't know how to widen the intrinsic, we have no choice but to
1511	// call the non-wide version on a truncated shadow and extend again
1512	// afterwards.
1513	WidenedId = ID;
1514	WidenedFnTy = Fn->getFunctionType();
1515	}
1516	} else if (const char Name = getIntrinsicFromLibfunc(Fn&: Fn, VT, TLI)) {
1517	// We might have a call to a library function that we can replace with a
1518	// wider Intrinsic.
1519	const auto *Widened = KnownIntrinsic::widen(Name);
1520	assert(Widened && "make sure KnownIntrinsic entries are consistent");
1521	WidenedId = Widened->ID;
1522	WidenedFnTy = Widened->MakeFnTy(Context);
1523	} else {
1524	// This is not a known library function or intrinsic.
1525	return nullptr;
1526	}
1527
1528	// Check that the widened intrinsic is valid.
1529	SmallVector<Intrinsic::IITDescriptor, `8`> Table;
1530	getIntrinsicInfoTableEntries(id: WidenedId, T&: Table);
1531	SmallVector<Type *, `4`> ArgTys;
1532	ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
1533	[[maybe_unused]] Intrinsic::MatchIntrinsicTypesResult MatchResult =
1534	Intrinsic::matchIntrinsicSignature(FTy: WidenedFnTy, Infos&: TableRef, ArgTys);
1535	assert(MatchResult == Intrinsic::MatchIntrinsicTypes_Match &&
1536	"invalid widened intrinsic");
1537	// For known intrinsic functions, we create a second call to the same
1538	// intrinsic with a different type.
1539	SmallVector<Value *, `4`> Args;
1540	// The last operand is the intrinsic itself, skip it.
1541	for (unsigned I = `0`, E = Call.getNumOperands() - `1`; I < E; ++I) {
1542	Value *Arg = Call.getOperand(i_nocapture: I);
1543	Type *OrigArgTy = Arg->getType();
1544	Type *IntrinsicArgTy = WidenedFnTy->getParamType(i: I);
1545	if (OrigArgTy == IntrinsicArgTy) {
1546	Args.push_back(Elt: Arg); // The arg is passed as is.
1547	continue;
1548	}
1549	Type *ShadowArgTy = Config.getExtendedFPType(FT: Arg->getType());
1550	assert(ShadowArgTy &&
1551	"don't know how to get the shadow value for a non-FT");
1552	Value *Shadow = Map.getShadow(V: Arg);
1553	if (ShadowArgTy == IntrinsicArgTy) {
1554	// The shadow is the right type for the intrinsic.
1555	assert(Shadow->getType() == ShadowArgTy);
1556	Args.push_back(Elt: Shadow);
1557	continue;
1558	}
1559	// There is no intrinsic with his level of precision, truncate the shadow.
1560	Args.push_back(Elt: Builder.CreateFPTrunc(V: Shadow, DestTy: IntrinsicArgTy));
1561	}
1562	Value *IntrinsicCall = Builder.CreateIntrinsic(ID: WidenedId, Types: ArgTys, Args);
1563	return WidenedFnTy->getReturnType() == ExtendedVT
1564	? IntrinsicCall
1565	: Builder.CreateFPExt(V: IntrinsicCall, DestTy: ExtendedVT);
1566	}
1567
1568	// Handle a CallBase, i.e. a function call, an inline asm sequence, or an
1569	// invoke.
1570	Value NumericalStabilitySanitizer::handleCallBase(CallBase &Call, Type VT,
1571	Type *ExtendedVT,
1572	const TargetLibraryInfo &TLI,
1573	const ValueToShadowMap &Map,
1574	IRBuilder<> &Builder) {
1575	// We cannot look inside inline asm, just expand the result again.
1576	if (Call.isInlineAsm())
1577	return Builder.CreateFPExt(V: &Call, DestTy: ExtendedVT);
1578
1579	// Intrinsics and library functions (e.g. sin, exp) are handled
1580	// specifically, because we know their semantics and can do better than
1581	// blindly calling them (e.g. compute the sinus in the actual shadow domain).
1582	if (Value *V =
1583	maybeHandleKnownCallBase(Call, VT, ExtendedVT, TLI, Map, Builder))
1584	return V;
1585
1586	// If the return tag matches that of the called function, read the extended
1587	// return value from the shadow ret ptr. Else, just extend the return value.
1588	Value *L =
1589	Builder.CreateLoad(Ty: IntptrTy, Ptr: NsanShadowRetTag, /isVolatile=/false);
1590	Value *HasShadowRet = Builder.CreateICmpEQ(
1591	LHS: L, RHS: Builder.CreatePtrToInt(V: Call.getCalledOperand(), DestTy: IntptrTy));
1592
1593	Value *ShadowRetVal = Builder.CreateLoad(
1594	Ty: ExtendedVT,
1595	Ptr: Builder.CreateConstGEP2_64(Ty: NsanShadowRetType, Ptr: NsanShadowRetPtr, Idx0: `0`, Idx1: `0`),
1596	/isVolatile=/false);
1597	Value *Shadow = Builder.CreateSelect(C: HasShadowRet, True: ShadowRetVal,
1598	False: Builder.CreateFPExt(V: &Call, DestTy: ExtendedVT));
1599	++NumInstrumentedFTCalls;
1600	return Shadow;
1601	}
1602
1603	// Creates a shadow value for the given FT value. At that point all operands are
1604	// guaranteed to be available.
1605	Value *NumericalStabilitySanitizer::createShadowValueWithOperandsAvailable(
1606	Instruction &Inst, const TargetLibraryInfo &TLI,
1607	const ValueToShadowMap &Map) {
1608	Type *VT = Inst.getType();
1609	Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
1610	assert(ExtendedVT != nullptr && "trying to create a shadow for a non-FT");
1611
1612	if (auto *Load = dyn_cast<LoadInst>(Val: &Inst))
1613	return handleLoad(Load&: *Load, VT, ExtendedVT);
1614
1615	if (auto *Call = dyn_cast<CallInst>(Val: &Inst)) {
1616	// Insert after the call.
1617	BasicBlock::iterator It(Inst);
1618	IRBuilder<> Builder(Call->getParent(), ++It);
1619	Builder.SetCurrentDebugLocation(Call->getDebugLoc());
1620	return handleCallBase(Call&: *Call, VT, ExtendedVT, TLI, Map, Builder);
1621	}
1622
1623	if (auto *Invoke = dyn_cast<InvokeInst>(Val: &Inst)) {
1624	// The Invoke terminates the basic block, create a new basic block in
1625	// between the successful invoke and the next block.
1626	BasicBlock *InvokeBB = Invoke->getParent();
1627	BasicBlock *NextBB = Invoke->getNormalDest();
1628	BasicBlock *NewBB =
1629	BasicBlock::Create(Context, Name: "", Parent: NextBB->getParent(), InsertBefore: NextBB);
1630	Inst.replaceSuccessorWith(OldBB: NextBB, NewBB);
1631
1632	IRBuilder<> Builder(NewBB);
1633	Builder.SetCurrentDebugLocation(Invoke->getDebugLoc());
1634	Value Shadow = handleCallBase(Call&: Invoke, VT, ExtendedVT, TLI, Map, Builder);
1635	Builder.CreateBr(Dest: NextBB);
1636	NewBB->replaceSuccessorsPhiUsesWith(Old: InvokeBB, New: NewBB);
1637	return Shadow;
1638	}
1639
1640	IRBuilder<> Builder(Inst.getNextNode());
1641	Builder.SetCurrentDebugLocation(Inst.getDebugLoc());
1642
1643	if (auto *Trunc = dyn_cast<FPTruncInst>(Val: &Inst))
1644	return handleTrunc(Trunc: *Trunc, VT, ExtendedVT, Map, Builder);
1645	if (auto *Ext = dyn_cast<FPExtInst>(Val: &Inst))
1646	return handleExt(Ext: *Ext, VT, ExtendedVT, Map, Builder);
1647
1648	if (auto *UnaryOp = dyn_cast<UnaryOperator>(Val: &Inst))
1649	return Builder.CreateUnOp(Opc: UnaryOp->getOpcode(),
1650	V: Map.getShadow(V: UnaryOp->getOperand(i_nocapture: `0`)));
1651
1652	if (auto *BinOp = dyn_cast<BinaryOperator>(Val: &Inst))
1653	return Builder.CreateBinOp(Opc: BinOp->getOpcode(),
1654	LHS: Map.getShadow(V: BinOp->getOperand(i_nocapture: `0`)),
1655	RHS: Map.getShadow(V: BinOp->getOperand(i_nocapture: `1`)));
1656
1657	if (isa<UIToFPInst>(Val: &Inst) \|\| isa<SIToFPInst>(Val: &Inst)) {
1658	auto *Cast = dyn_cast<CastInst>(Val: &Inst);
1659	return Builder.CreateCast(Op: Cast->getOpcode(), V: Cast->getOperand(i_nocapture: `0`),
1660	DestTy: ExtendedVT);
1661	}
1662
1663	if (auto *S = dyn_cast<SelectInst>(Val: &Inst))
1664	return Builder.CreateSelect(C: S->getCondition(),
1665	True: Map.getShadow(V: S->getTrueValue()),
1666	False: Map.getShadow(V: S->getFalseValue()));
1667
1668	if (auto *Extract = dyn_cast<ExtractElementInst>(Val: &Inst))
1669	return Builder.CreateExtractElement(
1670	Vec: Map.getShadow(V: Extract->getVectorOperand()), Idx: Extract->getIndexOperand());
1671
1672	if (auto *Insert = dyn_cast<InsertElementInst>(Val: &Inst))
1673	return Builder.CreateInsertElement(Vec: Map.getShadow(V: Insert->getOperand(i_nocapture: `0`)),
1674	NewElt: Map.getShadow(V: Insert->getOperand(i_nocapture: `1`)),
1675	Idx: Insert->getOperand(i_nocapture: `2`));
1676
1677	if (auto *Shuffle = dyn_cast<ShuffleVectorInst>(Val: &Inst))
1678	return Builder.CreateShuffleVector(V1: Map.getShadow(V: Shuffle->getOperand(i_nocapture: `0`)),
1679	V2: Map.getShadow(V: Shuffle->getOperand(i_nocapture: `1`)),
1680	Mask: Shuffle->getShuffleMask());
1681	// TODO: We could make aggregate object first class citizens. For now we
1682	// just extend the extracted value.
1683	if (auto *Extract = dyn_cast<ExtractValueInst>(Val: &Inst))
1684	return Builder.CreateFPExt(V: Extract, DestTy: ExtendedVT);
1685
1686	if (auto *BC = dyn_cast<BitCastInst>(Val: &Inst))
1687	return Builder.CreateFPExt(V: BC, DestTy: ExtendedVT);
1688
1689	report_fatal_error(reason: "Unimplemented support for " +
1690	Twine (Inst.getOpcodeName()));
1691	}
1692
1693	// Creates a shadow value for an instruction that defines a value of FT type.
1694	// FT operands that do not already have shadow values are created recursively.
1695	// The DFS is guaranteed to not loop as phis and arguments already have
1696	// shadows.
1697	void NumericalStabilitySanitizer::maybeCreateShadowValue(
1698	Instruction &Root, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
1699	Type *VT = Root.getType();
1700	Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
1701	if (ExtendedVT == nullptr)
1702	return; // Not an FT value.
1703
1704	if (Map.hasShadow(V: &Root))
1705	return; // Shadow already exists.
1706
1707	assert(!isa<PHINode>(Root) && "phi nodes should already have shadows");
1708
1709	std::vector<Instruction *> DfsStack(`1`, &Root);
1710	while (!DfsStack.empty()) {
1711	// Ensure that all operands to the instruction have shadows before
1712	// proceeding.
1713	Instruction *I = DfsStack.back();
1714	// The shadow for the instruction might have been created deeper in the DFS,
1715	// see `forward_use_with_two_uses` test.
1716	if (Map.hasShadow(V: I)) {
1717	DfsStack.pop_back();
1718	continue;
1719	}
1720
1721	bool MissingShadow = false;
1722	for (Value *Op : I->operands()) {
1723	Type *VT = Op->getType();
1724	if (!Config.getExtendedFPType(FT: VT))
1725	continue; // Not an FT value.
1726	if (Map.hasShadow(V: Op))
1727	continue; // Shadow is already available.
1728	MissingShadow = true;
1729	DfsStack.push_back(x: cast<Instruction>(Val: Op));
1730	}
1731	if (MissingShadow)
1732	continue; // Process operands and come back to this instruction later.
1733
1734	// All operands have shadows. Create a shadow for the current value.
1735	Value Shadow = createShadowValueWithOperandsAvailable(Inst&: I, TLI, Map);
1736	Map.setShadow(V&: I, Shadow&: Shadow);
1737	DfsStack.pop_back();
1738	}
1739	}
1740
1741	// A floating-point store needs its value and type written to shadow memory.
1742	void NumericalStabilitySanitizer::propagateFTStore(
1743	StoreInst &Store, Type VT, Type ExtendedVT, const ValueToShadowMap &Map) {
1744	Value *StoredValue = Store.getValueOperand();
1745	IRBuilder<> Builder(&Store);
1746	Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1747	const auto Extents = getMemoryExtentsOrDie(FT: VT);
1748	Value *ShadowPtr = Builder.CreateCall(
1749	Callee: NsanGetShadowPtrForStore[Extents.ValueType],
1750	Args: {Store.getPointerOperand(), ConstantInt::get(Ty: IntptrTy, V: Extents.NumElts)});
1751
1752	Value *StoredShadow = Map.getShadow(V: StoredValue);
1753	if (!Store.getParent()->getParent()->hasOptNone()) {
1754	// Only check stores when optimizing, because non-optimized code generates
1755	// too many stores to the stack, creating false positives.
1756	if (ClCheckStores) {
1757	StoredShadow = emitCheck(V: StoredValue, ShadowV: StoredShadow, Builder,
1758	Loc: CheckLoc::makeStore(Address: Store.getPointerOperand()));
1759	++NumInstrumentedFTStores;
1760	}
1761	}
1762
1763	Builder.CreateAlignedStore(Val: StoredShadow, Ptr: ShadowPtr, Align: Align (`1`),
1764	isVolatile: Store.isVolatile());
1765	}
1766
1767	// A non-ft store needs to invalidate shadow memory. Exceptions are:
1768	// - memory transfers of floating-point data through other pointer types (llvm
1769	// optimization passes transform `(float)a = (float)b` into
1770	// `(i32)a = (i32)b` ). These have the same semantics as memcpy.
1771	// - Writes of FT-sized constants. LLVM likes to do float stores as bitcasted
1772	// ints. Note that this is not really necessary because if the value is
1773	// unknown the framework will re-extend it on load anyway. It just felt
1774	// easier to debug tests with vectors of FTs.
1775	void NumericalStabilitySanitizer::propagateNonFTStore(
1776	StoreInst &Store, Type VT, const* ValueToShadowMap &Map) {
1777	Value *PtrOp = Store.getPointerOperand();
1778	IRBuilder<> Builder(Store.getNextNode());
1779	Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1780	Value *Dst = PtrOp;
1781	TypeSize SlotSize = DL.getTypeStoreSize(Ty: VT);
1782	assert(!SlotSize.isScalable() && "unsupported");
1783	const auto LoadSizeBytes = SlotSize.getFixedValue();
1784	Value *ValueSize = Constant::getIntegerValue(
1785	Ty: IntptrTy, V: APInt (IntptrTy->getPrimitiveSizeInBits(), LoadSizeBytes));
1786
1787	++NumInstrumentedNonFTStores;
1788	Value *StoredValue = Store.getValueOperand();
1789	if (LoadInst *Load = dyn_cast<LoadInst>(Val: StoredValue)) {
1790	// TODO: Handle the case when the value is from a phi.
1791	// This is a memory transfer with memcpy semantics. Copy the type and
1792	// value from the source. Note that we cannot use __nsan_copy_values()
1793	// here, because that will not work when there is a write to memory in
1794	// between the load and the store, e.g. in the case of a swap.
1795	Type ShadowTypeIntTy = Type::getIntNTy(C&: Context, N: `8` LoadSizeBytes);
1796	Type *ShadowValueIntTy =
1797	Type::getIntNTy(C&: Context, N: `8` * kShadowScale * LoadSizeBytes);
1798	IRBuilder<> LoadBuilder(Load->getNextNode());
1799	Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1800	Value *LoadSrc = Load->getPointerOperand();
1801	// Read the shadow type and value at load time. The type has the same size
1802	// as the FT value, the value has twice its size.
1803	// TODO: cache them to avoid re-creating them when a load is used by
1804	// several stores. Maybe create them like the FT shadows when a load is
1805	// encountered.
1806	Value *RawShadowType = LoadBuilder.CreateAlignedLoad(
1807	Ty: ShadowTypeIntTy,
1808	Ptr: LoadBuilder.CreateCall(Callee: NsanGetRawShadowTypePtr, Args: {LoadSrc}), Align: Align (`1`),
1809	/isVolatile=/false);
1810	Value *RawShadowValue = LoadBuilder.CreateAlignedLoad(
1811	Ty: ShadowValueIntTy,
1812	Ptr: LoadBuilder.CreateCall(Callee: NsanGetRawShadowPtr, Args: {LoadSrc}), Align: Align (`1`),
1813	/isVolatile=/false);
1814
1815	// Write back the shadow type and value at store time.
1816	Builder.CreateAlignedStore(
1817	Val: RawShadowType, Ptr: Builder.CreateCall(Callee: NsanGetRawShadowTypePtr, Args: {Dst}),
1818	Align: Align (`1`),
1819	/isVolatile=/false);
1820	Builder.CreateAlignedStore(Val: RawShadowValue,
1821	Ptr: Builder.CreateCall(Callee: NsanGetRawShadowPtr, Args: {Dst}),
1822	Align: Align (`1`),
1823	/isVolatile=/false);
1824
1825	++NumInstrumentedNonFTMemcpyStores;
1826	return;
1827	}
1828	// ClPropagateNonFTConstStoresAsFT is by default false.
1829	if (Constant *C; ClPropagateNonFTConstStoresAsFT &&
1830	(C = dyn_cast<Constant>(Val: StoredValue))) {
1831	// This might be a fp constant stored as an int. Bitcast and store if it has
1832	// appropriate size.
1833	Type BitcastTy = nullptr; // The FT type to bitcast to.*
1834	if (auto *CInt = dyn_cast<ConstantInt>(Val: C)) {
1835	switch (CInt->getType()->getScalarSizeInBits()) {
1836	case `32`:
1837	BitcastTy = Type::getFloatTy(C&: Context);
1838	break;
1839	case `64`:
1840	BitcastTy = Type::getDoubleTy(C&: Context);
1841	break;
1842	case `80`:
1843	BitcastTy = Type::getX86_FP80Ty(C&: Context);
1844	break;
1845	default:
1846	break;
1847	}
1848	} else if (auto *CDV = dyn_cast<ConstantDataVector>(Val: C)) {
1849	const int NumElements =
1850	cast<VectorType>(Val: CDV->getType())->getElementCount().getFixedValue();
1851	switch (CDV->getType()->getScalarSizeInBits()) {
1852	case `32`:
1853	BitcastTy =
1854	VectorType::get(ElementType: Type::getFloatTy(C&: Context), NumElements, Scalable: false);
1855	break;
1856	case `64`:
1857	BitcastTy =
1858	VectorType::get(ElementType: Type::getDoubleTy(C&: Context), NumElements, Scalable: false);
1859	break;
1860	case `80`:
1861	BitcastTy =
1862	VectorType::get(ElementType: Type::getX86_FP80Ty(C&: Context), NumElements, Scalable: false);
1863	break;
1864	default:
1865	break;
1866	}
1867	}
1868	if (BitcastTy) {
1869	const MemoryExtents Extents = getMemoryExtentsOrDie(FT: BitcastTy);
1870	Value *ShadowPtr = Builder.CreateCall(
1871	Callee: NsanGetShadowPtrForStore[Extents.ValueType],
1872	Args: {PtrOp, ConstantInt::get(Ty: IntptrTy, V: Extents.NumElts)});
1873	// Bitcast the integer value to the appropriate FT type and extend to 2FT.
1874	Type *ExtVT = Config.getExtendedFPType(FT: BitcastTy);
1875	Value *Shadow =
1876	Builder.CreateFPExt(V: Builder.CreateBitCast(V: C, DestTy: BitcastTy), DestTy: ExtVT);
1877	Builder.CreateAlignedStore(Val: Shadow, Ptr: ShadowPtr, Align: Align (`1`),
1878	isVolatile: Store.isVolatile());
1879	return;
1880	}
1881	}
1882	// All other stores just reset the shadow value to unknown.
1883	Builder.CreateCall(Callee: NsanSetValueUnknown, Args: {Dst, ValueSize});
1884	}
1885
1886	void NumericalStabilitySanitizer::propagateShadowValues(
1887	Instruction &Inst, const TargetLibraryInfo &TLI,
1888	const ValueToShadowMap &Map) {
1889	if (auto *Store = dyn_cast<StoreInst>(Val: &Inst)) {
1890	Value *StoredValue = Store->getValueOperand();
1891	Type *VT = StoredValue->getType();
1892	Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
1893	if (ExtendedVT == nullptr)
1894	return propagateNonFTStore(Store&: *Store, VT, Map);
1895	return propagateFTStore(Store&: *Store, VT, ExtendedVT, Map);
1896	}
1897
1898	if (auto *FCmp = dyn_cast<FCmpInst>(Val: &Inst)) {
1899	emitFCmpCheck(FCmp&: *FCmp, Map);
1900	return;
1901	}
1902
1903	if (auto *CB = dyn_cast<CallBase>(Val: &Inst)) {
1904	maybeAddSuffixForNsanInterface(CI: CB);
1905	if (CallInst *CI = dyn_cast<CallInst>(Val: &Inst))
1906	maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI: &TLI);
1907	if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Val: &Inst)) {
1908	instrumentMemIntrinsic(MI);
1909	return;
1910	}
1911	populateShadowStack(CI&: *CB, TLI, Map);
1912	return;
1913	}
1914
1915	if (auto *RetInst = dyn_cast<ReturnInst>(Val: &Inst)) {
1916	if (!ClCheckRet)
1917	return;
1918
1919	Value *RV = RetInst->getReturnValue();
1920	if (RV == nullptr)
1921	return; // This is a `ret void`.
1922	Type *VT = RV->getType();
1923	Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
1924	if (ExtendedVT == nullptr)
1925	return; // Not an FT ret.
1926	Value *RVShadow = Map.getShadow(V: RV);
1927	IRBuilder<> Builder(RetInst);
1928
1929	RVShadow = emitCheck(V: RV, ShadowV: RVShadow, Builder, Loc: CheckLoc::makeRet());
1930	++NumInstrumentedFTRets;
1931	// Store tag.
1932	Value *FnAddr =
1933	Builder.CreatePtrToInt(V: Inst.getParent()->getParent(), DestTy: IntptrTy);
1934	Builder.CreateStore(Val: FnAddr, Ptr: NsanShadowRetTag);
1935	// Store value.
1936	Value *ShadowRetValPtr =
1937	Builder.CreateConstGEP2_64(Ty: NsanShadowRetType, Ptr: NsanShadowRetPtr, Idx0: `0`, Idx1: `0`);
1938	Builder.CreateStore(Val: RVShadow, Ptr: ShadowRetValPtr);
1939	return;
1940	}
1941
1942	if (InsertValueInst *Insert = dyn_cast<InsertValueInst>(Val: &Inst)) {
1943	Value *V = Insert->getOperand(i_nocapture: `1`);
1944	Type *VT = V->getType();
1945	Type *ExtendedVT = Config.getExtendedFPType(FT: VT);
1946	if (ExtendedVT == nullptr)
1947	return;
1948	IRBuilder<> Builder(Insert);
1949	emitCheck(V, ShadowV: Map.getShadow(V), Builder, Loc: CheckLoc::makeInsert());
1950	return;
1951	}
1952	}
1953
1954	// Moves fast math flags from the function to individual instructions, and
1955	// removes the attribute from the function.
1956	// TODO: Make this controllable with a flag.
1957	static void moveFastMathFlags(Function &F,
1958	std::vector<Instruction *> &Instructions) {
1959	FastMathFlags FMF;
1960	#define MOVE_FLAG(attr, setter) \
1961	if (F.getFnAttribute(attr).getValueAsString() == "true") { \
1962	F.removeFnAttr(attr); \
1963	FMF.set##setter(); \
1964	}
1965	MOVE_FLAG("unsafe-fp-math", Fast)
1966	MOVE_FLAG("no-infs-fp-math", NoInfs)
1967	MOVE_FLAG("no-nans-fp-math", NoNaNs)
1968	MOVE_FLAG("no-signed-zeros-fp-math", NoSignedZeros)
1969	#undef MOVE_FLAG
1970
1971	for (Instruction *I : Instructions)
1972	if (isa<FPMathOperator>(Val: I))
1973	I->setFastMathFlags(FMF);
1974	}
1975
1976	bool NumericalStabilitySanitizer::sanitizeFunction(
1977	Function &F, const TargetLibraryInfo &TLI) {
1978	if (!F.hasFnAttribute(Kind: Attribute::SanitizeNumericalStability))
1979	return false;
1980
1981	// This is required to prevent instrumenting call to __nsan_init from within
1982	// the module constructor.
1983	if (F.getName() == kNsanModuleCtorName)
1984	return false;
1985	SmallVector<Instruction *, `8`> AllLoadsAndStores;
1986	SmallVector<Instruction *, `8`> LocalLoadsAndStores;
1987
1988	// The instrumentation maintains:
1989	// - for each IR value `v` of floating-point (or vector floating-point) type
1990	// FT, a shadow IR value `s(v)` with twice the precision 2FT (e.g.
1991	// double for float and f128 for double).
1992	// - A shadow memory, which stores `s(v)` for any `v` that has been stored,
1993	// along with a shadow memory tag, which stores whether the value in the
1994	// corresponding shadow memory is valid. Note that this might be
1995	// incorrect if a non-instrumented function stores to memory, or if
1996	// memory is stored to through a char pointer.
1997	// - A shadow stack, which holds `s(v)` for any floating-point argument `v`
1998	// of a call to an instrumented function. This allows
1999	// instrumented functions to retrieve the shadow values for their
2000	// arguments.
2001	// Because instrumented functions can be called from non-instrumented
2002	// functions, the stack needs to include a tag so that the instrumented
2003	// function knows whether shadow values are available for their
2004	// parameters (i.e. whether is was called by an instrumented function).
2005	// When shadow arguments are not available, they have to be recreated by
2006	// extending the precision of the non-shadow arguments to the non-shadow
2007	// value. Non-instrumented functions do not modify (or even know about) the
2008	// shadow stack. The shadow stack pointer is __nsan_shadow_args. The shadow
2009	// stack tag is __nsan_shadow_args_tag. The tag is any unique identifier
2010	// for the function (we use the address of the function). Both variables
2011	// are thread local.
2012	// Example:
2013	// calls shadow stack tag shadow stack
2014	// =======================================================================
2015	// non_instrumented_1() 0 0
2016	// \|
2017	// v
2018	// instrumented_2(float a) 0 0
2019	// \|
2020	// v
2021	// instrumented_3(float b, double c) &instrumented_3 s(b),s(c)
2022	// \|
2023	// v
2024	// instrumented_4(float d) &instrumented_4 s(d)
2025	// \|
2026	// v
2027	// non_instrumented_5(float e) &non_instrumented_5 s(e)
2028	// \|
2029	// v
2030	// instrumented_6(float f) &non_instrumented_5 s(e)
2031	//
2032	// On entry, instrumented_2 checks whether the tag corresponds to its
2033	// function ptr.
2034	// Note that functions reset the tag to 0 after reading shadow parameters.
2035	// This ensures that the function does not erroneously read invalid data if
2036	// called twice in the same stack, once from an instrumented function and
2037	// once from an uninstrumented one. For example, in the following example,
2038	// resetting the tag in (A) ensures that (B) does not reuse the same the
2039	// shadow arguments (which would be incorrect).
2040	// instrumented_1(float a)
2041	// \|
2042	// v
2043	// instrumented_2(float b) (A)
2044	// \|
2045	// v
2046	// non_instrumented_3()
2047	// \|
2048	// v
2049	// instrumented_2(float b) (B)
2050	//
2051	// - A shadow return slot. Any function that returns a floating-point value
2052	// places a shadow return value in __nsan_shadow_ret_val. Again, because
2053	// we might be calling non-instrumented functions, this value is guarded
2054	// by __nsan_shadow_ret_tag marker indicating which instrumented function
2055	// placed the value in __nsan_shadow_ret_val, so that the caller can check
2056	// that this corresponds to the callee. Both variables are thread local.
2057	//
2058	// For example, in the following example, the instrumentation in
2059	// `instrumented_1` rejects the shadow return value from `instrumented_3`
2060	// because is is not tagged as expected (`&instrumented_3` instead of
2061	// `non_instrumented_2`):
2062	//
2063	// instrumented_1()
2064	// \|
2065	// v
2066	// float non_instrumented_2()
2067	// \|
2068	// v
2069	// float instrumented_3()
2070	//
2071	// Calls of known math functions (sin, cos, exp, ...) are duplicated to call
2072	// their overload on the shadow type.
2073
2074	// Collect all instructions before processing, as creating shadow values
2075	// creates new instructions inside the function.
2076	std::vector<Instruction *> OriginalInstructions;
2077	for (BasicBlock &BB : F)
2078	for (Instruction &Inst : BB)
2079	OriginalInstructions.emplace_back(args: &Inst);
2080
2081	moveFastMathFlags(F, Instructions&: OriginalInstructions);
2082	ValueToShadowMap ValueToShadow(Config);
2083
2084	// In the first pass, we create shadow values for all FT function arguments
2085	// and all phis. This ensures that the DFS of the next pass does not have
2086	// any loops.
2087	std::vector<PHINode *> OriginalPhis;
2088	createShadowArguments(F, TLI, Map&: ValueToShadow);
2089	for (Instruction *I : OriginalInstructions) {
2090	if (PHINode *Phi = dyn_cast<PHINode>(Val: I)) {
2091	if (PHINode Shadow = maybeCreateShadowPhi(Phi&: Phi, TLI)) {
2092	OriginalPhis.push_back(x: Phi);
2093	ValueToShadow.setShadow(V&: Phi, Shadow&: Shadow);
2094	}
2095	}
2096	}
2097
2098	// Create shadow values for all instructions creating FT values.
2099	for (Instruction *I : OriginalInstructions)
2100	maybeCreateShadowValue(Root&: *I, TLI, Map&: ValueToShadow);
2101
2102	// Propagate shadow values across stores, calls and rets.
2103	for (Instruction *I : OriginalInstructions)
2104	propagateShadowValues(Inst&: *I, TLI, Map: ValueToShadow);
2105
2106	// The last pass populates shadow phis with shadow values.
2107	for (PHINode *Phi : OriginalPhis) {
2108	PHINode *ShadowPhi = dyn_cast<PHINode>(Val: ValueToShadow.getShadow(V: Phi));
2109	for (unsigned I : seq(Size: Phi->getNumOperands())) {
2110	Value *V = Phi->getOperand(i_nocapture: I);
2111	Value *Shadow = ValueToShadow.getShadow(V);
2112	BasicBlock *IncomingBB = Phi->getIncomingBlock(i: I);
2113	// For some instructions (e.g. invoke), we create the shadow in a separate
2114	// block, different from the block where the original value is created.
2115	// In that case, the shadow phi might need to refer to this block instead
2116	// of the original block.
2117	// Note that this can only happen for instructions as constant shadows are
2118	// always created in the same block.
2119	ShadowPhi->addIncoming(V: Shadow, BB: IncomingBB);
2120	}
2121	}
2122
2123	return !ValueToShadow.empty();
2124	}
2125
2126	// Instrument the memory intrinsics so that they properly modify the shadow
2127	// memory.
2128	bool NumericalStabilitySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
2129	IRBuilder<> Builder(MI);
2130	if (auto *M = dyn_cast<MemSetInst>(Val: MI)) {
2131	Builder.CreateCall(
2132	Callee: NsanSetValueUnknown,
2133	Args: {/Address=/M->getArgOperand(i: `0`),
2134	/Size=/Builder.CreateIntCast(V: M->getArgOperand(i: `2`), DestTy: IntptrTy, isSigned: false)});
2135	} else if (auto *M = dyn_cast<MemTransferInst>(Val: MI)) {
2136	Builder.CreateCall(
2137	Callee: NsanCopyValues,
2138	Args: {/Destination=/M->getArgOperand(i: `0`),
2139	/Source=/M->getArgOperand(i: `1`),
2140	/Size=/Builder.CreateIntCast(V: M->getArgOperand(i: `2`), DestTy: IntptrTy, isSigned: false)});
2141	}
2142	return false;
2143	}
2144
2145	void NumericalStabilitySanitizer::maybeAddSuffixForNsanInterface(CallBase *CI) {
2146	Function *Fn = CI->getCalledFunction();
2147	if (Fn == nullptr)
2148	return;
2149
2150	if (!Fn->getName().starts_with(Prefix: "__nsan_"))
2151	return;
2152
2153	if (Fn->getName() == "__nsan_dump_shadow_mem") {
2154	assert(CI->arg_size() == `4` &&
2155	"invalid prototype for __nsan_dump_shadow_mem");
2156	// __nsan_dump_shadow_mem requires an extra parameter with the dynamic
2157	// configuration:
2158	// (shadow_type_id_for_long_double << 16) \| (shadow_type_id_for_double << 8)
2159	// \| shadow_type_id_for_double
2160	const uint64_t shadow_value_type_ids =
2161	(static_cast<size_t>(Config.byValueType(VT: kLongDouble).getNsanTypeId())
2162	<< `16`) \|
2163	(static_cast<size_t>(Config.byValueType(VT: kDouble).getNsanTypeId())
2164	<< `8`) \|
2165	static_cast<size_t>(Config.byValueType(VT: kFloat).getNsanTypeId());
2166	CI->setArgOperand(i: `3`, v: ConstantInt::get(Ty: IntptrTy, V: shadow_value_type_ids));
2167	}
2168	}
2169

Browse the source code of llvm_projects/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp