1//===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file is a part of MemorySanitizer, a detector of uninitialized
11/// reads.
12///
13/// The algorithm of the tool is similar to Memcheck
14/// (https://static.usenix.org/event/usenix05/tech/general/full_papers/seward/seward_html/usenix2005.html)
15/// We associate a few shadow bits with every byte of the application memory,
/// poison the shadow of the malloc-ed or alloca-ed memory, load the shadow
/// bits on every memory read, propagate the shadow bits through some of the
18/// arithmetic instruction (including MOV), store the shadow bits on every
19/// memory write, report a bug on some other instructions (e.g. JMP) if the
20/// associated shadow is poisoned.
21///
22/// But there are differences too. The first and the major one:
23/// compiler instrumentation instead of binary instrumentation. This
24/// gives us much better register allocation, possible compiler
25/// optimizations and a fast start-up. But this brings the major issue
26/// as well: msan needs to see all program events, including system
27/// calls and reads/writes in system libraries, so we either need to
28/// compile *everything* with msan or use a binary translation
29/// component (e.g. DynamoRIO) to instrument pre-built libraries.
30/// Another difference from Memcheck is that we use 8 shadow bits per
31/// byte of application memory and use a direct shadow mapping. This
32/// greatly simplifies the instrumentation code and avoids races on
33/// shadow updates (Memcheck is single-threaded so races are not a
34/// concern there. Memcheck uses 2 shadow bits per byte with a slow
35/// path storage that uses 8 bits per byte).
36///
37/// The default value of shadow is 0, which means "clean" (not poisoned).
38///
39/// Every module initializer should call __msan_init to ensure that the
40/// shadow memory is ready. On error, __msan_warning is called. Since
41/// parameters and return values may be passed via registers, we have a
42/// specialized thread-local shadow for return values
43/// (__msan_retval_tls) and parameters (__msan_param_tls).
44///
45/// Origin tracking.
46///
47/// MemorySanitizer can track origins (allocation points) of all uninitialized
48/// values. This behavior is controlled with a flag (msan-track-origins) and is
49/// disabled by default.
50///
51/// Origins are 4-byte values created and interpreted by the runtime library.
52/// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
53/// of application memory. Propagation of origins is basically a bunch of
54/// "select" instructions that pick the origin of a dirty argument, if an
55/// instruction has one.
56///
57/// Every 4 aligned, consecutive bytes of application memory have one origin
58/// value associated with them. If these bytes contain uninitialized data
59/// coming from 2 different allocations, the last store wins. Because of this,
60/// MemorySanitizer reports can show unrelated origins, but this is unlikely in
61/// practice.
62///
63/// Origins are meaningless for fully initialized values, so MemorySanitizer
64/// avoids storing origin to memory when a fully initialized value is stored.
65/// This way it avoids needless overwriting origin of the 4-byte region on
66/// a short (i.e. 1 byte) clean store, and it is also good for performance.
67///
68/// Atomic handling.
69///
70/// Ideally, every atomic store of application value should update the
71/// corresponding shadow location in an atomic way. Unfortunately, atomic store
72/// of two disjoint locations can not be done without severe slowdown.
73///
74/// Therefore, we implement an approximation that may err on the safe side.
75/// In this implementation, every atomically accessed location in the program
76/// may only change from (partially) uninitialized to fully initialized, but
77/// not the other way around. We load the shadow _after_ the application load,
78/// and we store the shadow _before_ the app store. Also, we always store clean
79/// shadow (if the application store is atomic). This way, if the store-load
80/// pair constitutes a happens-before arc, shadow store and load are correctly
81/// ordered such that the load will get either the value that was stored, or
82/// some later value (which is always clean).
83///
84/// This does not work very well with Compare-And-Swap (CAS) and
85/// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
86/// must store the new shadow before the app operation, and load the shadow
87/// after the app operation. Computers don't work this way. Current
88/// implementation ignores the load aspect of CAS/RMW, always returning a clean
89/// value. It implements the store part as a simple atomic store by storing a
90/// clean shadow.
91///
92/// Instrumenting inline assembly.
93///
94/// For inline assembly code LLVM has little idea about which memory locations
95/// become initialized depending on the arguments. It can be possible to figure
96/// out which arguments are meant to point to inputs and outputs, but the
97/// actual semantics can be only visible at runtime. In the Linux kernel it's
98/// also possible that the arguments only indicate the offset for a base taken
99/// from a segment register, so it's dangerous to treat any asm() arguments as
100/// pointers. We take a conservative approach generating calls to
101/// __msan_instrument_asm_store(ptr, size)
102/// , which defer the memory unpoisoning to the runtime library.
103/// The latter can perform more complex address checks to figure out whether
104/// it's safe to touch the shadow memory.
105/// Like with atomic operations, we call __msan_instrument_asm_store() before
106/// the assembly call, so that changes to the shadow memory will be seen by
107/// other threads together with main memory initialization.
108///
109/// KernelMemorySanitizer (KMSAN) implementation.
110///
111/// The major differences between KMSAN and MSan instrumentation are:
112/// - KMSAN always tracks the origins and implies msan-keep-going=true;
113/// - KMSAN allocates shadow and origin memory for each page separately, so
114/// there are no explicit accesses to shadow and origin in the
115/// instrumentation.
116/// Shadow and origin values for a particular X-byte memory location
117/// (X=1,2,4,8) are accessed through pointers obtained via the
118/// __msan_metadata_ptr_for_load_X(ptr)
119/// __msan_metadata_ptr_for_store_X(ptr)
120/// functions. The corresponding functions check that the X-byte accesses
/// are possible and return the pointers to shadow and origin memory.
122/// Arbitrary sized accesses are handled with:
123/// __msan_metadata_ptr_for_load_n(ptr, size)
124/// __msan_metadata_ptr_for_store_n(ptr, size);
125/// Note that the sanitizer code has to deal with how shadow/origin pairs
/// returned by these functions are represented in different ABIs. In
127/// the X86_64 ABI they are returned in RDX:RAX, in PowerPC64 they are
128/// returned in r3 and r4, and in the SystemZ ABI they are written to memory
129/// pointed to by a hidden parameter.
130/// - TLS variables are stored in a single per-task struct. A call to a
131/// function __msan_get_context_state() returning a pointer to that struct
132/// is inserted into every instrumented function before the entry block;
133/// - __msan_warning() takes a 32-bit origin parameter;
134/// - local variables are poisoned with __msan_poison_alloca() upon function
135/// entry and unpoisoned with __msan_unpoison_alloca() before leaving the
136/// function;
137/// - the pass doesn't declare any global variables or add global constructors
138/// to the translation unit.
139///
140/// Also, KMSAN currently ignores uninitialized memory passed into inline asm
141/// calls, making sure we're on the safe side wrt. possible false positives.
142///
143/// KernelMemorySanitizer only supports X86_64, SystemZ and PowerPC64 at the
144/// moment.
145///
146//
147// FIXME: This sanitizer does not yet handle scalable vectors
148//
149//===----------------------------------------------------------------------===//
150
151#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
152#include "llvm/ADT/APInt.h"
153#include "llvm/ADT/ArrayRef.h"
154#include "llvm/ADT/DenseMap.h"
155#include "llvm/ADT/DepthFirstIterator.h"
156#include "llvm/ADT/SetVector.h"
157#include "llvm/ADT/SmallPtrSet.h"
158#include "llvm/ADT/SmallVector.h"
159#include "llvm/ADT/StringExtras.h"
160#include "llvm/ADT/StringRef.h"
161#include "llvm/Analysis/GlobalsModRef.h"
162#include "llvm/Analysis/TargetLibraryInfo.h"
163#include "llvm/Analysis/ValueTracking.h"
164#include "llvm/IR/Argument.h"
165#include "llvm/IR/AttributeMask.h"
166#include "llvm/IR/Attributes.h"
167#include "llvm/IR/BasicBlock.h"
168#include "llvm/IR/CallingConv.h"
169#include "llvm/IR/Constant.h"
170#include "llvm/IR/Constants.h"
171#include "llvm/IR/DataLayout.h"
172#include "llvm/IR/DerivedTypes.h"
173#include "llvm/IR/Function.h"
174#include "llvm/IR/GlobalValue.h"
175#include "llvm/IR/GlobalVariable.h"
176#include "llvm/IR/IRBuilder.h"
177#include "llvm/IR/InlineAsm.h"
178#include "llvm/IR/InstVisitor.h"
179#include "llvm/IR/InstrTypes.h"
180#include "llvm/IR/Instruction.h"
181#include "llvm/IR/Instructions.h"
182#include "llvm/IR/IntrinsicInst.h"
183#include "llvm/IR/Intrinsics.h"
184#include "llvm/IR/IntrinsicsAArch64.h"
185#include "llvm/IR/IntrinsicsX86.h"
186#include "llvm/IR/MDBuilder.h"
187#include "llvm/IR/Module.h"
188#include "llvm/IR/Type.h"
189#include "llvm/IR/Value.h"
190#include "llvm/IR/ValueMap.h"
191#include "llvm/Support/Alignment.h"
192#include "llvm/Support/AtomicOrdering.h"
193#include "llvm/Support/Casting.h"
194#include "llvm/Support/CommandLine.h"
195#include "llvm/Support/Debug.h"
196#include "llvm/Support/DebugCounter.h"
197#include "llvm/Support/ErrorHandling.h"
198#include "llvm/Support/MathExtras.h"
199#include "llvm/Support/raw_ostream.h"
200#include "llvm/TargetParser/Triple.h"
201#include "llvm/Transforms/Utils/BasicBlockUtils.h"
202#include "llvm/Transforms/Utils/Instrumentation.h"
203#include "llvm/Transforms/Utils/Local.h"
204#include "llvm/Transforms/Utils/ModuleUtils.h"
205#include <algorithm>
206#include <cassert>
207#include <cstddef>
208#include <cstdint>
209#include <memory>
210#include <numeric>
211#include <string>
212#include <tuple>
213
using namespace llvm;

#define DEBUG_TYPE "msan"

// Debug counters allow bisecting miscompiles: instrumentation of individual
// checks/instructions can be selectively enabled via -debug-counter.
DEBUG_COUNTER(DebugInsertCheck, "msan-insert-check",
              "Controls which checks to insert");

DEBUG_COUNTER(DebugInstrumentInstruction, "msan-instrument-instruction",
              "Controls which instruction to instrument");

// Size in bytes of one origin id, and the minimum alignments used for origin
// stores and for accesses to the TLS shadow arrays.
static const unsigned kOriginSize = 4;
static const Align kMinOriginAlignment = Align(4);
static const Align kShadowTLSAlignment = Align(8);

// These constants must be kept in sync with the ones in msan.h.
// TODO: increase size to match SVE/SVE2/SME/SME2 limits
static const unsigned kParamTLSSize = 800;
static const unsigned kRetvalTLSSize = 800;

// Accesses sizes are powers of two: 1, 2, 4, 8.
static const size_t kNumberOfAccessSizes = 4;
235
/// Track origins of uninitialized values.
///
/// Adds a section to MemorySanitizer report that points to the allocation
/// (stack or heap) the uninitialized bits came from originally.
static cl::opt<int> ClTrackOrigins(
    "msan-track-origins",
    cl::desc("Track origins (allocation sites) of poisoned memory"), cl::Hidden,
    cl::init(Val: 0));

/// Keep executing after a UMR (uninitialized memory read) report instead of
/// aborting.
static cl::opt<bool> ClKeepGoing("msan-keep-going",
                                 cl::desc("keep going after reporting a UMR"),
                                 cl::Hidden, cl::init(Val: false));

/// Poison the shadow of stack allocations on function entry.
static cl::opt<bool>
    ClPoisonStack("msan-poison-stack",
                  cl::desc("poison uninitialized stack variables"), cl::Hidden,
                  cl::init(Val: true));

/// Poison stack variables via a runtime call rather than inline stores.
static cl::opt<bool> ClPoisonStackWithCall(
    "msan-poison-stack-with-call",
    cl::desc("poison uninitialized stack variables with a call"), cl::Hidden,
    cl::init(Val: false));

/// Byte pattern used when poisoning stack variables inline.
static cl::opt<int> ClPoisonStackPattern(
    "msan-poison-stack-pattern",
    cl::desc("poison uninitialized stack variables with the given pattern"),
    cl::Hidden, cl::init(Val: 0xff));

/// Record names of local stack variables for more readable reports.
static cl::opt<bool>
    ClPrintStackNames("msan-print-stack-names",
                      cl::desc("Print name of local stack variable"),
                      cl::Hidden, cl::init(Val: true));

static cl::opt<bool>
    ClPoisonUndef("msan-poison-undef",
                  cl::desc("Poison fully undef temporary values. "
                           "Partially undefined constant vectors "
                           "are unaffected by this flag (see "
                           "-msan-poison-undef-vectors)."),
                  cl::Hidden, cl::init(Val: true));

static cl::opt<bool> ClPoisonUndefVectors(
    "msan-poison-undef-vectors",
    cl::desc("Precisely poison partially undefined constant vectors. "
             "If false (legacy behavior), the entire vector is "
             "considered fully initialized, which may lead to false "
             "negatives. Fully undefined constant vectors are "
             "unaffected by this flag (see -msan-poison-undef)."),
    cl::Hidden, cl::init(Val: false));

static cl::opt<bool> ClPreciseDisjointOr(
    "msan-precise-disjoint-or",
    cl::desc("Precisely poison disjoint OR. If false (legacy behavior), "
             "disjointedness is ignored (i.e., 1|1 is initialized)."),
    cl::Hidden, cl::init(Val: false));

/// Propagate shadow through equality comparisons instead of only checking the
/// operands.
static cl::opt<bool>
    ClHandleICmp("msan-handle-icmp",
                 cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
                 cl::Hidden, cl::init(Val: true));

static cl::opt<bool>
    ClHandleICmpExact("msan-handle-icmp-exact",
                      cl::desc("exact handling of relational integer ICmp"),
                      cl::Hidden, cl::init(Val: true));

static cl::opt<int> ClSwitchPrecision(
    "msan-switch-precision",
    cl::desc("Controls the number of cases considered by MSan for LLVM switch "
             "instructions. 0 means no UUMs detected. Higher values lead to "
             "fewer false negatives but may impact compiler and/or "
             "application performance. N.B. LLVM switch instructions do not "
             "correspond exactly to C++ switch statements."),
    cl::Hidden, cl::init(Val: 99));
310
static cl::opt<bool> ClHandleLifetimeIntrinsics(
    "msan-handle-lifetime-intrinsics",
    cl::desc(
        "when possible, poison scoped variables at the beginning of the scope "
        "(slower, but more precise)"),
    cl::Hidden, cl::init(Val: true));

// When compiling the Linux kernel, we sometimes see false positives related to
// MSan being unable to understand that inline assembly calls may initialize
// local variables.
// This flag makes the compiler conservatively unpoison every memory location
// passed into an assembly call. Note that this may cause false positives.
// Because it's impossible to figure out the array sizes, we can only unpoison
// the first sizeof(type) bytes for each type* pointer.
static cl::opt<bool> ClHandleAsmConservative(
    "msan-handle-asm-conservative",
    cl::desc("conservative handling of inline assembly"), cl::Hidden,
    cl::init(Val: true));

// This flag controls whether we check the shadow of the address
// operand of load or store. Such bugs are very rare, since load from
// a garbage address typically results in SEGV, but still happen
// (e.g. only lower bits of address are garbage, or the access happens
// early at program startup where malloc-ed memory is more likely to
// be zeroed). As of 2012-08-28 this flag adds 20% slowdown.
static cl::opt<bool> ClCheckAccessAddress(
    "msan-check-access-address",
    cl::desc("report accesses through a pointer which has poisoned shadow"),
    cl::Hidden, cl::init(Val: true));

/// Check function arguments and return values at call boundaries.
static cl::opt<bool> ClEagerChecks(
    "msan-eager-checks",
    cl::desc("check arguments and return values at function call boundaries"),
    cl::Hidden, cl::init(Val: false));

static cl::opt<bool> ClDumpStrictInstructions(
    "msan-dump-strict-instructions",
    cl::desc("print out instructions with default strict semantics i.e.,"
             "check that all the inputs are fully initialized, and mark "
             "the output as fully initialized. These semantics are applied "
             "to instructions that could not be handled explicitly nor "
             "heuristically."),
    cl::Hidden, cl::init(Val: false));

// Currently, all the heuristically handled instructions are specifically
// IntrinsicInst. However, we use the broader "HeuristicInstructions" name
// to parallel 'msan-dump-strict-instructions', and to keep the door open to
// handling non-intrinsic instructions heuristically.
static cl::opt<bool> ClDumpHeuristicInstructions(
    "msan-dump-heuristic-instructions",
    cl::desc("Prints 'unknown' instructions that were handled heuristically. "
             "Use -msan-dump-strict-instructions to print instructions that "
             "could not be handled explicitly nor heuristically."),
    cl::Hidden, cl::init(Val: false));

static cl::opt<int> ClInstrumentationWithCallThreshold(
    "msan-instrumentation-with-call-threshold",
    cl::desc(
        "If the function being instrumented requires more than "
        "this number of checks and origin stores, use callbacks instead of "
        "inline checks (-1 means never use callbacks)."),
    cl::Hidden, cl::init(Val: 3500));
373
/// Enable KernelMemorySanitizer (KMSAN) instead of userspace MSan.
static cl::opt<bool>
    ClEnableKmsan("msan-kernel",
                  cl::desc("Enable KernelMemorySanitizer instrumentation"),
                  cl::Hidden, cl::init(Val: false));

static cl::opt<bool>
    ClDisableChecks("msan-disable-checks",
                    cl::desc("Apply no_sanitize to the whole file"), cl::Hidden,
                    cl::init(Val: false));

static cl::opt<bool>
    ClCheckConstantShadow("msan-check-constant-shadow",
                          cl::desc("Insert checks for constant shadow values"),
                          cl::Hidden, cl::init(Val: true));

// This is off by default because of a bug in gold:
// https://sourceware.org/bugzilla/show_bug.cgi?id=19002
static cl::opt<bool>
    ClWithComdat("msan-with-comdat",
                 cl::desc("Place MSan constructors in comdat sections"),
                 cl::Hidden, cl::init(Val: false));

// These options allow to specify custom memory map parameters
// See MemoryMapParams for details.
static cl::opt<uint64_t> ClAndMask("msan-and-mask",
                                   cl::desc("Define custom MSan AndMask"),
                                   cl::Hidden, cl::init(Val: 0));

static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
                                   cl::desc("Define custom MSan XorMask"),
                                   cl::Hidden, cl::init(Val: 0));

static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
                                      cl::desc("Define custom MSan ShadowBase"),
                                      cl::Hidden, cl::init(Val: 0));

static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
                                      cl::desc("Define custom MSan OriginBase"),
                                      cl::Hidden, cl::init(Val: 0));

static cl::opt<int>
    ClDisambiguateWarning("msan-disambiguate-warning-threshold",
                          cl::desc("Define threshold for number of checks per "
                                   "debug location to force origin update."),
                          cl::Hidden, cl::init(Val: 3));

// Name of the module constructor and of the runtime init function it calls.
const char kMsanModuleCtorName[] = "msan.module_ctor";
const char kMsanInitName[] = "__msan_init";
422
namespace {

// Memory map parameters used in application-to-shadow address calculation.
//   Offset = (Addr & ~AndMask) ^ XorMask
//   Shadow = ShadowBase + Offset
//   Origin = OriginBase + Offset
struct MemoryMapParams {
  uint64_t AndMask;
  uint64_t XorMask;
  uint64_t ShadowBase;
  uint64_t OriginBase;
};

// Per-platform pair of mappings for 32- and 64-bit pointer widths; an entry
// is null when the width is unsupported on that platform (see tables below).
struct PlatformMemoryMapParams {
  const MemoryMapParams *bits32;
  const MemoryMapParams *bits64;
};

} // end anonymous namespace
442
// Per-target shadow/origin mapping constants. Fields follow the formulas
// documented on MemoryMapParams; a zero with "(not used)" means that term
// drops out of the address calculation for the target.

// i386 Linux
static const MemoryMapParams Linux_I386_MemoryMapParams = {
    .AndMask: 0x000080000000, // AndMask
    .XorMask: 0,              // XorMask (not used)
    .ShadowBase: 0,              // ShadowBase (not used)
    .OriginBase: 0x000040000000, // OriginBase
};

// x86_64 Linux
static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
    .AndMask: 0,              // AndMask (not used)
    .XorMask: 0x500000000000, // XorMask
    .ShadowBase: 0,              // ShadowBase (not used)
    .OriginBase: 0x100000000000, // OriginBase
};

// mips32 Linux
// FIXME: Remove -msan-origin-base -msan-and-mask added by PR #109284 to tests
// after picking good constants

// mips64 Linux
static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
    .AndMask: 0,              // AndMask (not used)
    .XorMask: 0x008000000000, // XorMask
    .ShadowBase: 0,              // ShadowBase (not used)
    .OriginBase: 0x002000000000, // OriginBase
};

// ppc32 Linux
// FIXME: Remove -msan-origin-base -msan-and-mask added by PR #109284 to tests
// after picking good constants

// ppc64 Linux
static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
    .AndMask: 0xE00000000000, // AndMask
    .XorMask: 0x100000000000, // XorMask
    .ShadowBase: 0x080000000000, // ShadowBase
    .OriginBase: 0x1C0000000000, // OriginBase
};

// s390x Linux
static const MemoryMapParams Linux_S390X_MemoryMapParams = {
    .AndMask: 0xC00000000000, // AndMask
    .XorMask: 0,              // XorMask (not used)
    .ShadowBase: 0x080000000000, // ShadowBase
    .OriginBase: 0x1C0000000000, // OriginBase
};

// arm32 Linux
// FIXME: Remove -msan-origin-base -msan-and-mask added by PR #109284 to tests
// after picking good constants

// aarch64 Linux
static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
    .AndMask: 0,               // AndMask (not used)
    .XorMask: 0x0B00000000000, // XorMask
    .ShadowBase: 0,               // ShadowBase (not used)
    .OriginBase: 0x0200000000000, // OriginBase
};

// loongarch64 Linux
static const MemoryMapParams Linux_LoongArch64_MemoryMapParams = {
    .AndMask: 0,              // AndMask (not used)
    .XorMask: 0x500000000000, // XorMask
    .ShadowBase: 0,              // ShadowBase (not used)
    .OriginBase: 0x100000000000, // OriginBase
};

// hexagon Linux
static const MemoryMapParams Linux_Hexagon_MemoryMapParams = {
    .AndMask: 0,          // AndMask (not used)
    .XorMask: 0x20000000, // XorMask
    .ShadowBase: 0,          // ShadowBase (not used)
    .OriginBase: 0x50000000, // OriginBase
};

// riscv32 Linux
// FIXME: Remove -msan-origin-base -msan-and-mask added by PR #109284 to tests
// after picking good constants

// aarch64 FreeBSD
static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = {
    .AndMask: 0x1800000000000, // AndMask
    .XorMask: 0x0400000000000, // XorMask
    .ShadowBase: 0x0200000000000, // ShadowBase
    .OriginBase: 0x0700000000000, // OriginBase
};

// i386 FreeBSD
static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
    .AndMask: 0x000180000000, // AndMask
    .XorMask: 0x000040000000, // XorMask
    .ShadowBase: 0x000020000000, // ShadowBase
    .OriginBase: 0x000700000000, // OriginBase
};

// x86_64 FreeBSD
static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
    .AndMask: 0xc00000000000, // AndMask
    .XorMask: 0x200000000000, // XorMask
    .ShadowBase: 0x100000000000, // ShadowBase
    .OriginBase: 0x380000000000, // OriginBase
};

// x86_64 NetBSD
static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
    .AndMask: 0,              // AndMask
    .XorMask: 0x500000000000, // XorMask
    .ShadowBase: 0,              // ShadowBase
    .OriginBase: 0x100000000000, // OriginBase
};

static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
    .bits32: &Linux_I386_MemoryMapParams,
    .bits64: &Linux_X86_64_MemoryMapParams,
};

static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
    .bits32: nullptr,
    .bits64: &Linux_MIPS64_MemoryMapParams,
};

static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
    .bits32: nullptr,
    .bits64: &Linux_PowerPC64_MemoryMapParams,
};

static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = {
    .bits32: nullptr,
    .bits64: &Linux_S390X_MemoryMapParams,
};

static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
    .bits32: nullptr,
    .bits64: &Linux_AArch64_MemoryMapParams,
};

static const PlatformMemoryMapParams Linux_LoongArch_MemoryMapParams = {
    .bits32: nullptr,
    .bits64: &Linux_LoongArch64_MemoryMapParams,
};

static const PlatformMemoryMapParams Linux_Hexagon_MemoryMapParams_P = {
    .bits32: &Linux_Hexagon_MemoryMapParams,
    .bits64: nullptr,
};

static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = {
    .bits32: nullptr,
    .bits64: &FreeBSD_AArch64_MemoryMapParams,
};

static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
    .bits32: &FreeBSD_I386_MemoryMapParams,
    .bits64: &FreeBSD_X86_64_MemoryMapParams,
};

static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
    .bits32: nullptr,
    .bits64: &NetBSD_X86_64_MemoryMapParams,
};

// Identifies whether an operation applies to all vector lanes, only the
// even-numbered lanes, or only the odd-numbered lanes.
enum OddOrEvenLanes { kBothLanes, kEvenLanes, kOddLanes };
606
607namespace {
608
/// Instrument functions of a module to detect uninitialized reads.
///
/// Instantiating MemorySanitizer inserts the msan runtime library API function
/// declarations into the module if they don't exist already. Instantiating
/// ensures the __msan_init function is in the list of global constructors for
/// the module.
class MemorySanitizer {
public:
  MemorySanitizer(Module &M, MemorySanitizerOptions Options)
      : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
        Recover(Options.Recover), EagerChecks(Options.EagerChecks) {
    initializeModule(M);
  }

  // MSan cannot be moved or copied because of MapParams.
  MemorySanitizer(MemorySanitizer &&) = delete;
  MemorySanitizer &operator=(MemorySanitizer &&) = delete;
  MemorySanitizer(const MemorySanitizer &) = delete;
  MemorySanitizer &operator=(const MemorySanitizer &) = delete;

  /// Instrument function \p F. Returns true if the function was modified.
  bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);

private:
  friend struct MemorySanitizerVisitor;
  friend struct VarArgHelperBase;
  friend struct VarArgAMD64Helper;
  friend struct VarArgAArch64Helper;
  friend struct VarArgPowerPC64Helper;
  friend struct VarArgPowerPC32Helper;
  friend struct VarArgSystemZHelper;
  friend struct VarArgI386Helper;
  friend struct VarArgGenericHelper;

  void initializeModule(Module &M);
  void initializeCallbacks(Module &M, const TargetLibraryInfo &TLI);
  void createKernelApi(Module &M, const TargetLibraryInfo &TLI);
  void createUserspaceApi(Module &M, const TargetLibraryInfo &TLI);

  template <typename... ArgsTy>
  FunctionCallee getOrInsertMsanMetadataFunction(Module &M, StringRef Name,
                                                 ArgsTy... Args);

  /// True if we're compiling the Linux kernel.
  bool CompileKernel;
  /// Track origins (allocation points) of uninitialized values.
  int TrackOrigins;
  bool Recover;
  bool EagerChecks;

  Triple TargetTriple;
  LLVMContext *C;
  Type *IntptrTy; ///< Integer type with the size of a ptr in default AS.
  Type *OriginTy;
  PointerType *PtrTy; ///< Pointer type in the default address space.

  // XxxTLS variables represent the per-thread state in MSan and per-task state
  // in KMSAN.
  // For the userspace these point to thread-local globals. In the kernel land
  // they point to the members of a per-task struct obtained via a call to
  // __msan_get_context_state().

  /// Thread-local shadow storage for function parameters.
  Value *ParamTLS;

  /// Thread-local origin storage for function parameters.
  Value *ParamOriginTLS;

  /// Thread-local shadow storage for function return value.
  Value *RetvalTLS;

  /// Thread-local origin storage for function return value.
  Value *RetvalOriginTLS;

  /// Thread-local shadow storage for in-register va_arg function.
  Value *VAArgTLS;

  /// Thread-local origin storage for in-register va_arg function.
  Value *VAArgOriginTLS;

  /// Thread-local storage holding the size of the va_arg overflow area.
  Value *VAArgOverflowSizeTLS;

  /// Are the instrumentation callbacks set up?
  bool CallbacksInitialized = false;

  /// The run-time callback to print a warning.
  FunctionCallee WarningFn;

  // These arrays are indexed by log2(AccessSize).
  FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
  // Variable-size variant (not indexed by access size).
  FunctionCallee MaybeWarningVarSizeFn;
  FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];

  /// Run-time helper that generates a new origin value for a stack
  /// allocation.
  FunctionCallee MsanSetAllocaOriginWithDescriptionFn;
  // No description version
  FunctionCallee MsanSetAllocaOriginNoDescriptionFn;

  /// Run-time helper that poisons stack on function entry.
  FunctionCallee MsanPoisonStackFn;

  /// Run-time helper that records a store (or any event) of an
  /// uninitialized value and returns an updated origin id encoding this info.
  FunctionCallee MsanChainOriginFn;

  /// Run-time helper that paints an origin over a region.
  FunctionCallee MsanSetOriginFn;

  /// MSan runtime replacements for memmove, memcpy and memset.
  FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;

  /// KMSAN per-task state: type of the context struct and the callback
  /// (__msan_get_context_state) that returns a pointer to it.
  StructType *MsanContextStateTy;
  FunctionCallee MsanGetContextStateFn;

  /// Functions for poisoning/unpoisoning local variables
  FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;

  /// Pair of shadow/origin pointers.
  Type *MsanMetadata;

  /// Each of the MsanMetadataPtrXxx functions returns a MsanMetadata.
  FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
  FunctionCallee MsanMetadataPtrForLoad_1_8[4];
  FunctionCallee MsanMetadataPtrForStore_1_8[4];
  FunctionCallee MsanInstrumentAsmStoreFn;

  /// Storage for return values of the MsanMetadataPtrXxx functions.
  Value *MsanMetadataAlloca;

  /// Helper to choose between different MsanMetadataPtrXxx().
  FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);

  /// Memory map parameters used in application-to-shadow calculation.
  const MemoryMapParams *MapParams;

  /// Custom memory map parameters used when -msan-shadow-base or
  /// -msan-origin-base is provided.
  MemoryMapParams CustomMapParams;

  MDNode *ColdCallWeights;

  /// Branch weights for origin store.
  MDNode *OriginStoreWeights;
};
755
756void insertModuleCtor(Module &M) {
757 getOrCreateSanitizerCtorAndInitFunctions(
758 M, CtorName: kMsanModuleCtorName, InitName: kMsanInitName,
759 /*InitArgTypes=*/{},
760 /*InitArgs=*/{},
761 // This callback is invoked when the functions are created the first
762 // time. Hook them into the global ctors list in that case:
763 FunctionsCreatedCallback: [&](Function *Ctor, FunctionCallee) {
764 if (!ClWithComdat) {
765 appendToGlobalCtors(M, F: Ctor, Priority: 0);
766 return;
767 }
768 Comdat *MsanCtorComdat = M.getOrInsertComdat(Name: kMsanModuleCtorName);
769 Ctor->setComdat(MsanCtorComdat);
770 appendToGlobalCtors(M, F: Ctor, Priority: 0, Data: Ctor);
771 });
772}
773
774template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
775 return (Opt.getNumOccurrences() > 0) ? Opt : Default;
776}
777
778} // end anonymous namespace
779
// Resolve the effective options: explicit command-line flags win over the
// constructor arguments. Kernel mode (KMSAN) defaults to origin tracking
// level 2 and to recovering (keep-going) after a report.
MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K,
                                               bool EagerChecks)
    : Kernel(getOptOrDefault(Opt: ClEnableKmsan, Default: K)),
      TrackOrigins(getOptOrDefault(Opt: ClTrackOrigins, Default: Kernel ? 2 : TO)),
      Recover(getOptOrDefault(Opt: ClKeepGoing, Default: Kernel || R)),
      EagerChecks(getOptOrDefault(Opt: ClEagerChecks, Default: EagerChecks)) {}
786
// Module entry point of the pass: instruments every non-empty function and,
// for userspace MSan, inserts the module constructor.
PreservedAnalyses MemorySanitizerPass::run(Module &M,
                                           ModuleAnalysisManager &AM) {
  // Return early if nosanitize_memory module flag is present for the module.
  if (checkIfAlreadyInstrumented(M, Flag: "nosanitize_memory"))
    return PreservedAnalyses::all();
  bool Modified = false;
  if (!Options.Kernel) {
    // Userspace MSan needs a ctor calling __msan_init; KMSAN does not.
    insertModuleCtor(M);
    Modified = true;
  }

  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
  for (Function &F : M) {
    if (F.empty())
      continue;
    // Note: a fresh MemorySanitizer instance is created for each function.
    MemorySanitizer Msan(*F.getParent(), Options);
    Modified |=
        Msan.sanitizeFunction(F, TLI&: FAM.getResult<TargetLibraryAnalysis>(IR&: F));
  }

  if (!Modified)
    return PreservedAnalyses::all();

  PreservedAnalyses PA = PreservedAnalyses::none();
  // GlobalsAA is considered stateless and does not get invalidated unless
  // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
  // make changes that require GlobalsAA to be invalidated.
  PA.abandon<GlobalsAA>();
  return PA;
}
817
// Print the textual pipeline form of this pass, appending the active options
// (recover/kernel/eager-checks and the track-origins level) in angle brackets.
void MemorySanitizerPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  // Let the base mixin print the pass name itself first.
  static_cast<PassInfoMixin<MemorySanitizerPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  OS << '<';
  if (Options.Recover)
    OS << "recover;";
  if (Options.Kernel)
    OS << "kernel;";
  if (Options.EagerChecks)
    OS << "eager-checks;";
  OS << "track-origins=" << Options.TrackOrigins;
  OS << '>';
}
832
/// Create a non-const global initialized with the given string.
///
/// Creates a writable global for Str so that we can pass it to the
/// run-time lib. Runtime uses first 4 bytes of the string to store the
/// frame ID, so the string needs to be mutable.
// NOTE(review): the doc comment above says the global must be writable, yet
// the GlobalVariable below is created with isConstant=true — confirm which of
// the two is intended.
static GlobalVariable *createPrivateConstGlobalForString(Module &M,
                                                         StringRef Str) {
  Constant *StrConst = ConstantDataArray::getString(Context&: M.getContext(), Initializer: Str);
  return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/true,
                            GlobalValue::PrivateLinkage, StrConst, "");
}
844
/// Declare (or look up) a KMSAN metadata accessor named \p Name.
///
/// On most targets the accessor returns the {shadow, origin} pointer pair by
/// value (MsanMetadata); on SystemZ the pair is returned through a hidden
/// pointer parameter instead, so the declared signature differs there.
template <typename... ArgsTy>
FunctionCallee
MemorySanitizer::getOrInsertMsanMetadataFunction(Module &M, StringRef Name,
                                                 ArgsTy... Args) {
  if (TargetTriple.getArch() == Triple::systemz) {
    // SystemZ ABI: shadow/origin pair is returned via a hidden parameter.
    return M.getOrInsertFunction(Name, Type::getVoidTy(C&: *C), PtrTy,
                                 std::forward<ArgsTy>(Args)...);
  }

  return M.getOrInsertFunction(Name, MsanMetadata,
                               std::forward<ArgsTy>(Args)...);
}
858
/// Create KMSAN API callbacks.
void MemorySanitizer::createKernelApi(Module &M, const TargetLibraryInfo &TLI) {
  IRBuilder<> IRB(*C);

  // These will be initialized in insertKmsanPrologue().
  RetvalTLS = nullptr;
  RetvalOriginTLS = nullptr;
  ParamTLS = nullptr;
  ParamOriginTLS = nullptr;
  VAArgTLS = nullptr;
  VAArgOriginTLS = nullptr;
  VAArgOverflowSizeTLS = nullptr;

  // In kernel mode the warning callback always takes a 32-bit origin id.
  WarningFn = M.getOrInsertFunction(Name: "__msan_warning",
                                    AttributeList: TLI.getAttrList(C, ArgNos: {0}, /*Signed=*/false),
                                    RetTy: IRB.getVoidTy(), Args: IRB.getInt32Ty());

  // Requests the per-task context state (kmsan_context_state*) from the
  // runtime library.
  // NOTE(review): this layout presumably mirrors struct kmsan_context_state
  // in the kernel runtime — keep the two in sync.
  MsanContextStateTy = StructType::get(
      elt1: ArrayType::get(ElementType: IRB.getInt64Ty(), NumElements: kParamTLSSize / 8),
      elts: ArrayType::get(ElementType: IRB.getInt64Ty(), NumElements: kRetvalTLSSize / 8),
      elts: ArrayType::get(ElementType: IRB.getInt64Ty(), NumElements: kParamTLSSize / 8),
      elts: ArrayType::get(ElementType: IRB.getInt64Ty(), NumElements: kParamTLSSize / 8), /* va_arg_origin */
      elts: IRB.getInt64Ty(), elts: ArrayType::get(ElementType: OriginTy, NumElements: kParamTLSSize / 4), elts: OriginTy,
      elts: OriginTy);
  MsanGetContextStateFn =
      M.getOrInsertFunction(Name: "__msan_get_context_state", RetTy: PtrTy);

  // A {shadow pointer, origin pointer} pair.
  MsanMetadata = StructType::get(elt1: PtrTy, elts: PtrTy);

  // Declare the fixed-size metadata accessors for access sizes 1, 2, 4 and 8.
  for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
    std::string name_load =
        "__msan_metadata_ptr_for_load_" + std::to_string(val: size);
    std::string name_store =
        "__msan_metadata_ptr_for_store_" + std::to_string(val: size);
    MsanMetadataPtrForLoad_1_8[ind] =
        getOrInsertMsanMetadataFunction(M, Name: name_load, Args: PtrTy);
    MsanMetadataPtrForStore_1_8[ind] =
        getOrInsertMsanMetadataFunction(M, Name: name_store, Args: PtrTy);
  }

  // Variable-size fallbacks used when the access size has no dedicated hook.
  MsanMetadataPtrForLoadN = getOrInsertMsanMetadataFunction(
      M, Name: "__msan_metadata_ptr_for_load_n", Args: PtrTy, Args: IntptrTy);
  MsanMetadataPtrForStoreN = getOrInsertMsanMetadataFunction(
      M, Name: "__msan_metadata_ptr_for_store_n", Args: PtrTy, Args: IntptrTy);

  // Functions for poisoning and unpoisoning memory.
  MsanPoisonAllocaFn = M.getOrInsertFunction(
      Name: "__msan_poison_alloca", RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IntptrTy, Args: PtrTy);
  MsanUnpoisonAllocaFn = M.getOrInsertFunction(
      Name: "__msan_unpoison_alloca", RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IntptrTy);
}
912
/// Declare (or look up) an external thread-local global with the given name
/// and type, using the initial-exec TLS model.
static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
  return M.getOrInsertGlobal(Name, Ty, CreateGlobalCallback: [&] {
    return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
                              nullptr, Name, nullptr,
                              GlobalVariable::InitialExecTLSModel);
  });
}
920
921/// Insert declarations for userspace-specific functions and globals.
922void MemorySanitizer::createUserspaceApi(Module &M,
923 const TargetLibraryInfo &TLI) {
924 IRBuilder<> IRB(*C);
925
926 // Create the callback.
927 // FIXME: this function should have "Cold" calling conv,
928 // which is not yet implemented.
929 if (TrackOrigins) {
930 StringRef WarningFnName = Recover ? "__msan_warning_with_origin"
931 : "__msan_warning_with_origin_noreturn";
932 WarningFn = M.getOrInsertFunction(Name: WarningFnName,
933 AttributeList: TLI.getAttrList(C, ArgNos: {0}, /*Signed=*/false),
934 RetTy: IRB.getVoidTy(), Args: IRB.getInt32Ty());
935 } else {
936 StringRef WarningFnName =
937 Recover ? "__msan_warning" : "__msan_warning_noreturn";
938 WarningFn = M.getOrInsertFunction(Name: WarningFnName, RetTy: IRB.getVoidTy());
939 }
940
941 // Create the global TLS variables.
942 RetvalTLS =
943 getOrInsertGlobal(M, Name: "__msan_retval_tls",
944 Ty: ArrayType::get(ElementType: IRB.getInt64Ty(), NumElements: kRetvalTLSSize / 8));
945
946 RetvalOriginTLS = getOrInsertGlobal(M, Name: "__msan_retval_origin_tls", Ty: OriginTy);
947
948 ParamTLS =
949 getOrInsertGlobal(M, Name: "__msan_param_tls",
950 Ty: ArrayType::get(ElementType: IRB.getInt64Ty(), NumElements: kParamTLSSize / 8));
951
952 ParamOriginTLS =
953 getOrInsertGlobal(M, Name: "__msan_param_origin_tls",
954 Ty: ArrayType::get(ElementType: OriginTy, NumElements: kParamTLSSize / 4));
955
956 VAArgTLS =
957 getOrInsertGlobal(M, Name: "__msan_va_arg_tls",
958 Ty: ArrayType::get(ElementType: IRB.getInt64Ty(), NumElements: kParamTLSSize / 8));
959
960 VAArgOriginTLS =
961 getOrInsertGlobal(M, Name: "__msan_va_arg_origin_tls",
962 Ty: ArrayType::get(ElementType: OriginTy, NumElements: kParamTLSSize / 4));
963
964 VAArgOverflowSizeTLS = getOrInsertGlobal(M, Name: "__msan_va_arg_overflow_size_tls",
965 Ty: IRB.getIntPtrTy(DL: M.getDataLayout()));
966
967 for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
968 AccessSizeIndex++) {
969 unsigned AccessSize = 1 << AccessSizeIndex;
970 std::string FunctionName = "__msan_maybe_warning_" + itostr(X: AccessSize);
971 MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
972 Name: FunctionName, AttributeList: TLI.getAttrList(C, ArgNos: {0, 1}, /*Signed=*/false),
973 RetTy: IRB.getVoidTy(), Args: IRB.getIntNTy(N: AccessSize * 8), Args: IRB.getInt32Ty());
974 MaybeWarningVarSizeFn = M.getOrInsertFunction(
975 Name: "__msan_maybe_warning_N", AttributeList: TLI.getAttrList(C, ArgNos: {}, /*Signed=*/false),
976 RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IRB.getInt64Ty(), Args: IRB.getInt32Ty());
977 FunctionName = "__msan_maybe_store_origin_" + itostr(X: AccessSize);
978 MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
979 Name: FunctionName, AttributeList: TLI.getAttrList(C, ArgNos: {0, 2}, /*Signed=*/false),
980 RetTy: IRB.getVoidTy(), Args: IRB.getIntNTy(N: AccessSize * 8), Args: PtrTy,
981 Args: IRB.getInt32Ty());
982 }
983
984 MsanSetAllocaOriginWithDescriptionFn =
985 M.getOrInsertFunction(Name: "__msan_set_alloca_origin_with_descr",
986 RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IntptrTy, Args: PtrTy, Args: PtrTy);
987 MsanSetAllocaOriginNoDescriptionFn =
988 M.getOrInsertFunction(Name: "__msan_set_alloca_origin_no_descr",
989 RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IntptrTy, Args: PtrTy);
990 MsanPoisonStackFn = M.getOrInsertFunction(Name: "__msan_poison_stack",
991 RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IntptrTy);
992}
993
/// Insert extern declaration of runtime-provided functions and globals.
void MemorySanitizer::initializeCallbacks(Module &M,
                                          const TargetLibraryInfo &TLI) {
  // Only do this once.
  if (CallbacksInitialized)
    return;

  IRBuilder<> IRB(*C);
  // Initialize callbacks that are common for kernel and userspace
  // instrumentation.
  MsanChainOriginFn = M.getOrInsertFunction(
      Name: "__msan_chain_origin",
      AttributeList: TLI.getAttrList(C, ArgNos: {0}, /*Signed=*/false, /*Ret=*/true), RetTy: IRB.getInt32Ty(),
      Args: IRB.getInt32Ty());
  MsanSetOriginFn = M.getOrInsertFunction(
      Name: "__msan_set_origin", AttributeList: TLI.getAttrList(C, ArgNos: {2}, /*Signed=*/false),
      RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IntptrTy, Args: IRB.getInt32Ty());
  // MSan runtime versions of memmove/memcpy/memset.
  MemmoveFn =
      M.getOrInsertFunction(Name: "__msan_memmove", RetTy: PtrTy, Args: PtrTy, Args: PtrTy, Args: IntptrTy);
  MemcpyFn =
      M.getOrInsertFunction(Name: "__msan_memcpy", RetTy: PtrTy, Args: PtrTy, Args: PtrTy, Args: IntptrTy);
  MemsetFn = M.getOrInsertFunction(Name: "__msan_memset",
                                   AttributeList: TLI.getAttrList(C, ArgNos: {1}, /*Signed=*/true),
                                   RetTy: PtrTy, Args: PtrTy, Args: IRB.getInt32Ty(), Args: IntptrTy);

  MsanInstrumentAsmStoreFn = M.getOrInsertFunction(
      Name: "__msan_instrument_asm_store", RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IntptrTy);

  // The remaining callbacks differ between KMSAN and userspace MSan.
  if (CompileKernel) {
    createKernelApi(M, TLI);
  } else {
    createUserspaceApi(M, TLI);
  }
  CallbacksInitialized = true;
}
1029
1030FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
1031 int size) {
1032 FunctionCallee *Fns =
1033 isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
1034 switch (size) {
1035 case 1:
1036 return Fns[0];
1037 case 2:
1038 return Fns[1];
1039 case 4:
1040 return Fns[2];
1041 case 8:
1042 return Fns[3];
1043 default:
1044 return nullptr;
1045 }
1046}
1047
/// Module-level initialization.
///
/// inserts a call to __msan_init to the module's constructor list.
void MemorySanitizer::initializeModule(Module &M) {
  auto &DL = M.getDataLayout();

  TargetTriple = M.getTargetTriple();

  bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
  bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
  // Check the overrides first
  if (ShadowPassed || OriginPassed) {
    CustomMapParams.AndMask = ClAndMask;
    CustomMapParams.XorMask = ClXorMask;
    CustomMapParams.ShadowBase = ClShadowBase;
    CustomMapParams.OriginBase = ClOriginBase;
    MapParams = &CustomMapParams;
  } else {
    // Select the application-to-shadow mapping parameters by OS, then by
    // architecture; unsupported combinations are hard errors.
    switch (TargetTriple.getOS()) {
    case Triple::FreeBSD:
      switch (TargetTriple.getArch()) {
      case Triple::aarch64:
        MapParams = FreeBSD_ARM_MemoryMapParams.bits64;
        break;
      case Triple::x86_64:
        MapParams = FreeBSD_X86_MemoryMapParams.bits64;
        break;
      case Triple::x86:
        MapParams = FreeBSD_X86_MemoryMapParams.bits32;
        break;
      default:
        report_fatal_error(reason: "unsupported architecture");
      }
      break;
    case Triple::NetBSD:
      switch (TargetTriple.getArch()) {
      case Triple::x86_64:
        MapParams = NetBSD_X86_MemoryMapParams.bits64;
        break;
      default:
        report_fatal_error(reason: "unsupported architecture");
      }
      break;
    case Triple::Linux:
      switch (TargetTriple.getArch()) {
      case Triple::x86_64:
        MapParams = Linux_X86_MemoryMapParams.bits64;
        break;
      case Triple::x86:
        MapParams = Linux_X86_MemoryMapParams.bits32;
        break;
      case Triple::mips64:
      case Triple::mips64el:
        MapParams = Linux_MIPS_MemoryMapParams.bits64;
        break;
      case Triple::ppc64:
      case Triple::ppc64le:
        MapParams = Linux_PowerPC_MemoryMapParams.bits64;
        break;
      case Triple::systemz:
        MapParams = Linux_S390_MemoryMapParams.bits64;
        break;
      case Triple::aarch64:
      case Triple::aarch64_be:
        MapParams = Linux_ARM_MemoryMapParams.bits64;
        break;
      case Triple::loongarch64:
        MapParams = Linux_LoongArch_MemoryMapParams.bits64;
        break;
      case Triple::hexagon:
        MapParams = Linux_Hexagon_MemoryMapParams_P.bits32;
        break;
      default:
        report_fatal_error(reason: "unsupported architecture");
      }
      break;
    default:
      report_fatal_error(reason: "unsupported operating system");
    }
  }

  // Cache the context, commonly used types and branch-weight metadata.
  C = &(M.getContext());
  IRBuilder<> IRB(*C);
  IntptrTy = IRB.getIntPtrTy(DL);
  OriginTy = IRB.getInt32Ty();
  PtrTy = IRB.getPtrTy();

  ColdCallWeights = MDBuilder(*C).createUnlikelyBranchWeights();
  OriginStoreWeights = MDBuilder(*C).createUnlikelyBranchWeights();

  if (!CompileKernel) {
    // Expose the effective options to the runtime as weak ODR constants.
    if (TrackOrigins)
      M.getOrInsertGlobal(Name: "__msan_track_origins", Ty: IRB.getInt32Ty(), CreateGlobalCallback: [&] {
        return new GlobalVariable(
            M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
            IRB.getInt32(C: TrackOrigins), "__msan_track_origins");
      });

    if (Recover)
      M.getOrInsertGlobal(Name: "__msan_keep_going", Ty: IRB.getInt32Ty(), CreateGlobalCallback: [&] {
        return new GlobalVariable(M, IRB.getInt32Ty(), true,
                                  GlobalValue::WeakODRLinkage,
                                  IRB.getInt32(C: Recover), "__msan_keep_going");
      });
  }
}
1154
1155namespace {
1156
/// A helper class that handles instrumentation of VarArg
/// functions on a particular platform.
///
/// Implementations are expected to insert the instrumentation
/// necessary to propagate argument shadow through VarArg function
/// calls. Visit* methods are called during an InstVisitor pass over
/// the function, and should avoid creating new basic blocks. A new
/// instance of this class is created for each instrumented function.
struct VarArgHelper {
  virtual ~VarArgHelper() = default;

  /// Visit a CallBase so the implementation can set up shadow/origin
  /// propagation for any variadic arguments of the call.
  virtual void visitCallBase(CallBase &CB, IRBuilder<> &IRB) = 0;

  /// Visit a va_start call.
  virtual void visitVAStartInst(VAStartInst &I) = 0;

  /// Visit a va_copy call.
  virtual void visitVACopyInst(VACopyInst &I) = 0;

  /// Finalize function instrumentation.
  ///
  /// This method is called after visiting all interesting (see above)
  /// instructions in a function.
  virtual void finalizeInstrumentation() = 0;
};
1183
1184struct MemorySanitizerVisitor;
1185
1186} // end anonymous namespace
1187
1188static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
1189 MemorySanitizerVisitor &Visitor);
1190
1191static unsigned TypeSizeToSizeIndex(TypeSize TS) {
1192 if (TS.isScalable())
1193 // Scalable types unconditionally take slowpaths.
1194 return kNumberOfAccessSizes;
1195 unsigned TypeSizeFixed = TS.getFixedValue();
1196 if (TypeSizeFixed <= 8)
1197 return 0;
1198 return Log2_32_Ceil(Value: (TypeSizeFixed + 7) / 8);
1199}
1200
1201namespace {
1202
/// Helper class to attach debug information of the given instruction onto new
/// instructions inserted after.
class NextNodeIRBuilder : public IRBuilder<> {
public:
  // Positions the builder right after IP and inherits IP's debug location.
  explicit NextNodeIRBuilder(Instruction *IP) : IRBuilder<>(IP->getNextNode()) {
    SetCurrentDebugLocation(IP->getDebugLoc());
  }
};
1211
1212/// This class does all the work for a given function. Store and Load
1213/// instructions store and load corresponding shadow and origin
1214/// values. Most instructions propagate shadow from arguments to their
1215/// return values. Certain instructions (most importantly, BranchInst)
1216/// test their argument shadow and print reports (with a runtime call) if it's
1217/// non-zero.
1218struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
1219 Function &F;
1220 MemorySanitizer &MS;
1221 SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
1222 ValueMap<Value *, Value *> ShadowMap, OriginMap;
1223 std::unique_ptr<VarArgHelper> VAHelper;
1224 const TargetLibraryInfo *TLI;
1225 Instruction *FnPrologueEnd;
1226 SmallVector<Instruction *, 16> Instructions;
1227
1228 // The following flags disable parts of MSan instrumentation based on
1229 // exclusion list contents and command-line options.
1230 bool InsertChecks;
1231 bool PropagateShadow;
1232 bool PoisonStack;
1233 bool PoisonUndef;
1234 bool PoisonUndefVectors;
1235
1236 struct ShadowOriginAndInsertPoint {
1237 Value *Shadow;
1238 Value *Origin;
1239 Instruction *OrigIns;
1240
1241 ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
1242 : Shadow(S), Origin(O), OrigIns(I) {}
1243 };
1244 SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
1245 DenseMap<const DILocation *, int> LazyWarningDebugLocationCount;
1246 SmallSetVector<AllocaInst *, 16> AllocaSet;
1247 SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
1248 SmallVector<StoreInst *, 16> StoreList;
1249 int64_t SplittableBlocksCount = 0;
1250
  // Sets up per-function state: derives the instrumentation flags from the
  // function's sanitize_memory attribute and command-line options, inserts
  // the prologue marker and, for KMSAN, the kernel prologue.
  MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
                         const TargetLibraryInfo &TLI)
      : F(F), MS(MS), VAHelper(CreateVarArgHelper(Func&: F, Msan&: MS, Visitor&: *this)), TLI(&TLI) {
    bool SanitizeFunction =
        F.hasFnAttribute(Kind: Attribute::SanitizeMemory) && !ClDisableChecks;
    InsertChecks = SanitizeFunction;
    PropagateShadow = SanitizeFunction;
    PoisonStack = SanitizeFunction && ClPoisonStack;
    PoisonUndef = SanitizeFunction && ClPoisonUndef;
    PoisonUndefVectors = SanitizeFunction && ClPoisonUndefVectors;

    // In the presence of unreachable blocks, we may see Phi nodes with
    // incoming nodes from such blocks. Since InstVisitor skips unreachable
    // blocks, such nodes will not have any shadow value associated with them.
    // It's easier to remove unreachable blocks than deal with missing shadow.
    removeUnreachableBlocks(F);

    MS.initializeCallbacks(M&: *F.getParent(), TLI);
    // Marker intrinsic at the top of the entry block; isInPrologue() uses it
    // to identify instructions belonging to the function prologue.
    FnPrologueEnd =
        IRBuilder<>(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHIIt())
            .CreateIntrinsic(ID: Intrinsic::donothing, Args: {});

    if (MS.CompileKernel) {
      IRBuilder<> IRB(FnPrologueEnd);
      insertKmsanPrologue(IRB);
    }

    LLVM_DEBUG(if (!InsertChecks) dbgs()
               << "MemorySanitizer is not inserting checks into '"
               << F.getName() << "'\n");
  }
1282
1283 bool instrumentWithCalls(Value *V) {
1284 // Constants likely will be eliminated by follow-up passes.
1285 if (isa<Constant>(Val: V))
1286 return false;
1287 ++SplittableBlocksCount;
1288 return ClInstrumentationWithCallThreshold >= 0 &&
1289 SplittableBlocksCount > ClInstrumentationWithCallThreshold;
1290 }
1291
  // Returns true if instruction I is part of the function prologue created by
  // this pass, i.e. it is in the entry block and precedes (or is) the
  // FnPrologueEnd marker.
  bool isInPrologue(Instruction &I) {
    return I.getParent() == FnPrologueEnd->getParent() &&
           (&I == FnPrologueEnd || I.comesBefore(Other: FnPrologueEnd));
  }
1296
1297 // Creates a new origin and records the stack trace. In general we can call
1298 // this function for any origin manipulation we like. However it will cost
1299 // runtime resources. So use this wisely only if it can provide additional
1300 // information helpful to a user.
1301 Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
1302 if (MS.TrackOrigins <= 1)
1303 return V;
1304 return IRB.CreateCall(Callee: MS.MsanChainOriginFn, Args: V);
1305 }
1306
  // Widens a 32-bit origin to intptr width by replicating it into both halves
  // of the wider integer, so it can be written with pointer-sized stores.
  Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
    const DataLayout &DL = F.getDataLayout();
    unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
    if (IntptrSize == kOriginSize)
      return Origin;
    // Only the exact 2x case is handled (e.g. 64-bit intptr, 32-bit origin).
    assert(IntptrSize == kOriginSize * 2);
    Origin = IRB.CreateIntCast(V: Origin, DestTy: MS.IntptrTy, /* isSigned */ false);
    return IRB.CreateOr(LHS: Origin, RHS: IRB.CreateShl(LHS: Origin, RHS: kOriginSize * 8));
  }
1316
  /// Fill memory range with the given origin value.
  ///
  /// For fixed sizes the stores are fully unrolled, using pointer-sized
  /// (replicated) origin stores where alignment allows; scalable sizes fall
  /// back to a runtime loop of kOriginSize-wide stores.
  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
                   TypeSize TS, Align Alignment) {
    const DataLayout &DL = F.getDataLayout();
    const Align IntptrAlignment = DL.getABITypeAlign(Ty: MS.IntptrTy);
    unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
    assert(IntptrAlignment >= kMinOriginAlignment);
    assert(IntptrSize >= kOriginSize);

    // Note: The loop based formation works for fixed length vectors too,
    // however we prefer to unroll and specialize alignment below.
    if (TS.isScalable()) {
      // Number of origin slots to fill: ceil(size-in-bytes / kOriginSize).
      Value *Size = IRB.CreateTypeSize(Ty: MS.IntptrTy, Size: TS);
      Value *RoundUp =
          IRB.CreateAdd(LHS: Size, RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kOriginSize - 1));
      Value *End =
          IRB.CreateUDiv(LHS: RoundUp, RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kOriginSize));
      auto [InsertPt, Index] =
          SplitBlockAndInsertSimpleForLoop(End, SplitBefore: IRB.GetInsertPoint());
      IRB.SetInsertPoint(InsertPt);

      Value *GEP = IRB.CreateGEP(Ty: MS.OriginTy, Ptr: OriginPtr, IdxList: Index);
      IRB.CreateAlignedStore(Val: Origin, Ptr: GEP, Align: kMinOriginAlignment);
      return;
    }

    unsigned Size = TS.getFixedValue();

    unsigned Ofs = 0;
    Align CurrentAlignment = Alignment;
    // When alignment allows, cover most of the range with wider intptr-sized
    // stores of the replicated origin.
    if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
      Value *IntptrOrigin = originToIntptr(IRB, Origin);
      Value *IntptrOriginPtr = IRB.CreatePointerCast(V: OriginPtr, DestTy: MS.PtrTy);
      for (unsigned i = 0; i < Size / IntptrSize; ++i) {
        Value *Ptr = i ? IRB.CreateConstGEP1_32(Ty: MS.IntptrTy, Ptr: IntptrOriginPtr, Idx0: i)
                       : IntptrOriginPtr;
        IRB.CreateAlignedStore(Val: IntptrOrigin, Ptr, Align: CurrentAlignment);
        Ofs += IntptrSize / kOriginSize;
        CurrentAlignment = IntptrAlignment;
      }
    }

    // Cover the remainder (or everything, if the wide path was skipped) with
    // kOriginSize-wide stores.
    for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
      Value *GEP =
          i ? IRB.CreateConstGEP1_32(Ty: MS.OriginTy, Ptr: OriginPtr, Idx0: i) : OriginPtr;
      IRB.CreateAlignedStore(Val: Origin, Ptr: GEP, Align: CurrentAlignment);
      CurrentAlignment = kMinOriginAlignment;
    }
  }
1366
  /// Store the origin corresponding to a shadow store.
  ///
  /// If the converted shadow is a constant, the origin store is either omitted
  /// (value known initialized) or emitted unconditionally (known poisoned);
  /// otherwise it is guarded by a runtime shadow test, or routed through
  /// __msan_maybe_store_origin_* when call-based instrumentation is enabled.
  void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
                   Value *OriginPtr, Align Alignment) {
    const DataLayout &DL = F.getDataLayout();
    const Align OriginAlignment = std::max(a: kMinOriginAlignment, b: Alignment);
    TypeSize StoreSize = DL.getTypeStoreSize(Ty: Shadow->getType());
    // ZExt cannot convert between vector and scalar
    Value *ConvertedShadow = convertShadowToScalar(V: Shadow, IRB);
    if (auto *ConstantShadow = dyn_cast<Constant>(Val: ConvertedShadow)) {
      if (!ClCheckConstantShadow || ConstantShadow->isNullValue()) {
        // Origin is not needed: value is initialized or const shadow is
        // ignored.
        return;
      }
      if (llvm::isKnownNonZero(V: ConvertedShadow, Q: DL)) {
        // Copy origin as the value is definitely uninitialized.
        paintOrigin(IRB, Origin: updateOrigin(V: Origin, IRB), OriginPtr, TS: StoreSize,
                    Alignment: OriginAlignment);
        return;
      }
      // Fallback to runtime check, which still can be optimized out later.
    }

    TypeSize TypeSizeInBits = DL.getTypeSizeInBits(Ty: ConvertedShadow->getType());
    unsigned SizeIndex = TypeSizeToSizeIndex(TS: TypeSizeInBits);
    if (instrumentWithCalls(V: ConvertedShadow) &&
        SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
      // Call-based path: delegate the shadow test and origin store to the
      // runtime helper for this access size.
      FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
      Value *ConvertedShadow2 =
          IRB.CreateZExt(V: ConvertedShadow, DestTy: IRB.getIntNTy(N: 8 * (1 << SizeIndex)));
      CallBase *CB = IRB.CreateCall(Callee: Fn, Args: {ConvertedShadow2, Addr, Origin});
      CB->addParamAttr(ArgNo: 0, Kind: Attribute::ZExt);
      CB->addParamAttr(ArgNo: 2, Kind: Attribute::ZExt);
    } else {
      // Inline path: branch on "shadow != 0" and paint the origin in the
      // conditional block.
      Value *Cmp = convertToBool(V: ConvertedShadow, IRB, name: "_mscmp");
      Instruction *CheckTerm = SplitBlockAndInsertIfThen(
          Cond: Cmp, SplitBefore: &*IRB.GetInsertPoint(), Unreachable: false, BranchWeights: MS.OriginStoreWeights);
      IRBuilder<> IRBNew(CheckTerm);
      paintOrigin(IRB&: IRBNew, Origin: updateOrigin(V: Origin, IRB&: IRBNew), OriginPtr, TS: StoreSize,
                  Alignment: OriginAlignment);
    }
  }
1408
  /// Emit shadow (and, when tracking, origin) stores for every store
  /// instruction collected in StoreList during the visitation pass.
  void materializeStores() {
    for (StoreInst *SI : StoreList) {
      IRBuilder<> IRB(SI);
      Value *Val = SI->getValueOperand();
      Value *Addr = SI->getPointerOperand();
      // Atomic stores get clean shadow; their value shadow is not propagated.
      Value *Shadow = SI->isAtomic() ? getCleanShadow(V: Val) : getShadow(V: Val);
      Value *ShadowPtr, *OriginPtr;
      Type *ShadowTy = Shadow->getType();
      const Align Alignment = SI->getAlign();
      const Align OriginAlignment = std::max(a: kMinOriginAlignment, b: Alignment);
      std::tie(args&: ShadowPtr, args&: OriginPtr) =
          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);

      [[maybe_unused]] StoreInst *NewSI =
          IRB.CreateAlignedStore(Val: Shadow, Ptr: ShadowPtr, Align: Alignment);
      LLVM_DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");

      // Strengthen the ordering so the shadow store is not observed after
      // the application store.
      if (SI->isAtomic())
        SI->setOrdering(addReleaseOrdering(a: SI->getOrdering()));

      if (MS.TrackOrigins && !SI->isAtomic())
        storeOrigin(IRB, Addr, Shadow, Origin: getOrigin(V: Val), OriginPtr,
                    Alignment: OriginAlignment);
    }
  }
1434
  // Returns true if Debug Location corresponds to multiple warnings.
  bool shouldDisambiguateWarningLocation(const DebugLoc &DebugLoc) {
    // Disambiguation requires full origin tracking (level >= 2).
    if (MS.TrackOrigins < 2)
      return false;

    // Lazily build the per-debug-location warning counts on first use.
    if (LazyWarningDebugLocationCount.empty())
      for (const auto &I : InstrumentationList)
        ++LazyWarningDebugLocationCount[I.OrigIns->getDebugLoc()];

    return LazyWarningDebugLocationCount[DebugLoc] >= ClDisambiguateWarning;
  }
1446
  /// Helper function to insert a warning at IRB's current insert point.
  void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
    // Callbacks that take an origin still need an argument; use origin 0.
    if (!Origin)
      Origin = (Value *)IRB.getInt32(C: 0);
    assert(Origin->getType()->isIntegerTy());

    if (shouldDisambiguateWarningLocation(DebugLoc: IRB.getCurrentDebugLocation())) {
      // Try to create additional origin with debug info of the last origin
      // instruction. It may provide additional information to the user.
      if (Instruction *OI = dyn_cast_or_null<Instruction>(Val: Origin)) {
        assert(MS.TrackOrigins);
        auto NewDebugLoc = OI->getDebugLoc();
        // Origin update with missing or the same debug location provides no
        // additional value.
        if (NewDebugLoc && NewDebugLoc != IRB.getCurrentDebugLocation()) {
          // Insert update just before the check, so we call runtime only just
          // before the report.
          IRBuilder<> IRBOrigin(&*IRB.GetInsertPoint());
          IRBOrigin.SetCurrentDebugLocation(NewDebugLoc);
          Origin = updateOrigin(V: Origin, IRB&: IRBOrigin);
        }
      }
    }

    // KMSAN and origin-tracking builds use the warning callback that takes an
    // origin id; plain userspace MSan uses the zero-argument variant.
    if (MS.CompileKernel || MS.TrackOrigins)
      IRB.CreateCall(Callee: MS.WarningFn, Args: Origin)->setCannotMerge();
    else
      IRB.CreateCall(Callee: MS.WarningFn)->setCannotMerge();
    // FIXME: Insert UnreachableInst if !MS.Recover?
    // This may invalidate some of the following checks and needs to be done
    // at the very end.
  }
1479
  /// Emit one shadow check at IRB's insert point: either a call to a
  /// __msan_maybe_warning_* runtime helper (call-based instrumentation) or an
  /// inline compare-and-branch leading to insertWarningFn.
  void materializeOneCheck(IRBuilder<> &IRB, Value *ConvertedShadow,
                           Value *Origin) {
    const DataLayout &DL = F.getDataLayout();
    TypeSize TypeSizeInBits = DL.getTypeSizeInBits(Ty: ConvertedShadow->getType());
    unsigned SizeIndex = TypeSizeToSizeIndex(TS: TypeSizeInBits);
    if (instrumentWithCalls(V: ConvertedShadow) && !MS.CompileKernel) {
      // ZExt cannot convert between vector and scalar
      ConvertedShadow = convertShadowToScalar(V: ConvertedShadow, IRB);
      Value *ConvertedShadow2 =
          IRB.CreateZExt(V: ConvertedShadow, DestTy: IRB.getIntNTy(N: 8 * (1 << SizeIndex)));

      if (SizeIndex < kNumberOfAccessSizes) {
        // Fixed-size helper exists for this access size.
        FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
        CallBase *CB = IRB.CreateCall(
            Callee: Fn,
            Args: {ConvertedShadow2,
             MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(C: 0)});
        CB->addParamAttr(ArgNo: 0, Kind: Attribute::ZExt);
        CB->addParamAttr(ArgNo: 1, Kind: Attribute::ZExt);
      } else {
        // Oversized shadow: spill it to a stack slot and call the
        // variable-size helper with a pointer and byte count.
        FunctionCallee Fn = MS.MaybeWarningVarSizeFn;
        Value *ShadowAlloca = IRB.CreateAlloca(Ty: ConvertedShadow2->getType(), AddrSpace: 0u);
        IRB.CreateStore(Val: ConvertedShadow2, Ptr: ShadowAlloca);
        unsigned ShadowSize = DL.getTypeAllocSize(Ty: ConvertedShadow2->getType());
        CallBase *CB = IRB.CreateCall(
            Callee: Fn,
            Args: {ShadowAlloca, ConstantInt::get(Ty: IRB.getInt64Ty(), V: ShadowSize),
             MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(C: 0)});
        CB->addParamAttr(ArgNo: 1, Kind: Attribute::ZExt);
        CB->addParamAttr(ArgNo: 2, Kind: Attribute::ZExt);
      }
    } else {
      // Inline path: branch on "shadow != 0" and emit the warning call in the
      // conditional block (unreachable afterwards unless recovering).
      Value *Cmp = convertToBool(V: ConvertedShadow, IRB, name: "_mscmp");
      Instruction *CheckTerm = SplitBlockAndInsertIfThen(
          Cond: Cmp, SplitBefore: &*IRB.GetInsertPoint(),
          /* Unreachable */ !MS.Recover, BranchWeights: MS.ColdCallWeights);

      IRB.SetInsertPoint(CheckTerm);
      insertWarningFn(IRB, Origin);
      LLVM_DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
    }
  }
1522
  /// Materialize all checks queued for one original instruction. When origin
  /// tracking is off, the individual shadow values are OR-ed together into a
  /// single combined check; with origins each shadow is checked separately so
  /// the correct origin can be reported.
  void materializeInstructionChecks(
      ArrayRef<ShadowOriginAndInsertPoint> InstructionChecks) {
    const DataLayout &DL = F.getDataLayout();
    // Disable combining in some cases. TrackOrigins checks each shadow to pick
    // correct origin.
    bool Combine = !MS.TrackOrigins;
    Instruction *Instruction = InstructionChecks.front().OrigIns;
    Value *Shadow = nullptr;
    for (const auto &ShadowData : InstructionChecks) {
      assert(ShadowData.OrigIns == Instruction);
      IRBuilder<> IRB(Instruction);

      Value *ConvertedShadow = ShadowData.Shadow;

      if (auto *ConstantShadow = dyn_cast<Constant>(Val: ConvertedShadow)) {
        if (!ClCheckConstantShadow || ConstantShadow->isNullValue()) {
          // Skip, value is initialized or const shadow is ignored.
          continue;
        }
        if (llvm::isKnownNonZero(V: ConvertedShadow, Q: DL)) {
          // Report as the value is definitely uninitialized.
          insertWarningFn(IRB, Origin: ShadowData.Origin);
          if (!MS.Recover)
            return; // Always fail and stop here, not need to check the rest.
          // Skip entire instruction,
          continue;
        }
        // Fallback to runtime check, which still can be optimized out later.
      }

      if (!Combine) {
        // Per-shadow check so each warning carries its own origin.
        materializeOneCheck(IRB, ConvertedShadow, Origin: ShadowData.Origin);
        continue;
      }

      if (!Shadow) {
        Shadow = ConvertedShadow;
        continue;
      }

      // Fold this shadow into the accumulated boolean for a single check.
      Shadow = convertToBool(V: Shadow, IRB, name: "_mscmp");
      ConvertedShadow = convertToBool(V: ConvertedShadow, IRB, name: "_mscmp");
      Shadow = IRB.CreateOr(LHS: Shadow, RHS: ConvertedShadow, Name: "_msor");
    }

    if (Shadow) {
      assert(Combine);
      IRBuilder<> IRB(Instruction);
      materializeOneCheck(IRB, ConvertedShadow: Shadow, Origin: nullptr);
    }
  }
1574
1575 static bool isAArch64SVCount(Type *Ty) {
1576 if (TargetExtType *TTy = dyn_cast<TargetExtType>(Val: Ty))
1577 return TTy->getName() == "aarch64.svcount";
1578 return false;
1579 }
1580
  // This is intended to match the "AArch64 Predicate-as-Counter Type" (aka
  // 'target("aarch64.svcount")', but not e.g., <vscale x 4 x i32>.
  static bool isScalableNonVectorType(Type *Ty) {
    // NOTE(review): this debug message fires for every type that is not
    // svcount — including ordinary fixed-size types — not only for scalable
    // non-vector types; confirm whether that tracing breadth is intended.
    if (!isAArch64SVCount(Ty))
      LLVM_DEBUG(dbgs() << "isScalableNonVectorType: Unexpected type " << *Ty
                        << "\n");

    // Scalable *vectors* are handled elsewhere; only scalable non-vector
    // types (e.g. svcount) match here.
    return Ty->isScalableTy() && !isa<VectorType>(Val: Ty);
  }
1590
1591 void materializeChecks() {
1592#ifndef NDEBUG
1593 // For assert below.
1594 SmallPtrSet<Instruction *, 16> Done;
1595#endif
1596
1597 for (auto I = InstrumentationList.begin();
1598 I != InstrumentationList.end();) {
1599 auto OrigIns = I->OrigIns;
1600 // Checks are grouped by the original instruction. We call all
1601 // `insertShadowCheck` for an instruction at once.
1602 assert(Done.insert(OrigIns).second);
1603 auto J = std::find_if(first: I + 1, last: InstrumentationList.end(),
1604 pred: [OrigIns](const ShadowOriginAndInsertPoint &R) {
1605 return OrigIns != R.OrigIns;
1606 });
1607 // Process all checks of instruction at once.
1608 materializeInstructionChecks(InstructionChecks: ArrayRef<ShadowOriginAndInsertPoint>(I, J));
1609 I = J;
1610 }
1611
1612 LLVM_DEBUG(dbgs() << "DONE:\n" << F);
1613 }
1614
1615 // Returns the last instruction in the new prologue
1616 void insertKmsanPrologue(IRBuilder<> &IRB) {
1617 Value *ContextState = IRB.CreateCall(Callee: MS.MsanGetContextStateFn, Args: {});
1618 Constant *Zero = IRB.getInt32(C: 0);
1619 MS.ParamTLS = IRB.CreateGEP(Ty: MS.MsanContextStateTy, Ptr: ContextState,
1620 IdxList: {Zero, IRB.getInt32(C: 0)}, Name: "param_shadow");
1621 MS.RetvalTLS = IRB.CreateGEP(Ty: MS.MsanContextStateTy, Ptr: ContextState,
1622 IdxList: {Zero, IRB.getInt32(C: 1)}, Name: "retval_shadow");
1623 MS.VAArgTLS = IRB.CreateGEP(Ty: MS.MsanContextStateTy, Ptr: ContextState,
1624 IdxList: {Zero, IRB.getInt32(C: 2)}, Name: "va_arg_shadow");
1625 MS.VAArgOriginTLS = IRB.CreateGEP(Ty: MS.MsanContextStateTy, Ptr: ContextState,
1626 IdxList: {Zero, IRB.getInt32(C: 3)}, Name: "va_arg_origin");
1627 MS.VAArgOverflowSizeTLS =
1628 IRB.CreateGEP(Ty: MS.MsanContextStateTy, Ptr: ContextState,
1629 IdxList: {Zero, IRB.getInt32(C: 4)}, Name: "va_arg_overflow_size");
1630 MS.ParamOriginTLS = IRB.CreateGEP(Ty: MS.MsanContextStateTy, Ptr: ContextState,
1631 IdxList: {Zero, IRB.getInt32(C: 5)}, Name: "param_origin");
1632 MS.RetvalOriginTLS =
1633 IRB.CreateGEP(Ty: MS.MsanContextStateTy, Ptr: ContextState,
1634 IdxList: {Zero, IRB.getInt32(C: 6)}, Name: "retval_origin");
1635 if (MS.TargetTriple.getArch() == Triple::systemz)
1636 MS.MsanMetadataAlloca = IRB.CreateAlloca(Ty: MS.MsanMetadata, AddrSpace: 0u);
1637 }
1638
  /// Add MemorySanitizer instrumentation to a function.
  ///
  /// Phases, in order: collect instructions over the CFG, instrument them,
  /// finalize shadow PHIs, finalize va_arg handling, poison allocas, then
  /// materialize the deferred checks and stores.
  bool runOnFunction() {
    // Iterate all BBs in depth-first order and create shadow instructions
    // for all instructions (where applicable).
    // For PHI nodes we create dummy shadow PHIs which will be finalized later.
    for (BasicBlock *BB : depth_first(G: FnPrologueEnd->getParent()))
      visit(BB&: *BB);

    // `visit` above only collects instructions. Process them after iterating
    // CFG to avoid requirement on CFG transformations.
    for (Instruction *I : Instructions)
      InstVisitor<MemorySanitizerVisitor>::visit(I&: *I);

    // Finalize PHI nodes: now that every value has a shadow (and possibly an
    // origin), wire up the incoming edges of the dummy shadow/origin PHIs.
    for (PHINode *PN : ShadowPHINodes) {
      PHINode *PNS = cast<PHINode>(Val: getShadow(V: PN));
      PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(Val: getOrigin(V: PN)) : nullptr;
      size_t NumValues = PN->getNumIncomingValues();
      for (size_t v = 0; v < NumValues; v++) {
        PNS->addIncoming(V: getShadow(I: PN, i: v), BB: PN->getIncomingBlock(i: v));
        if (PNO)
          PNO->addIncoming(V: getOrigin(I: PN, i: v), BB: PN->getIncomingBlock(i: v));
      }
    }

    VAHelper->finalizeInstrumentation();

    // Poison llvm.lifetime.start intrinsics, if we haven't fallen back to
    // instrumenting only allocas.
    if (ClHandleLifetimeIntrinsics) {
      for (auto Item : LifetimeStartList) {
        instrumentAlloca(I&: *Item.second, InsPoint: Item.first);
        AllocaSet.remove(X: Item.second);
      }
    }
    // Poison the allocas for which we didn't instrument the corresponding
    // lifetime intrinsics.
    for (AllocaInst *AI : AllocaSet)
      instrumentAlloca(I&: *AI);

    // Insert shadow value checks.
    materializeChecks();

    // Delayed instrumentation of StoreInst.
    // This may not add new address checks.
    materializeStores();

    return true;
  }
1688
  /// Compute the shadow type that corresponds to a given Value.
  /// Convenience overload that delegates to the Type-based version.
  Type *getShadowTy(Value *V) { return getShadowTy(OrigTy: V->getType()); }
1691
  /// Compute the shadow type that corresponds to a given Type.
  ///
  /// Integers map to themselves; vectors/arrays/structs map element-wise;
  /// scalable non-vector types map to themselves; anything else sized maps
  /// to an integer of the same bit width. Returns nullptr for unsized types.
  /// The order of the checks below is significant.
  Type *getShadowTy(Type *OrigTy) {
    if (!OrigTy->isSized()) {
      return nullptr;
    }
    // For integer type, shadow is the same as the original type.
    // This may return weird-sized types like i1.
    if (IntegerType *IT = dyn_cast<IntegerType>(Val: OrigTy))
      return IT;
    const DataLayout &DL = F.getDataLayout();
    if (VectorType *VT = dyn_cast<VectorType>(Val: OrigTy)) {
      // Vector shadow: same element count, integer elements of the same size.
      uint32_t EltSize = DL.getTypeSizeInBits(Ty: VT->getElementType());
      return VectorType::get(ElementType: IntegerType::get(C&: *MS.C, NumBits: EltSize),
                             EC: VT->getElementCount());
    }
    if (ArrayType *AT = dyn_cast<ArrayType>(Val: OrigTy)) {
      return ArrayType::get(ElementType: getShadowTy(OrigTy: AT->getElementType()),
                            NumElements: AT->getNumElements());
    }
    if (StructType *ST = dyn_cast<StructType>(Val: OrigTy)) {
      // Struct shadow: field-wise shadow types, preserving packedness.
      SmallVector<Type *, 4> Elements;
      for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
        Elements.push_back(Elt: getShadowTy(OrigTy: ST->getElementType(N: i)));
      StructType *Res = StructType::get(Context&: *MS.C, Elements, isPacked: ST->isPacked());
      LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
      return Res;
    }
    if (isScalableNonVectorType(Ty: OrigTy)) {
      // E.g. target("aarch64.svcount"): use the type itself as its shadow.
      LLVM_DEBUG(dbgs() << "getShadowTy: Scalable non-vector type: " << *OrigTy
                        << "\n");
      return OrigTy;
    }

    // Fallback (e.g. floats, pointers): integer of the same bit width.
    uint32_t TypeSize = DL.getTypeSizeInBits(Ty: OrigTy);
    return IntegerType::get(C&: *MS.C, NumBits: TypeSize);
  }
1728
1729 /// Extract combined shadow of struct elements as a bool
1730 Value *collapseStructShadow(StructType *Struct, Value *Shadow,
1731 IRBuilder<> &IRB) {
1732 Value *FalseVal = IRB.getIntN(/* width */ N: 1, /* value */ C: 0);
1733 Value *Aggregator = FalseVal;
1734
1735 for (unsigned Idx = 0; Idx < Struct->getNumElements(); Idx++) {
1736 // Combine by ORing together each element's bool shadow
1737 Value *ShadowItem = IRB.CreateExtractValue(Agg: Shadow, Idxs: Idx);
1738 Value *ShadowBool = convertToBool(V: ShadowItem, IRB);
1739
1740 if (Aggregator != FalseVal)
1741 Aggregator = IRB.CreateOr(LHS: Aggregator, RHS: ShadowBool);
1742 else
1743 Aggregator = ShadowBool;
1744 }
1745
1746 return Aggregator;
1747 }
1748
1749 // Extract combined shadow of array elements
1750 Value *collapseArrayShadow(ArrayType *Array, Value *Shadow,
1751 IRBuilder<> &IRB) {
1752 if (!Array->getNumElements())
1753 return IRB.getIntN(/* width */ N: 1, /* value */ C: 0);
1754
1755 Value *FirstItem = IRB.CreateExtractValue(Agg: Shadow, Idxs: 0);
1756 Value *Aggregator = convertShadowToScalar(V: FirstItem, IRB);
1757
1758 for (unsigned Idx = 1; Idx < Array->getNumElements(); Idx++) {
1759 Value *ShadowItem = IRB.CreateExtractValue(Agg: Shadow, Idxs: Idx);
1760 Value *ShadowInner = convertShadowToScalar(V: ShadowItem, IRB);
1761 Aggregator = IRB.CreateOr(LHS: Aggregator, RHS: ShadowInner);
1762 }
1763 return Aggregator;
1764 }
1765
1766 /// Convert a shadow value to it's flattened variant. The resulting
1767 /// shadow may not necessarily have the same bit width as the input
1768 /// value, but it will always be comparable to zero.
1769 Value *convertShadowToScalar(Value *V, IRBuilder<> &IRB) {
1770 if (StructType *Struct = dyn_cast<StructType>(Val: V->getType()))
1771 return collapseStructShadow(Struct, Shadow: V, IRB);
1772 if (ArrayType *Array = dyn_cast<ArrayType>(Val: V->getType()))
1773 return collapseArrayShadow(Array, Shadow: V, IRB);
1774 if (isa<VectorType>(Val: V->getType())) {
1775 if (isa<ScalableVectorType>(Val: V->getType()))
1776 return convertShadowToScalar(V: IRB.CreateOrReduce(Src: V), IRB);
1777 unsigned BitWidth =
1778 V->getType()->getPrimitiveSizeInBits().getFixedValue();
1779 return IRB.CreateBitCast(V, DestTy: IntegerType::get(C&: *MS.C, NumBits: BitWidth));
1780 }
1781 return V;
1782 }
1783
1784 // Convert a scalar value to an i1 by comparing with 0
1785 Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &name = "") {
1786 Type *VTy = V->getType();
1787 if (!VTy->isIntegerTy())
1788 return convertToBool(V: convertShadowToScalar(V, IRB), IRB, name);
1789 if (VTy->getIntegerBitWidth() == 1)
1790 // Just converting a bool to a bool, so do nothing.
1791 return V;
1792 return IRB.CreateICmpNE(LHS: V, RHS: ConstantInt::get(Ty: VTy, V: 0), Name: name);
1793 }
1794
1795 Type *ptrToIntPtrType(Type *PtrTy) const {
1796 if (VectorType *VectTy = dyn_cast<VectorType>(Val: PtrTy)) {
1797 return VectorType::get(ElementType: ptrToIntPtrType(PtrTy: VectTy->getElementType()),
1798 EC: VectTy->getElementCount());
1799 }
1800 assert(PtrTy->isIntOrPtrTy());
1801 return MS.IntptrTy;
1802 }
1803
1804 Type *getPtrToShadowPtrType(Type *IntPtrTy, Type *ShadowTy) const {
1805 if (VectorType *VectTy = dyn_cast<VectorType>(Val: IntPtrTy)) {
1806 return VectorType::get(
1807 ElementType: getPtrToShadowPtrType(IntPtrTy: VectTy->getElementType(), ShadowTy),
1808 EC: VectTy->getElementCount());
1809 }
1810 assert(IntPtrTy == MS.IntptrTy);
1811 return MS.PtrTy;
1812 }
1813
1814 Constant *constToIntPtr(Type *IntPtrTy, uint64_t C) const {
1815 if (VectorType *VectTy = dyn_cast<VectorType>(Val: IntPtrTy)) {
1816 return ConstantVector::getSplat(
1817 EC: VectTy->getElementCount(),
1818 Elt: constToIntPtr(IntPtrTy: VectTy->getElementType(), C));
1819 }
1820 assert(IntPtrTy == MS.IntptrTy);
1821 // TODO: Avoid implicit trunc?
1822 // See https://github.com/llvm/llvm-project/issues/112510.
1823 return ConstantInt::get(Ty: MS.IntptrTy, V: C, /*IsSigned=*/false,
1824 /*ImplicitTrunc=*/true);
1825 }
1826
1827 /// Returns the integer shadow offset that corresponds to a given
1828 /// application address, whereby:
1829 ///
1830 /// Offset = (Addr & ~AndMask) ^ XorMask
1831 /// Shadow = ShadowBase + Offset
1832 /// Origin = (OriginBase + Offset) & ~Alignment
1833 ///
1834 /// Note: for efficiency, many shadow mappings only require use the XorMask
1835 /// and OriginBase; the AndMask and ShadowBase are often zero.
1836 Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
1837 Type *IntptrTy = ptrToIntPtrType(PtrTy: Addr->getType());
1838 Value *OffsetLong = IRB.CreatePointerCast(V: Addr, DestTy: IntptrTy);
1839
1840 if (uint64_t AndMask = MS.MapParams->AndMask)
1841 OffsetLong = IRB.CreateAnd(LHS: OffsetLong, RHS: constToIntPtr(IntPtrTy: IntptrTy, C: ~AndMask));
1842
1843 if (uint64_t XorMask = MS.MapParams->XorMask)
1844 OffsetLong = IRB.CreateXor(LHS: OffsetLong, RHS: constToIntPtr(IntPtrTy: IntptrTy, C: XorMask));
1845 return OffsetLong;
1846 }
1847
  /// Compute the shadow and origin addresses corresponding to a given
  /// application address.
  ///
  /// Shadow = ShadowBase + Offset
  /// Origin = (OriginBase + Offset) & ~3ULL
  /// Addr can be a ptr or <N x ptr>. In both cases ShadowTy the shadow type of
  /// a single pointee.
  /// Returns <shadow_ptr, origin_ptr> or <<N x shadow_ptr>, <N x origin_ptr>>.
  /// The origin pointer is nullptr when origin tracking is disabled.
  std::pair<Value *, Value *>
  getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
                              MaybeAlign Alignment) {
    VectorType *VectTy = dyn_cast<VectorType>(Val: Addr->getType());
    if (!VectTy) {
      assert(Addr->getType()->isPointerTy());
    } else {
      assert(VectTy->getElementType()->isPointerTy());
    }
    Type *IntptrTy = ptrToIntPtrType(PtrTy: Addr->getType());
    // Offset = (Addr & ~AndMask) ^ XorMask, shared by shadow and origin.
    Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
    Value *ShadowLong = ShadowOffset;
    if (uint64_t ShadowBase = MS.MapParams->ShadowBase) {
      ShadowLong =
          IRB.CreateAdd(LHS: ShadowLong, RHS: constToIntPtr(IntPtrTy: IntptrTy, C: ShadowBase));
    }
    Value *ShadowPtr = IRB.CreateIntToPtr(
        V: ShadowLong, DestTy: getPtrToShadowPtrType(IntPtrTy: IntptrTy, ShadowTy));

    Value *OriginPtr = nullptr;
    if (MS.TrackOrigins) {
      Value *OriginLong = ShadowOffset;
      uint64_t OriginBase = MS.MapParams->OriginBase;
      if (OriginBase != 0)
        OriginLong =
            IRB.CreateAdd(LHS: OriginLong, RHS: constToIntPtr(IntPtrTy: IntptrTy, C: OriginBase));
      // Origin slots are kMinOriginAlignment-aligned; round the address down
      // unless the access is already known to be at least that aligned.
      if (!Alignment || *Alignment < kMinOriginAlignment) {
        uint64_t Mask = kMinOriginAlignment.value() - 1;
        OriginLong = IRB.CreateAnd(LHS: OriginLong, RHS: constToIntPtr(IntPtrTy: IntptrTy, C: ~Mask));
      }
      OriginPtr = IRB.CreateIntToPtr(
          V: OriginLong, DestTy: getPtrToShadowPtrType(IntPtrTy: IntptrTy, ShadowTy: MS.OriginTy));
    }
    return std::make_pair(x&: ShadowPtr, y&: OriginPtr);
  }
1891
1892 template <typename... ArgsTy>
1893 Value *createMetadataCall(IRBuilder<> &IRB, FunctionCallee Callee,
1894 ArgsTy... Args) {
1895 if (MS.TargetTriple.getArch() == Triple::systemz) {
1896 IRB.CreateCall(Callee,
1897 {MS.MsanMetadataAlloca, std::forward<ArgsTy>(Args)...});
1898 return IRB.CreateLoad(Ty: MS.MsanMetadata, Ptr: MS.MsanMetadataAlloca);
1899 }
1900
1901 return IRB.CreateCall(Callee, {std::forward<ArgsTy>(Args)...});
1902 }
1903
  /// KMSAN: obtain the <shadow_ptr, origin_ptr> pair for a single (scalar)
  /// application address by calling into the runtime.
  std::pair<Value *, Value *> getShadowOriginPtrKernelNoVec(Value *Addr,
                                                            IRBuilder<> &IRB,
                                                            Type *ShadowTy,
                                                            bool isStore) {
    Value *ShadowOriginPtrs;
    const DataLayout &DL = F.getDataLayout();
    TypeSize Size = DL.getTypeStoreSize(Ty: ShadowTy);

    // Prefer the size-specialized accessor; fall back to the generic
    // (pointer, size) runtime entry point when none exists for this size.
    FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, size: Size);
    Value *AddrCast = IRB.CreatePointerCast(V: Addr, DestTy: MS.PtrTy);
    if (Getter) {
      ShadowOriginPtrs = createMetadataCall(IRB, Callee: Getter, Args: AddrCast);
    } else {
      Value *SizeVal = ConstantInt::get(Ty: MS.IntptrTy, V: Size);
      ShadowOriginPtrs = createMetadataCall(
          IRB,
          Callee: isStore ? MS.MsanMetadataPtrForStoreN : MS.MsanMetadataPtrForLoadN,
          Args: AddrCast, Args: SizeVal);
    }
    // The runtime returns an aggregate {shadow_ptr, origin_ptr}.
    Value *ShadowPtr = IRB.CreateExtractValue(Agg: ShadowOriginPtrs, Idxs: 0);
    ShadowPtr = IRB.CreatePointerCast(V: ShadowPtr, DestTy: MS.PtrTy);
    Value *OriginPtr = IRB.CreateExtractValue(Agg: ShadowOriginPtrs, Idxs: 1);

    return std::make_pair(x&: ShadowPtr, y&: OriginPtr);
  }
1929
  /// KMSAN: compute shadow/origin pointers for a scalar or vector address.
  /// Addr can be a ptr or <N x ptr>. In both cases ShadowTy the shadow type of
  /// a single pointee.
  /// Returns <shadow_ptr, origin_ptr> or <<N x shadow_ptr>, <N x origin_ptr>>.
  std::pair<Value *, Value *> getShadowOriginPtrKernel(Value *Addr,
                                                       IRBuilder<> &IRB,
                                                       Type *ShadowTy,
                                                       bool isStore) {
    VectorType *VectTy = dyn_cast<VectorType>(Val: Addr->getType());
    if (!VectTy) {
      assert(Addr->getType()->isPointerTy());
      return getShadowOriginPtrKernelNoVec(Addr, IRB, ShadowTy, isStore);
    }

    // TODO: Support callbacks with vectors of addresses.
    // Until then: extract each lane, query the runtime per lane, and
    // re-insert the results into pointer vectors.
    unsigned NumElements = cast<FixedVectorType>(Val: VectTy)->getNumElements();
    Value *ShadowPtrs = ConstantInt::getNullValue(
        Ty: FixedVectorType::get(ElementType: IRB.getPtrTy(), NumElts: NumElements));
    Value *OriginPtrs = nullptr;
    if (MS.TrackOrigins)
      OriginPtrs = ConstantInt::getNullValue(
          Ty: FixedVectorType::get(ElementType: IRB.getPtrTy(), NumElts: NumElements));
    for (unsigned i = 0; i < NumElements; ++i) {
      Value *OneAddr =
          IRB.CreateExtractElement(Vec: Addr, Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: i));
      auto [ShadowPtr, OriginPtr] =
          getShadowOriginPtrKernelNoVec(Addr: OneAddr, IRB, ShadowTy, isStore);

      ShadowPtrs = IRB.CreateInsertElement(
          Vec: ShadowPtrs, NewElt: ShadowPtr, Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: i));
      if (MS.TrackOrigins)
        OriginPtrs = IRB.CreateInsertElement(
            Vec: OriginPtrs, NewElt: OriginPtr, Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: i));
    }
    return {ShadowPtrs, OriginPtrs};
  }
1965
1966 std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
1967 Type *ShadowTy,
1968 MaybeAlign Alignment,
1969 bool isStore) {
1970 if (MS.CompileKernel)
1971 return getShadowOriginPtrKernel(Addr, IRB, ShadowTy, isStore);
1972 return getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
1973 }
1974
1975 /// Compute the shadow address for a given function argument.
1976 ///
1977 /// Shadow = ParamTLS+ArgOffset.
1978 Value *getShadowPtrForArgument(IRBuilder<> &IRB, int ArgOffset) {
1979 return IRB.CreatePtrAdd(Ptr: MS.ParamTLS,
1980 Offset: ConstantInt::get(Ty: MS.IntptrTy, V: ArgOffset), Name: "_msarg");
1981 }
1982
1983 /// Compute the origin address for a given function argument.
1984 Value *getOriginPtrForArgument(IRBuilder<> &IRB, int ArgOffset) {
1985 if (!MS.TrackOrigins)
1986 return nullptr;
1987 return IRB.CreatePtrAdd(Ptr: MS.ParamOriginTLS,
1988 Offset: ConstantInt::get(Ty: MS.IntptrTy, V: ArgOffset),
1989 Name: "_msarg_o");
1990 }
1991
  /// Compute the shadow address for a retval (the RetvalTLS slot).
  Value *getShadowPtrForRetval(IRBuilder<> &IRB) {
    return IRB.CreatePointerCast(V: MS.RetvalTLS, DestTy: IRB.getPtrTy(AddrSpace: 0), Name: "_msret");
  }
1996
  /// Compute the origin address for a retval (the RetvalOriginTLS slot).
  Value *getOriginPtrForRetval() {
    // We keep a single origin for the entire retval. Might be too optimistic.
    return MS.RetvalOriginTLS;
  }
2002
  /// Set SV to be the shadow value for V. When shadow propagation is
  /// disabled, a clean shadow is recorded instead.
  void setShadow(Value *V, Value *SV) {
    assert(!ShadowMap.count(V) && "Values may only have one shadow");
    ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
  }
2008
  /// Set Origin to be the origin value for V. No-op when origin tracking is
  /// disabled.
  void setOrigin(Value *V, Value *Origin) {
    if (!MS.TrackOrigins)
      return;
    assert(!OriginMap.count(V) && "Values may only have one origin");
    LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << " ==> " << *Origin << "\n");
    OriginMap[V] = Origin;
  }
2017
2018 Constant *getCleanShadow(Type *OrigTy) {
2019 Type *ShadowTy = getShadowTy(OrigTy);
2020 if (!ShadowTy)
2021 return nullptr;
2022 return Constant::getNullValue(Ty: ShadowTy);
2023 }
2024
  /// Create a clean shadow value for a given value.
  ///
  /// Clean shadow (all zeroes) means all bits of the value are defined
  /// (initialized). Returns nullptr if the value's type has no shadow.
  Constant *getCleanShadow(Value *V) { return getCleanShadow(OrigTy: V->getType()); }
2030
2031 /// Create a dirty shadow of a given shadow type.
2032 Constant *getPoisonedShadow(Type *ShadowTy) {
2033 assert(ShadowTy);
2034 if (isa<IntegerType>(Val: ShadowTy) || isa<VectorType>(Val: ShadowTy))
2035 return Constant::getAllOnesValue(Ty: ShadowTy);
2036 if (ArrayType *AT = dyn_cast<ArrayType>(Val: ShadowTy)) {
2037 SmallVector<Constant *, 4> Vals(AT->getNumElements(),
2038 getPoisonedShadow(ShadowTy: AT->getElementType()));
2039 return ConstantArray::get(T: AT, V: Vals);
2040 }
2041 if (StructType *ST = dyn_cast<StructType>(Val: ShadowTy)) {
2042 SmallVector<Constant *, 4> Vals;
2043 for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
2044 Vals.push_back(Elt: getPoisonedShadow(ShadowTy: ST->getElementType(N: i)));
2045 return ConstantStruct::get(T: ST, V: Vals);
2046 }
2047 llvm_unreachable("Unexpected shadow type");
2048 }
2049
2050 /// Create a dirty shadow for a given value.
2051 Constant *getPoisonedShadow(Value *V) {
2052 Type *ShadowTy = getShadowTy(V);
2053 if (!ShadowTy)
2054 return nullptr;
2055 return getPoisonedShadow(ShadowTy);
2056 }
2057
  /// Create a clean (zero) origin.
  Value *getCleanOrigin() { return Constant::getNullValue(Ty: MS.OriginTy); }
2060
  /// Get the shadow value for a given Value.
  ///
  /// This function either returns the value set earlier with setShadow,
  /// or extracts if from ParamTLS (for function arguments).
  /// Constants get a clean shadow, except fully/partially undefined values
  /// which may be poisoned depending on the Cl* flags.
  Value *getShadow(Value *V) {
    if (Instruction *I = dyn_cast<Instruction>(Val: V)) {
      if (!PropagateShadow || I->getMetadata(KindID: LLVMContext::MD_nosanitize))
        return getCleanShadow(V);
      // For instructions the shadow is already stored in the map.
      Value *Shadow = ShadowMap[V];
      if (!Shadow) {
        LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
        assert(Shadow && "No shadow for a value");
      }
      return Shadow;
    }
    // Handle fully undefined values
    // (partially undefined constant vectors are handled later)
    if ([[maybe_unused]] UndefValue *U = dyn_cast<UndefValue>(Val: V)) {
      Value *AllOnes = (PropagateShadow && PoisonUndef) ? getPoisonedShadow(V)
                                                        : getCleanShadow(V);
      LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
      return AllOnes;
    }
    if (Argument *A = dyn_cast<Argument>(Val: V)) {
      // For arguments we compute the shadow on demand and store it in the map.
      Value *&ShadowPtr = ShadowMap[V];
      if (ShadowPtr)
        return ShadowPtr;
      Function *F = A->getParent();
      IRBuilder<> EntryIRB(FnPrologueEnd);
      unsigned ArgOffset = 0;
      const DataLayout &DL = F->getDataLayout();
      // Walk all arguments to accumulate ArgOffset (each argument's TLS slot
      // depends on the sizes of the arguments before it); only A itself gets
      // a shadow computed.
      for (auto &FArg : F->args()) {
        if (!FArg.getType()->isSized() || FArg.getType()->isScalableTy()) {
          LLVM_DEBUG(dbgs() << (FArg.getType()->isScalableTy()
                                    ? "vscale not fully supported\n"
                                    : "Arg is not sized\n"));
          // Unsized/scalable args occupy no TLS space; give A a clean shadow.
          if (A == &FArg) {
            ShadowPtr = getCleanShadow(V);
            setOrigin(V: A, Origin: getCleanOrigin());
            break;
          }
          continue;
        }

        unsigned Size = FArg.hasByValAttr()
                            ? DL.getTypeAllocSize(Ty: FArg.getParamByValType())
                            : DL.getTypeAllocSize(Ty: FArg.getType());

        if (A == &FArg) {
          bool Overflow = ArgOffset + Size > kParamTLSSize;
          if (FArg.hasByValAttr()) {
            // ByVal pointer itself has clean shadow. We copy the actual
            // argument shadow to the underlying memory.
            // Figure out maximal valid memcpy alignment.
            const Align ArgAlign = DL.getValueOrABITypeAlignment(
                Alignment: FArg.getParamAlign(), Ty: FArg.getParamByValType());
            Value *CpShadowPtr, *CpOriginPtr;
            std::tie(args&: CpShadowPtr, args&: CpOriginPtr) =
                getShadowOriginPtr(Addr: V, IRB&: EntryIRB, ShadowTy: EntryIRB.getInt8Ty(), Alignment: ArgAlign,
                                   /*isStore*/ true);
            if (!PropagateShadow || Overflow) {
              // ParamTLS overflow.
              EntryIRB.CreateMemSet(
                  Ptr: CpShadowPtr, Val: Constant::getNullValue(Ty: EntryIRB.getInt8Ty()),
                  Size, Align: ArgAlign);
            } else {
              Value *Base = getShadowPtrForArgument(IRB&: EntryIRB, ArgOffset);
              const Align CopyAlign = std::min(a: ArgAlign, b: kShadowTLSAlignment);
              [[maybe_unused]] Value *Cpy = EntryIRB.CreateMemCpy(
                  Dst: CpShadowPtr, DstAlign: CopyAlign, Src: Base, SrcAlign: CopyAlign, Size);
              LLVM_DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");

              if (MS.TrackOrigins) {
                Value *OriginPtr = getOriginPtrForArgument(IRB&: EntryIRB, ArgOffset);
                // FIXME: OriginSize should be:
                // alignTo(V % kMinOriginAlignment + Size, kMinOriginAlignment)
                unsigned OriginSize = alignTo(Size, A: kMinOriginAlignment);
                EntryIRB.CreateMemCpy(
                    Dst: CpOriginPtr,
                    /* by getShadowOriginPtr */ DstAlign: kMinOriginAlignment, Src: OriginPtr,
                    /* by origin_tls[ArgOffset] */ SrcAlign: kMinOriginAlignment,
                    Size: OriginSize);
              }
            }
          }

          // The pointer (or value not propagated/overflowed/noundef-checked)
          // itself is treated as fully initialized.
          if (!PropagateShadow || Overflow || FArg.hasByValAttr() ||
              (MS.EagerChecks && FArg.hasAttribute(Kind: Attribute::NoUndef))) {
            ShadowPtr = getCleanShadow(V);
            setOrigin(V: A, Origin: getCleanOrigin());
          } else {
            // Shadow over TLS
            Value *Base = getShadowPtrForArgument(IRB&: EntryIRB, ArgOffset);
            ShadowPtr = EntryIRB.CreateAlignedLoad(Ty: getShadowTy(V: &FArg), Ptr: Base,
                                                   Align: kShadowTLSAlignment);
            if (MS.TrackOrigins) {
              Value *OriginPtr = getOriginPtrForArgument(IRB&: EntryIRB, ArgOffset);
              setOrigin(V: A, Origin: EntryIRB.CreateLoad(Ty: MS.OriginTy, Ptr: OriginPtr));
            }
          }
          LLVM_DEBUG(dbgs()
                     << "  ARG: " << FArg << " ==> " << *ShadowPtr << "\n");
          break;
        }

        ArgOffset += alignTo(Size, A: kShadowTLSAlignment);
      }
      assert(ShadowPtr && "Could not find shadow for an argument");
      return ShadowPtr;
    }

    // Check for partially-undefined constant vectors
    // TODO: scalable vectors (this is hard because we do not have IRBuilder)
    if (isa<FixedVectorType>(Val: V->getType()) && isa<Constant>(Val: V) &&
        cast<Constant>(Val: V)->containsUndefOrPoisonElement() && PropagateShadow &&
        PoisonUndefVectors) {
      unsigned NumElems = cast<FixedVectorType>(Val: V->getType())->getNumElements();
      SmallVector<Constant *, 32> ShadowVector(NumElems);
      // Poison only the undef/poison lanes; defined lanes get clean shadow.
      for (unsigned i = 0; i != NumElems; ++i) {
        Constant *Elem = cast<Constant>(Val: V)->getAggregateElement(Elt: i);
        ShadowVector[i] = isa<UndefValue>(Val: Elem) ? getPoisonedShadow(V: Elem)
                                                 : getCleanShadow(V: Elem);
      }

      Value *ShadowConstant = ConstantVector::get(V: ShadowVector);
      LLVM_DEBUG(dbgs() << "Partial undef constant vector: " << *V << " ==> "
                        << *ShadowConstant << "\n");

      return ShadowConstant;
    }

    // TODO: partially-undefined constant arrays, structures, and nested types

    // For everything else the shadow is zero.
    return getCleanShadow(V);
  }
2199
  /// Get the shadow for i-th operand of the instruction I.
  Value *getShadow(Instruction *I, int i) {
    return getShadow(V: I->getOperand(i));
  }
2204
2205 /// Get the origin for a value.
2206 Value *getOrigin(Value *V) {
2207 if (!MS.TrackOrigins)
2208 return nullptr;
2209 if (!PropagateShadow || isa<Constant>(Val: V) || isa<InlineAsm>(Val: V))
2210 return getCleanOrigin();
2211 assert((isa<Instruction>(V) || isa<Argument>(V)) &&
2212 "Unexpected value type in getOrigin()");
2213 if (Instruction *I = dyn_cast<Instruction>(Val: V)) {
2214 if (I->getMetadata(KindID: LLVMContext::MD_nosanitize))
2215 return getCleanOrigin();
2216 }
2217 Value *Origin = OriginMap[V];
2218 assert(Origin && "Missing origin");
2219 return Origin;
2220 }
2221
  /// Get the origin for i-th operand of the instruction I.
  Value *getOrigin(Instruction *I, int i) {
    return getOrigin(V: I->getOperand(i));
  }
2226
  /// Remember the place where a shadow check should be inserted.
  ///
  /// This location will be later instrumented with a check that will print a
  /// UMR warning in runtime if the shadow value is not 0.
  /// The actual check is emitted later by materializeChecks().
  void insertCheckShadow(Value *Shadow, Value *Origin, Instruction *OrigIns) {
    assert(Shadow);
    if (!InsertChecks)
      return;

    // Debug counter allows bisecting which check causes a false positive.
    if (!DebugCounter::shouldExecute(Counter&: DebugInsertCheck)) {
      LLVM_DEBUG(dbgs() << "Skipping check of " << *Shadow << " before "
                        << *OrigIns << "\n");
      return;
    }

    Type *ShadowTy = Shadow->getType();
    // Scalable non-vector shadows (e.g. svcount) cannot be checked.
    if (isScalableNonVectorType(Ty: ShadowTy)) {
      LLVM_DEBUG(dbgs() << "Skipping check of scalable non-vector " << *Shadow
                        << " before " << *OrigIns << "\n");
      return;
    }
#ifndef NDEBUG
    assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
            isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
           "Can only insert checks for integer, vector, and aggregate shadow "
           "types");
#endif
    InstrumentationList.push_back(
        Elt: ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
  }
2257
  /// Get shadow for value, and remember the place where a shadow check should
  /// be inserted.
  ///
  /// This location will be later instrumented with a check that will print a
  /// UMR warning in runtime if the value is not fully defined.
  void insertCheckShadowOf(Value *Val, Instruction *OrigIns) {
    assert(Val);
    Value *Shadow, *Origin;
    if (ClCheckConstantShadow) {
      // Also check constant shadows (insertCheckShadow decides whether a
      // constant shadow ultimately produces a warning).
      Shadow = getShadow(V: Val);
      if (!Shadow)
        return;
      Origin = getOrigin(V: Val);
    } else {
      // Only dynamically-computed (Instruction) shadows are worth checking;
      // constant shadows are dropped here.
      Shadow = dyn_cast_or_null<Instruction>(Val: getShadow(V: Val));
      if (!Shadow)
        return;
      Origin = dyn_cast_or_null<Instruction>(Val: getOrigin(V: Val));
    }
    insertCheckShadow(Shadow, Origin, OrigIns);
  }
2279
  /// Strengthen an atomic ordering so that it includes release semantics
  /// (NotAtomic stays NotAtomic; acquire orderings become acq_rel).
  AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
    switch (a) {
    case AtomicOrdering::NotAtomic:
      return AtomicOrdering::NotAtomic;
    case AtomicOrdering::Unordered:
    case AtomicOrdering::Monotonic:
    case AtomicOrdering::Release:
      return AtomicOrdering::Release;
    case AtomicOrdering::Acquire:
    case AtomicOrdering::AcquireRelease:
      return AtomicOrdering::AcquireRelease;
    case AtomicOrdering::SequentiallyConsistent:
      return AtomicOrdering::SequentiallyConsistent;
    }
    llvm_unreachable("Unknown ordering");
  }
2296
  /// Build a constant vector mapping each C ABI atomic ordering to the
  /// corresponding release-strengthened ordering (vector analog of
  /// addReleaseOrdering, indexed by AtomicOrderingCABI).
  Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
    constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
    uint32_t OrderingTable[NumOrderings] = {};

    OrderingTable[(int)AtomicOrderingCABI::relaxed] =
        OrderingTable[(int)AtomicOrderingCABI::release] =
            (int)AtomicOrderingCABI::release;
    OrderingTable[(int)AtomicOrderingCABI::consume] =
        OrderingTable[(int)AtomicOrderingCABI::acquire] =
            OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
                (int)AtomicOrderingCABI::acq_rel;
    OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
        (int)AtomicOrderingCABI::seq_cst;

    return ConstantDataVector::get(Context&: IRB.getContext(), Elts: OrderingTable);
  }
2313
  /// Strengthen an atomic ordering so that it includes acquire semantics
  /// (NotAtomic stays NotAtomic; release orderings become acq_rel).
  AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
    switch (a) {
    case AtomicOrdering::NotAtomic:
      return AtomicOrdering::NotAtomic;
    case AtomicOrdering::Unordered:
    case AtomicOrdering::Monotonic:
    case AtomicOrdering::Acquire:
      return AtomicOrdering::Acquire;
    case AtomicOrdering::Release:
    case AtomicOrdering::AcquireRelease:
      return AtomicOrdering::AcquireRelease;
    case AtomicOrdering::SequentiallyConsistent:
      return AtomicOrdering::SequentiallyConsistent;
    }
    llvm_unreachable("Unknown ordering");
  }
2330
  /// Build a constant vector mapping each C ABI atomic ordering to the
  /// corresponding acquire-strengthened ordering (vector analog of
  /// addAcquireOrdering, indexed by AtomicOrderingCABI).
  Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
    constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
    uint32_t OrderingTable[NumOrderings] = {};

    OrderingTable[(int)AtomicOrderingCABI::relaxed] =
        OrderingTable[(int)AtomicOrderingCABI::acquire] =
            OrderingTable[(int)AtomicOrderingCABI::consume] =
                (int)AtomicOrderingCABI::acquire;
    OrderingTable[(int)AtomicOrderingCABI::release] =
        OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
            (int)AtomicOrderingCABI::acq_rel;
    OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
        (int)AtomicOrderingCABI::seq_cst;

    return ConstantDataVector::get(Context&: IRB.getContext(), Elts: OrderingTable);
  }
2347
2348 // ------------------- Visitors.
2349 using InstVisitor<MemorySanitizerVisitor>::visit;
  /// Collection pass: record each instruction that should be instrumented.
  /// The actual per-instruction visitors run later from runOnFunction().
  void visit(Instruction &I) {
    if (I.getMetadata(KindID: LLVMContext::MD_nosanitize))
      return;
    // Don't want to visit if we're in the prologue
    if (isInPrologue(I))
      return;
    // Debug counter allows bisecting which instrumented instruction causes
    // a problem.
    if (!DebugCounter::shouldExecute(Counter&: DebugInstrumentInstruction)) {
      LLVM_DEBUG(dbgs() << "Skipping instruction: " << I << "\n");
      // We still need to set the shadow and origin to clean values.
      setShadow(V: &I, SV: getCleanShadow(V: &I));
      setOrigin(V: &I, Origin: getCleanOrigin());
      return;
    }

    Instructions.push_back(Elt: &I);
  }
2366
  /// Instrument LoadInst
  ///
  /// Loads the corresponding shadow and (optionally) origin.
  /// Optionally, checks that the load address is fully defined.
  void visitLoadInst(LoadInst &I) {
    assert(I.getType()->isSized() && "Load type must have size");
    assert(!I.getMetadata(LLVMContext::MD_nosanitize));
    // Shadow/origin loads are inserted *after* the application load.
    NextNodeIRBuilder IRB(&I);
    Type *ShadowTy = getShadowTy(V: &I);
    Value *Addr = I.getPointerOperand();
    Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
    const Align Alignment = I.getAlign();
    if (PropagateShadow) {
      std::tie(args&: ShadowPtr, args&: OriginPtr) =
          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
      setShadow(V: &I,
                SV: IRB.CreateAlignedLoad(Ty: ShadowTy, Ptr: ShadowPtr, Align: Alignment, Name: "_msld"));
    } else {
      // Shadow propagation disabled: result is always considered initialized.
      setShadow(V: &I, SV: getCleanShadow(V: &I));
    }

    if (ClCheckAccessAddress)
      insertCheckShadowOf(Val: I.getPointerOperand(), OrigIns: &I);

    // Strengthen atomic loads to at least acquire (see addAcquireOrdering).
    if (I.isAtomic())
      I.setOrdering(addAcquireOrdering(a: I.getOrdering()));

    if (MS.TrackOrigins) {
      if (PropagateShadow) {
        const Align OriginAlignment = std::max(a: kMinOriginAlignment, b: Alignment);
        setOrigin(
            V: &I, Origin: IRB.CreateAlignedLoad(Ty: MS.OriginTy, Ptr: OriginPtr, Align: OriginAlignment));
      } else {
        setOrigin(V: &I, Origin: getCleanOrigin());
      }
    }
  }
2404
  /// Instrument StoreInst
  ///
  /// Stores the corresponding shadow and (optionally) origin.
  /// Optionally, checks that the store address is fully defined.
  void visitStoreInst(StoreInst &I) {
    // The shadow/origin stores themselves are materialized later in bulk.
    StoreList.push_back(Elt: &I);
    if (ClCheckAccessAddress)
      insertCheckShadowOf(Val: I.getPointerOperand(), OrigIns: &I);
  }
2414
  /// Common instrumentation for atomic RMW and cmpxchg: check the address
  /// (and, for cmpxchg, the comparand), mark the memory location's shadow as
  /// clean, and give the result clean shadow/origin.
  void handleCASOrRMW(Instruction &I) {
    assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));

    IRBuilder<> IRB(&I);
    Value *Addr = I.getOperand(i: 0);
    Value *Val = I.getOperand(i: 1);
    Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, ShadowTy: getShadowTy(V: Val), Alignment: Align(1),
                                          /*isStore*/ true)
                           .first;

    if (ClCheckAccessAddress)
      insertCheckShadowOf(Val: Addr, OrigIns: &I);

    // Only test the conditional argument of cmpxchg instruction.
    // The other argument can potentially be uninitialized, but we can not
    // detect this situation reliably without possible false positives.
    if (isa<AtomicCmpXchgInst>(Val: I))
      insertCheckShadowOf(Val, OrigIns: &I);

    // The location is unconditionally treated as initialized afterwards.
    IRB.CreateStore(Val: getCleanShadow(V: Val), Ptr: ShadowPtr);

    setShadow(V: &I, SV: getCleanShadow(V: &I));
    setOrigin(V: &I, Origin: getCleanOrigin());
  }
2439
  void visitAtomicRMWInst(AtomicRMWInst &I) {
    handleCASOrRMW(I);
    // Strengthen the ordering to at least release.
    I.setOrdering(addReleaseOrdering(a: I.getOrdering()));
  }
2444
  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
    handleCASOrRMW(I);
    // Strengthen the success ordering to at least release.
    I.setSuccessOrdering(addReleaseOrdering(a: I.getSuccessOrdering()));
  }
2449
  /// Generic handler to compute shadow for == and != comparisons.
  ///
  /// This function is used by handleEqualityComparison and visitSwitchInst.
  ///
  /// Sometimes the comparison result is known even if some of the bits of the
  /// arguments are not.
  Value *propagateEqualityComparison(IRBuilder<> &IRB, Value *A, Value *B,
                                     Value *Sa, Value *Sb) {
    assert(getShadowTy(A) == Sa->getType());
    assert(getShadowTy(B) == Sb->getType());

    // Get rid of pointers and vectors of pointers.
    // For ints (and vectors of ints), types of A and Sa match,
    // and this is a no-op.
    A = IRB.CreatePointerCast(V: A, DestTy: Sa->getType());
    B = IRB.CreatePointerCast(V: B, DestTy: Sb->getType());

    // A == B <==> (C = A^B) == 0
    // A != B <==> (C = A^B) != 0
    // Sc = Sa | Sb
    Value *C = IRB.CreateXor(LHS: A, RHS: B);
    Value *Sc = IRB.CreateOr(LHS: Sa, RHS: Sb);
    // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
    // Result is defined if one of the following is true
    // * there is a defined 1 bit in C
    // * C is fully defined
    // Si = !(C & ~Sc) && Sc
    Value *Zero = Constant::getNullValue(Ty: Sc->getType());
    Value *MinusOne = Constant::getAllOnesValue(Ty: Sc->getType());
    // LHS: at least one bit is undefined (otherwise the shadow must be 0).
    Value *LHS = IRB.CreateICmpNE(LHS: Sc, RHS: Zero);
    // RHS: there is no *defined* 1 bit in C; such a bit would decide the
    // comparison regardless of the undefined bits.
    Value *RHS =
        IRB.CreateICmpEQ(LHS: IRB.CreateAnd(LHS: IRB.CreateXor(LHS: Sc, RHS: MinusOne), RHS: C), RHS: Zero);
    Value *Si = IRB.CreateAnd(LHS, RHS);
    Si->setName("_msprop_icmp");

    return Si;
  }
2487
  // Instrument:
  //    switch i32 %Val, label %else [ i32 0, label %A
  //                                   i32 1, label %B
  //                                   i32 2, label %C ]
  //
  // Typically, the switch input value (%Val) is fully initialized.
  //
  // Sometimes the compiler may convert (icmp + br) into a switch statement.
  // MSan allows icmp eq/ne with partly initialized inputs to still result in a
  // fully initialized output, if there exists a bit that is initialized in
  // both inputs with a differing value. For compatibility, we support this in
  // the switch instrumentation as well. Note that this edge case only applies
  // if the switch input value does not match *any* of the cases (matching any
  // of the cases requires an exact, fully initialized match).
  //
  // ShadowCases = 0
  //               | propagateEqualityComparison(Val, 0)
  //               | propagateEqualityComparison(Val, 1)
  //               | propagateEqualityComparison(Val, 2))
  void visitSwitchInst(SwitchInst &SI) {
    IRBuilder<> IRB(&SI);

    Value *Val = SI.getCondition();
    Value *ShadowVal = getShadow(V: Val);
    // TODO: add fast path - if the condition is fully initialized, we know
    // there is no UUM, without needing to consider the case values below.

    // Some code (e.g., AMDGPUGenMCCodeEmitter.inc) has tens of thousands of
    // cases. This results in an extremely long chained expression for MSan's
    // switch instrumentation, which can cause the JumpThreadingPass to have a
    // stack overflow or excessive runtime. We limit the number of cases
    // considered, with the tradeoff of niche false negatives.
    // TODO: figure out a better solution.
    int casesToConsider = ClSwitchPrecision;

    Value *ShadowCases = nullptr;
    for (auto Case : SI.cases()) {
      if (casesToConsider <= 0)
        break;

      Value *Comparator = Case.getCaseValue();
      // TODO: some simplification is possible when comparing multiple cases
      // simultaneously.
      Value *ComparisonShadow = propagateEqualityComparison(
          IRB, A: Val, B: Comparator, Sa: ShadowVal, Sb: getShadow(V: Comparator));

      // Accumulate the per-case equality shadows into one OR chain.
      if (ShadowCases)
        ShadowCases = IRB.CreateOr(LHS: ShadowCases, RHS: ComparisonShadow);
      else
        ShadowCases = ComparisonShadow;

      casesToConsider--;
    }

    // Report if any of the considered comparisons could be undecided.
    if (ShadowCases)
      insertCheckShadow(Shadow: ShadowCases, Origin: getOrigin(V: Val), OrigIns: &SI);
  }
2545
  // Vector manipulation.

  /// The extracted element's shadow is the same element of the vector shadow;
  /// the index itself must be fully initialized.
  void visitExtractElementInst(ExtractElementInst &I) {
    insertCheckShadowOf(Val: I.getOperand(i_nocapture: 1), OrigIns: &I);
    IRBuilder<> IRB(&I);
    setShadow(V: &I, SV: IRB.CreateExtractElement(Vec: getShadow(I: &I, i: 0), Idx: I.getOperand(i_nocapture: 1),
                                              Name: "_msprop"));
    setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
  }
2554
  /// Mirror the insertion on the shadow vectors; the insertion index must be
  /// fully initialized.
  void visitInsertElementInst(InsertElementInst &I) {
    insertCheckShadowOf(Val: I.getOperand(i_nocapture: 2), OrigIns: &I);
    IRBuilder<> IRB(&I);
    auto *Shadow0 = getShadow(I: &I, i: 0);
    auto *Shadow1 = getShadow(I: &I, i: 1);
    setShadow(V: &I, SV: IRB.CreateInsertElement(Vec: Shadow0, NewElt: Shadow1, Idx: I.getOperand(i_nocapture: 2),
                                             Name: "_msprop"));
    setOriginForNaryOp(I);
  }
2564
  /// Shuffle the two operand shadows with the same mask as the application
  /// shuffle: shadow bits travel with their elements.
  void visitShuffleVectorInst(ShuffleVectorInst &I) {
    IRBuilder<> IRB(&I);
    auto *Shadow0 = getShadow(I: &I, i: 0);
    auto *Shadow1 = getShadow(I: &I, i: 1);
    setShadow(V: &I, SV: IRB.CreateShuffleVector(V1: Shadow0, V2: Shadow1, Mask: I.getShuffleMask(),
                                            Name: "_msprop"));
    setOriginForNaryOp(I);
  }
2573
  // Casts.

  /// Sign-extend the shadow: a (possibly uninitialized) sign bit is
  /// replicated into the new high bits, matching the data flow.
  void visitSExtInst(SExtInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(V: &I, SV: IRB.CreateSExt(V: getShadow(I: &I, i: 0), DestTy: I.getType(), Name: "_msprop"));
    setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
  }
2580
  /// Zero-extend the shadow: the new high bits are constant zeros, hence
  /// always initialized.
  void visitZExtInst(ZExtInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(V: &I, SV: IRB.CreateZExt(V: getShadow(I: &I, i: 0), DestTy: I.getType(), Name: "_msprop"));
    setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
  }
2586
  /// Truncate the shadow along with the value: dropped high bits take their
  /// shadow with them.
  void visitTruncInst(TruncInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(V: &I, SV: IRB.CreateTrunc(V: getShadow(I: &I, i: 0), DestTy: I.getType(), Name: "_msprop"));
    setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
  }
2592
  /// Bitcast the shadow with the same reinterpretation as the value.
  void visitBitCastInst(BitCastInst &I) {
    // Special case: if this is the bitcast (there is exactly 1 allowed) between
    // a musttail call and a ret, don't instrument. New instructions are not
    // allowed after a musttail call.
    if (auto *CI = dyn_cast<CallInst>(Val: I.getOperand(i_nocapture: 0)))
      if (CI->isMustTailCall())
        return;
    IRBuilder<> IRB(&I);
    setShadow(V: &I, SV: IRB.CreateBitCast(V: getShadow(I: &I, i: 0), DestTy: getShadowTy(V: &I)));
    setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
  }
2604
  /// Resize the (integer) pointer shadow to the result width; zero-extension
  /// is used since the new bits carry no taint of their own.
  void visitPtrToIntInst(PtrToIntInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(V: &I, SV: IRB.CreateIntCast(V: getShadow(I: &I, i: 0), DestTy: getShadowTy(V: &I), isSigned: false,
                                       Name: "_msprop_ptrtoint"));
    setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
  }
2611
  /// Resize the integer shadow to the pointer shadow width (unsigned cast),
  /// mirroring visitPtrToIntInst.
  void visitIntToPtrInst(IntToPtrInst &I) {
    IRBuilder<> IRB(&I);
    setShadow(V: &I, SV: IRB.CreateIntCast(V: getShadow(I: &I, i: 0), DestTy: getShadowTy(V: &I), isSigned: false,
                                       Name: "_msprop_inttoptr"));
    setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
  }
2618
  // Float<->int conversions have no simple bitwise shadow mapping, so fall
  // back to the generic OR-of-shadows propagation.
  void visitFPToSIInst(CastInst &I) { handleShadowOr(I); }
  void visitFPToUIInst(CastInst &I) { handleShadowOr(I); }
  void visitSIToFPInst(CastInst &I) { handleShadowOr(I); }
  void visitUIToFPInst(CastInst &I) { handleShadowOr(I); }
  void visitFPExtInst(CastInst &I) { handleShadowOr(I); }
  void visitFPTruncInst(CastInst &I) { handleShadowOr(I); }
2625
2626 /// Generic handler to compute shadow for bitwise AND.
2627 ///
2628 /// This is used by 'visitAnd' but also as a primitive for other handlers.
2629 ///
2630 /// This code is precise: it implements the rule that "And" of an initialized
2631 /// zero bit always results in an initialized value:
2632 // 1&1 => 1; 0&1 => 0; p&1 => p;
2633 // 1&0 => 0; 0&0 => 0; p&0 => 0;
2634 // 1&p => p; 0&p => 0; p&p => p;
2635 //
2636 // S = (S1 & S2) | (V1 & S2) | (S1 & V2)
2637 Value *handleBitwiseAnd(IRBuilder<> &IRB, Value *V1, Value *V2, Value *S1,
2638 Value *S2) {
2639 // "The two arguments to the ‘and’ instruction must be integer or vector
2640 // of integer values. Both arguments must have identical types."
2641 //
2642 // We enforce this condition for all callers to handleBitwiseAnd(); callers
2643 // with non-integer types should call CreateAppToShadowCast() themselves.
2644 assert(V1->getType()->isIntOrIntVectorTy());
2645 assert(V1->getType() == V2->getType());
2646
2647 // Conveniently, getShadowTy() of Int/IntVector returns the original type.
2648 assert(V1->getType() == S1->getType());
2649 assert(V2->getType() == S2->getType());
2650
2651 Value *S1S2 = IRB.CreateAnd(LHS: S1, RHS: S2);
2652 Value *V1S2 = IRB.CreateAnd(LHS: V1, RHS: S2);
2653 Value *S1V2 = IRB.CreateAnd(LHS: S1, RHS: V2);
2654
2655 return IRB.CreateOr(Ops: {S1S2, V1S2, S1V2});
2656 }
2657
2658 /// Handler for bitwise AND operator.
2659 void visitAnd(BinaryOperator &I) {
2660 IRBuilder<> IRB(&I);
2661 Value *V1 = I.getOperand(i_nocapture: 0);
2662 Value *V2 = I.getOperand(i_nocapture: 1);
2663 Value *S1 = getShadow(I: &I, i: 0);
2664 Value *S2 = getShadow(I: &I, i: 1);
2665
2666 Value *OutShadow = handleBitwiseAnd(IRB, V1, V2, S1, S2);
2667
2668 setShadow(V: &I, SV: OutShadow);
2669 setOriginForNaryOp(I);
2670 }
2671
  /// Handler for bitwise OR: exact propagation (dual of handleBitwiseAnd),
  /// plus poisoning of the result when a `disjoint or` hint is violated.
  void visitOr(BinaryOperator &I) {
    IRBuilder<> IRB(&I);
    // "Or" of 1 and a poisoned value results in unpoisoned value:
    //   1|1 => 1; 0|1 => 1; p|1 => 1;
    //   1|0 => 1; 0|0 => 0; p|0 => p;
    //   1|p => 1; 0|p => p; p|p => p;
    //
    //   S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
    //
    // If the "disjoint OR" property is violated, the result is poison, and
    // hence the entire shadow is uninitialized:
    //   S = S | SignExt(V1 & V2 != 0)
    Value *S1 = getShadow(I: &I, i: 0);
    Value *S2 = getShadow(I: &I, i: 1);
    Value *V1 = I.getOperand(i_nocapture: 0);
    Value *V2 = I.getOperand(i_nocapture: 1);

    // "The two arguments to the 'or' instruction must be integer or vector
    // of integer values. Both arguments must have identical types."
    assert(V1->getType()->isIntOrIntVectorTy());
    assert(V1->getType() == V2->getType());

    // Conveniently, getShadowTy() of Int/IntVector returns the original type.
    assert(V1->getType() == S1->getType());
    assert(V2->getType() == S2->getType());

    Value *NotV1 = IRB.CreateNot(V: V1);
    Value *NotV2 = IRB.CreateNot(V: V2);

    Value *S1S2 = IRB.CreateAnd(LHS: S1, RHS: S2);
    Value *S2NotV1 = IRB.CreateAnd(LHS: NotV1, RHS: S2);
    Value *S1NotV2 = IRB.CreateAnd(LHS: S1, RHS: NotV2);

    Value *S = IRB.CreateOr(Ops: {S1S2, S2NotV1, S1NotV2});

    // If any bit is set in both operands, the disjoint property does not
    // hold, so the whole result value is poisoned.
    if (ClPreciseDisjointOr && cast<PossiblyDisjointInst>(Val: &I)->isDisjoint()) {
      Value *V1V2 = IRB.CreateAnd(LHS: V1, RHS: V2);
      Value *DisjointOrShadow = IRB.CreateSExt(
          V: IRB.CreateICmpNE(LHS: V1V2, RHS: getCleanShadow(V: V1V2)), DestTy: V1V2->getType());
      S = IRB.CreateOr(LHS: S, RHS: DisjointOrShadow, Name: "_ms_disjoint");
    }

    setShadow(V: &I, SV: S);
    setOriginForNaryOp(I);
  }
2717
  /// Default propagation of shadow and/or origin.
  ///
  /// This class implements the general case of shadow propagation, used in all
  /// cases where we don't know and/or don't care about what the operation
  /// actually does. It converts all input shadow values to a common type
  /// (extending or truncating as necessary), and bitwise OR's them.
  ///
  /// This is much cheaper than inserting checks (i.e. requiring inputs to be
  /// fully initialized), and less prone to false positives.
  ///
  /// This class also implements the general case of origin propagation. For a
  /// Nary operation, result origin is set to the origin of an argument that is
  /// not entirely initialized. If there is more than one such arguments, the
  /// rightmost of them is picked. It does not matter which one is picked if all
  /// arguments are initialized.
  template <bool CombineShadow> class Combiner {
    Value *Shadow = nullptr;  // accumulated OR of shadows (if CombineShadow)
    Value *Origin = nullptr;  // origin of the rightmost tainted operand so far
    IRBuilder<> &IRB;
    MemorySanitizerVisitor *MSV;

  public:
    Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
        : IRB(IRB), MSV(MSV) {}

    /// Add a pair of shadow and origin values to the mix.
    Combiner &Add(Value *OpShadow, Value *OpOrigin) {
      if (CombineShadow) {
        assert(OpShadow);
        if (!Shadow)
          Shadow = OpShadow;
        else {
          // Convert to the accumulated shadow's type, then OR in.
          OpShadow = MSV->CreateShadowCast(IRB, V: OpShadow, dstTy: Shadow->getType());
          Shadow = IRB.CreateOr(LHS: Shadow, RHS: OpShadow, Name: "_msprop");
        }
      }

      if (MSV->MS.TrackOrigins) {
        assert(OpOrigin);
        if (!Origin) {
          Origin = OpOrigin;
        } else {
          Constant *ConstOrigin = dyn_cast<Constant>(Val: OpOrigin);
          // No point in adding something that might result in 0 origin value.
          if (!ConstOrigin || !ConstOrigin->isNullValue()) {
            // Prefer this operand's origin when its shadow is non-zero.
            Value *Cond = MSV->convertToBool(V: OpShadow, IRB);
            Origin = IRB.CreateSelect(C: Cond, True: OpOrigin, False: Origin);
          }
        }
      }
      return *this;
    }

    /// Add an application value to the mix.
    Combiner &Add(Value *V) {
      Value *OpShadow = MSV->getShadow(V);
      Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
      return Add(OpShadow, OpOrigin);
    }

    /// Set the current combined values as the given instruction's shadow
    /// and origin.
    void Done(Instruction *I) {
      if (CombineShadow) {
        assert(Shadow);
        // Resize the accumulated shadow to the instruction's shadow type.
        Shadow = MSV->CreateShadowCast(IRB, V: Shadow, dstTy: MSV->getShadowTy(V: I));
        MSV->setShadow(V: I, SV: Shadow);
      }
      if (MSV->MS.TrackOrigins) {
        assert(Origin);
        MSV->setOrigin(V: I, Origin);
      }
    }

    /// Store the current combined value at the specified origin
    /// location.
    void DoneAndStoreOrigin(TypeSize TS, Value *OriginPtr) {
      if (MSV->MS.TrackOrigins) {
        assert(Origin);
        MSV->paintOrigin(IRB, Origin, OriginPtr, TS, Alignment: kMinOriginAlignment);
      }
    }
  };
2801
  // Combiner flavors: full shadow+origin propagation, or origin-only.
  using ShadowAndOriginCombiner = Combiner<true>;
  using OriginCombiner = Combiner<false>;
2804
2805 /// Propagate origin for arbitrary operation.
2806 void setOriginForNaryOp(Instruction &I) {
2807 if (!MS.TrackOrigins)
2808 return;
2809 IRBuilder<> IRB(&I);
2810 OriginCombiner OC(this, IRB);
2811 for (Use &Op : I.operands())
2812 OC.Add(V: Op.get());
2813 OC.Done(I: &I);
2814 }
2815
2816 size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
2817 assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
2818 "Vector of pointers is not a valid shadow type");
2819 return Ty->isVectorTy() ? cast<FixedVectorType>(Val: Ty)->getNumElements() *
2820 Ty->getScalarSizeInBits()
2821 : Ty->getPrimitiveSizeInBits();
2822 }
2823
  /// Cast between two shadow types, extending or truncating as
  /// necessary.
  Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
                          bool Signed = false) {
    Type *srcTy = V->getType();
    if (srcTy == dstTy)
      return V;
    size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(Ty: srcTy);
    size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(Ty: dstTy);
    // Collapsing to i1 is an "any bit poisoned" test.
    if (srcSizeInBits > 1 && dstSizeInBits == 1)
      return IRB.CreateICmpNE(LHS: V, RHS: getCleanShadow(V));

    if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
      return IRB.CreateIntCast(V, DestTy: dstTy, isSigned: Signed);
    // Vector-to-vector with matching element counts: elementwise int cast.
    if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
        cast<VectorType>(Val: dstTy)->getElementCount() ==
            cast<VectorType>(Val: srcTy)->getElementCount())
      return IRB.CreateIntCast(V, DestTy: dstTy, isSigned: Signed);
    // Fallback: flatten to an integer of the source width, resize it, then
    // reinterpret as the destination type.
    Value *V1 = IRB.CreateBitCast(V, DestTy: Type::getIntNTy(C&: *MS.C, N: srcSizeInBits));
    Value *V2 =
        IRB.CreateIntCast(V: V1, DestTy: Type::getIntNTy(C&: *MS.C, N: dstSizeInBits), isSigned: Signed);
    return IRB.CreateBitCast(V: V2, DestTy: dstTy);
    // TODO: handle struct types.
  }
2848
2849 /// Cast an application value to the type of its own shadow.
2850 Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
2851 Type *ShadowTy = getShadowTy(V);
2852 if (V->getType() == ShadowTy)
2853 return V;
2854 if (V->getType()->isPtrOrPtrVectorTy())
2855 return IRB.CreatePtrToInt(V, DestTy: ShadowTy);
2856 else
2857 return IRB.CreateBitCast(V, DestTy: ShadowTy);
2858 }
2859
2860 /// Propagate shadow for arbitrary operation.
2861 void handleShadowOr(Instruction &I) {
2862 IRBuilder<> IRB(&I);
2863 ShadowAndOriginCombiner SC(this, IRB);
2864 for (Use &Op : I.operands())
2865 SC.Add(V: Op.get());
2866 SC.Done(I: &I);
2867 }
2868
  // Perform a bitwise OR on the horizontal pairs (or other specified grouping)
  // of elements.
  //
  // For example, suppose we have:
  //   VectorA: <a0, a1, a2, a3, a4, a5>
  //   VectorB: <b0, b1, b2, b3, b4, b5>
  //   ReductionFactor: 3
  //   Shards: 1
  // The output would be:
  //   <a0|a1|a2, a3|a4|a5, b0|b1|b2, b3|b4|b5>
  //
  // If we have:
  //   VectorA: <a0, a1, a2, a3, a4, a5, a6, a7>
  //   VectorB: <b0, b1, b2, b3, b4, b5, b6, b7>
  //   ReductionFactor: 2
  //   Shards: 2
  // then A and B each have 2 "shards", resulting in the output being
  // interleaved:
  //   <a0|a1, a2|a3, b0|b1, b2|b3, a4|a5, a6|a7, b4|b5, b6|b7>
  //
  // This is convenient for instrumenting horizontal add/sub.
  // For bitwise OR on "vertical" pairs, see maybeHandleSimpleNomemIntrinsic().
  Value *horizontalReduce(IntrinsicInst &I, unsigned ReductionFactor,
                          unsigned Shards, Value *VectorA, Value *VectorB) {
    assert(isa<FixedVectorType>(VectorA->getType()));
    unsigned NumElems =
        cast<FixedVectorType>(Val: VectorA->getType())->getNumElements();

    [[maybe_unused]] unsigned TotalNumElems = NumElems;
    if (VectorB) {
      assert(VectorA->getType() == VectorB->getType());
      TotalNumElems *= 2;
    }

    assert(NumElems % (ReductionFactor * Shards) == 0);

    Value *Or = nullptr;

    IRBuilder<> IRB(&I);
    // One shuffle per position within a group: the i-th shuffle gathers the
    // i-th element of every group, and the results are OR'ed together.
    for (unsigned i = 0; i < ReductionFactor; i++) {
      SmallVector<int, 16> Mask;

      for (unsigned j = 0; j < Shards; j++) {
        unsigned Offset = NumElems / Shards * j;

        for (unsigned X = 0; X < NumElems / Shards; X += ReductionFactor)
          Mask.push_back(Elt: Offset + X + i);

        if (VectorB) {
          // Indices >= NumElems select from VectorB in a two-input shuffle.
          for (unsigned X = 0; X < NumElems / Shards; X += ReductionFactor)
            Mask.push_back(Elt: NumElems + Offset + X + i);
        }
      }

      Value *Masked;
      if (VectorB)
        Masked = IRB.CreateShuffleVector(V1: VectorA, V2: VectorB, Mask);
      else
        Masked = IRB.CreateShuffleVector(V: VectorA, Mask);

      if (Or)
        Or = IRB.CreateOr(LHS: Or, RHS: Masked);
      else
        Or = Masked;
    }

    return Or;
  }
2937
  /// Propagate shadow for 1- or 2-vector intrinsics that combine adjacent
  /// fields.
  ///
  /// e.g., <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>)
  ///       <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>)
  void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I, unsigned Shards) {
    assert(I.arg_size() == 1 || I.arg_size() == 2);

    assert(I.getType()->isVectorTy());
    assert(I.getArgOperand(0)->getType()->isVectorTy());

    [[maybe_unused]] FixedVectorType *ParamType =
        cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType());
    assert((I.arg_size() != 2) ||
           (ParamType == cast<FixedVectorType>(I.getArgOperand(1)->getType())));
    [[maybe_unused]] FixedVectorType *ReturnType =
        cast<FixedVectorType>(Val: I.getType());
    // Pairwise reduction: the output has half as many elements as the inputs
    // combined.
    assert(ParamType->getNumElements() * I.arg_size() ==
           2 * ReturnType->getNumElements());

    IRBuilder<> IRB(&I);

    // Horizontal OR of shadow
    Value *FirstArgShadow = getShadow(I: &I, i: 0);
    Value *SecondArgShadow = nullptr;
    if (I.arg_size() == 2)
      SecondArgShadow = getShadow(I: &I, i: 1);

    Value *OrShadow = horizontalReduce(I, /*ReductionFactor=*/2, Shards,
                                       VectorA: FirstArgShadow, VectorB: SecondArgShadow);

    OrShadow = CreateShadowCast(IRB, V: OrShadow, dstTy: getShadowTy(V: &I));

    setShadow(V: &I, SV: OrShadow);
    setOriginForNaryOp(I);
  }
2974
  /// Propagate shadow for 1- or 2-vector intrinsics that combine adjacent
  /// fields, with the parameters reinterpreted to have elements of a specified
  /// width. For example:
  ///     @llvm.x86.ssse3.phadd.w(<1 x i64> [[VAR1]], <1 x i64> [[VAR2]])
  /// conceptually operates on
  ///     (<4 x i16> [[VAR1]], <4 x i16> [[VAR2]])
  /// and can be handled with ReinterpretElemWidth == 16.
  void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I, unsigned Shards,
                                       int ReinterpretElemWidth) {
    assert(I.arg_size() == 1 || I.arg_size() == 2);

    assert(I.getType()->isVectorTy());
    assert(I.getArgOperand(0)->getType()->isVectorTy());

    FixedVectorType *ParamType =
        cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType());
    assert((I.arg_size() != 2) ||
           (ParamType == cast<FixedVectorType>(I.getArgOperand(1)->getType())));

    [[maybe_unused]] FixedVectorType *ReturnType =
        cast<FixedVectorType>(Val: I.getType());
    assert(ParamType->getNumElements() * I.arg_size() ==
           2 * ReturnType->getNumElements());

    IRBuilder<> IRB(&I);

    // The requested element width must evenly divide the parameter width.
    FixedVectorType *ReinterpretShadowTy = nullptr;
    assert(isAligned(Align(ReinterpretElemWidth),
                     ParamType->getPrimitiveSizeInBits()));
    ReinterpretShadowTy = FixedVectorType::get(
        ElementType: IRB.getIntNTy(N: ReinterpretElemWidth),
        NumElts: ParamType->getPrimitiveSizeInBits() / ReinterpretElemWidth);

    // Horizontal OR of shadow
    Value *FirstArgShadow = getShadow(I: &I, i: 0);
    FirstArgShadow = IRB.CreateBitCast(V: FirstArgShadow, DestTy: ReinterpretShadowTy);

    // If we had two parameters each with an odd number of elements, the total
    // number of elements is even, but we have never seen this in extant
    // instruction sets, so we enforce that each parameter must have an even
    // number of elements.
    assert(isAligned(
        Align(2),
        cast<FixedVectorType>(FirstArgShadow->getType())->getNumElements()));

    Value *SecondArgShadow = nullptr;
    if (I.arg_size() == 2) {
      SecondArgShadow = getShadow(I: &I, i: 1);
      SecondArgShadow = IRB.CreateBitCast(V: SecondArgShadow, DestTy: ReinterpretShadowTy);
    }

    Value *OrShadow = horizontalReduce(I, /*ReductionFactor=*/2, Shards,
                                       VectorA: FirstArgShadow, VectorB: SecondArgShadow);

    OrShadow = CreateShadowCast(IRB, V: OrShadow, dstTy: getShadowTy(V: &I));

    setShadow(V: &I, SV: OrShadow);
    setOriginForNaryOp(I);
  }
3034
  // Unary op: the result shadow is just the operand's shadow (generic rule).
  void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
3036
  // Handle multiplication by constant.
  //
  // Handle a special case of multiplication by constant that may have one or
  // more zeros in the lower bits. This makes corresponding number of lower bits
  // of the result zero as well. We model it by shifting the other operand
  // shadow left by the required number of bits. Effectively, we transform
  // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
  // We use multiplication by 2**N instead of shift to cover the case of
  // multiplication by 0, which may occur in some elements of a vector operand.
  void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
                           Value *OtherArg) {
    Constant *ShadowMul;
    Type *Ty = ConstArg->getType();
    if (auto *VTy = dyn_cast<VectorType>(Val: Ty)) {
      // Vector constant: compute the per-element multiplier 2**countr_zero.
      unsigned NumElements = cast<FixedVectorType>(Val: VTy)->getNumElements();
      Type *EltTy = VTy->getElementType();
      SmallVector<Constant *, 16> Elements;
      for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
        if (ConstantInt *Elt =
                dyn_cast<ConstantInt>(Val: ConstArg->getAggregateElement(Elt: Idx))) {
          const APInt &V = Elt->getValue();
          APInt V2 = APInt(V.getBitWidth(), 1) << V.countr_zero();
          Elements.push_back(Elt: ConstantInt::get(Ty: EltTy, V: V2));
        } else {
          // Non-ConstantInt element: conservative multiplier 1 (no shift).
          Elements.push_back(Elt: ConstantInt::get(Ty: EltTy, V: 1));
        }
      }
      ShadowMul = ConstantVector::get(V: Elements);
    } else {
      if (ConstantInt *Elt = dyn_cast<ConstantInt>(Val: ConstArg)) {
        const APInt &V = Elt->getValue();
        APInt V2 = APInt(V.getBitWidth(), 1) << V.countr_zero();
        ShadowMul = ConstantInt::get(Ty, V: V2);
      } else {
        ShadowMul = ConstantInt::get(Ty, V: 1);
      }
    }

    IRBuilder<> IRB(&I);
    setShadow(V: &I,
              SV: IRB.CreateMul(LHS: getShadow(V: OtherArg), RHS: ShadowMul, Name: "msprop_mul_cst"));
    setOrigin(V: &I, Origin: getOrigin(V: OtherArg));
  }
3080
3081 void visitMul(BinaryOperator &I) {
3082 Constant *constOp0 = dyn_cast<Constant>(Val: I.getOperand(i_nocapture: 0));
3083 Constant *constOp1 = dyn_cast<Constant>(Val: I.getOperand(i_nocapture: 1));
3084 if (constOp0 && !constOp1)
3085 handleMulByConstant(I, ConstArg: constOp0, OtherArg: I.getOperand(i_nocapture: 1));
3086 else if (constOp1 && !constOp0)
3087 handleMulByConstant(I, ConstArg: constOp1, OtherArg: I.getOperand(i_nocapture: 0));
3088 else
3089 handleShadowOr(I);
3090 }
3091
  // Plain arithmetic: no special structure to exploit, so operand shadows
  // are simply OR'ed together.
  void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
  void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
  void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
  void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
  void visitSub(BinaryOperator &I) { handleShadowOr(I); }
  void visitXor(BinaryOperator &I) { handleShadowOr(I); }
3098
  /// Integer division/remainder: the divisor must be fully initialized
  /// (division is side-effecting, unlike FP division below), while the
  /// dividend's shadow propagates to the result.
  void handleIntegerDiv(Instruction &I) {
    IRBuilder<> IRB(&I);
    // Strict on the second argument.
    insertCheckShadowOf(Val: I.getOperand(i: 1), OrigIns: &I);
    setShadow(V: &I, SV: getShadow(I: &I, i: 0));
    setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
  }
3106
  // All integer div/rem flavors share the strict-divisor handling.
  void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); }
  void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); }
  void visitURem(BinaryOperator &I) { handleIntegerDiv(I); }
  void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); }

  // Floating point division is side-effect free. We can not require that the
  // divisor is fully initialized and must propagate shadow. See PR37523.
  void visitFDiv(BinaryOperator &I) { handleShadowOr(I); }
  void visitFRem(BinaryOperator &I) { handleShadowOr(I); }
3116
3117 /// Instrument == and != comparisons.
3118 ///
3119 /// Sometimes the comparison result is known even if some of the bits of the
3120 /// arguments are not.
3121 void handleEqualityComparison(ICmpInst &I) {
3122 IRBuilder<> IRB(&I);
3123 Value *A = I.getOperand(i_nocapture: 0);
3124 Value *B = I.getOperand(i_nocapture: 1);
3125 Value *Sa = getShadow(V: A);
3126 Value *Sb = getShadow(V: B);
3127
3128 Value *Si = propagateEqualityComparison(IRB, A, B, Sa, Sb);
3129
3130 setShadow(V: &I, SV: Si);
3131 setOriginForNaryOp(I);
3132 }
3133
  /// Instrument relational comparisons.
  ///
  /// This function does exact shadow propagation for all relational
  /// comparisons of integers, pointers and vectors of those.
  /// FIXME: output seems suboptimal when one of the operands is a constant
  void handleRelationalComparisonExact(ICmpInst &I) {
    IRBuilder<> IRB(&I);
    Value *A = I.getOperand(i_nocapture: 0);
    Value *B = I.getOperand(i_nocapture: 1);
    Value *Sa = getShadow(V: A);
    Value *Sb = getShadow(V: B);

    // Get rid of pointers and vectors of pointers.
    // For ints (and vectors of ints), types of A and Sa match,
    // and this is a no-op.
    A = IRB.CreatePointerCast(V: A, DestTy: Sa->getType());
    B = IRB.CreatePointerCast(V: B, DestTy: Sb->getType());

    // Let [a0, a1] be the interval of possible values of A, taking into account
    // its undefined bits. Let [b0, b1] be the interval of possible values of B.
    // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
    bool IsSigned = I.isSigned();

    auto GetMinMaxUnsigned = [&](Value *V, Value *S) {
      if (IsSigned) {
        // Sign-flip to map from signed range to unsigned range. Relation A vs B
        // should be preserved, if checked with `getUnsignedPredicate()`.
        // Relationship between Amin, Amax, Bmin, Bmax also will not be
        // affected, as they are created by effectively adding/subtracting from
        // A (or B) a value, derived from shadow, with no overflow, either
        // before or after sign flip.
        APInt MinVal =
            APInt::getSignedMinValue(numBits: V->getType()->getScalarSizeInBits());
        V = IRB.CreateXor(LHS: V, RHS: ConstantInt::get(Ty: V->getType(), V: MinVal));
      }
      // Minimize undefined bits.
      Value *Min = IRB.CreateAnd(LHS: V, RHS: IRB.CreateNot(V: S));
      Value *Max = IRB.CreateOr(LHS: V, RHS: S);
      return std::make_pair(x&: Min, y&: Max);
    };

    auto [Amin, Amax] = GetMinMaxUnsigned(A, Sa);
    auto [Bmin, Bmax] = GetMinMaxUnsigned(B, Sb);
    // The comparison is undefined exactly when the two extreme comparisons
    // disagree.
    Value *S1 = IRB.CreateICmp(P: I.getUnsignedPredicate(), LHS: Amin, RHS: Bmax);
    Value *S2 = IRB.CreateICmp(P: I.getUnsignedPredicate(), LHS: Amax, RHS: Bmin);

    Value *Si = IRB.CreateXor(LHS: S1, RHS: S2);
    setShadow(V: &I, SV: Si);
    setOriginForNaryOp(I);
  }
3184
/// Instrument signed relational comparisons.
///
/// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
/// bit of the shadow. Everything else is delegated to handleShadowOr().
void handleSignedRelationalComparison(ICmpInst &I) {
  Constant *constOp;
  Value *op = nullptr;
  CmpInst::Predicate pre;
  // Normalize so the constant is logically on the RHS; swap the predicate
  // if the constant was on the LHS.
  if ((constOp = dyn_cast<Constant>(Val: I.getOperand(i_nocapture: 1)))) {
    op = I.getOperand(i_nocapture: 0);
    pre = I.getPredicate();
  } else if ((constOp = dyn_cast<Constant>(Val: I.getOperand(i_nocapture: 0)))) {
    op = I.getOperand(i_nocapture: 1);
    pre = I.getSwappedPredicate();
  } else {
    // Neither operand is a constant: fall back to approximate propagation.
    handleShadowOr(I);
    return;
  }

  // Only pure sign-bit tests are handled precisely: (x < 0), (x >= 0),
  // (x > -1), (x <= -1). The result then depends only on the sign bit, so
  // its shadow is the sign bit of the operand's shadow.
  if ((constOp->isNullValue() &&
       (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
      (constOp->isAllOnesValue() &&
       (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
    IRBuilder<> IRB(&I);
    // ICmpSLT against zero extracts the sign bit of the shadow.
    Value *Shadow = IRB.CreateICmpSLT(LHS: getShadow(V: op), RHS: getCleanShadow(V: op),
                                      Name: "_msprop_icmp_s");
    setShadow(V: &I, SV: Shadow);
    setOrigin(V: &I, Origin: getOrigin(V: op));
  } else {
    handleShadowOr(I);
  }
}
3217
3218 void visitICmpInst(ICmpInst &I) {
3219 if (!ClHandleICmp) {
3220 handleShadowOr(I);
3221 return;
3222 }
3223 if (I.isEquality()) {
3224 handleEqualityComparison(I);
3225 return;
3226 }
3227
3228 assert(I.isRelational());
3229 if (ClHandleICmpExact) {
3230 handleRelationalComparisonExact(I);
3231 return;
3232 }
3233 if (I.isSigned()) {
3234 handleSignedRelationalComparison(I);
3235 return;
3236 }
3237
3238 assert(I.isUnsigned());
3239 if ((isa<Constant>(Val: I.getOperand(i_nocapture: 0)) || isa<Constant>(Val: I.getOperand(i_nocapture: 1)))) {
3240 handleRelationalComparisonExact(I);
3241 return;
3242 }
3243
3244 handleShadowOr(I);
3245 }
3246
// FP comparisons get approximate handling: any poisoned bit in either
// operand poisons the boolean result.
void visitFCmpInst(FCmpInst &I) { handleShadowOr(I); }
3248
/// Instrument shl/ashr/lshr.
///
/// The shadow of the first operand is shifted by the concrete shift amount;
/// any uninitialized bit in the shift amount poisons the whole result.
void handleShift(BinaryOperator &I) {
  IRBuilder<> IRB(&I);
  // If any of the S2 bits are poisoned, the whole thing is poisoned.
  // Otherwise perform the same shift on S1.
  Value *S1 = getShadow(I: &I, i: 0);
  Value *S2 = getShadow(I: &I, i: 1);
  // All-ones if the shift amount has any poisoned bit, all-zeros otherwise.
  Value *S2Conv =
      IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: S2, RHS: getCleanShadow(V: S2)), DestTy: S2->getType());
  Value *V2 = I.getOperand(i_nocapture: 1);
  // Same shift opcode applied to the value-operand's shadow.
  Value *Shift = IRB.CreateBinOp(Opc: I.getOpcode(), LHS: S1, RHS: V2);
  setShadow(V: &I, SV: IRB.CreateOr(LHS: Shift, RHS: S2Conv));
  setOriginForNaryOp(I);
}
3262
// All three shift flavors share the same shadow propagation.
void visitShl(BinaryOperator &I) { handleShift(I); }
void visitAShr(BinaryOperator &I) { handleShift(I); }
void visitLShr(BinaryOperator &I) { handleShift(I); }
3266
/// Instrument llvm.fshl / llvm.fshr (funnel shifts).
///
/// Mirrors handleShift(): apply the same funnel shift to the two input
/// shadows, and poison everything if the shift amount is poisoned.
void handleFunnelShift(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  // If any of the S2 bits are poisoned, the whole thing is poisoned.
  // Otherwise perform the same shift on S0 and S1.
  Value *S0 = getShadow(I: &I, i: 0);
  Value *S1 = getShadow(I: &I, i: 1);
  Value *S2 = getShadow(I: &I, i: 2);
  // All-ones if the shift amount has any poisoned bit, all-zeros otherwise.
  Value *S2Conv =
      IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: S2, RHS: getCleanShadow(V: S2)), DestTy: S2->getType());
  Value *V2 = I.getOperand(i_nocapture: 2);
  // Same funnel-shift intrinsic applied to the operand shadows.
  Value *Shift = IRB.CreateIntrinsic(ID: I.getIntrinsicID(), Types: S2Conv->getType(),
                                     Args: {S0, S1, V2});
  setShadow(V: &I, SV: IRB.CreateOr(LHS: Shift, RHS: S2Conv));
  setOriginForNaryOp(I);
}
3282
/// Instrument llvm.memmove
///
/// At this point we don't know if llvm.memmove will be inlined or not.
/// If we don't instrument it and it gets inlined,
/// our interceptor will not kick in and we will lose the memmove.
/// If we instrument the call here, but it does not get inlined,
/// we will memmove the shadow twice: which is bad in case
/// of overlapping regions. So, we simply lower the intrinsic to a call.
///
/// Similar situation exists for memcpy and memset.
void visitMemMoveInst(MemMoveInst &I) {
  getShadow(V: I.getArgOperand(i: 1)); // Ensure shadow initialized
  IRBuilder<> IRB(&I);
  // Replace the intrinsic with a call to the MSan runtime's __msan_memmove,
  // which moves both the data and its shadow.
  IRB.CreateCall(Callee: MS.MemmoveFn,
                 Args: {I.getArgOperand(i: 0), I.getArgOperand(i: 1),
                  IRB.CreateIntCast(V: I.getArgOperand(i: 2), DestTy: MS.IntptrTy, isSigned: false)});
  I.eraseFromParent();
}
3301
/// Instrument memcpy
///
/// Similar to memmove: avoid copying shadow twice. This is somewhat
/// unfortunate as it may slowdown small constant memcpys.
/// FIXME: consider doing manual inline for small constant sizes and proper
/// alignment.
///
/// Note: This also handles memcpy.inline, which promises no calls to external
/// functions as an optimization. However, with instrumentation enabled this
/// is difficult to promise; additionally, we know that the MSan runtime
/// exists and provides __msan_memcpy(). Therefore, we assume that with
/// instrumentation it's safe to turn memcpy.inline into a call to
/// __msan_memcpy(). Should this be wrong, such as when implementing memcpy()
/// itself, instrumentation should be disabled with the no_sanitize attribute.
void visitMemCpyInst(MemCpyInst &I) {
  getShadow(V: I.getArgOperand(i: 1)); // Ensure shadow initialized
  IRBuilder<> IRB(&I);
  // Lower to __msan_memcpy, which copies both the data and its shadow.
  IRB.CreateCall(Callee: MS.MemcpyFn,
                 Args: {I.getArgOperand(i: 0), I.getArgOperand(i: 1),
                  IRB.CreateIntCast(V: I.getArgOperand(i: 2), DestTy: MS.IntptrTy, isSigned: false)});
  I.eraseFromParent();
}
3324
// Same as memcpy: lower to a runtime call (__msan_memset) so the fill value
// and its shadow are handled exactly once.
void visitMemSetInst(MemSetInst &I) {
  IRBuilder<> IRB(&I);
  IRB.CreateCall(
      Callee: MS.MemsetFn,
      // memset's value argument is an i8 widened to i32 at the C ABI level.
      Args: {I.getArgOperand(i: 0),
       IRB.CreateIntCast(V: I.getArgOperand(i: 1), DestTy: IRB.getInt32Ty(), isSigned: false),
       IRB.CreateIntCast(V: I.getArgOperand(i: 2), DestTy: MS.IntptrTy, isSigned: false)});
  I.eraseFromParent();
}
3335
// Variadic-argument handling is target-specific; defer to the VAHelper.
void visitVAStartInst(VAStartInst &I) { VAHelper->visitVAStartInst(I); }

void visitVACopyInst(VACopyInst &I) { VAHelper->visitVACopyInst(I); }
3339
/// Handle vector store-like intrinsics.
///
/// Instrument intrinsics that look like a simple SIMD store: writes memory,
/// has 1 pointer argument and 1 vector argument, returns void.
bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
  assert(I.arg_size() == 2);

  IRBuilder<> IRB(&I);
  Value *Addr = I.getArgOperand(i: 0);
  Value *Shadow = getShadow(I: &I, i: 1);
  Value *ShadowPtr, *OriginPtr;

  // We don't know the pointer alignment (could be unaligned SSE store!).
  // Have to assume the worst case.
  std::tie(args&: ShadowPtr, args&: OriginPtr) = getShadowOriginPtr(
      Addr, IRB, ShadowTy: Shadow->getType(), Alignment: Align(1), /*isStore*/ true);
  IRB.CreateAlignedStore(Val: Shadow, Ptr: ShadowPtr, Align: Align(1));

  // Optionally check that the address itself is fully initialized.
  if (ClCheckAccessAddress)
    insertCheckShadowOf(Val: Addr, OrigIns: &I);

  // FIXME: factor out common code from materializeStores
  if (MS.TrackOrigins)
    IRB.CreateStore(Val: getOrigin(I: &I, i: 1), Ptr: OriginPtr);
  return true;
}
3366
/// Handle vector load-like intrinsics.
///
/// Instrument intrinsics that look like a simple SIMD load: reads memory,
/// has 1 pointer argument, returns a vector.
bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
  assert(I.arg_size() == 1);

  IRBuilder<> IRB(&I);
  Value *Addr = I.getArgOperand(i: 0);

  Type *ShadowTy = getShadowTy(V: &I);
  Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
  if (PropagateShadow) {
    // We don't know the pointer alignment (could be unaligned SSE load!).
    // Have to assume the worst case.
    const Align Alignment = Align(1);
    std::tie(args&: ShadowPtr, args&: OriginPtr) =
        getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
    setShadow(V: &I,
              SV: IRB.CreateAlignedLoad(Ty: ShadowTy, Ptr: ShadowPtr, Align: Alignment, Name: "_msld"));
  } else {
    // Shadow propagation disabled: the result is treated as initialized.
    setShadow(V: &I, SV: getCleanShadow(V: &I));
  }

  // Optionally check that the address itself is fully initialized.
  if (ClCheckAccessAddress)
    insertCheckShadowOf(Val: Addr, OrigIns: &I);

  if (MS.TrackOrigins) {
    if (PropagateShadow)
      setOrigin(V: &I, Origin: IRB.CreateLoad(Ty: MS.OriginTy, Ptr: OriginPtr));
    else
      setOrigin(V: &I, Origin: getCleanOrigin());
  }
  return true;
}
3402
/// Handle (SIMD arithmetic)-like intrinsics.
///
/// Instrument intrinsics with any number of arguments of the same type [*],
/// equal to the return type, plus a specified number of trailing flags of
/// any type.
///
/// [*] The type should be simple (no aggregates or pointers; vectors are
/// fine).
///
/// Caller guarantees that this intrinsic does not access memory.
///
/// Returns false (declining to handle) when the signature does not match
/// the pattern above.
///
/// TODO: "horizontal"/"pairwise" intrinsics are often incorrectly matched by
/// by this handler. See horizontalReduce().
///
/// TODO: permutation intrinsics are also often incorrectly matched.
[[maybe_unused]] bool
maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I,
                                unsigned int trailingFlags) {
  Type *RetTy = I.getType();
  if (!(RetTy->isIntOrIntVectorTy() || RetTy->isFPOrFPVectorTy()))
    return false;

  // All non-flag arguments must have exactly the return type.
  unsigned NumArgOperands = I.arg_size();
  assert(NumArgOperands >= trailingFlags);
  for (unsigned i = 0; i < NumArgOperands - trailingFlags; ++i) {
    Type *Ty = I.getArgOperand(i)->getType();
    if (Ty != RetTy)
      return false;
  }

  // OR together the shadows of all arguments (including trailing flags).
  IRBuilder<> IRB(&I);
  ShadowAndOriginCombiner SC(this, IRB);
  for (unsigned i = 0; i < NumArgOperands; ++i)
    SC.Add(V: I.getArgOperand(i));
  SC.Done(I: &I);

  return true;
}
3441
/// Returns whether it was able to heuristically instrument unknown
/// intrinsics.
///
/// The main purpose of this code is to do something reasonable with all
/// random intrinsics we might encounter, most importantly - SIMD intrinsics.
/// We recognize several classes of intrinsics by their argument types and
/// ModRefBehaviour and apply special instrumentation when we are reasonably
/// sure that we know what the intrinsic does.
///
/// We special-case intrinsics where this approach fails. See llvm.bswap
/// handling as an example of that.
bool maybeHandleUnknownIntrinsicUnlogged(IntrinsicInst &I) {
  unsigned NumArgOperands = I.arg_size();
  if (NumArgOperands == 0)
    return false;

  // void f(ptr, vector) that writes memory -> treat as a SIMD store.
  if (NumArgOperands == 2 && I.getArgOperand(i: 0)->getType()->isPointerTy() &&
      I.getArgOperand(i: 1)->getType()->isVectorTy() &&
      I.getType()->isVoidTy() && !I.onlyReadsMemory()) {
    // This looks like a vector store.
    return handleVectorStoreIntrinsic(I);
  }

  // vector f(ptr) that only reads memory -> treat as a SIMD load.
  if (NumArgOperands == 1 && I.getArgOperand(i: 0)->getType()->isPointerTy() &&
      I.getType()->isVectorTy() && I.onlyReadsMemory()) {
    // This looks like a vector load.
    return handleVectorLoadIntrinsic(I);
  }

  // Memory-free intrinsics may match the simple arithmetic pattern.
  if (I.doesNotAccessMemory())
    if (maybeHandleSimpleNomemIntrinsic(I, /*trailingFlags=*/0))
      return true;

  // FIXME: detect and handle SSE maskstore/maskload?
  // Some cases are now handled in handleAVXMasked{Load,Store}.
  return false;
}
3479
3480 bool maybeHandleUnknownIntrinsic(IntrinsicInst &I) {
3481 if (maybeHandleUnknownIntrinsicUnlogged(I)) {
3482 if (ClDumpHeuristicInstructions)
3483 dumpInst(I, Prefix: "Heuristic");
3484
3485 LLVM_DEBUG(dbgs() << "UNKNOWN INSTRUCTION HANDLED HEURISTICALLY: " << I
3486 << "\n");
3487 return true;
3488 } else
3489 return false;
3490 }
3491
// launder/strip.invariant.group pass the pointer through unchanged, so the
// result inherits the operand's shadow and origin verbatim.
void handleInvariantGroup(IntrinsicInst &I) {
  setShadow(V: &I, SV: getShadow(I: &I, i: 0));
  setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
}
3496
3497 void handleLifetimeStart(IntrinsicInst &I) {
3498 if (!PoisonStack)
3499 return;
3500 AllocaInst *AI = dyn_cast<AllocaInst>(Val: I.getArgOperand(i: 0));
3501 if (AI)
3502 LifetimeStartList.push_back(Elt: std::make_pair(x: &I, y&: AI));
3503 }
3504
// llvm.bswap permutes bytes, so the shadow is the operand's shadow with the
// same byte swap applied; origin passes through unchanged.
void handleBswap(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  Value *Op = I.getArgOperand(i: 0);
  Type *OpType = Op->getType();
  setShadow(V: &I, SV: IRB.CreateIntrinsic(ID: Intrinsic::bswap, Types: ArrayRef(&OpType, 1),
                                      Args: getShadow(V: Op)));
  setOrigin(V: &I, Origin: getOrigin(V: Op));
}
3513
// Instrument llvm.ctlz / llvm.cttz.
//
// Uninitialized bits are ok if they appear after the leading/trailing 0's
// and a 1. If the input is all zero, it is fully initialized iff
// !is_zero_poison.
//
// e.g., for ctlz, with little-endian, if 0/1 are initialized bits with
// concrete value 0/1, and ? is an uninitialized bit:
// - 0001 0??? is fully initialized
// - 000? ???? is fully uninitialized (*)
// - ???? ???? is fully uninitialized
// - 0000 0000 is fully uninitialized if is_zero_poison,
//             fully initialized otherwise
//
// (*) TODO: arguably, since the number of zeros is in the range [3, 8], we
//     only need to poison 4 bits.
//
// OutputShadow =
//     ((ConcreteZerosCount >= ShadowZerosCount) && !AllZeroShadow)
//     || (is_zero_poison && AllZeroSrc)
void handleCountLeadingTrailingZeros(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  Value *Src = I.getArgOperand(i: 0);
  Value *SrcShadow = getShadow(V: Src);

  // Count zeros in both the concrete value and its shadow, with
  // is_zero_poison disabled so an all-zero input is well-defined.
  Value *False = IRB.getInt1(V: false);
  Value *ConcreteZerosCount = IRB.CreateIntrinsic(
      RetTy: I.getType(), ID: I.getIntrinsicID(), Args: {Src, /*is_zero_poison=*/False});
  Value *ShadowZerosCount = IRB.CreateIntrinsic(
      RetTy: I.getType(), ID: I.getIntrinsicID(), Args: {SrcShadow, /*is_zero_poison=*/False});

  // Result is poisoned if the first poisoned bit comes before (or at) the
  // first concrete 1-bit, i.e. the zero run ends on an uninitialized bit.
  Value *CompareConcreteZeros = IRB.CreateICmpUGE(
      LHS: ConcreteZerosCount, RHS: ShadowZerosCount, Name: "_mscz_cmp_zeros");

  // A fully-clean shadow can never poison the result.
  Value *NotAllZeroShadow =
      IRB.CreateIsNotNull(Arg: SrcShadow, Name: "_mscz_shadow_not_null");
  Value *OutputShadow =
      IRB.CreateAnd(LHS: CompareConcreteZeros, RHS: NotAllZeroShadow, Name: "_mscz_main");

  // If zero poison is requested, mix in with the shadow
  Constant *IsZeroPoison = cast<Constant>(Val: I.getOperand(i_nocapture: 1));
  if (!IsZeroPoison->isNullValue()) {
    Value *BoolZeroPoison = IRB.CreateIsNull(Arg: Src, Name: "_mscz_bzp");
    OutputShadow = IRB.CreateOr(LHS: OutputShadow, RHS: BoolZeroPoison, Name: "_mscz_bs");
  }

  // Widen the boolean to a full-width all-or-nothing shadow.
  OutputShadow = IRB.CreateSExt(V: OutputShadow, DestTy: getShadowTy(V: Src), Name: "_mscz_os");

  setShadow(V: &I, SV: OutputShadow);
  setOriginForNaryOp(I);
}
3563
/// Handle Arm NEON vector convert intrinsics.
///
/// e.g., <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float>)
///       i32 @llvm.aarch64.neon.fcvtms.i32.f64 (double)
///
/// For conversions to or from fixed-point, there is a trailing argument to
/// indicate the fixed-point precision:
/// - <4 x float> llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)
/// - <4 x i32> llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
///
/// For x86 SSE vector convert intrinsics, see
/// handleSSEVectorConvertIntrinsic().
void handleNEONVectorConvertIntrinsic(IntrinsicInst &I, bool FixedPoint) {
  if (FixedPoint)
    assert(I.arg_size() == 2);
  else
    assert(I.arg_size() == 1);

  IRBuilder<> IRB(&I);
  Value *S0 = getShadow(I: &I, i: 0);

  if (FixedPoint) {
    // The precision argument must be fully initialized (checked, not
    // propagated).
    Value *Precision = I.getOperand(i_nocapture: 1);
    insertCheckShadowOf(Val: Precision, OrigIns: &I);
  }

  /// For scalars:
  /// Since they are converting from floating-point to integer, the output is
  /// - fully uninitialized if *any* bit of the input is uninitialized
  /// - fully initialized if all bits of the input are initialized
  /// We apply the same principle on a per-field basis for vectors.
  Value *OutShadow = IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: S0, RHS: getCleanShadow(V: S0)),
                                    DestTy: getShadowTy(V: &I));
  setShadow(V: &I, SV: OutShadow);
  setOriginForNaryOp(I);
}
3600
/// Some instructions have additional zero-elements in the return type
/// e.g., <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, ...)
///
/// This function will return a vector type with the same number of elements
/// as the input, but same per-element width as the return value e.g.,
/// <8 x i8>.
FixedVectorType *maybeShrinkVectorShadowType(Value *Src, IntrinsicInst &I) {
  assert(isa<FixedVectorType>(getShadowTy(&I)));
  FixedVectorType *ShadowType = cast<FixedVectorType>(Val: getShadowTy(V: &I));

  // Only the exact 2x widening case is handled; any other mismatch trips
  // the assert below.
  // TODO: generalize beyond 2x?
  if (ShadowType->getElementCount() ==
      cast<VectorType>(Val: Src->getType())->getElementCount() * 2)
    ShadowType = FixedVectorType::getHalfElementsVectorType(VTy: ShadowType);

  assert(ShadowType->getElementCount() ==
         cast<VectorType>(Src->getType())->getElementCount());

  return ShadowType;
}
3621
/// Doubles the length of a vector shadow (extending with zeros) if necessary
/// to match the length of the shadow for the instruction.
/// If scalar types of the vectors are different, it will use the type of the
/// input vector.
/// This is more type-safe than CreateShadowCast().
Value *maybeExtendVectorShadowWithZeros(Value *Shadow, IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  assert(isa<FixedVectorType>(Shadow->getType()));
  assert(isa<FixedVectorType>(I.getType()));

  Value *FullShadow = getCleanShadow(V: &I);
  unsigned ShadowNumElems =
      cast<FixedVectorType>(Val: Shadow->getType())->getNumElements();
  unsigned FullShadowNumElems =
      cast<FixedVectorType>(Val: FullShadow->getType())->getNumElements();

  // Only equal or exactly-2x widths are supported.
  assert((ShadowNumElems == FullShadowNumElems) ||
         (ShadowNumElems * 2 == FullShadowNumElems));

  if (ShadowNumElems == FullShadowNumElems) {
    // Already the right width; nothing to extend.
    FullShadow = Shadow;
  } else {
    // TODO: generalize beyond 2x?
    // Identity mask of full width: indices [0, ShadowNumElems) select from
    // Shadow, the rest select zeros from the clean second operand.
    SmallVector<int, 32> ShadowMask(FullShadowNumElems);
    std::iota(first: ShadowMask.begin(), last: ShadowMask.end(), value: 0);

    // Append zeros
    FullShadow =
        IRB.CreateShuffleVector(V1: Shadow, V2: getCleanShadow(V: Shadow), Mask: ShadowMask);
  }

  return FullShadow;
}
3655
/// Handle x86 SSE vector conversion.
///
/// e.g., single-precision to half-precision conversion:
///       <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
///       <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
///
///       floating-point to integer:
///       <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>)
///       <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>)
///
/// Note: if the output has more elements, they are zero-initialized (and
/// therefore the shadow will also be initialized).
///
/// This differs from handleSSEVectorConvertIntrinsic() because it
/// propagates uninitialized shadow (instead of checking the shadow).
void handleSSEVectorConvertIntrinsicByProp(IntrinsicInst &I,
                                           bool HasRoundingMode) {
  if (HasRoundingMode) {
    assert(I.arg_size() == 2);
    [[maybe_unused]] Value *RoundingMode = I.getArgOperand(i: 1);
    assert(RoundingMode->getType()->isIntegerTy());
  } else {
    assert(I.arg_size() == 1);
  }

  Value *Src = I.getArgOperand(i: 0);
  assert(Src->getType()->isVectorTy());

  // The return type might have more elements than the input.
  // Temporarily shrink the return type's number of elements.
  VectorType *ShadowType = maybeShrinkVectorShadowType(Src, I);

  IRBuilder<> IRB(&I);
  Value *S0 = getShadow(I: &I, i: 0);

  /// For scalars:
  /// Since they are converting to and/or from floating-point, the output is:
  /// - fully uninitialized if *any* bit of the input is uninitialized
  /// - fully initialized if all bits of the input are initialized
  /// We apply the same principle on a per-field basis for vectors.
  Value *Shadow =
      IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: S0, RHS: getCleanShadow(V: S0)), DestTy: ShadowType);

  // The return type might have more elements than the input.
  // Extend the return type back to its original width if necessary.
  Value *FullShadow = maybeExtendVectorShadowWithZeros(Shadow, I);

  setShadow(V: &I, SV: FullShadow);
  setOriginForNaryOp(I);
}
3706
// Instrument x86 SSE vector convert intrinsic.
//
// This function instruments intrinsics like cvtsi2ss:
// %Out = int_xxx_cvtyyy(%ConvertOp)
// or
// %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
// Intrinsic converts \p NumUsedElements elements of \p ConvertOp to the same
// number \p Out elements, and (if has 2 arguments) copies the rest of the
// elements from \p CopyOp.
// In most cases conversion involves floating-point value which may trigger a
// hardware exception when not fully initialized. For this reason we require
// \p ConvertOp[0:NumUsedElements] to be fully initialized and trap otherwise.
// We copy the shadow of \p CopyOp[NumUsedElements:] to \p
// Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
// return a fully initialized value.
//
// For Arm NEON vector convert intrinsics, see
// handleNEONVectorConvertIntrinsic().
void handleSSEVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements,
                                     bool HasRoundingMode = false) {
  IRBuilder<> IRB(&I);
  Value *CopyOp, *ConvertOp;

  assert((!HasRoundingMode ||
          isa<ConstantInt>(I.getArgOperand(I.arg_size() - 1))) &&
         "Invalid rounding mode");

  // Identify the operands; the optional rounding mode (last arg) is not
  // counted here.
  switch (I.arg_size() - HasRoundingMode) {
  case 2:
    CopyOp = I.getArgOperand(i: 0);
    ConvertOp = I.getArgOperand(i: 1);
    break;
  case 1:
    ConvertOp = I.getArgOperand(i: 0);
    CopyOp = nullptr;
    break;
  default:
    llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
  }

  // The first *NumUsedElements* elements of ConvertOp are converted to the
  // same number of output elements. The rest of the output is copied from
  // CopyOp, or (if not available) filled with zeroes.
  // Combine shadow for elements of ConvertOp that are used in this operation,
  // and insert a check.
  // FIXME: consider propagating shadow of ConvertOp, at least in the case of
  // int->any conversion.
  Value *ConvertShadow = getShadow(V: ConvertOp);
  Value *AggShadow = nullptr;
  if (ConvertOp->getType()->isVectorTy()) {
    // OR together the shadows of the used elements.
    AggShadow = IRB.CreateExtractElement(
        Vec: ConvertShadow, Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: 0));
    for (int i = 1; i < NumUsedElements; ++i) {
      Value *MoreShadow = IRB.CreateExtractElement(
          Vec: ConvertShadow, Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: i));
      AggShadow = IRB.CreateOr(LHS: AggShadow, RHS: MoreShadow);
    }
  } else {
    AggShadow = ConvertShadow;
  }
  assert(AggShadow->getType()->isIntegerTy());
  // Report an error if any used element of ConvertOp is uninitialized.
  insertCheckShadow(Shadow: AggShadow, Origin: getOrigin(V: ConvertOp), OrigIns: &I);

  // Build result shadow by zero-filling parts of CopyOp shadow that come from
  // ConvertOp.
  if (CopyOp) {
    assert(CopyOp->getType() == I.getType());
    assert(CopyOp->getType()->isVectorTy());
    Value *ResultShadow = getShadow(V: CopyOp);
    Type *EltTy = cast<VectorType>(Val: ResultShadow->getType())->getElementType();
    // Converted elements were just checked, so their shadow is clean (zero).
    for (int i = 0; i < NumUsedElements; ++i) {
      ResultShadow = IRB.CreateInsertElement(
          Vec: ResultShadow, NewElt: ConstantInt::getNullValue(Ty: EltTy),
          Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: i));
    }
    setShadow(V: &I, SV: ResultShadow);
    setOrigin(V: &I, Origin: getOrigin(V: CopyOp));
  } else {
    setShadow(V: &I, SV: getCleanShadow(V: &I));
    setOrigin(V: &I, Origin: getCleanOrigin());
  }
}
3789
// Given a scalar or vector, extract lower 64 bits (or less), and return all
// zeroes if it is zero, and all ones otherwise.
Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
  // Collapse a vector shadow to a single i64 first.
  if (S->getType()->isVectorTy())
    S = CreateShadowCast(IRB, V: S, dstTy: IRB.getInt64Ty(), /* Signed */ true);
  assert(S->getType()->getPrimitiveSizeInBits() <= 64);
  // Nonzero shadow anywhere in the low bits -> all-ones result of type T.
  Value *S2 = IRB.CreateICmpNE(LHS: S, RHS: getCleanShadow(V: S));
  return CreateShadowCast(IRB, V: S2, dstTy: T, /* Signed */ true);
}
3799
// Given a vector, extract its first element, and return all
// zeroes if it is zero, and all ones otherwise.
Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
  Value *S1 = IRB.CreateExtractElement(Vec: S, Idx: (uint64_t)0);
  Value *S2 = IRB.CreateICmpNE(LHS: S1, RHS: getCleanShadow(V: S1));
  return CreateShadowCast(IRB, V: S2, dstTy: T, /* Signed */ true);
}
3807
// Per-element all-or-nothing widening: each element of the result is
// all-ones if the corresponding element of S is nonzero, all-zeros otherwise.
Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
  Type *T = S->getType();
  assert(T->isVectorTy());
  Value *S2 = IRB.CreateICmpNE(LHS: S, RHS: getCleanShadow(V: S));
  return IRB.CreateSExt(V: S2, DestTy: T);
}
3814
// Instrument vector shift intrinsic.
//
// This function instruments intrinsics like int_x86_avx2_psll_w.
// Intrinsic shifts %In by %ShiftSize bits.
// %ShiftSize may be a vector. In that case the lower 64 bits determine shift
// size, and the rest is ignored. Behavior is defined even if shift size is
// greater than register (or field) width.
void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
  assert(I.arg_size() == 2);
  IRBuilder<> IRB(&I);
  // If any of the S2 bits are poisoned, the whole thing is poisoned.
  // Otherwise perform the same shift on S1.
  Value *S1 = getShadow(I: &I, i: 0);
  Value *S2 = getShadow(I: &I, i: 1);
  // Variable (per-element) shifts poison per element; otherwise only the
  // low 64 bits of the shift operand matter.
  Value *S2Conv = Variable ? VariableShadowExtend(IRB, S: S2)
                           : Lower64ShadowExtend(IRB, S: S2, T: getShadowTy(V: &I));
  Value *V1 = I.getOperand(i_nocapture: 0);
  Value *V2 = I.getOperand(i_nocapture: 1);
  // Re-issue the original intrinsic on the shadow of the shifted operand.
  Value *Shift = IRB.CreateCall(FTy: I.getFunctionType(), Callee: I.getCalledOperand(),
                                Args: {IRB.CreateBitCast(V: S1, DestTy: V1->getType()), V2});
  Shift = IRB.CreateBitCast(V: Shift, DestTy: getShadowTy(V: &I));
  setShadow(V: &I, SV: IRB.CreateOr(LHS: Shift, RHS: S2Conv));
  setOriginForNaryOp(I);
}
3839
// Get an MMX-sized (64-bit) vector type, or optionally, other sized
// vectors (total width X86_MMXSizeInBits, split into EltSizeInBits lanes).
Type *getMMXVectorTy(unsigned EltSizeInBits,
                     unsigned X86_MMXSizeInBits = 64) {
  // Element size must evenly divide the total width.
  assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
         "Illegal MMX vector element size");
  return FixedVectorType::get(ElementType: IntegerType::get(C&: *MS.C, NumBits: EltSizeInBits),
                              NumElts: X86_MMXSizeInBits / EltSizeInBits);
}
3849
3850 // Returns a signed counterpart for an (un)signed-saturate-and-pack
3851 // intrinsic.
3852 Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
3853 switch (id) {
3854 case Intrinsic::x86_sse2_packsswb_128:
3855 case Intrinsic::x86_sse2_packuswb_128:
3856 return Intrinsic::x86_sse2_packsswb_128;
3857
3858 case Intrinsic::x86_sse2_packssdw_128:
3859 case Intrinsic::x86_sse41_packusdw:
3860 return Intrinsic::x86_sse2_packssdw_128;
3861
3862 case Intrinsic::x86_avx2_packsswb:
3863 case Intrinsic::x86_avx2_packuswb:
3864 return Intrinsic::x86_avx2_packsswb;
3865
3866 case Intrinsic::x86_avx2_packssdw:
3867 case Intrinsic::x86_avx2_packusdw:
3868 return Intrinsic::x86_avx2_packssdw;
3869
3870 case Intrinsic::x86_mmx_packsswb:
3871 case Intrinsic::x86_mmx_packuswb:
3872 return Intrinsic::x86_mmx_packsswb;
3873
3874 case Intrinsic::x86_mmx_packssdw:
3875 return Intrinsic::x86_mmx_packssdw;
3876
3877 case Intrinsic::x86_avx512_packssdw_512:
3878 case Intrinsic::x86_avx512_packusdw_512:
3879 return Intrinsic::x86_avx512_packssdw_512;
3880
3881 case Intrinsic::x86_avx512_packsswb_512:
3882 case Intrinsic::x86_avx512_packuswb_512:
3883 return Intrinsic::x86_avx512_packsswb_512;
3884
3885 default:
3886 llvm_unreachable("unexpected intrinsic id");
3887 }
3888 }
3889
// Instrument vector pack intrinsic.
//
// This function instruments intrinsics like x86_mmx_packsswb, that
// packs elements of 2 input vectors into half as many bits with saturation.
// Shadow is propagated with the signed variant of the same intrinsic applied
// to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
// MMXEltSizeInBits is used only for x86mmx arguments.
//
// TODO: consider using GetMinMaxUnsigned() to handle saturation precisely
void handleVectorPackIntrinsic(IntrinsicInst &I,
                               unsigned MMXEltSizeInBits = 0) {
  assert(I.arg_size() == 2);
  IRBuilder<> IRB(&I);
  Value *S1 = getShadow(I: &I, i: 0);
  Value *S2 = getShadow(I: &I, i: 1);
  assert(S1->getType()->isVectorTy());

  // SExt and ICmpNE below must apply to individual elements of input vectors.
  // In case of x86mmx arguments, cast them to appropriate vector types and
  // back.
  Type *T =
      MMXEltSizeInBits ? getMMXVectorTy(EltSizeInBits: MMXEltSizeInBits) : S1->getType();
  if (MMXEltSizeInBits) {
    S1 = IRB.CreateBitCast(V: S1, DestTy: T);
    S2 = IRB.CreateBitCast(V: S2, DestTy: T);
  }
  // Per-element all-or-nothing: a partially-poisoned element becomes fully
  // poisoned before the pack.
  Value *S1_ext =
      IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: S1, RHS: Constant::getNullValue(Ty: T)), DestTy: T);
  Value *S2_ext =
      IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: S2, RHS: Constant::getNullValue(Ty: T)), DestTy: T);
  if (MMXEltSizeInBits) {
    S1_ext = IRB.CreateBitCast(V: S1_ext, DestTy: getMMXVectorTy(EltSizeInBits: 64));
    S2_ext = IRB.CreateBitCast(V: S2_ext, DestTy: getMMXVectorTy(EltSizeInBits: 64));
  }

  // Pack the widened shadows with the signed-saturating variant; saturation
  // of an all-ones/all-zeros element preserves the all-or-nothing property.
  Value *S = IRB.CreateIntrinsic(ID: getSignedPackIntrinsic(id: I.getIntrinsicID()),
                                 Args: {S1_ext, S2_ext}, /*FMFSource=*/nullptr,
                                 Name: "_msprop_vector_pack");
  if (MMXEltSizeInBits)
    S = IRB.CreateBitCast(V: S, DestTy: getShadowTy(V: &I));
  setShadow(V: &I, SV: S);
  setOriginForNaryOp(I);
}
3933
3934 // Convert `Mask` into `<n x i1>`.
3935 Constant *createDppMask(unsigned Width, unsigned Mask) {
3936 SmallVector<Constant *, 4> R(Width);
3937 for (auto &M : R) {
3938 M = ConstantInt::getBool(Context&: F.getContext(), V: Mask & 1);
3939 Mask >>= 1;
3940 }
3941 return ConstantVector::get(V: R);
3942 }
3943
// Calculate output shadow as array of booleans `<n x i1>`, assuming if any
// arg is poisoned, entire dot product is poisoned.
Value *findDppPoisonedOutput(IRBuilder<> &IRB, Value *S, unsigned SrcMask,
                             unsigned DstMask) {
  const unsigned Width =
      cast<FixedVectorType>(Val: S->getType())->getNumElements();

  // Keep only the shadow of elements selected by SrcMask; others contribute
  // nothing to the dot product.
  S = IRB.CreateSelect(C: createDppMask(Width, Mask: SrcMask), True: S,
                       False: Constant::getNullValue(Ty: S->getType()));
  // Any poisoned participating element poisons the whole product.
  Value *SElem = IRB.CreateOrReduce(Src: S);
  Value *IsClean = IRB.CreateIsNull(Arg: SElem, Name: "_msdpp");
  Value *DstMaskV = createDppMask(Width, Mask: DstMask);

  // Poison exactly the output elements selected by DstMask (or none).
  return IRB.CreateSelect(
      C: IsClean, True: Constant::getNullValue(Ty: DstMaskV->getType()), False: DstMaskV);
}
3960
// See `Intel Intrinsics Guide` for `_dp_p*` instructions.
//
// 2 and 4 element versions produce single scalar of dot product, and then
// puts it into elements of output vector, selected by 4 lowest bits of the
// mask. Top 4 bits of the mask control which elements of input to use for dot
// product.
//
// 8 element version mask still has only 4 bit for input, and 4 bit for output
// mask. According to the spec it just operates as 4 element version on first
// 4 elements of inputs and output, and then on last 4 elements of inputs and
// output.
void handleDppIntrinsic(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);

  // Combined shadow of both vector operands.
  Value *S0 = getShadow(I: &I, i: 0);
  Value *S1 = getShadow(I: &I, i: 1);
  Value *S = IRB.CreateOr(LHS: S0, RHS: S1);

  const unsigned Width =
      cast<FixedVectorType>(Val: S->getType())->getNumElements();
  assert(Width == 2 || Width == 4 || Width == 8);

  // The mask is an immediate (third operand): top 4 bits select inputs,
  // low 4 bits select outputs.
  const unsigned Mask = cast<ConstantInt>(Val: I.getArgOperand(i: 2))->getZExtValue();
  const unsigned SrcMask = Mask >> 4;
  const unsigned DstMask = Mask & 0xf;

  // Calculate shadow as `<n x i1>`.
  Value *SI1 = findDppPoisonedOutput(IRB, S, SrcMask, DstMask);
  if (Width == 8) {
    // First 4 elements of shadow are already calculated.
    // `findDppPoisonedOutput` operates on 32 bit masks, so we can just shift
    // masks, and repeat.
    SI1 = IRB.CreateOr(
        LHS: SI1, RHS: findDppPoisonedOutput(IRB, S, SrcMask: SrcMask << 4, DstMask: DstMask << 4));
  }
  // Extend to real size of shadow, poisoning either all or none bits of an
  // element.
  S = IRB.CreateSExt(V: SI1, DestTy: S->getType(), Name: "_msdpp");

  setShadow(V: &I, SV: S);
  setOriginForNaryOp(I);
}
4002
// Convert a blendv-style condition vector into a `<n x i1>` select mask by
// extracting the sign (top) bit of each element.
Value *convertBlendvToSelectMask(IRBuilder<> &IRB, Value *C) {
  C = CreateAppToShadowCast(IRB, V: C);
  FixedVectorType *FVT = cast<FixedVectorType>(Val: C->getType());
  unsigned ElSize = FVT->getElementType()->getPrimitiveSizeInBits();
  // Arithmetic shift replicates the top bit across the whole element...
  C = IRB.CreateAShr(LHS: C, RHS: ElSize - 1);
  FVT = FixedVectorType::get(ElementType: IRB.getInt1Ty(), NumElts: FVT->getNumElements());
  // ...then truncation keeps exactly one bit per element.
  return IRB.CreateTrunc(V: C, DestTy: FVT);
}
4011
// `blendv(f, t, c)` is effectively `select(c[top_bit], t, f)`.
void handleBlendvIntrinsic(IntrinsicInst &I) {
  Value *C = I.getOperand(i_nocapture: 2);
  Value *T = I.getOperand(i_nocapture: 1);
  Value *F = I.getOperand(i_nocapture: 0);

  // Shadow/origin of the condition operand, captured before C is rewritten.
  Value *Sc = getShadow(I: &I, i: 2);
  Value *Oc = MS.TrackOrigins ? getOrigin(V: C) : nullptr;

  {
    IRBuilder<> IRB(&I);
    // Extract top bit from condition and its shadow.
    C = convertBlendvToSelectMask(IRB, C);
    Sc = convertBlendvToSelectMask(IRB, C: Sc);

    // Register the converted condition's shadow/origin so that
    // handleSelectLikeInst below can look them up.
    setShadow(V: C, SV: Sc);
    setOrigin(V: C, Origin: Oc);
  }

  handleSelectLikeInst(I, B: C, C: T, D: F);
}
4033
// Instrument sum-of-absolute-differences intrinsic.
//
// Each result element carries at most 16 significant bits; the high bits are
// always zero and therefore always initialized.
void handleVectorSadIntrinsic(IntrinsicInst &I, bool IsMMX = false) {
  const unsigned SignificantBitsPerResultElement = 16;
  // MMX variants operate on a single 64-bit element.
  Type *ResTy = IsMMX ? IntegerType::get(C&: *MS.C, NumBits: 64) : I.getType();
  unsigned ZeroBitsPerResultElement =
      ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;

  IRBuilder<> IRB(&I);
  auto *Shadow0 = getShadow(I: &I, i: 0);
  auto *Shadow1 = getShadow(I: &I, i: 1);
  Value *S = IRB.CreateOr(LHS: Shadow0, RHS: Shadow1);
  S = IRB.CreateBitCast(V: S, DestTy: ResTy);
  // Any poisoned input bit poisons the entire result element (all ones)...
  S = IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: S, RHS: Constant::getNullValue(Ty: ResTy)),
                     DestTy: ResTy);
  // ...except the always-zero high bits, which stay clean.
  S = IRB.CreateLShr(LHS: S, RHS: ZeroBitsPerResultElement);
  S = IRB.CreateBitCast(V: S, DestTy: getShadowTy(V: &I));
  setShadow(V: &I, SV: S);
  setOriginForNaryOp(I);
}
4053
// Instrument dot-product / multiply-add(-accumulate)? intrinsics.
//
// e.g., Two operands:
//         <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b)
//
//       Two operands which require an EltSizeInBits override:
//         <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b)
//
//       Three operands:
//         <4 x i32> @llvm.x86.avx512.vpdpbusd.128
//                       (<4 x i32> %s, <16 x i8> %a, <16 x i8> %b)
//         <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16
//                       (<2 x float> %acc, <4 x bfloat> %a, <4 x bfloat> %b)
//         (these are equivalent to multiply-add on %a and %b, followed by
//          adding/"accumulating" %s. "Accumulation" stores the result in one
//          of the source registers, but this accumulate vs. add distinction
//          is lost when dealing with LLVM intrinsics.)
//
// ReductionFactor is the number of adjacent products that are summed into
// one output element.
//
// ZeroPurifies means that multiplying a known-zero with an uninitialized
// value results in an initialized value. This is applicable for integer
// multiplication, but not floating-point (counter-example: NaN).
//
// EltSizeInBits, when non-zero, overrides the element size of the operands
// (used for MMX, whose operands are typed <1 x i64>).
//
// Lanes restricts which input lanes (odd/even/both) participate.
void handleVectorDotProductIntrinsic(IntrinsicInst &I,
                                     unsigned ReductionFactor,
                                     bool ZeroPurifies,
                                     unsigned EltSizeInBits,
                                     enum OddOrEvenLanes Lanes) {
  IRBuilder<> IRB(&I);

  [[maybe_unused]] FixedVectorType *ReturnType =
      cast<FixedVectorType>(Val: I.getType());
  assert(isa<FixedVectorType>(ReturnType));

  // Vectors A and B, and shadows
  Value *Va = nullptr;
  Value *Vb = nullptr;
  Value *Sa = nullptr;
  Value *Sb = nullptr;

  assert(I.arg_size() == 2 || I.arg_size() == 3);
  if (I.arg_size() == 2) {
    assert(Lanes == kBothLanes);

    Va = I.getOperand(i_nocapture: 0);
    Vb = I.getOperand(i_nocapture: 1);

    Sa = getShadow(I: &I, i: 0);
    Sb = getShadow(I: &I, i: 1);
  } else if (I.arg_size() == 3) {
    // Operand 0 is the accumulator. We will deal with that below.
    Va = I.getOperand(i_nocapture: 1);
    Vb = I.getOperand(i_nocapture: 2);

    Sa = getShadow(I: &I, i: 1);
    Sb = getShadow(I: &I, i: 2);

    if (Lanes == kEvenLanes || Lanes == kOddLanes) {
      // Convert < S0, S1, S2, S3, S4, S5, S6, S7 >
      // to      < S0, S0, S2, S2, S4, S4, S6, S6 > (if even)
      // to      < S1, S1, S3, S3, S5, S5, S7, S7 > (if odd)
      //
      // Note: for aarch64.neon.bfmlalb/t, the odd/even-indexed values are
      //       zeroed, not duplicated. However, for shadow propagation, this
      //       distinction is unimportant because Step 1 below will squeeze
      //       each pair of elements (e.g., [S0, S0]) into a single bit, and
      //       we only care if it is fully initialized.

      FixedVectorType *InputShadowType = cast<FixedVectorType>(Val: Sa->getType());
      unsigned Width = InputShadowType->getNumElements();

      Sa = IRB.CreateShuffleVector(
          V: Sa, Mask: getPclmulMask(Width, /*OddElements=*/Lanes == kOddLanes));
      Sb = IRB.CreateShuffleVector(
          V: Sb, Mask: getPclmulMask(Width, /*OddElements=*/Lanes == kOddLanes));
    }
  }

  FixedVectorType *ParamType = cast<FixedVectorType>(Val: Va->getType());
  assert(ParamType == Vb->getType());

  assert(ParamType->getPrimitiveSizeInBits() ==
         ReturnType->getPrimitiveSizeInBits());

  if (I.arg_size() == 3) {
    [[maybe_unused]] auto *AccumulatorType =
        cast<FixedVectorType>(Val: I.getOperand(i_nocapture: 0)->getType());
    assert(AccumulatorType == ReturnType);
  }

  FixedVectorType *ImplicitReturnType =
      cast<FixedVectorType>(Val: getShadowTy(OrigTy: ReturnType));
  // Step 1: instrument multiplication of corresponding vector elements
  if (EltSizeInBits) {
    // Reinterpret the MMX <1 x i64> operands as vectors of the real element
    // size before computing per-element shadow.
    ImplicitReturnType = cast<FixedVectorType>(
        Val: getMMXVectorTy(EltSizeInBits: EltSizeInBits * ReductionFactor,
                       X86_MMXSizeInBits: ParamType->getPrimitiveSizeInBits()));
    ParamType = cast<FixedVectorType>(
        Val: getMMXVectorTy(EltSizeInBits, X86_MMXSizeInBits: ParamType->getPrimitiveSizeInBits()));

    Va = IRB.CreateBitCast(V: Va, DestTy: ParamType);
    Vb = IRB.CreateBitCast(V: Vb, DestTy: ParamType);

    Sa = IRB.CreateBitCast(V: Sa, DestTy: getShadowTy(OrigTy: ParamType));
    Sb = IRB.CreateBitCast(V: Sb, DestTy: getShadowTy(OrigTy: ParamType));
  } else {
    assert(ParamType->getNumElements() ==
           ReturnType->getNumElements() * ReductionFactor);
  }

  // Each element of the vector is represented by a single bit (poisoned or
  // not) e.g., <8 x i1>.
  Value *SaNonZero = IRB.CreateIsNotNull(Arg: Sa);
  Value *SbNonZero = IRB.CreateIsNotNull(Arg: Sb);
  Value *And;
  if (ZeroPurifies) {
    // Multiplying an *initialized* zero by an uninitialized element results
    // in an initialized zero element.
    //
    // This is analogous to bitwise AND, where "AND" of 0 and a poisoned value
    // results in an unpoisoned value.
    Value *VaInt = Va;
    Value *VbInt = Vb;
    if (!Va->getType()->isIntegerTy()) {
      VaInt = CreateAppToShadowCast(IRB, V: Va);
      VbInt = CreateAppToShadowCast(IRB, V: Vb);
    }

    // We check for non-zero on a per-element basis, not per-bit.
    Value *VaNonZero = IRB.CreateIsNotNull(Arg: VaInt);
    Value *VbNonZero = IRB.CreateIsNotNull(Arg: VbInt);

    And = handleBitwiseAnd(IRB, V1: VaNonZero, V2: VbNonZero, S1: SaNonZero, S2: SbNonZero);
  } else {
    And = IRB.CreateOr(Ops: {SaNonZero, SbNonZero});
  }

  // Extend <8 x i1> to <8 x i16>.
  // (The real pmadd intrinsic would have computed intermediate values of
  // <8 x i32>, but that is irrelevant for our shadow purposes because we
  // consider each element to be either fully initialized or fully
  // uninitialized.)
  And = IRB.CreateSExt(V: And, DestTy: Sa->getType());

  // Step 2: instrument horizontal add
  // We don't need bit-precise horizontalReduce because we only want to check
  // if each pair/quad of elements is fully zero.
  // Cast to <4 x i32>.
  Value *Horizontal = IRB.CreateBitCast(V: And, DestTy: ImplicitReturnType);

  // Compute <4 x i1>, then extend back to <4 x i32>.
  Value *OutShadow = IRB.CreateSExt(
      V: IRB.CreateICmpNE(LHS: Horizontal,
                        RHS: Constant::getNullValue(Ty: Horizontal->getType())),
      DestTy: ImplicitReturnType);

  // Cast it back to the required fake return type (if MMX: <1 x i64>; for
  // AVX, it is already correct).
  if (EltSizeInBits)
    OutShadow = CreateShadowCast(IRB, V: OutShadow, dstTy: getShadowTy(V: &I));

  // Step 3 (if applicable): instrument accumulator
  if (I.arg_size() == 3)
    OutShadow = IRB.CreateOr(LHS: OutShadow, RHS: getShadow(I: &I, i: 0));

  setShadow(V: &I, SV: OutShadow);
  setOriginForNaryOp(I);
}
4220
// Instrument compare-packed intrinsic.
//
// x86 has the predicate as the third operand, which is ImmArg e.g.,
// - <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8)
// - <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8)
//
// while Arm has separate intrinsics for >= and > e.g.,
// - <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32
//                 (<2 x float> %A, <2 x float>)
// - <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32
//                 (<2 x float> %A, <2 x float>)
//
// Bonus: this also handles scalar cases e.g.,
// - i32 @llvm.aarch64.neon.facgt.i32.f32(float %A, float %B)
void handleVectorComparePackedIntrinsic(IntrinsicInst &I,
                                        bool PredicateAsOperand) {
  if (PredicateAsOperand) {
    assert(I.arg_size() == 3);
    assert(I.paramHasAttr(2, Attribute::ImmArg));
  } else
    assert(I.arg_size() == 2);

  IRBuilder<> IRB(&I);

  // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
  // all-ones shadow.
  Type *ResTy = getShadowTy(V: &I);
  auto *Shadow0 = getShadow(I: &I, i: 0);
  auto *Shadow1 = getShadow(I: &I, i: 1);
  Value *S0 = IRB.CreateOr(LHS: Shadow0, RHS: Shadow1);
  Value *S = IRB.CreateSExt(
      V: IRB.CreateICmpNE(LHS: S0, RHS: Constant::getNullValue(Ty: ResTy)), DestTy: ResTy);
  setShadow(V: &I, SV: S);
  setOriginForNaryOp(I);
}
4256
// Instrument compare-scalar intrinsic.
// This handles both cmp* intrinsics which return the result in the first
// element of a vector, and comi* which return the result as i32.
void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  // The result is poisoned if either input's shadow is non-zero.
  auto *Shadow0 = getShadow(I: &I, i: 0);
  auto *Shadow1 = getShadow(I: &I, i: 1);
  Value *S0 = IRB.CreateOr(LHS: Shadow0, RHS: Shadow1);
  // Shape the combined shadow into the intrinsic's return layout.
  Value *S = LowerElementShadowExtend(IRB, S: S0, T: getShadowTy(V: &I));
  setShadow(V: &I, SV: S);
  setOriginForNaryOp(I);
}
4269
// Instrument generic vector reduction intrinsics
// by ORing together all their fields.
//
// If AllowShadowCast is true, the return type does not need to be the same
// type as the fields
// e.g., declare i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8>)
void handleVectorReduceIntrinsic(IntrinsicInst &I, bool AllowShadowCast) {
  assert(I.arg_size() == 1);

  IRBuilder<> IRB(&I);
  // The result is poisoned if any input element is poisoned.
  Value *S = IRB.CreateOrReduce(Src: getShadow(I: &I, i: 0));
  if (AllowShadowCast)
    S = CreateShadowCast(IRB, V: S, dstTy: getShadowTy(V: &I));
  else
    assert(S->getType() == getShadowTy(&I));
  setShadow(V: &I, SV: S);
  setOriginForNaryOp(I);
}
4288
// Similar to handleVectorReduceIntrinsic but with an initial starting value.
// e.g., call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float>
// %a1)
//       shadow = shadow[a0] | shadow[a1.0] | shadow[a1.1]
//
// The type of the return value, initial starting value, and elements of the
// vector must be identical.
void handleVectorReduceWithStarterIntrinsic(IntrinsicInst &I) {
  assert(I.arg_size() == 2);

  IRBuilder<> IRB(&I);
  // Shadow of the starter value, plus the OR-reduced shadow of the vector.
  Value *Shadow0 = getShadow(I: &I, i: 0);
  Value *Shadow1 = IRB.CreateOrReduce(Src: getShadow(I: &I, i: 1));
  assert(Shadow0->getType() == Shadow1->getType());
  Value *S = IRB.CreateOr(LHS: Shadow0, RHS: Shadow1);
  assert(S->getType() == getShadowTy(&I));
  setShadow(V: &I, SV: S);
  setOriginForNaryOp(I);
}
4308
// Instrument vector.reduce.or intrinsic.
// Valid (non-poisoned) set bits in the operand pull low the
// corresponding shadow bits.
void handleVectorReduceOrIntrinsic(IntrinsicInst &I) {
  assert(I.arg_size() == 1);

  IRBuilder<> IRB(&I);
  Value *OperandShadow = getShadow(I: &I, i: 0);
  Value *OperandUnsetBits = IRB.CreateNot(V: I.getOperand(i_nocapture: 0));
  Value *OperandUnsetOrPoison = IRB.CreateOr(LHS: OperandUnsetBits, RHS: OperandShadow);
  // Bit N is clean if any field's bit N is 1 and unpoison
  Value *OutShadowMask = IRB.CreateAndReduce(Src: OperandUnsetOrPoison);
  // Otherwise, it is clean if every field's bit N is unpoison
  Value *OrShadow = IRB.CreateOrReduce(Src: OperandShadow);
  // Both conditions must allow it: AND combines the two cleanliness rules.
  Value *S = IRB.CreateAnd(LHS: OutShadowMask, RHS: OrShadow);

  setShadow(V: &I, SV: S);
  setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
}
4328
// Instrument vector.reduce.and intrinsic.
// Valid (non-poisoned) unset bits in the operand pull down the
// corresponding shadow bits.
void handleVectorReduceAndIntrinsic(IntrinsicInst &I) {
  assert(I.arg_size() == 1);

  IRBuilder<> IRB(&I);
  Value *OperandShadow = getShadow(I: &I, i: 0);
  Value *OperandSetOrPoison = IRB.CreateOr(LHS: I.getOperand(i_nocapture: 0), RHS: OperandShadow);
  // Bit N is clean if any field's bit N is 0 and unpoison
  Value *OutShadowMask = IRB.CreateAndReduce(Src: OperandSetOrPoison);
  // Otherwise, it is clean if every field's bit N is unpoison
  Value *OrShadow = IRB.CreateOrReduce(Src: OperandShadow);
  // Both conditions must allow it: AND combines the two cleanliness rules.
  Value *S = IRB.CreateAnd(LHS: OutShadowMask, RHS: OrShadow);

  setShadow(V: &I, SV: S);
  setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
}
4347
// Instrument stmxcsr: the intrinsic stores 4 bytes to memory, and that value
// is treated as fully initialized, so we store a clean shadow for it.
void handleStmxcsr(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  Value *Addr = I.getArgOperand(i: 0);
  Type *Ty = IRB.getInt32Ty();
  Value *ShadowPtr =
      getShadowOriginPtr(Addr, IRB, ShadowTy: Ty, Alignment: Align(1), /*isStore*/ true).first;

  IRB.CreateStore(Val: getCleanShadow(OrigTy: Ty), Ptr: ShadowPtr);

  if (ClCheckAccessAddress)
    insertCheckShadowOf(Val: Addr, OrigIns: &I);
}
4360
// Instrument ldmxcsr: the intrinsic reads 4 bytes from memory, so check that
// the loaded value (its shadow) is fully initialized.
void handleLdmxcsr(IntrinsicInst &I) {
  if (!InsertChecks)
    return;

  IRBuilder<> IRB(&I);
  Value *Addr = I.getArgOperand(i: 0);
  Type *Ty = IRB.getInt32Ty();
  const Align Alignment = Align(1);
  Value *ShadowPtr, *OriginPtr;
  std::tie(args&: ShadowPtr, args&: OriginPtr) =
      getShadowOriginPtr(Addr, IRB, ShadowTy: Ty, Alignment, /*isStore*/ false);

  if (ClCheckAccessAddress)
    insertCheckShadowOf(Val: Addr, OrigIns: &I);

  // Load the shadow of the 4-byte source and report if it is poisoned.
  Value *Shadow = IRB.CreateAlignedLoad(Ty, Ptr: ShadowPtr, Align: Alignment, Name: "_ldmxcsr");
  Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(Ty: MS.OriginTy, Ptr: OriginPtr)
                                  : getCleanOrigin();
  insertCheckShadow(Shadow, Origin, OrigIns: &I);
}
4381
// Instrument Intrinsic::masked_expandload by applying the same expand-load
// operation to the shadow memory, using the same mask and pass-through.
void handleMaskedExpandLoad(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  Value *Ptr = I.getArgOperand(i: 0);
  MaybeAlign Align = I.getParamAlign(ArgNo: 0);
  Value *Mask = I.getArgOperand(i: 1);
  Value *PassThru = I.getArgOperand(i: 2);

  if (ClCheckAccessAddress) {
    insertCheckShadowOf(Val: Ptr, OrigIns: &I);
    insertCheckShadowOf(Val: Mask, OrigIns: &I);
  }

  if (!PropagateShadow) {
    setShadow(V: &I, SV: getCleanShadow(V: &I));
    setOrigin(V: &I, Origin: getCleanOrigin());
    return;
  }

  Type *ShadowTy = getShadowTy(V: &I);
  Type *ElementShadowTy = cast<VectorType>(Val: ShadowTy)->getElementType();
  auto [ShadowPtr, OriginPtr] =
      getShadowOriginPtr(Addr: Ptr, IRB, ShadowTy: ElementShadowTy, Alignment: Align, /*isStore*/ false);

  // Masked-off lanes take the shadow of PassThru.
  Value *Shadow =
      IRB.CreateMaskedExpandLoad(Ty: ShadowTy, Ptr: ShadowPtr, Align, Mask,
                                 PassThru: getShadow(V: PassThru), Name: "_msmaskedexpload");

  setShadow(V: &I, SV: Shadow);

  // TODO: Store origins.
  setOrigin(V: &I, Origin: getCleanOrigin());
}
4414
// Instrument Intrinsic::masked_compressstore by applying the same
// compress-store operation to the shadow memory, using the same mask.
void handleMaskedCompressStore(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  Value *Values = I.getArgOperand(i: 0);
  Value *Ptr = I.getArgOperand(i: 1);
  MaybeAlign Align = I.getParamAlign(ArgNo: 1);
  Value *Mask = I.getArgOperand(i: 2);

  if (ClCheckAccessAddress) {
    insertCheckShadowOf(Val: Ptr, OrigIns: &I);
    insertCheckShadowOf(Val: Mask, OrigIns: &I);
  }

  Value *Shadow = getShadow(V: Values);
  Type *ElementShadowTy =
      getShadowTy(OrigTy: cast<VectorType>(Val: Values->getType())->getElementType());
  auto [ShadowPtr, OriginPtrs] =
      getShadowOriginPtr(Addr: Ptr, IRB, ShadowTy: ElementShadowTy, Alignment: Align, /*isStore*/ true);

  IRB.CreateMaskedCompressStore(Val: Shadow, Ptr: ShadowPtr, Align, Mask);

  // TODO: Store origins.
}
4437
// Instrument Intrinsic::masked_gather by gathering from the shadow memory of
// the pointer vector, using the same mask and pass-through.
void handleMaskedGather(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  Value *Ptrs = I.getArgOperand(i: 0);
  const Align Alignment = I.getParamAlign(ArgNo: 0).valueOrOne();
  Value *Mask = I.getArgOperand(i: 1);
  Value *PassThru = I.getArgOperand(i: 2);

  Type *PtrsShadowTy = getShadowTy(V: Ptrs);
  if (ClCheckAccessAddress) {
    insertCheckShadowOf(Val: Mask, OrigIns: &I);
    // Only the pointers in enabled lanes need to be initialized.
    Value *MaskedPtrShadow = IRB.CreateSelect(
        C: Mask, True: getShadow(V: Ptrs), False: Constant::getNullValue(Ty: (PtrsShadowTy)),
        Name: "_msmaskedptrs");
    insertCheckShadow(Shadow: MaskedPtrShadow, Origin: getOrigin(V: Ptrs), OrigIns: &I);
  }

  if (!PropagateShadow) {
    setShadow(V: &I, SV: getCleanShadow(V: &I));
    setOrigin(V: &I, Origin: getCleanOrigin());
    return;
  }

  Type *ShadowTy = getShadowTy(V: &I);
  Type *ElementShadowTy = cast<VectorType>(Val: ShadowTy)->getElementType();
  auto [ShadowPtrs, OriginPtrs] = getShadowOriginPtr(
      Addr: Ptrs, IRB, ShadowTy: ElementShadowTy, Alignment, /*isStore*/ false);

  // Masked-off lanes take the shadow of PassThru.
  Value *Shadow =
      IRB.CreateMaskedGather(Ty: ShadowTy, Ptrs: ShadowPtrs, Alignment, Mask,
                             PassThru: getShadow(V: PassThru), Name: "_msmaskedgather");

  setShadow(V: &I, SV: Shadow);

  // TODO: Store origins.
  setOrigin(V: &I, Origin: getCleanOrigin());
}
4474
// Instrument Intrinsic::masked_scatter by scattering the values' shadow into
// the shadow memory of the pointer vector, using the same mask.
void handleMaskedScatter(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  Value *Values = I.getArgOperand(i: 0);
  Value *Ptrs = I.getArgOperand(i: 1);
  const Align Alignment = I.getParamAlign(ArgNo: 1).valueOrOne();
  Value *Mask = I.getArgOperand(i: 2);

  Type *PtrsShadowTy = getShadowTy(V: Ptrs);
  if (ClCheckAccessAddress) {
    insertCheckShadowOf(Val: Mask, OrigIns: &I);
    // Only the pointers in enabled lanes need to be initialized.
    Value *MaskedPtrShadow = IRB.CreateSelect(
        C: Mask, True: getShadow(V: Ptrs), False: Constant::getNullValue(Ty: (PtrsShadowTy)),
        Name: "_msmaskedptrs");
    insertCheckShadow(Shadow: MaskedPtrShadow, Origin: getOrigin(V: Ptrs), OrigIns: &I);
  }

  Value *Shadow = getShadow(V: Values);
  Type *ElementShadowTy =
      getShadowTy(OrigTy: cast<VectorType>(Val: Values->getType())->getElementType());
  auto [ShadowPtrs, OriginPtrs] = getShadowOriginPtr(
      Addr: Ptrs, IRB, ShadowTy: ElementShadowTy, Alignment, /*isStore*/ true);

  IRB.CreateMaskedScatter(Val: Shadow, Ptrs: ShadowPtrs, Alignment, Mask);

  // TODO: Store origin.
}
4501
// Intrinsic::masked_store
//
// Instrumented by performing the same masked store on the shadow memory.
//
// Note: handleAVXMaskedStore handles AVX/AVX2 variants, though AVX512 masked
//       stores are lowered to Intrinsic::masked_store.
void handleMaskedStore(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  Value *V = I.getArgOperand(i: 0);
  Value *Ptr = I.getArgOperand(i: 1);
  const Align Alignment = I.getParamAlign(ArgNo: 1).valueOrOne();
  Value *Mask = I.getArgOperand(i: 2);
  Value *Shadow = getShadow(V);

  if (ClCheckAccessAddress) {
    insertCheckShadowOf(Val: Ptr, OrigIns: &I);
    insertCheckShadowOf(Val: Mask, OrigIns: &I);
  }

  Value *ShadowPtr;
  Value *OriginPtr;
  std::tie(args&: ShadowPtr, args&: OriginPtr) = getShadowOriginPtr(
      Addr: Ptr, IRB, ShadowTy: Shadow->getType(), Alignment, /*isStore*/ true);

  IRB.CreateMaskedStore(Val: Shadow, Ptr: ShadowPtr, Alignment, Mask);

  if (!MS.TrackOrigins)
    return;

  // Approximation: paint origins for the whole store region, not just the
  // enabled lanes.
  auto &DL = F.getDataLayout();
  paintOrigin(IRB, Origin: getOrigin(V), OriginPtr,
              TS: DL.getTypeStoreSize(Ty: Shadow->getType()),
              Alignment: std::max(a: Alignment, b: kMinOriginAlignment));
}
4534
// Intrinsic::masked_load
//
// Instrumented by performing the same masked load on the shadow memory, with
// the pass-through's shadow filling disabled lanes.
//
// Note: handleAVXMaskedLoad handles AVX/AVX2 variants, though AVX512 masked
//       loads are lowered to Intrinsic::masked_load.
void handleMaskedLoad(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  Value *Ptr = I.getArgOperand(i: 0);
  const Align Alignment = I.getParamAlign(ArgNo: 0).valueOrOne();
  Value *Mask = I.getArgOperand(i: 1);
  Value *PassThru = I.getArgOperand(i: 2);

  if (ClCheckAccessAddress) {
    insertCheckShadowOf(Val: Ptr, OrigIns: &I);
    insertCheckShadowOf(Val: Mask, OrigIns: &I);
  }

  if (!PropagateShadow) {
    setShadow(V: &I, SV: getCleanShadow(V: &I));
    setOrigin(V: &I, Origin: getCleanOrigin());
    return;
  }

  Type *ShadowTy = getShadowTy(V: &I);
  Value *ShadowPtr, *OriginPtr;
  std::tie(args&: ShadowPtr, args&: OriginPtr) =
      getShadowOriginPtr(Addr: Ptr, IRB, ShadowTy, Alignment, /*isStore*/ false);
  setShadow(V: &I, SV: IRB.CreateMaskedLoad(Ty: ShadowTy, Ptr: ShadowPtr, Alignment, Mask,
                                       PassThru: getShadow(V: PassThru), Name: "_msmaskedld"));

  if (!MS.TrackOrigins)
    return;

  // Choose between PassThru's and the loaded value's origins.
  Value *MaskedPassThruShadow = IRB.CreateAnd(
      LHS: getShadow(V: PassThru), RHS: IRB.CreateSExt(V: IRB.CreateNeg(V: Mask), DestTy: ShadowTy));

  Value *NotNull = convertToBool(V: MaskedPassThruShadow, IRB, name: "_mscmp");

  Value *PtrOrigin = IRB.CreateLoad(Ty: MS.OriginTy, Ptr: OriginPtr);
  Value *Origin = IRB.CreateSelect(C: NotNull, True: getOrigin(V: PassThru), False: PtrOrigin);

  setOrigin(V: &I, Origin);
}
4578
// e.g., void @llvm.x86.avx.maskstore.ps.256(ptr, <8 x i32>, <8 x float>)
//                                           dst  mask       src
//
// AVX512 masked stores are lowered to Intrinsic::masked_store and are handled
// by handleMaskedStore.
//
// This function handles AVX and AVX2 masked stores; these use the MSBs of a
// vector of integers, unlike the LLVM masked intrinsics, which require a
// vector of booleans. X86InstCombineIntrinsic.cpp::simplifyX86MaskedLoad
// mentions that the x86 backend does not know how to efficiently convert
// from a vector of booleans back into the AVX mask format; therefore, they
// (and we) do not reduce AVX/AVX2 masked intrinsics into LLVM masked
// intrinsics.
void handleAVXMaskedStore(IntrinsicInst &I) {
  assert(I.arg_size() == 3);

  IRBuilder<> IRB(&I);

  Value *Dst = I.getArgOperand(i: 0);
  assert(Dst->getType()->isPointerTy() && "Destination is not a pointer!");

  Value *Mask = I.getArgOperand(i: 1);
  assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");

  Value *Src = I.getArgOperand(i: 2);
  assert(isa<VectorType>(Src->getType()) && "Source is not a vector!");

  const Align Alignment = Align(1);

  Value *SrcShadow = getShadow(V: Src);

  if (ClCheckAccessAddress) {
    insertCheckShadowOf(Val: Dst, OrigIns: &I);
    insertCheckShadowOf(Val: Mask, OrigIns: &I);
  }

  Value *DstShadowPtr;
  Value *DstOriginPtr;
  std::tie(args&: DstShadowPtr, args&: DstOriginPtr) = getShadowOriginPtr(
      Addr: Dst, IRB, ShadowTy: SrcShadow->getType(), Alignment, /*isStore*/ true);

  SmallVector<Value *, 2> ShadowArgs;
  ShadowArgs.append(NumInputs: 1, Elt: DstShadowPtr);
  ShadowArgs.append(NumInputs: 1, Elt: Mask);
  // The intrinsic may require floating-point but shadows can be arbitrary
  // bit patterns, of which some would be interpreted as "invalid"
  // floating-point values (NaN etc.); we assume the intrinsic will happily
  // copy them.
  ShadowArgs.append(NumInputs: 1, Elt: IRB.CreateBitCast(V: SrcShadow, DestTy: Src->getType()));

  // NOTE(review): the intrinsic returns void, so CI has no meaningful shadow;
  // presumably setShadow is a benign no-op here — confirm against setShadow's
  // handling of void values.
  CallInst *CI =
      IRB.CreateIntrinsic(RetTy: IRB.getVoidTy(), ID: I.getIntrinsicID(), Args: ShadowArgs);
  setShadow(V: &I, SV: CI);

  if (!MS.TrackOrigins)
    return;

  // Approximation only
  auto &DL = F.getDataLayout();
  paintOrigin(IRB, Origin: getOrigin(V: Src), OriginPtr: DstOriginPtr,
              TS: DL.getTypeStoreSize(Ty: SrcShadow->getType()),
              Alignment: std::max(a: Alignment, b: kMinOriginAlignment));
}
4642
// e.g., <8 x float> @llvm.x86.avx.maskload.ps.256(ptr, <8 x i32>)
//       return                                    src  mask
//
// Masked-off values are replaced with 0, which conveniently also represents
// initialized memory.
//
// AVX512 masked loads are lowered to Intrinsic::masked_load and are handled
// by handleMaskedLoad.
//
// We do not combine this with handleMaskedLoad; see comment in
// handleAVXMaskedStore for the rationale.
//
// This is subtly different than handleIntrinsicByApplyingToShadow(I, 1)
// because we need to apply getShadowOriginPtr, not getShadow, to the first
// parameter.
void handleAVXMaskedLoad(IntrinsicInst &I) {
  assert(I.arg_size() == 2);

  IRBuilder<> IRB(&I);

  Value *Src = I.getArgOperand(i: 0);
  assert(Src->getType()->isPointerTy() && "Source is not a pointer!");

  Value *Mask = I.getArgOperand(i: 1);
  assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");

  const Align Alignment = Align(1);

  if (ClCheckAccessAddress) {
    insertCheckShadowOf(Val: Mask, OrigIns: &I);
  }

  Type *SrcShadowTy = getShadowTy(V: Src);
  Value *SrcShadowPtr, *SrcOriginPtr;
  std::tie(args&: SrcShadowPtr, args&: SrcOriginPtr) =
      getShadowOriginPtr(Addr: Src, IRB, ShadowTy: SrcShadowTy, Alignment, /*isStore*/ false);

  // Apply the same masked load to the shadow memory of the source.
  SmallVector<Value *, 2> ShadowArgs;
  ShadowArgs.append(NumInputs: 1, Elt: SrcShadowPtr);
  ShadowArgs.append(NumInputs: 1, Elt: Mask);

  CallInst *CI =
      IRB.CreateIntrinsic(RetTy: I.getType(), ID: I.getIntrinsicID(), Args: ShadowArgs);
  // The AVX masked load intrinsics do not have integer variants. We use the
  // floating-point variants, which will happily copy the shadows even if
  // they are interpreted as "invalid" floating-point values (NaN etc.).
  setShadow(V: &I, SV: IRB.CreateBitCast(V: CI, DestTy: getShadowTy(V: &I)));

  if (!MS.TrackOrigins)
    return;

  // The "pass-through" value is always zero (initialized). To the extent
  // that that results in initialized aligned 4-byte chunks, the origin value
  // is ignored. It is therefore correct to simply copy the origin from src.
  Value *PtrSrcOrigin = IRB.CreateLoad(Ty: MS.OriginTy, Ptr: SrcOriginPtr);
  setOrigin(V: &I, Origin: PtrSrcOrigin);
}
4700
// Test whether the mask indices are initialized, only checking the bits that
// are actually used.
//
// e.g., if Idx is <32 x i16>, only (log2(32) == 5) bits of each index are
// used/checked.
void maskedCheckAVXIndexShadow(IRBuilder<> &IRB, Value *Idx, Instruction *I) {
  assert(isFixedIntVector(Idx));
  auto IdxVectorSize =
      cast<FixedVectorType>(Val: Idx->getType())->getNumElements();
  assert(isPowerOf2_64(IdxVectorSize));

  // Compiler isn't smart enough, let's help it
  if (isa<Constant>(Val: Idx))
    return;

  // Truncate each index's shadow to its low log2(width) bits — only those
  // bits affect the permutation, so only they must be initialized.
  auto *IdxShadow = getShadow(V: Idx);
  Value *Truncated = IRB.CreateTrunc(
      V: IdxShadow,
      DestTy: FixedVectorType::get(ElementType: Type::getIntNTy(C&: *MS.C, N: Log2_64(Value: IdxVectorSize)),
                           NumElts: IdxVectorSize));
  insertCheckShadow(Shadow: Truncated, Origin: getOrigin(V: Idx), OrigIns: I);
}
4723
// Instrument AVX permutation intrinsic.
// We apply the same permutation (argument index 1) to the shadow.
void handleAVXVpermilvar(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  Value *Shadow = getShadow(I: &I, i: 0);
  // The used bits of the index vector must be initialized.
  maskedCheckAVXIndexShadow(IRB, Idx: I.getArgOperand(i: 1), I: &I);

  // Shadows are integer-ish types but some intrinsics require a
  // different (e.g., floating-point) type.
  Shadow = IRB.CreateBitCast(V: Shadow, DestTy: I.getArgOperand(i: 0)->getType());
  CallInst *CI = IRB.CreateIntrinsic(RetTy: I.getType(), ID: I.getIntrinsicID(),
                                     Args: {Shadow, I.getArgOperand(i: 1)});

  setShadow(V: &I, SV: IRB.CreateBitCast(V: CI, DestTy: getShadowTy(V: &I)));
  setOriginForNaryOp(I);
}
4740
// Instrument AVX permutation intrinsic.
// We apply the same permutation (argument index 1) to the shadows.
void handleAVXVpermi2var(IntrinsicInst &I) {
  assert(I.arg_size() == 3);
  assert(isa<FixedVectorType>(I.getArgOperand(0)->getType()));
  assert(isa<FixedVectorType>(I.getArgOperand(1)->getType()));
  assert(isa<FixedVectorType>(I.getArgOperand(2)->getType()));
  [[maybe_unused]] auto ArgVectorSize =
      cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType())->getNumElements();
  assert(cast<FixedVectorType>(I.getArgOperand(1)->getType())
             ->getNumElements() == ArgVectorSize);
  assert(cast<FixedVectorType>(I.getArgOperand(2)->getType())
             ->getNumElements() == ArgVectorSize);
  assert(I.getArgOperand(0)->getType() == I.getArgOperand(2)->getType());
  assert(I.getType() == I.getArgOperand(0)->getType());
  assert(I.getArgOperand(1)->getType()->isIntOrIntVectorTy());
  IRBuilder<> IRB(&I);
  Value *AShadow = getShadow(I: &I, i: 0);
  Value *Idx = I.getArgOperand(i: 1);
  Value *BShadow = getShadow(I: &I, i: 2);

  // The used bits of the index vector must be initialized.
  maskedCheckAVXIndexShadow(IRB, Idx, I: &I);

  // Shadows are integer-ish types but some intrinsics require a
  // different (e.g., floating-point) type.
  AShadow = IRB.CreateBitCast(V: AShadow, DestTy: I.getArgOperand(i: 0)->getType());
  BShadow = IRB.CreateBitCast(V: BShadow, DestTy: I.getArgOperand(i: 2)->getType());
  CallInst *CI = IRB.CreateIntrinsic(RetTy: I.getType(), ID: I.getIntrinsicID(),
                                     Args: {AShadow, Idx, BShadow});
  setShadow(V: &I, SV: IRB.CreateBitCast(V: CI, DestTy: getShadowTy(V: &I)));
  setOriginForNaryOp(I);
}
4773
4774 [[maybe_unused]] static bool isFixedIntVectorTy(const Type *T) {
4775 return isa<FixedVectorType>(Val: T) && T->isIntOrIntVectorTy();
4776 }
4777
4778 [[maybe_unused]] static bool isFixedFPVectorTy(const Type *T) {
4779 return isa<FixedVectorType>(Val: T) && T->isFPOrFPVectorTy();
4780 }
4781
4782 [[maybe_unused]] static bool isFixedIntVector(const Value *V) {
4783 return isFixedIntVectorTy(T: V->getType());
4784 }
4785
4786 [[maybe_unused]] static bool isFixedFPVector(const Value *V) {
4787 return isFixedFPVectorTy(T: V->getType());
4788 }
4789
// e.g., <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
//                      (<16 x float> a, <16 x i32> writethru, i16 mask,
//                       i32 rounding)
//
// Inconveniently, some similar intrinsics have a different operand order:
//        <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
//                      (<16 x float> a, i32 rounding, <16 x i16> writethru,
//                       i16 mask)
//
// If the return type has more elements than A, the excess elements are
// zeroed (and the corresponding shadow is initialized).
//        <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
//                      (<4 x float> a, i32 rounding, <8 x i16> writethru,
//                       i8 mask)
//
// dst[i]        = mask[i] ? convert(a[i])              : writethru[i]
// dst_shadow[i] = mask[i] ? all_or_nothing(a_shadow[i]) : writethru_shadow[i]
//   where all_or_nothing(x) is fully uninitialized if x has any
//   uninitialized bits
//
// \p LastMask selects between the two operand orders shown above: true for
// the (a, rounding, writethru, mask) order, false for
// (a, writethru, mask, rounding).
void handleAVX512VectorConvertFPToInt(IntrinsicInst &I, bool LastMask) {
  IRBuilder<> IRB(&I);

  assert(I.arg_size() == 4);
  Value *A = I.getOperand(i_nocapture: 0);
  Value *WriteThrough;
  Value *Mask;
  Value *RoundingMode;
  if (LastMask) {
    WriteThrough = I.getOperand(i_nocapture: 2);
    Mask = I.getOperand(i_nocapture: 3);
    RoundingMode = I.getOperand(i_nocapture: 1);
  } else {
    WriteThrough = I.getOperand(i_nocapture: 1);
    Mask = I.getOperand(i_nocapture: 2);
    RoundingMode = I.getOperand(i_nocapture: 3);
  }

  assert(isFixedFPVector(A));
  assert(isFixedIntVector(WriteThrough));

  unsigned ANumElements =
      cast<FixedVectorType>(Val: A->getType())->getNumElements();
  [[maybe_unused]] unsigned WriteThruNumElements =
      cast<FixedVectorType>(Val: WriteThrough->getType())->getNumElements();
  assert(ANumElements == WriteThruNumElements ||
         ANumElements * 2 == WriteThruNumElements);

  assert(Mask->getType()->isIntegerTy());
  unsigned MaskNumElements = Mask->getType()->getScalarSizeInBits();
  assert(ANumElements == MaskNumElements ||
         ANumElements * 2 == MaskNumElements);

  assert(WriteThruNumElements == MaskNumElements);

  // Some bits of the mask may be unused, though it's unusual to have partly
  // uninitialized bits.
  insertCheckShadowOf(Val: Mask, OrigIns: &I);

  assert(RoundingMode->getType()->isIntegerTy());
  // Only some bits of the rounding mode are used, though it's very
  // unusual to have uninitialized bits there (more commonly, it's a
  // constant).
  insertCheckShadowOf(Val: RoundingMode, OrigIns: &I);

  assert(I.getType() == WriteThrough->getType());

  Value *AShadow = getShadow(V: A);
  AShadow = maybeExtendVectorShadowWithZeros(Shadow: AShadow, I);

  if (ANumElements * 2 == MaskNumElements) {
    // Ensure that the irrelevant bits of the mask are zero, hence selecting
    // from the zeroed shadow instead of the writethrough's shadow.
    Mask =
        IRB.CreateTrunc(V: Mask, DestTy: IRB.getIntNTy(N: ANumElements), Name: "_ms_mask_trunc");
    Mask =
        IRB.CreateZExt(V: Mask, DestTy: IRB.getIntNTy(N: MaskNumElements), Name: "_ms_mask_zext");
  }

  // Convert the iN mask to <N x i1> e.g., i16 to <16 x i1>.
  Mask = IRB.CreateBitCast(
      V: Mask, DestTy: FixedVectorType::get(ElementType: IRB.getInt1Ty(), NumElts: MaskNumElements),
      Name: "_ms_mask_bitcast");

  /// For floating-point to integer conversion, the output is:
  /// - fully uninitialized if *any* bit of the input is uninitialized
  /// - fully initialized if all bits of the input are initialized
  /// We apply the same principle on a per-element basis for vectors.
  ///
  /// We use the scalar width of the return type instead of A's.
  AShadow = IRB.CreateSExt(
      V: IRB.CreateICmpNE(LHS: AShadow, RHS: getCleanShadow(OrigTy: AShadow->getType())),
      DestTy: getShadowTy(V: &I), Name: "_ms_a_shadow");

  Value *WriteThroughShadow = getShadow(V: WriteThrough);
  Value *Shadow = IRB.CreateSelect(C: Mask, True: AShadow, False: WriteThroughShadow,
                                   Name: "_ms_writethru_select");

  setShadow(V: &I, SV: Shadow);
  setOriginForNaryOp(I);
}
4890
// Instrument BMI / BMI2 intrinsics.
// All of these intrinsics are Z = I(X, Y)
// where the types of all operands and the result match, and are either i32 or
// i64. The following instrumentation happens to work for all of them:
//   Sz = I(Sx, Y) | (sext (Sy != 0))
void handleBmiIntrinsic(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  Type *ShadowTy = getShadowTy(V: &I);

  // If any bit of the mask operand is poisoned, then the whole thing is.
  Value *SMask = getShadow(I: &I, i: 1);
  SMask = IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: SMask, RHS: getCleanShadow(OrigTy: ShadowTy)),
                         DestTy: ShadowTy);
  // Apply the same intrinsic to the shadow of the first operand.
  Value *S = IRB.CreateCall(Callee: I.getCalledFunction(),
                            Args: {getShadow(I: &I, i: 0), I.getOperand(i_nocapture: 1)});
  // Combine: any poison in the mask poisons the whole result.
  S = IRB.CreateOr(LHS: SMask, RHS: S);
  setShadow(V: &I, SV: S);
  setOriginForNaryOp(I);
}
4911
4912 static SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) {
4913 SmallVector<int, 8> Mask;
4914 for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
4915 Mask.append(NumInputs: 2, Elt: X);
4916 }
4917 return Mask;
4918 }
4919
// Instrument pclmul intrinsics.
// These intrinsics operate either on odd or on even elements of the input
// vectors, depending on the constant in the 3rd argument, ignoring the rest.
// Replace the unused elements with copies of the used ones, ex:
//   (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
// or
//   (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
// and then apply the usual shadow combining logic.
void handlePclmulIntrinsic(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  unsigned Width =
      cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType())->getNumElements();
  assert(isa<ConstantInt>(I.getArgOperand(2)) &&
         "pclmul 3rd operand must be a constant");
  unsigned Imm = cast<ConstantInt>(Val: I.getArgOperand(i: 2))->getZExtValue();
  // Bit 0 of the immediate selects odd/even elements of operand 0; bit 4
  // selects odd/even elements of operand 1.
  Value *Shuf0 = IRB.CreateShuffleVector(V: getShadow(I: &I, i: 0),
                                         Mask: getPclmulMask(Width, OddElements: Imm & 0x01));
  Value *Shuf1 = IRB.CreateShuffleVector(V: getShadow(I: &I, i: 1),
                                         Mask: getPclmulMask(Width, OddElements: Imm & 0x10));
  ShadowAndOriginCombiner SOC(this, IRB);
  SOC.Add(OpShadow: Shuf0, OpOrigin: getOrigin(I: &I, i: 0));
  SOC.Add(OpShadow: Shuf1, OpOrigin: getOrigin(I: &I, i: 1));
  SOC.Done(I: &I);
}
4944
// Instrument _mm_*_sd|ss intrinsics
//
// The result's lowest element comes from (an operation on) the second
// operand; the remaining elements are passed through from the first operand.
// The shadow is combined with the same shuffle.
void handleUnarySdSsIntrinsic(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  unsigned Width =
      cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType())->getNumElements();
  Value *First = getShadow(I: &I, i: 0);
  Value *Second = getShadow(I: &I, i: 1);
  // First element of second operand, remaining elements of first operand
  // (shuffle indices >= Width refer to the second vector).
  SmallVector<int, 16> Mask;
  Mask.push_back(Elt: Width);
  for (unsigned i = 1; i < Width; i++)
    Mask.push_back(Elt: i);
  Value *Shadow = IRB.CreateShuffleVector(V1: First, V2: Second, Mask);

  setShadow(V: &I, SV: Shadow);
  setOriginForNaryOp(I);
}
4962
// Instrument AVX vtest intrinsics.
// The result (a flag-like scalar) depends on every bit of both inputs, so
// any uninitialized bit in either operand poisons the whole result.
void handleVtestIntrinsic(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  Value *Shadow0 = getShadow(I: &I, i: 0);
  Value *Shadow1 = getShadow(I: &I, i: 1);
  Value *Or = IRB.CreateOr(LHS: Shadow0, RHS: Shadow1);
  // Per-element "any bit set", then collapse the vector to a single scalar.
  Value *NZ = IRB.CreateICmpNE(LHS: Or, RHS: Constant::getNullValue(Ty: Or->getType()));
  Value *Scalar = convertShadowToScalar(V: NZ, IRB);
  Value *Shadow = IRB.CreateZExt(V: Scalar, DestTy: getShadowTy(V: &I));

  setShadow(V: &I, SV: Shadow);
  setOriginForNaryOp(I);
}
4975
// Instrument two-operand _mm_*_sd|ss intrinsics.
// The result's lowest element depends on the lowest elements of both
// operands; the remaining elements are passed through from the first operand.
void handleBinarySdSsIntrinsic(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  unsigned Width =
      cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType())->getNumElements();
  Value *First = getShadow(I: &I, i: 0);
  Value *Second = getShadow(I: &I, i: 1);
  Value *OrShadow = IRB.CreateOr(LHS: First, RHS: Second);
  // First element of both OR'd together, remaining elements of first operand
  // (shuffle index Width selects element 0 of the second vector, OrShadow).
  SmallVector<int, 16> Mask;
  Mask.push_back(Elt: Width);
  for (unsigned i = 1; i < Width; i++)
    Mask.push_back(Elt: i);
  Value *Shadow = IRB.CreateShuffleVector(V1: First, V2: OrShadow, Mask);

  setShadow(V: &I, SV: Shadow);
  setOriginForNaryOp(I);
}
4993
// _mm_round_pd / _mm_round_ps.
// Similar to maybeHandleSimpleNomemIntrinsic except
// the second argument is guaranteed to be a constant integer.
void handleRoundPdPsIntrinsic(IntrinsicInst &I) {
  assert(I.getArgOperand(0)->getType() == I.getType());
  assert(I.arg_size() == 2);
  assert(isa<ConstantInt>(I.getArgOperand(1)));

  // Propagate only the data operand's shadow; the rounding-mode constant
  // needs no check.
  IRBuilder<> IRB(&I);
  ShadowAndOriginCombiner SC(this, IRB);
  SC.Add(V: I.getArgOperand(i: 0));
  SC.Done(I: &I);
}
5007
// Instrument @llvm.abs intrinsic.
//
// e.g., i32 @llvm.abs.i32 (i32 <Src>, i1 <is_int_min_poison>)
//       <4 x i32> @llvm.abs.v4i32(<4 x i32> <Src>, i1 <is_int_min_poison>)
//
// If is_int_min_poison is set, abs(INT_MIN) is poison, so the shadow of any
// element equal to INT_MIN is made fully poisoned; otherwise the shadow is
// simply passed through from Src.
void handleAbsIntrinsic(IntrinsicInst &I) {
  assert(I.arg_size() == 2);
  Value *Src = I.getArgOperand(i: 0);
  Value *IsIntMinPoison = I.getArgOperand(i: 1);

  assert(I.getType()->isIntOrIntVectorTy());

  assert(Src->getType() == I.getType());

  assert(IsIntMinPoison->getType()->isIntegerTy());
  assert(IsIntMinPoison->getType()->getIntegerBitWidth() == 1);

  IRBuilder<> IRB(&I);
  Value *SrcShadow = getShadow(V: Src);

  // Compare Src (element-wise, for vectors) against INT_MIN of its scalar
  // width.
  APInt MinVal =
      APInt::getSignedMinValue(numBits: Src->getType()->getScalarSizeInBits());
  Value *MinValVec = ConstantInt::get(Ty: Src->getType(), V: MinVal);
  Value *SrcIsMin = IRB.CreateICmp(P: CmpInst::ICMP_EQ, LHS: Src, RHS: MinValVec);

  Value *PoisonedShadow = getPoisonedShadow(V: Src);
  Value *PoisonedIfIntMinShadow =
      IRB.CreateSelect(C: SrcIsMin, True: PoisonedShadow, False: SrcShadow);
  // Outer select: the poisoning only applies when is_int_min_poison is true
  // (typically a constant, in which case this folds away).
  Value *Shadow =
      IRB.CreateSelect(C: IsIntMinPoison, True: PoisonedIfIntMinShadow, False: SrcShadow);

  setShadow(V: &I, SV: Shadow);
  setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
}
5041
5042 void handleIsFpClass(IntrinsicInst &I) {
5043 IRBuilder<> IRB(&I);
5044 Value *Shadow = getShadow(I: &I, i: 0);
5045 setShadow(V: &I, SV: IRB.CreateICmpNE(LHS: Shadow, RHS: getCleanShadow(V: Shadow)));
5046 setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
5047 }
5048
// Instrument @llvm.*.with.overflow intrinsics, which return
// {result, overflow-bit}. The result's shadow is the OR of the operand
// shadows; the overflow bit is poisoned iff any operand bit is poisoned.
void handleArithmeticWithOverflow(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);
  Value *Shadow0 = getShadow(I: &I, i: 0);
  Value *Shadow1 = getShadow(I: &I, i: 1);
  Value *ShadowElt0 = IRB.CreateOr(LHS: Shadow0, RHS: Shadow1);
  Value *ShadowElt1 =
      IRB.CreateICmpNE(LHS: ShadowElt0, RHS: getCleanShadow(V: ShadowElt0));

  // Assemble the two-element aggregate shadow matching the return type.
  Value *Shadow = PoisonValue::get(T: getShadowTy(V: &I));
  Shadow = IRB.CreateInsertValue(Agg: Shadow, Val: ShadowElt0, Idxs: 0);
  Shadow = IRB.CreateInsertValue(Agg: Shadow, Val: ShadowElt1, Idxs: 1);

  setShadow(V: &I, SV: Shadow);
  setOriginForNaryOp(I);
}
5064
5065 Value *extractLowerShadow(IRBuilder<> &IRB, Value *V) {
5066 assert(isa<FixedVectorType>(V->getType()));
5067 assert(cast<FixedVectorType>(V->getType())->getNumElements() > 0);
5068 Value *Shadow = getShadow(V);
5069 return IRB.CreateExtractElement(Vec: Shadow,
5070 Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: 0));
5071 }
5072
5073 // Handle llvm.x86.avx512.mask.pmov{,s,us}.*.{128,256,512}
5074 //
5075 // e.g., call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512
5076 // (<8 x i64>, <16 x i8>, i8)
5077 // A WriteThru Mask
5078 //
5079 // call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512
5080 // (<16 x i32>, <16 x i8>, i16)
5081 //
5082 // Dst[i] = Mask[i] ? truncate_or_saturate(A[i]) : WriteThru[i]
5083 // Dst_shadow[i] = Mask[i] ? truncate(A_shadow[i]) : WriteThru_shadow[i]
5084 //
5085 // If Dst has more elements than A, the excess elements are zeroed (and the
5086 // corresponding shadow is initialized).
5087 //
5088 // Note: for PMOV (truncation), handleIntrinsicByApplyingToShadow is precise
5089 // and is much faster than this handler.
5090 void handleAVX512VectorDownConvert(IntrinsicInst &I) {
5091 IRBuilder<> IRB(&I);
5092
5093 assert(I.arg_size() == 3);
5094 Value *A = I.getOperand(i_nocapture: 0);
5095 Value *WriteThrough = I.getOperand(i_nocapture: 1);
5096 Value *Mask = I.getOperand(i_nocapture: 2);
5097
5098 assert(isFixedIntVector(A));
5099 assert(isFixedIntVector(WriteThrough));
5100
5101 unsigned ANumElements =
5102 cast<FixedVectorType>(Val: A->getType())->getNumElements();
5103 unsigned OutputNumElements =
5104 cast<FixedVectorType>(Val: WriteThrough->getType())->getNumElements();
5105 assert(ANumElements == OutputNumElements ||
5106 ANumElements * 2 == OutputNumElements);
5107 // N.B. some PMOV{,S,US} instructions have a 4x or even 8x ratio in the
5108 // number of elements e.g.,
5109 // <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256
5110 // (<4 x i64>, <16 x i8>, i8)
5111 // <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128
5112 // (<2 x i64>, <16 x i8>, i8)
5113 // However, we currently handle those elsewhere.
5114
5115 assert(Mask->getType()->isIntegerTy());
5116 insertCheckShadowOf(Val: Mask, OrigIns: &I);
5117
5118 // The mask has 1 bit per element of A, but a minimum of 8 bits.
5119 if (Mask->getType()->getScalarSizeInBits() == 8 && OutputNumElements < 8)
5120 Mask = IRB.CreateTrunc(V: Mask, DestTy: Type::getIntNTy(C&: *MS.C, N: OutputNumElements));
5121 assert(Mask->getType()->getScalarSizeInBits() == ANumElements);
5122
5123 assert(I.getType() == WriteThrough->getType());
5124
5125 // Widen the mask, if necessary, to have one bit per element of the output
5126 // vector.
5127 // We want the extra bits to have '1's, so that the CreateSelect will
5128 // select the values from AShadow instead of WriteThroughShadow ("maskless"
5129 // versions of the intrinsics are sometimes implemented using an all-1's
5130 // mask and an undefined value for WriteThroughShadow). We accomplish this
5131 // by using bitwise NOT before and after the ZExt.
5132 if (ANumElements != OutputNumElements) {
5133 Mask = IRB.CreateNot(V: Mask);
5134 Mask = IRB.CreateZExt(V: Mask, DestTy: Type::getIntNTy(C&: *MS.C, N: OutputNumElements),
5135 Name: "_ms_widen_mask");
5136 Mask = IRB.CreateNot(V: Mask);
5137 }
5138 Mask = IRB.CreateBitCast(
5139 V: Mask, DestTy: FixedVectorType::get(ElementType: IRB.getInt1Ty(), NumElts: OutputNumElements));
5140
5141 Value *AShadow = getShadow(V: A);
5142
5143 // The return type might have more elements than the input.
5144 // Temporarily shrink the return type's number of elements.
5145 VectorType *ShadowType = maybeShrinkVectorShadowType(Src: A, I);
5146
5147 // PMOV truncates; PMOVS/PMOVUS uses signed/unsigned saturation.
5148 // This handler treats them all as truncation, which leads to some rare
5149 // false positives in the cases where the truncated bytes could
5150 // unambiguously saturate the value e.g., if A = ??????10 ????????
5151 // (big-endian), the unsigned saturated byte conversion is 11111111 i.e.,
5152 // fully defined, but the truncated byte is ????????.
5153 //
5154 // TODO: use GetMinMaxUnsigned() to handle saturation precisely.
5155 AShadow = IRB.CreateTrunc(V: AShadow, DestTy: ShadowType, Name: "_ms_trunc_shadow");
5156 AShadow = maybeExtendVectorShadowWithZeros(Shadow: AShadow, I);
5157
5158 Value *WriteThroughShadow = getShadow(V: WriteThrough);
5159
5160 Value *Shadow = IRB.CreateSelect(C: Mask, True: AShadow, False: WriteThroughShadow);
5161 setShadow(V: &I, SV: Shadow);
5162 setOriginForNaryOp(I);
5163 }
5164
// Handle llvm.x86.avx512.* instructions that take vector(s) of floating-point
// values and perform an operation whose shadow propagation should be handled
// as all-or-nothing [*], with masking provided by a vector and a mask
// supplied as an integer.
//
// [*] if all bits of a vector element are initialized, the output is fully
//     initialized; otherwise, the output is fully uninitialized
//
// e.g., <16 x float> @llvm.x86.avx512.rsqrt14.ps.512
//                        (<16 x float>, <16 x float>, i16)
//                         A             WriteThru     Mask
//
//       <2 x double> @llvm.x86.avx512.rcp14.pd.128
//                        (<2 x double>, <2 x double>, i8)
//                         A             WriteThru     Mask
//
//       <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512
//                        (<8 x double>, i32, <8 x double>, i8,   i32)
//                         A             Imm  WriteThru     Mask  Rounding
//
//       <16 x float> @llvm.x86.avx512.mask.scalef.ps.512
//                       (<16 x float>, <16 x float>, <16 x float>, i16, i32)
//                        WriteThru     A             B             Mask Rnd
//
// All operands other than A, B, ..., and WriteThru (e.g., Mask, Imm,
// Rounding) must be fully initialized.
//
// Dst[i]        = Mask[i] ? some_op(A[i], B[i], ...)
//                         : WriteThru[i]
// Dst_shadow[i] = Mask[i] ? all_or_nothing(A_shadow[i] | B_shadow[i] | ...)
//                         : WriteThru_shadow[i]
//
// \p DataIndices lists the operand indices of A, B, ...; \p WriteThruIndex
// and \p MaskIndex locate the writethrough vector and the integer mask.
void handleAVX512VectorGenericMaskedFP(IntrinsicInst &I,
                                       SmallVector<unsigned, 4> DataIndices,
                                       unsigned WriteThruIndex,
                                       unsigned MaskIndex) {
  IRBuilder<> IRB(&I);

  unsigned NumArgs = I.arg_size();

  assert(WriteThruIndex < NumArgs);
  assert(MaskIndex < NumArgs);
  assert(WriteThruIndex != MaskIndex);
  Value *WriteThru = I.getOperand(i_nocapture: WriteThruIndex);

  unsigned OutputNumElements =
      cast<FixedVectorType>(Val: WriteThru->getType())->getNumElements();

  assert(DataIndices.size() > 0);

  // Record which operands are data so the remaining ones can be treated as
  // "control" operands below.
  bool isData[16] = {false};
  assert(NumArgs <= 16);
  for (unsigned i : DataIndices) {
    assert(i < NumArgs);
    assert(i != WriteThruIndex);
    assert(i != MaskIndex);

    isData[i] = true;

    // Every data operand is an FP vector with the output's element count.
    Value *A = I.getOperand(i_nocapture: i);
    assert(isFixedFPVector(A));
    [[maybe_unused]] unsigned ANumElements =
        cast<FixedVectorType>(Val: A->getType())->getNumElements();
    assert(ANumElements == OutputNumElements);
  }

  Value *Mask = I.getOperand(i_nocapture: MaskIndex);

  assert(isFixedFPVector(WriteThru));

  for (unsigned i = 0; i < NumArgs; ++i) {
    if (!isData[i] && i != WriteThruIndex) {
      // Imm, Mask, Rounding etc. are "control" data, hence we require that
      // they be fully initialized.
      assert(I.getOperand(i)->getType()->isIntegerTy());
      insertCheckShadowOf(Val: I.getOperand(i_nocapture: i), OrigIns: &I);
    }
  }

  // The mask has 1 bit per element of A, but a minimum of 8 bits.
  if (Mask->getType()->getScalarSizeInBits() == 8 && OutputNumElements < 8)
    Mask = IRB.CreateTrunc(V: Mask, DestTy: Type::getIntNTy(C&: *MS.C, N: OutputNumElements));
  assert(Mask->getType()->getScalarSizeInBits() == OutputNumElements);

  assert(I.getType() == WriteThru->getType());

  Mask = IRB.CreateBitCast(
      V: Mask, DestTy: FixedVectorType::get(ElementType: IRB.getInt1Ty(), NumElts: OutputNumElements));

  // OR together the shadows of all data operands.
  Value *DataShadow = nullptr;
  for (unsigned i : DataIndices) {
    Value *A = I.getOperand(i_nocapture: i);
    if (DataShadow)
      DataShadow = IRB.CreateOr(LHS: DataShadow, RHS: getShadow(V: A));
    else
      DataShadow = getShadow(V: A);
  }

  // All-or-nothing shadow
  DataShadow =
      IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: DataShadow, RHS: getCleanShadow(V: DataShadow)),
                     DestTy: DataShadow->getType());

  Value *WriteThruShadow = getShadow(V: WriteThru);

  Value *Shadow = IRB.CreateSelect(C: Mask, True: DataShadow, False: WriteThruShadow);
  setShadow(V: &I, SV: Shadow);

  setOriginForNaryOp(I);
}
5274
// For sh.* compiler intrinsics:
//   llvm.x86.avx512fp16.mask.{add/sub/mul/div/max/min}.sh.round
//      (<8 x half>, <8 x half>, <8 x half>, i8,  i32)
//       A           B           WriteThru   Mask RoundingMode
//
// DstShadow[0]    = Mask[0] ? (AShadow[0] | BShadow[0]) : WriteThruShadow[0]
// DstShadow[1..7] = AShadow[1..7]
void visitGenericScalarHalfwordInst(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);

  assert(I.arg_size() == 5);
  Value *A = I.getOperand(i_nocapture: 0);
  Value *B = I.getOperand(i_nocapture: 1);
  Value *WriteThrough = I.getOperand(i_nocapture: 2);
  Value *Mask = I.getOperand(i_nocapture: 3);
  Value *RoundingMode = I.getOperand(i_nocapture: 4);

  // Technically, we could probably just check whether the LSB is
  // initialized, but intuitively it feels like a partly uninitialized mask
  // is unintended, and we should warn the user immediately.
  insertCheckShadowOf(Val: Mask, OrigIns: &I);
  insertCheckShadowOf(Val: RoundingMode, OrigIns: &I);

  assert(isa<FixedVectorType>(A->getType()));
  unsigned NumElements =
      cast<FixedVectorType>(Val: A->getType())->getNumElements();
  assert(NumElements == 8);
  assert(A->getType() == B->getType());
  assert(B->getType() == WriteThrough->getType());
  assert(Mask->getType()->getPrimitiveSizeInBits() == NumElements);
  assert(RoundingMode->getType()->isIntegerTy());

  // Only element 0 is computed; its shadow combines A's and B's low
  // elements.
  Value *ALowerShadow = extractLowerShadow(IRB, V: A);
  Value *BLowerShadow = extractLowerShadow(IRB, V: B);

  Value *ABLowerShadow = IRB.CreateOr(LHS: ALowerShadow, RHS: BLowerShadow);

  Value *WriteThroughLowerShadow = extractLowerShadow(IRB, V: WriteThrough);

  // Convert the i8 mask to <8 x i1> and pick out bit 0.
  Mask = IRB.CreateBitCast(
      V: Mask, DestTy: FixedVectorType::get(ElementType: IRB.getInt1Ty(), NumElts: NumElements));
  Value *MaskLower =
      IRB.CreateExtractElement(Vec: Mask, Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: 0));

  // Elements 1..7 pass through from A; splice the selected low-element
  // shadow into slot 0 of A's shadow.
  Value *AShadow = getShadow(V: A);
  Value *DstLowerShadow =
      IRB.CreateSelect(C: MaskLower, True: ABLowerShadow, False: WriteThroughLowerShadow);
  Value *DstShadow = IRB.CreateInsertElement(
      Vec: AShadow, NewElt: DstLowerShadow, Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: 0),
      Name: "_msprop");

  setShadow(V: &I, SV: DstShadow);
  setOriginForNaryOp(I);
}
5329
// Approximately handle AVX Galois Field Affine Transformation
//
// e.g.,
//   <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8)
//   <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8)
//   <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8>, <64 x i8>, i8)
//             Out                          A          x          b
//   where A and x are packed matrices, b is a vector,
//   Out = A * x + b in GF(2)
//
// Multiplication in GF(2) is equivalent to bitwise AND. However, the matrix
// computation also includes a parity calculation.
//
// For the bitwise AND of bits V1 and V2, the exact shadow is:
//   Out_Shadow =   (V1_Shadow & V2_Shadow)
//                | (V1        & V2_Shadow)
//                | (V1_Shadow & V2       )
//
// We approximate the shadow of gf2p8affineqb using:
//   Out_Shadow =   gf2p8affineqb(x_Shadow, A_shadow, 0)
//                | gf2p8affineqb(x,        A_shadow, 0)
//                | gf2p8affineqb(x_Shadow, A,        0)
//                | set1_epi8(b_Shadow)
//
// This approximation has false negatives: if an intermediate dot-product
// contains an even number of 1's, the parity is 0.
// It has no false positives.
void handleAVXGF2P8Affine(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);

  assert(I.arg_size() == 3);
  Value *A = I.getOperand(i_nocapture: 0);
  Value *X = I.getOperand(i_nocapture: 1);
  Value *B = I.getOperand(i_nocapture: 2);

  assert(isFixedIntVector(A));
  assert(cast<VectorType>(A->getType())
             ->getElementType()
             ->getScalarSizeInBits() == 8);

  assert(A->getType() == X->getType());

  assert(B->getType()->isIntegerTy());
  assert(B->getType()->getScalarSizeInBits() == 8);

  assert(I.getType() == A->getType());

  Value *AShadow = getShadow(V: A);
  Value *XShadow = getShadow(V: X);
  // The 'b' addend is handled separately (below), so pass a zero shadow in
  // its place to the three approximating calls.
  Value *BZeroShadow = getCleanShadow(V: B);

  CallInst *AShadowXShadow = IRB.CreateIntrinsic(
      RetTy: I.getType(), ID: I.getIntrinsicID(), Args: {XShadow, AShadow, BZeroShadow});
  CallInst *AShadowX = IRB.CreateIntrinsic(RetTy: I.getType(), ID: I.getIntrinsicID(),
                                           Args: {X, AShadow, BZeroShadow});
  CallInst *XShadowA = IRB.CreateIntrinsic(RetTy: I.getType(), ID: I.getIntrinsicID(),
                                           Args: {XShadow, A, BZeroShadow});

  unsigned NumElements = cast<FixedVectorType>(Val: I.getType())->getNumElements();
  Value *BShadow = getShadow(V: B);
  Value *BBroadcastShadow = getCleanShadow(V: AShadow);
  // There is no LLVM IR intrinsic for _mm512_set1_epi8.
  // This loop generates a lot of LLVM IR, which we expect that CodeGen will
  // lower appropriately (e.g., VPBROADCASTB).
  // Besides, b is often a constant, in which case it is fully initialized.
  for (unsigned i = 0; i < NumElements; i++)
    BBroadcastShadow = IRB.CreateInsertElement(Vec: BBroadcastShadow, NewElt: BShadow, Idx: i);

  setShadow(V: &I, SV: IRB.CreateOr(
                   Ops: {AShadowXShadow, AShadowX, XShadowA, BBroadcastShadow}));
  setOriginForNaryOp(I);
}
5402
// Handle Arm NEON vector load intrinsics (vld*).
//
// The WithLane instructions (ld[234]lane) are similar to:
//   call {<4 x i32>, <4 x i32>, <4 x i32>}
//       @llvm.aarch64.neon.ld3lane.v4i32.p0
//           (<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 %lane, ptr
//           %A)
//
// The non-WithLane instructions (ld[234], ld1x[234], ld[234]r) are similar
// to:
//   call {<8 x i8>, <8 x i8>} @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
//
// In both cases the shadow is computed by applying the same load intrinsic
// to the shadow memory of the source pointer.
void handleNEONVectorLoad(IntrinsicInst &I, bool WithLane) {
  unsigned int numArgs = I.arg_size();

  // Return type is a struct of vectors of integers or floating-point
  assert(I.getType()->isStructTy());
  [[maybe_unused]] StructType *RetTy = cast<StructType>(Val: I.getType());
  assert(RetTy->getNumElements() > 0);
  assert(RetTy->getElementType(0)->isIntOrIntVectorTy() ||
         RetTy->getElementType(0)->isFPOrFPVectorTy());
  for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
    assert(RetTy->getElementType(i) == RetTy->getElementType(0));

  if (WithLane) {
    // 2, 3 or 4 vectors, plus lane number, plus input pointer
    assert(4 <= numArgs && numArgs <= 6);

    // Return type is a struct of the input vectors
    assert(RetTy->getNumElements() + 2 == numArgs);
    for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
      assert(I.getArgOperand(i)->getType() == RetTy->getElementType(0));
  } else {
    assert(numArgs == 1);
  }

  IRBuilder<> IRB(&I);

  SmallVector<Value *, 6> ShadowArgs;
  if (WithLane) {
    // The input vectors' shadows are passed through to the shadow load.
    for (unsigned int i = 0; i < numArgs - 2; i++)
      ShadowArgs.push_back(Elt: getShadow(V: I.getArgOperand(i)));

    // Lane number, passed verbatim
    Value *LaneNumber = I.getArgOperand(i: numArgs - 2);
    ShadowArgs.push_back(Elt: LaneNumber);

    // TODO: blend shadow of lane number into output shadow?
    insertCheckShadowOf(Val: LaneNumber, OrigIns: &I);
  }

  // The last argument is always the source pointer; the shadow load reads
  // from the corresponding shadow memory.
  Value *Src = I.getArgOperand(i: numArgs - 1);
  assert(Src->getType()->isPointerTy() && "Source is not a pointer!");

  Type *SrcShadowTy = getShadowTy(V: Src);
  auto [SrcShadowPtr, SrcOriginPtr] =
      getShadowOriginPtr(Addr: Src, IRB, ShadowTy: SrcShadowTy, Alignment: Align(1), /*isStore*/ false);
  ShadowArgs.push_back(Elt: SrcShadowPtr);

  // The NEON vector load instructions handled by this function all have
  // integer variants. It is easier to use those rather than trying to cast
  // a struct of vectors of floats into a struct of vectors of integers.
  CallInst *CI =
      IRB.CreateIntrinsic(RetTy: getShadowTy(V: &I), ID: I.getIntrinsicID(), Args: ShadowArgs);
  setShadow(V: &I, SV: CI);

  if (!MS.TrackOrigins)
    return;

  // Blame the whole loaded struct on the source memory's origin.
  Value *PtrSrcOrigin = IRB.CreateLoad(Ty: MS.OriginTy, Ptr: SrcOriginPtr);
  setOrigin(V: &I, Origin: PtrSrcOrigin);
}
5474
5475 /// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
5476 /// and vst{2,3,4}lane).
5477 ///
5478 /// Arm NEON vector store intrinsics have the output address (pointer) as the
5479 /// last argument, with the initial arguments being the inputs (and lane
5480 /// number for vst{2,3,4}lane). They return void.
5481 ///
5482 /// - st4 interleaves the output e.g., st4 (inA, inB, inC, inD, outP) writes
5483 /// abcdabcdabcdabcd... into *outP
5484 /// - st1_x4 is non-interleaved e.g., st1_x4 (inA, inB, inC, inD, outP)
5485 /// writes aaaa...bbbb...cccc...dddd... into *outP
5486 /// - st4lane has arguments of (inA, inB, inC, inD, lane, outP)
5487 /// These instructions can all be instrumented with essentially the same
5488 /// MSan logic, simply by applying the corresponding intrinsic to the shadow.
5489 void handleNEONVectorStoreIntrinsic(IntrinsicInst &I, bool useLane) {
5490 IRBuilder<> IRB(&I);
5491
5492 // Don't use getNumOperands() because it includes the callee
5493 int numArgOperands = I.arg_size();
5494
5495 // The last arg operand is the output (pointer)
5496 assert(numArgOperands >= 1);
5497 Value *Addr = I.getArgOperand(i: numArgOperands - 1);
5498 assert(Addr->getType()->isPointerTy());
5499 int skipTrailingOperands = 1;
5500
5501 if (ClCheckAccessAddress)
5502 insertCheckShadowOf(Val: Addr, OrigIns: &I);
5503
5504 // Second-last operand is the lane number (for vst{2,3,4}lane)
5505 if (useLane) {
5506 skipTrailingOperands++;
5507 assert(numArgOperands >= static_cast<int>(skipTrailingOperands));
5508 assert(isa<IntegerType>(
5509 I.getArgOperand(numArgOperands - skipTrailingOperands)->getType()));
5510 }
5511
5512 SmallVector<Value *, 8> ShadowArgs;
5513 // All the initial operands are the inputs
5514 for (int i = 0; i < numArgOperands - skipTrailingOperands; i++) {
5515 assert(isa<FixedVectorType>(I.getArgOperand(i)->getType()));
5516 Value *Shadow = getShadow(I: &I, i);
5517 ShadowArgs.append(NumInputs: 1, Elt: Shadow);
5518 }
5519
5520 // MSan's GetShadowTy assumes the LHS is the type we want the shadow for
5521 // e.g., for:
5522 // [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5523 // we know the type of the output (and its shadow) is <16 x i8>.
5524 //
5525 // Arm NEON VST is unusual because the last argument is the output address:
5526 // define void @st2_16b(<16 x i8> %A, <16 x i8> %B, ptr %P) {
5527 // call void @llvm.aarch64.neon.st2.v16i8.p0
5528 // (<16 x i8> [[A]], <16 x i8> [[B]], ptr [[P]])
5529 // and we have no type information about P's operand. We must manually
5530 // compute the type (<16 x i8> x 2).
5531 FixedVectorType *OutputVectorTy = FixedVectorType::get(
5532 ElementType: cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType())->getElementType(),
5533 NumElts: cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType())->getNumElements() *
5534 (numArgOperands - skipTrailingOperands));
5535 Type *OutputShadowTy = getShadowTy(OrigTy: OutputVectorTy);
5536
5537 if (useLane)
5538 ShadowArgs.append(NumInputs: 1,
5539 Elt: I.getArgOperand(i: numArgOperands - skipTrailingOperands));
5540
5541 Value *OutputShadowPtr, *OutputOriginPtr;
5542 // AArch64 NEON does not need alignment (unless OS requires it)
5543 std::tie(args&: OutputShadowPtr, args&: OutputOriginPtr) = getShadowOriginPtr(
5544 Addr, IRB, ShadowTy: OutputShadowTy, Alignment: Align(1), /*isStore*/ true);
5545 ShadowArgs.append(NumInputs: 1, Elt: OutputShadowPtr);
5546
5547 CallInst *CI =
5548 IRB.CreateIntrinsic(RetTy: IRB.getVoidTy(), ID: I.getIntrinsicID(), Args: ShadowArgs);
5549 setShadow(V: &I, SV: CI);
5550
5551 if (MS.TrackOrigins) {
5552 // TODO: if we modelled the vst* instruction more precisely, we could
5553 // more accurately track the origins (e.g., if both inputs are
5554 // uninitialized for vst2, we currently blame the second input, even
5555 // though part of the output depends only on the first input).
5556 //
5557 // This is particularly imprecise for vst{2,3,4}lane, since only one
5558 // lane of each input is actually copied to the output.
5559 OriginCombiner OC(this, IRB);
5560 for (int i = 0; i < numArgOperands - skipTrailingOperands; i++)
5561 OC.Add(V: I.getArgOperand(i));
5562
5563 const DataLayout &DL = F.getDataLayout();
5564 OC.DoneAndStoreOrigin(TS: DL.getTypeStoreSize(Ty: OutputVectorTy),
5565 OriginPtr: OutputOriginPtr);
5566 }
5567 }
5568
// Integer matrix multiplication:
// - <4 x i32> @llvm.aarch64.neon.{s,u,us}mmla.v4i32.v16i8
//       (<4 x i32> %R, <16 x i8> %A, <16 x i8> %B)
//   - <4 x i32> is a 2x2 matrix
//   - <16 x i8> %A and %B are 2x8 and 8x2 matrices respectively
//
// Floating-point matrix multiplication:
// - <4 x float> @llvm.aarch64.neon.bfmmla
//       (<4 x float> %R, <8 x bfloat> %A, <8 x bfloat> %B)
//   - <4 x float> is a 2x2 matrix
//   - <8 x bfloat> %A and %B are 2x4 and 4x2 matrices respectively
//
// The general shadow propagation approach is:
// 1) get the shadows of the input matrices %A and %B
// 2) map each shadow value to 0x1 if the corresponding value is fully
//    initialized, and 0x0 otherwise
// 3) perform a matrix multiplication on the shadows of %A and %B [*].
//    The output will be a 2x2 matrix. For each element, a value of 0x8
//    (for {s,u,us}mmla) or 0x4 (for bfmmla) means all the corresponding
//    inputs were clean; if so, set the shadow to zero, otherwise set to -1.
// 4) blend in the shadow of %R
//
// [*] Since shadows are integral, the obvious approach is to always apply
//     ummla to the shadows. Unfortunately, Armv8.2+bf16 supports bfmmla,
//     but not ummla. Thus, for bfmmla, our instrumentation reuses bfmmla.
//
// TODO: consider allowing multiplication of zero with an uninitialized value
//       to result in an initialized value.
//
// Sets both the shadow and (approximate) origin of I.
void handleNEONMatrixMultiply(IntrinsicInst &I) {
  IRBuilder<> IRB(&I);

  // Operands: %R is the accumulator, %A and %B are the input matrices.
  assert(I.arg_size() == 3);
  Value *R = I.getArgOperand(i: 0);
  Value *A = I.getArgOperand(i: 1);
  Value *B = I.getArgOperand(i: 2);

  assert(I.getType() == R->getType());

  assert(isa<FixedVectorType>(R->getType()));
  assert(isa<FixedVectorType>(A->getType()));
  assert(isa<FixedVectorType>(B->getType()));

  FixedVectorType *RTy = cast<FixedVectorType>(Val: R->getType());
  FixedVectorType *ATy = cast<FixedVectorType>(Val: A->getType());
  FixedVectorType *BTy = cast<FixedVectorType>(Val: B->getType());
  assert(ATy->getElementType() == BTy->getElementType());

  // Sanity-check the exact matrix shapes described in the header comment.
  if (RTy->getElementType()->isIntegerTy()) {
    // <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8
    //     (<4 x i32> %R, <16 x i8> %X, <16 x i8> %Y)
    assert(RTy == FixedVectorType::get(IntegerType::get(*MS.C, 32), 4));
    assert(ATy == FixedVectorType::get(IntegerType::get(*MS.C, 8), 16));
    assert(BTy == FixedVectorType::get(IntegerType::get(*MS.C, 8), 16));
  } else {
    // <4 x float> @llvm.aarch64.neon.bfmmla
    //     (<4 x float> %R, <8 x bfloat> %X, <8 x bfloat> %Y)
    assert(RTy == FixedVectorType::get(Type::getFloatTy(*MS.C), 4));
    assert(ATy == FixedVectorType::get(Type::getBFloatTy(*MS.C), 8));
    assert(BTy == FixedVectorType::get(Type::getBFloatTy(*MS.C), 8));
  }

  // Step 1: fetch the shadows of all three operands.
  Value *ShadowR = getShadow(I: &I, i: 0);
  Value *ShadowA = getShadow(I: &I, i: 1);
  Value *ShadowB = getShadow(I: &I, i: 2);

  // ShadowAB ends up as a <4 x i1> vector: true iff the corresponding
  // output element depends on an uninitialized input.
  Value *ShadowAB;
  Value *FullyInit;

  if (RTy->getElementType()->isIntegerTy()) {
    // Step 2 (integer): if the value is fully initialized, the shadow will
    // be 000...001. Otherwise, the shadow will be all zero.
    // (This is the opposite of how we typically handle shadows.)
    ShadowA = IRB.CreateZExt(V: IRB.CreateICmpEQ(LHS: ShadowA, RHS: getCleanShadow(OrigTy: ATy)),
                             DestTy: getShadowTy(OrigTy: ATy));
    ShadowB = IRB.CreateZExt(V: IRB.CreateICmpEQ(LHS: ShadowB, RHS: getCleanShadow(OrigTy: BTy)),
                             DestTy: getShadowTy(OrigTy: BTy));
    // TODO: the CreateSelect approach used below for floating-point is more
    //       generic than CreateZExt. Investigate whether it is worthwhile
    //       unifying the two approaches.

    // Step 3 (integer): matrix-multiply the 0/1 shadows with a zero
    // accumulator.
    ShadowAB = IRB.CreateIntrinsic(RetTy: RTy, ID: Intrinsic::aarch64_neon_ummla,
                                   Args: {getCleanShadow(OrigTy: RTy), ShadowA, ShadowB});

    // ummla multiplies a 2x8 matrix with an 8x2 matrix. If all entries of the
    // input matrices are equal to 0x1, all entries of the output matrix will
    // be 0x8.
    FullyInit = ConstantVector::getSplat(
        EC: RTy->getElementCount(), Elt: ConstantInt::get(Ty: RTy->getElementType(), V: 0x8));

    ShadowAB = IRB.CreateICmpNE(LHS: ShadowAB, RHS: FullyInit);
  } else {
    Constant *ABZeros = ConstantVector::getSplat(
        EC: ATy->getElementCount(), Elt: ConstantFP::get(Ty: ATy->getElementType(), V: 0));
    Constant *ABOnes = ConstantVector::getSplat(
        EC: ATy->getElementCount(), Elt: ConstantFP::get(Ty: ATy->getElementType(), V: 1));

    // Step 2 (floating-point): as per the integer case, if the shadow is
    // clean, we store 0x1 (here 1.0), otherwise we store 0x0 (0.0) -- the
    // opposite of usual shadow arithmetic.
    ShadowA = IRB.CreateSelect(C: IRB.CreateICmpEQ(LHS: ShadowA, RHS: getCleanShadow(OrigTy: ATy)),
                               True: ABOnes, False: ABZeros);
    ShadowB = IRB.CreateSelect(C: IRB.CreateICmpEQ(LHS: ShadowB, RHS: getCleanShadow(OrigTy: BTy)),
                               True: ABOnes, False: ABZeros);

    Constant *RZeros = ConstantVector::getSplat(
        EC: RTy->getElementCount(), Elt: ConstantFP::get(Ty: RTy->getElementType(), V: 0));

    // Step 3 (floating-point): reuse bfmmla itself, since Armv8.2+bf16 may
    // not provide ummla (see [*] in the header comment).
    ShadowAB = IRB.CreateIntrinsic(RetTy: RTy, ID: Intrinsic::aarch64_neon_bfmmla,
                                   Args: {RZeros, ShadowA, ShadowB});

    // bfmmla multiplies a 2x4 matrix with an 4x2 matrix. If all entries of
    // the input matrices are equal to 0x1, all entries of the output matrix
    // will be 4.0. (To avoid floating-point error, we check if each entry
    // < 3.5.)
    FullyInit = ConstantVector::getSplat(
        EC: RTy->getElementCount(), Elt: ConstantFP::get(Ty: RTy->getElementType(), V: 3.5));

    // FCmpULT: "yields true if either operand is a QNAN or op1 is less than"
    // op2"
    ShadowAB = IRB.CreateFCmpULT(LHS: ShadowAB, RHS: FullyInit);
  }

  // Step 4: blend in the shadow of the accumulator %R, element-wise -- an
  // uninitialized accumulator element taints the matching output element.
  ShadowR = IRB.CreateICmpNE(LHS: ShadowR, RHS: getCleanShadow(OrigTy: RTy));
  ShadowR = IRB.CreateOr(LHS: ShadowAB, RHS: ShadowR);

  // Widen the <4 x i1> result back to the full shadow type (sext maps true
  // to all-ones i.e., fully poisoned).
  setShadow(V: &I, SV: IRB.CreateSExt(V: ShadowR, DestTy: getShadowTy(OrigTy: RTy)));

  setOriginForNaryOp(I);
}
5697
5698 /// Handle intrinsics by applying the intrinsic to the shadows.
5699 ///
5700 /// The trailing arguments are passed verbatim to the intrinsic, though any
5701 /// uninitialized trailing arguments can also taint the shadow e.g., for an
5702 /// intrinsic with one trailing verbatim argument:
5703 /// out = intrinsic(var1, var2, opType)
5704 /// we compute:
5705 /// shadow[out] =
5706 /// intrinsic(shadow[var1], shadow[var2], opType) | shadow[opType]
5707 ///
5708 /// Typically, shadowIntrinsicID will be specified by the caller to be
5709 /// I.getIntrinsicID(), but the caller can choose to replace it with another
5710 /// intrinsic of the same type.
5711 ///
5712 /// CAUTION: this assumes that the intrinsic will handle arbitrary
5713 /// bit-patterns (for example, if the intrinsic accepts floats for
5714 /// var1, we require that it doesn't care if inputs are NaNs).
5715 ///
5716 /// For example, this can be applied to the Arm NEON vector table intrinsics
5717 /// (tbl{1,2,3,4}).
5718 ///
5719 /// The origin is approximated using setOriginForNaryOp.
5720 void handleIntrinsicByApplyingToShadow(IntrinsicInst &I,
5721 Intrinsic::ID shadowIntrinsicID,
5722 unsigned int trailingVerbatimArgs) {
5723 IRBuilder<> IRB(&I);
5724
5725 assert(trailingVerbatimArgs < I.arg_size());
5726
5727 SmallVector<Value *, 8> ShadowArgs;
5728 // Don't use getNumOperands() because it includes the callee
5729 for (unsigned int i = 0; i < I.arg_size() - trailingVerbatimArgs; i++) {
5730 Value *Shadow = getShadow(I: &I, i);
5731
5732 // Shadows are integer-ish types but some intrinsics require a
5733 // different (e.g., floating-point) type.
5734 ShadowArgs.push_back(
5735 Elt: IRB.CreateBitCast(V: Shadow, DestTy: I.getArgOperand(i)->getType()));
5736 }
5737
5738 for (unsigned int i = I.arg_size() - trailingVerbatimArgs; i < I.arg_size();
5739 i++) {
5740 Value *Arg = I.getArgOperand(i);
5741 ShadowArgs.push_back(Elt: Arg);
5742 }
5743
5744 CallInst *CI =
5745 IRB.CreateIntrinsic(RetTy: I.getType(), ID: shadowIntrinsicID, Args: ShadowArgs);
5746 Value *CombinedShadow = CI;
5747
5748 // Combine the computed shadow with the shadow of trailing args
5749 for (unsigned int i = I.arg_size() - trailingVerbatimArgs; i < I.arg_size();
5750 i++) {
5751 Value *Shadow =
5752 CreateShadowCast(IRB, V: getShadow(I: &I, i), dstTy: CombinedShadow->getType());
5753 CombinedShadow = IRB.CreateOr(LHS: Shadow, RHS: CombinedShadow, Name: "_msprop");
5754 }
5755
5756 setShadow(V: &I, SV: IRB.CreateBitCast(V: CombinedShadow, DestTy: getShadowTy(V: &I)));
5757
5758 setOriginForNaryOp(I);
5759 }
5760
// Approximation only: instead of modelling the polynomial multiply
// precisely, delegate to handleShadowOr, which derives the result shadow
// from the operand shadows (so any poison in either input taints the whole
// result).
//
// e.g., <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)
void handleNEONVectorMultiplyIntrinsic(IntrinsicInst &I) {
  // These intrinsics take exactly two scalar operands.
  assert(I.arg_size() == 2);

  handleShadowOr(I);
}
5769
/// Dispatch for target-independent intrinsics.
///
/// Returns true if the intrinsic was recognized and instrumented here
/// (shadow/origin set by the chosen handler); returns false to tell the
/// caller that this intrinsic still needs handling elsewhere.
bool maybeHandleCrossPlatformIntrinsic(IntrinsicInst &I) {
  switch (I.getIntrinsicID()) {
  // Checked arithmetic: {result, overflow-bit} aggregates.
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::umul_with_overflow:
  case Intrinsic::smul_with_overflow:
    handleArithmeticWithOverflow(I);
    break;
  case Intrinsic::abs:
    handleAbsIntrinsic(I);
    break;
  case Intrinsic::bitreverse:
    // Bit-permutations map cleanly onto the shadow: apply the same
    // intrinsic to the shadow bits.
    handleIntrinsicByApplyingToShadow(I, shadowIntrinsicID: I.getIntrinsicID(),
                                      /*trailingVerbatimArgs*/ 0);
    break;
  case Intrinsic::is_fpclass:
    handleIsFpClass(I);
    break;
  case Intrinsic::lifetime_start:
    handleLifetimeStart(I);
    break;
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
    handleInvariantGroup(I);
    break;
  case Intrinsic::bswap:
    handleBswap(I);
    break;
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    handleCountLeadingTrailingZeros(I);
    break;
  // Masked memory operations (load/store, expand/compress, gather/scatter).
  case Intrinsic::masked_compressstore:
    handleMaskedCompressStore(I);
    break;
  case Intrinsic::masked_expandload:
    handleMaskedExpandLoad(I);
    break;
  case Intrinsic::masked_gather:
    handleMaskedGather(I);
    break;
  case Intrinsic::masked_scatter:
    handleMaskedScatter(I);
    break;
  case Intrinsic::masked_store:
    handleMaskedStore(I);
    break;
  case Intrinsic::masked_load:
    handleMaskedLoad(I);
    break;
  // AND/OR reductions get dedicated, more precise handlers.
  case Intrinsic::vector_reduce_and:
    handleVectorReduceAndIntrinsic(I);
    break;
  case Intrinsic::vector_reduce_or:
    handleVectorReduceOrIntrinsic(I);
    break;

  case Intrinsic::vector_reduce_add:
  case Intrinsic::vector_reduce_xor:
  case Intrinsic::vector_reduce_mul:
  // Signed/Unsigned Min/Max
  // TODO: handling similarly to AND/OR may be more precise.
  case Intrinsic::vector_reduce_smax:
  case Intrinsic::vector_reduce_smin:
  case Intrinsic::vector_reduce_umax:
  case Intrinsic::vector_reduce_umin:
  // TODO: this has no false positives, but arguably we should check that all
  // the bits are initialized.
  case Intrinsic::vector_reduce_fmax:
  case Intrinsic::vector_reduce_fmin:
    handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/false);
    break;

  // These reductions take an explicit starting value as the first operand.
  case Intrinsic::vector_reduce_fadd:
  case Intrinsic::vector_reduce_fmul:
    handleVectorReduceWithStarterIntrinsic(I);
    break;

  // Three-way comparisons: approximate by OR-ing the operand shadows.
  case Intrinsic::scmp:
  case Intrinsic::ucmp: {
    handleShadowOr(I);
    break;
  }

  case Intrinsic::fshl:
  case Intrinsic::fshr:
    handleFunnelShift(I);
    break;

  case Intrinsic::is_constant:
    // The result of llvm.is.constant() is always defined.
    setShadow(V: &I, SV: getCleanShadow(V: &I));
    setOrigin(V: &I, Origin: getCleanOrigin());
    break;

  default:
    // Not a cross-platform intrinsic we know; let the caller try other
    // (e.g., target-specific) handlers.
    return false;
  }

  return true;
}
5873
5874 bool maybeHandleX86SIMDIntrinsic(IntrinsicInst &I) {
5875 switch (I.getIntrinsicID()) {
5876 case Intrinsic::x86_sse_stmxcsr:
5877 handleStmxcsr(I);
5878 break;
5879 case Intrinsic::x86_sse_ldmxcsr:
5880 handleLdmxcsr(I);
5881 break;
5882
5883 // Convert Scalar Double Precision Floating-Point Value
5884 // to Unsigned Doubleword Integer
5885 // etc.
5886 case Intrinsic::x86_avx512_vcvtsd2usi64:
5887 case Intrinsic::x86_avx512_vcvtsd2usi32:
5888 case Intrinsic::x86_avx512_vcvtss2usi64:
5889 case Intrinsic::x86_avx512_vcvtss2usi32:
5890 case Intrinsic::x86_avx512_cvttss2usi64:
5891 case Intrinsic::x86_avx512_cvttss2usi:
5892 case Intrinsic::x86_avx512_cvttsd2usi64:
5893 case Intrinsic::x86_avx512_cvttsd2usi:
5894 case Intrinsic::x86_avx512_cvtusi2ss:
5895 case Intrinsic::x86_avx512_cvtusi642sd:
5896 case Intrinsic::x86_avx512_cvtusi642ss:
5897 handleSSEVectorConvertIntrinsic(I, NumUsedElements: 1, HasRoundingMode: true);
5898 break;
5899 case Intrinsic::x86_sse2_cvtsd2si64:
5900 case Intrinsic::x86_sse2_cvtsd2si:
5901 case Intrinsic::x86_sse2_cvtsd2ss:
5902 case Intrinsic::x86_sse2_cvttsd2si64:
5903 case Intrinsic::x86_sse2_cvttsd2si:
5904 case Intrinsic::x86_sse_cvtss2si64:
5905 case Intrinsic::x86_sse_cvtss2si:
5906 case Intrinsic::x86_sse_cvttss2si64:
5907 case Intrinsic::x86_sse_cvttss2si:
5908 handleSSEVectorConvertIntrinsic(I, NumUsedElements: 1);
5909 break;
5910 case Intrinsic::x86_sse_cvtps2pi:
5911 case Intrinsic::x86_sse_cvttps2pi:
5912 handleSSEVectorConvertIntrinsic(I, NumUsedElements: 2);
5913 break;
5914
5915 // TODO:
5916 // <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>)
5917 // <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>)
5918 // <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>)
5919
5920 case Intrinsic::x86_vcvtps2ph_128:
5921 case Intrinsic::x86_vcvtps2ph_256: {
5922 handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/true);
5923 break;
5924 }
5925
5926 // Convert Packed Single Precision Floating-Point Values
5927 // to Packed Signed Doubleword Integer Values
5928 //
5929 // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
5930 // (<16 x float>, <16 x i32>, i16, i32)
5931 case Intrinsic::x86_avx512_mask_cvtps2dq_512:
5932 handleAVX512VectorConvertFPToInt(I, /*LastMask=*/false);
5933 break;
5934
5935 // Convert Packed Double Precision Floating-Point Values
5936 // to Packed Single Precision Floating-Point Values
5937 case Intrinsic::x86_sse2_cvtpd2ps:
5938 case Intrinsic::x86_sse2_cvtps2dq:
5939 case Intrinsic::x86_sse2_cvtpd2dq:
5940 case Intrinsic::x86_sse2_cvttps2dq:
5941 case Intrinsic::x86_sse2_cvttpd2dq:
5942 case Intrinsic::x86_avx_cvt_pd2_ps_256:
5943 case Intrinsic::x86_avx_cvt_ps2dq_256:
5944 case Intrinsic::x86_avx_cvt_pd2dq_256:
5945 case Intrinsic::x86_avx_cvtt_ps2dq_256:
5946 case Intrinsic::x86_avx_cvtt_pd2dq_256: {
5947 handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/false);
5948 break;
5949 }
5950
5951 // Convert Single-Precision FP Value to 16-bit FP Value
5952 // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
5953 // (<16 x float>, i32, <16 x i16>, i16)
5954 // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
5955 // (<4 x float>, i32, <8 x i16>, i8)
5956 // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256
5957 // (<8 x float>, i32, <8 x i16>, i8)
5958 case Intrinsic::x86_avx512_mask_vcvtps2ph_512:
5959 case Intrinsic::x86_avx512_mask_vcvtps2ph_256:
5960 case Intrinsic::x86_avx512_mask_vcvtps2ph_128:
5961 handleAVX512VectorConvertFPToInt(I, /*LastMask=*/true);
5962 break;
5963
5964 // Shift Packed Data (Left Logical, Right Arithmetic, Right Logical)
5965 case Intrinsic::x86_avx512_psll_w_512:
5966 case Intrinsic::x86_avx512_psll_d_512:
5967 case Intrinsic::x86_avx512_psll_q_512:
5968 case Intrinsic::x86_avx512_pslli_w_512:
5969 case Intrinsic::x86_avx512_pslli_d_512:
5970 case Intrinsic::x86_avx512_pslli_q_512:
5971 case Intrinsic::x86_avx512_psrl_w_512:
5972 case Intrinsic::x86_avx512_psrl_d_512:
5973 case Intrinsic::x86_avx512_psrl_q_512:
5974 case Intrinsic::x86_avx512_psra_w_512:
5975 case Intrinsic::x86_avx512_psra_d_512:
5976 case Intrinsic::x86_avx512_psra_q_512:
5977 case Intrinsic::x86_avx512_psrli_w_512:
5978 case Intrinsic::x86_avx512_psrli_d_512:
5979 case Intrinsic::x86_avx512_psrli_q_512:
5980 case Intrinsic::x86_avx512_psrai_w_512:
5981 case Intrinsic::x86_avx512_psrai_d_512:
5982 case Intrinsic::x86_avx512_psrai_q_512:
5983 case Intrinsic::x86_avx512_psra_q_256:
5984 case Intrinsic::x86_avx512_psra_q_128:
5985 case Intrinsic::x86_avx512_psrai_q_256:
5986 case Intrinsic::x86_avx512_psrai_q_128:
5987 case Intrinsic::x86_avx2_psll_w:
5988 case Intrinsic::x86_avx2_psll_d:
5989 case Intrinsic::x86_avx2_psll_q:
5990 case Intrinsic::x86_avx2_pslli_w:
5991 case Intrinsic::x86_avx2_pslli_d:
5992 case Intrinsic::x86_avx2_pslli_q:
5993 case Intrinsic::x86_avx2_psrl_w:
5994 case Intrinsic::x86_avx2_psrl_d:
5995 case Intrinsic::x86_avx2_psrl_q:
5996 case Intrinsic::x86_avx2_psra_w:
5997 case Intrinsic::x86_avx2_psra_d:
5998 case Intrinsic::x86_avx2_psrli_w:
5999 case Intrinsic::x86_avx2_psrli_d:
6000 case Intrinsic::x86_avx2_psrli_q:
6001 case Intrinsic::x86_avx2_psrai_w:
6002 case Intrinsic::x86_avx2_psrai_d:
6003 case Intrinsic::x86_sse2_psll_w:
6004 case Intrinsic::x86_sse2_psll_d:
6005 case Intrinsic::x86_sse2_psll_q:
6006 case Intrinsic::x86_sse2_pslli_w:
6007 case Intrinsic::x86_sse2_pslli_d:
6008 case Intrinsic::x86_sse2_pslli_q:
6009 case Intrinsic::x86_sse2_psrl_w:
6010 case Intrinsic::x86_sse2_psrl_d:
6011 case Intrinsic::x86_sse2_psrl_q:
6012 case Intrinsic::x86_sse2_psra_w:
6013 case Intrinsic::x86_sse2_psra_d:
6014 case Intrinsic::x86_sse2_psrli_w:
6015 case Intrinsic::x86_sse2_psrli_d:
6016 case Intrinsic::x86_sse2_psrli_q:
6017 case Intrinsic::x86_sse2_psrai_w:
6018 case Intrinsic::x86_sse2_psrai_d:
6019 case Intrinsic::x86_mmx_psll_w:
6020 case Intrinsic::x86_mmx_psll_d:
6021 case Intrinsic::x86_mmx_psll_q:
6022 case Intrinsic::x86_mmx_pslli_w:
6023 case Intrinsic::x86_mmx_pslli_d:
6024 case Intrinsic::x86_mmx_pslli_q:
6025 case Intrinsic::x86_mmx_psrl_w:
6026 case Intrinsic::x86_mmx_psrl_d:
6027 case Intrinsic::x86_mmx_psrl_q:
6028 case Intrinsic::x86_mmx_psra_w:
6029 case Intrinsic::x86_mmx_psra_d:
6030 case Intrinsic::x86_mmx_psrli_w:
6031 case Intrinsic::x86_mmx_psrli_d:
6032 case Intrinsic::x86_mmx_psrli_q:
6033 case Intrinsic::x86_mmx_psrai_w:
6034 case Intrinsic::x86_mmx_psrai_d:
6035 handleVectorShiftIntrinsic(I, /* Variable */ false);
6036 break;
6037 case Intrinsic::x86_avx2_psllv_d:
6038 case Intrinsic::x86_avx2_psllv_d_256:
6039 case Intrinsic::x86_avx512_psllv_d_512:
6040 case Intrinsic::x86_avx2_psllv_q:
6041 case Intrinsic::x86_avx2_psllv_q_256:
6042 case Intrinsic::x86_avx512_psllv_q_512:
6043 case Intrinsic::x86_avx2_psrlv_d:
6044 case Intrinsic::x86_avx2_psrlv_d_256:
6045 case Intrinsic::x86_avx512_psrlv_d_512:
6046 case Intrinsic::x86_avx2_psrlv_q:
6047 case Intrinsic::x86_avx2_psrlv_q_256:
6048 case Intrinsic::x86_avx512_psrlv_q_512:
6049 case Intrinsic::x86_avx2_psrav_d:
6050 case Intrinsic::x86_avx2_psrav_d_256:
6051 case Intrinsic::x86_avx512_psrav_d_512:
6052 case Intrinsic::x86_avx512_psrav_q_128:
6053 case Intrinsic::x86_avx512_psrav_q_256:
6054 case Intrinsic::x86_avx512_psrav_q_512:
6055 handleVectorShiftIntrinsic(I, /* Variable */ true);
6056 break;
6057
6058 // Pack with Signed/Unsigned Saturation
6059 case Intrinsic::x86_sse2_packsswb_128:
6060 case Intrinsic::x86_sse2_packssdw_128:
6061 case Intrinsic::x86_sse2_packuswb_128:
6062 case Intrinsic::x86_sse41_packusdw:
6063 case Intrinsic::x86_avx2_packsswb:
6064 case Intrinsic::x86_avx2_packssdw:
6065 case Intrinsic::x86_avx2_packuswb:
6066 case Intrinsic::x86_avx2_packusdw:
6067 // e.g., <64 x i8> @llvm.x86.avx512.packsswb.512
6068 // (<32 x i16> %a, <32 x i16> %b)
6069 // <32 x i16> @llvm.x86.avx512.packssdw.512
6070 // (<16 x i32> %a, <16 x i32> %b)
6071 // Note: AVX512 masked variants are auto-upgraded by LLVM.
6072 case Intrinsic::x86_avx512_packsswb_512:
6073 case Intrinsic::x86_avx512_packssdw_512:
6074 case Intrinsic::x86_avx512_packuswb_512:
6075 case Intrinsic::x86_avx512_packusdw_512:
6076 handleVectorPackIntrinsic(I);
6077 break;
6078
6079 case Intrinsic::x86_sse41_pblendvb:
6080 case Intrinsic::x86_sse41_blendvpd:
6081 case Intrinsic::x86_sse41_blendvps:
6082 case Intrinsic::x86_avx_blendv_pd_256:
6083 case Intrinsic::x86_avx_blendv_ps_256:
6084 case Intrinsic::x86_avx2_pblendvb:
6085 handleBlendvIntrinsic(I);
6086 break;
6087
6088 case Intrinsic::x86_avx_dp_ps_256:
6089 case Intrinsic::x86_sse41_dppd:
6090 case Intrinsic::x86_sse41_dpps:
6091 handleDppIntrinsic(I);
6092 break;
6093
6094 case Intrinsic::x86_mmx_packsswb:
6095 case Intrinsic::x86_mmx_packuswb:
6096 handleVectorPackIntrinsic(I, MMXEltSizeInBits: 16);
6097 break;
6098
6099 case Intrinsic::x86_mmx_packssdw:
6100 handleVectorPackIntrinsic(I, MMXEltSizeInBits: 32);
6101 break;
6102
6103 case Intrinsic::x86_mmx_psad_bw:
6104 handleVectorSadIntrinsic(I, IsMMX: true);
6105 break;
6106 case Intrinsic::x86_sse2_psad_bw:
6107 case Intrinsic::x86_avx2_psad_bw:
6108 handleVectorSadIntrinsic(I);
6109 break;
6110
6111 // Multiply and Add Packed Words
6112 // < 4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>)
6113 // < 8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>)
6114 // <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>)
6115 //
6116 // Multiply and Add Packed Signed and Unsigned Bytes
6117 // < 8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>)
6118 // <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>)
6119 // <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>)
6120 //
6121 // These intrinsics are auto-upgraded into non-masked forms:
6122 // < 4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128
6123 // (<8 x i16>, <8 x i16>, <4 x i32>, i8)
6124 // < 8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256
6125 // (<16 x i16>, <16 x i16>, <8 x i32>, i8)
6126 // <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512
6127 // (<32 x i16>, <32 x i16>, <16 x i32>, i16)
6128 // < 8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128
6129 // (<16 x i8>, <16 x i8>, <8 x i16>, i8)
6130 // <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256
6131 // (<32 x i8>, <32 x i8>, <16 x i16>, i16)
6132 // <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512
6133 // (<64 x i8>, <64 x i8>, <32 x i16>, i32)
6134 case Intrinsic::x86_sse2_pmadd_wd:
6135 case Intrinsic::x86_avx2_pmadd_wd:
6136 case Intrinsic::x86_avx512_pmaddw_d_512:
6137 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
6138 case Intrinsic::x86_avx2_pmadd_ub_sw:
6139 case Intrinsic::x86_avx512_pmaddubs_w_512:
6140 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
6141 /*ZeroPurifies=*/true,
6142 /*EltSizeInBits=*/0,
6143 /*Lanes=*/kBothLanes);
6144 break;
6145
6146 // <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>)
6147 case Intrinsic::x86_ssse3_pmadd_ub_sw:
6148 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
6149 /*ZeroPurifies=*/true,
6150 /*EltSizeInBits=*/8,
6151 /*Lanes=*/kBothLanes);
6152 break;
6153
6154 // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>)
6155 case Intrinsic::x86_mmx_pmadd_wd:
6156 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
6157 /*ZeroPurifies=*/true,
6158 /*EltSizeInBits=*/16,
6159 /*Lanes=*/kBothLanes);
6160 break;
6161
6162 // BFloat16 multiply-add to single-precision
6163 // <4 x float> llvm.aarch64.neon.bfmlalt
6164 // (<4 x float>, <8 x bfloat>, <8 x bfloat>)
6165 case Intrinsic::aarch64_neon_bfmlalt:
6166 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
6167 /*ZeroPurifies=*/false,
6168 /*EltSizeInBits=*/0,
6169 /*Lanes=*/kOddLanes);
6170 break;
6171
6172 // <4 x float> llvm.aarch64.neon.bfmlalb
6173 // (<4 x float>, <8 x bfloat>, <8 x bfloat>)
6174 case Intrinsic::aarch64_neon_bfmlalb:
6175 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
6176 /*ZeroPurifies=*/false,
6177 /*EltSizeInBits=*/0,
6178 /*Lanes=*/kEvenLanes);
6179 break;
6180
6181 // AVX Vector Neural Network Instructions: bytes
6182 //
6183 // Multiply and Add Signed Bytes
6184 // < 4 x i32> @llvm.x86.avx2.vpdpbssd.128
6185 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6186 // < 8 x i32> @llvm.x86.avx2.vpdpbssd.256
6187 // (< 8 x i32>, <32 x i8>, <32 x i8>)
6188 // <16 x i32> @llvm.x86.avx10.vpdpbssd.512
6189 // (<16 x i32>, <64 x i8>, <64 x i8>)
6190 //
6191 // Multiply and Add Signed Bytes With Saturation
6192 // < 4 x i32> @llvm.x86.avx2.vpdpbssds.128
6193 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6194 // < 8 x i32> @llvm.x86.avx2.vpdpbssds.256
6195 // (< 8 x i32>, <32 x i8>, <32 x i8>)
6196 // <16 x i32> @llvm.x86.avx10.vpdpbssds.512
6197 // (<16 x i32>, <64 x i8>, <64 x i8>)
6198 //
6199 // Multiply and Add Signed and Unsigned Bytes
6200 // < 4 x i32> @llvm.x86.avx2.vpdpbsud.128
6201 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6202 // < 8 x i32> @llvm.x86.avx2.vpdpbsud.256
6203 // (< 8 x i32>, <32 x i8>, <32 x i8>)
6204 // <16 x i32> @llvm.x86.avx10.vpdpbsud.512
6205 // (<16 x i32>, <64 x i8>, <64 x i8>)
6206 //
6207 // Multiply and Add Signed and Unsigned Bytes With Saturation
6208 // < 4 x i32> @llvm.x86.avx2.vpdpbsuds.128
6209 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6210 // < 8 x i32> @llvm.x86.avx2.vpdpbsuds.256
6211 // (< 8 x i32>, <32 x i8>, <32 x i8>)
6212 // <16 x i32> @llvm.x86.avx512.vpdpbusds.512
6213 // (<16 x i32>, <64 x i8>, <64 x i8>)
6214 //
6215 // Multiply and Add Unsigned and Signed Bytes
6216 // < 4 x i32> @llvm.x86.avx512.vpdpbusd.128
6217 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6218 // < 8 x i32> @llvm.x86.avx512.vpdpbusd.256
6219 // (< 8 x i32>, <32 x i8>, <32 x i8>)
6220 // <16 x i32> @llvm.x86.avx512.vpdpbusd.512
6221 // (<16 x i32>, <64 x i8>, <64 x i8>)
6222 //
6223 // Multiply and Add Unsigned and Signed Bytes With Saturation
6224 // < 4 x i32> @llvm.x86.avx512.vpdpbusds.128
6225 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6226 // < 8 x i32> @llvm.x86.avx512.vpdpbusds.256
6227 // (< 8 x i32>, <32 x i8>, <32 x i8>)
6228 // <16 x i32> @llvm.x86.avx10.vpdpbsuds.512
6229 // (<16 x i32>, <64 x i8>, <64 x i8>)
6230 //
6231 // Multiply and Add Unsigned Bytes
6232 // < 4 x i32> @llvm.x86.avx2.vpdpbuud.128
6233 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6234 // < 8 x i32> @llvm.x86.avx2.vpdpbuud.256
6235 // (< 8 x i32>, <32 x i8>, <32 x i8>)
6236 // <16 x i32> @llvm.x86.avx10.vpdpbuud.512
6237 // (<16 x i32>, <64 x i8>, <64 x i8>)
6238 //
6239 // Multiply and Add Unsigned Bytes With Saturation
6240 // < 4 x i32> @llvm.x86.avx2.vpdpbuuds.128
6241 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6242 // < 8 x i32> @llvm.x86.avx2.vpdpbuuds.256
6243 // (< 8 x i32>, <32 x i8>, <32 x i8>)
6244 // <16 x i32> @llvm.x86.avx10.vpdpbuuds.512
6245 // (<16 x i32>, <64 x i8>, <64 x i8>)
6246 //
6247 // These intrinsics are auto-upgraded into non-masked forms:
6248 // <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128
6249 // (<4 x i32>, <16 x i8>, <16 x i8>, i8)
6250 // <4 x i32> @llvm.x86.avx512.maskz.vpdpbusd.128
6251 // (<4 x i32>, <16 x i8>, <16 x i8>, i8)
6252 // <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256
6253 // (<8 x i32>, <32 x i8>, <32 x i8>, i8)
6254 // <8 x i32> @llvm.x86.avx512.maskz.vpdpbusd.256
6255 // (<8 x i32>, <32 x i8>, <32 x i8>, i8)
6256 // <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512
6257 // (<16 x i32>, <64 x i8>, <64 x i8>, i16)
6258 // <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512
6259 // (<16 x i32>, <64 x i8>, <64 x i8>, i16)
6260 //
6261 // <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128
6262 // (<4 x i32>, <16 x i8>, <16 x i8>, i8)
6263 // <4 x i32> @llvm.x86.avx512.maskz.vpdpbusds.128
6264 // (<4 x i32>, <16 x i8>, <16 x i8>, i8)
6265 // <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256
6266 // (<8 x i32>, <32 x i8>, <32 x i8>, i8)
6267 // <8 x i32> @llvm.x86.avx512.maskz.vpdpbusds.256
6268 // (<8 x i32>, <32 x i8>, <32 x i8>, i8)
6269 // <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512
6270 // (<16 x i32>, <64 x i8>, <64 x i8>, i16)
6271 // <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512
6272 // (<16 x i32>, <64 x i8>, <64 x i8>, i16)
6273 case Intrinsic::x86_avx512_vpdpbusd_128:
6274 case Intrinsic::x86_avx512_vpdpbusd_256:
6275 case Intrinsic::x86_avx512_vpdpbusd_512:
6276 case Intrinsic::x86_avx512_vpdpbusds_128:
6277 case Intrinsic::x86_avx512_vpdpbusds_256:
6278 case Intrinsic::x86_avx512_vpdpbusds_512:
6279 case Intrinsic::x86_avx2_vpdpbssd_128:
6280 case Intrinsic::x86_avx2_vpdpbssd_256:
6281 case Intrinsic::x86_avx10_vpdpbssd_512:
6282 case Intrinsic::x86_avx2_vpdpbssds_128:
6283 case Intrinsic::x86_avx2_vpdpbssds_256:
6284 case Intrinsic::x86_avx10_vpdpbssds_512:
6285 case Intrinsic::x86_avx2_vpdpbsud_128:
6286 case Intrinsic::x86_avx2_vpdpbsud_256:
6287 case Intrinsic::x86_avx10_vpdpbsud_512:
6288 case Intrinsic::x86_avx2_vpdpbsuds_128:
6289 case Intrinsic::x86_avx2_vpdpbsuds_256:
6290 case Intrinsic::x86_avx10_vpdpbsuds_512:
6291 case Intrinsic::x86_avx2_vpdpbuud_128:
6292 case Intrinsic::x86_avx2_vpdpbuud_256:
6293 case Intrinsic::x86_avx10_vpdpbuud_512:
6294 case Intrinsic::x86_avx2_vpdpbuuds_128:
6295 case Intrinsic::x86_avx2_vpdpbuuds_256:
6296 case Intrinsic::x86_avx10_vpdpbuuds_512:
6297 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/4,
6298 /*ZeroPurifies=*/true,
6299 /*EltSizeInBits=*/0,
6300 /*Lanes=*/kBothLanes);
6301 break;
6302
6303 // AVX Vector Neural Network Instructions: words
6304 //
6305 // Multiply and Add Signed Word Integers
6306 // < 4 x i32> @llvm.x86.avx512.vpdpwssd.128
6307 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6308 // < 8 x i32> @llvm.x86.avx512.vpdpwssd.256
6309 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6310 // <16 x i32> @llvm.x86.avx512.vpdpwssd.512
6311 // (<16 x i32>, <32 x i16>, <32 x i16>)
6312 //
6313 // Multiply and Add Signed Word Integers With Saturation
6314 // < 4 x i32> @llvm.x86.avx512.vpdpwssds.128
6315 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6316 // < 8 x i32> @llvm.x86.avx512.vpdpwssds.256
6317 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6318 // <16 x i32> @llvm.x86.avx512.vpdpwssds.512
6319 // (<16 x i32>, <32 x i16>, <32 x i16>)
6320 //
6321 // Multiply and Add Signed and Unsigned Word Integers
6322 // < 4 x i32> @llvm.x86.avx2.vpdpwsud.128
6323 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6324 // < 8 x i32> @llvm.x86.avx2.vpdpwsud.256
6325 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6326 // <16 x i32> @llvm.x86.avx10.vpdpwsud.512
6327 // (<16 x i32>, <32 x i16>, <32 x i16>)
6328 //
6329 // Multiply and Add Signed and Unsigned Word Integers With Saturation
6330 // < 4 x i32> @llvm.x86.avx2.vpdpwsuds.128
6331 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6332 // < 8 x i32> @llvm.x86.avx2.vpdpwsuds.256
6333 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6334 // <16 x i32> @llvm.x86.avx10.vpdpwsuds.512
6335 // (<16 x i32>, <32 x i16>, <32 x i16>)
6336 //
6337 // Multiply and Add Unsigned and Signed Word Integers
6338 // < 4 x i32> @llvm.x86.avx2.vpdpwusd.128
6339 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6340 // < 8 x i32> @llvm.x86.avx2.vpdpwusd.256
6341 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6342 // <16 x i32> @llvm.x86.avx10.vpdpwusd.512
6343 // (<16 x i32>, <32 x i16>, <32 x i16>)
6344 //
6345 // Multiply and Add Unsigned and Signed Word Integers With Saturation
6346 // < 4 x i32> @llvm.x86.avx2.vpdpwusds.128
6347 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6348 // < 8 x i32> @llvm.x86.avx2.vpdpwusds.256
6349 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6350 // <16 x i32> @llvm.x86.avx10.vpdpwusds.512
6351 // (<16 x i32>, <32 x i16>, <32 x i16>)
6352 //
6353 // Multiply and Add Unsigned and Unsigned Word Integers
6354 // < 4 x i32> @llvm.x86.avx2.vpdpwuud.128
6355 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6356 // < 8 x i32> @llvm.x86.avx2.vpdpwuud.256
6357 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6358 // <16 x i32> @llvm.x86.avx10.vpdpwuud.512
6359 // (<16 x i32>, <32 x i16>, <32 x i16>)
6360 //
6361 // Multiply and Add Unsigned and Unsigned Word Integers With Saturation
6362 // < 4 x i32> @llvm.x86.avx2.vpdpwuuds.128
6363 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6364 // < 8 x i32> @llvm.x86.avx2.vpdpwuuds.256
6365 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6366 // <16 x i32> @llvm.x86.avx10.vpdpwuuds.512
6367 // (<16 x i32>, <32 x i16>, <32 x i16>)
6368 //
6369 // These intrinsics are auto-upgraded into non-masked forms:
6370 // <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128
6371 // (<4 x i32>, <8 x i16>, <8 x i16>, i8)
6372 // <4 x i32> @llvm.x86.avx512.maskz.vpdpwssd.128
6373 // (<4 x i32>, <8 x i16>, <8 x i16>, i8)
6374 // <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256
6375 // (<8 x i32>, <16 x i16>, <16 x i16>, i8)
6376 // <8 x i32> @llvm.x86.avx512.maskz.vpdpwssd.256
6377 // (<8 x i32>, <16 x i16>, <16 x i16>, i8)
6378 // <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512
6379 // (<16 x i32>, <32 x i16>, <32 x i16>, i16)
6380 // <16 x i32> @llvm.x86.avx512.maskz.vpdpwssd.512
6381 // (<16 x i32>, <32 x i16>, <32 x i16>, i16)
6382 //
6383 // <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128
6384 // (<4 x i32>, <8 x i16>, <8 x i16>, i8)
6385 // <4 x i32> @llvm.x86.avx512.maskz.vpdpwssds.128
6386 // (<4 x i32>, <8 x i16>, <8 x i16>, i8)
6387 // <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256
6388 // (<8 x i32>, <16 x i16>, <16 x i16>, i8)
6389 // <8 x i32> @llvm.x86.avx512.maskz.vpdpwssds.256
6390 // (<8 x i32>, <16 x i16>, <16 x i16>, i8)
6391 // <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512
6392 // (<16 x i32>, <32 x i16>, <32 x i16>, i16)
6393 // <16 x i32> @llvm.x86.avx512.maskz.vpdpwssds.512
6394 // (<16 x i32>, <32 x i16>, <32 x i16>, i16)
6395 case Intrinsic::x86_avx512_vpdpwssd_128:
6396 case Intrinsic::x86_avx512_vpdpwssd_256:
6397 case Intrinsic::x86_avx512_vpdpwssd_512:
6398 case Intrinsic::x86_avx512_vpdpwssds_128:
6399 case Intrinsic::x86_avx512_vpdpwssds_256:
6400 case Intrinsic::x86_avx512_vpdpwssds_512:
6401 case Intrinsic::x86_avx2_vpdpwsud_128:
6402 case Intrinsic::x86_avx2_vpdpwsud_256:
6403 case Intrinsic::x86_avx10_vpdpwsud_512:
6404 case Intrinsic::x86_avx2_vpdpwsuds_128:
6405 case Intrinsic::x86_avx2_vpdpwsuds_256:
6406 case Intrinsic::x86_avx10_vpdpwsuds_512:
6407 case Intrinsic::x86_avx2_vpdpwusd_128:
6408 case Intrinsic::x86_avx2_vpdpwusd_256:
6409 case Intrinsic::x86_avx10_vpdpwusd_512:
6410 case Intrinsic::x86_avx2_vpdpwusds_128:
6411 case Intrinsic::x86_avx2_vpdpwusds_256:
6412 case Intrinsic::x86_avx10_vpdpwusds_512:
6413 case Intrinsic::x86_avx2_vpdpwuud_128:
6414 case Intrinsic::x86_avx2_vpdpwuud_256:
6415 case Intrinsic::x86_avx10_vpdpwuud_512:
6416 case Intrinsic::x86_avx2_vpdpwuuds_128:
6417 case Intrinsic::x86_avx2_vpdpwuuds_256:
6418 case Intrinsic::x86_avx10_vpdpwuuds_512:
6419 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
6420 /*ZeroPurifies=*/true,
6421 /*EltSizeInBits=*/0,
6422 /*Lanes=*/kBothLanes);
6423 break;
6424
6425 // Dot Product of BF16 Pairs Accumulated Into Packed Single
6426 // Precision
6427 // <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128
6428 // (<4 x float>, <8 x bfloat>, <8 x bfloat>)
6429 // <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256
6430 // (<8 x float>, <16 x bfloat>, <16 x bfloat>)
6431 // <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512
6432 // (<16 x float>, <32 x bfloat>, <32 x bfloat>)
6433 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
6434 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
6435 case Intrinsic::x86_avx512bf16_dpbf16ps_512:
6436 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
6437 /*ZeroPurifies=*/false,
6438 /*EltSizeInBits=*/0,
6439 /*Lanes=*/kBothLanes);
6440 break;
6441
6442 case Intrinsic::x86_sse_cmp_ss:
6443 case Intrinsic::x86_sse2_cmp_sd:
6444 case Intrinsic::x86_sse_comieq_ss:
6445 case Intrinsic::x86_sse_comilt_ss:
6446 case Intrinsic::x86_sse_comile_ss:
6447 case Intrinsic::x86_sse_comigt_ss:
6448 case Intrinsic::x86_sse_comige_ss:
6449 case Intrinsic::x86_sse_comineq_ss:
6450 case Intrinsic::x86_sse_ucomieq_ss:
6451 case Intrinsic::x86_sse_ucomilt_ss:
6452 case Intrinsic::x86_sse_ucomile_ss:
6453 case Intrinsic::x86_sse_ucomigt_ss:
6454 case Intrinsic::x86_sse_ucomige_ss:
6455 case Intrinsic::x86_sse_ucomineq_ss:
6456 case Intrinsic::x86_sse2_comieq_sd:
6457 case Intrinsic::x86_sse2_comilt_sd:
6458 case Intrinsic::x86_sse2_comile_sd:
6459 case Intrinsic::x86_sse2_comigt_sd:
6460 case Intrinsic::x86_sse2_comige_sd:
6461 case Intrinsic::x86_sse2_comineq_sd:
6462 case Intrinsic::x86_sse2_ucomieq_sd:
6463 case Intrinsic::x86_sse2_ucomilt_sd:
6464 case Intrinsic::x86_sse2_ucomile_sd:
6465 case Intrinsic::x86_sse2_ucomigt_sd:
6466 case Intrinsic::x86_sse2_ucomige_sd:
6467 case Intrinsic::x86_sse2_ucomineq_sd:
6468 handleVectorCompareScalarIntrinsic(I);
6469 break;
6470
6471 case Intrinsic::x86_avx_cmp_pd_256:
6472 case Intrinsic::x86_avx_cmp_ps_256:
6473 case Intrinsic::x86_sse2_cmp_pd:
6474 case Intrinsic::x86_sse_cmp_ps:
6475 handleVectorComparePackedIntrinsic(I, /*PredicateAsOperand=*/true);
6476 break;
6477
6478 case Intrinsic::x86_bmi_bextr_32:
6479 case Intrinsic::x86_bmi_bextr_64:
6480 case Intrinsic::x86_bmi_bzhi_32:
6481 case Intrinsic::x86_bmi_bzhi_64:
6482 case Intrinsic::x86_bmi_pdep_32:
6483 case Intrinsic::x86_bmi_pdep_64:
6484 case Intrinsic::x86_bmi_pext_32:
6485 case Intrinsic::x86_bmi_pext_64:
6486 handleBmiIntrinsic(I);
6487 break;
6488
6489 case Intrinsic::x86_pclmulqdq:
6490 case Intrinsic::x86_pclmulqdq_256:
6491 case Intrinsic::x86_pclmulqdq_512:
6492 handlePclmulIntrinsic(I);
6493 break;
6494
6495 case Intrinsic::x86_avx_round_pd_256:
6496 case Intrinsic::x86_avx_round_ps_256:
6497 case Intrinsic::x86_sse41_round_pd:
6498 case Intrinsic::x86_sse41_round_ps:
6499 handleRoundPdPsIntrinsic(I);
6500 break;
6501
6502 case Intrinsic::x86_sse41_round_sd:
6503 case Intrinsic::x86_sse41_round_ss:
6504 handleUnarySdSsIntrinsic(I);
6505 break;
6506
6507 case Intrinsic::x86_sse2_max_sd:
6508 case Intrinsic::x86_sse_max_ss:
6509 case Intrinsic::x86_sse2_min_sd:
6510 case Intrinsic::x86_sse_min_ss:
6511 handleBinarySdSsIntrinsic(I);
6512 break;
6513
6514 case Intrinsic::x86_avx_vtestc_pd:
6515 case Intrinsic::x86_avx_vtestc_pd_256:
6516 case Intrinsic::x86_avx_vtestc_ps:
6517 case Intrinsic::x86_avx_vtestc_ps_256:
6518 case Intrinsic::x86_avx_vtestnzc_pd:
6519 case Intrinsic::x86_avx_vtestnzc_pd_256:
6520 case Intrinsic::x86_avx_vtestnzc_ps:
6521 case Intrinsic::x86_avx_vtestnzc_ps_256:
6522 case Intrinsic::x86_avx_vtestz_pd:
6523 case Intrinsic::x86_avx_vtestz_pd_256:
6524 case Intrinsic::x86_avx_vtestz_ps:
6525 case Intrinsic::x86_avx_vtestz_ps_256:
6526 case Intrinsic::x86_avx_ptestc_256:
6527 case Intrinsic::x86_avx_ptestnzc_256:
6528 case Intrinsic::x86_avx_ptestz_256:
6529 case Intrinsic::x86_sse41_ptestc:
6530 case Intrinsic::x86_sse41_ptestnzc:
6531 case Intrinsic::x86_sse41_ptestz:
6532 handleVtestIntrinsic(I);
6533 break;
6534
6535 // Packed Horizontal Add/Subtract
6536 case Intrinsic::x86_ssse3_phadd_w:
6537 case Intrinsic::x86_ssse3_phadd_w_128:
6538 case Intrinsic::x86_ssse3_phsub_w:
6539 case Intrinsic::x86_ssse3_phsub_w_128:
6540 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1,
6541 /*ReinterpretElemWidth=*/16);
6542 break;
6543
6544 case Intrinsic::x86_avx2_phadd_w:
6545 case Intrinsic::x86_avx2_phsub_w:
6546 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/2,
6547 /*ReinterpretElemWidth=*/16);
6548 break;
6549
6550 // Packed Horizontal Add/Subtract
6551 case Intrinsic::x86_ssse3_phadd_d:
6552 case Intrinsic::x86_ssse3_phadd_d_128:
6553 case Intrinsic::x86_ssse3_phsub_d:
6554 case Intrinsic::x86_ssse3_phsub_d_128:
6555 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1,
6556 /*ReinterpretElemWidth=*/32);
6557 break;
6558
6559 case Intrinsic::x86_avx2_phadd_d:
6560 case Intrinsic::x86_avx2_phsub_d:
6561 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/2,
6562 /*ReinterpretElemWidth=*/32);
6563 break;
6564
6565 // Packed Horizontal Add/Subtract and Saturate
6566 case Intrinsic::x86_ssse3_phadd_sw:
6567 case Intrinsic::x86_ssse3_phadd_sw_128:
6568 case Intrinsic::x86_ssse3_phsub_sw:
6569 case Intrinsic::x86_ssse3_phsub_sw_128:
6570 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1,
6571 /*ReinterpretElemWidth=*/16);
6572 break;
6573
6574 case Intrinsic::x86_avx2_phadd_sw:
6575 case Intrinsic::x86_avx2_phsub_sw:
6576 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/2,
6577 /*ReinterpretElemWidth=*/16);
6578 break;
6579
6580 // Packed Single/Double Precision Floating-Point Horizontal Add
6581 case Intrinsic::x86_sse3_hadd_ps:
6582 case Intrinsic::x86_sse3_hadd_pd:
6583 case Intrinsic::x86_sse3_hsub_ps:
6584 case Intrinsic::x86_sse3_hsub_pd:
6585 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1);
6586 break;
6587
6588 case Intrinsic::x86_avx_hadd_pd_256:
6589 case Intrinsic::x86_avx_hadd_ps_256:
6590 case Intrinsic::x86_avx_hsub_pd_256:
6591 case Intrinsic::x86_avx_hsub_ps_256:
6592 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/2);
6593 break;
6594
6595 case Intrinsic::x86_avx_maskstore_ps:
6596 case Intrinsic::x86_avx_maskstore_pd:
6597 case Intrinsic::x86_avx_maskstore_ps_256:
6598 case Intrinsic::x86_avx_maskstore_pd_256:
6599 case Intrinsic::x86_avx2_maskstore_d:
6600 case Intrinsic::x86_avx2_maskstore_q:
6601 case Intrinsic::x86_avx2_maskstore_d_256:
6602 case Intrinsic::x86_avx2_maskstore_q_256: {
6603 handleAVXMaskedStore(I);
6604 break;
6605 }
6606
6607 case Intrinsic::x86_avx_maskload_ps:
6608 case Intrinsic::x86_avx_maskload_pd:
6609 case Intrinsic::x86_avx_maskload_ps_256:
6610 case Intrinsic::x86_avx_maskload_pd_256:
6611 case Intrinsic::x86_avx2_maskload_d:
6612 case Intrinsic::x86_avx2_maskload_q:
6613 case Intrinsic::x86_avx2_maskload_d_256:
6614 case Intrinsic::x86_avx2_maskload_q_256: {
6615 handleAVXMaskedLoad(I);
6616 break;
6617 }
6618
6619 // Packed
6620 case Intrinsic::x86_avx512fp16_add_ph_512:
6621 case Intrinsic::x86_avx512fp16_sub_ph_512:
6622 case Intrinsic::x86_avx512fp16_mul_ph_512:
6623 case Intrinsic::x86_avx512fp16_div_ph_512:
6624 case Intrinsic::x86_avx512fp16_max_ph_512:
6625 case Intrinsic::x86_avx512fp16_min_ph_512:
6626 case Intrinsic::x86_avx512_min_ps_512:
6627 case Intrinsic::x86_avx512_min_pd_512:
6628 case Intrinsic::x86_avx512_max_ps_512:
6629 case Intrinsic::x86_avx512_max_pd_512: {
6630 // These AVX512 variants contain the rounding mode as a trailing flag.
6631 // Earlier variants do not have a trailing flag and are already handled
6632 // by maybeHandleSimpleNomemIntrinsic(I, 0) via
6633 // maybeHandleUnknownIntrinsic.
6634 [[maybe_unused]] bool Success =
6635 maybeHandleSimpleNomemIntrinsic(I, /*trailingFlags=*/1);
6636 assert(Success);
6637 break;
6638 }
6639
6640 case Intrinsic::x86_avx_vpermilvar_pd:
6641 case Intrinsic::x86_avx_vpermilvar_pd_256:
6642 case Intrinsic::x86_avx512_vpermilvar_pd_512:
6643 case Intrinsic::x86_avx_vpermilvar_ps:
6644 case Intrinsic::x86_avx_vpermilvar_ps_256:
6645 case Intrinsic::x86_avx512_vpermilvar_ps_512: {
6646 handleAVXVpermilvar(I);
6647 break;
6648 }
6649
6650 case Intrinsic::x86_avx512_vpermi2var_d_128:
6651 case Intrinsic::x86_avx512_vpermi2var_d_256:
6652 case Intrinsic::x86_avx512_vpermi2var_d_512:
6653 case Intrinsic::x86_avx512_vpermi2var_hi_128:
6654 case Intrinsic::x86_avx512_vpermi2var_hi_256:
6655 case Intrinsic::x86_avx512_vpermi2var_hi_512:
6656 case Intrinsic::x86_avx512_vpermi2var_pd_128:
6657 case Intrinsic::x86_avx512_vpermi2var_pd_256:
6658 case Intrinsic::x86_avx512_vpermi2var_pd_512:
6659 case Intrinsic::x86_avx512_vpermi2var_ps_128:
6660 case Intrinsic::x86_avx512_vpermi2var_ps_256:
6661 case Intrinsic::x86_avx512_vpermi2var_ps_512:
6662 case Intrinsic::x86_avx512_vpermi2var_q_128:
6663 case Intrinsic::x86_avx512_vpermi2var_q_256:
6664 case Intrinsic::x86_avx512_vpermi2var_q_512:
6665 case Intrinsic::x86_avx512_vpermi2var_qi_128:
6666 case Intrinsic::x86_avx512_vpermi2var_qi_256:
6667 case Intrinsic::x86_avx512_vpermi2var_qi_512:
6668 handleAVXVpermi2var(I);
6669 break;
6670
6671 // Packed Shuffle
6672 // llvm.x86.sse.pshuf.w(<1 x i64>, i8)
6673 // llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>)
6674 // llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
6675 // llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)
6676 // llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)
6677 //
6678 // The following intrinsics are auto-upgraded:
6679 // llvm.x86.sse2.pshuf.d(<4 x i32>, i8)
  // llvm.x86.sse2.pshufh.w(<8 x i16>, i8)
6681 // llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
6682 case Intrinsic::x86_avx2_pshuf_b:
6683 case Intrinsic::x86_sse_pshuf_w:
6684 case Intrinsic::x86_ssse3_pshuf_b_128:
6685 case Intrinsic::x86_ssse3_pshuf_b:
6686 case Intrinsic::x86_avx512_pshuf_b_512:
6687 handleIntrinsicByApplyingToShadow(I, shadowIntrinsicID: I.getIntrinsicID(),
6688 /*trailingVerbatimArgs=*/1);
6689 break;
6690
6691 // AVX512 PMOV: Packed MOV, with truncation
6692 // Precisely handled by applying the same intrinsic to the shadow
6693 case Intrinsic::x86_avx512_mask_pmov_dw_128:
6694 case Intrinsic::x86_avx512_mask_pmov_db_128:
6695 case Intrinsic::x86_avx512_mask_pmov_qb_128:
6696 case Intrinsic::x86_avx512_mask_pmov_qw_128:
6697 case Intrinsic::x86_avx512_mask_pmov_qd_128:
6698 case Intrinsic::x86_avx512_mask_pmov_wb_128:
6699 case Intrinsic::x86_avx512_mask_pmov_dw_256:
6700 case Intrinsic::x86_avx512_mask_pmov_db_256:
6701 case Intrinsic::x86_avx512_mask_pmov_qb_256:
6702 case Intrinsic::x86_avx512_mask_pmov_qw_256:
6703 case Intrinsic::x86_avx512_mask_pmov_dw_512:
6704 case Intrinsic::x86_avx512_mask_pmov_db_512:
6705 case Intrinsic::x86_avx512_mask_pmov_qb_512:
6706 case Intrinsic::x86_avx512_mask_pmov_qw_512: {
6707 // Intrinsic::x86_avx512_mask_pmov_{qd,wb}_{256,512} were removed in
6708 // f608dc1f5775ee880e8ea30e2d06ab5a4a935c22
6709 handleIntrinsicByApplyingToShadow(I, shadowIntrinsicID: I.getIntrinsicID(),
6710 /*trailingVerbatimArgs=*/1);
6711 break;
6712 }
6713
6714 // AVX512 PMOV{S,US}: Packed MOV, with signed/unsigned saturation
6715 // Approximately handled using the corresponding truncation intrinsic
6716 // TODO: improve handleAVX512VectorDownConvert to precisely model saturation
6717 case Intrinsic::x86_avx512_mask_pmovs_dw_512:
6718 case Intrinsic::x86_avx512_mask_pmovus_dw_512: {
6719 handleIntrinsicByApplyingToShadow(I,
6720 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_dw_512,
6721 /*trailingVerbatimArgs=*/1);
6722 break;
6723 }
6724
6725 case Intrinsic::x86_avx512_mask_pmovs_dw_256:
6726 case Intrinsic::x86_avx512_mask_pmovus_dw_256:
6727 handleIntrinsicByApplyingToShadow(I,
6728 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_dw_256,
6729 /*trailingVerbatimArgs=*/1);
6730 break;
6731
6732 case Intrinsic::x86_avx512_mask_pmovs_dw_128:
6733 case Intrinsic::x86_avx512_mask_pmovus_dw_128:
6734 handleIntrinsicByApplyingToShadow(I,
6735 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_dw_128,
6736 /*trailingVerbatimArgs=*/1);
6737 break;
6738
6739 case Intrinsic::x86_avx512_mask_pmovs_db_512:
6740 case Intrinsic::x86_avx512_mask_pmovus_db_512: {
6741 handleIntrinsicByApplyingToShadow(I,
6742 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_db_512,
6743 /*trailingVerbatimArgs=*/1);
6744 break;
6745 }
6746
6747 case Intrinsic::x86_avx512_mask_pmovs_db_256:
6748 case Intrinsic::x86_avx512_mask_pmovus_db_256:
6749 handleIntrinsicByApplyingToShadow(I,
6750 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_db_256,
6751 /*trailingVerbatimArgs=*/1);
6752 break;
6753
6754 case Intrinsic::x86_avx512_mask_pmovs_db_128:
6755 case Intrinsic::x86_avx512_mask_pmovus_db_128:
6756 handleIntrinsicByApplyingToShadow(I,
6757 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_db_128,
6758 /*trailingVerbatimArgs=*/1);
6759 break;
6760
6761 case Intrinsic::x86_avx512_mask_pmovs_qb_512:
6762 case Intrinsic::x86_avx512_mask_pmovus_qb_512: {
6763 handleIntrinsicByApplyingToShadow(I,
6764 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_qb_512,
6765 /*trailingVerbatimArgs=*/1);
6766 break;
6767 }
6768
6769 case Intrinsic::x86_avx512_mask_pmovs_qb_256:
6770 case Intrinsic::x86_avx512_mask_pmovus_qb_256:
6771 handleIntrinsicByApplyingToShadow(I,
6772 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_qb_256,
6773 /*trailingVerbatimArgs=*/1);
6774 break;
6775
6776 case Intrinsic::x86_avx512_mask_pmovs_qb_128:
6777 case Intrinsic::x86_avx512_mask_pmovus_qb_128:
6778 handleIntrinsicByApplyingToShadow(I,
6779 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_qb_128,
6780 /*trailingVerbatimArgs=*/1);
6781 break;
6782
6783 case Intrinsic::x86_avx512_mask_pmovs_qw_512:
6784 case Intrinsic::x86_avx512_mask_pmovus_qw_512: {
6785 handleIntrinsicByApplyingToShadow(I,
6786 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_qw_512,
6787 /*trailingVerbatimArgs=*/1);
6788 break;
6789 }
6790
6791 case Intrinsic::x86_avx512_mask_pmovs_qw_256:
6792 case Intrinsic::x86_avx512_mask_pmovus_qw_256:
6793 handleIntrinsicByApplyingToShadow(I,
6794 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_qw_256,
6795 /*trailingVerbatimArgs=*/1);
6796 break;
6797
6798 case Intrinsic::x86_avx512_mask_pmovs_qw_128:
6799 case Intrinsic::x86_avx512_mask_pmovus_qw_128:
6800 handleIntrinsicByApplyingToShadow(I,
6801 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_qw_128,
6802 /*trailingVerbatimArgs=*/1);
6803 break;
6804
6805 case Intrinsic::x86_avx512_mask_pmovs_qd_128:
6806 case Intrinsic::x86_avx512_mask_pmovus_qd_128:
6807 handleIntrinsicByApplyingToShadow(I,
6808 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_qd_128,
6809 /*trailingVerbatimArgs=*/1);
6810 break;
6811
6812 case Intrinsic::x86_avx512_mask_pmovs_wb_128:
6813 case Intrinsic::x86_avx512_mask_pmovus_wb_128:
6814 handleIntrinsicByApplyingToShadow(I,
6815 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_wb_128,
6816 /*trailingVerbatimArgs=*/1);
6817 break;
6818
6819 case Intrinsic::x86_avx512_mask_pmovs_qd_256:
6820 case Intrinsic::x86_avx512_mask_pmovus_qd_256:
6821 case Intrinsic::x86_avx512_mask_pmovs_wb_256:
6822 case Intrinsic::x86_avx512_mask_pmovus_wb_256:
6823 case Intrinsic::x86_avx512_mask_pmovs_qd_512:
6824 case Intrinsic::x86_avx512_mask_pmovus_qd_512:
6825 case Intrinsic::x86_avx512_mask_pmovs_wb_512:
6826 case Intrinsic::x86_avx512_mask_pmovus_wb_512: {
6827 // Since Intrinsic::x86_avx512_mask_pmov_{qd,wb}_{256,512} do not exist,
6828 // we cannot use handleIntrinsicByApplyingToShadow. Instead, we call the
6829 // slow-path handler.
6830 handleAVX512VectorDownConvert(I);
6831 break;
6832 }
6833
  // AVX512/AVX10 Reciprocal Square Root
6835 // <16 x float> @llvm.x86.avx512.rsqrt14.ps.512
6836 // (<16 x float>, <16 x float>, i16)
6837 // <8 x float> @llvm.x86.avx512.rsqrt14.ps.256
6838 // (<8 x float>, <8 x float>, i8)
6839 // <4 x float> @llvm.x86.avx512.rsqrt14.ps.128
6840 // (<4 x float>, <4 x float>, i8)
6841 //
6842 // <8 x double> @llvm.x86.avx512.rsqrt14.pd.512
6843 // (<8 x double>, <8 x double>, i8)
6844 // <4 x double> @llvm.x86.avx512.rsqrt14.pd.256
6845 // (<4 x double>, <4 x double>, i8)
6846 // <2 x double> @llvm.x86.avx512.rsqrt14.pd.128
6847 // (<2 x double>, <2 x double>, i8)
6848 //
6849 // <32 x bfloat> @llvm.x86.avx10.mask.rsqrt.bf16.512
6850 // (<32 x bfloat>, <32 x bfloat>, i32)
6851 // <16 x bfloat> @llvm.x86.avx10.mask.rsqrt.bf16.256
6852 // (<16 x bfloat>, <16 x bfloat>, i16)
6853 // <8 x bfloat> @llvm.x86.avx10.mask.rsqrt.bf16.128
6854 // (<8 x bfloat>, <8 x bfloat>, i8)
6855 //
6856 // <32 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.512
6857 // (<32 x half>, <32 x half>, i32)
6858 // <16 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.256
6859 // (<16 x half>, <16 x half>, i16)
6860 // <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.128
6861 // (<8 x half>, <8 x half>, i8)
6862 //
6863 // TODO: 3-operand variants are not handled:
6864 // <2 x double> @llvm.x86.avx512.rsqrt14.sd
6865 // (<2 x double>, <2 x double>, <2 x double>, i8)
6866 // <4 x float> @llvm.x86.avx512.rsqrt14.ss
6867 // (<4 x float>, <4 x float>, <4 x float>, i8)
6868 // <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh
6869 // (<8 x half>, <8 x half>, <8 x half>, i8)
6870 case Intrinsic::x86_avx512_rsqrt14_ps_512:
6871 case Intrinsic::x86_avx512_rsqrt14_ps_256:
6872 case Intrinsic::x86_avx512_rsqrt14_ps_128:
6873 case Intrinsic::x86_avx512_rsqrt14_pd_512:
6874 case Intrinsic::x86_avx512_rsqrt14_pd_256:
6875 case Intrinsic::x86_avx512_rsqrt14_pd_128:
6876 case Intrinsic::x86_avx10_mask_rsqrt_bf16_512:
6877 case Intrinsic::x86_avx10_mask_rsqrt_bf16_256:
6878 case Intrinsic::x86_avx10_mask_rsqrt_bf16_128:
6879 case Intrinsic::x86_avx512fp16_mask_rsqrt_ph_512:
6880 case Intrinsic::x86_avx512fp16_mask_rsqrt_ph_256:
6881 case Intrinsic::x86_avx512fp16_mask_rsqrt_ph_128:
6882 handleAVX512VectorGenericMaskedFP(I, /*DataIndices=*/{0},
6883 /*WriteThruIndex=*/1,
6884 /*MaskIndex=*/2);
6885 break;
6886
  // AVX512/AVX10 Reciprocal
6888 // <16 x float> @llvm.x86.avx512.rcp14.ps.512
6889 // (<16 x float>, <16 x float>, i16)
6890 // <8 x float> @llvm.x86.avx512.rcp14.ps.256
6891 // (<8 x float>, <8 x float>, i8)
6892 // <4 x float> @llvm.x86.avx512.rcp14.ps.128
6893 // (<4 x float>, <4 x float>, i8)
6894 //
6895 // <8 x double> @llvm.x86.avx512.rcp14.pd.512
6896 // (<8 x double>, <8 x double>, i8)
6897 // <4 x double> @llvm.x86.avx512.rcp14.pd.256
6898 // (<4 x double>, <4 x double>, i8)
6899 // <2 x double> @llvm.x86.avx512.rcp14.pd.128
6900 // (<2 x double>, <2 x double>, i8)
6901 //
6902 // <32 x bfloat> @llvm.x86.avx10.mask.rcp.bf16.512
6903 // (<32 x bfloat>, <32 x bfloat>, i32)
6904 // <16 x bfloat> @llvm.x86.avx10.mask.rcp.bf16.256
6905 // (<16 x bfloat>, <16 x bfloat>, i16)
6906 // <8 x bfloat> @llvm.x86.avx10.mask.rcp.bf16.128
6907 // (<8 x bfloat>, <8 x bfloat>, i8)
6908 //
6909 // <32 x half> @llvm.x86.avx512fp16.mask.rcp.ph.512
6910 // (<32 x half>, <32 x half>, i32)
6911 // <16 x half> @llvm.x86.avx512fp16.mask.rcp.ph.256
6912 // (<16 x half>, <16 x half>, i16)
6913 // <8 x half> @llvm.x86.avx512fp16.mask.rcp.ph.128
6914 // (<8 x half>, <8 x half>, i8)
6915 //
6916 // TODO: 3-operand variants are not handled:
6917 // <2 x double> @llvm.x86.avx512.rcp14.sd
6918 // (<2 x double>, <2 x double>, <2 x double>, i8)
6919 // <4 x float> @llvm.x86.avx512.rcp14.ss
6920 // (<4 x float>, <4 x float>, <4 x float>, i8)
6921 // <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh
6922 // (<8 x half>, <8 x half>, <8 x half>, i8)
6923 case Intrinsic::x86_avx512_rcp14_ps_512:
6924 case Intrinsic::x86_avx512_rcp14_ps_256:
6925 case Intrinsic::x86_avx512_rcp14_ps_128:
6926 case Intrinsic::x86_avx512_rcp14_pd_512:
6927 case Intrinsic::x86_avx512_rcp14_pd_256:
6928 case Intrinsic::x86_avx512_rcp14_pd_128:
6929 case Intrinsic::x86_avx10_mask_rcp_bf16_512:
6930 case Intrinsic::x86_avx10_mask_rcp_bf16_256:
6931 case Intrinsic::x86_avx10_mask_rcp_bf16_128:
6932 case Intrinsic::x86_avx512fp16_mask_rcp_ph_512:
6933 case Intrinsic::x86_avx512fp16_mask_rcp_ph_256:
6934 case Intrinsic::x86_avx512fp16_mask_rcp_ph_128:
6935 handleAVX512VectorGenericMaskedFP(I, /*DataIndices=*/{0},
6936 /*WriteThruIndex=*/1,
6937 /*MaskIndex=*/2);
6938 break;
6939
6940 // <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512
6941 // (<32 x half>, i32, <32 x half>, i32, i32)
6942 // <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256
6943 // (<16 x half>, i32, <16 x half>, i32, i16)
6944 // <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128
6945 // (<8 x half>, i32, <8 x half>, i32, i8)
6946 //
6947 // <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512
6948 // (<16 x float>, i32, <16 x float>, i16, i32)
6949 // <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256
6950 // (<8 x float>, i32, <8 x float>, i8)
6951 // <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128
6952 // (<4 x float>, i32, <4 x float>, i8)
6953 //
6954 // <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512
6955 // (<8 x double>, i32, <8 x double>, i8, i32)
6956 // A Imm WriteThru Mask Rounding
6957 // <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256
6958 // (<4 x double>, i32, <4 x double>, i8)
6959 // <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128
6960 // (<2 x double>, i32, <2 x double>, i8)
6961 // A Imm WriteThru Mask
6962 //
6963 // <32 x bfloat> @llvm.x86.avx10.mask.rndscale.bf16.512
6964 // (<32 x bfloat>, i32, <32 x bfloat>, i32)
6965 // <16 x bfloat> @llvm.x86.avx10.mask.rndscale.bf16.256
6966 // (<16 x bfloat>, i32, <16 x bfloat>, i16)
6967 // <8 x bfloat> @llvm.x86.avx10.mask.rndscale.bf16.128
6968 // (<8 x bfloat>, i32, <8 x bfloat>, i8)
6969 //
6970 // Not supported: three vectors
6971 // - <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh
6972 // (<8 x half>, <8 x half>,<8 x half>, i8, i32, i32)
6973 // - <4 x float> @llvm.x86.avx512.mask.rndscale.ss
6974 // (<4 x float>, <4 x float>, <4 x float>, i8, i32, i32)
6975 // - <2 x double> @llvm.x86.avx512.mask.rndscale.sd
6976 // (<2 x double>, <2 x double>, <2 x double>, i8, i32,
6977 // i32)
6978 // A B WriteThru Mask Imm
6979 // Rounding
6980 case Intrinsic::x86_avx512fp16_mask_rndscale_ph_512:
6981 case Intrinsic::x86_avx512fp16_mask_rndscale_ph_256:
6982 case Intrinsic::x86_avx512fp16_mask_rndscale_ph_128:
6983 case Intrinsic::x86_avx512_mask_rndscale_ps_512:
6984 case Intrinsic::x86_avx512_mask_rndscale_ps_256:
6985 case Intrinsic::x86_avx512_mask_rndscale_ps_128:
6986 case Intrinsic::x86_avx512_mask_rndscale_pd_512:
6987 case Intrinsic::x86_avx512_mask_rndscale_pd_256:
6988 case Intrinsic::x86_avx512_mask_rndscale_pd_128:
6989 case Intrinsic::x86_avx10_mask_rndscale_bf16_512:
6990 case Intrinsic::x86_avx10_mask_rndscale_bf16_256:
6991 case Intrinsic::x86_avx10_mask_rndscale_bf16_128:
6992 handleAVX512VectorGenericMaskedFP(I, /*DataIndices=*/{0},
6993 /*WriteThruIndex=*/2,
6994 /*MaskIndex=*/3);
6995 break;
6996
6997 // AVX512 Vector Scale Float* Packed
6998 //
6999 // < 8 x double> @llvm.x86.avx512.mask.scalef.pd.512
7000 // (<8 x double>, <8 x double>, <8 x double>, i8, i32)
7001 // A B WriteThru Msk Round
7002 // < 4 x double> @llvm.x86.avx512.mask.scalef.pd.256
7003 // (<4 x double>, <4 x double>, <4 x double>, i8)
7004 // < 2 x double> @llvm.x86.avx512.mask.scalef.pd.128
7005 // (<2 x double>, <2 x double>, <2 x double>, i8)
7006 //
7007 // <16 x float> @llvm.x86.avx512.mask.scalef.ps.512
7008 // (<16 x float>, <16 x float>, <16 x float>, i16, i32)
7009 // < 8 x float> @llvm.x86.avx512.mask.scalef.ps.256
7010 // (<8 x float>, <8 x float>, <8 x float>, i8)
7011 // < 4 x float> @llvm.x86.avx512.mask.scalef.ps.128
7012 // (<4 x float>, <4 x float>, <4 x float>, i8)
7013 //
7014 // <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512
7015 // (<32 x half>, <32 x half>, <32 x half>, i32, i32)
7016 // <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256
7017 // (<16 x half>, <16 x half>, <16 x half>, i16)
7018 // < 8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128
7019 // (<8 x half>, <8 x half>, <8 x half>, i8)
7020 //
7021 // TODO: AVX10
7022 // <32 x bfloat> @llvm.x86.avx10.mask.scalef.bf16.512
7023 // (<32 x bfloat>, <32 x bfloat>, <32 x bfloat>, i32)
7024 // <16 x bfloat> @llvm.x86.avx10.mask.scalef.bf16.256
7025 // (<16 x bfloat>, <16 x bfloat>, <16 x bfloat>, i16)
7026 // < 8 x bfloat> @llvm.x86.avx10.mask.scalef.bf16.128
7027 // (<8 x bfloat>, <8 x bfloat>, <8 x bfloat>, i8)
7028 case Intrinsic::x86_avx512_mask_scalef_pd_512:
7029 case Intrinsic::x86_avx512_mask_scalef_pd_256:
7030 case Intrinsic::x86_avx512_mask_scalef_pd_128:
7031 case Intrinsic::x86_avx512_mask_scalef_ps_512:
7032 case Intrinsic::x86_avx512_mask_scalef_ps_256:
7033 case Intrinsic::x86_avx512_mask_scalef_ps_128:
7034 case Intrinsic::x86_avx512fp16_mask_scalef_ph_512:
7035 case Intrinsic::x86_avx512fp16_mask_scalef_ph_256:
7036 case Intrinsic::x86_avx512fp16_mask_scalef_ph_128:
7037 // The AVX512 512-bit operand variants have an extra operand (the
7038 // Rounding mode). The extra operand, if present, will be
7039 // automatically checked by the handler.
7040 handleAVX512VectorGenericMaskedFP(I, /*DataIndices=*/{0, 1},
7041 /*WriteThruIndex=*/2,
7042 /*MaskIndex=*/3);
7043 break;
7044
7045 // TODO: AVX512 Vector Scale Float* Scalar
7046 //
7047 // This is different from the Packed variant, because some bits are copied,
7048 // and some bits are zeroed.
7049 //
7050 // < 4 x float> @llvm.x86.avx512.mask.scalef.ss
7051 // (<4 x float>, <4 x float>, <4 x float>, i8, i32)
7052 //
7053 // < 2 x double> @llvm.x86.avx512.mask.scalef.sd
7054 // (<2 x double>, <2 x double>, <2 x double>, i8, i32)
7055 //
7056 // < 8 x half> @llvm.x86.avx512fp16.mask.scalef.sh
7057 // (<8 x half>, <8 x half>, <8 x half>, i8, i32)
7058
7059 // AVX512 FP16 Arithmetic
7060 case Intrinsic::x86_avx512fp16_mask_add_sh_round:
7061 case Intrinsic::x86_avx512fp16_mask_sub_sh_round:
7062 case Intrinsic::x86_avx512fp16_mask_mul_sh_round:
7063 case Intrinsic::x86_avx512fp16_mask_div_sh_round:
7064 case Intrinsic::x86_avx512fp16_mask_max_sh_round:
7065 case Intrinsic::x86_avx512fp16_mask_min_sh_round: {
7066 visitGenericScalarHalfwordInst(I);
7067 break;
7068 }
7069
7070 // AVX Galois Field New Instructions
7071 case Intrinsic::x86_vgf2p8affineqb_128:
7072 case Intrinsic::x86_vgf2p8affineqb_256:
7073 case Intrinsic::x86_vgf2p8affineqb_512:
7074 handleAVXGF2P8Affine(I);
7075 break;
7076
7077 default:
7078 return false;
7079 }
7080
7081 return true;
7082 }
7083
  // Instrument an AArch64 NEON SIMD intrinsic.
  //
  // Returns true if the intrinsic was recognized and handled; returns false
  // (leaving the instruction untouched) so the caller can fall through to the
  // generic handlers.
  bool maybeHandleArmSIMDIntrinsic(IntrinsicInst &I) {
    switch (I.getIntrinsicID()) {
    // Two operands e.g.,
    // - <8 x i8> @llvm.aarch64.neon.rshrn.v8i8 (<8 x i16>, i32)
    // - <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>)
    case Intrinsic::aarch64_neon_rshrn:
    case Intrinsic::aarch64_neon_sqrshl:
    case Intrinsic::aarch64_neon_sqrshrn:
    case Intrinsic::aarch64_neon_sqrshrun:
    case Intrinsic::aarch64_neon_sqshl:
    case Intrinsic::aarch64_neon_sqshlu:
    case Intrinsic::aarch64_neon_sqshrn:
    case Intrinsic::aarch64_neon_sqshrun:
    case Intrinsic::aarch64_neon_srshl:
    case Intrinsic::aarch64_neon_sshl:
    case Intrinsic::aarch64_neon_uqrshl:
    case Intrinsic::aarch64_neon_uqrshrn:
    case Intrinsic::aarch64_neon_uqshl:
    case Intrinsic::aarch64_neon_uqshrn:
    case Intrinsic::aarch64_neon_urshl:
    case Intrinsic::aarch64_neon_ushl:
      // These are handled as non-variable shifts: the shift amount is the
      // second operand (scalar immediate or per-lane vector).
      handleVectorShiftIntrinsic(I, /* Variable */ false);
      break;

    // Vector Shift Left/Right and Insert
    //
    // Three operands e.g.,
    // - <4 x i16> @llvm.aarch64.neon.vsli.v4i16
    // (<4 x i16> %a, <4 x i16> %b, i32 %n)
    // - <16 x i8> @llvm.aarch64.neon.vsri.v16i8
    // (<16 x i8> %a, <16 x i8> %b, i32 %n)
    //
    // %b is shifted by %n bits, and the "missing" bits are filled in with %a
    // (instead of zero-extending/sign-extending).
    case Intrinsic::aarch64_neon_vsli:
    case Intrinsic::aarch64_neon_vsri:
      // Apply the same intrinsic to the shadows; the shift count (last arg)
      // is passed through verbatim.
      handleIntrinsicByApplyingToShadow(I, shadowIntrinsicID: I.getIntrinsicID(),
                                        /*trailingVerbatimArgs=*/1);
      break;

    // TODO: handling max/min similarly to AND/OR may be more precise
    // Floating-Point Maximum/Minimum Pairwise
    case Intrinsic::aarch64_neon_fmaxp:
    case Intrinsic::aarch64_neon_fminp:
    // Floating-Point Maximum/Minimum Number Pairwise
    case Intrinsic::aarch64_neon_fmaxnmp:
    case Intrinsic::aarch64_neon_fminnmp:
    // Signed/Unsigned Maximum/Minimum Pairwise
    case Intrinsic::aarch64_neon_smaxp:
    case Intrinsic::aarch64_neon_sminp:
    case Intrinsic::aarch64_neon_umaxp:
    case Intrinsic::aarch64_neon_uminp:
    // Add Pairwise
    case Intrinsic::aarch64_neon_addp:
    // Floating-point Add Pairwise
    case Intrinsic::aarch64_neon_faddp:
    // Add Long Pairwise
    case Intrinsic::aarch64_neon_saddlp:
    case Intrinsic::aarch64_neon_uaddlp: {
      // All pairwise ops: OR the shadows of each pair of adjacent elements.
      handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1);
      break;
    }

    // Floating-point Convert to integer, rounding to nearest with ties to Away
    case Intrinsic::aarch64_neon_fcvtas:
    case Intrinsic::aarch64_neon_fcvtau:
    // Floating-point convert to integer, rounding toward minus infinity
    case Intrinsic::aarch64_neon_fcvtms:
    case Intrinsic::aarch64_neon_fcvtmu:
    // Floating-point convert to integer, rounding to nearest with ties to even
    case Intrinsic::aarch64_neon_fcvtns:
    case Intrinsic::aarch64_neon_fcvtnu:
    // Floating-point convert to integer, rounding toward plus infinity
    case Intrinsic::aarch64_neon_fcvtps:
    case Intrinsic::aarch64_neon_fcvtpu:
    // Floating-point Convert to integer, rounding toward Zero
    case Intrinsic::aarch64_neon_fcvtzs:
    case Intrinsic::aarch64_neon_fcvtzu:
    // Floating-point convert to lower precision narrow, rounding to odd
    case Intrinsic::aarch64_neon_fcvtxn:
    // Vector Conversions Between Half-Precision and Single-Precision
    case Intrinsic::aarch64_neon_vcvthf2fp:
    case Intrinsic::aarch64_neon_vcvtfp2hf:
      handleNEONVectorConvertIntrinsic(I, /*FixedPoint=*/false);
      break;

    // Vector Conversions Between Fixed-Point and Floating-Point
    case Intrinsic::aarch64_neon_vcvtfxs2fp:
    case Intrinsic::aarch64_neon_vcvtfp2fxs:
    case Intrinsic::aarch64_neon_vcvtfxu2fp:
    case Intrinsic::aarch64_neon_vcvtfp2fxu:
      handleNEONVectorConvertIntrinsic(I, /*FixedPoint=*/true);
      break;

    // TODO: bfloat conversions
    // - bfloat @llvm.aarch64.neon.bfcvt(float)
    // - <8 x bfloat> @llvm.aarch64.neon.bfcvtn(<4 x float>)
    // - <8 x bfloat> @llvm.aarch64.neon.bfcvtn2(<8 x bfloat>, <4 x float>)

    // Add reduction to scalar
    case Intrinsic::aarch64_neon_faddv:
    case Intrinsic::aarch64_neon_saddv:
    case Intrinsic::aarch64_neon_uaddv:
    // Signed/Unsigned min/max (Vector)
    // TODO: handling similarly to AND/OR may be more precise.
    case Intrinsic::aarch64_neon_smaxv:
    case Intrinsic::aarch64_neon_sminv:
    case Intrinsic::aarch64_neon_umaxv:
    case Intrinsic::aarch64_neon_uminv:
    // Floating-point min/max (vector)
    // The f{min,max}"nm"v variants handle NaN differently than f{min,max}v,
    // but our shadow propagation is the same.
    case Intrinsic::aarch64_neon_fmaxv:
    case Intrinsic::aarch64_neon_fminv:
    case Intrinsic::aarch64_neon_fmaxnmv:
    case Intrinsic::aarch64_neon_fminnmv:
    // Sum long across vector
    case Intrinsic::aarch64_neon_saddlv:
    case Intrinsic::aarch64_neon_uaddlv:
      handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true);
      break;

    case Intrinsic::aarch64_neon_ld1x2:
    case Intrinsic::aarch64_neon_ld1x3:
    case Intrinsic::aarch64_neon_ld1x4:
    case Intrinsic::aarch64_neon_ld2:
    case Intrinsic::aarch64_neon_ld3:
    case Intrinsic::aarch64_neon_ld4:
    case Intrinsic::aarch64_neon_ld2r:
    case Intrinsic::aarch64_neon_ld3r:
    case Intrinsic::aarch64_neon_ld4r: {
      handleNEONVectorLoad(I, /*WithLane=*/false);
      break;
    }

    case Intrinsic::aarch64_neon_ld2lane:
    case Intrinsic::aarch64_neon_ld3lane:
    case Intrinsic::aarch64_neon_ld4lane: {
      handleNEONVectorLoad(I, /*WithLane=*/true);
      break;
    }

    // Saturating extract narrow
    case Intrinsic::aarch64_neon_sqxtn:
    case Intrinsic::aarch64_neon_sqxtun:
    case Intrinsic::aarch64_neon_uqxtn:
      // These only have one argument, but we (ab)use handleShadowOr because it
      // does work on single argument intrinsics and will typecast the shadow
      // (and update the origin).
      handleShadowOr(I);
      break;

    case Intrinsic::aarch64_neon_st1x2:
    case Intrinsic::aarch64_neon_st1x3:
    case Intrinsic::aarch64_neon_st1x4:
    case Intrinsic::aarch64_neon_st2:
    case Intrinsic::aarch64_neon_st3:
    case Intrinsic::aarch64_neon_st4: {
      handleNEONVectorStoreIntrinsic(I, useLane: false);
      break;
    }

    case Intrinsic::aarch64_neon_st2lane:
    case Intrinsic::aarch64_neon_st3lane:
    case Intrinsic::aarch64_neon_st4lane: {
      handleNEONVectorStoreIntrinsic(I, useLane: true);
      break;
    }

    // Arm NEON vector table intrinsics have the source/table register(s) as
    // arguments, followed by the index register. They return the output.
    //
    // 'TBL writes a zero if an index is out-of-range, while TBX leaves the
    // original value unchanged in the destination register.'
    // Conveniently, zero denotes a clean shadow, which means out-of-range
    // indices for TBL will initialize the user data with zero and also clean
    // the shadow. (For TBX, neither the user data nor the shadow will be
    // updated, which is also correct.)
    case Intrinsic::aarch64_neon_tbl1:
    case Intrinsic::aarch64_neon_tbl2:
    case Intrinsic::aarch64_neon_tbl3:
    case Intrinsic::aarch64_neon_tbl4:
    case Intrinsic::aarch64_neon_tbx1:
    case Intrinsic::aarch64_neon_tbx2:
    case Intrinsic::aarch64_neon_tbx3:
    case Intrinsic::aarch64_neon_tbx4: {
      // The last trailing argument (index register) should be handled verbatim
      handleIntrinsicByApplyingToShadow(
          I, /*shadowIntrinsicID=*/I.getIntrinsicID(),
          /*trailingVerbatimArgs*/ 1);
      break;
    }

    case Intrinsic::aarch64_neon_fmulx:
    case Intrinsic::aarch64_neon_pmul:
    case Intrinsic::aarch64_neon_pmull:
    case Intrinsic::aarch64_neon_smull:
    case Intrinsic::aarch64_neon_pmull64:
    case Intrinsic::aarch64_neon_umull: {
      handleNEONVectorMultiplyIntrinsic(I);
      break;
    }

    case Intrinsic::aarch64_neon_smmla:
    case Intrinsic::aarch64_neon_ummla:
    case Intrinsic::aarch64_neon_usmmla:
    case Intrinsic::aarch64_neon_bfmmla:
      handleNEONMatrixMultiply(I);
      break;

    // <2 x i32> @llvm.aarch64.neon.{u,s,us}dot.v2i32.v8i8
    // (<2 x i32> %acc, <8 x i8> %a, <8 x i8> %b)
    // <4 x i32> @llvm.aarch64.neon.{u,s,us}dot.v4i32.v16i8
    // (<4 x i32> %acc, <16 x i8> %a, <16 x i8> %b)
    case Intrinsic::aarch64_neon_sdot:
    case Intrinsic::aarch64_neon_udot:
    case Intrinsic::aarch64_neon_usdot:
      handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/4,
                                      /*ZeroPurifies=*/true,
                                      /*EltSizeInBits=*/0,
                                      /*Lanes=*/kBothLanes);
      break;

    // <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16
    // (<2 x float> %acc, <4 x bfloat> %a, <4 x bfloat> %b)
    // <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16
    // (<4 x float> %acc, <8 x bfloat> %a, <8 x bfloat> %b)
    case Intrinsic::aarch64_neon_bfdot:
      handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
                                      /*ZeroPurifies=*/false,
                                      /*EltSizeInBits=*/0,
                                      /*Lanes=*/kBothLanes);
      break;

    // Floating-Point Absolute Compare Greater Than/Equal
    case Intrinsic::aarch64_neon_facge:
    case Intrinsic::aarch64_neon_facgt:
      handleVectorComparePackedIntrinsic(I, /*PredicateAsOperand=*/false);
      break;

    // Not a recognized AArch64 NEON intrinsic.
    default:
      return false;
    }

    return true;
  }
7330
7331 void visitIntrinsicInst(IntrinsicInst &I) {
7332 if (maybeHandleCrossPlatformIntrinsic(I))
7333 return;
7334
7335 if (maybeHandleX86SIMDIntrinsic(I))
7336 return;
7337
7338 if (maybeHandleArmSIMDIntrinsic(I))
7339 return;
7340
7341 if (maybeHandleUnknownIntrinsic(I))
7342 return;
7343
7344 visitInstruction(I);
7345 }
7346
  // Instrument a call to the libatomic load helper (args, per the operand
  // extraction below: size, src, dst, ordering). The library itself is not
  // instrumented, so we copy the source shadow/origin to the destination
  // after the call.
  void visitLibAtomicLoad(CallBase &CB) {
    // Since we use getNextNode here, we can't have CB terminate the BB.
    assert(isa<CallInst>(CB));

    IRBuilder<> IRB(&CB);
    Value *Size = CB.getArgOperand(i: 0);
    Value *SrcPtr = CB.getArgOperand(i: 1);
    Value *DstPtr = CB.getArgOperand(i: 2);
    Value *Ordering = CB.getArgOperand(i: 3);
    // Convert the call to have at least Acquire ordering to make sure
    // the shadow operations aren't reordered before it.
    Value *NewOrdering =
        IRB.CreateExtractElement(Vec: makeAddAcquireOrderingTable(IRB), Idx: Ordering);
    CB.setArgOperand(i: 3, v: NewOrdering);

    // Shadow/origin propagation is inserted *after* the call.
    NextNodeIRBuilder NextIRB(&CB);
    Value *SrcShadowPtr, *SrcOriginPtr;
    std::tie(args&: SrcShadowPtr, args&: SrcOriginPtr) =
        getShadowOriginPtr(Addr: SrcPtr, IRB&: NextIRB, ShadowTy: NextIRB.getInt8Ty(), Alignment: Align(1),
                           /*isStore*/ false);
    Value *DstShadowPtr =
        getShadowOriginPtr(Addr: DstPtr, IRB&: NextIRB, ShadowTy: NextIRB.getInt8Ty(), Alignment: Align(1),
                           /*isStore*/ true)
            .first;

    // The shadow of the loaded bytes becomes the shadow of the stored bytes.
    NextIRB.CreateMemCpy(Dst: DstShadowPtr, DstAlign: Align(1), Src: SrcShadowPtr, SrcAlign: Align(1), Size);
    if (MS.TrackOrigins) {
      // A single origin value (read from the source) is applied to the whole
      // destination range via the runtime.
      Value *SrcOrigin = NextIRB.CreateAlignedLoad(Ty: MS.OriginTy, Ptr: SrcOriginPtr,
                                                   Align: kMinOriginAlignment);
      Value *NewOrigin = updateOrigin(V: SrcOrigin, IRB&: NextIRB);
      NextIRB.CreateCall(Callee: MS.MsanSetOriginFn, Args: {DstPtr, Size, NewOrigin});
    }
  }
7380
  // Instrument a call to the libatomic store helper: the destination bytes
  // are always marked fully initialized (see file header for the treatment
  // of atomics).
  void visitLibAtomicStore(CallBase &CB) {
    IRBuilder<> IRB(&CB);
    Value *Size = CB.getArgOperand(i: 0);
    Value *DstPtr = CB.getArgOperand(i: 2);
    Value *Ordering = CB.getArgOperand(i: 3);
    // Convert the call to have at least Release ordering to make sure
    // the shadow operations aren't reordered after it.
    Value *NewOrdering =
        IRB.CreateExtractElement(Vec: makeAddReleaseOrderingTable(IRB), Idx: Ordering);
    CB.setArgOperand(i: 3, v: NewOrdering);

    Value *DstShadowPtr =
        getShadowOriginPtr(Addr: DstPtr, IRB, ShadowTy: IRB.getInt8Ty(), Alignment: Align(1),
                           /*isStore*/ true)
            .first;

    // Atomic store always paints clean shadow/origin. See file header.
    IRB.CreateMemSet(Ptr: DstShadowPtr, Val: getCleanShadow(OrigTy: IRB.getInt8Ty()), Size,
                     Align: Align(1));
  }
7401
  // Instrument a call/invoke/callbr: for each argument, either eagerly check
  // its shadow or spill it to the __msan_param_tls slots; after the call,
  // load the return value's shadow from the retval TLS slot.
  void visitCallBase(CallBase &CB) {
    assert(!CB.getMetadata(LLVMContext::MD_nosanitize));
    if (CB.isInlineAsm()) {
      // For inline asm (either a call to asm function, or callbr instruction),
      // do the usual thing: check argument shadow and mark all outputs as
      // clean. Note that any side effects of the inline asm that are not
      // immediately visible in its constraints are not handled.
      if (ClHandleAsmConservative)
        visitAsmInstruction(I&: CB);
      else
        visitInstruction(I&: CB);
      return;
    }
    LibFunc LF;
    if (TLI->getLibFunc(CB, F&: LF)) {
      // libatomic.a functions need to have special handling because there isn't
      // a good way to intercept them or compile the library with
      // instrumentation.
      switch (LF) {
      case LibFunc_atomic_load:
        if (!isa<CallInst>(Val: CB)) {
          llvm::errs() << "MSAN -- cannot instrument invoke of libatomic load."
                          "Ignoring!\n";
          break;
        }
        visitLibAtomicLoad(CB);
        return;
      case LibFunc_atomic_store:
        visitLibAtomicStore(CB);
        return;
      default:
        break;
      }
    }

    if (auto *Call = dyn_cast<CallInst>(Val: &CB)) {
      assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere");

      // We are going to insert code that relies on the fact that the callee
      // will become a non-readonly function after it is instrumented by us. To
      // prevent this code from being optimized out, mark that function
      // non-readonly in advance.
      // TODO: We can likely do better than dropping memory() completely here.
      AttributeMask B;
      B.addAttribute(Val: Attribute::Memory).addAttribute(Val: Attribute::Speculatable);

      Call->removeFnAttrs(AttrsToRemove: B);
      if (Function *Func = Call->getCalledFunction()) {
        Func->removeFnAttrs(Attrs: B);
      }

      maybeMarkSanitizerLibraryCallNoBuiltin(CI: Call, TLI);
    }
    IRBuilder<> IRB(&CB);
    bool MayCheckCall = MS.EagerChecks;
    if (Function *Func = CB.getCalledFunction()) {
      // __sanitizer_unaligned_{load,store} functions may be called by users
      // and always expect shadows in the TLS. So don't check them.
      MayCheckCall &= !Func->getName().starts_with(Prefix: "__sanitizer_unaligned_");
    }

    // ArgOffset tracks the running byte offset into the param TLS region.
    unsigned ArgOffset = 0;
    LLVM_DEBUG(dbgs() << "  CallSite: " << CB << "\n");
    for (const auto &[i, A] : llvm::enumerate(First: CB.args())) {
      if (!A->getType()->isSized()) {
        LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << CB << "\n");
        continue;
      }

      if (A->getType()->isScalableTy()) {
        LLVM_DEBUG(dbgs() << "Arg " << i << " is vscale: " << CB << "\n");
        // Handle as noundef, but don't reserve tls slots.
        insertCheckShadowOf(Val: A, OrigIns: &CB);
        continue;
      }

      unsigned Size = 0;
      const DataLayout &DL = F.getDataLayout();

      bool ByVal = CB.paramHasAttr(ArgNo: i, Kind: Attribute::ByVal);
      bool NoUndef = CB.paramHasAttr(ArgNo: i, Kind: Attribute::NoUndef);
      bool EagerCheck = MayCheckCall && !ByVal && NoUndef;

      if (EagerCheck) {
        // noundef argument: report at the call site instead of spilling
        // shadow to TLS.
        insertCheckShadowOf(Val: A, OrigIns: &CB);
        Size = DL.getTypeAllocSize(Ty: A->getType());
      } else {
        [[maybe_unused]] Value *Store = nullptr;
        // Compute the Shadow for arg even if it is ByVal, because
        // in that case getShadow() will copy the actual arg shadow to
        // __msan_param_tls.
        Value *ArgShadow = getShadow(V: A);
        Value *ArgShadowBase = getShadowPtrForArgument(IRB, ArgOffset);
        LLVM_DEBUG(dbgs() << "  Arg#" << i << ": " << *A
                          << " Shadow: " << *ArgShadow << "\n");
        if (ByVal) {
          // ByVal requires some special handling as it's too big for a single
          // load
          assert(A->getType()->isPointerTy() &&
                 "ByVal argument is not a pointer!");
          Size = DL.getTypeAllocSize(Ty: CB.getParamByValType(ArgNo: i));
          // The param TLS region is finite; stop spilling once it is full.
          if (ArgOffset + Size > kParamTLSSize)
            break;
          const MaybeAlign ParamAlignment(CB.getParamAlign(ArgNo: i));
          MaybeAlign Alignment = std::nullopt;
          if (ParamAlignment)
            Alignment = std::min(a: *ParamAlignment, b: kShadowTLSAlignment);
          Value *AShadowPtr, *AOriginPtr;
          std::tie(args&: AShadowPtr, args&: AOriginPtr) =
              getShadowOriginPtr(Addr: A, IRB, ShadowTy: IRB.getInt8Ty(), Alignment,
                                 /*isStore*/ false);
          if (!PropagateShadow) {
            // Without propagation, spill an all-zero (clean) shadow.
            Store = IRB.CreateMemSet(Ptr: ArgShadowBase,
                                     Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
                                     Size, Align: Alignment);
          } else {
            Store = IRB.CreateMemCpy(Dst: ArgShadowBase, DstAlign: Alignment, Src: AShadowPtr,
                                     SrcAlign: Alignment, Size);
            if (MS.TrackOrigins) {
              Value *ArgOriginBase = getOriginPtrForArgument(IRB, ArgOffset);
              // FIXME: OriginSize should be:
              // alignTo(A % kMinOriginAlignment + Size, kMinOriginAlignment)
              unsigned OriginSize = alignTo(Size, A: kMinOriginAlignment);
              IRB.CreateMemCpy(
                  Dst: ArgOriginBase,
                  /* by origin_tls[ArgOffset] */ DstAlign: kMinOriginAlignment,
                  Src: AOriginPtr,
                  /* by getShadowOriginPtr */ SrcAlign: kMinOriginAlignment, Size: OriginSize);
            }
          }
        } else {
          // Any other parameters mean we need bit-grained tracking of uninit
          // data
          Size = DL.getTypeAllocSize(Ty: A->getType());
          // The param TLS region is finite; stop spilling once it is full.
          if (ArgOffset + Size > kParamTLSSize)
            break;
          Store = IRB.CreateAlignedStore(Val: ArgShadow, Ptr: ArgShadowBase,
                                         Align: kShadowTLSAlignment);
          Constant *Cst = dyn_cast<Constant>(Val: ArgShadow);
          // Skip the origin store when the shadow is a known-clean constant.
          if (MS.TrackOrigins && !(Cst && Cst->isNullValue())) {
            IRB.CreateStore(Val: getOrigin(V: A),
                            Ptr: getOriginPtrForArgument(IRB, ArgOffset));
          }
        }
        assert(Store != nullptr);
        LLVM_DEBUG(dbgs() << "  Param:" << *Store << "\n");
      }
      assert(Size != 0);
      ArgOffset += alignTo(Size, A: kShadowTLSAlignment);
    }
    LLVM_DEBUG(dbgs() << "  done with call args\n");

    FunctionType *FT = CB.getFunctionType();
    if (FT->isVarArg()) {
      VAHelper->visitCallBase(CB, IRB);
    }

    // Now, get the shadow for the RetVal.
    if (!CB.getType()->isSized())
      return;
    // Don't emit the epilogue for musttail call returns.
    if (isa<CallInst>(Val: CB) && cast<CallInst>(Val&: CB).isMustTailCall())
      return;

    // A noundef return under eager checks is by definition clean.
    if (MayCheckCall && CB.hasRetAttr(Kind: Attribute::NoUndef)) {
      setShadow(V: &CB, SV: getCleanShadow(V: &CB));
      setOrigin(V: &CB, Origin: getCleanOrigin());
      return;
    }

    IRBuilder<> IRBBefore(&CB);
    // Until we have full dynamic coverage, make sure the retval shadow is 0.
    Value *Base = getShadowPtrForRetval(IRB&: IRBBefore);
    IRBBefore.CreateAlignedStore(Val: getCleanShadow(V: &CB), Ptr: Base,
                                 Align: kShadowTLSAlignment);
    // Find the point right after the call where the retval shadow can be
    // loaded back from TLS.
    BasicBlock::iterator NextInsn;
    if (isa<CallInst>(Val: CB)) {
      NextInsn = ++CB.getIterator();
      assert(NextInsn != CB.getParent()->end());
    } else {
      BasicBlock *NormalDest = cast<InvokeInst>(Val&: CB).getNormalDest();
      if (!NormalDest->getSinglePredecessor()) {
        // FIXME: this case is tricky, so we are just conservative here.
        // Perhaps we need to split the edge between this BB and NormalDest,
        // but a naive attempt to use SplitEdge leads to a crash.
        setShadow(V: &CB, SV: getCleanShadow(V: &CB));
        setOrigin(V: &CB, Origin: getCleanOrigin());
        return;
      }
      // FIXME: NextInsn is likely in a basic block that has not been visited
      // yet. Anything inserted there will be instrumented by MSan later!
      NextInsn = NormalDest->getFirstInsertionPt();
      assert(NextInsn != NormalDest->end() &&
             "Could not find insertion point for retval shadow load");
    }
    IRBuilder<> IRBAfter(&*NextInsn);
    Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
        Ty: getShadowTy(V: &CB), Ptr: getShadowPtrForRetval(IRB&: IRBAfter), Align: kShadowTLSAlignment,
        Name: "_msret");
    setShadow(V: &CB, SV: RetvalShadow);
    if (MS.TrackOrigins)
      setOrigin(V: &CB, Origin: IRBAfter.CreateLoad(Ty: MS.OriginTy, Ptr: getOriginPtrForRetval()));
  }
7605
7606 bool isAMustTailRetVal(Value *RetVal) {
7607 if (auto *I = dyn_cast<BitCastInst>(Val: RetVal)) {
7608 RetVal = I->getOperand(i_nocapture: 0);
7609 }
7610 if (auto *I = dyn_cast<CallInst>(Val: RetVal)) {
7611 return I->isMustTailCall();
7612 }
7613 return false;
7614 }
7615
7616 void visitReturnInst(ReturnInst &I) {
7617 IRBuilder<> IRB(&I);
7618 Value *RetVal = I.getReturnValue();
7619 if (!RetVal)
7620 return;
7621 // Don't emit the epilogue for musttail call returns.
7622 if (isAMustTailRetVal(RetVal))
7623 return;
7624 Value *ShadowPtr = getShadowPtrForRetval(IRB);
7625 bool HasNoUndef = F.hasRetAttribute(Kind: Attribute::NoUndef);
7626 bool StoreShadow = !(MS.EagerChecks && HasNoUndef);
7627 // FIXME: Consider using SpecialCaseList to specify a list of functions that
7628 // must always return fully initialized values. For now, we hardcode "main".
7629 bool EagerCheck = (MS.EagerChecks && HasNoUndef) || (F.getName() == "main");
7630
7631 Value *Shadow = getShadow(V: RetVal);
7632 bool StoreOrigin = true;
7633 if (EagerCheck) {
7634 insertCheckShadowOf(Val: RetVal, OrigIns: &I);
7635 Shadow = getCleanShadow(V: RetVal);
7636 StoreOrigin = false;
7637 }
7638
7639 // The caller may still expect information passed over TLS if we pass our
7640 // check
7641 if (StoreShadow) {
7642 IRB.CreateAlignedStore(Val: Shadow, Ptr: ShadowPtr, Align: kShadowTLSAlignment);
7643 if (MS.TrackOrigins && StoreOrigin)
7644 IRB.CreateStore(Val: getOrigin(V: RetVal), Ptr: getOriginPtrForRetval());
7645 }
7646 }
7647
7648 void visitPHINode(PHINode &I) {
7649 IRBuilder<> IRB(&I);
7650 if (!PropagateShadow) {
7651 setShadow(V: &I, SV: getCleanShadow(V: &I));
7652 setOrigin(V: &I, Origin: getCleanOrigin());
7653 return;
7654 }
7655
7656 ShadowPHINodes.push_back(Elt: &I);
7657 setShadow(V: &I, SV: IRB.CreatePHI(Ty: getShadowTy(V: &I), NumReservedValues: I.getNumIncomingValues(),
7658 Name: "_msphi_s"));
7659 if (MS.TrackOrigins)
7660 setOrigin(
7661 V: &I, Origin: IRB.CreatePHI(Ty: MS.OriginTy, NumReservedValues: I.getNumIncomingValues(), Name: "_msphi_o"));
7662 }
7663
7664 Value *getLocalVarIdptr(AllocaInst &I) {
7665 ConstantInt *IntConst =
7666 ConstantInt::get(Ty: Type::getInt32Ty(C&: (*F.getParent()).getContext()), V: 0);
7667 return new GlobalVariable(*F.getParent(), IntConst->getType(),
7668 /*isConstant=*/false, GlobalValue::PrivateLinkage,
7669 IntConst);
7670 }
7671
7672 Value *getLocalVarDescription(AllocaInst &I) {
7673 return createPrivateConstGlobalForString(M&: *F.getParent(), Str: I.getName());
7674 }
7675
  // Userspace flavor: poison (or, when PoisonStack is off, unpoison) the
  // shadow of a stack allocation of Len bytes, and register its origin
  // id/description when origin tracking is enabled.
  void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
    if (PoisonStack && ClPoisonStackWithCall) {
      // Delegate the poisoning to the runtime.
      IRB.CreateCall(Callee: MS.MsanPoisonStackFn, Args: {&I, Len});
    } else {
      Value *ShadowBase, *OriginBase;
      std::tie(args&: ShadowBase, args&: OriginBase) = getShadowOriginPtr(
          Addr: &I, IRB, ShadowTy: IRB.getInt8Ty(), Alignment: Align(1), /*isStore*/ true);

      // When stack poisoning is disabled this memsets 0, i.e. it unpoisons.
      Value *PoisonValue = IRB.getInt8(C: PoisonStack ? ClPoisonStackPattern : 0);
      IRB.CreateMemSet(Ptr: ShadowBase, Val: PoisonValue, Size: Len, Align: I.getAlign());
    }

    if (PoisonStack && MS.TrackOrigins) {
      Value *Idptr = getLocalVarIdptr(I);
      if (ClPrintStackNames) {
        // Also pass the variable's name so reports can print it.
        Value *Descr = getLocalVarDescription(I);
        IRB.CreateCall(Callee: MS.MsanSetAllocaOriginWithDescriptionFn,
                       Args: {&I, Len, Idptr, Descr});
      } else {
        IRB.CreateCall(Callee: MS.MsanSetAllocaOriginNoDescriptionFn, Args: {&I, Len, Idptr});
      }
    }
  }
7699
7700 void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
7701 Value *Descr = getLocalVarDescription(I);
7702 if (PoisonStack) {
7703 IRB.CreateCall(Callee: MS.MsanPoisonAllocaFn, Args: {&I, Len, Descr});
7704 } else {
7705 IRB.CreateCall(Callee: MS.MsanUnpoisonAllocaFn, Args: {&I, Len});
7706 }
7707 }
7708
7709 void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
7710 if (!InsPoint)
7711 InsPoint = &I;
7712 NextNodeIRBuilder IRB(InsPoint);
7713 Value *Len = IRB.CreateAllocationSize(DestTy: MS.IntptrTy, AI: &I);
7714
7715 if (MS.CompileKernel)
7716 poisonAllocaKmsan(I, IRB, Len);
7717 else
7718 poisonAllocaUserspace(I, IRB, Len);
7719 }
7720
7721 void visitAllocaInst(AllocaInst &I) {
7722 setShadow(V: &I, SV: getCleanShadow(V: &I));
7723 setOrigin(V: &I, Origin: getCleanOrigin());
7724 // We'll get to this alloca later unless it's poisoned at the corresponding
7725 // llvm.lifetime.start.
7726 AllocaSet.insert(X: &I);
7727 }
7728
7729 void visitSelectInst(SelectInst &I) {
7730 // a = select b, c, d
7731 Value *B = I.getCondition();
7732 Value *C = I.getTrueValue();
7733 Value *D = I.getFalseValue();
7734
7735 handleSelectLikeInst(I, B, C, D);
7736 }
7737
  // Compute shadow and origin for a select-like operation a = select b, c, d,
  // where B is the condition and C/D are the two data operands.
  void handleSelectLikeInst(Instruction &I, Value *B, Value *C, Value *D) {
    IRBuilder<> IRB(&I);

    Value *Sb = getShadow(V: B);
    Value *Sc = getShadow(V: C);
    Value *Sd = getShadow(V: D);

    Value *Ob = MS.TrackOrigins ? getOrigin(V: B) : nullptr;
    Value *Oc = MS.TrackOrigins ? getOrigin(V: C) : nullptr;
    Value *Od = MS.TrackOrigins ? getOrigin(V: D) : nullptr;

    // Result shadow if condition shadow is 0.
    Value *Sa0 = IRB.CreateSelect(C: B, True: Sc, False: Sd);
    Value *Sa1;
    if (I.getType()->isAggregateType()) {
      // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
      // an extra "select". This results in much more compact IR.
      // Sa = select Sb, poisoned, (select b, Sc, Sd)
      Sa1 = getPoisonedShadow(ShadowTy: getShadowTy(OrigTy: I.getType()));
    } else if (isScalableNonVectorType(Ty: I.getType())) {
      // This is intended to handle target("aarch64.svcount"), which can't be
      // handled in the else branch because of incompatibility with CreateXor
      // ("The supported LLVM operations on this type are limited to load,
      // store, phi, select and alloca instructions").

      // TODO: this currently underapproximates. Use Arm SVE EOR in the else
      // branch as needed instead.
      Sa1 = getCleanShadow(OrigTy: getShadowTy(OrigTy: I.getType()));
    } else {
      // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
      // If Sb (condition is poisoned), look for bits in c and d that are equal
      // and both unpoisoned.
      // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.

      // Cast arguments to shadow-compatible type.
      C = CreateAppToShadowCast(IRB, V: C);
      D = CreateAppToShadowCast(IRB, V: D);

      // Result shadow if condition shadow is 1.
      Sa1 = IRB.CreateOr(Ops: {IRB.CreateXor(LHS: C, RHS: D), Sc, Sd});
    }
    Value *Sa = IRB.CreateSelect(C: Sb, True: Sa1, False: Sa0, Name: "_msprop_select");
    setShadow(V: &I, SV: Sa);
    if (MS.TrackOrigins) {
      // Origins are always i32, so any vector conditions must be flattened.
      // FIXME: consider tracking vector origins for app vectors?
      if (B->getType()->isVectorTy()) {
        B = convertToBool(V: B, IRB);
        Sb = convertToBool(V: Sb, IRB);
      }
      // a = select b, c, d
      // Oa = Sb ? Ob : (b ? Oc : Od)
      setOrigin(V: &I, Origin: IRB.CreateSelect(C: Sb, True: Ob, False: IRB.CreateSelect(C: B, True: Oc, False: Od)));
    }
  }
7793
  // Landing pad results are treated as fully initialized.
  void visitLandingPadInst(LandingPadInst &I) {
    // Do nothing.
    // See https://github.com/google/sanitizers/issues/504
    setShadow(V: &I, SV: getCleanShadow(V: &I));
    setOrigin(V: &I, Origin: getCleanOrigin());
  }
7800
  // Catchswitch results are treated as fully initialized.
  void visitCatchSwitchInst(CatchSwitchInst &I) {
    setShadow(V: &I, SV: getCleanShadow(V: &I));
    setOrigin(V: &I, Origin: getCleanOrigin());
  }
7805
  // Funclet pad results are treated as fully initialized.
  void visitFuncletPadInst(FuncletPadInst &I) {
    setShadow(V: &I, SV: getCleanShadow(V: &I));
    setOrigin(V: &I, Origin: getCleanOrigin());
  }
7810
  // GEP result shadow is the OR of the shadows of all operands (pointer and
  // indices).
  void visitGetElementPtrInst(GetElementPtrInst &I) { handleShadowOr(I); }
7812
7813 void visitExtractValueInst(ExtractValueInst &I) {
7814 IRBuilder<> IRB(&I);
7815 Value *Agg = I.getAggregateOperand();
7816 LLVM_DEBUG(dbgs() << "ExtractValue: " << I << "\n");
7817 Value *AggShadow = getShadow(V: Agg);
7818 LLVM_DEBUG(dbgs() << " AggShadow: " << *AggShadow << "\n");
7819 Value *ResShadow = IRB.CreateExtractValue(Agg: AggShadow, Idxs: I.getIndices());
7820 LLVM_DEBUG(dbgs() << " ResShadow: " << *ResShadow << "\n");
7821 setShadow(V: &I, SV: ResShadow);
7822 setOriginForNaryOp(I);
7823 }
7824
7825 void visitInsertValueInst(InsertValueInst &I) {
7826 IRBuilder<> IRB(&I);
7827 LLVM_DEBUG(dbgs() << "InsertValue: " << I << "\n");
7828 Value *AggShadow = getShadow(V: I.getAggregateOperand());
7829 Value *InsShadow = getShadow(V: I.getInsertedValueOperand());
7830 LLVM_DEBUG(dbgs() << " AggShadow: " << *AggShadow << "\n");
7831 LLVM_DEBUG(dbgs() << " InsShadow: " << *InsShadow << "\n");
7832 Value *Res = IRB.CreateInsertValue(Agg: AggShadow, Val: InsShadow, Idxs: I.getIndices());
7833 LLVM_DEBUG(dbgs() << " Res: " << *Res << "\n");
7834 setShadow(V: &I, SV: Res);
7835 setOriginForNaryOp(I);
7836 }
7837
7838 void dumpInst(Instruction &I, const Twine &Prefix) {
7839 // Instruction name only
7840 // For intrinsics, the full/overloaded name is used
7841 //
7842 // e.g., "call llvm.aarch64.neon.uqsub.v16i8"
7843 if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) {
7844 errs() << "ZZZ:" << Prefix << " call "
7845 << CI->getCalledFunction()->getName() << "\n";
7846 } else {
7847 errs() << "ZZZ:" << Prefix << " " << I.getOpcodeName() << "\n";
7848 }
7849
7850 // Instruction prototype (including return type and parameter types)
7851 // For intrinsics, we use the base/non-overloaded name
7852 //
7853 // e.g., "call <16 x i8> @llvm.aarch64.neon.uqsub(<16 x i8>, <16 x i8>)"
7854 unsigned NumOperands = I.getNumOperands();
7855 if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) {
7856 errs() << "YYY:" << Prefix << " call " << *I.getType() << " @";
7857
7858 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: CI))
7859 errs() << Intrinsic::getBaseName(id: II->getIntrinsicID());
7860 else
7861 errs() << CI->getCalledFunction()->getName();
7862
7863 errs() << "(";
7864
7865 // The last operand of a CallInst is the function itself.
7866 NumOperands--;
7867 } else
7868 errs() << "YYY:" << Prefix << " " << *I.getType() << " "
7869 << I.getOpcodeName() << "(";
7870
7871 for (size_t i = 0; i < NumOperands; i++) {
7872 if (i > 0)
7873 errs() << ", ";
7874
7875 errs() << *(I.getOperand(i)->getType());
7876 }
7877
7878 errs() << ")\n";
7879
7880 // Full instruction, including types and operand values
7881 // For intrinsics, the full/overloaded name is used
7882 //
7883 // e.g., "%vqsubq_v.i15 = call noundef <16 x i8>
7884 // @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %vext21.i,
7885 // <16 x i8> splat (i8 1)), !dbg !66"
7886 errs() << "QQQ:" << Prefix << " " << I << "\n";
7887 }
7888
  // resume re-raises an in-flight exception; no shadow work is needed.
  void visitResumeInst(ResumeInst &I) {
    LLVM_DEBUG(dbgs() << "Resume: " << I << "\n");
    // Nothing to do here.
  }
7893
  // cleanupret produces no value; no shadow work is needed.
  void visitCleanupReturnInst(CleanupReturnInst &CRI) {
    LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
    // Nothing to do here.
  }
7898
  // catchret produces no value; no shadow work is needed.
  void visitCatchReturnInst(CatchReturnInst &CRI) {
    LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
    // Nothing to do here.
  }
7903
  // Check one inline-asm operand; for an output pointer operand, additionally
  // mark the pointed-to element (of type ElemTy, taken from elementtype())
  // as initialized, since the asm is conservatively assumed to write it.
  void instrumentAsmArgument(Value *Operand, Type *ElemTy, Instruction &I,
                             IRBuilder<> &IRB, const DataLayout &DL,
                             bool isOutput) {
    // For each assembly argument, we check its value for being initialized.
    // If the argument is a pointer, we assume it points to a single element
    // of the corresponding type (or to a 8-byte word, if the type is unsized).
    // Each such pointer is instrumented with a call to the runtime library.
    Type *OpType = Operand->getType();
    // Check the operand value itself.
    insertCheckShadowOf(Val: Operand, OrigIns: &I);
    if (!OpType->isPointerTy() || !isOutput) {
      assert(!isOutput);
      return;
    }
    if (!ElemTy->isSized())
      return;
    auto Size = DL.getTypeStoreSize(Ty: ElemTy);
    Value *SizeVal = IRB.CreateTypeSize(Ty: MS.IntptrTy, Size);
    if (MS.CompileKernel) {
      // KMSAN: delegate the shadow update to the runtime.
      IRB.CreateCall(Callee: MS.MsanInstrumentAsmStoreFn, Args: {Operand, SizeVal});
    } else {
      // ElemTy, derived from elementtype(), does not encode the alignment of
      // the pointer. Conservatively assume that the shadow memory is unaligned.
      // When Size is large, avoid StoreInst as it would expand to many
      // instructions.
      auto [ShadowPtr, _] =
          getShadowOriginPtrUserspace(Addr: Operand, IRB, ShadowTy: IRB.getInt8Ty(), Alignment: Align(1));
      if (Size <= 32)
        IRB.CreateAlignedStore(Val: getCleanShadow(OrigTy: ElemTy), Ptr: ShadowPtr, Align: Align(1));
      else
        IRB.CreateMemSet(Ptr: ShadowPtr, Val: ConstantInt::getNullValue(Ty: IRB.getInt8Ty()),
                         Size: SizeVal, Align: Align(1));
    }
  }
7938
7939 /// Get the number of output arguments returned by pointers.
7940 int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
7941 int NumRetOutputs = 0;
7942 int NumOutputs = 0;
7943 Type *RetTy = cast<Value>(Val: CB)->getType();
7944 if (!RetTy->isVoidTy()) {
7945 // Register outputs are returned via the CallInst return value.
7946 auto *ST = dyn_cast<StructType>(Val: RetTy);
7947 if (ST)
7948 NumRetOutputs = ST->getNumElements();
7949 else
7950 NumRetOutputs = 1;
7951 }
7952 InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
7953 for (const InlineAsm::ConstraintInfo &Info : Constraints) {
7954 switch (Info.Type) {
7955 case InlineAsm::isOutput:
7956 NumOutputs++;
7957 break;
7958 default:
7959 break;
7960 }
7961 }
7962 return NumOutputs - NumRetOutputs;
7963 }
7964
  void visitAsmInstruction(Instruction &I) {
    // Conservative inline assembly handling: check for poisoned shadow of
    // asm() arguments, then unpoison the result and all the memory locations
    // pointed to by those arguments.
    // An inline asm() statement in C++ contains lists of input and output
    // arguments used by the assembly code. These are mapped to operands of the
    // CallInst as follows:
    // - nR register outputs ("=r") are returned by value in a single structure
    // (SSA value of the CallInst);
    // - nO other outputs ("=m" and others) are returned by pointer as first
    // nO operands of the CallInst;
    // - nI inputs ("r", "m" and others) are passed to CallInst as the
    // remaining nI operands.
    // The total number of asm() arguments in the source is nR+nO+nI, and the
    // corresponding CallInst has nO+nI+1 operands (the last operand is the
    // function to be called).
    const DataLayout &DL = F.getDataLayout();
    CallBase *CB = cast<CallBase>(Val: &I);
    IRBuilder<> IRB(&I);
    InlineAsm *IA = cast<InlineAsm>(Val: CB->getCalledOperand());
    int OutputArgs = getNumOutputArgs(IA, CB);
    // The last operand of a CallInst is the function itself.
    int NumOperands = CB->getNumOperands() - 1;

    // Check input arguments. Doing so before unpoisoning output arguments, so
    // that we won't overwrite uninit values before checking them.
    // Operands [OutputArgs, NumOperands) are the inputs.
    for (int i = OutputArgs; i < NumOperands; i++) {
      Value *Operand = CB->getOperand(i_nocapture: i);
      instrumentAsmArgument(Operand, ElemTy: CB->getParamElementType(ArgNo: i), I, IRB, DL,
                            /*isOutput*/ false);
    }
    // Unpoison output arguments. This must happen before the actual InlineAsm
    // call, so that the shadow for memory published in the asm() statement
    // remains valid.
    // Operands [0, OutputArgs) are the pointer outputs.
    for (int i = 0; i < OutputArgs; i++) {
      Value *Operand = CB->getOperand(i_nocapture: i);
      instrumentAsmArgument(Operand, ElemTy: CB->getParamElementType(ArgNo: i), I, IRB, DL,
                            /*isOutput*/ true);
    }

    // Treat the asm call's own result as fully initialized.
    setShadow(V: &I, SV: getCleanShadow(V: &I));
    setOrigin(V: &I, Origin: getCleanOrigin());
  }
8008
  void visitFreezeInst(FreezeInst &I) {
    // Freeze always returns a fully defined value, so its shadow and origin
    // are clean regardless of the operand's shadow.
    setShadow(V: &I, SV: getCleanShadow(V: &I));
    setOrigin(V: &I, Origin: getCleanOrigin());
  }
8014
8015 void visitInstruction(Instruction &I) {
8016 // Everything else: stop propagating and check for poisoned shadow.
8017 if (ClDumpStrictInstructions)
8018 dumpInst(I, Prefix: "Strict");
8019 LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
8020 for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
8021 Value *Operand = I.getOperand(i);
8022 if (Operand->getType()->isSized())
8023 insertCheckShadowOf(Val: Operand, OrigIns: &I);
8024 }
8025 setShadow(V: &I, SV: getCleanShadow(V: &I));
8026 setOrigin(V: &I, Origin: getCleanOrigin());
8027 }
8028};
8029
/// Common state and helpers shared by the target-specific VarArgHelper
/// implementations: collecting va_start calls and computing shadow/origin
/// addresses inside the __msan_va_arg_tls arrays.
struct VarArgHelperBase : public VarArgHelper {
  Function &F;
  MemorySanitizer &MS;
  MemorySanitizerVisitor &MSV;
  // va_start calls seen in F; consumed by finalizeInstrumentation().
  SmallVector<CallInst *, 16> VAStartInstrumentationList;
  // Size in bytes of the target's __va_list_tag structure.
  const unsigned VAListTagSize;

  VarArgHelperBase(Function &F, MemorySanitizer &MS,
                   MemorySanitizerVisitor &MSV, unsigned VAListTagSize)
      : F(F), MS(MS), MSV(MSV), VAListTagSize(VAListTagSize) {}

  /// Compute the shadow address (as an integer) for a given va_arg offset.
  Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
    Value *Base = IRB.CreatePointerCast(V: MS.VAArgTLS, DestTy: MS.IntptrTy);
    return IRB.CreateAdd(LHS: Base, RHS: ConstantInt::get(Ty: MS.IntptrTy, V: ArgOffset));
  }

  /// Compute the shadow address for a given va_arg.
  Value *getShadowPtrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
    return IRB.CreatePtrAdd(
        Ptr: MS.VAArgTLS, Offset: ConstantInt::get(Ty: MS.IntptrTy, V: ArgOffset), Name: "_msarg_va_s");
  }

  /// Compute the shadow address for a given va_arg. Returns null when
  /// ArgOffset + ArgSize would overflow __msan_va_arg_tls.
  Value *getShadowPtrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset,
                                   unsigned ArgSize) {
    // Make sure we don't overflow __msan_va_arg_tls.
    if (ArgOffset + ArgSize > kParamTLSSize)
      return nullptr;
    return getShadowPtrForVAArgument(IRB, ArgOffset);
  }

  /// Compute the origin address for a given va_arg.
  Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
    // getOriginPtrForVAArgument() is always called after
    // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
    // overflow.
    return IRB.CreatePtrAdd(Ptr: MS.VAArgOriginTLS,
                            Offset: ConstantInt::get(Ty: MS.IntptrTy, V: ArgOffset),
                            Name: "_msarg_va_o");
  }

  /// Zero the TLS shadow from BaseOffset to the end of the array for an
  /// argument whose full shadow did not fit.
  void CleanUnusedTLS(IRBuilder<> &IRB, Value *ShadowBase,
                      unsigned BaseOffset) {
    // The tails of __msan_va_arg_tls is not large enough to fit full
    // value shadow, but it will be copied to backup anyway. Make it
    // clean.
    if (BaseOffset >= kParamTLSSize)
      return;
    Value *TailSize =
        ConstantInt::getSigned(Ty: IRB.getInt32Ty(), V: kParamTLSSize - BaseOffset);
    IRB.CreateMemSet(Ptr: ShadowBase, Val: ConstantInt::getNullValue(Ty: IRB.getInt8Ty()),
                     Size: TailSize, Align: Align(8));
  }

  /// Write a clean (zero) shadow over the whole __va_list_tag pointed to by
  /// the first argument of a va_start/va_copy call.
  void unpoisonVAListTagForInst(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Value *VAListTag = I.getArgOperand(i: 0);
    const Align Alignment = Align(8);
    auto [ShadowPtr, OriginPtr] = MSV.getShadowOriginPtr(
        Addr: VAListTag, IRB, ShadowTy: IRB.getInt8Ty(), Alignment, /*isStore*/ true);
    // Unpoison the whole __va_list_tag.
    IRB.CreateMemSet(Ptr: ShadowPtr, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
                     Size: VAListTagSize, Align: Alignment, isVolatile: false);
  }

  void visitVAStartInst(VAStartInst &I) override {
    // Win64 varargs are not handled by this instrumentation.
    if (F.getCallingConv() == CallingConv::Win64)
      return;
    VAStartInstrumentationList.push_back(Elt: &I);
    unpoisonVAListTagForInst(I);
  }

  void visitVACopyInst(VACopyInst &I) override {
    // Win64 varargs are not handled by this instrumentation.
    if (F.getCallingConv() == CallingConv::Win64)
      return;
    unpoisonVAListTagForInst(I);
  }
};
8108
/// AMD64-specific implementation of VarArgHelper.
struct VarArgAMD64Helper : public VarArgHelperBase {
  // An unfortunate workaround for asymmetric lowering of va_arg stuff.
  // See a comment in visitCallBase for more details.
  static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
  static const unsigned AMD64FpEndOffsetSSE = 176;
  // If SSE is disabled, fp_offset in va_list is zero.
  static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;

  // End of the FP save area in the TLS layout; depends on whether the
  // function is built with SSE (see constructor).
  unsigned AMD64FpEndOffset;
  AllocaInst *VAArgTLSCopy = nullptr;
  AllocaInst *VAArgTLSOriginCopy = nullptr;
  Value *VAArgOverflowSize = nullptr;

  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };

  VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
                    MemorySanitizerVisitor &MSV)
      : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/24) {
    AMD64FpEndOffset = AMD64FpEndOffsetSSE;
    // A "-sse" entry in target-features means no FP register save area.
    for (const auto &Attr : F.getAttributes().getFnAttrs()) {
      if (Attr.isStringAttribute() &&
          (Attr.getKindAsString() == "target-features")) {
        if (Attr.getValueAsString().contains(Other: "-sse"))
          AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
        break;
      }
    }
  }

  ArgKind classifyArgument(Value *arg) {
    // A very rough approximation of X86_64 argument classification rules.
    Type *T = arg->getType();
    // x86 long double is always passed in memory.
    if (T->isX86_FP80Ty())
      return AK_Memory;
    if (T->isFPOrFPVectorTy())
      return AK_FloatingPoint;
    if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
      return AK_GeneralPurpose;
    if (T->isPointerTy())
      return AK_GeneralPurpose;
    return AK_Memory;
  }

  // For VarArg functions, store the argument shadow in an ABI-specific format
  // that corresponds to va_list layout.
  // We do this because Clang lowers va_arg in the frontend, and this pass
  // only sees the low level code that deals with va_list internals.
  // A much easier alternative (provided that Clang emits va_arg instructions)
  // would have been to associate each live instance of va_list with a copy of
  // MSanParamTLS, and extract shadow on va_arg() call in the argument list
  // order.
  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
    // Running offsets into the three va_list save areas within the TLS
    // layout: GP registers [0,48), FP registers [48,AMD64FpEndOffset),
    // then the memory overflow area.
    unsigned GpOffset = 0;
    unsigned FpOffset = AMD64GpEndOffset;
    unsigned OverflowOffset = AMD64FpEndOffset;
    const DataLayout &DL = F.getDataLayout();

    for (const auto &[ArgNo, A] : llvm::enumerate(First: CB.args())) {
      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
      bool IsByVal = CB.paramHasAttr(ArgNo, Kind: Attribute::ByVal);
      if (IsByVal) {
        // ByVal arguments always go to the overflow area.
        // Fixed arguments passed through the overflow area will be stepped
        // over by va_start, so don't count them towards the offset.
        if (IsFixed)
          continue;
        assert(A->getType()->isPointerTy());
        Type *RealTy = CB.getParamByValType(ArgNo);
        uint64_t ArgSize = DL.getTypeAllocSize(Ty: RealTy);
        uint64_t AlignedSize = alignTo(Value: ArgSize, Align: 8);
        unsigned BaseOffset = OverflowOffset;
        Value *ShadowBase = getShadowPtrForVAArgument(IRB, ArgOffset: OverflowOffset);
        Value *OriginBase = nullptr;
        if (MS.TrackOrigins)
          OriginBase = getOriginPtrForVAArgument(IRB, ArgOffset: OverflowOffset);
        OverflowOffset += AlignedSize;

        if (OverflowOffset > kParamTLSSize) {
          CleanUnusedTLS(IRB, ShadowBase, BaseOffset);
          continue; // We have no space to copy shadow there.
        }

        // Copy the byval pointee's shadow (and origin) into its TLS slot.
        Value *ShadowPtr, *OriginPtr;
        std::tie(args&: ShadowPtr, args&: OriginPtr) =
            MSV.getShadowOriginPtr(Addr: A, IRB, ShadowTy: IRB.getInt8Ty(), Alignment: kShadowTLSAlignment,
                                   /*isStore*/ false);
        IRB.CreateMemCpy(Dst: ShadowBase, DstAlign: kShadowTLSAlignment, Src: ShadowPtr,
                         SrcAlign: kShadowTLSAlignment, Size: ArgSize);
        if (MS.TrackOrigins)
          IRB.CreateMemCpy(Dst: OriginBase, DstAlign: kShadowTLSAlignment, Src: OriginPtr,
                           SrcAlign: kShadowTLSAlignment, Size: ArgSize);
      } else {
        ArgKind AK = classifyArgument(arg: A);
        // Demote to memory once the corresponding register class is full.
        if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
          AK = AK_Memory;
        if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
          AK = AK_Memory;
        Value *ShadowBase, *OriginBase = nullptr;
        switch (AK) {
        case AK_GeneralPurpose:
          ShadowBase = getShadowPtrForVAArgument(IRB, ArgOffset: GpOffset);
          if (MS.TrackOrigins)
            OriginBase = getOriginPtrForVAArgument(IRB, ArgOffset: GpOffset);
          GpOffset += 8;
          assert(GpOffset <= kParamTLSSize);
          break;
        case AK_FloatingPoint:
          ShadowBase = getShadowPtrForVAArgument(IRB, ArgOffset: FpOffset);
          if (MS.TrackOrigins)
            OriginBase = getOriginPtrForVAArgument(IRB, ArgOffset: FpOffset);
          FpOffset += 16;
          assert(FpOffset <= kParamTLSSize);
          break;
        case AK_Memory:
          if (IsFixed)
            continue;
          uint64_t ArgSize = DL.getTypeAllocSize(Ty: A->getType());
          uint64_t AlignedSize = alignTo(Value: ArgSize, Align: 8);
          unsigned BaseOffset = OverflowOffset;
          ShadowBase = getShadowPtrForVAArgument(IRB, ArgOffset: OverflowOffset);
          if (MS.TrackOrigins) {
            OriginBase = getOriginPtrForVAArgument(IRB, ArgOffset: OverflowOffset);
          }
          OverflowOffset += AlignedSize;
          if (OverflowOffset > kParamTLSSize) {
            // We have no space to copy shadow there.
            CleanUnusedTLS(IRB, ShadowBase, BaseOffset);
            continue;
          }
        }
        // Take fixed arguments into account for GpOffset and FpOffset,
        // but don't actually store shadows for them.
        // TODO(glider): don't call get*PtrForVAArgument() for them.
        if (IsFixed)
          continue;
        Value *Shadow = MSV.getShadow(V: A);
        IRB.CreateAlignedStore(Val: Shadow, Ptr: ShadowBase, Align: kShadowTLSAlignment);
        if (MS.TrackOrigins) {
          Value *Origin = MSV.getOrigin(V: A);
          TypeSize StoreSize = DL.getTypeStoreSize(Ty: Shadow->getType());
          MSV.paintOrigin(IRB, Origin, OriginPtr: OriginBase, TS: StoreSize,
                          Alignment: std::max(a: kShadowTLSAlignment, b: kMinOriginAlignment));
        }
      }
    }
    // Publish the overflow-area size so finalizeInstrumentation() can size
    // the backup copy.
    Constant *OverflowSize =
        ConstantInt::get(Ty: IRB.getInt64Ty(), V: OverflowOffset - AMD64FpEndOffset);
    IRB.CreateStore(Val: OverflowSize, Ptr: MS.VAArgOverflowSizeTLS);
  }

  void finalizeInstrumentation() override {
    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
           "finalizeInstrumentation called twice");
    if (!VAStartInstrumentationList.empty()) {
      // If there is a va_start in this function, make a backup copy of
      // va_arg_tls somewhere in the function entry block.
      IRBuilder<> IRB(MSV.FnPrologueEnd);
      VAArgOverflowSize =
          IRB.CreateLoad(Ty: IRB.getInt64Ty(), Ptr: MS.VAArgOverflowSizeTLS);
      Value *CopySize = IRB.CreateAdd(
          LHS: ConstantInt::get(Ty: MS.IntptrTy, V: AMD64FpEndOffset), RHS: VAArgOverflowSize);
      VAArgTLSCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
      VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
      IRB.CreateMemSet(Ptr: VAArgTLSCopy, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
                       Size: CopySize, Align: kShadowTLSAlignment, isVolatile: false);

      // Copy at most kParamTLSSize bytes from the real TLS array.
      Value *SrcSize = IRB.CreateBinaryIntrinsic(
          ID: Intrinsic::umin, LHS: CopySize,
          RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kParamTLSSize));
      IRB.CreateMemCpy(Dst: VAArgTLSCopy, DstAlign: kShadowTLSAlignment, Src: MS.VAArgTLS,
                       SrcAlign: kShadowTLSAlignment, Size: SrcSize);
      if (MS.TrackOrigins) {
        VAArgTLSOriginCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
        VAArgTLSOriginCopy->setAlignment(kShadowTLSAlignment);
        IRB.CreateMemCpy(Dst: VAArgTLSOriginCopy, DstAlign: kShadowTLSAlignment,
                         Src: MS.VAArgOriginTLS, SrcAlign: kShadowTLSAlignment, Size: SrcSize);
      }
    }

    // Instrument va_start.
    // Copy va_list shadow from the backup copy of the TLS contents.
    for (CallInst *OrigInst : VAStartInstrumentationList) {
      NextNodeIRBuilder IRB(OrigInst);
      Value *VAListTag = OrigInst->getArgOperand(i: 0);

      // reg_save_area is at offset 16 in the va_list.
      Value *RegSaveAreaPtrPtr =
          IRB.CreatePtrAdd(Ptr: VAListTag, Offset: ConstantInt::get(Ty: MS.IntptrTy, V: 16));
      Value *RegSaveAreaPtr = IRB.CreateLoad(Ty: MS.PtrTy, Ptr: RegSaveAreaPtrPtr);
      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
      const Align Alignment = Align(16);
      std::tie(args&: RegSaveAreaShadowPtr, args&: RegSaveAreaOriginPtr) =
          MSV.getShadowOriginPtr(Addr: RegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
                                 Alignment, /*isStore*/ true);
      IRB.CreateMemCpy(Dst: RegSaveAreaShadowPtr, DstAlign: Alignment, Src: VAArgTLSCopy, SrcAlign: Alignment,
                       Size: AMD64FpEndOffset);
      if (MS.TrackOrigins)
        IRB.CreateMemCpy(Dst: RegSaveAreaOriginPtr, DstAlign: Alignment, Src: VAArgTLSOriginCopy,
                         SrcAlign: Alignment, Size: AMD64FpEndOffset);
      // overflow_arg_area is at offset 8 in the va_list.
      Value *OverflowArgAreaPtrPtr =
          IRB.CreatePtrAdd(Ptr: VAListTag, Offset: ConstantInt::get(Ty: MS.IntptrTy, V: 8));
      Value *OverflowArgAreaPtr =
          IRB.CreateLoad(Ty: MS.PtrTy, Ptr: OverflowArgAreaPtrPtr);
      Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
      std::tie(args&: OverflowArgAreaShadowPtr, args&: OverflowArgAreaOriginPtr) =
          MSV.getShadowOriginPtr(Addr: OverflowArgAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
                                 Alignment, /*isStore*/ true);
      // Overflow-area shadow starts right after the register save areas in
      // the backup copy.
      Value *SrcPtr = IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(), Ptr: VAArgTLSCopy,
                                             Idx0: AMD64FpEndOffset);
      IRB.CreateMemCpy(Dst: OverflowArgAreaShadowPtr, DstAlign: Alignment, Src: SrcPtr, SrcAlign: Alignment,
                       Size: VAArgOverflowSize);
      if (MS.TrackOrigins) {
        SrcPtr = IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(), Ptr: VAArgTLSOriginCopy,
                                        Idx0: AMD64FpEndOffset);
        IRB.CreateMemCpy(Dst: OverflowArgAreaOriginPtr, DstAlign: Alignment, Src: SrcPtr, SrcAlign: Alignment,
                         Size: VAArgOverflowSize);
      }
    }
  }
};
8329
/// AArch64-specific implementation of VarArgHelper.
struct VarArgAArch64Helper : public VarArgHelperBase {
  // Sizes (in bytes) of the GR and VR register save areas in the TLS layout.
  static const unsigned kAArch64GrArgSize = 64;
  static const unsigned kAArch64VrArgSize = 128;

  static const unsigned AArch64GrBegOffset = 0;
  static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
  // Make VR space aligned to 16 bytes.
  static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
  static const unsigned AArch64VrEndOffset =
      AArch64VrBegOffset + kAArch64VrArgSize;
  static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;

  AllocaInst *VAArgTLSCopy = nullptr;
  Value *VAArgOverflowSize = nullptr;

  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };

  VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
                      MemorySanitizerVisitor &MSV)
      : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/32) {}

  // A very rough approximation of aarch64 argument classification rules.
  // Returns the argument kind and the number of registers it occupies.
  std::pair<ArgKind, uint64_t> classifyArgument(Type *T) {
    if (T->isIntOrPtrTy() && T->getPrimitiveSizeInBits() <= 64)
      return {AK_GeneralPurpose, 1};
    if (T->isFloatingPointTy() && T->getPrimitiveSizeInBits() <= 128)
      return {AK_FloatingPoint, 1};

    if (T->isArrayTy()) {
      // Classify by element type; the register count scales with the number
      // of elements. (getScalarType() is an identity for array types, so
      // this reads T's own element count.)
      auto R = classifyArgument(T: T->getArrayElementType());
      R.second *= T->getScalarType()->getArrayNumElements();
      return R;
    }

    if (const FixedVectorType *FV = dyn_cast<FixedVectorType>(Val: T)) {
      auto R = classifyArgument(T: FV->getScalarType());
      R.second *= FV->getNumElements();
      return R;
    }

    LLVM_DEBUG(errs() << "Unknown vararg type: " << *T << "\n");
    return {AK_Memory, 0};
  }

  // The instrumentation stores the argument shadow in a non ABI-specific
  // format because it does not know which argument is named (since Clang,
  // like x86_64 case, lowers the va_args in the frontend and this pass only
  // sees the low level code that deals with va_list internals).
  // The first eight GR registers (x0-x7) are saved in the first 64 bytes of
  // the va_arg tls array, followed by the first 8 FP/SIMD registers, and then
  // the remaining arguments.
  // Using constant offset within the va_arg TLS array allows fast copy
  // in the finalize instrumentation.
  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
    unsigned GrOffset = AArch64GrBegOffset;
    unsigned VrOffset = AArch64VrBegOffset;
    unsigned OverflowOffset = AArch64VAEndOffset;

    const DataLayout &DL = F.getDataLayout();
    for (const auto &[ArgNo, A] : llvm::enumerate(First: CB.args())) {
      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
      auto [AK, RegNum] = classifyArgument(T: A->getType());
      // Demote to memory if the corresponding register save area is full.
      if (AK == AK_GeneralPurpose &&
          (GrOffset + RegNum * 8) > AArch64GrEndOffset)
        AK = AK_Memory;
      if (AK == AK_FloatingPoint &&
          (VrOffset + RegNum * 16) > AArch64VrEndOffset)
        AK = AK_Memory;
      Value *Base;
      switch (AK) {
      case AK_GeneralPurpose:
        Base = getShadowPtrForVAArgument(IRB, ArgOffset: GrOffset);
        GrOffset += 8 * RegNum;
        break;
      case AK_FloatingPoint:
        Base = getShadowPtrForVAArgument(IRB, ArgOffset: VrOffset);
        VrOffset += 16 * RegNum;
        break;
      case AK_Memory:
        // Don't count fixed arguments in the overflow area - va_start will
        // skip right over them.
        if (IsFixed)
          continue;
        uint64_t ArgSize = DL.getTypeAllocSize(Ty: A->getType());
        uint64_t AlignedSize = alignTo(Value: ArgSize, Align: 8);
        unsigned BaseOffset = OverflowOffset;
        Base = getShadowPtrForVAArgument(IRB, ArgOffset: BaseOffset);
        OverflowOffset += AlignedSize;
        if (OverflowOffset > kParamTLSSize) {
          // We have no space to copy shadow there.
          CleanUnusedTLS(IRB, ShadowBase: Base, BaseOffset);
          continue;
        }
        break;
      }
      // Count Gp/Vr fixed arguments to their respective offsets, but don't
      // bother to actually store a shadow.
      if (IsFixed)
        continue;
      IRB.CreateAlignedStore(Val: MSV.getShadow(V: A), Ptr: Base, Align: kShadowTLSAlignment);
    }
    // Publish the overflow-area size for finalizeInstrumentation().
    Constant *OverflowSize =
        ConstantInt::get(Ty: IRB.getInt64Ty(), V: OverflowOffset - AArch64VAEndOffset);
    IRB.CreateStore(Val: OverflowSize, Ptr: MS.VAArgOverflowSizeTLS);
  }

  // Retrieve a va_list field of 'void*' size.
  Value *getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
    Value *SaveAreaPtrPtr =
        IRB.CreatePtrAdd(Ptr: VAListTag, Offset: ConstantInt::get(Ty: MS.IntptrTy, V: offset));
    return IRB.CreateLoad(Ty: Type::getInt64Ty(C&: *MS.C), Ptr: SaveAreaPtrPtr);
  }

  // Retrieve a va_list field of 'int' size. Sign-extended, because
  // __gr_offs/__vr_offs are negative for named arguments.
  Value *getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
    Value *SaveAreaPtr =
        IRB.CreatePtrAdd(Ptr: VAListTag, Offset: ConstantInt::get(Ty: MS.IntptrTy, V: offset));
    Value *SaveArea32 = IRB.CreateLoad(Ty: IRB.getInt32Ty(), Ptr: SaveAreaPtr);
    return IRB.CreateSExt(V: SaveArea32, DestTy: MS.IntptrTy);
  }

  void finalizeInstrumentation() override {
    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
           "finalizeInstrumentation called twice");
    if (!VAStartInstrumentationList.empty()) {
      // If there is a va_start in this function, make a backup copy of
      // va_arg_tls somewhere in the function entry block.
      IRBuilder<> IRB(MSV.FnPrologueEnd);
      VAArgOverflowSize =
          IRB.CreateLoad(Ty: IRB.getInt64Ty(), Ptr: MS.VAArgOverflowSizeTLS);
      Value *CopySize = IRB.CreateAdd(
          LHS: ConstantInt::get(Ty: MS.IntptrTy, V: AArch64VAEndOffset), RHS: VAArgOverflowSize);
      VAArgTLSCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
      VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
      IRB.CreateMemSet(Ptr: VAArgTLSCopy, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
                       Size: CopySize, Align: kShadowTLSAlignment, isVolatile: false);

      // Copy at most kParamTLSSize bytes from the real TLS array.
      Value *SrcSize = IRB.CreateBinaryIntrinsic(
          ID: Intrinsic::umin, LHS: CopySize,
          RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kParamTLSSize));
      IRB.CreateMemCpy(Dst: VAArgTLSCopy, DstAlign: kShadowTLSAlignment, Src: MS.VAArgTLS,
                       SrcAlign: kShadowTLSAlignment, Size: SrcSize);
    }

    Value *GrArgSize = ConstantInt::get(Ty: MS.IntptrTy, V: kAArch64GrArgSize);
    Value *VrArgSize = ConstantInt::get(Ty: MS.IntptrTy, V: kAArch64VrArgSize);

    // Instrument va_start, copy va_list shadow from the backup copy of
    // the TLS contents.
    for (CallInst *OrigInst : VAStartInstrumentationList) {
      NextNodeIRBuilder IRB(OrigInst);

      Value *VAListTag = OrigInst->getArgOperand(i: 0);

      // The variadic ABI for AArch64 creates two areas to save the incoming
      // argument registers (one for 64-bit general register x0-x7 and another
      // for 128-bit FP/SIMD v0-v7).
      // We need then to propagate the shadow arguments on both regions
      // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
      // The remaining arguments are saved on shadow for 'va::stack'.
      // One caveat is it requires only to propagate the non-named arguments,
      // however on the call site instrumentation 'all' the arguments are
      // saved. So to copy the shadow values from the va_arg TLS array
      // we need to adjust the offset for both GR and VR fields based on
      // the __{gr,vr}_offs value (since they are stores based on incoming
      // named arguments).
      Type *RegSaveAreaPtrTy = IRB.getPtrTy();

      // Read the stack pointer from the va_list.
      Value *StackSaveAreaPtr =
          IRB.CreateIntToPtr(V: getVAField64(IRB, VAListTag, offset: 0), DestTy: RegSaveAreaPtrTy);

      // Read both the __gr_top and __gr_off and add them up.
      Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, offset: 8);
      Value *GrOffSaveArea = getVAField32(IRB, VAListTag, offset: 24);

      Value *GrRegSaveAreaPtr = IRB.CreateIntToPtr(
          V: IRB.CreateAdd(LHS: GrTopSaveAreaPtr, RHS: GrOffSaveArea), DestTy: RegSaveAreaPtrTy);

      // Read both the __vr_top and __vr_off and add them up.
      Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, offset: 16);
      Value *VrOffSaveArea = getVAField32(IRB, VAListTag, offset: 28);

      Value *VrRegSaveAreaPtr = IRB.CreateIntToPtr(
          V: IRB.CreateAdd(LHS: VrTopSaveAreaPtr, RHS: VrOffSaveArea), DestTy: RegSaveAreaPtrTy);

      // It does not know how many named arguments is being used and, on the
      // callsite all the arguments were saved. Since __gr_off is defined as
      // '0 - ((8 - named_gr) * 8)', the idea is to just propagate the variadic
      // argument by ignoring the bytes of shadow from named arguments.
      Value *GrRegSaveAreaShadowPtrOff =
          IRB.CreateAdd(LHS: GrArgSize, RHS: GrOffSaveArea);

      Value *GrRegSaveAreaShadowPtr =
          MSV.getShadowOriginPtr(Addr: GrRegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
                                 Alignment: Align(8), /*isStore*/ true)
              .first;

      Value *GrSrcPtr =
          IRB.CreateInBoundsPtrAdd(Ptr: VAArgTLSCopy, Offset: GrRegSaveAreaShadowPtrOff);
      Value *GrCopySize = IRB.CreateSub(LHS: GrArgSize, RHS: GrRegSaveAreaShadowPtrOff);

      IRB.CreateMemCpy(Dst: GrRegSaveAreaShadowPtr, DstAlign: Align(8), Src: GrSrcPtr, SrcAlign: Align(8),
                       Size: GrCopySize);

      // Again, but for FP/SIMD values.
      Value *VrRegSaveAreaShadowPtrOff =
          IRB.CreateAdd(LHS: VrArgSize, RHS: VrOffSaveArea);

      Value *VrRegSaveAreaShadowPtr =
          MSV.getShadowOriginPtr(Addr: VrRegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
                                 Alignment: Align(8), /*isStore*/ true)
              .first;

      Value *VrSrcPtr = IRB.CreateInBoundsPtrAdd(
          Ptr: IRB.CreateInBoundsPtrAdd(Ptr: VAArgTLSCopy,
                                    Offset: IRB.getInt32(C: AArch64VrBegOffset)),
          Offset: VrRegSaveAreaShadowPtrOff);
      Value *VrCopySize = IRB.CreateSub(LHS: VrArgSize, RHS: VrRegSaveAreaShadowPtrOff);

      IRB.CreateMemCpy(Dst: VrRegSaveAreaShadowPtr, DstAlign: Align(8), Src: VrSrcPtr, SrcAlign: Align(8),
                       Size: VrCopySize);

      // And finally for remaining arguments.
      Value *StackSaveAreaShadowPtr =
          MSV.getShadowOriginPtr(Addr: StackSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
                                 Alignment: Align(16), /*isStore*/ true)
              .first;

      Value *StackSrcPtr = IRB.CreateInBoundsPtrAdd(
          Ptr: VAArgTLSCopy, Offset: IRB.getInt32(C: AArch64VAEndOffset));

      IRB.CreateMemCpy(Dst: StackSaveAreaShadowPtr, DstAlign: Align(16), Src: StackSrcPtr,
                       SrcAlign: Align(16), Size: VAArgOverflowSize);
    }
  }
};
8568
8569/// PowerPC64-specific implementation of VarArgHelper.
8570struct VarArgPowerPC64Helper : public VarArgHelperBase {
8571 AllocaInst *VAArgTLSCopy = nullptr;
8572 Value *VAArgSize = nullptr;
8573
  // On PPC64 the va_list tag is pointer-sized (8 bytes).
  VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
                        MemorySanitizerVisitor &MSV)
      : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/8) {}
8577
  /// Record the shadow of each variadic argument into the va_arg TLS array,
  /// mirroring the PPC64 parameter save area layout, and publish the total
  /// vararg size via VAArgOverflowSizeTLS.
  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
    // For PowerPC, we need to deal with alignment of stack arguments -
    // they are mostly aligned to 8 bytes, but vectors and i128 arrays
    // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
    // For that reason, we compute current offset from stack pointer (which is
    // always properly aligned), and offset for the first vararg, then subtract
    // them.
    unsigned VAArgBase;
    Triple TargetTriple(F.getParent()->getTargetTriple());
    // Parameter save area starts at 48 bytes from frame pointer for ABIv1,
    // and 32 bytes for ABIv2. This is usually determined by target
    // endianness, but in theory could be overridden by function attribute.
    if (TargetTriple.isPPC64ELFv2ABI())
      VAArgBase = 32;
    else
      VAArgBase = 48;
    unsigned VAArgOffset = VAArgBase;
    const DataLayout &DL = F.getDataLayout();
    for (const auto &[ArgNo, A] : llvm::enumerate(First: CB.args())) {
      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
      bool IsByVal = CB.paramHasAttr(ArgNo, Kind: Attribute::ByVal);
      if (IsByVal) {
        assert(A->getType()->isPointerTy());
        Type *RealTy = CB.getParamByValType(ArgNo);
        uint64_t ArgSize = DL.getTypeAllocSize(Ty: RealTy);
        // ByVal alignment comes from the attribute, clamped to >= 8.
        Align ArgAlign = CB.getParamAlign(ArgNo).value_or(u: Align(8));
        if (ArgAlign < 8)
          ArgAlign = Align(8);
        VAArgOffset = alignTo(Size: VAArgOffset, A: ArgAlign);
        if (!IsFixed) {
          // Copy the byval pointee's shadow into the TLS slot (null Base
          // means it would not fit).
          Value *Base =
              getShadowPtrForVAArgument(IRB, ArgOffset: VAArgOffset - VAArgBase, ArgSize);
          if (Base) {
            Value *AShadowPtr, *AOriginPtr;
            std::tie(args&: AShadowPtr, args&: AOriginPtr) =
                MSV.getShadowOriginPtr(Addr: A, IRB, ShadowTy: IRB.getInt8Ty(),
                                       Alignment: kShadowTLSAlignment, /*isStore*/ false);

            IRB.CreateMemCpy(Dst: Base, DstAlign: kShadowTLSAlignment, Src: AShadowPtr,
                             SrcAlign: kShadowTLSAlignment, Size: ArgSize);
          }
        }
        VAArgOffset += alignTo(Size: ArgSize, A: Align(8));
      } else {
        Value *Base;
        uint64_t ArgSize = DL.getTypeAllocSize(Ty: A->getType());
        Align ArgAlign = Align(8);
        if (A->getType()->isArrayTy()) {
          // Arrays are aligned to element size, except for long double
          // arrays, which are aligned to 8 bytes.
          Type *ElementTy = A->getType()->getArrayElementType();
          if (!ElementTy->isPPC_FP128Ty())
            ArgAlign = Align(DL.getTypeAllocSize(Ty: ElementTy));
        } else if (A->getType()->isVectorTy()) {
          // Vectors are naturally aligned.
          ArgAlign = Align(ArgSize);
        }
        if (ArgAlign < 8)
          ArgAlign = Align(8);
        VAArgOffset = alignTo(Size: VAArgOffset, A: ArgAlign);
        if (DL.isBigEndian()) {
          // Adjusting the shadow for argument with size < 8 to match the
          // placement of bits in big endian system
          if (ArgSize < 8)
            VAArgOffset += (8 - ArgSize);
        }
        if (!IsFixed) {
          Base =
              getShadowPtrForVAArgument(IRB, ArgOffset: VAArgOffset - VAArgBase, ArgSize);
          if (Base)
            IRB.CreateAlignedStore(Val: MSV.getShadow(V: A), Ptr: Base, Align: kShadowTLSAlignment);
        }
        VAArgOffset += ArgSize;
        VAArgOffset = alignTo(Size: VAArgOffset, A: Align(8));
      }
      // Named arguments advance the vararg base: the first variadic slot
      // starts right after the last fixed one.
      if (IsFixed)
        VAArgBase = VAArgOffset;
    }

    Constant *TotalVAArgSize =
        ConstantInt::get(Ty: MS.IntptrTy, V: VAArgOffset - VAArgBase);
    // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
    // a new class member i.e. it is the total size of all VarArgs.
    IRB.CreateStore(Val: TotalVAArgSize, Ptr: MS.VAArgOverflowSizeTLS);
  }
8663
  // Finalizes vararg instrumentation for the enclosing PowerPC helper:
  // snapshots the vararg shadow TLS in the function prologue and, after each
  // va_start, copies that snapshot over the shadow of the register save area.
  void finalizeInstrumentation() override {
    assert(!VAArgSize && !VAArgTLSCopy &&
           "finalizeInstrumentation called twice");
    IRBuilder<> IRB(MSV.FnPrologueEnd);
    // Total size of all varargs, stored by visitCallBase() into
    // VAArgOverflowSizeTLS (reused as a plain "vararg size" slot).
    VAArgSize = IRB.CreateLoad(Ty: IRB.getInt64Ty(), Ptr: MS.VAArgOverflowSizeTLS);
    Value *CopySize = VAArgSize;

    if (!VAStartInstrumentationList.empty()) {
      // If there is a va_start in this function, make a backup copy of
      // va_arg_tls somewhere in the function entry block.

      VAArgTLSCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
      VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
      IRB.CreateMemSet(Ptr: VAArgTLSCopy, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
                       Size: CopySize, Align: kShadowTLSAlignment, isVolatile: false);

      // Clamp the copy to the TLS buffer size so an oversized vararg list
      // cannot read past the end of MS.VAArgTLS.
      Value *SrcSize = IRB.CreateBinaryIntrinsic(
          ID: Intrinsic::umin, LHS: CopySize,
          RHS: ConstantInt::get(Ty: IRB.getInt64Ty(), V: kParamTLSSize));
      IRB.CreateMemCpy(Dst: VAArgTLSCopy, DstAlign: kShadowTLSAlignment, Src: MS.VAArgTLS,
                       SrcAlign: kShadowTLSAlignment, Size: SrcSize);
    }

    // Instrument va_start.
    // Copy va_list shadow from the backup copy of the TLS contents.
    for (CallInst *OrigInst : VAStartInstrumentationList) {
      NextNodeIRBuilder IRB(OrigInst);
      Value *VAListTag = OrigInst->getArgOperand(i: 0);
      // The va_list tag is reinterpreted as a pointer-to-pointer and loaded
      // to obtain the register save area address.
      Value *RegSaveAreaPtrPtr = IRB.CreatePtrToInt(V: VAListTag, DestTy: MS.IntptrTy);

      RegSaveAreaPtrPtr = IRB.CreateIntToPtr(V: RegSaveAreaPtrPtr, DestTy: MS.PtrTy);

      Value *RegSaveAreaPtr = IRB.CreateLoad(Ty: MS.PtrTy, Ptr: RegSaveAreaPtrPtr);
      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
      const DataLayout &DL = F.getDataLayout();
      unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
      const Align Alignment = Align(IntptrSize);
      std::tie(args&: RegSaveAreaShadowPtr, args&: RegSaveAreaOriginPtr) =
          MSV.getShadowOriginPtr(Addr: RegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
                                 Alignment, /*isStore*/ true);
      // The snapshot was allocated with exactly CopySize bytes above, so the
      // full copy here is in bounds.
      IRB.CreateMemCpy(Dst: RegSaveAreaShadowPtr, DstAlign: Alignment, Src: VAArgTLSCopy, SrcAlign: Alignment,
                       Size: CopySize);
    }
  }
8708};
8709
/// PowerPC32-specific implementation of VarArgHelper.
struct VarArgPowerPC32Helper : public VarArgHelperBase {
  // Entry-block backup of the vararg shadow TLS, created lazily in
  // finalizeInstrumentation() when the function contains a va_start.
  AllocaInst *VAArgTLSCopy = nullptr;
  // Runtime total size of all varargs, loaded from VAArgOverflowSizeTLS.
  Value *VAArgSize = nullptr;

  VarArgPowerPC32Helper(Function &F, MemorySanitizer &MS,
                        MemorySanitizerVisitor &MSV)
      : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/12) {}

  // Records shadow for every vararg passed at this call site into the vararg
  // TLS region, mirroring the PPC32 parameter save area layout, and stores
  // the total vararg size in VAArgOverflowSizeTLS.
  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
    unsigned VAArgBase;
    // Parameter save area is 8 bytes from frame pointer in PPC32
    VAArgBase = 8;
    unsigned VAArgOffset = VAArgBase;
    const DataLayout &DL = F.getDataLayout();
    unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
    for (const auto &[ArgNo, A] : llvm::enumerate(First: CB.args())) {
      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
      bool IsByVal = CB.paramHasAttr(ArgNo, Kind: Attribute::ByVal);
      if (IsByVal) {
        assert(A->getType()->isPointerTy());
        Type *RealTy = CB.getParamByValType(ArgNo);
        uint64_t ArgSize = DL.getTypeAllocSize(Ty: RealTy);
        // ByVal slots are at least pointer-aligned.
        Align ArgAlign = CB.getParamAlign(ArgNo).value_or(u: Align(IntptrSize));
        if (ArgAlign < IntptrSize)
          ArgAlign = Align(IntptrSize);
        VAArgOffset = alignTo(Size: VAArgOffset, A: ArgAlign);
        if (!IsFixed) {
          Value *Base =
              getShadowPtrForVAArgument(IRB, ArgOffset: VAArgOffset - VAArgBase, ArgSize);
          if (Base) {
            // For ByVal, copy the shadow of the pointed-to aggregate rather
            // than the shadow of the pointer itself.
            Value *AShadowPtr, *AOriginPtr;
            std::tie(args&: AShadowPtr, args&: AOriginPtr) =
                MSV.getShadowOriginPtr(Addr: A, IRB, ShadowTy: IRB.getInt8Ty(),
                                       Alignment: kShadowTLSAlignment, /*isStore*/ false);

            IRB.CreateMemCpy(Dst: Base, DstAlign: kShadowTLSAlignment, Src: AShadowPtr,
                             SrcAlign: kShadowTLSAlignment, Size: ArgSize);
          }
        }
        VAArgOffset += alignTo(Size: ArgSize, A: Align(IntptrSize));
      } else {
        Value *Base;
        Type *ArgTy = A->getType();

        // On PPC 32 floating point variable arguments are stored in separate
        // area: fp_save_area = reg_save_area + 4*8. We do not copy shadow for
        // them as they will be found when checking call arguments.
        if (!ArgTy->isFloatingPointTy()) {
          uint64_t ArgSize = DL.getTypeAllocSize(Ty: ArgTy);
          Align ArgAlign = Align(IntptrSize);
          if (ArgTy->isArrayTy()) {
            // Arrays are aligned to element size, except for long double
            // arrays, which are aligned to 8 bytes.
            Type *ElementTy = ArgTy->getArrayElementType();
            if (!ElementTy->isPPC_FP128Ty())
              ArgAlign = Align(DL.getTypeAllocSize(Ty: ElementTy));
          } else if (ArgTy->isVectorTy()) {
            // Vectors are naturally aligned.
            ArgAlign = Align(ArgSize);
          }
          if (ArgAlign < IntptrSize)
            ArgAlign = Align(IntptrSize);
          VAArgOffset = alignTo(Size: VAArgOffset, A: ArgAlign);
          if (DL.isBigEndian()) {
            // Adjusting the shadow for argument with size < IntptrSize to match
            // the placement of bits in big endian system
            if (ArgSize < IntptrSize)
              VAArgOffset += (IntptrSize - ArgSize);
          }
          if (!IsFixed) {
            Base = getShadowPtrForVAArgument(IRB, ArgOffset: VAArgOffset - VAArgBase,
                                             ArgSize);
            if (Base)
              IRB.CreateAlignedStore(Val: MSV.getShadow(V: A), Ptr: Base,
                                     Align: kShadowTLSAlignment);
          }
          VAArgOffset += ArgSize;
          VAArgOffset = alignTo(Size: VAArgOffset, A: Align(IntptrSize));
        }
      }
    }

    Constant *TotalVAArgSize =
        ConstantInt::get(Ty: MS.IntptrTy, V: VAArgOffset - VAArgBase);
    // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
    // a new class member i.e. it is the total size of all VarArgs.
    IRB.CreateStore(Val: TotalVAArgSize, Ptr: MS.VAArgOverflowSizeTLS);
  }

  // Snapshots the vararg shadow TLS in the prologue and, after each va_start,
  // distributes it over the shadow of the register save area (first 32 bytes)
  // and the overflow area (the remainder).
  void finalizeInstrumentation() override {
    assert(!VAArgSize && !VAArgTLSCopy &&
           "finalizeInstrumentation called twice");
    IRBuilder<> IRB(MSV.FnPrologueEnd);
    VAArgSize = IRB.CreateLoad(Ty: MS.IntptrTy, Ptr: MS.VAArgOverflowSizeTLS);
    Value *CopySize = VAArgSize;

    if (!VAStartInstrumentationList.empty()) {
      // If there is a va_start in this function, make a backup copy of
      // va_arg_tls somewhere in the function entry block.

      VAArgTLSCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
      VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
      IRB.CreateMemSet(Ptr: VAArgTLSCopy, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
                       Size: CopySize, Align: kShadowTLSAlignment, isVolatile: false);

      // Do not read past the end of the TLS buffer.
      Value *SrcSize = IRB.CreateBinaryIntrinsic(
          ID: Intrinsic::umin, LHS: CopySize,
          RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kParamTLSSize));
      IRB.CreateMemCpy(Dst: VAArgTLSCopy, DstAlign: kShadowTLSAlignment, Src: MS.VAArgTLS,
                       SrcAlign: kShadowTLSAlignment, Size: SrcSize);
    }

    // Instrument va_start.
    // Copy va_list shadow from the backup copy of the TLS contents.
    for (CallInst *OrigInst : VAStartInstrumentationList) {
      NextNodeIRBuilder IRB(OrigInst);
      Value *VAListTag = OrigInst->getArgOperand(i: 0);
      Value *RegSaveAreaPtrPtr = IRB.CreatePtrToInt(V: VAListTag, DestTy: MS.IntptrTy);
      Value *RegSaveAreaSize = CopySize;

      // In PPC32 va_list_tag is a struct
      // {char, char, i16 padding, char *overflow_area, char *reg_save_area},
      // so the reg_save_area pointer lives at byte offset 8.
      RegSaveAreaPtrPtr =
          IRB.CreateAdd(LHS: RegSaveAreaPtrPtr, RHS: ConstantInt::get(Ty: MS.IntptrTy, V: 8));

      // On PPC 32 reg_save_area can only hold 32 bytes of data
      RegSaveAreaSize = IRB.CreateBinaryIntrinsic(
          ID: Intrinsic::umin, LHS: CopySize, RHS: ConstantInt::get(Ty: MS.IntptrTy, V: 32));

      RegSaveAreaPtrPtr = IRB.CreateIntToPtr(V: RegSaveAreaPtrPtr, DestTy: MS.PtrTy);
      Value *RegSaveAreaPtr = IRB.CreateLoad(Ty: MS.PtrTy, Ptr: RegSaveAreaPtrPtr);

      const DataLayout &DL = F.getDataLayout();
      unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
      const Align Alignment = Align(IntptrSize);

      { // Copy reg save area
        Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
        std::tie(args&: RegSaveAreaShadowPtr, args&: RegSaveAreaOriginPtr) =
            MSV.getShadowOriginPtr(Addr: RegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
                                   Alignment, /*isStore*/ true);
        IRB.CreateMemCpy(Dst: RegSaveAreaShadowPtr, DstAlign: Alignment, Src: VAArgTLSCopy,
                         SrcAlign: Alignment, Size: RegSaveAreaSize);

        // The FP save area sits right after the 32-byte GPR save area.
        RegSaveAreaShadowPtr =
            IRB.CreatePtrToInt(V: RegSaveAreaShadowPtr, DestTy: MS.IntptrTy);
        Value *FPSaveArea = IRB.CreateAdd(LHS: RegSaveAreaShadowPtr,
                                          RHS: ConstantInt::get(Ty: MS.IntptrTy, V: 32));
        FPSaveArea = IRB.CreateIntToPtr(V: FPSaveArea, DestTy: MS.PtrTy);
        // We fill fp shadow with zeroes as uninitialized fp args should have
        // been found during call base check
        IRB.CreateMemSet(Ptr: FPSaveArea, Val: ConstantInt::getNullValue(Ty: IRB.getInt8Ty()),
                         Size: ConstantInt::get(Ty: MS.IntptrTy, V: 32), Align: Alignment);
      }

      { // Copy overflow area
        // RegSaveAreaSize is min(CopySize, 32) -> no overflow can occur
        Value *OverflowAreaSize = IRB.CreateSub(LHS: CopySize, RHS: RegSaveAreaSize);

        // The overflow_area pointer lives at byte offset 4 of the tag.
        Value *OverflowAreaPtrPtr = IRB.CreatePtrToInt(V: VAListTag, DestTy: MS.IntptrTy);
        OverflowAreaPtrPtr =
            IRB.CreateAdd(LHS: OverflowAreaPtrPtr, RHS: ConstantInt::get(Ty: MS.IntptrTy, V: 4));
        OverflowAreaPtrPtr = IRB.CreateIntToPtr(V: OverflowAreaPtrPtr, DestTy: MS.PtrTy);

        Value *OverflowAreaPtr = IRB.CreateLoad(Ty: MS.PtrTy, Ptr: OverflowAreaPtrPtr);

        Value *OverflowAreaShadowPtr, *OverflowAreaOriginPtr;
        std::tie(args&: OverflowAreaShadowPtr, args&: OverflowAreaOriginPtr) =
            MSV.getShadowOriginPtr(Addr: OverflowAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
                                   Alignment, /*isStore*/ true);

        // Overflow shadow starts RegSaveAreaSize bytes into the snapshot.
        Value *OverflowVAArgTLSCopyPtr =
            IRB.CreatePtrToInt(V: VAArgTLSCopy, DestTy: MS.IntptrTy);
        OverflowVAArgTLSCopyPtr =
            IRB.CreateAdd(LHS: OverflowVAArgTLSCopyPtr, RHS: RegSaveAreaSize);

        OverflowVAArgTLSCopyPtr =
            IRB.CreateIntToPtr(V: OverflowVAArgTLSCopyPtr, DestTy: MS.PtrTy);
        IRB.CreateMemCpy(Dst: OverflowAreaShadowPtr, DstAlign: Alignment,
                         Src: OverflowVAArgTLSCopyPtr, SrcAlign: Alignment, Size: OverflowAreaSize);
      }
    }
  }
};
8894
/// SystemZ-specific implementation of VarArgHelper.
struct VarArgSystemZHelper : public VarArgHelperBase {
  // Byte offsets describing the layout used below: GP-register argument
  // slots occupy [SystemZGpOffset, SystemZGpEndOffset) and FP-register slots
  // occupy [SystemZFpOffset, SystemZFpEndOffset) of the register save area;
  // overflow (stack) arguments start at SystemZOverflowOffset, right past
  // the SystemZRegSaveAreaSize-byte register save area.
  static const unsigned SystemZGpOffset = 16;
  static const unsigned SystemZGpEndOffset = 56;
  static const unsigned SystemZFpOffset = 128;
  static const unsigned SystemZFpEndOffset = 160;
  // At most this many fixed vector args go in vector registers; all vector
  // varargs go through memory (see visitCallBase()).
  static const unsigned SystemZMaxVrArgs = 8;
  static const unsigned SystemZRegSaveAreaSize = 160;
  static const unsigned SystemZOverflowOffset = 160;
  static const unsigned SystemZVAListTagSize = 32;
  // Offsets of the overflow_arg_area and reg_save_area pointers inside the
  // va_list tag.
  static const unsigned SystemZOverflowArgAreaPtrOffset = 16;
  static const unsigned SystemZRegSaveAreaPtrOffset = 24;

  // From the function's "use-soft-float" attribute; under soft-float FP
  // arguments travel in GPRs instead of FPRs.
  bool IsSoftFloatABI;
  // Entry-block backups of the vararg shadow/origin TLS buffers, created in
  // finalizeInstrumentation() when the function contains a va_start.
  AllocaInst *VAArgTLSCopy = nullptr;
  AllocaInst *VAArgTLSOriginCopy = nullptr;
  // Runtime size of the overflow-area portion of the varargs, loaded from
  // VAArgOverflowSizeTLS.
  Value *VAArgOverflowSize = nullptr;

  // Where the ABI places an argument.
  enum class ArgKind {
    GeneralPurpose,
    FloatingPoint,
    Vector,
    Memory,
    Indirect,
  };

  // How a narrow integer argument's shadow is widened to fill its 8-byte
  // slot, mirroring the argument's own ZExt/SExt attribute.
  enum class ShadowExtension { None, Zero, Sign };

  VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
                      MemorySanitizerVisitor &MSV)
      : VarArgHelperBase(F, MS, MSV, SystemZVAListTagSize),
        IsSoftFloatABI(F.getFnAttribute(Kind: "use-soft-float").getValueAsBool()) {}

  // Maps an IR argument type to the register class the ABI passes it in.
  ArgKind classifyArgument(Type *T) {
    // T is a SystemZABIInfo::classifyArgumentType() output, and there are
    // only a few possibilities of what it can be. In particular, enums, single
    // element structs and large types have already been taken care of.

    // Some i128 and fp128 arguments are converted to pointers only in the
    // back end.
    if (T->isIntegerTy(Bitwidth: 128) || T->isFP128Ty())
      return ArgKind::Indirect;
    if (T->isFloatingPointTy())
      return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint;
    if (T->isIntegerTy() || T->isPointerTy())
      return ArgKind::GeneralPurpose;
    if (T->isVectorTy())
      return ArgKind::Vector;
    return ArgKind::Memory;
  }

  // Determines whether the shadow of argument ArgNo must be zero-, sign-, or
  // not extended, based on the parameter's ZExt/SExt attributes.
  ShadowExtension getShadowExtension(const CallBase &CB, unsigned ArgNo) {
    // ABI says: "One of the simple integer types no more than 64 bits wide.
    // ... If such an argument is shorter than 64 bits, replace it by a full
    // 64-bit integer representing the same number, using sign or zero
    // extension". Shadow for an integer argument has the same type as the
    // argument itself, so it can be sign or zero extended as well.
    bool ZExt = CB.paramHasAttr(ArgNo, Kind: Attribute::ZExt);
    bool SExt = CB.paramHasAttr(ArgNo, Kind: Attribute::SExt);
    if (ZExt) {
      assert(!SExt);
      return ShadowExtension::Zero;
    }
    if (SExt) {
      assert(!ZExt);
      return ShadowExtension::Sign;
    }
    return ShadowExtension::None;
  }

  // Stores shadow (and origin, if tracked) for each vararg of this call into
  // the vararg TLS buffers at the offset where the ABI will place the
  // argument, and records the overflow-area size in VAArgOverflowSizeTLS.
  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
    unsigned GpOffset = SystemZGpOffset;
    unsigned FpOffset = SystemZFpOffset;
    unsigned VrIndex = 0;
    unsigned OverflowOffset = SystemZOverflowOffset;
    const DataLayout &DL = F.getDataLayout();
    for (const auto &[ArgNo, A] : llvm::enumerate(First: CB.args())) {
      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
      // SystemZABIInfo does not produce ByVal parameters.
      assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal));
      Type *T = A->getType();
      ArgKind AK = classifyArgument(T);
      if (AK == ArgKind::Indirect) {
        // Indirect arguments are passed via a pointer, i.e. like a
        // general-purpose argument of pointer type.
        T = MS.PtrTy;
        AK = ArgKind::GeneralPurpose;
      }
      // Once a register class is exhausted, further arguments of that class
      // spill to the overflow (memory) area.
      if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset)
        AK = ArgKind::Memory;
      if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset)
        AK = ArgKind::Memory;
      if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed))
        AK = ArgKind::Memory;
      Value *ShadowBase = nullptr;
      Value *OriginBase = nullptr;
      ShadowExtension SE = ShadowExtension::None;
      switch (AK) {
      case ArgKind::GeneralPurpose: {
        // Always keep track of GpOffset, but store shadow only for varargs.
        uint64_t ArgSize = 8;
        if (GpOffset + ArgSize <= kParamTLSSize) {
          if (!IsFixed) {
            SE = getShadowExtension(CB, ArgNo);
            uint64_t GapSize = 0;
            if (SE == ShadowExtension::None) {
              // Unextended narrow values sit in the low bytes of their
              // big-endian slot; skip the leading gap.
              uint64_t ArgAllocSize = DL.getTypeAllocSize(Ty: T);
              assert(ArgAllocSize <= ArgSize);
              GapSize = ArgSize - ArgAllocSize;
            }
            ShadowBase = getShadowAddrForVAArgument(IRB, ArgOffset: GpOffset + GapSize);
            if (MS.TrackOrigins)
              OriginBase = getOriginPtrForVAArgument(IRB, ArgOffset: GpOffset + GapSize);
          }
          GpOffset += ArgSize;
        } else {
          GpOffset = kParamTLSSize;
        }
        break;
      }
      case ArgKind::FloatingPoint: {
        // Always keep track of FpOffset, but store shadow only for varargs.
        uint64_t ArgSize = 8;
        if (FpOffset + ArgSize <= kParamTLSSize) {
          if (!IsFixed) {
            // PoP says: "A short floating-point datum requires only the
            // left-most 32 bit positions of a floating-point register".
            // Therefore, in contrast to AK_GeneralPurpose and AK_Memory,
            // don't extend shadow and don't mind the gap.
            ShadowBase = getShadowAddrForVAArgument(IRB, ArgOffset: FpOffset);
            if (MS.TrackOrigins)
              OriginBase = getOriginPtrForVAArgument(IRB, ArgOffset: FpOffset);
          }
          FpOffset += ArgSize;
        } else {
          FpOffset = kParamTLSSize;
        }
        break;
      }
      case ArgKind::Vector: {
        // Keep track of VrIndex. No need to store shadow, since vector varargs
        // go through AK_Memory.
        assert(IsFixed);
        VrIndex++;
        break;
      }
      case ArgKind::Memory: {
        // Keep track of OverflowOffset and store shadow only for varargs.
        // Ignore fixed args, since we need to copy only the vararg portion of
        // the overflow area shadow.
        if (!IsFixed) {
          uint64_t ArgAllocSize = DL.getTypeAllocSize(Ty: T);
          uint64_t ArgSize = alignTo(Value: ArgAllocSize, Align: 8);
          if (OverflowOffset + ArgSize <= kParamTLSSize) {
            SE = getShadowExtension(CB, ArgNo);
            uint64_t GapSize =
                SE == ShadowExtension::None ? ArgSize - ArgAllocSize : 0;
            ShadowBase =
                getShadowAddrForVAArgument(IRB, ArgOffset: OverflowOffset + GapSize);
            if (MS.TrackOrigins)
              OriginBase =
                  getOriginPtrForVAArgument(IRB, ArgOffset: OverflowOffset + GapSize);
            OverflowOffset += ArgSize;
          } else {
            // Offsets from now on are clamped; see the FIXME about not being
            // able to clear shadow beyond kParamTLSSize below.
            OverflowOffset = kParamTLSSize;
          }
        }
        break;
      }
      case ArgKind::Indirect:
        llvm_unreachable("Indirect must be converted to GeneralPurpose");
      }
      if (ShadowBase == nullptr)
        continue;
      Value *Shadow = MSV.getShadow(V: A);
      if (SE != ShadowExtension::None)
        Shadow = MSV.CreateShadowCast(IRB, V: Shadow, dstTy: IRB.getInt64Ty(),
                                      /*Signed*/ SE == ShadowExtension::Sign);
      ShadowBase = IRB.CreateIntToPtr(V: ShadowBase, DestTy: MS.PtrTy, Name: "_msarg_va_s");
      IRB.CreateStore(Val: Shadow, Ptr: ShadowBase);
      if (MS.TrackOrigins) {
        Value *Origin = MSV.getOrigin(V: A);
        TypeSize StoreSize = DL.getTypeStoreSize(Ty: Shadow->getType());
        MSV.paintOrigin(IRB, Origin, OriginPtr: OriginBase, TS: StoreSize,
                        Alignment: kMinOriginAlignment);
      }
    }
    Constant *OverflowSize = ConstantInt::get(
        Ty: IRB.getInt64Ty(), V: OverflowOffset - SystemZOverflowOffset);
    IRB.CreateStore(Val: OverflowSize, Ptr: MS.VAArgOverflowSizeTLS);
  }

  // Copies the register-save-area portion of the TLS snapshot over the
  // shadow (and origin) of the va_list's reg_save_area.
  void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) {
    Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
        V: IRB.CreateAdd(
            LHS: IRB.CreatePtrToInt(V: VAListTag, DestTy: MS.IntptrTy),
            RHS: ConstantInt::get(Ty: MS.IntptrTy, V: SystemZRegSaveAreaPtrOffset)),
        DestTy: MS.PtrTy);
    Value *RegSaveAreaPtr = IRB.CreateLoad(Ty: MS.PtrTy, Ptr: RegSaveAreaPtrPtr);
    Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
    const Align Alignment = Align(8);
    std::tie(args&: RegSaveAreaShadowPtr, args&: RegSaveAreaOriginPtr) =
        MSV.getShadowOriginPtr(Addr: RegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(), Alignment,
                               /*isStore*/ true);
    // TODO(iii): copy only fragments filled by visitCallBase()
    // TODO(iii): support packed-stack && !use-soft-float
    // For use-soft-float functions, it is enough to copy just the GPRs.
    unsigned RegSaveAreaSize =
        IsSoftFloatABI ? SystemZGpEndOffset : SystemZRegSaveAreaSize;
    IRB.CreateMemCpy(Dst: RegSaveAreaShadowPtr, DstAlign: Alignment, Src: VAArgTLSCopy, SrcAlign: Alignment,
                     Size: RegSaveAreaSize);
    if (MS.TrackOrigins)
      IRB.CreateMemCpy(Dst: RegSaveAreaOriginPtr, DstAlign: Alignment, Src: VAArgTLSOriginCopy,
                       SrcAlign: Alignment, Size: RegSaveAreaSize);
  }

  // FIXME: This implementation limits OverflowOffset to kParamTLSSize, so we
  // don't know real overflow size and can't clear shadow beyond kParamTLSSize.
  // Copies the overflow portion of the TLS snapshot over the shadow (and
  // origin) of the va_list's overflow_arg_area.
  void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
    Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
        V: IRB.CreateAdd(
            LHS: IRB.CreatePtrToInt(V: VAListTag, DestTy: MS.IntptrTy),
            RHS: ConstantInt::get(Ty: MS.IntptrTy, V: SystemZOverflowArgAreaPtrOffset)),
        DestTy: MS.PtrTy);
    Value *OverflowArgAreaPtr = IRB.CreateLoad(Ty: MS.PtrTy, Ptr: OverflowArgAreaPtrPtr);
    Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
    const Align Alignment = Align(8);
    std::tie(args&: OverflowArgAreaShadowPtr, args&: OverflowArgAreaOriginPtr) =
        MSV.getShadowOriginPtr(Addr: OverflowArgAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
                               Alignment, /*isStore*/ true);
    Value *SrcPtr = IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(), Ptr: VAArgTLSCopy,
                                           Idx0: SystemZOverflowOffset);
    IRB.CreateMemCpy(Dst: OverflowArgAreaShadowPtr, DstAlign: Alignment, Src: SrcPtr, SrcAlign: Alignment,
                     Size: VAArgOverflowSize);
    if (MS.TrackOrigins) {
      SrcPtr = IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(), Ptr: VAArgTLSOriginCopy,
                                      Idx0: SystemZOverflowOffset);
      IRB.CreateMemCpy(Dst: OverflowArgAreaOriginPtr, DstAlign: Alignment, Src: SrcPtr, SrcAlign: Alignment,
                       Size: VAArgOverflowSize);
    }
  }

  // Snapshots the vararg shadow/origin TLS in the prologue, then propagates
  // the snapshot to each va_start'ed va_list.
  void finalizeInstrumentation() override {
    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
           "finalizeInstrumentation called twice");
    if (!VAStartInstrumentationList.empty()) {
      // If there is a va_start in this function, make a backup copy of
      // va_arg_tls somewhere in the function entry block.
      IRBuilder<> IRB(MSV.FnPrologueEnd);
      VAArgOverflowSize =
          IRB.CreateLoad(Ty: IRB.getInt64Ty(), Ptr: MS.VAArgOverflowSizeTLS);
      // Full snapshot = register save area + overflow area.
      Value *CopySize =
          IRB.CreateAdd(LHS: ConstantInt::get(Ty: MS.IntptrTy, V: SystemZOverflowOffset),
                        RHS: VAArgOverflowSize);
      VAArgTLSCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
      VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
      IRB.CreateMemSet(Ptr: VAArgTLSCopy, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
                       Size: CopySize, Align: kShadowTLSAlignment, isVolatile: false);

      // Never read past the end of the TLS buffer.
      Value *SrcSize = IRB.CreateBinaryIntrinsic(
          ID: Intrinsic::umin, LHS: CopySize,
          RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kParamTLSSize));
      IRB.CreateMemCpy(Dst: VAArgTLSCopy, DstAlign: kShadowTLSAlignment, Src: MS.VAArgTLS,
                       SrcAlign: kShadowTLSAlignment, Size: SrcSize);
      if (MS.TrackOrigins) {
        VAArgTLSOriginCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
        VAArgTLSOriginCopy->setAlignment(kShadowTLSAlignment);
        IRB.CreateMemCpy(Dst: VAArgTLSOriginCopy, DstAlign: kShadowTLSAlignment,
                         Src: MS.VAArgOriginTLS, SrcAlign: kShadowTLSAlignment, Size: SrcSize);
      }
    }

    // Instrument va_start.
    // Copy va_list shadow from the backup copy of the TLS contents.
    for (CallInst *OrigInst : VAStartInstrumentationList) {
      NextNodeIRBuilder IRB(OrigInst);
      Value *VAListTag = OrigInst->getArgOperand(i: 0);
      copyRegSaveArea(IRB, VAListTag);
      copyOverflowArea(IRB, VAListTag);
    }
  }
};
9175
/// i386-specific implementation of VarArgHelper.
struct VarArgI386Helper : public VarArgHelperBase {
  // Entry-block backup of the vararg shadow TLS, created in
  // finalizeInstrumentation() when the function contains a va_start.
  AllocaInst *VAArgTLSCopy = nullptr;
  // Runtime total size of all varargs, loaded from VAArgOverflowSizeTLS.
  Value *VAArgSize = nullptr;

  VarArgI386Helper(Function &F, MemorySanitizer &MS,
                   MemorySanitizerVisitor &MSV)
      : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/4) {}

  // Records shadow for each vararg of this call into the vararg TLS region.
  // Note that VAArgOffset is advanced only for varargs, matching a va_list
  // that points past the named parameters.
  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
    const DataLayout &DL = F.getDataLayout();
    unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
    unsigned VAArgOffset = 0;
    for (const auto &[ArgNo, A] : llvm::enumerate(First: CB.args())) {
      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
      bool IsByVal = CB.paramHasAttr(ArgNo, Kind: Attribute::ByVal);
      if (IsByVal) {
        assert(A->getType()->isPointerTy());
        Type *RealTy = CB.getParamByValType(ArgNo);
        uint64_t ArgSize = DL.getTypeAllocSize(Ty: RealTy);
        // ByVal slots are at least pointer-aligned.
        Align ArgAlign = CB.getParamAlign(ArgNo).value_or(u: Align(IntptrSize));
        if (ArgAlign < IntptrSize)
          ArgAlign = Align(IntptrSize);
        VAArgOffset = alignTo(Size: VAArgOffset, A: ArgAlign);
        if (!IsFixed) {
          Value *Base = getShadowPtrForVAArgument(IRB, ArgOffset: VAArgOffset, ArgSize);
          if (Base) {
            // For ByVal, copy the shadow of the pointed-to aggregate rather
            // than the shadow of the pointer itself.
            Value *AShadowPtr, *AOriginPtr;
            std::tie(args&: AShadowPtr, args&: AOriginPtr) =
                MSV.getShadowOriginPtr(Addr: A, IRB, ShadowTy: IRB.getInt8Ty(),
                                       Alignment: kShadowTLSAlignment, /*isStore*/ false);

            IRB.CreateMemCpy(Dst: Base, DstAlign: kShadowTLSAlignment, Src: AShadowPtr,
                             SrcAlign: kShadowTLSAlignment, Size: ArgSize);
          }
          VAArgOffset += alignTo(Size: ArgSize, A: Align(IntptrSize));
        }
      } else {
        Value *Base;
        uint64_t ArgSize = DL.getTypeAllocSize(Ty: A->getType());
        Align ArgAlign = Align(IntptrSize);
        VAArgOffset = alignTo(Size: VAArgOffset, A: ArgAlign);
        if (DL.isBigEndian()) {
          // Adjusting the shadow for argument with size < IntptrSize to match
          // the placement of bits in big endian system
          if (ArgSize < IntptrSize)
            VAArgOffset += (IntptrSize - ArgSize);
        }
        if (!IsFixed) {
          Base = getShadowPtrForVAArgument(IRB, ArgOffset: VAArgOffset, ArgSize);
          if (Base)
            IRB.CreateAlignedStore(Val: MSV.getShadow(V: A), Ptr: Base, Align: kShadowTLSAlignment);
          VAArgOffset += ArgSize;
          VAArgOffset = alignTo(Size: VAArgOffset, A: Align(IntptrSize));
        }
      }
    }

    Constant *TotalVAArgSize = ConstantInt::get(Ty: MS.IntptrTy, V: VAArgOffset);
    // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
    // a new class member i.e. it is the total size of all VarArgs.
    IRB.CreateStore(Val: TotalVAArgSize, Ptr: MS.VAArgOverflowSizeTLS);
  }

  // Snapshots the vararg shadow TLS in the prologue and, after each va_start,
  // copies the snapshot over the shadow of the area the va_list points to.
  void finalizeInstrumentation() override {
    assert(!VAArgSize && !VAArgTLSCopy &&
           "finalizeInstrumentation called twice");
    IRBuilder<> IRB(MSV.FnPrologueEnd);
    VAArgSize = IRB.CreateLoad(Ty: MS.IntptrTy, Ptr: MS.VAArgOverflowSizeTLS);
    Value *CopySize = VAArgSize;

    if (!VAStartInstrumentationList.empty()) {
      // If there is a va_start in this function, make a backup copy of
      // va_arg_tls somewhere in the function entry block.
      VAArgTLSCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
      VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
      IRB.CreateMemSet(Ptr: VAArgTLSCopy, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
                       Size: CopySize, Align: kShadowTLSAlignment, isVolatile: false);

      // Never read past the end of the TLS buffer.
      Value *SrcSize = IRB.CreateBinaryIntrinsic(
          ID: Intrinsic::umin, LHS: CopySize,
          RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kParamTLSSize));
      IRB.CreateMemCpy(Dst: VAArgTLSCopy, DstAlign: kShadowTLSAlignment, Src: MS.VAArgTLS,
                       SrcAlign: kShadowTLSAlignment, Size: SrcSize);
    }

    // Instrument va_start.
    // Copy va_list shadow from the backup copy of the TLS contents.
    for (CallInst *OrigInst : VAStartInstrumentationList) {
      NextNodeIRBuilder IRB(OrigInst);
      Value *VAListTag = OrigInst->getArgOperand(i: 0);
      Type *RegSaveAreaPtrTy = PointerType::getUnqual(C&: *MS.C);
      // The va_list tag is treated as a pointer-to-pointer; load the address
      // of the saved-argument area from it.
      Value *RegSaveAreaPtrPtr =
          IRB.CreateIntToPtr(V: IRB.CreatePtrToInt(V: VAListTag, DestTy: MS.IntptrTy),
                             DestTy: PointerType::get(C&: *MS.C, AddressSpace: 0));
      Value *RegSaveAreaPtr =
          IRB.CreateLoad(Ty: RegSaveAreaPtrTy, Ptr: RegSaveAreaPtrPtr);
      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
      const DataLayout &DL = F.getDataLayout();
      unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
      const Align Alignment = Align(IntptrSize);
      std::tie(args&: RegSaveAreaShadowPtr, args&: RegSaveAreaOriginPtr) =
          MSV.getShadowOriginPtr(Addr: RegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
                                 Alignment, /*isStore*/ true);
      IRB.CreateMemCpy(Dst: RegSaveAreaShadowPtr, DstAlign: Alignment, Src: VAArgTLSCopy, SrcAlign: Alignment,
                       Size: CopySize);
    }
  }
};
9285
9286/// Implementation of VarArgHelper that is used for ARM32, MIPS, RISCV,
9287/// LoongArch64.
9288struct VarArgGenericHelper : public VarArgHelperBase {
9289 AllocaInst *VAArgTLSCopy = nullptr;
9290 Value *VAArgSize = nullptr;
9291
9292 VarArgGenericHelper(Function &F, MemorySanitizer &MS,
9293 MemorySanitizerVisitor &MSV, const unsigned VAListTagSize)
9294 : VarArgHelperBase(F, MS, MSV, VAListTagSize) {}
9295
9296 void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
9297 unsigned VAArgOffset = 0;
9298 const DataLayout &DL = F.getDataLayout();
9299 unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
9300 for (const auto &[ArgNo, A] : llvm::enumerate(First: CB.args())) {
9301 bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
9302 if (IsFixed)
9303 continue;
9304 uint64_t ArgSize = DL.getTypeAllocSize(Ty: A->getType());
9305 if (DL.isBigEndian()) {
9306 // Adjusting the shadow for argument with size < IntptrSize to match the
9307 // placement of bits in big endian system
9308 if (ArgSize < IntptrSize)
9309 VAArgOffset += (IntptrSize - ArgSize);
9310 }
9311 Value *Base = getShadowPtrForVAArgument(IRB, ArgOffset: VAArgOffset, ArgSize);
9312 VAArgOffset += ArgSize;
9313 VAArgOffset = alignTo(Value: VAArgOffset, Align: IntptrSize);
9314 if (!Base)
9315 continue;
9316 IRB.CreateAlignedStore(Val: MSV.getShadow(V: A), Ptr: Base, Align: kShadowTLSAlignment);
9317 }
9318
9319 Constant *TotalVAArgSize = ConstantInt::get(Ty: MS.IntptrTy, V: VAArgOffset);
9320 // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
9321 // a new class member i.e. it is the total size of all VarArgs.
9322 IRB.CreateStore(Val: TotalVAArgSize, Ptr: MS.VAArgOverflowSizeTLS);
9323 }
9324
9325 void finalizeInstrumentation() override {
9326 assert(!VAArgSize && !VAArgTLSCopy &&
9327 "finalizeInstrumentation called twice");
9328 IRBuilder<> IRB(MSV.FnPrologueEnd);
9329 VAArgSize = IRB.CreateLoad(Ty: MS.IntptrTy, Ptr: MS.VAArgOverflowSizeTLS);
9330 Value *CopySize = VAArgSize;
9331
9332 if (!VAStartInstrumentationList.empty()) {
9333 // If there is a va_start in this function, make a backup copy of
9334 // va_arg_tls somewhere in the function entry block.
9335 VAArgTLSCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
9336 VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
9337 IRB.CreateMemSet(Ptr: VAArgTLSCopy, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
9338 Size: CopySize, Align: kShadowTLSAlignment, isVolatile: false);
9339
9340 Value *SrcSize = IRB.CreateBinaryIntrinsic(
9341 ID: Intrinsic::umin, LHS: CopySize,
9342 RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kParamTLSSize));
9343 IRB.CreateMemCpy(Dst: VAArgTLSCopy, DstAlign: kShadowTLSAlignment, Src: MS.VAArgTLS,
9344 SrcAlign: kShadowTLSAlignment, Size: SrcSize);
9345 }
9346
9347 // Instrument va_start.
9348 // Copy va_list shadow from the backup copy of the TLS contents.
9349 for (CallInst *OrigInst : VAStartInstrumentationList) {
9350 NextNodeIRBuilder IRB(OrigInst);
9351 Value *VAListTag = OrigInst->getArgOperand(i: 0);
9352 Type *RegSaveAreaPtrTy = PointerType::getUnqual(C&: *MS.C);
9353 Value *RegSaveAreaPtrPtr =
9354 IRB.CreateIntToPtr(V: IRB.CreatePtrToInt(V: VAListTag, DestTy: MS.IntptrTy),
9355 DestTy: PointerType::get(C&: *MS.C, AddressSpace: 0));
9356 Value *RegSaveAreaPtr =
9357 IRB.CreateLoad(Ty: RegSaveAreaPtrTy, Ptr: RegSaveAreaPtrPtr);
9358 Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
9359 const DataLayout &DL = F.getDataLayout();
9360 unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
9361 const Align Alignment = Align(IntptrSize);
9362 std::tie(args&: RegSaveAreaShadowPtr, args&: RegSaveAreaOriginPtr) =
9363 MSV.getShadowOriginPtr(Addr: RegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
9364 Alignment, /*isStore*/ true);
9365 IRB.CreateMemCpy(Dst: RegSaveAreaShadowPtr, DstAlign: Alignment, Src: VAArgTLSCopy, SrcAlign: Alignment,
9366 Size: CopySize);
9367 }
9368 }
9369};
9370
// ARM32, LoongArch64, MIPS, RISCV and Hexagon share the same generic
// handling of variadic arguments, so they all reuse VarArgGenericHelper.
9373using VarArgARM32Helper = VarArgGenericHelper;
9374using VarArgRISCVHelper = VarArgGenericHelper;
9375using VarArgMIPSHelper = VarArgGenericHelper;
9376using VarArgLoongArch64Helper = VarArgGenericHelper;
9377using VarArgHexagonHelper = VarArgGenericHelper;
9378
9379/// A no-op implementation of VarArgHelper.
9380struct VarArgNoOpHelper : public VarArgHelper {
9381 VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
9382 MemorySanitizerVisitor &MSV) {}
9383
9384 void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {}
9385
9386 void visitVAStartInst(VAStartInst &I) override {}
9387
9388 void visitVACopyInst(VACopyInst &I) override {}
9389
9390 void finalizeInstrumentation() override {}
9391};
9392
9393} // end anonymous namespace
9394
9395static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
9396 MemorySanitizerVisitor &Visitor) {
9397 // VarArg handling is only implemented on AMD64. False positives are possible
9398 // on other platforms.
9399 Triple TargetTriple(Func.getParent()->getTargetTriple());
9400
9401 if (TargetTriple.getArch() == Triple::x86)
9402 return new VarArgI386Helper(Func, Msan, Visitor);
9403
9404 if (TargetTriple.getArch() == Triple::x86_64)
9405 return new VarArgAMD64Helper(Func, Msan, Visitor);
9406
9407 if (TargetTriple.isARM())
9408 return new VarArgARM32Helper(Func, Msan, Visitor, /*VAListTagSize=*/4);
9409
9410 if (TargetTriple.isAArch64())
9411 return new VarArgAArch64Helper(Func, Msan, Visitor);
9412
9413 if (TargetTriple.isSystemZ())
9414 return new VarArgSystemZHelper(Func, Msan, Visitor);
9415
9416 // On PowerPC32 VAListTag is a struct
9417 // {char, char, i16 padding, char *, char *}
9418 if (TargetTriple.isPPC32())
9419 return new VarArgPowerPC32Helper(Func, Msan, Visitor);
9420
9421 if (TargetTriple.isPPC64())
9422 return new VarArgPowerPC64Helper(Func, Msan, Visitor);
9423
9424 if (TargetTriple.isRISCV32())
9425 return new VarArgRISCVHelper(Func, Msan, Visitor, /*VAListTagSize=*/4);
9426
9427 if (TargetTriple.isRISCV64())
9428 return new VarArgRISCVHelper(Func, Msan, Visitor, /*VAListTagSize=*/8);
9429
9430 if (TargetTriple.isMIPS32())
9431 return new VarArgMIPSHelper(Func, Msan, Visitor, /*VAListTagSize=*/4);
9432
9433 if (TargetTriple.isMIPS64())
9434 return new VarArgMIPSHelper(Func, Msan, Visitor, /*VAListTagSize=*/8);
9435
9436 if (TargetTriple.isLoongArch64())
9437 return new VarArgLoongArch64Helper(Func, Msan, Visitor,
9438 /*VAListTagSize=*/8);
9439
9440 if (TargetTriple.getArch() == Triple::hexagon)
9441 return new VarArgHexagonHelper(Func, Msan, Visitor, /*VAListTagSize=*/12);
9442
9443 return new VarArgNoOpHelper(Func, Msan, Visitor);
9444}
9445
9446bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
9447 if (!CompileKernel && F.getName() == kMsanModuleCtorName)
9448 return false;
9449
9450 if (F.hasFnAttribute(Kind: Attribute::DisableSanitizerInstrumentation))
9451 return false;
9452
9453 MemorySanitizerVisitor Visitor(F, *this, TLI);
9454
9455 // Clear out memory attributes.
9456 AttributeMask B;
9457 B.addAttribute(Val: Attribute::Memory).addAttribute(Val: Attribute::Speculatable);
9458 F.removeFnAttrs(Attrs: B);
9459
9460 return Visitor.runOnFunction();
9461}
9462