1//===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file is a part of MemorySanitizer, a detector of uninitialized
11/// reads.
12///
13/// The algorithm of the tool is similar to Memcheck
14/// (https://static.usenix.org/event/usenix05/tech/general/full_papers/seward/seward_html/usenix2005.html)
15/// We associate a few shadow bits with every byte of the application memory,
/// poison the shadow of the malloc-ed or alloca-ed memory, load the shadow
/// bits on every memory read, propagate the shadow bits through some of the
/// arithmetic instructions (including MOV), store the shadow bits on every
19/// memory write, report a bug on some other instructions (e.g. JMP) if the
20/// associated shadow is poisoned.
21///
22/// But there are differences too. The first and the major one:
23/// compiler instrumentation instead of binary instrumentation. This
24/// gives us much better register allocation, possible compiler
25/// optimizations and a fast start-up. But this brings the major issue
26/// as well: msan needs to see all program events, including system
27/// calls and reads/writes in system libraries, so we either need to
28/// compile *everything* with msan or use a binary translation
29/// component (e.g. DynamoRIO) to instrument pre-built libraries.
30/// Another difference from Memcheck is that we use 8 shadow bits per
31/// byte of application memory and use a direct shadow mapping. This
32/// greatly simplifies the instrumentation code and avoids races on
33/// shadow updates (Memcheck is single-threaded so races are not a
34/// concern there. Memcheck uses 2 shadow bits per byte with a slow
35/// path storage that uses 8 bits per byte).
36///
37/// The default value of shadow is 0, which means "clean" (not poisoned).
38///
39/// Every module initializer should call __msan_init to ensure that the
40/// shadow memory is ready. On error, __msan_warning is called. Since
41/// parameters and return values may be passed via registers, we have a
42/// specialized thread-local shadow for return values
43/// (__msan_retval_tls) and parameters (__msan_param_tls).
44///
45/// Origin tracking.
46///
47/// MemorySanitizer can track origins (allocation points) of all uninitialized
48/// values. This behavior is controlled with a flag (msan-track-origins) and is
49/// disabled by default.
50///
51/// Origins are 4-byte values created and interpreted by the runtime library.
52/// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
53/// of application memory. Propagation of origins is basically a bunch of
54/// "select" instructions that pick the origin of a dirty argument, if an
55/// instruction has one.
56///
57/// Every 4 aligned, consecutive bytes of application memory have one origin
58/// value associated with them. If these bytes contain uninitialized data
59/// coming from 2 different allocations, the last store wins. Because of this,
60/// MemorySanitizer reports can show unrelated origins, but this is unlikely in
61/// practice.
62///
63/// Origins are meaningless for fully initialized values, so MemorySanitizer
64/// avoids storing origin to memory when a fully initialized value is stored.
65/// This way it avoids needless overwriting origin of the 4-byte region on
66/// a short (i.e. 1 byte) clean store, and it is also good for performance.
67///
68/// Atomic handling.
69///
70/// Ideally, every atomic store of application value should update the
71/// corresponding shadow location in an atomic way. Unfortunately, atomic store
72/// of two disjoint locations can not be done without severe slowdown.
73///
74/// Therefore, we implement an approximation that may err on the safe side.
75/// In this implementation, every atomically accessed location in the program
76/// may only change from (partially) uninitialized to fully initialized, but
77/// not the other way around. We load the shadow _after_ the application load,
78/// and we store the shadow _before_ the app store. Also, we always store clean
79/// shadow (if the application store is atomic). This way, if the store-load
80/// pair constitutes a happens-before arc, shadow store and load are correctly
81/// ordered such that the load will get either the value that was stored, or
82/// some later value (which is always clean).
83///
84/// This does not work very well with Compare-And-Swap (CAS) and
85/// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
86/// must store the new shadow before the app operation, and load the shadow
87/// after the app operation. Computers don't work this way. Current
88/// implementation ignores the load aspect of CAS/RMW, always returning a clean
89/// value. It implements the store part as a simple atomic store by storing a
90/// clean shadow.
91///
92/// Instrumenting inline assembly.
93///
94/// For inline assembly code LLVM has little idea about which memory locations
95/// become initialized depending on the arguments. It can be possible to figure
96/// out which arguments are meant to point to inputs and outputs, but the
97/// actual semantics can be only visible at runtime. In the Linux kernel it's
98/// also possible that the arguments only indicate the offset for a base taken
99/// from a segment register, so it's dangerous to treat any asm() arguments as
100/// pointers. We take a conservative approach generating calls to
101/// __msan_instrument_asm_store(ptr, size)
102/// , which defer the memory unpoisoning to the runtime library.
103/// The latter can perform more complex address checks to figure out whether
104/// it's safe to touch the shadow memory.
105/// Like with atomic operations, we call __msan_instrument_asm_store() before
106/// the assembly call, so that changes to the shadow memory will be seen by
107/// other threads together with main memory initialization.
108///
109/// KernelMemorySanitizer (KMSAN) implementation.
110///
111/// The major differences between KMSAN and MSan instrumentation are:
112/// - KMSAN always tracks the origins and implies msan-keep-going=true;
113/// - KMSAN allocates shadow and origin memory for each page separately, so
114/// there are no explicit accesses to shadow and origin in the
115/// instrumentation.
116/// Shadow and origin values for a particular X-byte memory location
117/// (X=1,2,4,8) are accessed through pointers obtained via the
118/// __msan_metadata_ptr_for_load_X(ptr)
119/// __msan_metadata_ptr_for_store_X(ptr)
/// functions. The corresponding functions check that the X-byte accesses
/// are possible and return the pointers to shadow and origin memory.
122/// Arbitrary sized accesses are handled with:
123/// __msan_metadata_ptr_for_load_n(ptr, size)
124/// __msan_metadata_ptr_for_store_n(ptr, size);
/// Note that the sanitizer code has to deal with how shadow/origin pairs
/// returned by these functions are represented in different ABIs. In
127/// the X86_64 ABI they are returned in RDX:RAX, in PowerPC64 they are
128/// returned in r3 and r4, and in the SystemZ ABI they are written to memory
129/// pointed to by a hidden parameter.
130/// - TLS variables are stored in a single per-task struct. A call to a
131/// function __msan_get_context_state() returning a pointer to that struct
132/// is inserted into every instrumented function before the entry block;
133/// - __msan_warning() takes a 32-bit origin parameter;
134/// - local variables are poisoned with __msan_poison_alloca() upon function
135/// entry and unpoisoned with __msan_unpoison_alloca() before leaving the
136/// function;
137/// - the pass doesn't declare any global variables or add global constructors
138/// to the translation unit.
139///
140/// Also, KMSAN currently ignores uninitialized memory passed into inline asm
141/// calls, making sure we're on the safe side wrt. possible false positives.
142///
143/// KernelMemorySanitizer only supports X86_64, SystemZ and PowerPC64 at the
144/// moment.
145///
146//
147// FIXME: This sanitizer does not yet handle scalable vectors
148//
149//===----------------------------------------------------------------------===//
150
151#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
152#include "llvm/ADT/APInt.h"
153#include "llvm/ADT/ArrayRef.h"
154#include "llvm/ADT/DenseMap.h"
155#include "llvm/ADT/DepthFirstIterator.h"
156#include "llvm/ADT/SetVector.h"
157#include "llvm/ADT/SmallPtrSet.h"
158#include "llvm/ADT/SmallVector.h"
159#include "llvm/ADT/StringExtras.h"
160#include "llvm/ADT/StringRef.h"
161#include "llvm/Analysis/GlobalsModRef.h"
162#include "llvm/Analysis/TargetLibraryInfo.h"
163#include "llvm/Analysis/ValueTracking.h"
164#include "llvm/IR/Argument.h"
165#include "llvm/IR/AttributeMask.h"
166#include "llvm/IR/Attributes.h"
167#include "llvm/IR/BasicBlock.h"
168#include "llvm/IR/CallingConv.h"
169#include "llvm/IR/Constant.h"
170#include "llvm/IR/Constants.h"
171#include "llvm/IR/DataLayout.h"
172#include "llvm/IR/DerivedTypes.h"
173#include "llvm/IR/Function.h"
174#include "llvm/IR/GlobalValue.h"
175#include "llvm/IR/GlobalVariable.h"
176#include "llvm/IR/IRBuilder.h"
177#include "llvm/IR/InlineAsm.h"
178#include "llvm/IR/InstVisitor.h"
179#include "llvm/IR/InstrTypes.h"
180#include "llvm/IR/Instruction.h"
181#include "llvm/IR/Instructions.h"
182#include "llvm/IR/IntrinsicInst.h"
183#include "llvm/IR/Intrinsics.h"
184#include "llvm/IR/IntrinsicsAArch64.h"
185#include "llvm/IR/IntrinsicsX86.h"
186#include "llvm/IR/MDBuilder.h"
187#include "llvm/IR/Module.h"
188#include "llvm/IR/Type.h"
189#include "llvm/IR/Value.h"
190#include "llvm/IR/ValueMap.h"
191#include "llvm/Support/Alignment.h"
192#include "llvm/Support/AtomicOrdering.h"
193#include "llvm/Support/Casting.h"
194#include "llvm/Support/CommandLine.h"
195#include "llvm/Support/Debug.h"
196#include "llvm/Support/DebugCounter.h"
197#include "llvm/Support/ErrorHandling.h"
198#include "llvm/Support/MathExtras.h"
199#include "llvm/Support/raw_ostream.h"
200#include "llvm/TargetParser/Triple.h"
201#include "llvm/Transforms/Utils/BasicBlockUtils.h"
202#include "llvm/Transforms/Utils/Instrumentation.h"
203#include "llvm/Transforms/Utils/Local.h"
204#include "llvm/Transforms/Utils/ModuleUtils.h"
205#include <algorithm>
206#include <cassert>
207#include <cstddef>
208#include <cstdint>
209#include <memory>
210#include <numeric>
211#include <string>
212#include <tuple>
213
214using namespace llvm;
215
216#define DEBUG_TYPE "msan"
217
218DEBUG_COUNTER(DebugInsertCheck, "msan-insert-check",
219 "Controls which checks to insert");
220
221DEBUG_COUNTER(DebugInstrumentInstruction, "msan-instrument-instruction",
222 "Controls which instruction to instrument");
223
224static const unsigned kOriginSize = 4;
225static const Align kMinOriginAlignment = Align(4);
226static const Align kShadowTLSAlignment = Align(8);
227
228// These constants must be kept in sync with the ones in msan.h.
229// TODO: increase size to match SVE/SVE2/SME/SME2 limits
230static const unsigned kParamTLSSize = 800;
231static const unsigned kRetvalTLSSize = 800;
232
233// Accesses sizes are powers of two: 1, 2, 4, 8.
234static const size_t kNumberOfAccessSizes = 4;
235
236/// Track origins of uninitialized values.
237///
238/// Adds a section to MemorySanitizer report that points to the allocation
239/// (stack or heap) the uninitialized bits came from originally.
240static cl::opt<int> ClTrackOrigins(
241 "msan-track-origins",
242 cl::desc("Track origins (allocation sites) of poisoned memory"), cl::Hidden,
243 cl::init(Val: 0));
244
245static cl::opt<bool> ClKeepGoing("msan-keep-going",
246 cl::desc("keep going after reporting a UMR"),
247 cl::Hidden, cl::init(Val: false));
248
249static cl::opt<bool>
250 ClPoisonStack("msan-poison-stack",
251 cl::desc("poison uninitialized stack variables"), cl::Hidden,
252 cl::init(Val: true));
253
254static cl::opt<bool> ClPoisonStackWithCall(
255 "msan-poison-stack-with-call",
256 cl::desc("poison uninitialized stack variables with a call"), cl::Hidden,
257 cl::init(Val: false));
258
259static cl::opt<int> ClPoisonStackPattern(
260 "msan-poison-stack-pattern",
261 cl::desc("poison uninitialized stack variables with the given pattern"),
262 cl::Hidden, cl::init(Val: 0xff));
263
264static cl::opt<bool>
265 ClPrintStackNames("msan-print-stack-names",
266 cl::desc("Print name of local stack variable"),
267 cl::Hidden, cl::init(Val: true));
268
269static cl::opt<bool>
270 ClPoisonUndef("msan-poison-undef",
271 cl::desc("Poison fully undef temporary values. "
272 "Partially undefined constant vectors "
273 "are unaffected by this flag (see "
274 "-msan-poison-undef-vectors)."),
275 cl::Hidden, cl::init(Val: true));
276
277static cl::opt<bool> ClPoisonUndefVectors(
278 "msan-poison-undef-vectors",
279 cl::desc("Precisely poison partially undefined constant vectors. "
280 "If false (legacy behavior), the entire vector is "
281 "considered fully initialized, which may lead to false "
282 "negatives. Fully undefined constant vectors are "
283 "unaffected by this flag (see -msan-poison-undef)."),
284 cl::Hidden, cl::init(Val: false));
285
286static cl::opt<bool> ClPreciseDisjointOr(
287 "msan-precise-disjoint-or",
288 cl::desc("Precisely poison disjoint OR. If false (legacy behavior), "
289 "disjointedness is ignored (i.e., 1|1 is initialized)."),
290 cl::Hidden, cl::init(Val: false));
291
292static cl::opt<bool>
293 ClHandleICmp("msan-handle-icmp",
294 cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
295 cl::Hidden, cl::init(Val: true));
296
297static cl::opt<bool>
298 ClHandleICmpExact("msan-handle-icmp-exact",
299 cl::desc("exact handling of relational integer ICmp"),
300 cl::Hidden, cl::init(Val: true));
301
302static cl::opt<int> ClSwitchPrecision(
303 "msan-switch-precision",
304 cl::desc("Controls the number of cases considered by MSan for LLVM switch "
305 "instructions. 0 means no UUMs detected. Higher values lead to "
306 "fewer false negatives but may impact compiler and/or "
307 "application performance. N.B. LLVM switch instructions do not "
308 "correspond exactly to C++ switch statements."),
309 cl::Hidden, cl::init(Val: 99));
310
311static cl::opt<bool> ClHandleLifetimeIntrinsics(
312 "msan-handle-lifetime-intrinsics",
313 cl::desc(
314 "when possible, poison scoped variables at the beginning of the scope "
315 "(slower, but more precise)"),
316 cl::Hidden, cl::init(Val: true));
317
318// When compiling the Linux kernel, we sometimes see false positives related to
319// MSan being unable to understand that inline assembly calls may initialize
320// local variables.
321// This flag makes the compiler conservatively unpoison every memory location
322// passed into an assembly call. Note that this may cause false positives.
323// Because it's impossible to figure out the array sizes, we can only unpoison
324// the first sizeof(type) bytes for each type* pointer.
325static cl::opt<bool> ClHandleAsmConservative(
326 "msan-handle-asm-conservative",
327 cl::desc("conservative handling of inline assembly"), cl::Hidden,
328 cl::init(Val: true));
329
330// This flag controls whether we check the shadow of the address
331// operand of load or store. Such bugs are very rare, since load from
332// a garbage address typically results in SEGV, but still happen
333// (e.g. only lower bits of address are garbage, or the access happens
334// early at program startup where malloc-ed memory is more likely to
335// be zeroed. As of 2012-08-28 this flag adds 20% slowdown.
336static cl::opt<bool> ClCheckAccessAddress(
337 "msan-check-access-address",
338 cl::desc("report accesses through a pointer which has poisoned shadow"),
339 cl::Hidden, cl::init(Val: true));
340
341static cl::opt<bool> ClEagerChecks(
342 "msan-eager-checks",
343 cl::desc("check arguments and return values at function call boundaries"),
344 cl::Hidden, cl::init(Val: false));
345
346static cl::opt<bool> ClDumpStrictInstructions(
347 "msan-dump-strict-instructions",
348 cl::desc("print out instructions with default strict semantics i.e.,"
349 "check that all the inputs are fully initialized, and mark "
350 "the output as fully initialized. These semantics are applied "
351 "to instructions that could not be handled explicitly nor "
352 "heuristically."),
353 cl::Hidden, cl::init(Val: false));
354
355// Currently, all the heuristically handled instructions are specifically
356// IntrinsicInst. However, we use the broader "HeuristicInstructions" name
357// to parallel 'msan-dump-strict-instructions', and to keep the door open to
358// handling non-intrinsic instructions heuristically.
359static cl::opt<bool> ClDumpHeuristicInstructions(
360 "msan-dump-heuristic-instructions",
361 cl::desc("Prints 'unknown' instructions that were handled heuristically. "
362 "Use -msan-dump-strict-instructions to print instructions that "
363 "could not be handled explicitly nor heuristically."),
364 cl::Hidden, cl::init(Val: false));
365
366static cl::opt<int> ClInstrumentationWithCallThreshold(
367 "msan-instrumentation-with-call-threshold",
368 cl::desc(
369 "If the function being instrumented requires more than "
370 "this number of checks and origin stores, use callbacks instead of "
371 "inline checks (-1 means never use callbacks)."),
372 cl::Hidden, cl::init(Val: 3500));
373
374static cl::opt<bool>
375 ClEnableKmsan("msan-kernel",
376 cl::desc("Enable KernelMemorySanitizer instrumentation"),
377 cl::Hidden, cl::init(Val: false));
378
379static cl::opt<bool>
380 ClDisableChecks("msan-disable-checks",
381 cl::desc("Apply no_sanitize to the whole file"), cl::Hidden,
382 cl::init(Val: false));
383
384static cl::opt<bool>
385 ClCheckConstantShadow("msan-check-constant-shadow",
386 cl::desc("Insert checks for constant shadow values"),
387 cl::Hidden, cl::init(Val: true));
388
389// This is off by default because of a bug in gold:
390// https://sourceware.org/bugzilla/show_bug.cgi?id=19002
391static cl::opt<bool>
392 ClWithComdat("msan-with-comdat",
393 cl::desc("Place MSan constructors in comdat sections"),
394 cl::Hidden, cl::init(Val: false));
395
396// These options allow to specify custom memory map parameters
397// See MemoryMapParams for details.
398static cl::opt<uint64_t> ClAndMask("msan-and-mask",
399 cl::desc("Define custom MSan AndMask"),
400 cl::Hidden, cl::init(Val: 0));
401
402static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
403 cl::desc("Define custom MSan XorMask"),
404 cl::Hidden, cl::init(Val: 0));
405
406static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
407 cl::desc("Define custom MSan ShadowBase"),
408 cl::Hidden, cl::init(Val: 0));
409
410static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
411 cl::desc("Define custom MSan OriginBase"),
412 cl::Hidden, cl::init(Val: 0));
413
414static cl::opt<int>
415 ClDisambiguateWarning("msan-disambiguate-warning-threshold",
416 cl::desc("Define threshold for number of checks per "
417 "debug location to force origin update."),
418 cl::Hidden, cl::init(Val: 3));
419
420const char kMsanModuleCtorName[] = "msan.module_ctor";
421const char kMsanInitName[] = "__msan_init";
422
namespace {

/// Parameters of the application-to-shadow address calculation:
///   Offset = (Addr & ~AndMask) ^ XorMask
///   Shadow = ShadowBase + Offset
///   Origin = OriginBase + Offset
struct MemoryMapParams {
  uint64_t AndMask;    ///< Bits cleared from the application address.
  uint64_t XorMask;    ///< Bits flipped after masking.
  uint64_t ShadowBase; ///< Added to the offset to form the shadow address.
  uint64_t OriginBase; ///< Added to the offset to form the origin address.
};

/// Memory map parameters of one platform, one entry per pointer width.
/// An entry may be null.
struct PlatformMemoryMapParams {
  const MemoryMapParams *bits32;
  const MemoryMapParams *bits64;
};

} // end anonymous namespace
442
443// i386 Linux
444static const MemoryMapParams Linux_I386_MemoryMapParams = {
445 .AndMask: 0x000080000000, // AndMask
446 .XorMask: 0, // XorMask (not used)
447 .ShadowBase: 0, // ShadowBase (not used)
448 .OriginBase: 0x000040000000, // OriginBase
449};
450
451// x86_64 Linux
452static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
453 .AndMask: 0, // AndMask (not used)
454 .XorMask: 0x500000000000, // XorMask
455 .ShadowBase: 0, // ShadowBase (not used)
456 .OriginBase: 0x100000000000, // OriginBase
457};
458
459// mips32 Linux
460// FIXME: Remove -msan-origin-base -msan-and-mask added by PR #109284 to tests
461// after picking good constants
462
463// mips64 Linux
464static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
465 .AndMask: 0, // AndMask (not used)
466 .XorMask: 0x008000000000, // XorMask
467 .ShadowBase: 0, // ShadowBase (not used)
468 .OriginBase: 0x002000000000, // OriginBase
469};
470
471// ppc32 Linux
472// FIXME: Remove -msan-origin-base -msan-and-mask added by PR #109284 to tests
473// after picking good constants
474
475// ppc64 Linux
476static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
477 .AndMask: 0xE00000000000, // AndMask
478 .XorMask: 0x100000000000, // XorMask
479 .ShadowBase: 0x080000000000, // ShadowBase
480 .OriginBase: 0x1C0000000000, // OriginBase
481};
482
483// s390x Linux
484static const MemoryMapParams Linux_S390X_MemoryMapParams = {
485 .AndMask: 0xC00000000000, // AndMask
486 .XorMask: 0, // XorMask (not used)
487 .ShadowBase: 0x080000000000, // ShadowBase
488 .OriginBase: 0x1C0000000000, // OriginBase
489};
490
491// arm32 Linux
492// FIXME: Remove -msan-origin-base -msan-and-mask added by PR #109284 to tests
493// after picking good constants
494
495// aarch64 Linux
496static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
497 .AndMask: 0, // AndMask (not used)
498 .XorMask: 0x0B00000000000, // XorMask
499 .ShadowBase: 0, // ShadowBase (not used)
500 .OriginBase: 0x0200000000000, // OriginBase
501};
502
503// loongarch64 Linux
504static const MemoryMapParams Linux_LoongArch64_MemoryMapParams = {
505 .AndMask: 0, // AndMask (not used)
506 .XorMask: 0x500000000000, // XorMask
507 .ShadowBase: 0, // ShadowBase (not used)
508 .OriginBase: 0x100000000000, // OriginBase
509};
510
511// hexagon Linux
512static const MemoryMapParams Linux_Hexagon_MemoryMapParams = {
513 .AndMask: 0, // AndMask (not used)
514 .XorMask: 0x20000000, // XorMask
515 .ShadowBase: 0, // ShadowBase (not used)
516 .OriginBase: 0x50000000, // OriginBase
517};
518
519// riscv32 Linux
520// FIXME: Remove -msan-origin-base -msan-and-mask added by PR #109284 to tests
521// after picking good constants
522
523// aarch64 FreeBSD
524static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = {
525 .AndMask: 0x1800000000000, // AndMask
526 .XorMask: 0x0400000000000, // XorMask
527 .ShadowBase: 0x0200000000000, // ShadowBase
528 .OriginBase: 0x0700000000000, // OriginBase
529};
530
531// i386 FreeBSD
532static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
533 .AndMask: 0x000180000000, // AndMask
534 .XorMask: 0x000040000000, // XorMask
535 .ShadowBase: 0x000020000000, // ShadowBase
536 .OriginBase: 0x000700000000, // OriginBase
537};
538
539// x86_64 FreeBSD
540static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
541 .AndMask: 0xc00000000000, // AndMask
542 .XorMask: 0x200000000000, // XorMask
543 .ShadowBase: 0x100000000000, // ShadowBase
544 .OriginBase: 0x380000000000, // OriginBase
545};
546
547// x86_64 NetBSD
548static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
549 .AndMask: 0, // AndMask
550 .XorMask: 0x500000000000, // XorMask
551 .ShadowBase: 0, // ShadowBase
552 .OriginBase: 0x100000000000, // OriginBase
553};
554
555static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
556 .bits32: &Linux_I386_MemoryMapParams,
557 .bits64: &Linux_X86_64_MemoryMapParams,
558};
559
560static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
561 .bits32: nullptr,
562 .bits64: &Linux_MIPS64_MemoryMapParams,
563};
564
565static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
566 .bits32: nullptr,
567 .bits64: &Linux_PowerPC64_MemoryMapParams,
568};
569
570static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = {
571 .bits32: nullptr,
572 .bits64: &Linux_S390X_MemoryMapParams,
573};
574
575static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
576 .bits32: nullptr,
577 .bits64: &Linux_AArch64_MemoryMapParams,
578};
579
580static const PlatformMemoryMapParams Linux_LoongArch_MemoryMapParams = {
581 .bits32: nullptr,
582 .bits64: &Linux_LoongArch64_MemoryMapParams,
583};
584
585static const PlatformMemoryMapParams Linux_Hexagon_MemoryMapParams_P = {
586 .bits32: &Linux_Hexagon_MemoryMapParams,
587 .bits64: nullptr,
588};
589
590static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = {
591 .bits32: nullptr,
592 .bits64: &FreeBSD_AArch64_MemoryMapParams,
593};
594
595static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
596 .bits32: &FreeBSD_I386_MemoryMapParams,
597 .bits64: &FreeBSD_X86_64_MemoryMapParams,
598};
599
600static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
601 .bits32: nullptr,
602 .bits64: &NetBSD_X86_64_MemoryMapParams,
603};
604
// Lane selector: all lanes, only the even lanes, or only the odd lanes.
// NOTE(review): the users of this enum are outside this chunk -- confirm the
// exact lane numbering convention there.
enum OddOrEvenLanes {
  kBothLanes,
  kEvenLanes,
  kOddLanes,
};
606
607namespace {
608
609/// Instrument functions of a module to detect uninitialized reads.
610///
611/// Instantiating MemorySanitizer inserts the msan runtime library API function
612/// declarations into the module if they don't exist already. Instantiating
613/// ensures the __msan_init function is in the list of global constructors for
614/// the module.
615class MemorySanitizer {
616public:
617 MemorySanitizer(Module &M, MemorySanitizerOptions Options)
618 : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
619 Recover(Options.Recover), EagerChecks(Options.EagerChecks) {
620 initializeModule(M);
621 }
622
623 // MSan cannot be moved or copied because of MapParams.
624 MemorySanitizer(MemorySanitizer &&) = delete;
625 MemorySanitizer &operator=(MemorySanitizer &&) = delete;
626 MemorySanitizer(const MemorySanitizer &) = delete;
627 MemorySanitizer &operator=(const MemorySanitizer &) = delete;
628
629 bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);
630
631private:
632 friend struct MemorySanitizerVisitor;
633 friend struct VarArgHelperBase;
634 friend struct VarArgAMD64Helper;
635 friend struct VarArgAArch64Helper;
636 friend struct VarArgPowerPC64Helper;
637 friend struct VarArgPowerPC32Helper;
638 friend struct VarArgSystemZHelper;
639 friend struct VarArgI386Helper;
640 friend struct VarArgGenericHelper;
641
642 void initializeModule(Module &M);
643 void initializeCallbacks(Module &M, const TargetLibraryInfo &TLI);
644 void createKernelApi(Module &M, const TargetLibraryInfo &TLI);
645 void createUserspaceApi(Module &M, const TargetLibraryInfo &TLI);
646
647 template <typename... ArgsTy>
648 FunctionCallee getOrInsertMsanMetadataFunction(Module &M, StringRef Name,
649 ArgsTy... Args);
650
651 /// True if we're compiling the Linux kernel.
652 bool CompileKernel;
653 /// Track origins (allocation points) of uninitialized values.
654 int TrackOrigins;
655 bool Recover;
656 bool EagerChecks;
657
658 Triple TargetTriple;
659 LLVMContext *C;
660 Type *IntptrTy; ///< Integer type with the size of a ptr in default AS.
661 Type *OriginTy;
662 PointerType *PtrTy; ///< Integer type with the size of a ptr in default AS.
663
664 // XxxTLS variables represent the per-thread state in MSan and per-task state
665 // in KMSAN.
666 // For the userspace these point to thread-local globals. In the kernel land
667 // they point to the members of a per-task struct obtained via a call to
668 // __msan_get_context_state().
669
670 /// Thread-local shadow storage for function parameters.
671 Value *ParamTLS;
672
673 /// Thread-local origin storage for function parameters.
674 Value *ParamOriginTLS;
675
676 /// Thread-local shadow storage for function return value.
677 Value *RetvalTLS;
678
679 /// Thread-local origin storage for function return value.
680 Value *RetvalOriginTLS;
681
682 /// Thread-local shadow storage for in-register va_arg function.
683 Value *VAArgTLS;
684
685 /// Thread-local shadow storage for in-register va_arg function.
686 Value *VAArgOriginTLS;
687
688 /// Thread-local shadow storage for va_arg overflow area.
689 Value *VAArgOverflowSizeTLS;
690
691 /// Are the instrumentation callbacks set up?
692 bool CallbacksInitialized = false;
693
694 /// The run-time callback to print a warning.
695 FunctionCallee WarningFn;
696
697 // These arrays are indexed by log2(AccessSize).
698 FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
699 FunctionCallee MaybeWarningVarSizeFn;
700 FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
701
702 /// Run-time helper that generates a new origin value for a stack
703 /// allocation.
704 FunctionCallee MsanSetAllocaOriginWithDescriptionFn;
705 // No description version
706 FunctionCallee MsanSetAllocaOriginNoDescriptionFn;
707
708 /// Run-time helper that poisons stack on function entry.
709 FunctionCallee MsanPoisonStackFn;
710
711 /// Run-time helper that records a store (or any event) of an
712 /// uninitialized value and returns an updated origin id encoding this info.
713 FunctionCallee MsanChainOriginFn;
714
715 /// Run-time helper that paints an origin over a region.
716 FunctionCallee MsanSetOriginFn;
717
718 /// MSan runtime replacements for memmove, memcpy and memset.
719 FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
720
721 /// KMSAN callback for task-local function argument shadow.
722 StructType *MsanContextStateTy;
723 FunctionCallee MsanGetContextStateFn;
724
725 /// Functions for poisoning/unpoisoning local variables
726 FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
727
728 /// Pair of shadow/origin pointers.
729 Type *MsanMetadata;
730
731 /// Each of the MsanMetadataPtrXxx functions returns a MsanMetadata.
732 FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
733 FunctionCallee MsanMetadataPtrForLoad_1_8[4];
734 FunctionCallee MsanMetadataPtrForStore_1_8[4];
735 FunctionCallee MsanInstrumentAsmStoreFn;
736
737 /// Storage for return values of the MsanMetadataPtrXxx functions.
738 Value *MsanMetadataAlloca;
739
740 /// Helper to choose between different MsanMetadataPtrXxx().
741 FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
742
743 /// Memory map parameters used in application-to-shadow calculation.
744 const MemoryMapParams *MapParams;
745
746 /// Custom memory map parameters used when -msan-shadow-base or
747 // -msan-origin-base is provided.
748 MemoryMapParams CustomMapParams;
749
750 MDNode *ColdCallWeights;
751
752 /// Branch weights for origin store.
753 MDNode *OriginStoreWeights;
754};
755
756void insertModuleCtor(Module &M) {
757 getOrCreateSanitizerCtorAndInitFunctions(
758 M, CtorName: kMsanModuleCtorName, InitName: kMsanInitName,
759 /*InitArgTypes=*/{},
760 /*InitArgs=*/{},
761 // This callback is invoked when the functions are created the first
762 // time. Hook them into the global ctors list in that case:
763 FunctionsCreatedCallback: [&](Function *Ctor, FunctionCallee) {
764 if (!ClWithComdat) {
765 appendToGlobalCtors(M, F: Ctor, Priority: 0);
766 return;
767 }
768 Comdat *MsanCtorComdat = M.getOrInsertComdat(Name: kMsanModuleCtorName);
769 Ctor->setComdat(MsanCtorComdat);
770 appendToGlobalCtors(M, F: Ctor, Priority: 0, Data: Ctor);
771 });
772}
773
774template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
775 return (Opt.getNumOccurrences() > 0) ? Opt : Default;
776}
777
778} // end anonymous namespace
779
780MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K,
781 bool EagerChecks)
782 : Kernel(getOptOrDefault(Opt: ClEnableKmsan, Default: K)),
783 TrackOrigins(getOptOrDefault(Opt: ClTrackOrigins, Default: Kernel ? 2 : TO)),
784 Recover(getOptOrDefault(Opt: ClKeepGoing, Default: Kernel || R)),
785 EagerChecks(getOptOrDefault(Opt: ClEagerChecks, Default: EagerChecks)) {}
786
787PreservedAnalyses MemorySanitizerPass::run(Module &M,
788 ModuleAnalysisManager &AM) {
789 // Return early if nosanitize_memory module flag is present for the module.
790 if (checkIfAlreadyInstrumented(M, Flag: "nosanitize_memory"))
791 return PreservedAnalyses::all();
792 bool Modified = false;
793 if (!Options.Kernel) {
794 insertModuleCtor(M);
795 Modified = true;
796 }
797
798 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
799 for (Function &F : M) {
800 if (F.empty())
801 continue;
802 MemorySanitizer Msan(*F.getParent(), Options);
803 Modified |=
804 Msan.sanitizeFunction(F, TLI&: FAM.getResult<TargetLibraryAnalysis>(IR&: F));
805 }
806
807 if (!Modified)
808 return PreservedAnalyses::all();
809
810 PreservedAnalyses PA = PreservedAnalyses::none();
811 // GlobalsAA is considered stateless and does not get invalidated unless
812 // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
813 // make changes that require GlobalsAA to be invalidated.
814 PA.abandon<GlobalsAA>();
815 return PA;
816}
817
818void MemorySanitizerPass::printPipeline(
819 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
820 static_cast<PassInfoMixin<MemorySanitizerPass> *>(this)->printPipeline(
821 OS, MapClassName2PassName);
822 OS << '<';
823 if (Options.Recover)
824 OS << "recover;";
825 if (Options.Kernel)
826 OS << "kernel;";
827 if (Options.EagerChecks)
828 OS << "eager-checks;";
829 OS << "track-origins=" << Options.TrackOrigins;
830 OS << '>';
831}
832
833/// Create a non-const global initialized with the given string.
834///
835/// Creates a writable global for Str so that we can pass it to the
836/// run-time lib. Runtime uses first 4 bytes of the string to store the
837/// frame ID, so the string needs to be mutable.
838static GlobalVariable *createPrivateConstGlobalForString(Module &M,
839 StringRef Str) {
840 Constant *StrConst = ConstantDataArray::getString(Context&: M.getContext(), Initializer: Str);
841 return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/true,
842 GlobalValue::PrivateLinkage, StrConst, "");
843}
844
845template <typename... ArgsTy>
846FunctionCallee
847MemorySanitizer::getOrInsertMsanMetadataFunction(Module &M, StringRef Name,
848 ArgsTy... Args) {
849 if (TargetTriple.getArch() == Triple::systemz) {
850 // SystemZ ABI: shadow/origin pair is returned via a hidden parameter.
851 return M.getOrInsertFunction(Name, Type::getVoidTy(C&: *C), PtrTy,
852 std::forward<ArgsTy>(Args)...);
853 }
854
855 return M.getOrInsertFunction(Name, MsanMetadata,
856 std::forward<ArgsTy>(Args)...);
857}
858
859/// Create KMSAN API callbacks.
860void MemorySanitizer::createKernelApi(Module &M, const TargetLibraryInfo &TLI) {
861 IRBuilder<> IRB(*C);
862
863 // These will be initialized in insertKmsanPrologue().
864 RetvalTLS = nullptr;
865 RetvalOriginTLS = nullptr;
866 ParamTLS = nullptr;
867 ParamOriginTLS = nullptr;
868 VAArgTLS = nullptr;
869 VAArgOriginTLS = nullptr;
870 VAArgOverflowSizeTLS = nullptr;
871
872 WarningFn = M.getOrInsertFunction(Name: "__msan_warning",
873 AttributeList: TLI.getAttrList(C, ArgNos: {0}, /*Signed=*/false),
874 RetTy: IRB.getVoidTy(), Args: IRB.getInt32Ty());
875
876 // Requests the per-task context state (kmsan_context_state*) from the
877 // runtime library.
878 MsanContextStateTy = StructType::get(
879 elt1: ArrayType::get(ElementType: IRB.getInt64Ty(), NumElements: kParamTLSSize / 8),
880 elts: ArrayType::get(ElementType: IRB.getInt64Ty(), NumElements: kRetvalTLSSize / 8),
881 elts: ArrayType::get(ElementType: IRB.getInt64Ty(), NumElements: kParamTLSSize / 8),
882 elts: ArrayType::get(ElementType: IRB.getInt64Ty(), NumElements: kParamTLSSize / 8), /* va_arg_origin */
883 elts: IRB.getInt64Ty(), elts: ArrayType::get(ElementType: OriginTy, NumElements: kParamTLSSize / 4), elts: OriginTy,
884 elts: OriginTy);
885 MsanGetContextStateFn =
886 M.getOrInsertFunction(Name: "__msan_get_context_state", RetTy: PtrTy);
887
888 MsanMetadata = StructType::get(elt1: PtrTy, elts: PtrTy);
889
890 for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
891 std::string name_load =
892 "__msan_metadata_ptr_for_load_" + std::to_string(val: size);
893 std::string name_store =
894 "__msan_metadata_ptr_for_store_" + std::to_string(val: size);
895 MsanMetadataPtrForLoad_1_8[ind] =
896 getOrInsertMsanMetadataFunction(M, Name: name_load, Args: PtrTy);
897 MsanMetadataPtrForStore_1_8[ind] =
898 getOrInsertMsanMetadataFunction(M, Name: name_store, Args: PtrTy);
899 }
900
901 MsanMetadataPtrForLoadN = getOrInsertMsanMetadataFunction(
902 M, Name: "__msan_metadata_ptr_for_load_n", Args: PtrTy, Args: IntptrTy);
903 MsanMetadataPtrForStoreN = getOrInsertMsanMetadataFunction(
904 M, Name: "__msan_metadata_ptr_for_store_n", Args: PtrTy, Args: IntptrTy);
905
906 // Functions for poisoning and unpoisoning memory.
907 MsanPoisonAllocaFn = M.getOrInsertFunction(
908 Name: "__msan_poison_alloca", RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IntptrTy, Args: PtrTy);
909 MsanUnpoisonAllocaFn = M.getOrInsertFunction(
910 Name: "__msan_unpoison_alloca", RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IntptrTy);
911}
912
913static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
914 return M.getOrInsertGlobal(Name, Ty, CreateGlobalCallback: [&] {
915 return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
916 nullptr, Name, nullptr,
917 GlobalVariable::InitialExecTLSModel);
918 });
919}
920
921/// Insert declarations for userspace-specific functions and globals.
922void MemorySanitizer::createUserspaceApi(Module &M,
923 const TargetLibraryInfo &TLI) {
924 IRBuilder<> IRB(*C);
925
926 // Create the callback.
927 // FIXME: this function should have "Cold" calling conv,
928 // which is not yet implemented.
929 if (TrackOrigins) {
930 StringRef WarningFnName = Recover ? "__msan_warning_with_origin"
931 : "__msan_warning_with_origin_noreturn";
932 WarningFn = M.getOrInsertFunction(Name: WarningFnName,
933 AttributeList: TLI.getAttrList(C, ArgNos: {0}, /*Signed=*/false),
934 RetTy: IRB.getVoidTy(), Args: IRB.getInt32Ty());
935 } else {
936 StringRef WarningFnName =
937 Recover ? "__msan_warning" : "__msan_warning_noreturn";
938 WarningFn = M.getOrInsertFunction(Name: WarningFnName, RetTy: IRB.getVoidTy());
939 }
940
941 // Create the global TLS variables.
942 RetvalTLS =
943 getOrInsertGlobal(M, Name: "__msan_retval_tls",
944 Ty: ArrayType::get(ElementType: IRB.getInt64Ty(), NumElements: kRetvalTLSSize / 8));
945
946 RetvalOriginTLS = getOrInsertGlobal(M, Name: "__msan_retval_origin_tls", Ty: OriginTy);
947
948 ParamTLS =
949 getOrInsertGlobal(M, Name: "__msan_param_tls",
950 Ty: ArrayType::get(ElementType: IRB.getInt64Ty(), NumElements: kParamTLSSize / 8));
951
952 ParamOriginTLS =
953 getOrInsertGlobal(M, Name: "__msan_param_origin_tls",
954 Ty: ArrayType::get(ElementType: OriginTy, NumElements: kParamTLSSize / 4));
955
956 VAArgTLS =
957 getOrInsertGlobal(M, Name: "__msan_va_arg_tls",
958 Ty: ArrayType::get(ElementType: IRB.getInt64Ty(), NumElements: kParamTLSSize / 8));
959
960 VAArgOriginTLS =
961 getOrInsertGlobal(M, Name: "__msan_va_arg_origin_tls",
962 Ty: ArrayType::get(ElementType: OriginTy, NumElements: kParamTLSSize / 4));
963
964 VAArgOverflowSizeTLS = getOrInsertGlobal(M, Name: "__msan_va_arg_overflow_size_tls",
965 Ty: IRB.getIntPtrTy(DL: M.getDataLayout()));
966
967 for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
968 AccessSizeIndex++) {
969 unsigned AccessSize = 1 << AccessSizeIndex;
970 std::string FunctionName = "__msan_maybe_warning_" + itostr(X: AccessSize);
971 MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
972 Name: FunctionName, AttributeList: TLI.getAttrList(C, ArgNos: {0, 1}, /*Signed=*/false),
973 RetTy: IRB.getVoidTy(), Args: IRB.getIntNTy(N: AccessSize * 8), Args: IRB.getInt32Ty());
974 MaybeWarningVarSizeFn = M.getOrInsertFunction(
975 Name: "__msan_maybe_warning_N", AttributeList: TLI.getAttrList(C, ArgNos: {}, /*Signed=*/false),
976 RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IRB.getInt64Ty(), Args: IRB.getInt32Ty());
977 FunctionName = "__msan_maybe_store_origin_" + itostr(X: AccessSize);
978 MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
979 Name: FunctionName, AttributeList: TLI.getAttrList(C, ArgNos: {0, 2}, /*Signed=*/false),
980 RetTy: IRB.getVoidTy(), Args: IRB.getIntNTy(N: AccessSize * 8), Args: PtrTy,
981 Args: IRB.getInt32Ty());
982 }
983
984 MsanSetAllocaOriginWithDescriptionFn =
985 M.getOrInsertFunction(Name: "__msan_set_alloca_origin_with_descr",
986 RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IntptrTy, Args: PtrTy, Args: PtrTy);
987 MsanSetAllocaOriginNoDescriptionFn =
988 M.getOrInsertFunction(Name: "__msan_set_alloca_origin_no_descr",
989 RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IntptrTy, Args: PtrTy);
990 MsanPoisonStackFn = M.getOrInsertFunction(Name: "__msan_poison_stack",
991 RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IntptrTy);
992}
993
994/// Insert extern declaration of runtime-provided functions and globals.
995void MemorySanitizer::initializeCallbacks(Module &M,
996 const TargetLibraryInfo &TLI) {
997 // Only do this once.
998 if (CallbacksInitialized)
999 return;
1000
1001 IRBuilder<> IRB(*C);
1002 // Initialize callbacks that are common for kernel and userspace
1003 // instrumentation.
1004 MsanChainOriginFn = M.getOrInsertFunction(
1005 Name: "__msan_chain_origin",
1006 AttributeList: TLI.getAttrList(C, ArgNos: {0}, /*Signed=*/false, /*Ret=*/true), RetTy: IRB.getInt32Ty(),
1007 Args: IRB.getInt32Ty());
1008 MsanSetOriginFn = M.getOrInsertFunction(
1009 Name: "__msan_set_origin", AttributeList: TLI.getAttrList(C, ArgNos: {2}, /*Signed=*/false),
1010 RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IntptrTy, Args: IRB.getInt32Ty());
1011 MemmoveFn =
1012 M.getOrInsertFunction(Name: "__msan_memmove", RetTy: PtrTy, Args: PtrTy, Args: PtrTy, Args: IntptrTy);
1013 MemcpyFn =
1014 M.getOrInsertFunction(Name: "__msan_memcpy", RetTy: PtrTy, Args: PtrTy, Args: PtrTy, Args: IntptrTy);
1015 MemsetFn = M.getOrInsertFunction(Name: "__msan_memset",
1016 AttributeList: TLI.getAttrList(C, ArgNos: {1}, /*Signed=*/true),
1017 RetTy: PtrTy, Args: PtrTy, Args: IRB.getInt32Ty(), Args: IntptrTy);
1018
1019 MsanInstrumentAsmStoreFn = M.getOrInsertFunction(
1020 Name: "__msan_instrument_asm_store", RetTy: IRB.getVoidTy(), Args: PtrTy, Args: IntptrTy);
1021
1022 if (CompileKernel) {
1023 createKernelApi(M, TLI);
1024 } else {
1025 createUserspaceApi(M, TLI);
1026 }
1027 CallbacksInitialized = true;
1028}
1029
1030FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
1031 int size) {
1032 FunctionCallee *Fns =
1033 isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
1034 switch (size) {
1035 case 1:
1036 return Fns[0];
1037 case 2:
1038 return Fns[1];
1039 case 4:
1040 return Fns[2];
1041 case 8:
1042 return Fns[3];
1043 default:
1044 return nullptr;
1045 }
1046}
1047
1048/// Module-level initialization.
1049///
1050/// inserts a call to __msan_init to the module's constructor list.
1051void MemorySanitizer::initializeModule(Module &M) {
1052 auto &DL = M.getDataLayout();
1053
1054 TargetTriple = M.getTargetTriple();
1055
1056 bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
1057 bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
1058 // Check the overrides first
1059 if (ShadowPassed || OriginPassed) {
1060 CustomMapParams.AndMask = ClAndMask;
1061 CustomMapParams.XorMask = ClXorMask;
1062 CustomMapParams.ShadowBase = ClShadowBase;
1063 CustomMapParams.OriginBase = ClOriginBase;
1064 MapParams = &CustomMapParams;
1065 } else {
1066 switch (TargetTriple.getOS()) {
1067 case Triple::FreeBSD:
1068 switch (TargetTriple.getArch()) {
1069 case Triple::aarch64:
1070 MapParams = FreeBSD_ARM_MemoryMapParams.bits64;
1071 break;
1072 case Triple::x86_64:
1073 MapParams = FreeBSD_X86_MemoryMapParams.bits64;
1074 break;
1075 case Triple::x86:
1076 MapParams = FreeBSD_X86_MemoryMapParams.bits32;
1077 break;
1078 default:
1079 report_fatal_error(reason: "unsupported architecture");
1080 }
1081 break;
1082 case Triple::NetBSD:
1083 switch (TargetTriple.getArch()) {
1084 case Triple::x86_64:
1085 MapParams = NetBSD_X86_MemoryMapParams.bits64;
1086 break;
1087 default:
1088 report_fatal_error(reason: "unsupported architecture");
1089 }
1090 break;
1091 case Triple::Linux:
1092 switch (TargetTriple.getArch()) {
1093 case Triple::x86_64:
1094 MapParams = Linux_X86_MemoryMapParams.bits64;
1095 break;
1096 case Triple::x86:
1097 MapParams = Linux_X86_MemoryMapParams.bits32;
1098 break;
1099 case Triple::mips64:
1100 case Triple::mips64el:
1101 MapParams = Linux_MIPS_MemoryMapParams.bits64;
1102 break;
1103 case Triple::ppc64:
1104 case Triple::ppc64le:
1105 MapParams = Linux_PowerPC_MemoryMapParams.bits64;
1106 break;
1107 case Triple::systemz:
1108 MapParams = Linux_S390_MemoryMapParams.bits64;
1109 break;
1110 case Triple::aarch64:
1111 case Triple::aarch64_be:
1112 MapParams = Linux_ARM_MemoryMapParams.bits64;
1113 break;
1114 case Triple::loongarch64:
1115 MapParams = Linux_LoongArch_MemoryMapParams.bits64;
1116 break;
1117 case Triple::hexagon:
1118 MapParams = Linux_Hexagon_MemoryMapParams_P.bits32;
1119 break;
1120 default:
1121 report_fatal_error(reason: "unsupported architecture");
1122 }
1123 break;
1124 default:
1125 report_fatal_error(reason: "unsupported operating system");
1126 }
1127 }
1128
1129 C = &(M.getContext());
1130 IRBuilder<> IRB(*C);
1131 IntptrTy = IRB.getIntPtrTy(DL);
1132 OriginTy = IRB.getInt32Ty();
1133 PtrTy = IRB.getPtrTy();
1134
1135 ColdCallWeights = MDBuilder(*C).createUnlikelyBranchWeights();
1136 OriginStoreWeights = MDBuilder(*C).createUnlikelyBranchWeights();
1137
1138 if (!CompileKernel) {
1139 if (TrackOrigins)
1140 M.getOrInsertGlobal(Name: "__msan_track_origins", Ty: IRB.getInt32Ty(), CreateGlobalCallback: [&] {
1141 return new GlobalVariable(
1142 M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
1143 IRB.getInt32(C: TrackOrigins), "__msan_track_origins");
1144 });
1145
1146 if (Recover)
1147 M.getOrInsertGlobal(Name: "__msan_keep_going", Ty: IRB.getInt32Ty(), CreateGlobalCallback: [&] {
1148 return new GlobalVariable(M, IRB.getInt32Ty(), true,
1149 GlobalValue::WeakODRLinkage,
1150 IRB.getInt32(C: Recover), "__msan_keep_going");
1151 });
1152 }
1153}
1154
1155namespace {
1156
1157/// A helper class that handles instrumentation of VarArg
1158/// functions on a particular platform.
1159///
1160/// Implementations are expected to insert the instrumentation
1161/// necessary to propagate argument shadow through VarArg function
1162/// calls. Visit* methods are called during an InstVisitor pass over
1163/// the function, and should avoid creating new basic blocks. A new
1164/// instance of this class is created for each instrumented function.
1165struct VarArgHelper {
1166 virtual ~VarArgHelper() = default;
1167
1168 /// Visit a CallBase.
1169 virtual void visitCallBase(CallBase &CB, IRBuilder<> &IRB) = 0;
1170
1171 /// Visit a va_start call.
1172 virtual void visitVAStartInst(VAStartInst &I) = 0;
1173
1174 /// Visit a va_copy call.
1175 virtual void visitVACopyInst(VACopyInst &I) = 0;
1176
1177 /// Finalize function instrumentation.
1178 ///
1179 /// This method is called after visiting all interesting (see above)
1180 /// instructions in a function.
1181 virtual void finalizeInstrumentation() = 0;
1182};
1183
1184struct MemorySanitizerVisitor;
1185
1186} // end anonymous namespace
1187
1188static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
1189 MemorySanitizerVisitor &Visitor);
1190
1191static unsigned TypeSizeToSizeIndex(TypeSize TS) {
1192 if (TS.isScalable())
1193 // Scalable types unconditionally take slowpaths.
1194 return kNumberOfAccessSizes;
1195 unsigned TypeSizeFixed = TS.getFixedValue();
1196 if (TypeSizeFixed <= 8)
1197 return 0;
1198 return Log2_32_Ceil(Value: (TypeSizeFixed + 7) / 8);
1199}
1200
1201namespace {
1202
1203/// Helper class to attach debug information of the given instruction onto new
1204/// instructions inserted after.
1205class NextNodeIRBuilder : public IRBuilder<> {
1206public:
1207 explicit NextNodeIRBuilder(Instruction *IP) : IRBuilder<>(IP->getNextNode()) {
1208 SetCurrentDebugLocation(IP->getDebugLoc());
1209 }
1210};
1211
1212/// This class does all the work for a given function. Store and Load
1213/// instructions store and load corresponding shadow and origin
1214/// values. Most instructions propagate shadow from arguments to their
1215/// return values. Certain instructions (most importantly, BranchInst)
1216/// test their argument shadow and print reports (with a runtime call) if it's
1217/// non-zero.
1218struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
1219 Function &F;
1220 MemorySanitizer &MS;
1221 SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
1222 ValueMap<Value *, Value *> ShadowMap, OriginMap;
1223 std::unique_ptr<VarArgHelper> VAHelper;
1224 const TargetLibraryInfo *TLI;
1225 Instruction *FnPrologueEnd;
1226 SmallVector<Instruction *, 16> Instructions;
1227
1228 // The following flags disable parts of MSan instrumentation based on
1229 // exclusion list contents and command-line options.
1230 bool InsertChecks;
1231 bool PropagateShadow;
1232 bool PoisonStack;
1233 bool PoisonUndef;
1234 bool PoisonUndefVectors;
1235
1236 struct ShadowOriginAndInsertPoint {
1237 Value *Shadow;
1238 Value *Origin;
1239 Instruction *OrigIns;
1240
1241 ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
1242 : Shadow(S), Origin(O), OrigIns(I) {}
1243 };
1244 SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
1245 DenseMap<const DILocation *, int> LazyWarningDebugLocationCount;
1246 SmallSetVector<AllocaInst *, 16> AllocaSet;
1247 SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
1248 SmallVector<StoreInst *, 16> StoreList;
1249 int64_t SplittableBlocksCount = 0;
1250
1251 MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
1252 const TargetLibraryInfo &TLI)
1253 : F(F), MS(MS), VAHelper(CreateVarArgHelper(Func&: F, Msan&: MS, Visitor&: *this)), TLI(&TLI) {
1254 bool SanitizeFunction =
1255 F.hasFnAttribute(Kind: Attribute::SanitizeMemory) && !ClDisableChecks;
1256 InsertChecks = SanitizeFunction;
1257 PropagateShadow = SanitizeFunction;
1258 PoisonStack = SanitizeFunction && ClPoisonStack;
1259 PoisonUndef = SanitizeFunction && ClPoisonUndef;
1260 PoisonUndefVectors = SanitizeFunction && ClPoisonUndefVectors;
1261
1262 // In the presence of unreachable blocks, we may see Phi nodes with
1263 // incoming nodes from such blocks. Since InstVisitor skips unreachable
1264 // blocks, such nodes will not have any shadow value associated with them.
1265 // It's easier to remove unreachable blocks than deal with missing shadow.
1266 removeUnreachableBlocks(F);
1267
1268 MS.initializeCallbacks(M&: *F.getParent(), TLI);
1269 FnPrologueEnd =
1270 IRBuilder<>(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHIIt())
1271 .CreateIntrinsicWithoutFolding(ID: Intrinsic::donothing, Args: {});
1272
1273 if (MS.CompileKernel) {
1274 IRBuilder<> IRB(FnPrologueEnd);
1275 insertKmsanPrologue(IRB);
1276 }
1277
1278 LLVM_DEBUG(if (!InsertChecks) dbgs()
1279 << "MemorySanitizer is not inserting checks into '"
1280 << F.getName() << "'\n");
1281 }
1282
1283 bool instrumentWithCalls(Value *V) {
1284 // Constants likely will be eliminated by follow-up passes.
1285 if (isa<Constant>(Val: V))
1286 return false;
1287 ++SplittableBlocksCount;
1288 return ClInstrumentationWithCallThreshold >= 0 &&
1289 SplittableBlocksCount > ClInstrumentationWithCallThreshold;
1290 }
1291
1292 bool isInPrologue(Instruction &I) {
1293 return I.getParent() == FnPrologueEnd->getParent() &&
1294 (&I == FnPrologueEnd || I.comesBefore(Other: FnPrologueEnd));
1295 }
1296
1297 // Creates a new origin and records the stack trace. In general we can call
1298 // this function for any origin manipulation we like. However it will cost
1299 // runtime resources. So use this wisely only if it can provide additional
1300 // information helpful to a user.
1301 Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
1302 if (MS.TrackOrigins <= 1)
1303 return V;
1304 return IRB.CreateCall(Callee: MS.MsanChainOriginFn, Args: V);
1305 }
1306
1307 Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
1308 const DataLayout &DL = F.getDataLayout();
1309 unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
1310 if (IntptrSize == kOriginSize)
1311 return Origin;
1312 assert(IntptrSize == kOriginSize * 2);
1313 Origin = IRB.CreateIntCast(V: Origin, DestTy: MS.IntptrTy, /* isSigned */ false);
1314 return IRB.CreateOr(LHS: Origin, RHS: IRB.CreateShl(LHS: Origin, RHS: kOriginSize * 8));
1315 }
1316
1317 /// Fill memory range with the given origin value.
1318 void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
1319 TypeSize TS, Align Alignment) {
1320 const DataLayout &DL = F.getDataLayout();
1321 const Align IntptrAlignment = DL.getABITypeAlign(Ty: MS.IntptrTy);
1322 unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
1323 assert(IntptrAlignment >= kMinOriginAlignment);
1324 assert(IntptrSize >= kOriginSize);
1325
1326 // Note: The loop based formation works for fixed length vectors too,
1327 // however we prefer to unroll and specialize alignment below.
1328 if (TS.isScalable()) {
1329 Value *Size = IRB.CreateTypeSize(Ty: MS.IntptrTy, Size: TS);
1330 Value *RoundUp =
1331 IRB.CreateAdd(LHS: Size, RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kOriginSize - 1));
1332 Value *End =
1333 IRB.CreateUDiv(LHS: RoundUp, RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kOriginSize));
1334 auto [InsertPt, Index] =
1335 SplitBlockAndInsertSimpleForLoop(End, SplitBefore: IRB.GetInsertPoint());
1336 IRB.SetInsertPoint(InsertPt);
1337
1338 Value *GEP = IRB.CreateGEP(Ty: MS.OriginTy, Ptr: OriginPtr, IdxList: Index);
1339 IRB.CreateAlignedStore(Val: Origin, Ptr: GEP, Align: kMinOriginAlignment);
1340 return;
1341 }
1342
1343 unsigned Size = TS.getFixedValue();
1344
1345 unsigned Ofs = 0;
1346 Align CurrentAlignment = Alignment;
1347 if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
1348 Value *IntptrOrigin = originToIntptr(IRB, Origin);
1349 Value *IntptrOriginPtr = IRB.CreatePointerCast(V: OriginPtr, DestTy: MS.PtrTy);
1350 for (unsigned i = 0; i < Size / IntptrSize; ++i) {
1351 Value *Ptr = i ? IRB.CreateConstGEP1_32(Ty: MS.IntptrTy, Ptr: IntptrOriginPtr, Idx0: i)
1352 : IntptrOriginPtr;
1353 IRB.CreateAlignedStore(Val: IntptrOrigin, Ptr, Align: CurrentAlignment);
1354 Ofs += IntptrSize / kOriginSize;
1355 CurrentAlignment = IntptrAlignment;
1356 }
1357 }
1358
1359 for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
1360 Value *GEP =
1361 i ? IRB.CreateConstGEP1_32(Ty: MS.OriginTy, Ptr: OriginPtr, Idx0: i) : OriginPtr;
1362 IRB.CreateAlignedStore(Val: Origin, Ptr: GEP, Align: CurrentAlignment);
1363 CurrentAlignment = kMinOriginAlignment;
1364 }
1365 }
1366
1367 void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
1368 Value *OriginPtr, Align Alignment) {
1369 const DataLayout &DL = F.getDataLayout();
1370 const Align OriginAlignment = std::max(a: kMinOriginAlignment, b: Alignment);
1371 TypeSize StoreSize = DL.getTypeStoreSize(Ty: Shadow->getType());
1372 // ZExt cannot convert between vector and scalar
1373 Value *ConvertedShadow = convertShadowToScalar(V: Shadow, IRB);
1374 if (auto *ConstantShadow = dyn_cast<Constant>(Val: ConvertedShadow)) {
1375 if (!ClCheckConstantShadow || ConstantShadow->isNullValue()) {
1376 // Origin is not needed: value is initialized or const shadow is
1377 // ignored.
1378 return;
1379 }
1380 if (llvm::isKnownNonZero(V: ConvertedShadow, Q: DL)) {
1381 // Copy origin as the value is definitely uninitialized.
1382 paintOrigin(IRB, Origin: updateOrigin(V: Origin, IRB), OriginPtr, TS: StoreSize,
1383 Alignment: OriginAlignment);
1384 return;
1385 }
1386 // Fallback to runtime check, which still can be optimized out later.
1387 }
1388
1389 TypeSize TypeSizeInBits = DL.getTypeSizeInBits(Ty: ConvertedShadow->getType());
1390 unsigned SizeIndex = TypeSizeToSizeIndex(TS: TypeSizeInBits);
1391 if (instrumentWithCalls(V: ConvertedShadow) &&
1392 SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1393 FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
1394 Value *ConvertedShadow2 =
1395 IRB.CreateZExt(V: ConvertedShadow, DestTy: IRB.getIntNTy(N: 8 * (1 << SizeIndex)));
1396 CallBase *CB = IRB.CreateCall(Callee: Fn, Args: {ConvertedShadow2, Addr, Origin});
1397 CB->addParamAttr(ArgNo: 0, Kind: Attribute::ZExt);
1398 CB->addParamAttr(ArgNo: 2, Kind: Attribute::ZExt);
1399 } else {
1400 Value *Cmp = convertToBool(V: ConvertedShadow, IRB, name: "_mscmp");
1401 Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1402 Cond: Cmp, SplitBefore: &*IRB.GetInsertPoint(), Unreachable: false, BranchWeights: MS.OriginStoreWeights);
1403 IRBuilder<> IRBNew(CheckTerm);
1404 paintOrigin(IRB&: IRBNew, Origin: updateOrigin(V: Origin, IRB&: IRBNew), OriginPtr, TS: StoreSize,
1405 Alignment: OriginAlignment);
1406 }
1407 }
1408
1409 void materializeStores() {
1410 for (StoreInst *SI : StoreList) {
1411 IRBuilder<> IRB(SI);
1412 Value *Val = SI->getValueOperand();
1413 Value *Addr = SI->getPointerOperand();
1414 Value *Shadow = SI->isAtomic() ? getCleanShadow(V: Val) : getShadow(V: Val);
1415 Value *ShadowPtr, *OriginPtr;
1416 Type *ShadowTy = Shadow->getType();
1417 const Align Alignment = SI->getAlign();
1418 const Align OriginAlignment = std::max(a: kMinOriginAlignment, b: Alignment);
1419 std::tie(args&: ShadowPtr, args&: OriginPtr) =
1420 getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);
1421
1422 [[maybe_unused]] StoreInst *NewSI =
1423 IRB.CreateAlignedStore(Val: Shadow, Ptr: ShadowPtr, Align: Alignment);
1424 LLVM_DEBUG(dbgs() << " STORE: " << *NewSI << "\n");
1425
1426 if (SI->isAtomic())
1427 SI->setOrdering(addReleaseOrdering(a: SI->getOrdering()));
1428
1429 if (MS.TrackOrigins && !SI->isAtomic())
1430 storeOrigin(IRB, Addr, Shadow, Origin: getOrigin(V: Val), OriginPtr,
1431 Alignment: OriginAlignment);
1432 }
1433 }
1434
1435 // Returns true if Debug Location corresponds to multiple warnings.
1436 bool shouldDisambiguateWarningLocation(const DebugLoc &DebugLoc) {
1437 if (MS.TrackOrigins < 2)
1438 return false;
1439
1440 if (LazyWarningDebugLocationCount.empty())
1441 for (const auto &I : InstrumentationList)
1442 ++LazyWarningDebugLocationCount[I.OrigIns->getDebugLoc()];
1443
1444 return LazyWarningDebugLocationCount[DebugLoc] >= ClDisambiguateWarning;
1445 }
1446
1447 /// Helper function to insert a warning at IRB's current insert point.
1448 void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
1449 if (!Origin)
1450 Origin = (Value *)IRB.getInt32(C: 0);
1451 assert(Origin->getType()->isIntegerTy());
1452
1453 if (shouldDisambiguateWarningLocation(DebugLoc: IRB.getCurrentDebugLocation())) {
1454 // Try to create additional origin with debug info of the last origin
1455 // instruction. It may provide additional information to the user.
1456 if (Instruction *OI = dyn_cast_or_null<Instruction>(Val: Origin)) {
1457 assert(MS.TrackOrigins);
1458 auto NewDebugLoc = OI->getDebugLoc();
1459 // Origin update with missing or the same debug location provides no
1460 // additional value.
1461 if (NewDebugLoc && NewDebugLoc != IRB.getCurrentDebugLocation()) {
1462 // Insert update just before the check, so we call runtime only just
1463 // before the report.
1464 IRBuilder<> IRBOrigin(&*IRB.GetInsertPoint());
1465 IRBOrigin.SetCurrentDebugLocation(NewDebugLoc);
1466 Origin = updateOrigin(V: Origin, IRB&: IRBOrigin);
1467 }
1468 }
1469 }
1470
1471 if (MS.CompileKernel || MS.TrackOrigins)
1472 IRB.CreateCall(Callee: MS.WarningFn, Args: Origin)->setCannotMerge();
1473 else
1474 IRB.CreateCall(Callee: MS.WarningFn)->setCannotMerge();
1475 // FIXME: Insert UnreachableInst if !MS.Recover?
1476 // This may invalidate some of the following checks and needs to be done
1477 // at the very end.
1478 }
1479
1480 void materializeOneCheck(IRBuilder<> &IRB, Value *ConvertedShadow,
1481 Value *Origin) {
1482 const DataLayout &DL = F.getDataLayout();
1483 TypeSize TypeSizeInBits = DL.getTypeSizeInBits(Ty: ConvertedShadow->getType());
1484 unsigned SizeIndex = TypeSizeToSizeIndex(TS: TypeSizeInBits);
1485 if (instrumentWithCalls(V: ConvertedShadow) && !MS.CompileKernel) {
1486 // ZExt cannot convert between vector and scalar
1487 ConvertedShadow = convertShadowToScalar(V: ConvertedShadow, IRB);
1488 Value *ConvertedShadow2 =
1489 IRB.CreateZExt(V: ConvertedShadow, DestTy: IRB.getIntNTy(N: 8 * (1 << SizeIndex)));
1490
1491 if (SizeIndex < kNumberOfAccessSizes) {
1492 FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
1493 CallBase *CB = IRB.CreateCall(
1494 Callee: Fn,
1495 Args: {ConvertedShadow2,
1496 MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(C: 0)});
1497 CB->addParamAttr(ArgNo: 0, Kind: Attribute::ZExt);
1498 CB->addParamAttr(ArgNo: 1, Kind: Attribute::ZExt);
1499 } else {
1500 FunctionCallee Fn = MS.MaybeWarningVarSizeFn;
1501 Value *ShadowAlloca = IRB.CreateAlloca(Ty: ConvertedShadow2->getType(), AddrSpace: 0u);
1502 IRB.CreateStore(Val: ConvertedShadow2, Ptr: ShadowAlloca);
1503 unsigned ShadowSize = DL.getTypeAllocSize(Ty: ConvertedShadow2->getType());
1504 CallBase *CB = IRB.CreateCall(
1505 Callee: Fn,
1506 Args: {ShadowAlloca, ConstantInt::get(Ty: IRB.getInt64Ty(), V: ShadowSize),
1507 MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(C: 0)});
1508 CB->addParamAttr(ArgNo: 1, Kind: Attribute::ZExt);
1509 CB->addParamAttr(ArgNo: 2, Kind: Attribute::ZExt);
1510 }
1511 } else {
1512 Value *Cmp = convertToBool(V: ConvertedShadow, IRB, name: "_mscmp");
1513 Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1514 Cond: Cmp, SplitBefore: &*IRB.GetInsertPoint(),
1515 /* Unreachable */ !MS.Recover, BranchWeights: MS.ColdCallWeights);
1516
1517 IRB.SetInsertPoint(CheckTerm);
1518 insertWarningFn(IRB, Origin);
1519 LLVM_DEBUG(dbgs() << " CHECK: " << *Cmp << "\n");
1520 }
1521 }
1522
1523 void materializeInstructionChecks(
1524 ArrayRef<ShadowOriginAndInsertPoint> InstructionChecks) {
1525 const DataLayout &DL = F.getDataLayout();
1526 // Disable combining in some cases. TrackOrigins checks each shadow to pick
1527 // correct origin.
1528 bool Combine = !MS.TrackOrigins;
1529 Instruction *Instruction = InstructionChecks.front().OrigIns;
1530 Value *Shadow = nullptr;
1531 for (const auto &ShadowData : InstructionChecks) {
1532 assert(ShadowData.OrigIns == Instruction);
1533 IRBuilder<> IRB(Instruction);
1534
1535 Value *ConvertedShadow = ShadowData.Shadow;
1536
1537 if (auto *ConstantShadow = dyn_cast<Constant>(Val: ConvertedShadow)) {
1538 if (!ClCheckConstantShadow || ConstantShadow->isNullValue()) {
1539 // Skip, value is initialized or const shadow is ignored.
1540 continue;
1541 }
1542 if (llvm::isKnownNonZero(V: ConvertedShadow, Q: DL)) {
1543 // Report as the value is definitely uninitialized.
1544 insertWarningFn(IRB, Origin: ShadowData.Origin);
1545 if (!MS.Recover)
1546 return; // Always fail and stop here, not need to check the rest.
1547 // Skip entire instruction,
1548 continue;
1549 }
1550 // Fallback to runtime check, which still can be optimized out later.
1551 }
1552
1553 if (!Combine) {
1554 materializeOneCheck(IRB, ConvertedShadow, Origin: ShadowData.Origin);
1555 continue;
1556 }
1557
1558 if (!Shadow) {
1559 Shadow = ConvertedShadow;
1560 continue;
1561 }
1562
1563 Shadow = convertToBool(V: Shadow, IRB, name: "_mscmp");
1564 ConvertedShadow = convertToBool(V: ConvertedShadow, IRB, name: "_mscmp");
1565 Shadow = IRB.CreateOr(LHS: Shadow, RHS: ConvertedShadow, Name: "_msor");
1566 }
1567
1568 if (Shadow) {
1569 assert(Combine);
1570 IRBuilder<> IRB(Instruction);
1571 materializeOneCheck(IRB, ConvertedShadow: Shadow, Origin: nullptr);
1572 }
1573 }
1574
1575 static bool isAArch64SVCount(Type *Ty) {
1576 if (TargetExtType *TTy = dyn_cast<TargetExtType>(Val: Ty))
1577 return TTy->getName() == "aarch64.svcount";
1578 return false;
1579 }
1580
1581 // This is intended to match the "AArch64 Predicate-as-Counter Type" (aka
1582 // 'target("aarch64.svcount")', but not e.g., <vscale x 4 x i32>.
1583 static bool isScalableNonVectorType(Type *Ty) {
1584 if (!isAArch64SVCount(Ty))
1585 LLVM_DEBUG(dbgs() << "isScalableNonVectorType: Unexpected type " << *Ty
1586 << "\n");
1587
1588 return Ty->isScalableTy() && !isa<VectorType>(Val: Ty);
1589 }
1590
1591 void materializeChecks() {
1592#ifndef NDEBUG
1593 // For assert below.
1594 SmallPtrSet<Instruction *, 16> Done;
1595#endif
1596
1597 for (auto I = InstrumentationList.begin();
1598 I != InstrumentationList.end();) {
1599 auto OrigIns = I->OrigIns;
1600 // Checks are grouped by the original instruction. We call all
1601 // `insertShadowCheck` for an instruction at once.
1602 assert(Done.insert(OrigIns).second);
1603 auto J = std::find_if(first: I + 1, last: InstrumentationList.end(),
1604 pred: [OrigIns](const ShadowOriginAndInsertPoint &R) {
1605 return OrigIns != R.OrigIns;
1606 });
1607 // Process all checks of instruction at once.
1608 materializeInstructionChecks(InstructionChecks: ArrayRef<ShadowOriginAndInsertPoint>(I, J));
1609 I = J;
1610 }
1611
1612 LLVM_DEBUG(dbgs() << "DONE:\n" << F);
1613 }
1614
1615 // Returns the last instruction in the new prologue
1616 void insertKmsanPrologue(IRBuilder<> &IRB) {
1617 Value *ContextState = IRB.CreateCall(Callee: MS.MsanGetContextStateFn, Args: {});
1618 Constant *Zero = IRB.getInt32(C: 0);
1619 MS.ParamTLS = IRB.CreateGEP(Ty: MS.MsanContextStateTy, Ptr: ContextState,
1620 IdxList: {Zero, IRB.getInt32(C: 0)}, Name: "param_shadow");
1621 MS.RetvalTLS = IRB.CreateGEP(Ty: MS.MsanContextStateTy, Ptr: ContextState,
1622 IdxList: {Zero, IRB.getInt32(C: 1)}, Name: "retval_shadow");
1623 MS.VAArgTLS = IRB.CreateGEP(Ty: MS.MsanContextStateTy, Ptr: ContextState,
1624 IdxList: {Zero, IRB.getInt32(C: 2)}, Name: "va_arg_shadow");
1625 MS.VAArgOriginTLS = IRB.CreateGEP(Ty: MS.MsanContextStateTy, Ptr: ContextState,
1626 IdxList: {Zero, IRB.getInt32(C: 3)}, Name: "va_arg_origin");
1627 MS.VAArgOverflowSizeTLS =
1628 IRB.CreateGEP(Ty: MS.MsanContextStateTy, Ptr: ContextState,
1629 IdxList: {Zero, IRB.getInt32(C: 4)}, Name: "va_arg_overflow_size");
1630 MS.ParamOriginTLS = IRB.CreateGEP(Ty: MS.MsanContextStateTy, Ptr: ContextState,
1631 IdxList: {Zero, IRB.getInt32(C: 5)}, Name: "param_origin");
1632 MS.RetvalOriginTLS =
1633 IRB.CreateGEP(Ty: MS.MsanContextStateTy, Ptr: ContextState,
1634 IdxList: {Zero, IRB.getInt32(C: 6)}, Name: "retval_origin");
1635 if (MS.TargetTriple.getArch() == Triple::systemz)
1636 MS.MsanMetadataAlloca = IRB.CreateAlloca(Ty: MS.MsanMetadata, AddrSpace: 0u);
1637 }
1638
1639 /// Add MemorySanitizer instrumentation to a function.
1640 bool runOnFunction() {
1641 // Iterate all BBs in depth-first order and create shadow instructions
1642 // for all instructions (where applicable).
1643 // For PHI nodes we create dummy shadow PHIs which will be finalized later.
1644 for (BasicBlock *BB : depth_first(G: FnPrologueEnd->getParent()))
1645 visit(BB&: *BB);
1646
1647 // `visit` above only collects instructions. Process them after iterating
1648 // CFG to avoid requirement on CFG transformations.
1649 for (Instruction *I : Instructions)
1650 InstVisitor<MemorySanitizerVisitor>::visit(I&: *I);
1651
1652 // Finalize PHI nodes.
1653 for (PHINode *PN : ShadowPHINodes) {
1654 PHINode *PNS = cast<PHINode>(Val: getShadow(V: PN));
1655 PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(Val: getOrigin(V: PN)) : nullptr;
1656 size_t NumValues = PN->getNumIncomingValues();
1657 for (size_t v = 0; v < NumValues; v++) {
1658 PNS->addIncoming(V: getShadow(I: PN, i: v), BB: PN->getIncomingBlock(i: v));
1659 if (PNO)
1660 PNO->addIncoming(V: getOrigin(I: PN, i: v), BB: PN->getIncomingBlock(i: v));
1661 }
1662 }
1663
1664 VAHelper->finalizeInstrumentation();
1665
1666 // Poison llvm.lifetime.start intrinsics, if we haven't fallen back to
1667 // instrumenting only allocas.
1668 if (ClHandleLifetimeIntrinsics) {
1669 for (auto Item : LifetimeStartList) {
1670 instrumentAlloca(I&: *Item.second, InsPoint: Item.first);
1671 AllocaSet.remove(X: Item.second);
1672 }
1673 }
1674 // Poison the allocas for which we didn't instrument the corresponding
1675 // lifetime intrinsics.
1676 for (AllocaInst *AI : AllocaSet)
1677 instrumentAlloca(I&: *AI);
1678
1679 // Insert shadow value checks.
1680 materializeChecks();
1681
1682 // Delayed instrumentation of StoreInst.
1683 // This may not add new address checks.
1684 materializeStores();
1685
1686 return true;
1687 }
1688
1689 /// Compute the shadow type that corresponds to a given Value.
1690 Type *getShadowTy(Value *V) { return getShadowTy(OrigTy: V->getType()); }
1691
1692 /// Compute the shadow type that corresponds to a given Type.
1693 Type *getShadowTy(Type *OrigTy) {
1694 if (!OrigTy->isSized()) {
1695 return nullptr;
1696 }
1697 // For integer type, shadow is the same as the original type.
1698 // This may return weird-sized types like i1.
1699 if (IntegerType *IT = dyn_cast<IntegerType>(Val: OrigTy))
1700 return IT;
1701 const DataLayout &DL = F.getDataLayout();
1702 if (VectorType *VT = dyn_cast<VectorType>(Val: OrigTy)) {
1703 uint32_t EltSize = DL.getTypeSizeInBits(Ty: VT->getElementType());
1704 return VectorType::get(ElementType: IntegerType::get(C&: *MS.C, NumBits: EltSize),
1705 EC: VT->getElementCount());
1706 }
1707 if (ArrayType *AT = dyn_cast<ArrayType>(Val: OrigTy)) {
1708 return ArrayType::get(ElementType: getShadowTy(OrigTy: AT->getElementType()),
1709 NumElements: AT->getNumElements());
1710 }
1711 if (StructType *ST = dyn_cast<StructType>(Val: OrigTy)) {
1712 SmallVector<Type *, 4> Elements;
1713 for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1714 Elements.push_back(Elt: getShadowTy(OrigTy: ST->getElementType(N: i)));
1715 StructType *Res = StructType::get(Context&: *MS.C, Elements, isPacked: ST->isPacked());
1716 LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
1717 return Res;
1718 }
1719 if (isScalableNonVectorType(Ty: OrigTy)) {
1720 LLVM_DEBUG(dbgs() << "getShadowTy: Scalable non-vector type: " << *OrigTy
1721 << "\n");
1722 return OrigTy;
1723 }
1724
1725 uint32_t TypeSize = DL.getTypeSizeInBits(Ty: OrigTy);
1726 return IntegerType::get(C&: *MS.C, NumBits: TypeSize);
1727 }
1728
1729 /// Extract combined shadow of struct elements as a bool
1730 Value *collapseStructShadow(StructType *Struct, Value *Shadow,
1731 IRBuilder<> &IRB) {
1732 Value *FalseVal = IRB.getIntN(/* width */ N: 1, /* value */ C: 0);
1733 Value *Aggregator = FalseVal;
1734
1735 for (unsigned Idx = 0; Idx < Struct->getNumElements(); Idx++) {
1736 // Combine by ORing together each element's bool shadow
1737 Value *ShadowItem = IRB.CreateExtractValue(Agg: Shadow, Idxs: Idx);
1738 Value *ShadowBool = convertToBool(V: ShadowItem, IRB);
1739
1740 if (Aggregator != FalseVal)
1741 Aggregator = IRB.CreateOr(LHS: Aggregator, RHS: ShadowBool);
1742 else
1743 Aggregator = ShadowBool;
1744 }
1745
1746 return Aggregator;
1747 }
1748
1749 // Extract combined shadow of array elements
1750 Value *collapseArrayShadow(ArrayType *Array, Value *Shadow,
1751 IRBuilder<> &IRB) {
1752 if (!Array->getNumElements())
1753 return IRB.getIntN(/* width */ N: 1, /* value */ C: 0);
1754
1755 Value *FirstItem = IRB.CreateExtractValue(Agg: Shadow, Idxs: 0);
1756 Value *Aggregator = convertShadowToScalar(V: FirstItem, IRB);
1757
1758 for (unsigned Idx = 1; Idx < Array->getNumElements(); Idx++) {
1759 Value *ShadowItem = IRB.CreateExtractValue(Agg: Shadow, Idxs: Idx);
1760 Value *ShadowInner = convertShadowToScalar(V: ShadowItem, IRB);
1761 Aggregator = IRB.CreateOr(LHS: Aggregator, RHS: ShadowInner);
1762 }
1763 return Aggregator;
1764 }
1765
1766 /// Convert a shadow value to it's flattened variant. The resulting
1767 /// shadow may not necessarily have the same bit width as the input
1768 /// value, but it will always be comparable to zero.
1769 Value *convertShadowToScalar(Value *V, IRBuilder<> &IRB) {
1770 if (StructType *Struct = dyn_cast<StructType>(Val: V->getType()))
1771 return collapseStructShadow(Struct, Shadow: V, IRB);
1772 if (ArrayType *Array = dyn_cast<ArrayType>(Val: V->getType()))
1773 return collapseArrayShadow(Array, Shadow: V, IRB);
1774 if (isa<VectorType>(Val: V->getType())) {
1775 if (isa<ScalableVectorType>(Val: V->getType()))
1776 return convertShadowToScalar(V: IRB.CreateOrReduce(Src: V), IRB);
1777 unsigned BitWidth =
1778 V->getType()->getPrimitiveSizeInBits().getFixedValue();
1779 return IRB.CreateBitCast(V, DestTy: IntegerType::get(C&: *MS.C, NumBits: BitWidth));
1780 }
1781 return V;
1782 }
1783
1784 // Convert a scalar value to an i1 by comparing with 0
1785 Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &name = "") {
1786 Type *VTy = V->getType();
1787 if (!VTy->isIntegerTy())
1788 return convertToBool(V: convertShadowToScalar(V, IRB), IRB, name);
1789 if (VTy->getIntegerBitWidth() == 1)
1790 // Just converting a bool to a bool, so do nothing.
1791 return V;
1792 return IRB.CreateICmpNE(LHS: V, RHS: ConstantInt::get(Ty: VTy, V: 0), Name: name);
1793 }
1794
1795 Type *ptrToIntPtrType(Type *PtrTy) const {
1796 if (VectorType *VectTy = dyn_cast<VectorType>(Val: PtrTy)) {
1797 return VectorType::get(ElementType: ptrToIntPtrType(PtrTy: VectTy->getElementType()),
1798 EC: VectTy->getElementCount());
1799 }
1800 assert(PtrTy->isIntOrPtrTy());
1801 return MS.IntptrTy;
1802 }
1803
1804 Type *getPtrToShadowPtrType(Type *IntPtrTy, Type *ShadowTy) const {
1805 if (VectorType *VectTy = dyn_cast<VectorType>(Val: IntPtrTy)) {
1806 return VectorType::get(
1807 ElementType: getPtrToShadowPtrType(IntPtrTy: VectTy->getElementType(), ShadowTy),
1808 EC: VectTy->getElementCount());
1809 }
1810 assert(IntPtrTy == MS.IntptrTy);
1811 return MS.PtrTy;
1812 }
1813
1814 Constant *constToIntPtr(Type *IntPtrTy, uint64_t C) const {
1815 if (VectorType *VectTy = dyn_cast<VectorType>(Val: IntPtrTy)) {
1816 return ConstantVector::getSplat(
1817 EC: VectTy->getElementCount(),
1818 Elt: constToIntPtr(IntPtrTy: VectTy->getElementType(), C));
1819 }
1820 assert(IntPtrTy == MS.IntptrTy);
1821 // TODO: Avoid implicit trunc?
1822 // See https://github.com/llvm/llvm-project/issues/112510.
1823 return ConstantInt::get(Ty: MS.IntptrTy, V: C, /*IsSigned=*/false,
1824 /*ImplicitTrunc=*/true);
1825 }
1826
1827 /// Returns the integer shadow offset that corresponds to a given
1828 /// application address, whereby:
1829 ///
1830 /// Offset = (Addr & ~AndMask) ^ XorMask
1831 /// Shadow = ShadowBase + Offset
1832 /// Origin = (OriginBase + Offset) & ~Alignment
1833 ///
1834 /// Note: for efficiency, many shadow mappings only require use the XorMask
1835 /// and OriginBase; the AndMask and ShadowBase are often zero.
1836 Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
1837 Type *IntptrTy = ptrToIntPtrType(PtrTy: Addr->getType());
1838 Value *OffsetLong = IRB.CreatePointerCast(V: Addr, DestTy: IntptrTy);
1839
1840 if (uint64_t AndMask = MS.MapParams->AndMask)
1841 OffsetLong = IRB.CreateAnd(LHS: OffsetLong, RHS: constToIntPtr(IntPtrTy: IntptrTy, C: ~AndMask));
1842
1843 if (uint64_t XorMask = MS.MapParams->XorMask)
1844 OffsetLong = IRB.CreateXor(LHS: OffsetLong, RHS: constToIntPtr(IntPtrTy: IntptrTy, C: XorMask));
1845 return OffsetLong;
1846 }
1847
1848 /// Compute the shadow and origin addresses corresponding to a given
1849 /// application address.
1850 ///
1851 /// Shadow = ShadowBase + Offset
1852 /// Origin = (OriginBase + Offset) & ~3ULL
1853 /// Addr can be a ptr or <N x ptr>. In both cases ShadowTy the shadow type of
1854 /// a single pointee.
1855 /// Returns <shadow_ptr, origin_ptr> or <<N x shadow_ptr>, <N x origin_ptr>>.
1856 std::pair<Value *, Value *>
1857 getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
1858 MaybeAlign Alignment) {
1859 VectorType *VectTy = dyn_cast<VectorType>(Val: Addr->getType());
1860 if (!VectTy) {
1861 assert(Addr->getType()->isPointerTy());
1862 } else {
1863 assert(VectTy->getElementType()->isPointerTy());
1864 }
1865 Type *IntptrTy = ptrToIntPtrType(PtrTy: Addr->getType());
1866 Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
1867 Value *ShadowLong = ShadowOffset;
1868 if (uint64_t ShadowBase = MS.MapParams->ShadowBase) {
1869 ShadowLong =
1870 IRB.CreateAdd(LHS: ShadowLong, RHS: constToIntPtr(IntPtrTy: IntptrTy, C: ShadowBase));
1871 }
1872 Value *ShadowPtr = IRB.CreateIntToPtr(
1873 V: ShadowLong, DestTy: getPtrToShadowPtrType(IntPtrTy: IntptrTy, ShadowTy));
1874
1875 Value *OriginPtr = nullptr;
1876 if (MS.TrackOrigins) {
1877 Value *OriginLong = ShadowOffset;
1878 uint64_t OriginBase = MS.MapParams->OriginBase;
1879 if (OriginBase != 0)
1880 OriginLong =
1881 IRB.CreateAdd(LHS: OriginLong, RHS: constToIntPtr(IntPtrTy: IntptrTy, C: OriginBase));
1882 if (!Alignment || *Alignment < kMinOriginAlignment) {
1883 uint64_t Mask = kMinOriginAlignment.value() - 1;
1884 OriginLong = IRB.CreateAnd(LHS: OriginLong, RHS: constToIntPtr(IntPtrTy: IntptrTy, C: ~Mask));
1885 }
1886 OriginPtr = IRB.CreateIntToPtr(
1887 V: OriginLong, DestTy: getPtrToShadowPtrType(IntPtrTy: IntptrTy, ShadowTy: MS.OriginTy));
1888 }
1889 return std::make_pair(x&: ShadowPtr, y&: OriginPtr);
1890 }
1891
1892 template <typename... ArgsTy>
1893 Value *createMetadataCall(IRBuilder<> &IRB, FunctionCallee Callee,
1894 ArgsTy... Args) {
1895 if (MS.TargetTriple.getArch() == Triple::systemz) {
1896 IRB.CreateCall(Callee,
1897 {MS.MsanMetadataAlloca, std::forward<ArgsTy>(Args)...});
1898 return IRB.CreateLoad(Ty: MS.MsanMetadata, Ptr: MS.MsanMetadataAlloca);
1899 }
1900
1901 return IRB.CreateCall(Callee, {std::forward<ArgsTy>(Args)...});
1902 }
1903
1904 std::pair<Value *, Value *> getShadowOriginPtrKernelNoVec(Value *Addr,
1905 IRBuilder<> &IRB,
1906 Type *ShadowTy,
1907 bool isStore) {
1908 Value *ShadowOriginPtrs;
1909 const DataLayout &DL = F.getDataLayout();
1910 TypeSize Size = DL.getTypeStoreSize(Ty: ShadowTy);
1911
1912 FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, size: Size);
1913 Value *AddrCast = IRB.CreatePointerCast(V: Addr, DestTy: MS.PtrTy);
1914 if (Getter) {
1915 ShadowOriginPtrs = createMetadataCall(IRB, Callee: Getter, Args: AddrCast);
1916 } else {
1917 Value *SizeVal = ConstantInt::get(Ty: MS.IntptrTy, V: Size);
1918 ShadowOriginPtrs = createMetadataCall(
1919 IRB,
1920 Callee: isStore ? MS.MsanMetadataPtrForStoreN : MS.MsanMetadataPtrForLoadN,
1921 Args: AddrCast, Args: SizeVal);
1922 }
1923 Value *ShadowPtr = IRB.CreateExtractValue(Agg: ShadowOriginPtrs, Idxs: 0);
1924 ShadowPtr = IRB.CreatePointerCast(V: ShadowPtr, DestTy: MS.PtrTy);
1925 Value *OriginPtr = IRB.CreateExtractValue(Agg: ShadowOriginPtrs, Idxs: 1);
1926
1927 return std::make_pair(x&: ShadowPtr, y&: OriginPtr);
1928 }
1929
1930 /// Addr can be a ptr or <N x ptr>. In both cases ShadowTy the shadow type of
1931 /// a single pointee.
1932 /// Returns <shadow_ptr, origin_ptr> or <<N x shadow_ptr>, <N x origin_ptr>>.
1933 std::pair<Value *, Value *> getShadowOriginPtrKernel(Value *Addr,
1934 IRBuilder<> &IRB,
1935 Type *ShadowTy,
1936 bool isStore) {
1937 VectorType *VectTy = dyn_cast<VectorType>(Val: Addr->getType());
1938 if (!VectTy) {
1939 assert(Addr->getType()->isPointerTy());
1940 return getShadowOriginPtrKernelNoVec(Addr, IRB, ShadowTy, isStore);
1941 }
1942
1943 // TODO: Support callbacs with vectors of addresses.
1944 unsigned NumElements = cast<FixedVectorType>(Val: VectTy)->getNumElements();
1945 Value *ShadowPtrs = ConstantInt::getNullValue(
1946 Ty: FixedVectorType::get(ElementType: IRB.getPtrTy(), NumElts: NumElements));
1947 Value *OriginPtrs = nullptr;
1948 if (MS.TrackOrigins)
1949 OriginPtrs = ConstantInt::getNullValue(
1950 Ty: FixedVectorType::get(ElementType: IRB.getPtrTy(), NumElts: NumElements));
1951 for (unsigned i = 0; i < NumElements; ++i) {
1952 Value *OneAddr =
1953 IRB.CreateExtractElement(Vec: Addr, Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: i));
1954 auto [ShadowPtr, OriginPtr] =
1955 getShadowOriginPtrKernelNoVec(Addr: OneAddr, IRB, ShadowTy, isStore);
1956
1957 ShadowPtrs = IRB.CreateInsertElement(
1958 Vec: ShadowPtrs, NewElt: ShadowPtr, Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: i));
1959 if (MS.TrackOrigins)
1960 OriginPtrs = IRB.CreateInsertElement(
1961 Vec: OriginPtrs, NewElt: OriginPtr, Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: i));
1962 }
1963 return {ShadowPtrs, OriginPtrs};
1964 }
1965
1966 std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
1967 Type *ShadowTy,
1968 MaybeAlign Alignment,
1969 bool isStore) {
1970 if (MS.CompileKernel)
1971 return getShadowOriginPtrKernel(Addr, IRB, ShadowTy, isStore);
1972 return getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
1973 }
1974
1975 /// Compute the shadow address for a given function argument.
1976 ///
1977 /// Shadow = ParamTLS+ArgOffset.
1978 Value *getShadowPtrForArgument(IRBuilder<> &IRB, int ArgOffset) {
1979 return IRB.CreatePtrAdd(Ptr: MS.ParamTLS,
1980 Offset: ConstantInt::get(Ty: MS.IntptrTy, V: ArgOffset), Name: "_msarg");
1981 }
1982
1983 /// Compute the origin address for a given function argument.
1984 Value *getOriginPtrForArgument(IRBuilder<> &IRB, int ArgOffset) {
1985 if (!MS.TrackOrigins)
1986 return nullptr;
1987 return IRB.CreatePtrAdd(Ptr: MS.ParamOriginTLS,
1988 Offset: ConstantInt::get(Ty: MS.IntptrTy, V: ArgOffset),
1989 Name: "_msarg_o");
1990 }
1991
1992 /// Compute the shadow address for a retval.
1993 Value *getShadowPtrForRetval(IRBuilder<> &IRB) {
1994 return IRB.CreatePointerCast(V: MS.RetvalTLS, DestTy: IRB.getPtrTy(AddrSpace: 0), Name: "_msret");
1995 }
1996
1997 /// Compute the origin address for a retval.
1998 Value *getOriginPtrForRetval() {
1999 // We keep a single origin for the entire retval. Might be too optimistic.
2000 return MS.RetvalOriginTLS;
2001 }
2002
2003 /// Set SV to be the shadow value for V.
2004 void setShadow(Value *V, Value *SV) {
2005 assert(!ShadowMap.count(V) && "Values may only have one shadow");
2006 ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
2007 }
2008
2009 /// Set Origin to be the origin value for V.
2010 void setOrigin(Value *V, Value *Origin) {
2011 if (!MS.TrackOrigins)
2012 return;
2013 assert(!OriginMap.count(V) && "Values may only have one origin");
2014 LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << " ==> " << *Origin << "\n");
2015 OriginMap[V] = Origin;
2016 }
2017
2018 Constant *getCleanShadow(Type *OrigTy) {
2019 Type *ShadowTy = getShadowTy(OrigTy);
2020 if (!ShadowTy)
2021 return nullptr;
2022 return Constant::getNullValue(Ty: ShadowTy);
2023 }
2024
2025 /// Create a clean shadow value for a given value.
2026 ///
2027 /// Clean shadow (all zeroes) means all bits of the value are defined
2028 /// (initialized).
2029 Constant *getCleanShadow(Value *V) { return getCleanShadow(OrigTy: V->getType()); }
2030
2031 /// Create a dirty shadow of a given shadow type.
2032 Constant *getPoisonedShadow(Type *ShadowTy) {
2033 assert(ShadowTy);
2034 if (isa<IntegerType>(Val: ShadowTy) || isa<VectorType>(Val: ShadowTy))
2035 return Constant::getAllOnesValue(Ty: ShadowTy);
2036 if (ArrayType *AT = dyn_cast<ArrayType>(Val: ShadowTy)) {
2037 SmallVector<Constant *, 4> Vals(AT->getNumElements(),
2038 getPoisonedShadow(ShadowTy: AT->getElementType()));
2039 return ConstantArray::get(T: AT, V: Vals);
2040 }
2041 if (StructType *ST = dyn_cast<StructType>(Val: ShadowTy)) {
2042 SmallVector<Constant *, 4> Vals;
2043 for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
2044 Vals.push_back(Elt: getPoisonedShadow(ShadowTy: ST->getElementType(N: i)));
2045 return ConstantStruct::get(T: ST, V: Vals);
2046 }
2047 llvm_unreachable("Unexpected shadow type");
2048 }
2049
2050 /// Create a dirty shadow for a given value.
2051 Constant *getPoisonedShadow(Value *V) {
2052 Type *ShadowTy = getShadowTy(V);
2053 if (!ShadowTy)
2054 return nullptr;
2055 return getPoisonedShadow(ShadowTy);
2056 }
2057
2058 /// Create a clean (zero) origin.
2059 Value *getCleanOrigin() { return Constant::getNullValue(Ty: MS.OriginTy); }
2060
2061 /// Get the shadow value for a given Value.
2062 ///
2063 /// This function either returns the value set earlier with setShadow,
2064 /// or extracts if from ParamTLS (for function arguments).
2065 Value *getShadow(Value *V) {
2066 if (Instruction *I = dyn_cast<Instruction>(Val: V)) {
2067 if (!PropagateShadow || I->getMetadata(KindID: LLVMContext::MD_nosanitize))
2068 return getCleanShadow(V);
2069 // For instructions the shadow is already stored in the map.
2070 Value *Shadow = ShadowMap[V];
2071 if (!Shadow) {
2072 LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
2073 assert(Shadow && "No shadow for a value");
2074 }
2075 return Shadow;
2076 }
2077 // Handle fully undefined values
2078 // (partially undefined constant vectors are handled later)
2079 if ([[maybe_unused]] UndefValue *U = dyn_cast<UndefValue>(Val: V)) {
2080 Value *AllOnes = (PropagateShadow && PoisonUndef) ? getPoisonedShadow(V)
2081 : getCleanShadow(V);
2082 LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
2083 return AllOnes;
2084 }
2085 if (Argument *A = dyn_cast<Argument>(Val: V)) {
2086 // For arguments we compute the shadow on demand and store it in the map.
2087 Value *&ShadowPtr = ShadowMap[V];
2088 if (ShadowPtr)
2089 return ShadowPtr;
2090 Function *F = A->getParent();
2091 IRBuilder<> EntryIRB(FnPrologueEnd);
2092 unsigned ArgOffset = 0;
2093 const DataLayout &DL = F->getDataLayout();
2094 for (auto &FArg : F->args()) {
2095 if (!FArg.getType()->isSized() || FArg.getType()->isScalableTy()) {
2096 LLVM_DEBUG(dbgs() << (FArg.getType()->isScalableTy()
2097 ? "vscale not fully supported\n"
2098 : "Arg is not sized\n"));
2099 if (A == &FArg) {
2100 ShadowPtr = getCleanShadow(V);
2101 setOrigin(V: A, Origin: getCleanOrigin());
2102 break;
2103 }
2104 continue;
2105 }
2106
2107 unsigned Size = FArg.hasByValAttr()
2108 ? DL.getTypeAllocSize(Ty: FArg.getParamByValType())
2109 : DL.getTypeAllocSize(Ty: FArg.getType());
2110
2111 if (A == &FArg) {
2112 bool Overflow = ArgOffset + Size > kParamTLSSize;
2113 if (FArg.hasByValAttr()) {
2114 // ByVal pointer itself has clean shadow. We copy the actual
2115 // argument shadow to the underlying memory.
2116 // Figure out maximal valid memcpy alignment.
2117 const Align ArgAlign = DL.getValueOrABITypeAlignment(
2118 Alignment: FArg.getParamAlign(), Ty: FArg.getParamByValType());
2119 Value *CpShadowPtr, *CpOriginPtr;
2120 std::tie(args&: CpShadowPtr, args&: CpOriginPtr) =
2121 getShadowOriginPtr(Addr: V, IRB&: EntryIRB, ShadowTy: EntryIRB.getInt8Ty(), Alignment: ArgAlign,
2122 /*isStore*/ true);
2123 if (!PropagateShadow || Overflow) {
2124 // ParamTLS overflow.
2125 EntryIRB.CreateMemSet(
2126 Ptr: CpShadowPtr, Val: Constant::getNullValue(Ty: EntryIRB.getInt8Ty()),
2127 Size, Align: ArgAlign);
2128 } else {
2129 Value *Base = getShadowPtrForArgument(IRB&: EntryIRB, ArgOffset);
2130 const Align CopyAlign = std::min(a: ArgAlign, b: kShadowTLSAlignment);
2131 [[maybe_unused]] Value *Cpy = EntryIRB.CreateMemCpy(
2132 Dst: CpShadowPtr, DstAlign: CopyAlign, Src: Base, SrcAlign: CopyAlign, Size);
2133 LLVM_DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n");
2134
2135 if (MS.TrackOrigins) {
2136 Value *OriginPtr = getOriginPtrForArgument(IRB&: EntryIRB, ArgOffset);
2137 // FIXME: OriginSize should be:
2138 // alignTo(V % kMinOriginAlignment + Size, kMinOriginAlignment)
2139 unsigned OriginSize = alignTo(Size, A: kMinOriginAlignment);
2140 EntryIRB.CreateMemCpy(
2141 Dst: CpOriginPtr,
2142 /* by getShadowOriginPtr */ DstAlign: kMinOriginAlignment, Src: OriginPtr,
2143 /* by origin_tls[ArgOffset] */ SrcAlign: kMinOriginAlignment,
2144 Size: OriginSize);
2145 }
2146 }
2147 }
2148
2149 if (!PropagateShadow || Overflow || FArg.hasByValAttr() ||
2150 (MS.EagerChecks && FArg.hasAttribute(Kind: Attribute::NoUndef))) {
2151 ShadowPtr = getCleanShadow(V);
2152 setOrigin(V: A, Origin: getCleanOrigin());
2153 } else {
2154 // Shadow over TLS
2155 Value *Base = getShadowPtrForArgument(IRB&: EntryIRB, ArgOffset);
2156 ShadowPtr = EntryIRB.CreateAlignedLoad(Ty: getShadowTy(V: &FArg), Ptr: Base,
2157 Align: kShadowTLSAlignment);
2158 if (MS.TrackOrigins) {
2159 Value *OriginPtr = getOriginPtrForArgument(IRB&: EntryIRB, ArgOffset);
2160 setOrigin(V: A, Origin: EntryIRB.CreateLoad(Ty: MS.OriginTy, Ptr: OriginPtr));
2161 }
2162 }
2163 LLVM_DEBUG(dbgs()
2164 << " ARG: " << FArg << " ==> " << *ShadowPtr << "\n");
2165 break;
2166 }
2167
2168 ArgOffset += alignTo(Size, A: kShadowTLSAlignment);
2169 }
2170 assert(ShadowPtr && "Could not find shadow for an argument");
2171 return ShadowPtr;
2172 }
2173
2174 // Check for partially-undefined constant vectors
2175 // TODO: scalable vectors (this is hard because we do not have IRBuilder)
2176 if (isa<FixedVectorType>(Val: V->getType()) && isa<Constant>(Val: V) &&
2177 cast<Constant>(Val: V)->containsUndefOrPoisonElement() && PropagateShadow &&
2178 PoisonUndefVectors) {
2179 unsigned NumElems = cast<FixedVectorType>(Val: V->getType())->getNumElements();
2180 SmallVector<Constant *, 32> ShadowVector(NumElems);
2181 for (unsigned i = 0; i != NumElems; ++i) {
2182 Constant *Elem = cast<Constant>(Val: V)->getAggregateElement(Elt: i);
2183 ShadowVector[i] = isa<UndefValue>(Val: Elem) ? getPoisonedShadow(V: Elem)
2184 : getCleanShadow(V: Elem);
2185 }
2186
2187 Value *ShadowConstant = ConstantVector::get(V: ShadowVector);
2188 LLVM_DEBUG(dbgs() << "Partial undef constant vector: " << *V << " ==> "
2189 << *ShadowConstant << "\n");
2190
2191 return ShadowConstant;
2192 }
2193
2194 // TODO: partially-undefined constant arrays, structures, and nested types
2195
2196 // For everything else the shadow is zero.
2197 return getCleanShadow(V);
2198 }
2199
2200 /// Get the shadow for i-th argument of the instruction I.
2201 Value *getShadow(Instruction *I, int i) {
2202 return getShadow(V: I->getOperand(i));
2203 }
2204
2205 /// Get the origin for a value.
2206 Value *getOrigin(Value *V) {
2207 if (!MS.TrackOrigins)
2208 return nullptr;
2209 if (!PropagateShadow || isa<Constant>(Val: V) || isa<InlineAsm>(Val: V))
2210 return getCleanOrigin();
2211 assert((isa<Instruction>(V) || isa<Argument>(V)) &&
2212 "Unexpected value type in getOrigin()");
2213 if (Instruction *I = dyn_cast<Instruction>(Val: V)) {
2214 if (I->getMetadata(KindID: LLVMContext::MD_nosanitize))
2215 return getCleanOrigin();
2216 }
2217 Value *Origin = OriginMap[V];
2218 assert(Origin && "Missing origin");
2219 return Origin;
2220 }
2221
2222 /// Get the origin for i-th argument of the instruction I.
2223 Value *getOrigin(Instruction *I, int i) {
2224 return getOrigin(V: I->getOperand(i));
2225 }
2226
2227 /// Remember the place where a shadow check should be inserted.
2228 ///
2229 /// This location will be later instrumented with a check that will print a
2230 /// UMR warning in runtime if the shadow value is not 0.
2231 void insertCheckShadow(Value *Shadow, Value *Origin, Instruction *OrigIns) {
2232 assert(Shadow);
2233 if (!InsertChecks)
2234 return;
2235
2236 if (!DebugCounter::shouldExecute(Counter&: DebugInsertCheck)) {
2237 LLVM_DEBUG(dbgs() << "Skipping check of " << *Shadow << " before "
2238 << *OrigIns << "\n");
2239 return;
2240 }
2241
2242 Type *ShadowTy = Shadow->getType();
2243 if (isScalableNonVectorType(Ty: ShadowTy)) {
2244 LLVM_DEBUG(dbgs() << "Skipping check of scalable non-vector " << *Shadow
2245 << " before " << *OrigIns << "\n");
2246 return;
2247 }
2248#ifndef NDEBUG
2249 assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
2250 isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
2251 "Can only insert checks for integer, vector, and aggregate shadow "
2252 "types");
2253#endif
2254 InstrumentationList.push_back(
2255 Elt: ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
2256 }
2257
2258 /// Get shadow for value, and remember the place where a shadow check should
2259 /// be inserted.
2260 ///
2261 /// This location will be later instrumented with a check that will print a
2262 /// UMR warning in runtime if the value is not fully defined.
2263 void insertCheckShadowOf(Value *Val, Instruction *OrigIns) {
2264 assert(Val);
2265 Value *Shadow, *Origin;
2266 if (ClCheckConstantShadow) {
2267 Shadow = getShadow(V: Val);
2268 if (!Shadow)
2269 return;
2270 Origin = getOrigin(V: Val);
2271 } else {
2272 Shadow = dyn_cast_or_null<Instruction>(Val: getShadow(V: Val));
2273 if (!Shadow)
2274 return;
2275 Origin = dyn_cast_or_null<Instruction>(Val: getOrigin(V: Val));
2276 }
2277 insertCheckShadow(Shadow, Origin, OrigIns);
2278 }
2279
2280 AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
2281 switch (a) {
2282 case AtomicOrdering::NotAtomic:
2283 return AtomicOrdering::NotAtomic;
2284 case AtomicOrdering::Unordered:
2285 case AtomicOrdering::Monotonic:
2286 case AtomicOrdering::Release:
2287 return AtomicOrdering::Release;
2288 case AtomicOrdering::Acquire:
2289 case AtomicOrdering::AcquireRelease:
2290 return AtomicOrdering::AcquireRelease;
2291 case AtomicOrdering::SequentiallyConsistent:
2292 return AtomicOrdering::SequentiallyConsistent;
2293 }
2294 llvm_unreachable("Unknown ordering");
2295 }
2296
2297 Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
2298 constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
2299 uint32_t OrderingTable[NumOrderings] = {};
2300
2301 OrderingTable[(int)AtomicOrderingCABI::relaxed] =
2302 OrderingTable[(int)AtomicOrderingCABI::release] =
2303 (int)AtomicOrderingCABI::release;
2304 OrderingTable[(int)AtomicOrderingCABI::consume] =
2305 OrderingTable[(int)AtomicOrderingCABI::acquire] =
2306 OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
2307 (int)AtomicOrderingCABI::acq_rel;
2308 OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
2309 (int)AtomicOrderingCABI::seq_cst;
2310
2311 return ConstantDataVector::get(Context&: IRB.getContext(), Elts: OrderingTable);
2312 }
2313
2314 AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
2315 switch (a) {
2316 case AtomicOrdering::NotAtomic:
2317 return AtomicOrdering::NotAtomic;
2318 case AtomicOrdering::Unordered:
2319 case AtomicOrdering::Monotonic:
2320 case AtomicOrdering::Acquire:
2321 return AtomicOrdering::Acquire;
2322 case AtomicOrdering::Release:
2323 case AtomicOrdering::AcquireRelease:
2324 return AtomicOrdering::AcquireRelease;
2325 case AtomicOrdering::SequentiallyConsistent:
2326 return AtomicOrdering::SequentiallyConsistent;
2327 }
2328 llvm_unreachable("Unknown ordering");
2329 }
2330
2331 Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
2332 constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
2333 uint32_t OrderingTable[NumOrderings] = {};
2334
2335 OrderingTable[(int)AtomicOrderingCABI::relaxed] =
2336 OrderingTable[(int)AtomicOrderingCABI::acquire] =
2337 OrderingTable[(int)AtomicOrderingCABI::consume] =
2338 (int)AtomicOrderingCABI::acquire;
2339 OrderingTable[(int)AtomicOrderingCABI::release] =
2340 OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
2341 (int)AtomicOrderingCABI::acq_rel;
2342 OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
2343 (int)AtomicOrderingCABI::seq_cst;
2344
2345 return ConstantDataVector::get(Context&: IRB.getContext(), Elts: OrderingTable);
2346 }
2347
2348 // ------------------- Visitors.
2349 using InstVisitor<MemorySanitizerVisitor>::visit;
2350 void visit(Instruction &I) {
2351 if (I.getMetadata(KindID: LLVMContext::MD_nosanitize))
2352 return;
2353 // Don't want to visit if we're in the prologue
2354 if (isInPrologue(I))
2355 return;
2356 if (!DebugCounter::shouldExecute(Counter&: DebugInstrumentInstruction)) {
2357 LLVM_DEBUG(dbgs() << "Skipping instruction: " << I << "\n");
2358 // We still need to set the shadow and origin to clean values.
2359 setShadow(V: &I, SV: getCleanShadow(V: &I));
2360 setOrigin(V: &I, Origin: getCleanOrigin());
2361 return;
2362 }
2363
2364 Instructions.push_back(Elt: &I);
2365 }
2366
2367 /// Instrument LoadInst
2368 ///
2369 /// Loads the corresponding shadow and (optionally) origin.
2370 /// Optionally, checks that the load address is fully defined.
2371 void visitLoadInst(LoadInst &I) {
2372 assert(I.getType()->isSized() && "Load type must have size");
2373 assert(!I.getMetadata(LLVMContext::MD_nosanitize));
2374 NextNodeIRBuilder IRB(&I);
2375 Type *ShadowTy = getShadowTy(V: &I);
2376 Value *Addr = I.getPointerOperand();
2377 Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
2378 const Align Alignment = I.getAlign();
2379 if (PropagateShadow) {
2380 std::tie(args&: ShadowPtr, args&: OriginPtr) =
2381 getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
2382 setShadow(V: &I,
2383 SV: IRB.CreateAlignedLoad(Ty: ShadowTy, Ptr: ShadowPtr, Align: Alignment, Name: "_msld"));
2384 } else {
2385 setShadow(V: &I, SV: getCleanShadow(V: &I));
2386 }
2387
2388 if (ClCheckAccessAddress)
2389 insertCheckShadowOf(Val: I.getPointerOperand(), OrigIns: &I);
2390
2391 if (I.isAtomic())
2392 I.setOrdering(addAcquireOrdering(a: I.getOrdering()));
2393
2394 if (MS.TrackOrigins) {
2395 if (PropagateShadow) {
2396 const Align OriginAlignment = std::max(a: kMinOriginAlignment, b: Alignment);
2397 setOrigin(
2398 V: &I, Origin: IRB.CreateAlignedLoad(Ty: MS.OriginTy, Ptr: OriginPtr, Align: OriginAlignment));
2399 } else {
2400 setOrigin(V: &I, Origin: getCleanOrigin());
2401 }
2402 }
2403 }
2404
2405 /// Instrument StoreInst
2406 ///
2407 /// Stores the corresponding shadow and (optionally) origin.
2408 /// Optionally, checks that the store address is fully defined.
2409 void visitStoreInst(StoreInst &I) {
2410 StoreList.push_back(Elt: &I);
2411 if (ClCheckAccessAddress)
2412 insertCheckShadowOf(Val: I.getPointerOperand(), OrigIns: &I);
2413 }
2414
2415 void handleCASOrRMW(Instruction &I) {
2416 assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
2417
2418 IRBuilder<> IRB(&I);
2419 Value *Addr = I.getOperand(i: 0);
2420 Value *Val = I.getOperand(i: 1);
2421 Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, ShadowTy: getShadowTy(V: Val), Alignment: Align(1),
2422 /*isStore*/ true)
2423 .first;
2424
2425 if (ClCheckAccessAddress)
2426 insertCheckShadowOf(Val: Addr, OrigIns: &I);
2427
2428 // Only test the conditional argument of cmpxchg instruction.
2429 // The other argument can potentially be uninitialized, but we can not
2430 // detect this situation reliably without possible false positives.
2431 if (isa<AtomicCmpXchgInst>(Val: I))
2432 insertCheckShadowOf(Val, OrigIns: &I);
2433
2434 IRB.CreateStore(Val: getCleanShadow(V: Val), Ptr: ShadowPtr);
2435
2436 setShadow(V: &I, SV: getCleanShadow(V: &I));
2437 setOrigin(V: &I, Origin: getCleanOrigin());
2438 }
2439
2440 void visitAtomicRMWInst(AtomicRMWInst &I) {
2441 handleCASOrRMW(I);
2442 I.setOrdering(addReleaseOrdering(a: I.getOrdering()));
2443 }
2444
2445 void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
2446 handleCASOrRMW(I);
2447 I.setSuccessOrdering(addReleaseOrdering(a: I.getSuccessOrdering()));
2448 }
2449
2450 /// Generic handler to compute shadow for == and != comparisons.
2451 ///
2452 /// This function is used by handleEqualityComparison and visitSwitchInst.
2453 ///
2454 /// Sometimes the comparison result is known even if some of the bits of the
2455 /// arguments are not.
2456 Value *propagateEqualityComparison(IRBuilder<> &IRB, Value *A, Value *B,
2457 Value *Sa, Value *Sb) {
2458 assert(getShadowTy(A) == Sa->getType());
2459 assert(getShadowTy(B) == Sb->getType());
2460
2461 // Get rid of pointers and vectors of pointers.
2462 // For ints (and vectors of ints), types of A and Sa match,
2463 // and this is a no-op.
2464 A = IRB.CreatePointerCast(V: A, DestTy: Sa->getType());
2465 B = IRB.CreatePointerCast(V: B, DestTy: Sb->getType());
2466
2467 // A == B <==> (C = A^B) == 0
2468 // A != B <==> (C = A^B) != 0
2469 // Sc = Sa | Sb
2470 Value *C = IRB.CreateXor(LHS: A, RHS: B);
2471 Value *Sc = IRB.CreateOr(LHS: Sa, RHS: Sb);
2472 // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
2473 // Result is defined if one of the following is true
2474 // * there is a defined 1 bit in C
2475 // * C is fully defined
2476 // Si = !(C & ~Sc) && Sc
2477 Value *Zero = Constant::getNullValue(Ty: Sc->getType());
2478 Value *MinusOne = Constant::getAllOnesValue(Ty: Sc->getType());
2479 Value *LHS = IRB.CreateICmpNE(LHS: Sc, RHS: Zero);
2480 Value *RHS =
2481 IRB.CreateICmpEQ(LHS: IRB.CreateAnd(LHS: IRB.CreateXor(LHS: Sc, RHS: MinusOne), RHS: C), RHS: Zero);
2482 Value *Si = IRB.CreateAnd(LHS, RHS);
2483 Si->setName("_msprop_icmp");
2484
2485 return Si;
2486 }
2487
2488 // Instrument:
2489 // switch i32 %Val, label %else [ i32 0, label %A
2490 // i32 1, label %B
2491 // i32 2, label %C ]
2492 //
2493 // Typically, the switch input value (%Val) is fully initialized.
2494 //
2495 // Sometimes the compiler may convert (icmp + br) into a switch statement.
2496 // MSan allows icmp eq/ne with partly initialized inputs to still result in a
2497 // fully initialized output, if there exists a bit that is initialized in
2498 // both inputs with a differing value. For compatibility, we support this in
2499 // the switch instrumentation as well. Note that this edge case only applies
2500 // if the switch input value does not match *any* of the cases (matching any
2501 // of the cases requires an exact, fully initialized match).
2502 //
2503 // ShadowCases = 0
2504 // | propagateEqualityComparison(Val, 0)
2505 // | propagateEqualityComparison(Val, 1)
2506 // | propagateEqualityComparison(Val, 2))
2507 void visitSwitchInst(SwitchInst &SI) {
2508 IRBuilder<> IRB(&SI);
2509
2510 Value *Val = SI.getCondition();
2511 Value *ShadowVal = getShadow(V: Val);
2512 // TODO: add fast path - if the condition is fully initialized, we know
2513 // there is no UUM, without needing to consider the case values below.
2514
2515 // Some code (e.g., AMDGPUGenMCCodeEmitter.inc) has tens of thousands of
2516 // cases. This results in an extremely long chained expression for MSan's
2517 // switch instrumentation, which can cause the JumpThreadingPass to have a
2518 // stack overflow or excessive runtime. We limit the number of cases
2519 // considered, with the tradeoff of niche false negatives.
2520 // TODO: figure out a better solution.
2521 int casesToConsider = ClSwitchPrecision;
2522
2523 Value *ShadowCases = nullptr;
2524 for (auto Case : SI.cases()) {
2525 if (casesToConsider <= 0)
2526 break;
2527
2528 Value *Comparator = Case.getCaseValue();
2529 // TODO: some simplification is possible when comparing multiple cases
2530 // simultaneously.
2531 Value *ComparisonShadow = propagateEqualityComparison(
2532 IRB, A: Val, B: Comparator, Sa: ShadowVal, Sb: getShadow(V: Comparator));
2533
2534 if (ShadowCases)
2535 ShadowCases = IRB.CreateOr(LHS: ShadowCases, RHS: ComparisonShadow);
2536 else
2537 ShadowCases = ComparisonShadow;
2538
2539 casesToConsider--;
2540 }
2541
2542 if (ShadowCases)
2543 insertCheckShadow(Shadow: ShadowCases, Origin: getOrigin(V: Val), OrigIns: &SI);
2544 }
2545
2546 // Vector manipulation.
2547 void visitExtractElementInst(ExtractElementInst &I) {
2548 insertCheckShadowOf(Val: I.getOperand(i_nocapture: 1), OrigIns: &I);
2549 IRBuilder<> IRB(&I);
2550 setShadow(V: &I, SV: IRB.CreateExtractElement(Vec: getShadow(I: &I, i: 0), Idx: I.getOperand(i_nocapture: 1),
2551 Name: "_msprop"));
2552 setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
2553 }
2554
2555 void visitInsertElementInst(InsertElementInst &I) {
2556 insertCheckShadowOf(Val: I.getOperand(i_nocapture: 2), OrigIns: &I);
2557 IRBuilder<> IRB(&I);
2558 auto *Shadow0 = getShadow(I: &I, i: 0);
2559 auto *Shadow1 = getShadow(I: &I, i: 1);
2560 setShadow(V: &I, SV: IRB.CreateInsertElement(Vec: Shadow0, NewElt: Shadow1, Idx: I.getOperand(i_nocapture: 2),
2561 Name: "_msprop"));
2562 setOriginForNaryOp(I);
2563 }
2564
2565 void visitShuffleVectorInst(ShuffleVectorInst &I) {
2566 IRBuilder<> IRB(&I);
2567 auto *Shadow0 = getShadow(I: &I, i: 0);
2568 auto *Shadow1 = getShadow(I: &I, i: 1);
2569 setShadow(V: &I, SV: IRB.CreateShuffleVector(V1: Shadow0, V2: Shadow1, Mask: I.getShuffleMask(),
2570 Name: "_msprop"));
2571 setOriginForNaryOp(I);
2572 }
2573
2574 // Casts.
2575 void visitSExtInst(SExtInst &I) {
2576 IRBuilder<> IRB(&I);
2577 setShadow(V: &I, SV: IRB.CreateSExt(V: getShadow(I: &I, i: 0), DestTy: I.getType(), Name: "_msprop"));
2578 setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
2579 }
2580
2581 void visitZExtInst(ZExtInst &I) {
2582 IRBuilder<> IRB(&I);
2583 setShadow(V: &I, SV: IRB.CreateZExt(V: getShadow(I: &I, i: 0), DestTy: I.getType(), Name: "_msprop"));
2584 setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
2585 }
2586
2587 void visitTruncInst(TruncInst &I) {
2588 IRBuilder<> IRB(&I);
2589 setShadow(V: &I, SV: IRB.CreateTrunc(V: getShadow(I: &I, i: 0), DestTy: I.getType(), Name: "_msprop"));
2590 setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
2591 }
2592
2593 void visitBitCastInst(BitCastInst &I) {
2594 // Special case: if this is the bitcast (there is exactly 1 allowed) between
2595 // a musttail call and a ret, don't instrument. New instructions are not
2596 // allowed after a musttail call.
2597 if (auto *CI = dyn_cast<CallInst>(Val: I.getOperand(i_nocapture: 0)))
2598 if (CI->isMustTailCall())
2599 return;
2600 IRBuilder<> IRB(&I);
2601 setShadow(V: &I, SV: IRB.CreateBitCast(V: getShadow(I: &I, i: 0), DestTy: getShadowTy(V: &I)));
2602 setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
2603 }
2604
2605 void visitPtrToIntInst(PtrToIntInst &I) {
2606 IRBuilder<> IRB(&I);
2607 setShadow(V: &I, SV: IRB.CreateIntCast(V: getShadow(I: &I, i: 0), DestTy: getShadowTy(V: &I), isSigned: false,
2608 Name: "_msprop_ptrtoint"));
2609 setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
2610 }
2611
2612 void visitIntToPtrInst(IntToPtrInst &I) {
2613 IRBuilder<> IRB(&I);
2614 setShadow(V: &I, SV: IRB.CreateIntCast(V: getShadow(I: &I, i: 0), DestTy: getShadowTy(V: &I), isSigned: false,
2615 Name: "_msprop_inttoptr"));
2616 setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
2617 }
2618
2619 /// Handle LLVM and NEON vector convert intrinsics.
2620 ///
2621 /// e.g., <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float>)
2622 /// i32 @llvm.aarch64.neon.fcvtms.i32.f64 (double)
2623 /// <2 x i32> @fptoui (<2 x float>)
2624 /// i64 @llvm.fptosi.sat.i64.f64(double)
2625 ///
2626 /// Note that the size of input/output elements can differ e.g.,
2627 /// double @sitofp(i32)
2628 /// but the number of elements must be the same.
2629 ///
2630 /// For conversions to or from fixed-point, there is a trailing argument to
2631 /// indicate the fixed-point precision:
2632 /// - <4 x float> llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)
2633 /// - <4 x i32> llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
2634 ///
2635 /// For x86 SSE vector convert intrinsics, see
2636 /// handleSSEVectorConvertIntrinsic().
2637 void handleGenericVectorConvertIntrinsic(Instruction &I, bool FixedPoint) {
2638 [[maybe_unused]] unsigned NumArgs = I.getNumOperands();
2639 if (auto *CI = dyn_cast<CallInst>(Val: &I))
2640 NumArgs = CI->arg_size();
2641
2642 if (FixedPoint) {
2643 assert(NumArgs == 2);
2644 Value *Precision = I.getOperand(i: 1);
2645 insertCheckShadowOf(Val: Precision, OrigIns: &I);
2646 } else {
2647 assert(NumArgs == 1);
2648 }
2649
2650 IRBuilder<> IRB(&I);
2651 Value *S0 = getShadow(I: &I, i: 0);
2652
2653 /// For scalars:
2654 /// Since they are converting from floating-point to integer, the output is
2655 /// - fully uninitialized if *any* bit of the input is uninitialized
2656 /// - fully ininitialized if all bits of the input are ininitialized
2657 /// We apply the same principle on a per-field basis for vectors.
2658 Value *OutShadow = IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: S0, RHS: getCleanShadow(V: S0)),
2659 DestTy: getShadowTy(V: &I));
2660 setShadow(V: &I, SV: OutShadow);
2661 setOriginForNaryOp(I);
2662 }
2663
2664 void visitFPToSIInst(CastInst &I) {
2665 handleGenericVectorConvertIntrinsic(I, /*FixedPoint=*/false);
2666 }
2667 void visitFPToUIInst(CastInst &I) {
2668 handleGenericVectorConvertIntrinsic(I, /*FixedPoint=*/false);
2669 }
2670 void visitSIToFPInst(CastInst &I) {
2671 handleGenericVectorConvertIntrinsic(I, /*FixedPoint=*/false);
2672 }
2673 void visitUIToFPInst(CastInst &I) {
2674 handleGenericVectorConvertIntrinsic(I, /*FixedPoint=*/false);
2675 }
2676 void visitFPExtInst(CastInst &I) { handleShadowOr(I); }
2677 void visitFPTruncInst(CastInst &I) { handleShadowOr(I); }
2678
2679 /// Generic handler to compute shadow for bitwise AND.
2680 ///
2681 /// This is used by 'visitAnd' but also as a primitive for other handlers.
2682 ///
2683 /// This code is precise: it implements the rule that "And" of an initialized
2684 /// zero bit always results in an initialized value:
2685 // 1&1 => 1; 0&1 => 0; p&1 => p;
2686 // 1&0 => 0; 0&0 => 0; p&0 => 0;
2687 // 1&p => p; 0&p => 0; p&p => p;
2688 //
2689 // S = (S1 & S2) | (V1 & S2) | (S1 & V2)
2690 Value *handleBitwiseAnd(IRBuilder<> &IRB, Value *V1, Value *V2, Value *S1,
2691 Value *S2) {
2692 // "The two arguments to the ‘and’ instruction must be integer or vector
2693 // of integer values. Both arguments must have identical types."
2694 //
2695 // We enforce this condition for all callers to handleBitwiseAnd(); callers
2696 // with non-integer types should call CreateAppToShadowCast() themselves.
2697 assert(V1->getType()->isIntOrIntVectorTy());
2698 assert(V1->getType() == V2->getType());
2699
2700 // Conveniently, getShadowTy() of Int/IntVector returns the original type.
2701 assert(V1->getType() == S1->getType());
2702 assert(V2->getType() == S2->getType());
2703
2704 Value *S1S2 = IRB.CreateAnd(LHS: S1, RHS: S2);
2705 Value *V1S2 = IRB.CreateAnd(LHS: V1, RHS: S2);
2706 Value *S1V2 = IRB.CreateAnd(LHS: S1, RHS: V2);
2707
2708 return IRB.CreateOr(Ops: {S1S2, V1S2, S1V2});
2709 }
2710
2711 /// Handler for bitwise AND operator.
2712 void visitAnd(BinaryOperator &I) {
2713 IRBuilder<> IRB(&I);
2714 Value *V1 = I.getOperand(i_nocapture: 0);
2715 Value *V2 = I.getOperand(i_nocapture: 1);
2716 Value *S1 = getShadow(I: &I, i: 0);
2717 Value *S2 = getShadow(I: &I, i: 1);
2718
2719 Value *OutShadow = handleBitwiseAnd(IRB, V1, V2, S1, S2);
2720
2721 setShadow(V: &I, SV: OutShadow);
2722 setOriginForNaryOp(I);
2723 }
2724
2725 void visitOr(BinaryOperator &I) {
2726 IRBuilder<> IRB(&I);
2727 // "Or" of 1 and a poisoned value results in unpoisoned value:
2728 // 1|1 => 1; 0|1 => 1; p|1 => 1;
2729 // 1|0 => 1; 0|0 => 0; p|0 => p;
2730 // 1|p => 1; 0|p => p; p|p => p;
2731 //
2732 // S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
2733 //
2734 // If the "disjoint OR" property is violated, the result is poison, and
2735 // hence the entire shadow is uninitialized:
2736 // S = S | SignExt(V1 & V2 != 0)
2737 Value *S1 = getShadow(I: &I, i: 0);
2738 Value *S2 = getShadow(I: &I, i: 1);
2739 Value *V1 = I.getOperand(i_nocapture: 0);
2740 Value *V2 = I.getOperand(i_nocapture: 1);
2741
2742 // "The two arguments to the ‘or’ instruction must be integer or vector
2743 // of integer values. Both arguments must have identical types."
2744 assert(V1->getType()->isIntOrIntVectorTy());
2745 assert(V1->getType() == V2->getType());
2746
2747 // Conveniently, getShadowTy() of Int/IntVector returns the original type.
2748 assert(V1->getType() == S1->getType());
2749 assert(V2->getType() == S2->getType());
2750
2751 Value *NotV1 = IRB.CreateNot(V: V1);
2752 Value *NotV2 = IRB.CreateNot(V: V2);
2753
2754 Value *S1S2 = IRB.CreateAnd(LHS: S1, RHS: S2);
2755 Value *S2NotV1 = IRB.CreateAnd(LHS: NotV1, RHS: S2);
2756 Value *S1NotV2 = IRB.CreateAnd(LHS: S1, RHS: NotV2);
2757
2758 Value *S = IRB.CreateOr(Ops: {S1S2, S2NotV1, S1NotV2});
2759
2760 if (ClPreciseDisjointOr && cast<PossiblyDisjointInst>(Val: &I)->isDisjoint()) {
2761 Value *V1V2 = IRB.CreateAnd(LHS: V1, RHS: V2);
2762 Value *DisjointOrShadow = IRB.CreateSExt(
2763 V: IRB.CreateICmpNE(LHS: V1V2, RHS: getCleanShadow(V: V1V2)), DestTy: V1V2->getType());
2764 S = IRB.CreateOr(LHS: S, RHS: DisjointOrShadow, Name: "_ms_disjoint");
2765 }
2766
2767 setShadow(V: &I, SV: S);
2768 setOriginForNaryOp(I);
2769 }
2770
2771 /// Default propagation of shadow and/or origin.
2772 ///
2773 /// This class implements the general case of shadow propagation, used in all
2774 /// cases where we don't know and/or don't care about what the operation
2775 /// actually does. It converts all input shadow values to a common type
2776 /// (extending or truncating as necessary), and bitwise OR's them.
2777 ///
2778 /// This is much cheaper than inserting checks (i.e. requiring inputs to be
2779 /// fully initialized), and less prone to false positives.
2780 ///
2781 /// This class also implements the general case of origin propagation. For a
2782 /// Nary operation, result origin is set to the origin of an argument that is
2783 /// not entirely initialized. If there is more than one such arguments, the
2784 /// rightmost of them is picked. It does not matter which one is picked if all
2785 /// arguments are initialized.
2786 template <bool CombineShadow> class Combiner {
2787 Value *Shadow = nullptr;
2788 Value *Origin = nullptr;
2789 IRBuilder<> &IRB;
2790 MemorySanitizerVisitor *MSV;
2791
2792 public:
2793 Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
2794 : IRB(IRB), MSV(MSV) {}
2795
2796 /// Add a pair of shadow and origin values to the mix.
2797 Combiner &Add(Value *OpShadow, Value *OpOrigin) {
2798 if (CombineShadow) {
2799 assert(OpShadow);
2800 if (!Shadow)
2801 Shadow = OpShadow;
2802 else {
2803 OpShadow = MSV->CreateShadowCast(IRB, V: OpShadow, dstTy: Shadow->getType());
2804 Shadow = IRB.CreateOr(LHS: Shadow, RHS: OpShadow, Name: "_msprop");
2805 }
2806 }
2807
2808 if (MSV->MS.TrackOrigins) {
2809 assert(OpOrigin);
2810 if (!Origin) {
2811 Origin = OpOrigin;
2812 } else {
2813 Constant *ConstOrigin = dyn_cast<Constant>(Val: OpOrigin);
2814 // No point in adding something that might result in 0 origin value.
2815 if (!ConstOrigin || !ConstOrigin->isNullValue()) {
2816 Value *Cond = MSV->convertToBool(V: OpShadow, IRB);
2817 Origin = IRB.CreateSelect(C: Cond, True: OpOrigin, False: Origin);
2818 }
2819 }
2820 }
2821 return *this;
2822 }
2823
2824 /// Add an application value to the mix.
2825 Combiner &Add(Value *V) {
2826 Value *OpShadow = MSV->getShadow(V);
2827 Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
2828 return Add(OpShadow, OpOrigin);
2829 }
2830
2831 /// Set the current combined values as the given instruction's shadow
2832 /// and origin.
2833 void Done(Instruction *I) {
2834 if (CombineShadow) {
2835 assert(Shadow);
2836 Shadow = MSV->CreateShadowCast(IRB, V: Shadow, dstTy: MSV->getShadowTy(V: I));
2837 MSV->setShadow(V: I, SV: Shadow);
2838 }
2839 if (MSV->MS.TrackOrigins) {
2840 assert(Origin);
2841 MSV->setOrigin(V: I, Origin);
2842 }
2843 }
2844
2845 /// Store the current combined value at the specified origin
2846 /// location.
2847 void DoneAndStoreOrigin(TypeSize TS, Value *OriginPtr) {
2848 if (MSV->MS.TrackOrigins) {
2849 assert(Origin);
2850 MSV->paintOrigin(IRB, Origin, OriginPtr, TS, Alignment: kMinOriginAlignment);
2851 }
2852 }
2853 };
2854
2855 using ShadowAndOriginCombiner = Combiner<true>;
2856 using OriginCombiner = Combiner<false>;
2857
2858 /// Propagate origin for arbitrary operation.
2859 void setOriginForNaryOp(Instruction &I) {
2860 if (!MS.TrackOrigins)
2861 return;
2862 IRBuilder<> IRB(&I);
2863 OriginCombiner OC(this, IRB);
2864 for (Use &Op : I.operands())
2865 OC.Add(V: Op.get());
2866 OC.Done(I: &I);
2867 }
2868
2869 size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
2870 assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
2871 "Vector of pointers is not a valid shadow type");
2872 return Ty->isVectorTy() ? cast<FixedVectorType>(Val: Ty)->getNumElements() *
2873 Ty->getScalarSizeInBits()
2874 : Ty->getPrimitiveSizeInBits();
2875 }
2876
2877 /// Cast between two shadow types, extending or truncating as
2878 /// necessary.
2879 Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
2880 bool Signed = false) {
2881 Type *srcTy = V->getType();
2882 if (srcTy == dstTy)
2883 return V;
2884 size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(Ty: srcTy);
2885 size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(Ty: dstTy);
2886 if (srcSizeInBits > 1 && dstSizeInBits == 1)
2887 return IRB.CreateICmpNE(LHS: V, RHS: getCleanShadow(V));
2888
2889 if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
2890 return IRB.CreateIntCast(V, DestTy: dstTy, isSigned: Signed);
2891 if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
2892 cast<VectorType>(Val: dstTy)->getElementCount() ==
2893 cast<VectorType>(Val: srcTy)->getElementCount())
2894 return IRB.CreateIntCast(V, DestTy: dstTy, isSigned: Signed);
2895 Value *V1 = IRB.CreateBitCast(V, DestTy: Type::getIntNTy(C&: *MS.C, N: srcSizeInBits));
2896 Value *V2 =
2897 IRB.CreateIntCast(V: V1, DestTy: Type::getIntNTy(C&: *MS.C, N: dstSizeInBits), isSigned: Signed);
2898 return IRB.CreateBitCast(V: V2, DestTy: dstTy);
2899 // TODO: handle struct types.
2900 }
2901
2902 /// Cast an application value to the type of its own shadow.
2903 Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
2904 Type *ShadowTy = getShadowTy(V);
2905 if (V->getType() == ShadowTy)
2906 return V;
2907 if (V->getType()->isPtrOrPtrVectorTy())
2908 return IRB.CreatePtrToInt(V, DestTy: ShadowTy);
2909 else
2910 return IRB.CreateBitCast(V, DestTy: ShadowTy);
2911 }
2912
2913 /// Propagate shadow for arbitrary operation.
2914 void handleShadowOr(Instruction &I) {
2915 IRBuilder<> IRB(&I);
2916 ShadowAndOriginCombiner SC(this, IRB);
2917 for (Use &Op : I.operands())
2918 SC.Add(V: Op.get());
2919 SC.Done(I: &I);
2920 }
2921
2922 // Perform a bitwise OR on the horizontal pairs (or other specified grouping)
2923 // of elements.
2924 //
2925 // For example, suppose we have:
2926 // VectorA: <a0, a1, a2, a3, a4, a5>
2927 // VectorB: <b0, b1, b2, b3, b4, b5>
2928 // ReductionFactor: 3
2929 // Shards: 1
2930 // The output would be:
2931 // <a0|a1|a2, a3|a4|a5, b0|b1|b2, b3|b4|b5>
2932 //
2933 // If we have:
2934 // VectorA: <a0, a1, a2, a3, a4, a5, a6, a7>
2935 // VectorB: <b0, b1, b2, b3, b4, b5, b6, b7>
2936 // ReductionFactor: 2
2937 // Shards: 2
2938 // then a and be each have 2 "shards", resulting in the output being
2939 // interleaved:
2940 // <a0|a1, a2|a3, b0|b1, b2|b3, a4|a5, a6|a7, b4|b5, b6|b7>
2941 //
2942 // This is convenient for instrumenting horizontal add/sub.
2943 // For bitwise OR on "vertical" pairs, see maybeHandleSimpleNomemIntrinsic().
2944 Value *horizontalReduce(IntrinsicInst &I, unsigned ReductionFactor,
2945 unsigned Shards, Value *VectorA, Value *VectorB) {
2946 assert(isa<FixedVectorType>(VectorA->getType()));
2947 unsigned NumElems =
2948 cast<FixedVectorType>(Val: VectorA->getType())->getNumElements();
2949
2950 [[maybe_unused]] unsigned TotalNumElems = NumElems;
2951 if (VectorB) {
2952 assert(VectorA->getType() == VectorB->getType());
2953 TotalNumElems *= 2;
2954 }
2955
2956 assert(NumElems % (ReductionFactor * Shards) == 0);
2957
2958 Value *Or = nullptr;
2959
2960 IRBuilder<> IRB(&I);
2961 for (unsigned i = 0; i < ReductionFactor; i++) {
2962 SmallVector<int, 16> Mask;
2963
2964 for (unsigned j = 0; j < Shards; j++) {
2965 unsigned Offset = NumElems / Shards * j;
2966
2967 for (unsigned X = 0; X < NumElems / Shards; X += ReductionFactor)
2968 Mask.push_back(Elt: Offset + X + i);
2969
2970 if (VectorB) {
2971 for (unsigned X = 0; X < NumElems / Shards; X += ReductionFactor)
2972 Mask.push_back(Elt: NumElems + Offset + X + i);
2973 }
2974 }
2975
2976 Value *Masked;
2977 if (VectorB)
2978 Masked = IRB.CreateShuffleVector(V1: VectorA, V2: VectorB, Mask);
2979 else
2980 Masked = IRB.CreateShuffleVector(V: VectorA, Mask);
2981
2982 if (Or)
2983 Or = IRB.CreateOr(LHS: Or, RHS: Masked);
2984 else
2985 Or = Masked;
2986 }
2987
2988 return Or;
2989 }
2990
2991 /// Propagate shadow for 1- or 2-vector intrinsics that combine adjacent
2992 /// fields.
2993 ///
2994 /// e.g., <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>)
2995 /// <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>)
2996 void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I, unsigned Shards) {
2997 assert(I.arg_size() == 1 || I.arg_size() == 2);
2998
2999 assert(I.getType()->isVectorTy());
3000 assert(I.getArgOperand(0)->getType()->isVectorTy());
3001
3002 [[maybe_unused]] FixedVectorType *ParamType =
3003 cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType());
3004 assert((I.arg_size() != 2) ||
3005 (ParamType == cast<FixedVectorType>(I.getArgOperand(1)->getType())));
3006 [[maybe_unused]] FixedVectorType *ReturnType =
3007 cast<FixedVectorType>(Val: I.getType());
3008 assert(ParamType->getNumElements() * I.arg_size() ==
3009 2 * ReturnType->getNumElements());
3010
3011 IRBuilder<> IRB(&I);
3012
3013 // Horizontal OR of shadow
3014 Value *FirstArgShadow = getShadow(I: &I, i: 0);
3015 Value *SecondArgShadow = nullptr;
3016 if (I.arg_size() == 2)
3017 SecondArgShadow = getShadow(I: &I, i: 1);
3018
3019 Value *OrShadow = horizontalReduce(I, /*ReductionFactor=*/2, Shards,
3020 VectorA: FirstArgShadow, VectorB: SecondArgShadow);
3021
3022 OrShadow = CreateShadowCast(IRB, V: OrShadow, dstTy: getShadowTy(V: &I));
3023
3024 setShadow(V: &I, SV: OrShadow);
3025 setOriginForNaryOp(I);
3026 }
3027
3028 /// Propagate shadow for 1- or 2-vector intrinsics that combine adjacent
3029 /// fields, with the parameters reinterpreted to have elements of a specified
3030 /// width. For example:
3031 /// @llvm.x86.ssse3.phadd.w(<1 x i64> [[VAR1]], <1 x i64> [[VAR2]])
3032 /// conceptually operates on
3033 /// (<4 x i16> [[VAR1]], <4 x i16> [[VAR2]])
3034 /// and can be handled with ReinterpretElemWidth == 16.
3035 void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I, unsigned Shards,
3036 int ReinterpretElemWidth) {
3037 assert(I.arg_size() == 1 || I.arg_size() == 2);
3038
3039 assert(I.getType()->isVectorTy());
3040 assert(I.getArgOperand(0)->getType()->isVectorTy());
3041
3042 FixedVectorType *ParamType =
3043 cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType());
3044 assert((I.arg_size() != 2) ||
3045 (ParamType == cast<FixedVectorType>(I.getArgOperand(1)->getType())));
3046
3047 [[maybe_unused]] FixedVectorType *ReturnType =
3048 cast<FixedVectorType>(Val: I.getType());
3049 assert(ParamType->getNumElements() * I.arg_size() ==
3050 2 * ReturnType->getNumElements());
3051
3052 IRBuilder<> IRB(&I);
3053
3054 FixedVectorType *ReinterpretShadowTy = nullptr;
3055 assert(isAligned(Align(ReinterpretElemWidth),
3056 ParamType->getPrimitiveSizeInBits()));
3057 ReinterpretShadowTy = FixedVectorType::get(
3058 ElementType: IRB.getIntNTy(N: ReinterpretElemWidth),
3059 NumElts: ParamType->getPrimitiveSizeInBits() / ReinterpretElemWidth);
3060
3061 // Horizontal OR of shadow
3062 Value *FirstArgShadow = getShadow(I: &I, i: 0);
3063 FirstArgShadow = IRB.CreateBitCast(V: FirstArgShadow, DestTy: ReinterpretShadowTy);
3064
3065 // If we had two parameters each with an odd number of elements, the total
3066 // number of elements is even, but we have never seen this in extant
3067 // instruction sets, so we enforce that each parameter must have an even
3068 // number of elements.
3069 assert(isAligned(
3070 Align(2),
3071 cast<FixedVectorType>(FirstArgShadow->getType())->getNumElements()));
3072
3073 Value *SecondArgShadow = nullptr;
3074 if (I.arg_size() == 2) {
3075 SecondArgShadow = getShadow(I: &I, i: 1);
3076 SecondArgShadow = IRB.CreateBitCast(V: SecondArgShadow, DestTy: ReinterpretShadowTy);
3077 }
3078
3079 Value *OrShadow = horizontalReduce(I, /*ReductionFactor=*/2, Shards,
3080 VectorA: FirstArgShadow, VectorB: SecondArgShadow);
3081
3082 OrShadow = CreateShadowCast(IRB, V: OrShadow, dstTy: getShadowTy(V: &I));
3083
3084 setShadow(V: &I, SV: OrShadow);
3085 setOriginForNaryOp(I);
3086 }
3087
3088 void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
3089
3090 // Handle multiplication by constant.
3091 //
3092 // Handle a special case of multiplication by constant that may have one or
3093 // more zeros in the lower bits. This makes corresponding number of lower bits
3094 // of the result zero as well. We model it by shifting the other operand
3095 // shadow left by the required number of bits. Effectively, we transform
3096 // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
3097 // We use multiplication by 2**N instead of shift to cover the case of
3098 // multiplication by 0, which may occur in some elements of a vector operand.
3099 void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
3100 Value *OtherArg) {
3101 Constant *ShadowMul;
3102 Type *Ty = ConstArg->getType();
3103 if (auto *VTy = dyn_cast<VectorType>(Val: Ty)) {
3104 unsigned NumElements = cast<FixedVectorType>(Val: VTy)->getNumElements();
3105 Type *EltTy = VTy->getElementType();
3106 SmallVector<Constant *, 16> Elements;
3107 for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
3108 if (ConstantInt *Elt =
3109 dyn_cast<ConstantInt>(Val: ConstArg->getAggregateElement(Elt: Idx))) {
3110 const APInt &V = Elt->getValue();
3111 APInt V2 = APInt(V.getBitWidth(), 1) << V.countr_zero();
3112 Elements.push_back(Elt: ConstantInt::get(Ty: EltTy, V: V2));
3113 } else {
3114 Elements.push_back(Elt: ConstantInt::get(Ty: EltTy, V: 1));
3115 }
3116 }
3117 ShadowMul = ConstantVector::get(V: Elements);
3118 } else {
3119 if (ConstantInt *Elt = dyn_cast<ConstantInt>(Val: ConstArg)) {
3120 const APInt &V = Elt->getValue();
3121 APInt V2 = APInt(V.getBitWidth(), 1) << V.countr_zero();
3122 ShadowMul = ConstantInt::get(Ty, V: V2);
3123 } else {
3124 ShadowMul = ConstantInt::get(Ty, V: 1);
3125 }
3126 }
3127
3128 IRBuilder<> IRB(&I);
3129 setShadow(V: &I,
3130 SV: IRB.CreateMul(LHS: getShadow(V: OtherArg), RHS: ShadowMul, Name: "msprop_mul_cst"));
3131 setOrigin(V: &I, Origin: getOrigin(V: OtherArg));
3132 }
3133
3134 void visitMul(BinaryOperator &I) {
3135 Constant *constOp0 = dyn_cast<Constant>(Val: I.getOperand(i_nocapture: 0));
3136 Constant *constOp1 = dyn_cast<Constant>(Val: I.getOperand(i_nocapture: 1));
3137 if (constOp0 && !constOp1)
3138 handleMulByConstant(I, ConstArg: constOp0, OtherArg: I.getOperand(i_nocapture: 1));
3139 else if (constOp1 && !constOp0)
3140 handleMulByConstant(I, ConstArg: constOp1, OtherArg: I.getOperand(i_nocapture: 0));
3141 else
3142 handleShadowOr(I);
3143 }
3144
3145 void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
3146 void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
3147 void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
3148 void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
3149 void visitSub(BinaryOperator &I) { handleShadowOr(I); }
3150 void visitXor(BinaryOperator &I) { handleShadowOr(I); }
3151
3152 void handleIntegerDiv(Instruction &I) {
3153 IRBuilder<> IRB(&I);
3154 // Strict on the second argument.
3155 insertCheckShadowOf(Val: I.getOperand(i: 1), OrigIns: &I);
3156 setShadow(V: &I, SV: getShadow(I: &I, i: 0));
3157 setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
3158 }
3159
3160 void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); }
3161 void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); }
3162 void visitURem(BinaryOperator &I) { handleIntegerDiv(I); }
3163 void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); }
3164
3165 // Floating point division is side-effect free. We can not require that the
3166 // divisor is fully initialized and must propagate shadow. See PR37523.
3167 void visitFDiv(BinaryOperator &I) { handleShadowOr(I); }
3168 void visitFRem(BinaryOperator &I) { handleShadowOr(I); }
3169
3170 /// Instrument == and != comparisons.
3171 ///
3172 /// Sometimes the comparison result is known even if some of the bits of the
3173 /// arguments are not.
3174 void handleEqualityComparison(ICmpInst &I) {
3175 IRBuilder<> IRB(&I);
3176 Value *A = I.getOperand(i_nocapture: 0);
3177 Value *B = I.getOperand(i_nocapture: 1);
3178 Value *Sa = getShadow(V: A);
3179 Value *Sb = getShadow(V: B);
3180
3181 Value *Si = propagateEqualityComparison(IRB, A, B, Sa, Sb);
3182
3183 setShadow(V: &I, SV: Si);
3184 setOriginForNaryOp(I);
3185 }
3186
3187 /// Instrument relational comparisons.
3188 ///
3189 /// This function does exact shadow propagation for all relational
3190 /// comparisons of integers, pointers and vectors of those.
3191 /// FIXME: output seems suboptimal when one of the operands is a constant
3192 void handleRelationalComparisonExact(ICmpInst &I) {
3193 IRBuilder<> IRB(&I);
3194 Value *A = I.getOperand(i_nocapture: 0);
3195 Value *B = I.getOperand(i_nocapture: 1);
3196 Value *Sa = getShadow(V: A);
3197 Value *Sb = getShadow(V: B);
3198
3199 // Get rid of pointers and vectors of pointers.
3200 // For ints (and vectors of ints), types of A and Sa match,
3201 // and this is a no-op.
3202 A = IRB.CreatePointerCast(V: A, DestTy: Sa->getType());
3203 B = IRB.CreatePointerCast(V: B, DestTy: Sb->getType());
3204
3205 // Let [a0, a1] be the interval of possible values of A, taking into account
3206 // its undefined bits. Let [b0, b1] be the interval of possible values of B.
3207 // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
3208 bool IsSigned = I.isSigned();
3209
3210 auto GetMinMaxUnsigned = [&](Value *V, Value *S) {
3211 if (IsSigned) {
3212 // Sign-flip to map from signed range to unsigned range. Relation A vs B
3213 // should be preserved, if checked with `getUnsignedPredicate()`.
3214 // Relationship between Amin, Amax, Bmin, Bmax also will not be
3215 // affected, as they are created by effectively adding/substructing from
3216 // A (or B) a value, derived from shadow, with no overflow, either
3217 // before or after sign flip.
3218 APInt MinVal =
3219 APInt::getSignedMinValue(numBits: V->getType()->getScalarSizeInBits());
3220 V = IRB.CreateXor(LHS: V, RHS: ConstantInt::get(Ty: V->getType(), V: MinVal));
3221 }
3222 // Minimize undefined bits.
3223 Value *Min = IRB.CreateAnd(LHS: V, RHS: IRB.CreateNot(V: S));
3224 Value *Max = IRB.CreateOr(LHS: V, RHS: S);
3225 return std::make_pair(x&: Min, y&: Max);
3226 };
3227
3228 auto [Amin, Amax] = GetMinMaxUnsigned(A, Sa);
3229 auto [Bmin, Bmax] = GetMinMaxUnsigned(B, Sb);
3230 Value *S1 = IRB.CreateICmp(P: I.getUnsignedPredicate(), LHS: Amin, RHS: Bmax);
3231 Value *S2 = IRB.CreateICmp(P: I.getUnsignedPredicate(), LHS: Amax, RHS: Bmin);
3232
3233 Value *Si = IRB.CreateXor(LHS: S1, RHS: S2);
3234 setShadow(V: &I, SV: Si);
3235 setOriginForNaryOp(I);
3236 }
3237
3238 /// Instrument signed relational comparisons.
3239 ///
3240 /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
3241 /// bit of the shadow. Everything else is delegated to handleShadowOr().
3242 void handleSignedRelationalComparison(ICmpInst &I) {
3243 Constant *constOp;
3244 Value *op = nullptr;
3245 CmpInst::Predicate pre;
3246 if ((constOp = dyn_cast<Constant>(Val: I.getOperand(i_nocapture: 1)))) {
3247 op = I.getOperand(i_nocapture: 0);
3248 pre = I.getPredicate();
3249 } else if ((constOp = dyn_cast<Constant>(Val: I.getOperand(i_nocapture: 0)))) {
3250 op = I.getOperand(i_nocapture: 1);
3251 pre = I.getSwappedPredicate();
3252 } else {
3253 handleShadowOr(I);
3254 return;
3255 }
3256
3257 if ((constOp->isNullValue() &&
3258 (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
3259 (constOp->isAllOnesValue() &&
3260 (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
3261 IRBuilder<> IRB(&I);
3262 Value *Shadow = IRB.CreateICmpSLT(LHS: getShadow(V: op), RHS: getCleanShadow(V: op),
3263 Name: "_msprop_icmp_s");
3264 setShadow(V: &I, SV: Shadow);
3265 setOrigin(V: &I, Origin: getOrigin(V: op));
3266 } else {
3267 handleShadowOr(I);
3268 }
3269 }
3270
3271 void visitICmpInst(ICmpInst &I) {
3272 if (!ClHandleICmp) {
3273 handleShadowOr(I);
3274 return;
3275 }
3276 if (I.isEquality()) {
3277 handleEqualityComparison(I);
3278 return;
3279 }
3280
3281 assert(I.isRelational());
3282 if (ClHandleICmpExact) {
3283 handleRelationalComparisonExact(I);
3284 return;
3285 }
3286 if (I.isSigned()) {
3287 handleSignedRelationalComparison(I);
3288 return;
3289 }
3290
3291 assert(I.isUnsigned());
3292 if ((isa<Constant>(Val: I.getOperand(i_nocapture: 0)) || isa<Constant>(Val: I.getOperand(i_nocapture: 1)))) {
3293 handleRelationalComparisonExact(I);
3294 return;
3295 }
3296
3297 handleShadowOr(I);
3298 }
3299
3300 void visitFCmpInst(FCmpInst &I) { handleShadowOr(I); }
3301
3302 void handleShift(BinaryOperator &I) {
3303 IRBuilder<> IRB(&I);
3304 // If any of the S2 bits are poisoned, the whole thing is poisoned.
3305 // Otherwise perform the same shift on S1.
3306 Value *S1 = getShadow(I: &I, i: 0);
3307 Value *S2 = getShadow(I: &I, i: 1);
3308 Value *S2Conv =
3309 IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: S2, RHS: getCleanShadow(V: S2)), DestTy: S2->getType());
3310 Value *V2 = I.getOperand(i_nocapture: 1);
3311 Value *Shift = IRB.CreateBinOp(Opc: I.getOpcode(), LHS: S1, RHS: V2);
3312 setShadow(V: &I, SV: IRB.CreateOr(LHS: Shift, RHS: S2Conv));
3313 setOriginForNaryOp(I);
3314 }
3315
3316 void visitShl(BinaryOperator &I) { handleShift(I); }
3317 void visitAShr(BinaryOperator &I) { handleShift(I); }
3318 void visitLShr(BinaryOperator &I) { handleShift(I); }
3319
3320 void handleFunnelShift(IntrinsicInst &I) {
3321 IRBuilder<> IRB(&I);
3322 // If any of the S2 bits are poisoned, the whole thing is poisoned.
3323 // Otherwise perform the same shift on S0 and S1.
3324 Value *S0 = getShadow(I: &I, i: 0);
3325 Value *S1 = getShadow(I: &I, i: 1);
3326 Value *S2 = getShadow(I: &I, i: 2);
3327 Value *S2Conv =
3328 IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: S2, RHS: getCleanShadow(V: S2)), DestTy: S2->getType());
3329 Value *V2 = I.getOperand(i_nocapture: 2);
3330 Value *Shift = IRB.CreateIntrinsic(ID: I.getIntrinsicID(), OverloadTypes: S2Conv->getType(),
3331 Args: {S0, S1, V2});
3332 setShadow(V: &I, SV: IRB.CreateOr(LHS: Shift, RHS: S2Conv));
3333 setOriginForNaryOp(I);
3334 }
3335
3336 // Instrument bit manipulation intrinsics.
3337 // All of these intrinsics are Z = I(SRC, MASK)
3338 // where the types of all operands and the result match.
3339 // The following instrumentation happens to work for all of them:
3340 // Sz = I(Ssrc, MASK) | (sext (Smask != 0))
3341 void handleGenericBitManipulation(IntrinsicInst &I) {
3342 IRBuilder<> IRB(&I);
3343 Type *ShadowTy = getShadowTy(V: &I);
3344
3345 // If any bit of the mask operand is poisoned, then the whole thing is.
3346 Value *SMask = getShadow(I: &I, i: 1);
3347 SMask = IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: SMask, RHS: getCleanShadow(OrigTy: ShadowTy)),
3348 DestTy: ShadowTy);
3349 // Apply the same intrinsic to the shadow of the first operand.
3350 Value *S;
3351 if (Function *Func = I.getCalledFunction())
3352 S = IRB.CreateCall(Callee: Func, Args: {getShadow(I: &I, i: 0), I.getOperand(i_nocapture: 1)});
3353 else
3354 S = IRB.CreateIntrinsic(ID: I.getIntrinsicID(), OverloadTypes: ShadowTy,
3355 Args: {getShadow(I: &I, i: 0), I.getOperand(i_nocapture: 1)});
3356
3357 setShadow(V: &I, SV: IRB.CreateOr(LHS: SMask, RHS: S));
3358 setOriginForNaryOp(I);
3359 }
3360
3361 /// Instrument llvm.memmove
3362 ///
3363 /// At this point we don't know if llvm.memmove will be inlined or not.
3364 /// If we don't instrument it and it gets inlined,
3365 /// our interceptor will not kick in and we will lose the memmove.
3366 /// If we instrument the call here, but it does not get inlined,
3367 /// we will memmove the shadow twice: which is bad in case
3368 /// of overlapping regions. So, we simply lower the intrinsic to a call.
3369 ///
3370 /// Similar situation exists for memcpy and memset.
3371 void visitMemMoveInst(MemMoveInst &I) {
3372 getShadow(V: I.getArgOperand(i: 1)); // Ensure shadow initialized
3373 IRBuilder<> IRB(&I);
3374 IRB.CreateCall(Callee: MS.MemmoveFn,
3375 Args: {I.getArgOperand(i: 0), I.getArgOperand(i: 1),
3376 IRB.CreateIntCast(V: I.getArgOperand(i: 2), DestTy: MS.IntptrTy, isSigned: false)});
3377 I.eraseFromParent();
3378 }
3379
3380 /// Instrument memcpy
3381 ///
3382 /// Similar to memmove: avoid copying shadow twice. This is somewhat
3383 /// unfortunate as it may slowdown small constant memcpys.
3384 /// FIXME: consider doing manual inline for small constant sizes and proper
3385 /// alignment.
3386 ///
3387 /// Note: This also handles memcpy.inline, which promises no calls to external
3388 /// functions as an optimization. However, with instrumentation enabled this
3389 /// is difficult to promise; additionally, we know that the MSan runtime
3390 /// exists and provides __msan_memcpy(). Therefore, we assume that with
3391 /// instrumentation it's safe to turn memcpy.inline into a call to
3392 /// __msan_memcpy(). Should this be wrong, such as when implementing memcpy()
3393 /// itself, instrumentation should be disabled with the no_sanitize attribute.
3394 void visitMemCpyInst(MemCpyInst &I) {
3395 getShadow(V: I.getArgOperand(i: 1)); // Ensure shadow initialized
3396 IRBuilder<> IRB(&I);
3397 IRB.CreateCall(Callee: MS.MemcpyFn,
3398 Args: {I.getArgOperand(i: 0), I.getArgOperand(i: 1),
3399 IRB.CreateIntCast(V: I.getArgOperand(i: 2), DestTy: MS.IntptrTy, isSigned: false)});
3400 I.eraseFromParent();
3401 }
3402
3403 // Same as memcpy.
3404 void visitMemSetInst(MemSetInst &I) {
3405 IRBuilder<> IRB(&I);
3406 IRB.CreateCall(
3407 Callee: MS.MemsetFn,
3408 Args: {I.getArgOperand(i: 0),
3409 IRB.CreateIntCast(V: I.getArgOperand(i: 1), DestTy: IRB.getInt32Ty(), isSigned: false),
3410 IRB.CreateIntCast(V: I.getArgOperand(i: 2), DestTy: MS.IntptrTy, isSigned: false)});
3411 I.eraseFromParent();
3412 }
3413
3414 void visitVAStartInst(VAStartInst &I) { VAHelper->visitVAStartInst(I); }
3415
3416 void visitVACopyInst(VACopyInst &I) { VAHelper->visitVACopyInst(I); }
3417
3418 /// Handle vector store-like intrinsics.
3419 ///
3420 /// Instrument intrinsics that look like a simple SIMD store: writes memory,
3421 /// has 1 pointer argument and 1 vector argument, returns void.
3422 bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
3423 assert(I.arg_size() == 2);
3424
3425 IRBuilder<> IRB(&I);
3426 Value *Addr = I.getArgOperand(i: 0);
3427 Value *Shadow = getShadow(I: &I, i: 1);
3428 Value *ShadowPtr, *OriginPtr;
3429
3430 // We don't know the pointer alignment (could be unaligned SSE store!).
3431 // Have to assume to worst case.
3432 std::tie(args&: ShadowPtr, args&: OriginPtr) = getShadowOriginPtr(
3433 Addr, IRB, ShadowTy: Shadow->getType(), Alignment: Align(1), /*isStore*/ true);
3434 IRB.CreateAlignedStore(Val: Shadow, Ptr: ShadowPtr, Align: Align(1));
3435
3436 if (ClCheckAccessAddress)
3437 insertCheckShadowOf(Val: Addr, OrigIns: &I);
3438
3439 // FIXME: factor out common code from materializeStores
3440 if (MS.TrackOrigins)
3441 IRB.CreateStore(Val: getOrigin(I: &I, i: 1), Ptr: OriginPtr);
3442 return true;
3443 }
3444
3445 /// Handle vector load-like intrinsics.
3446 ///
3447 /// Instrument intrinsics that look like a simple SIMD load: reads memory,
3448 /// has 1 pointer argument, returns a vector.
3449 bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
3450 assert(I.arg_size() == 1);
3451
3452 IRBuilder<> IRB(&I);
3453 Value *Addr = I.getArgOperand(i: 0);
3454
3455 Type *ShadowTy = getShadowTy(V: &I);
3456 Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
3457 if (PropagateShadow) {
3458 // We don't know the pointer alignment (could be unaligned SSE load!).
3459 // Have to assume to worst case.
3460 const Align Alignment = Align(1);
3461 std::tie(args&: ShadowPtr, args&: OriginPtr) =
3462 getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
3463 setShadow(V: &I,
3464 SV: IRB.CreateAlignedLoad(Ty: ShadowTy, Ptr: ShadowPtr, Align: Alignment, Name: "_msld"));
3465 } else {
3466 setShadow(V: &I, SV: getCleanShadow(V: &I));
3467 }
3468
3469 if (ClCheckAccessAddress)
3470 insertCheckShadowOf(Val: Addr, OrigIns: &I);
3471
3472 if (MS.TrackOrigins) {
3473 if (PropagateShadow)
3474 setOrigin(V: &I, Origin: IRB.CreateLoad(Ty: MS.OriginTy, Ptr: OriginPtr));
3475 else
3476 setOrigin(V: &I, Origin: getCleanOrigin());
3477 }
3478 return true;
3479 }
3480
3481 /// Handle (SIMD arithmetic)-like intrinsics.
3482 ///
3483 /// Instrument intrinsics with any number of arguments of the same type [*],
3484 /// equal to the return type, plus a specified number of trailing flags of
3485 /// any type.
3486 ///
3487 /// [*] The type should be simple (no aggregates or pointers; vectors are
3488 /// fine).
3489 ///
3490 /// Caller guarantees that this intrinsic does not access memory.
3491 ///
3492 /// TODO: "horizontal"/"pairwise" intrinsics are often incorrectly matched by
3493 /// by this handler. See horizontalReduce().
3494 ///
3495 /// TODO: permutation intrinsics are also often incorrectly matched.
3496 [[maybe_unused]] bool
3497 maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I,
3498 unsigned int trailingFlags) {
3499 Type *RetTy = I.getType();
3500 if (!(RetTy->isIntOrIntVectorTy() || RetTy->isFPOrFPVectorTy()))
3501 return false;
3502
3503 unsigned NumArgOperands = I.arg_size();
3504 assert(NumArgOperands >= trailingFlags);
3505 for (unsigned i = 0; i < NumArgOperands - trailingFlags; ++i) {
3506 Type *Ty = I.getArgOperand(i)->getType();
3507 if (Ty != RetTy)
3508 return false;
3509 }
3510
3511 IRBuilder<> IRB(&I);
3512 ShadowAndOriginCombiner SC(this, IRB);
3513 for (unsigned i = 0; i < NumArgOperands; ++i)
3514 SC.Add(V: I.getArgOperand(i));
3515 SC.Done(I: &I);
3516
3517 return true;
3518 }
3519
3520 /// Returns whether it was able to heuristically instrument unknown
3521 /// intrinsics.
3522 ///
3523 /// The main purpose of this code is to do something reasonable with all
3524 /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
3525 /// We recognize several classes of intrinsics by their argument types and
3526 /// ModRefBehaviour and apply special instrumentation when we are reasonably
3527 /// sure that we know what the intrinsic does.
3528 ///
3529 /// We special-case intrinsics where this approach fails. See llvm.bswap
3530 /// handling as an example of that.
3531 bool maybeHandleUnknownIntrinsicUnlogged(IntrinsicInst &I) {
3532 unsigned NumArgOperands = I.arg_size();
3533 if (NumArgOperands == 0)
3534 return false;
3535
3536 if (NumArgOperands == 2 && I.getArgOperand(i: 0)->getType()->isPointerTy() &&
3537 I.getArgOperand(i: 1)->getType()->isVectorTy() &&
3538 I.getType()->isVoidTy() && !I.onlyReadsMemory()) {
3539 // This looks like a vector store.
3540 return handleVectorStoreIntrinsic(I);
3541 }
3542
3543 if (NumArgOperands == 1 && I.getArgOperand(i: 0)->getType()->isPointerTy() &&
3544 I.getType()->isVectorTy() && I.onlyReadsMemory()) {
3545 // This looks like a vector load.
3546 return handleVectorLoadIntrinsic(I);
3547 }
3548
3549 if (I.doesNotAccessMemory())
3550 if (maybeHandleSimpleNomemIntrinsic(I, /*trailingFlags=*/0))
3551 return true;
3552
3553 // FIXME: detect and handle SSE maskstore/maskload?
3554 // Some cases are now handled in handleAVXMasked{Load,Store}.
3555 return false;
3556 }
3557
3558 bool maybeHandleUnknownIntrinsic(IntrinsicInst &I) {
3559 if (maybeHandleUnknownIntrinsicUnlogged(I)) {
3560 if (ClDumpHeuristicInstructions)
3561 dumpInst(I, Prefix: "Heuristic");
3562
3563 LLVM_DEBUG(dbgs() << "UNKNOWN INSTRUCTION HANDLED HEURISTICALLY: " << I
3564 << "\n");
3565 return true;
3566 } else
3567 return false;
3568 }
3569
3570 void handleInvariantGroup(IntrinsicInst &I) {
3571 setShadow(V: &I, SV: getShadow(I: &I, i: 0));
3572 setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
3573 }
3574
3575 void handleLifetimeStart(IntrinsicInst &I) {
3576 if (!PoisonStack)
3577 return;
3578 AllocaInst *AI = dyn_cast<AllocaInst>(Val: I.getArgOperand(i: 0));
3579 if (AI)
3580 LifetimeStartList.push_back(Elt: std::make_pair(x: &I, y&: AI));
3581 }
3582
3583 void handleBswap(IntrinsicInst &I) {
3584 IRBuilder<> IRB(&I);
3585 Value *Op = I.getArgOperand(i: 0);
3586 Type *OpType = Op->getType();
3587 setShadow(V: &I, SV: IRB.CreateIntrinsic(ID: Intrinsic::bswap, OverloadTypes: ArrayRef(&OpType, 1),
3588 Args: getShadow(V: Op)));
3589 setOrigin(V: &I, Origin: getOrigin(V: Op));
3590 }
3591
3592 // Uninitialized bits are ok if they appear after the leading/trailing 0's
3593 // and a 1. If the input is all zero, it is fully initialized iff
3594 // !is_zero_poison.
3595 //
3596 // e.g., for ctlz, with little-endian, if 0/1 are initialized bits with
3597 // concrete value 0/1, and ? is an uninitialized bit:
3598 // - 0001 0??? is fully initialized
3599 // - 000? ???? is fully uninitialized (*)
3600 // - ???? ???? is fully uninitialized
3601 // - 0000 0000 is fully uninitialized if is_zero_poison,
3602 // fully initialized otherwise
3603 //
3604 // (*) TODO: arguably, since the number of zeros is in the range [3, 8], we
3605 // only need to poison 4 bits.
3606 //
3607 // OutputShadow =
3608 // ((ConcreteZerosCount >= ShadowZerosCount) && !AllZeroShadow)
3609 // || (is_zero_poison && AllZeroSrc)
3610 void handleCountLeadingTrailingZeros(IntrinsicInst &I) {
3611 IRBuilder<> IRB(&I);
3612 Value *Src = I.getArgOperand(i: 0);
3613 Value *SrcShadow = getShadow(V: Src);
3614
3615 Value *False = IRB.getInt1(V: false);
3616 Value *ConcreteZerosCount = IRB.CreateIntrinsic(
3617 RetTy: I.getType(), ID: I.getIntrinsicID(), Args: {Src, /*is_zero_poison=*/False});
3618 Value *ShadowZerosCount = IRB.CreateIntrinsic(
3619 RetTy: I.getType(), ID: I.getIntrinsicID(), Args: {SrcShadow, /*is_zero_poison=*/False});
3620
3621 Value *CompareConcreteZeros = IRB.CreateICmpUGE(
3622 LHS: ConcreteZerosCount, RHS: ShadowZerosCount, Name: "_mscz_cmp_zeros");
3623
3624 Value *NotAllZeroShadow =
3625 IRB.CreateIsNotNull(Arg: SrcShadow, Name: "_mscz_shadow_not_null");
3626 Value *OutputShadow =
3627 IRB.CreateAnd(LHS: CompareConcreteZeros, RHS: NotAllZeroShadow, Name: "_mscz_main");
3628
3629 // If zero poison is requested, mix in with the shadow
3630 Constant *IsZeroPoison = cast<Constant>(Val: I.getOperand(i_nocapture: 1));
3631 if (!IsZeroPoison->isNullValue()) {
3632 Value *BoolZeroPoison = IRB.CreateIsNull(Arg: Src, Name: "_mscz_bzp");
3633 OutputShadow = IRB.CreateOr(LHS: OutputShadow, RHS: BoolZeroPoison, Name: "_mscz_bs");
3634 }
3635
3636 OutputShadow = IRB.CreateSExt(V: OutputShadow, DestTy: getShadowTy(V: Src), Name: "_mscz_os");
3637
3638 setShadow(V: &I, SV: OutputShadow);
3639 setOriginForNaryOp(I);
3640 }
3641
3642 /// Some instructions have additional zero-elements in the return type
3643 /// e.g., <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, ...)
3644 ///
3645 /// This function will return a vector type with the same number of elements
3646 /// as the input, but same per-element width as the return value e.g.,
3647 /// <8 x i8>.
3648 FixedVectorType *maybeShrinkVectorShadowType(Value *Src, IntrinsicInst &I) {
3649 assert(isa<FixedVectorType>(getShadowTy(&I)));
3650 FixedVectorType *ShadowType = cast<FixedVectorType>(Val: getShadowTy(V: &I));
3651
3652 // TODO: generalize beyond 2x?
3653 if (ShadowType->getElementCount() ==
3654 cast<VectorType>(Val: Src->getType())->getElementCount() * 2)
3655 ShadowType = FixedVectorType::getHalfElementsVectorType(VTy: ShadowType);
3656
3657 assert(ShadowType->getElementCount() ==
3658 cast<VectorType>(Src->getType())->getElementCount());
3659
3660 return ShadowType;
3661 }
3662
3663 /// Doubles the length of a vector shadow (extending with zeros) if necessary
3664 /// to match the length of the shadow for the instruction.
3665 /// If scalar types of the vectors are different, it will use the type of the
3666 /// input vector.
3667 /// This is more type-safe than CreateShadowCast().
3668 Value *maybeExtendVectorShadowWithZeros(Value *Shadow, IntrinsicInst &I) {
3669 IRBuilder<> IRB(&I);
3670 assert(isa<FixedVectorType>(Shadow->getType()));
3671 assert(isa<FixedVectorType>(I.getType()));
3672
3673 Value *FullShadow = getCleanShadow(V: &I);
3674 unsigned ShadowNumElems =
3675 cast<FixedVectorType>(Val: Shadow->getType())->getNumElements();
3676 unsigned FullShadowNumElems =
3677 cast<FixedVectorType>(Val: FullShadow->getType())->getNumElements();
3678
3679 assert((ShadowNumElems == FullShadowNumElems) ||
3680 (ShadowNumElems * 2 == FullShadowNumElems));
3681
3682 if (ShadowNumElems == FullShadowNumElems) {
3683 FullShadow = Shadow;
3684 } else {
3685 // TODO: generalize beyond 2x?
3686 SmallVector<int, 32> ShadowMask(FullShadowNumElems);
3687 std::iota(first: ShadowMask.begin(), last: ShadowMask.end(), value: 0);
3688
3689 // Append zeros
3690 FullShadow =
3691 IRB.CreateShuffleVector(V1: Shadow, V2: getCleanShadow(V: Shadow), Mask: ShadowMask);
3692 }
3693
3694 return FullShadow;
3695 }
3696
3697 /// Handle x86 SSE vector conversion.
3698 ///
3699 /// e.g., single-precision to half-precision conversion:
3700 /// <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
3701 /// <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
3702 ///
3703 /// floating-point to integer:
3704 /// <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>)
3705 /// <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>)
3706 ///
3707 /// Note: if the output has more elements, they are zero-initialized (and
3708 /// therefore the shadow will also be initialized).
3709 ///
3710 /// This differs from handleSSEVectorConvertIntrinsic() because it
3711 /// propagates uninitialized shadow (instead of checking the shadow).
3712 void handleSSEVectorConvertIntrinsicByProp(IntrinsicInst &I,
3713 bool HasRoundingMode) {
3714 if (HasRoundingMode) {
3715 assert(I.arg_size() == 2);
3716 [[maybe_unused]] Value *RoundingMode = I.getArgOperand(i: 1);
3717 assert(RoundingMode->getType()->isIntegerTy());
3718 } else {
3719 assert(I.arg_size() == 1);
3720 }
3721
3722 Value *Src = I.getArgOperand(i: 0);
3723 assert(Src->getType()->isVectorTy());
3724
3725 // The return type might have more elements than the input.
3726 // Temporarily shrink the return type's number of elements.
3727 VectorType *ShadowType = maybeShrinkVectorShadowType(Src, I);
3728
3729 IRBuilder<> IRB(&I);
3730 Value *S0 = getShadow(I: &I, i: 0);
3731
3732 /// For scalars:
3733 /// Since they are converting to and/or from floating-point, the output is:
3734 /// - fully uninitialized if *any* bit of the input is uninitialized
3735 /// - fully ininitialized if all bits of the input are ininitialized
3736 /// We apply the same principle on a per-field basis for vectors.
3737 Value *Shadow =
3738 IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: S0, RHS: getCleanShadow(V: S0)), DestTy: ShadowType);
3739
3740 // The return type might have more elements than the input.
3741 // Extend the return type back to its original width if necessary.
3742 Value *FullShadow = maybeExtendVectorShadowWithZeros(Shadow, I);
3743
3744 setShadow(V: &I, SV: FullShadow);
3745 setOriginForNaryOp(I);
3746 }
3747
3748 // Instrument x86 SSE vector convert intrinsic.
3749 //
3750 // This function instruments intrinsics like cvtsi2ss:
3751 // %Out = int_xxx_cvtyyy(%ConvertOp)
3752 // or
3753 // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
3754 // Intrinsic converts \p NumUsedElements elements of \p ConvertOp to the same
3755 // number \p Out elements, and (if has 2 arguments) copies the rest of the
3756 // elements from \p CopyOp.
3757 // In most cases conversion involves floating-point value which may trigger a
3758 // hardware exception when not fully initialized. For this reason we require
3759 // \p ConvertOp[0:NumUsedElements] to be fully initialized and trap otherwise.
3760 // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
3761 // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
3762 // return a fully initialized value.
3763 //
3764 // For Arm NEON vector convert intrinsics, see
3765 // handleNEONVectorConvertIntrinsic().
3766 void handleSSEVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements,
3767 bool HasRoundingMode = false) {
3768 IRBuilder<> IRB(&I);
3769 Value *CopyOp, *ConvertOp;
3770
3771 assert((!HasRoundingMode ||
3772 isa<ConstantInt>(I.getArgOperand(I.arg_size() - 1))) &&
3773 "Invalid rounding mode");
3774
3775 switch (I.arg_size() - HasRoundingMode) {
3776 case 2:
3777 CopyOp = I.getArgOperand(i: 0);
3778 ConvertOp = I.getArgOperand(i: 1);
3779 break;
3780 case 1:
3781 ConvertOp = I.getArgOperand(i: 0);
3782 CopyOp = nullptr;
3783 break;
3784 default:
3785 llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
3786 }
3787
3788 // The first *NumUsedElements* elements of ConvertOp are converted to the
3789 // same number of output elements. The rest of the output is copied from
3790 // CopyOp, or (if not available) filled with zeroes.
3791 // Combine shadow for elements of ConvertOp that are used in this operation,
3792 // and insert a check.
3793 // FIXME: consider propagating shadow of ConvertOp, at least in the case of
3794 // int->any conversion.
3795 Value *ConvertShadow = getShadow(V: ConvertOp);
3796 Value *AggShadow = nullptr;
3797 if (ConvertOp->getType()->isVectorTy()) {
3798 AggShadow = IRB.CreateExtractElement(
3799 Vec: ConvertShadow, Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: 0));
3800 for (int i = 1; i < NumUsedElements; ++i) {
3801 Value *MoreShadow = IRB.CreateExtractElement(
3802 Vec: ConvertShadow, Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: i));
3803 AggShadow = IRB.CreateOr(LHS: AggShadow, RHS: MoreShadow);
3804 }
3805 } else {
3806 AggShadow = ConvertShadow;
3807 }
3808 assert(AggShadow->getType()->isIntegerTy());
3809 insertCheckShadow(Shadow: AggShadow, Origin: getOrigin(V: ConvertOp), OrigIns: &I);
3810
3811 // Build result shadow by zero-filling parts of CopyOp shadow that come from
3812 // ConvertOp.
3813 if (CopyOp) {
3814 assert(CopyOp->getType() == I.getType());
3815 assert(CopyOp->getType()->isVectorTy());
3816 Value *ResultShadow = getShadow(V: CopyOp);
3817 Type *EltTy = cast<VectorType>(Val: ResultShadow->getType())->getElementType();
3818 for (int i = 0; i < NumUsedElements; ++i) {
3819 ResultShadow = IRB.CreateInsertElement(
3820 Vec: ResultShadow, NewElt: ConstantInt::getNullValue(Ty: EltTy),
3821 Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: i));
3822 }
3823 setShadow(V: &I, SV: ResultShadow);
3824 setOrigin(V: &I, Origin: getOrigin(V: CopyOp));
3825 } else {
3826 setShadow(V: &I, SV: getCleanShadow(V: &I));
3827 setOrigin(V: &I, Origin: getCleanOrigin());
3828 }
3829 }
3830
3831 // Given a scalar or vector, extract lower 64 bits (or less), and return all
3832 // zeroes if it is zero, and all ones otherwise.
3833 Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
3834 if (S->getType()->isVectorTy())
3835 S = CreateShadowCast(IRB, V: S, dstTy: IRB.getInt64Ty(), /* Signed */ true);
3836 assert(S->getType()->getPrimitiveSizeInBits() <= 64);
3837 Value *S2 = IRB.CreateICmpNE(LHS: S, RHS: getCleanShadow(V: S));
3838 return CreateShadowCast(IRB, V: S2, dstTy: T, /* Signed */ true);
3839 }
3840
3841 // Given a vector, extract its first element, and return all
3842 // zeroes if it is zero, and all ones otherwise.
3843 Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
3844 Value *S1 = IRB.CreateExtractElement(Vec: S, Idx: (uint64_t)0);
3845 Value *S2 = IRB.CreateICmpNE(LHS: S1, RHS: getCleanShadow(V: S1));
3846 return CreateShadowCast(IRB, V: S2, dstTy: T, /* Signed */ true);
3847 }
3848
3849 Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
3850 Type *T = S->getType();
3851 assert(T->isVectorTy());
3852 Value *S2 = IRB.CreateICmpNE(LHS: S, RHS: getCleanShadow(V: S));
3853 return IRB.CreateSExt(V: S2, DestTy: T);
3854 }
3855
3856 // Instrument vector shift intrinsic.
3857 //
3858 // This function instruments intrinsics like int_x86_avx2_psll_w.
3859 // Intrinsic shifts %In by %ShiftSize bits.
3860 // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
3861 // size, and the rest is ignored. Behavior is defined even if shift size is
3862 // greater than register (or field) width.
3863 void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
3864 assert(I.arg_size() == 2);
3865 IRBuilder<> IRB(&I);
3866 // If any of the S2 bits are poisoned, the whole thing is poisoned.
3867 // Otherwise perform the same shift on S1.
3868 Value *S1 = getShadow(I: &I, i: 0);
3869 Value *S2 = getShadow(I: &I, i: 1);
3870 Value *S2Conv = Variable ? VariableShadowExtend(IRB, S: S2)
3871 : Lower64ShadowExtend(IRB, S: S2, T: getShadowTy(V: &I));
3872 Value *V1 = I.getOperand(i_nocapture: 0);
3873 Value *V2 = I.getOperand(i_nocapture: 1);
3874 Value *Shift = IRB.CreateCall(FTy: I.getFunctionType(), Callee: I.getCalledOperand(),
3875 Args: {IRB.CreateBitCast(V: S1, DestTy: V1->getType()), V2});
3876 Shift = IRB.CreateBitCast(V: Shift, DestTy: getShadowTy(V: &I));
3877 setShadow(V: &I, SV: IRB.CreateOr(LHS: Shift, RHS: S2Conv));
3878 setOriginForNaryOp(I);
3879 }
3880
3881 // Get an MMX-sized (64-bit) vector type, or optionally, other sized
3882 // vectors.
3883 Type *getMMXVectorTy(unsigned EltSizeInBits,
3884 unsigned X86_MMXSizeInBits = 64) {
3885 assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
3886 "Illegal MMX vector element size");
3887 return FixedVectorType::get(ElementType: IntegerType::get(C&: *MS.C, NumBits: EltSizeInBits),
3888 NumElts: X86_MMXSizeInBits / EltSizeInBits);
3889 }
3890
3891 // Returns a signed counterpart for an (un)signed-saturate-and-pack
3892 // intrinsic.
3893 Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
3894 switch (id) {
3895 case Intrinsic::x86_sse2_packsswb_128:
3896 case Intrinsic::x86_sse2_packuswb_128:
3897 return Intrinsic::x86_sse2_packsswb_128;
3898
3899 case Intrinsic::x86_sse2_packssdw_128:
3900 case Intrinsic::x86_sse41_packusdw:
3901 return Intrinsic::x86_sse2_packssdw_128;
3902
3903 case Intrinsic::x86_avx2_packsswb:
3904 case Intrinsic::x86_avx2_packuswb:
3905 return Intrinsic::x86_avx2_packsswb;
3906
3907 case Intrinsic::x86_avx2_packssdw:
3908 case Intrinsic::x86_avx2_packusdw:
3909 return Intrinsic::x86_avx2_packssdw;
3910
3911 case Intrinsic::x86_mmx_packsswb:
3912 case Intrinsic::x86_mmx_packuswb:
3913 return Intrinsic::x86_mmx_packsswb;
3914
3915 case Intrinsic::x86_mmx_packssdw:
3916 return Intrinsic::x86_mmx_packssdw;
3917
3918 case Intrinsic::x86_avx512_packssdw_512:
3919 case Intrinsic::x86_avx512_packusdw_512:
3920 return Intrinsic::x86_avx512_packssdw_512;
3921
3922 case Intrinsic::x86_avx512_packsswb_512:
3923 case Intrinsic::x86_avx512_packuswb_512:
3924 return Intrinsic::x86_avx512_packsswb_512;
3925
3926 default:
3927 llvm_unreachable("unexpected intrinsic id");
3928 }
3929 }
3930
3931 // Instrument vector pack intrinsic.
3932 //
3933 // This function instruments intrinsics like x86_mmx_packsswb, that
3934 // packs elements of 2 input vectors into half as many bits with saturation.
3935 // Shadow is propagated with the signed variant of the same intrinsic applied
3936 // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
3937 // MMXEltSizeInBits is used only for x86mmx arguments.
3938 //
3939 // TODO: consider using GetMinMaxUnsigned() to handle saturation precisely
3940 void handleVectorPackIntrinsic(IntrinsicInst &I,
3941 unsigned MMXEltSizeInBits = 0) {
3942 assert(I.arg_size() == 2);
3943 IRBuilder<> IRB(&I);
3944 Value *S1 = getShadow(I: &I, i: 0);
3945 Value *S2 = getShadow(I: &I, i: 1);
3946 assert(S1->getType()->isVectorTy());
3947
3948 // SExt and ICmpNE below must apply to individual elements of input vectors.
3949 // In case of x86mmx arguments, cast them to appropriate vector types and
3950 // back.
3951 Type *T =
3952 MMXEltSizeInBits ? getMMXVectorTy(EltSizeInBits: MMXEltSizeInBits) : S1->getType();
3953 if (MMXEltSizeInBits) {
3954 S1 = IRB.CreateBitCast(V: S1, DestTy: T);
3955 S2 = IRB.CreateBitCast(V: S2, DestTy: T);
3956 }
3957 Value *S1_ext =
3958 IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: S1, RHS: Constant::getNullValue(Ty: T)), DestTy: T);
3959 Value *S2_ext =
3960 IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: S2, RHS: Constant::getNullValue(Ty: T)), DestTy: T);
3961 if (MMXEltSizeInBits) {
3962 S1_ext = IRB.CreateBitCast(V: S1_ext, DestTy: getMMXVectorTy(EltSizeInBits: 64));
3963 S2_ext = IRB.CreateBitCast(V: S2_ext, DestTy: getMMXVectorTy(EltSizeInBits: 64));
3964 }
3965
3966 Value *S = IRB.CreateIntrinsic(ID: getSignedPackIntrinsic(id: I.getIntrinsicID()),
3967 Args: {S1_ext, S2_ext}, /*FMFSource=*/nullptr,
3968 Name: "_msprop_vector_pack");
3969 if (MMXEltSizeInBits)
3970 S = IRB.CreateBitCast(V: S, DestTy: getShadowTy(V: &I));
3971 setShadow(V: &I, SV: S);
3972 setOriginForNaryOp(I);
3973 }
3974
3975 // Convert `Mask` into `<n x i1>`.
3976 Constant *createDppMask(unsigned Width, unsigned Mask) {
3977 SmallVector<Constant *, 4> R(Width);
3978 for (auto &M : R) {
3979 M = ConstantInt::getBool(Context&: F.getContext(), V: Mask & 1);
3980 Mask >>= 1;
3981 }
3982 return ConstantVector::get(V: R);
3983 }
3984
3985 // Calculate output shadow as array of booleans `<n x i1>`, assuming if any
3986 // arg is poisoned, entire dot product is poisoned.
3987 Value *findDppPoisonedOutput(IRBuilder<> &IRB, Value *S, unsigned SrcMask,
3988 unsigned DstMask) {
3989 const unsigned Width =
3990 cast<FixedVectorType>(Val: S->getType())->getNumElements();
3991
3992 S = IRB.CreateSelect(C: createDppMask(Width, Mask: SrcMask), True: S,
3993 False: Constant::getNullValue(Ty: S->getType()));
3994 Value *SElem = IRB.CreateOrReduce(Src: S);
3995 Value *IsClean = IRB.CreateIsNull(Arg: SElem, Name: "_msdpp");
3996 Value *DstMaskV = createDppMask(Width, Mask: DstMask);
3997
3998 return IRB.CreateSelect(
3999 C: IsClean, True: Constant::getNullValue(Ty: DstMaskV->getType()), False: DstMaskV);
4000 }
4001
4002 // See `Intel Intrinsics Guide` for `_dp_p*` instructions.
4003 //
4004 // 2 and 4 element versions produce single scalar of dot product, and then
4005 // puts it into elements of output vector, selected by 4 lowest bits of the
4006 // mask. Top 4 bits of the mask control which elements of input to use for dot
4007 // product.
4008 //
4009 // 8 element version mask still has only 4 bit for input, and 4 bit for output
4010 // mask. According to the spec it just operates as 4 element version on first
4011 // 4 elements of inputs and output, and then on last 4 elements of inputs and
4012 // output.
4013 void handleDppIntrinsic(IntrinsicInst &I) {
4014 IRBuilder<> IRB(&I);
4015
4016 Value *S0 = getShadow(I: &I, i: 0);
4017 Value *S1 = getShadow(I: &I, i: 1);
4018 Value *S = IRB.CreateOr(LHS: S0, RHS: S1);
4019
4020 const unsigned Width =
4021 cast<FixedVectorType>(Val: S->getType())->getNumElements();
4022 assert(Width == 2 || Width == 4 || Width == 8);
4023
4024 const unsigned Mask = cast<ConstantInt>(Val: I.getArgOperand(i: 2))->getZExtValue();
4025 const unsigned SrcMask = Mask >> 4;
4026 const unsigned DstMask = Mask & 0xf;
4027
4028 // Calculate shadow as `<n x i1>`.
4029 Value *SI1 = findDppPoisonedOutput(IRB, S, SrcMask, DstMask);
4030 if (Width == 8) {
4031 // First 4 elements of shadow are already calculated. `makeDppShadow`
4032 // operats on 32 bit masks, so we can just shift masks, and repeat.
4033 SI1 = IRB.CreateOr(
4034 LHS: SI1, RHS: findDppPoisonedOutput(IRB, S, SrcMask: SrcMask << 4, DstMask: DstMask << 4));
4035 }
4036 // Extend to real size of shadow, poisoning either all or none bits of an
4037 // element.
4038 S = IRB.CreateSExt(V: SI1, DestTy: S->getType(), Name: "_msdpp");
4039
4040 setShadow(V: &I, SV: S);
4041 setOriginForNaryOp(I);
4042 }
4043
4044 Value *convertBlendvToSelectMask(IRBuilder<> &IRB, Value *C) {
4045 C = CreateAppToShadowCast(IRB, V: C);
4046 FixedVectorType *FVT = cast<FixedVectorType>(Val: C->getType());
4047 unsigned ElSize = FVT->getElementType()->getPrimitiveSizeInBits();
4048 C = IRB.CreateAShr(LHS: C, RHS: ElSize - 1);
4049 FVT = FixedVectorType::get(ElementType: IRB.getInt1Ty(), NumElts: FVT->getNumElements());
4050 return IRB.CreateTrunc(V: C, DestTy: FVT);
4051 }
4052
4053 // `blendv(f, t, c)` is effectively `select(c[top_bit], t, f)`.
4054 void handleBlendvIntrinsic(IntrinsicInst &I) {
4055 Value *C = I.getOperand(i_nocapture: 2);
4056 Value *T = I.getOperand(i_nocapture: 1);
4057 Value *F = I.getOperand(i_nocapture: 0);
4058
4059 Value *Sc = getShadow(I: &I, i: 2);
4060 Value *Oc = MS.TrackOrigins ? getOrigin(V: C) : nullptr;
4061
4062 {
4063 IRBuilder<> IRB(&I);
4064 // Extract top bit from condition and its shadow.
4065 C = convertBlendvToSelectMask(IRB, C);
4066 Sc = convertBlendvToSelectMask(IRB, C: Sc);
4067
4068 setShadow(V: C, SV: Sc);
4069 setOrigin(V: C, Origin: Oc);
4070 }
4071
4072 handleSelectLikeInst(I, B: C, C: T, D: F);
4073 }
4074
4075 // Instrument sum-of-absolute-differences intrinsic.
4076 void handleVectorSadIntrinsic(IntrinsicInst &I, bool IsMMX = false) {
4077 const unsigned SignificantBitsPerResultElement = 16;
4078 Type *ResTy = IsMMX ? IntegerType::get(C&: *MS.C, NumBits: 64) : I.getType();
4079 unsigned ZeroBitsPerResultElement =
4080 ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;
4081
4082 IRBuilder<> IRB(&I);
4083 auto *Shadow0 = getShadow(I: &I, i: 0);
4084 auto *Shadow1 = getShadow(I: &I, i: 1);
4085 Value *S = IRB.CreateOr(LHS: Shadow0, RHS: Shadow1);
4086 S = IRB.CreateBitCast(V: S, DestTy: ResTy);
4087 S = IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: S, RHS: Constant::getNullValue(Ty: ResTy)),
4088 DestTy: ResTy);
4089 S = IRB.CreateLShr(LHS: S, RHS: ZeroBitsPerResultElement);
4090 S = IRB.CreateBitCast(V: S, DestTy: getShadowTy(V: &I));
4091 setShadow(V: &I, SV: S);
4092 setOriginForNaryOp(I);
4093 }
4094
4095 // Instrument dot-product / multiply-add(-accumulate)? intrinsics.
4096 //
4097 // e.g., Two operands:
4098 // <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b)
4099 //
4100 // Two operands which require an EltSizeInBits override:
4101 // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b)
4102 //
4103 // Three operands:
4104 // <4 x i32> @llvm.x86.avx512.vpdpbusd.128
4105 // (<4 x i32> %s, <16 x i8> %a, <16 x i8> %b)
4106 // <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16
4107 // (<2 x float> %acc, <4 x bfloat> %a, <4 x bfloat> %b)
4108 // (these are equivalent to multiply-add on %a and %b, followed by
4109 // adding/"accumulating" %s. "Accumulation" stores the result in one
4110 // of the source registers, but this accumulate vs. add distinction
4111 // is lost when dealing with LLVM intrinsics.)
4112 //
4113 // ZeroPurifies means that multiplying a known-zero with an uninitialized
4114 // value results in an initialized value. This is applicable for integer
4115 // multiplication, but not floating-point (counter-example: NaN).
4116 void handleVectorDotProductIntrinsic(IntrinsicInst &I,
4117 unsigned ReductionFactor,
4118 bool ZeroPurifies,
4119 unsigned EltSizeInBits,
4120 enum OddOrEvenLanes Lanes) {
4121 IRBuilder<> IRB(&I);
4122
4123 [[maybe_unused]] FixedVectorType *ReturnType =
4124 cast<FixedVectorType>(Val: I.getType());
4125 assert(isa<FixedVectorType>(ReturnType));
4126
4127 // Vectors A and B, and shadows
4128 Value *Va = nullptr;
4129 Value *Vb = nullptr;
4130 Value *Sa = nullptr;
4131 Value *Sb = nullptr;
4132
4133 assert(I.arg_size() == 2 || I.arg_size() == 3);
4134 if (I.arg_size() == 2) {
4135 assert(Lanes == kBothLanes);
4136
4137 Va = I.getOperand(i_nocapture: 0);
4138 Vb = I.getOperand(i_nocapture: 1);
4139
4140 Sa = getShadow(I: &I, i: 0);
4141 Sb = getShadow(I: &I, i: 1);
4142 } else if (I.arg_size() == 3) {
4143 // Operand 0 is the accumulator. We will deal with that below.
4144 Va = I.getOperand(i_nocapture: 1);
4145 Vb = I.getOperand(i_nocapture: 2);
4146
4147 Sa = getShadow(I: &I, i: 1);
4148 Sb = getShadow(I: &I, i: 2);
4149
4150 if (Lanes == kEvenLanes || Lanes == kOddLanes) {
4151 // Convert < S0, S1, S2, S3, S4, S5, S6, S7 >
4152 // to < S0, S0, S2, S2, S4, S4, S6, S6 > (if even)
4153 // to < S1, S1, S3, S3, S5, S5, S7, S7 > (if odd)
4154 //
4155 // Note: for aarch64.neon.bfmlalb/t, the odd/even-indexed values are
4156 // zeroed, not duplicated. However, for shadow propagation, this
4157 // distinction is unimportant because Step 1 below will squeeze
4158 // each pair of elements (e.g., [S0, S0]) into a single bit, and
4159 // we only care if it is fully initialized.
4160
4161 FixedVectorType *InputShadowType = cast<FixedVectorType>(Val: Sa->getType());
4162 unsigned Width = InputShadowType->getNumElements();
4163
4164 Sa = IRB.CreateShuffleVector(
4165 V: Sa, Mask: getPclmulMask(Width, /*OddElements=*/Lanes == kOddLanes));
4166 Sb = IRB.CreateShuffleVector(
4167 V: Sb, Mask: getPclmulMask(Width, /*OddElements=*/Lanes == kOddLanes));
4168 }
4169 }
4170
4171 FixedVectorType *ParamType = cast<FixedVectorType>(Val: Va->getType());
4172 assert(ParamType == Vb->getType());
4173
4174 assert(ParamType->getPrimitiveSizeInBits() ==
4175 ReturnType->getPrimitiveSizeInBits());
4176
4177 if (I.arg_size() == 3) {
4178 [[maybe_unused]] auto *AccumulatorType =
4179 cast<FixedVectorType>(Val: I.getOperand(i_nocapture: 0)->getType());
4180 assert(AccumulatorType == ReturnType);
4181 }
4182
4183 FixedVectorType *ImplicitReturnType =
4184 cast<FixedVectorType>(Val: getShadowTy(OrigTy: ReturnType));
4185 // Step 1: instrument multiplication of corresponding vector elements
4186 if (EltSizeInBits) {
4187 ImplicitReturnType = cast<FixedVectorType>(
4188 Val: getMMXVectorTy(EltSizeInBits: EltSizeInBits * ReductionFactor,
4189 X86_MMXSizeInBits: ParamType->getPrimitiveSizeInBits()));
4190 ParamType = cast<FixedVectorType>(
4191 Val: getMMXVectorTy(EltSizeInBits, X86_MMXSizeInBits: ParamType->getPrimitiveSizeInBits()));
4192
4193 Va = IRB.CreateBitCast(V: Va, DestTy: ParamType);
4194 Vb = IRB.CreateBitCast(V: Vb, DestTy: ParamType);
4195
4196 Sa = IRB.CreateBitCast(V: Sa, DestTy: getShadowTy(OrigTy: ParamType));
4197 Sb = IRB.CreateBitCast(V: Sb, DestTy: getShadowTy(OrigTy: ParamType));
4198 } else {
4199 assert(ParamType->getNumElements() ==
4200 ReturnType->getNumElements() * ReductionFactor);
4201 }
4202
4203 // Each element of the vector is represented by a single bit (poisoned or
4204 // not) e.g., <8 x i1>.
4205 Value *SaNonZero = IRB.CreateIsNotNull(Arg: Sa);
4206 Value *SbNonZero = IRB.CreateIsNotNull(Arg: Sb);
4207 Value *And;
4208 if (ZeroPurifies) {
4209 // Multiplying an *initialized* zero by an uninitialized element results
4210 // in an initialized zero element.
4211 //
4212 // This is analogous to bitwise AND, where "AND" of 0 and a poisoned value
4213 // results in an unpoisoned value.
4214 Value *VaInt = Va;
4215 Value *VbInt = Vb;
4216 if (!Va->getType()->isIntegerTy()) {
4217 VaInt = CreateAppToShadowCast(IRB, V: Va);
4218 VbInt = CreateAppToShadowCast(IRB, V: Vb);
4219 }
4220
4221 // We check for non-zero on a per-element basis, not per-bit.
4222 Value *VaNonZero = IRB.CreateIsNotNull(Arg: VaInt);
4223 Value *VbNonZero = IRB.CreateIsNotNull(Arg: VbInt);
4224
4225 And = handleBitwiseAnd(IRB, V1: VaNonZero, V2: VbNonZero, S1: SaNonZero, S2: SbNonZero);
4226 } else {
4227 And = IRB.CreateOr(Ops: {SaNonZero, SbNonZero});
4228 }
4229
4230 // Extend <8 x i1> to <8 x i16>.
4231 // (The real pmadd intrinsic would have computed intermediate values of
4232 // <8 x i32>, but that is irrelevant for our shadow purposes because we
4233 // consider each element to be either fully initialized or fully
4234 // uninitialized.)
4235 And = IRB.CreateSExt(V: And, DestTy: Sa->getType());
4236
4237 // Step 2: instrument horizontal add
4238 // We don't need bit-precise horizontalReduce because we only want to check
4239 // if each pair/quad of elements is fully zero.
4240 // Cast to <4 x i32>.
4241 Value *Horizontal = IRB.CreateBitCast(V: And, DestTy: ImplicitReturnType);
4242
4243 // Compute <4 x i1>, then extend back to <4 x i32>.
4244 Value *OutShadow = IRB.CreateSExt(
4245 V: IRB.CreateICmpNE(LHS: Horizontal,
4246 RHS: Constant::getNullValue(Ty: Horizontal->getType())),
4247 DestTy: ImplicitReturnType);
4248
4249 // Cast it back to the required fake return type (if MMX: <1 x i64>; for
4250 // AVX, it is already correct).
4251 if (EltSizeInBits)
4252 OutShadow = CreateShadowCast(IRB, V: OutShadow, dstTy: getShadowTy(V: &I));
4253
4254 // Step 3 (if applicable): instrument accumulator
4255 if (I.arg_size() == 3)
4256 OutShadow = IRB.CreateOr(LHS: OutShadow, RHS: getShadow(I: &I, i: 0));
4257
4258 setShadow(V: &I, SV: OutShadow);
4259 setOriginForNaryOp(I);
4260 }
4261
4262 // Instrument compare-packed intrinsic.
4263 //
4264 // x86 has the predicate as the third operand, which is ImmArg e.g.,
4265 // - <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8)
4266 // - <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8)
4267 //
4268 // while Arm has separate intrinsics for >= and > e.g.,
4269 // - <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32
4270 // (<2 x float> %A, <2 x float>)
4271 // - <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32
4272 // (<2 x float> %A, <2 x float>)
4273 //
4274 // Bonus: this also handles scalar cases e.g.,
4275 // - i32 @llvm.aarch64.neon.facgt.i32.f32(float %A, float %B)
4276 void handleVectorComparePackedIntrinsic(IntrinsicInst &I,
4277 bool PredicateAsOperand) {
4278 if (PredicateAsOperand) {
4279 assert(I.arg_size() == 3);
4280 assert(I.paramHasAttr(2, Attribute::ImmArg));
4281 } else
4282 assert(I.arg_size() == 2);
4283
4284 IRBuilder<> IRB(&I);
4285
4286 // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
4287 // all-ones shadow.
4288 Type *ResTy = getShadowTy(V: &I);
4289 auto *Shadow0 = getShadow(I: &I, i: 0);
4290 auto *Shadow1 = getShadow(I: &I, i: 1);
4291 Value *S0 = IRB.CreateOr(LHS: Shadow0, RHS: Shadow1);
4292 Value *S = IRB.CreateSExt(
4293 V: IRB.CreateICmpNE(LHS: S0, RHS: Constant::getNullValue(Ty: ResTy)), DestTy: ResTy);
4294 setShadow(V: &I, SV: S);
4295 setOriginForNaryOp(I);
4296 }
4297
4298 // Instrument compare-scalar intrinsic.
4299 // This handles both cmp* intrinsics which return the result in the first
4300 // element of a vector, and comi* which return the result as i32.
4301 void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
4302 IRBuilder<> IRB(&I);
4303 auto *Shadow0 = getShadow(I: &I, i: 0);
4304 auto *Shadow1 = getShadow(I: &I, i: 1);
4305 Value *S0 = IRB.CreateOr(LHS: Shadow0, RHS: Shadow1);
4306 Value *S = LowerElementShadowExtend(IRB, S: S0, T: getShadowTy(V: &I));
4307 setShadow(V: &I, SV: S);
4308 setOriginForNaryOp(I);
4309 }
4310
4311 // Instrument generic vector reduction intrinsics
4312 // by ORing together all their fields.
4313 //
4314 // If AllowShadowCast is true, the return type does not need to be the same
4315 // type as the fields
4316 // e.g., declare i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8>)
4317 void handleVectorReduceIntrinsic(IntrinsicInst &I, bool AllowShadowCast) {
4318 assert(I.arg_size() == 1);
4319
4320 IRBuilder<> IRB(&I);
4321 Value *S = IRB.CreateOrReduce(Src: getShadow(I: &I, i: 0));
4322 if (AllowShadowCast)
4323 S = CreateShadowCast(IRB, V: S, dstTy: getShadowTy(V: &I));
4324 else
4325 assert(S->getType() == getShadowTy(&I));
4326 setShadow(V: &I, SV: S);
4327 setOriginForNaryOp(I);
4328 }
4329
4330 // Similar to handleVectorReduceIntrinsic but with an initial starting value.
4331 // e.g., call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float>
4332 // %a1)
4333 // shadow = shadow[a0] | shadow[a1.0] | shadow[a1.1]
4334 //
4335 // The type of the return value, initial starting value, and elements of the
4336 // vector must be identical.
4337 void handleVectorReduceWithStarterIntrinsic(IntrinsicInst &I) {
4338 assert(I.arg_size() == 2);
4339
4340 IRBuilder<> IRB(&I);
4341 Value *Shadow0 = getShadow(I: &I, i: 0);
4342 Value *Shadow1 = IRB.CreateOrReduce(Src: getShadow(I: &I, i: 1));
4343 assert(Shadow0->getType() == Shadow1->getType());
4344 Value *S = IRB.CreateOr(LHS: Shadow0, RHS: Shadow1);
4345 assert(S->getType() == getShadowTy(&I));
4346 setShadow(V: &I, SV: S);
4347 setOriginForNaryOp(I);
4348 }
4349
4350 // Instrument vector.reduce.or intrinsic.
4351 // Valid (non-poisoned) set bits in the operand pull low the
4352 // corresponding shadow bits.
4353 void handleVectorReduceOrIntrinsic(IntrinsicInst &I) {
4354 assert(I.arg_size() == 1);
4355
4356 IRBuilder<> IRB(&I);
4357 Value *OperandShadow = getShadow(I: &I, i: 0);
4358 Value *OperandUnsetBits = IRB.CreateNot(V: I.getOperand(i_nocapture: 0));
4359 Value *OperandUnsetOrPoison = IRB.CreateOr(LHS: OperandUnsetBits, RHS: OperandShadow);
4360 // Bit N is clean if any field's bit N is 1 and unpoison
4361 Value *OutShadowMask = IRB.CreateAndReduce(Src: OperandUnsetOrPoison);
4362 // Otherwise, it is clean if every field's bit N is unpoison
4363 Value *OrShadow = IRB.CreateOrReduce(Src: OperandShadow);
4364 Value *S = IRB.CreateAnd(LHS: OutShadowMask, RHS: OrShadow);
4365
4366 setShadow(V: &I, SV: S);
4367 setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
4368 }
4369
4370 // Instrument vector.reduce.and intrinsic.
4371 // Valid (non-poisoned) unset bits in the operand pull down the
4372 // corresponding shadow bits.
4373 void handleVectorReduceAndIntrinsic(IntrinsicInst &I) {
4374 assert(I.arg_size() == 1);
4375
4376 IRBuilder<> IRB(&I);
4377 Value *OperandShadow = getShadow(I: &I, i: 0);
4378 Value *OperandSetOrPoison = IRB.CreateOr(LHS: I.getOperand(i_nocapture: 0), RHS: OperandShadow);
4379 // Bit N is clean if any field's bit N is 0 and unpoison
4380 Value *OutShadowMask = IRB.CreateAndReduce(Src: OperandSetOrPoison);
4381 // Otherwise, it is clean if every field's bit N is unpoison
4382 Value *OrShadow = IRB.CreateOrReduce(Src: OperandShadow);
4383 Value *S = IRB.CreateAnd(LHS: OutShadowMask, RHS: OrShadow);
4384
4385 setShadow(V: &I, SV: S);
4386 setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
4387 }
4388
4389 void handleStmxcsr(IntrinsicInst &I) {
4390 IRBuilder<> IRB(&I);
4391 Value *Addr = I.getArgOperand(i: 0);
4392 Type *Ty = IRB.getInt32Ty();
4393 Value *ShadowPtr =
4394 getShadowOriginPtr(Addr, IRB, ShadowTy: Ty, Alignment: Align(1), /*isStore*/ true).first;
4395
4396 IRB.CreateStore(Val: getCleanShadow(OrigTy: Ty), Ptr: ShadowPtr);
4397
4398 if (ClCheckAccessAddress)
4399 insertCheckShadowOf(Val: Addr, OrigIns: &I);
4400 }
4401
4402 void handleLdmxcsr(IntrinsicInst &I) {
4403 if (!InsertChecks)
4404 return;
4405
4406 IRBuilder<> IRB(&I);
4407 Value *Addr = I.getArgOperand(i: 0);
4408 Type *Ty = IRB.getInt32Ty();
4409 const Align Alignment = Align(1);
4410 Value *ShadowPtr, *OriginPtr;
4411 std::tie(args&: ShadowPtr, args&: OriginPtr) =
4412 getShadowOriginPtr(Addr, IRB, ShadowTy: Ty, Alignment, /*isStore*/ false);
4413
4414 if (ClCheckAccessAddress)
4415 insertCheckShadowOf(Val: Addr, OrigIns: &I);
4416
4417 Value *Shadow = IRB.CreateAlignedLoad(Ty, Ptr: ShadowPtr, Align: Alignment, Name: "_ldmxcsr");
4418 Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(Ty: MS.OriginTy, Ptr: OriginPtr)
4419 : getCleanOrigin();
4420 insertCheckShadow(Shadow, Origin, OrigIns: &I);
4421 }
4422
4423 void handleMaskedExpandLoad(IntrinsicInst &I) {
4424 IRBuilder<> IRB(&I);
4425 Value *Ptr = I.getArgOperand(i: 0);
4426 MaybeAlign Align = I.getParamAlign(ArgNo: 0);
4427 Value *Mask = I.getArgOperand(i: 1);
4428 Value *PassThru = I.getArgOperand(i: 2);
4429
4430 if (ClCheckAccessAddress) {
4431 insertCheckShadowOf(Val: Ptr, OrigIns: &I);
4432 insertCheckShadowOf(Val: Mask, OrigIns: &I);
4433 }
4434
4435 if (!PropagateShadow) {
4436 setShadow(V: &I, SV: getCleanShadow(V: &I));
4437 setOrigin(V: &I, Origin: getCleanOrigin());
4438 return;
4439 }
4440
4441 Type *ShadowTy = getShadowTy(V: &I);
4442 Type *ElementShadowTy = cast<VectorType>(Val: ShadowTy)->getElementType();
4443 auto [ShadowPtr, OriginPtr] =
4444 getShadowOriginPtr(Addr: Ptr, IRB, ShadowTy: ElementShadowTy, Alignment: Align, /*isStore*/ false);
4445
4446 Value *Shadow =
4447 IRB.CreateMaskedExpandLoad(Ty: ShadowTy, Ptr: ShadowPtr, Align, Mask,
4448 PassThru: getShadow(V: PassThru), Name: "_msmaskedexpload");
4449
4450 setShadow(V: &I, SV: Shadow);
4451
4452 // TODO: Store origins.
4453 setOrigin(V: &I, Origin: getCleanOrigin());
4454 }
4455
4456 void handleMaskedCompressStore(IntrinsicInst &I) {
4457 IRBuilder<> IRB(&I);
4458 Value *Values = I.getArgOperand(i: 0);
4459 Value *Ptr = I.getArgOperand(i: 1);
4460 MaybeAlign Align = I.getParamAlign(ArgNo: 1);
4461 Value *Mask = I.getArgOperand(i: 2);
4462
4463 if (ClCheckAccessAddress) {
4464 insertCheckShadowOf(Val: Ptr, OrigIns: &I);
4465 insertCheckShadowOf(Val: Mask, OrigIns: &I);
4466 }
4467
4468 Value *Shadow = getShadow(V: Values);
4469 Type *ElementShadowTy =
4470 getShadowTy(OrigTy: cast<VectorType>(Val: Values->getType())->getElementType());
4471 auto [ShadowPtr, OriginPtrs] =
4472 getShadowOriginPtr(Addr: Ptr, IRB, ShadowTy: ElementShadowTy, Alignment: Align, /*isStore*/ true);
4473
4474 IRB.CreateMaskedCompressStore(Val: Shadow, Ptr: ShadowPtr, Align, Mask);
4475
4476 // TODO: Store origins.
4477 }
4478
4479 void handleMaskedGather(IntrinsicInst &I) {
4480 IRBuilder<> IRB(&I);
4481 Value *Ptrs = I.getArgOperand(i: 0);
4482 const Align Alignment = I.getParamAlign(ArgNo: 0).valueOrOne();
4483 Value *Mask = I.getArgOperand(i: 1);
4484 Value *PassThru = I.getArgOperand(i: 2);
4485
4486 Type *PtrsShadowTy = getShadowTy(V: Ptrs);
4487 if (ClCheckAccessAddress) {
4488 insertCheckShadowOf(Val: Mask, OrigIns: &I);
4489 Value *MaskedPtrShadow = IRB.CreateSelect(
4490 C: Mask, True: getShadow(V: Ptrs), False: Constant::getNullValue(Ty: (PtrsShadowTy)),
4491 Name: "_msmaskedptrs");
4492 insertCheckShadow(Shadow: MaskedPtrShadow, Origin: getOrigin(V: Ptrs), OrigIns: &I);
4493 }
4494
4495 if (!PropagateShadow) {
4496 setShadow(V: &I, SV: getCleanShadow(V: &I));
4497 setOrigin(V: &I, Origin: getCleanOrigin());
4498 return;
4499 }
4500
4501 Type *ShadowTy = getShadowTy(V: &I);
4502 Type *ElementShadowTy = cast<VectorType>(Val: ShadowTy)->getElementType();
4503 auto [ShadowPtrs, OriginPtrs] = getShadowOriginPtr(
4504 Addr: Ptrs, IRB, ShadowTy: ElementShadowTy, Alignment, /*isStore*/ false);
4505
4506 Value *Shadow =
4507 IRB.CreateMaskedGather(Ty: ShadowTy, Ptrs: ShadowPtrs, Alignment, Mask,
4508 PassThru: getShadow(V: PassThru), Name: "_msmaskedgather");
4509
4510 setShadow(V: &I, SV: Shadow);
4511
4512 // TODO: Store origins.
4513 setOrigin(V: &I, Origin: getCleanOrigin());
4514 }
4515
4516 void handleMaskedScatter(IntrinsicInst &I) {
4517 IRBuilder<> IRB(&I);
4518 Value *Values = I.getArgOperand(i: 0);
4519 Value *Ptrs = I.getArgOperand(i: 1);
4520 const Align Alignment = I.getParamAlign(ArgNo: 1).valueOrOne();
4521 Value *Mask = I.getArgOperand(i: 2);
4522
4523 Type *PtrsShadowTy = getShadowTy(V: Ptrs);
4524 if (ClCheckAccessAddress) {
4525 insertCheckShadowOf(Val: Mask, OrigIns: &I);
4526 Value *MaskedPtrShadow = IRB.CreateSelect(
4527 C: Mask, True: getShadow(V: Ptrs), False: Constant::getNullValue(Ty: (PtrsShadowTy)),
4528 Name: "_msmaskedptrs");
4529 insertCheckShadow(Shadow: MaskedPtrShadow, Origin: getOrigin(V: Ptrs), OrigIns: &I);
4530 }
4531
4532 Value *Shadow = getShadow(V: Values);
4533 Type *ElementShadowTy =
4534 getShadowTy(OrigTy: cast<VectorType>(Val: Values->getType())->getElementType());
4535 auto [ShadowPtrs, OriginPtrs] = getShadowOriginPtr(
4536 Addr: Ptrs, IRB, ShadowTy: ElementShadowTy, Alignment, /*isStore*/ true);
4537
4538 IRB.CreateMaskedScatter(Val: Shadow, Ptrs: ShadowPtrs, Alignment, Mask);
4539
4540 // TODO: Store origin.
4541 }
4542
4543 // Intrinsic::masked_store
4544 //
4545 // Note: handleAVXMaskedStore handles AVX/AVX2 variants, though AVX512 masked
4546 // stores are lowered to Intrinsic::masked_store.
4547 void handleMaskedStore(IntrinsicInst &I) {
4548 IRBuilder<> IRB(&I);
4549 Value *V = I.getArgOperand(i: 0);
4550 Value *Ptr = I.getArgOperand(i: 1);
4551 const Align Alignment = I.getParamAlign(ArgNo: 1).valueOrOne();
4552 Value *Mask = I.getArgOperand(i: 2);
4553 Value *Shadow = getShadow(V);
4554
4555 if (ClCheckAccessAddress) {
4556 insertCheckShadowOf(Val: Ptr, OrigIns: &I);
4557 insertCheckShadowOf(Val: Mask, OrigIns: &I);
4558 }
4559
4560 Value *ShadowPtr;
4561 Value *OriginPtr;
4562 std::tie(args&: ShadowPtr, args&: OriginPtr) = getShadowOriginPtr(
4563 Addr: Ptr, IRB, ShadowTy: Shadow->getType(), Alignment, /*isStore*/ true);
4564
4565 IRB.CreateMaskedStore(Val: Shadow, Ptr: ShadowPtr, Alignment, Mask);
4566
4567 if (!MS.TrackOrigins)
4568 return;
4569
4570 auto &DL = F.getDataLayout();
4571 paintOrigin(IRB, Origin: getOrigin(V), OriginPtr,
4572 TS: DL.getTypeStoreSize(Ty: Shadow->getType()),
4573 Alignment: std::max(a: Alignment, b: kMinOriginAlignment));
4574 }
4575
4576 // Intrinsic::masked_load
4577 //
4578 // Note: handleAVXMaskedLoad handles AVX/AVX2 variants, though AVX512 masked
4579 // loads are lowered to Intrinsic::masked_load.
4580 void handleMaskedLoad(IntrinsicInst &I) {
4581 IRBuilder<> IRB(&I);
4582 Value *Ptr = I.getArgOperand(i: 0);
4583 const Align Alignment = I.getParamAlign(ArgNo: 0).valueOrOne();
4584 Value *Mask = I.getArgOperand(i: 1);
4585 Value *PassThru = I.getArgOperand(i: 2);
4586
4587 if (ClCheckAccessAddress) {
4588 insertCheckShadowOf(Val: Ptr, OrigIns: &I);
4589 insertCheckShadowOf(Val: Mask, OrigIns: &I);
4590 }
4591
4592 if (!PropagateShadow) {
4593 setShadow(V: &I, SV: getCleanShadow(V: &I));
4594 setOrigin(V: &I, Origin: getCleanOrigin());
4595 return;
4596 }
4597
4598 Type *ShadowTy = getShadowTy(V: &I);
4599 Value *ShadowPtr, *OriginPtr;
4600 std::tie(args&: ShadowPtr, args&: OriginPtr) =
4601 getShadowOriginPtr(Addr: Ptr, IRB, ShadowTy, Alignment, /*isStore*/ false);
4602 setShadow(V: &I, SV: IRB.CreateMaskedLoad(Ty: ShadowTy, Ptr: ShadowPtr, Alignment, Mask,
4603 PassThru: getShadow(V: PassThru), Name: "_msmaskedld"));
4604
4605 if (!MS.TrackOrigins)
4606 return;
4607
4608 // Choose between PassThru's and the loaded value's origins.
4609 Value *MaskedPassThruShadow = IRB.CreateAnd(
4610 LHS: getShadow(V: PassThru), RHS: IRB.CreateSExt(V: IRB.CreateNeg(V: Mask), DestTy: ShadowTy));
4611
4612 Value *NotNull = convertToBool(V: MaskedPassThruShadow, IRB, name: "_mscmp");
4613
4614 Value *PtrOrigin = IRB.CreateLoad(Ty: MS.OriginTy, Ptr: OriginPtr);
4615 Value *Origin = IRB.CreateSelect(C: NotNull, True: getOrigin(V: PassThru), False: PtrOrigin);
4616
4617 setOrigin(V: &I, Origin);
4618 }
4619
4620 // e.g., void @llvm.x86.avx.maskstore.ps.256(ptr, <8 x i32>, <8 x float>)
4621 // dst mask src
4622 //
4623 // AVX512 masked stores are lowered to Intrinsic::masked_load and are handled
4624 // by handleMaskedStore.
4625 //
4626 // This function handles AVX and AVX2 masked stores; these use the MSBs of a
4627 // vector of integers, unlike the LLVM masked intrinsics, which require a
4628 // vector of booleans. X86InstCombineIntrinsic.cpp::simplifyX86MaskedLoad
4629 // mentions that the x86 backend does not know how to efficiently convert
4630 // from a vector of booleans back into the AVX mask format; therefore, they
4631 // (and we) do not reduce AVX/AVX2 masked intrinsics into LLVM masked
4632 // intrinsics.
4633 void handleAVXMaskedStore(IntrinsicInst &I) {
4634 assert(I.arg_size() == 3);
4635
4636 IRBuilder<> IRB(&I);
4637
4638 Value *Dst = I.getArgOperand(i: 0);
4639 assert(Dst->getType()->isPointerTy() && "Destination is not a pointer!");
4640
4641 Value *Mask = I.getArgOperand(i: 1);
4642 assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
4643
4644 Value *Src = I.getArgOperand(i: 2);
4645 assert(isa<VectorType>(Src->getType()) && "Source is not a vector!");
4646
4647 const Align Alignment = Align(1);
4648
4649 Value *SrcShadow = getShadow(V: Src);
4650
4651 if (ClCheckAccessAddress) {
4652 insertCheckShadowOf(Val: Dst, OrigIns: &I);
4653 insertCheckShadowOf(Val: Mask, OrigIns: &I);
4654 }
4655
4656 Value *DstShadowPtr;
4657 Value *DstOriginPtr;
4658 std::tie(args&: DstShadowPtr, args&: DstOriginPtr) = getShadowOriginPtr(
4659 Addr: Dst, IRB, ShadowTy: SrcShadow->getType(), Alignment, /*isStore*/ true);
4660
4661 SmallVector<Value *, 2> ShadowArgs;
4662 ShadowArgs.append(NumInputs: 1, Elt: DstShadowPtr);
4663 ShadowArgs.append(NumInputs: 1, Elt: Mask);
4664 // The intrinsic may require floating-point but shadows can be arbitrary
4665 // bit patterns, of which some would be interpreted as "invalid"
4666 // floating-point values (NaN etc.); we assume the intrinsic will happily
4667 // copy them.
4668 ShadowArgs.append(NumInputs: 1, Elt: IRB.CreateBitCast(V: SrcShadow, DestTy: Src->getType()));
4669
4670 CallInst *CI = IRB.CreateIntrinsicWithoutFolding(
4671 RetTy: IRB.getVoidTy(), ID: I.getIntrinsicID(), Args: ShadowArgs);
4672 setShadow(V: &I, SV: CI);
4673
4674 if (!MS.TrackOrigins)
4675 return;
4676
4677 // Approximation only
4678 auto &DL = F.getDataLayout();
4679 paintOrigin(IRB, Origin: getOrigin(V: Src), OriginPtr: DstOriginPtr,
4680 TS: DL.getTypeStoreSize(Ty: SrcShadow->getType()),
4681 Alignment: std::max(a: Alignment, b: kMinOriginAlignment));
4682 }
4683
4684 // e.g., <8 x float> @llvm.x86.avx.maskload.ps.256(ptr, <8 x i32>)
4685 // return src mask
4686 //
4687 // Masked-off values are replaced with 0, which conveniently also represents
4688 // initialized memory.
4689 //
4690 // AVX512 masked stores are lowered to Intrinsic::masked_load and are handled
4691 // by handleMaskedStore.
4692 //
4693 // We do not combine this with handleMaskedLoad; see comment in
4694 // handleAVXMaskedStore for the rationale.
4695 //
4696 // This is subtly different than handleIntrinsicByApplyingToShadow(I, 1)
4697 // because we need to apply getShadowOriginPtr, not getShadow, to the first
4698 // parameter.
4699 void handleAVXMaskedLoad(IntrinsicInst &I) {
4700 assert(I.arg_size() == 2);
4701
4702 IRBuilder<> IRB(&I);
4703
4704 Value *Src = I.getArgOperand(i: 0);
4705 assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
4706
4707 Value *Mask = I.getArgOperand(i: 1);
4708 assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
4709
4710 const Align Alignment = Align(1);
4711
4712 if (ClCheckAccessAddress) {
4713 insertCheckShadowOf(Val: Mask, OrigIns: &I);
4714 }
4715
4716 Type *SrcShadowTy = getShadowTy(V: Src);
4717 Value *SrcShadowPtr, *SrcOriginPtr;
4718 std::tie(args&: SrcShadowPtr, args&: SrcOriginPtr) =
4719 getShadowOriginPtr(Addr: Src, IRB, ShadowTy: SrcShadowTy, Alignment, /*isStore*/ false);
4720
4721 SmallVector<Value *, 2> ShadowArgs;
4722 ShadowArgs.append(NumInputs: 1, Elt: SrcShadowPtr);
4723 ShadowArgs.append(NumInputs: 1, Elt: Mask);
4724
4725 CallInst *CI = IRB.CreateIntrinsicWithoutFolding(
4726 RetTy: I.getType(), ID: I.getIntrinsicID(), Args: ShadowArgs);
4727 // The AVX masked load intrinsics do not have integer variants. We use the
4728 // floating-point variants, which will happily copy the shadows even if
4729 // they are interpreted as "invalid" floating-point values (NaN etc.).
4730 setShadow(V: &I, SV: IRB.CreateBitCast(V: CI, DestTy: getShadowTy(V: &I)));
4731
4732 if (!MS.TrackOrigins)
4733 return;
4734
4735 // The "pass-through" value is always zero (initialized). To the extent
4736 // that that results in initialized aligned 4-byte chunks, the origin value
4737 // is ignored. It is therefore correct to simply copy the origin from src.
4738 Value *PtrSrcOrigin = IRB.CreateLoad(Ty: MS.OriginTy, Ptr: SrcOriginPtr);
4739 setOrigin(V: &I, Origin: PtrSrcOrigin);
4740 }
4741
4742 // Test whether the mask indices are initialized, only checking the bits that
4743 // are actually used.
4744 //
4745 // e.g., if Idx is <32 x i16>, only (log2(32) == 5) bits of each index are
4746 // used/checked.
4747 void maskedCheckAVXIndexShadow(IRBuilder<> &IRB, Value *Idx, Instruction *I) {
4748 assert(isFixedIntVector(Idx));
4749 auto IdxVectorSize =
4750 cast<FixedVectorType>(Val: Idx->getType())->getNumElements();
4751 assert(isPowerOf2_64(IdxVectorSize));
4752
4753 // Compiler isn't smart enough, let's help it
4754 if (isa<Constant>(Val: Idx))
4755 return;
4756
4757 auto *IdxShadow = getShadow(V: Idx);
4758 Value *Truncated = IRB.CreateTrunc(
4759 V: IdxShadow,
4760 DestTy: FixedVectorType::get(ElementType: Type::getIntNTy(C&: *MS.C, N: Log2_64(Value: IdxVectorSize)),
4761 NumElts: IdxVectorSize));
4762 insertCheckShadow(Shadow: Truncated, Origin: getOrigin(V: Idx), OrigIns: I);
4763 }
4764
4765 // Instrument AVX permutation intrinsic.
4766 // We apply the same permutation (argument index 1) to the shadow.
4767 void handleAVXVpermilvar(IntrinsicInst &I) {
4768 IRBuilder<> IRB(&I);
4769 Value *Shadow = getShadow(I: &I, i: 0);
4770 maskedCheckAVXIndexShadow(IRB, Idx: I.getArgOperand(i: 1), I: &I);
4771
4772 // Shadows are integer-ish types but some intrinsics require a
4773 // different (e.g., floating-point) type.
4774 Shadow = IRB.CreateBitCast(V: Shadow, DestTy: I.getArgOperand(i: 0)->getType());
4775 CallInst *CI = IRB.CreateIntrinsicWithoutFolding(
4776 RetTy: I.getType(), ID: I.getIntrinsicID(), Args: {Shadow, I.getArgOperand(i: 1)});
4777
4778 setShadow(V: &I, SV: IRB.CreateBitCast(V: CI, DestTy: getShadowTy(V: &I)));
4779 setOriginForNaryOp(I);
4780 }
4781
4782 // Instrument AVX permutation intrinsic.
4783 // We apply the same permutation (argument index 1) to the shadows.
4784 void handleAVXVpermi2var(IntrinsicInst &I) {
4785 assert(I.arg_size() == 3);
4786 assert(isa<FixedVectorType>(I.getArgOperand(0)->getType()));
4787 assert(isa<FixedVectorType>(I.getArgOperand(1)->getType()));
4788 assert(isa<FixedVectorType>(I.getArgOperand(2)->getType()));
4789 [[maybe_unused]] auto ArgVectorSize =
4790 cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType())->getNumElements();
4791 assert(cast<FixedVectorType>(I.getArgOperand(1)->getType())
4792 ->getNumElements() == ArgVectorSize);
4793 assert(cast<FixedVectorType>(I.getArgOperand(2)->getType())
4794 ->getNumElements() == ArgVectorSize);
4795 assert(I.getArgOperand(0)->getType() == I.getArgOperand(2)->getType());
4796 assert(I.getType() == I.getArgOperand(0)->getType());
4797 assert(I.getArgOperand(1)->getType()->isIntOrIntVectorTy());
4798 IRBuilder<> IRB(&I);
4799 Value *AShadow = getShadow(I: &I, i: 0);
4800 Value *Idx = I.getArgOperand(i: 1);
4801 Value *BShadow = getShadow(I: &I, i: 2);
4802
4803 maskedCheckAVXIndexShadow(IRB, Idx, I: &I);
4804
4805 // Shadows are integer-ish types but some intrinsics require a
4806 // different (e.g., floating-point) type.
4807 AShadow = IRB.CreateBitCast(V: AShadow, DestTy: I.getArgOperand(i: 0)->getType());
4808 BShadow = IRB.CreateBitCast(V: BShadow, DestTy: I.getArgOperand(i: 2)->getType());
4809 CallInst *CI = IRB.CreateIntrinsicWithoutFolding(
4810 RetTy: I.getType(), ID: I.getIntrinsicID(), Args: {AShadow, Idx, BShadow});
4811 setShadow(V: &I, SV: IRB.CreateBitCast(V: CI, DestTy: getShadowTy(V: &I)));
4812 setOriginForNaryOp(I);
4813 }
4814
4815 [[maybe_unused]] static bool isFixedIntVectorTy(const Type *T) {
4816 return isa<FixedVectorType>(Val: T) && T->isIntOrIntVectorTy();
4817 }
4818
4819 [[maybe_unused]] static bool isFixedFPVectorTy(const Type *T) {
4820 return isa<FixedVectorType>(Val: T) && T->isFPOrFPVectorTy();
4821 }
4822
4823 [[maybe_unused]] static bool isFixedIntVector(const Value *V) {
4824 return isFixedIntVectorTy(T: V->getType());
4825 }
4826
4827 [[maybe_unused]] static bool isFixedFPVector(const Value *V) {
4828 return isFixedFPVectorTy(T: V->getType());
4829 }
4830
4831 // e.g., <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
4832 // (<16 x float> a, <16 x i32> writethru, i16 mask,
4833 // i32 rounding)
4834 //
4835 // Inconveniently, some similar intrinsics have a different operand order:
4836 // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
4837 // (<16 x float> a, i32 rounding, <16 x i16> writethru,
4838 // i16 mask)
4839 //
4840 // If the return type has more elements than A, the excess elements are
4841 // zeroed (and the corresponding shadow is initialized).
4842 // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
4843 // (<4 x float> a, i32 rounding, <8 x i16> writethru,
4844 // i8 mask)
4845 //
4846 // dst[i] = mask[i] ? convert(a[i]) : writethru[i]
4847 // dst_shadow[i] = mask[i] ? all_or_nothing(a_shadow[i]) : writethru_shadow[i]
4848 // where all_or_nothing(x) is fully uninitialized if x has any
4849 // uninitialized bits
4850 void handleAVX512VectorConvertFPToInt(IntrinsicInst &I, bool LastMask) {
4851 IRBuilder<> IRB(&I);
4852
4853 assert(I.arg_size() == 4);
4854 Value *A = I.getOperand(i_nocapture: 0);
4855 Value *WriteThrough;
4856 Value *Mask;
4857 Value *RoundingMode;
4858 if (LastMask) {
4859 WriteThrough = I.getOperand(i_nocapture: 2);
4860 Mask = I.getOperand(i_nocapture: 3);
4861 RoundingMode = I.getOperand(i_nocapture: 1);
4862 } else {
4863 WriteThrough = I.getOperand(i_nocapture: 1);
4864 Mask = I.getOperand(i_nocapture: 2);
4865 RoundingMode = I.getOperand(i_nocapture: 3);
4866 }
4867
4868 assert(isFixedFPVector(A));
4869 assert(isFixedIntVector(WriteThrough));
4870
4871 unsigned ANumElements =
4872 cast<FixedVectorType>(Val: A->getType())->getNumElements();
4873 [[maybe_unused]] unsigned WriteThruNumElements =
4874 cast<FixedVectorType>(Val: WriteThrough->getType())->getNumElements();
4875 assert(ANumElements == WriteThruNumElements ||
4876 ANumElements * 2 == WriteThruNumElements);
4877
4878 assert(Mask->getType()->isIntegerTy());
4879 unsigned MaskNumElements = Mask->getType()->getScalarSizeInBits();
4880 assert(ANumElements == MaskNumElements ||
4881 ANumElements * 2 == MaskNumElements);
4882
4883 assert(WriteThruNumElements == MaskNumElements);
4884
4885 // Some bits of the mask may be unused, though it's unusual to have partly
4886 // uninitialized bits.
4887 insertCheckShadowOf(Val: Mask, OrigIns: &I);
4888
4889 assert(RoundingMode->getType()->isIntegerTy());
4890 // Only some bits of the rounding mode are used, though it's very
4891 // unusual to have uninitialized bits there (more commonly, it's a
4892 // constant).
4893 insertCheckShadowOf(Val: RoundingMode, OrigIns: &I);
4894
4895 assert(I.getType() == WriteThrough->getType());
4896
4897 Value *AShadow = getShadow(V: A);
4898 AShadow = maybeExtendVectorShadowWithZeros(Shadow: AShadow, I);
4899
4900 if (ANumElements * 2 == MaskNumElements) {
4901 // Ensure that the irrelevant bits of the mask are zero, hence selecting
4902 // from the zeroed shadow instead of the writethrough's shadow.
4903 Mask =
4904 IRB.CreateTrunc(V: Mask, DestTy: IRB.getIntNTy(N: ANumElements), Name: "_ms_mask_trunc");
4905 Mask =
4906 IRB.CreateZExt(V: Mask, DestTy: IRB.getIntNTy(N: MaskNumElements), Name: "_ms_mask_zext");
4907 }
4908
4909 // Convert i16 mask to <16 x i1>
4910 Mask = IRB.CreateBitCast(
4911 V: Mask, DestTy: FixedVectorType::get(ElementType: IRB.getInt1Ty(), NumElts: MaskNumElements),
4912 Name: "_ms_mask_bitcast");
4913
4914 /// For floating-point to integer conversion, the output is:
4915 /// - fully uninitialized if *any* bit of the input is uninitialized
4916 /// - fully ininitialized if all bits of the input are ininitialized
4917 /// We apply the same principle on a per-element basis for vectors.
4918 ///
4919 /// We use the scalar width of the return type instead of A's.
4920 AShadow = IRB.CreateSExt(
4921 V: IRB.CreateICmpNE(LHS: AShadow, RHS: getCleanShadow(OrigTy: AShadow->getType())),
4922 DestTy: getShadowTy(V: &I), Name: "_ms_a_shadow");
4923
4924 Value *WriteThroughShadow = getShadow(V: WriteThrough);
4925 Value *Shadow = IRB.CreateSelect(C: Mask, True: AShadow, False: WriteThroughShadow,
4926 Name: "_ms_writethru_select");
4927
4928 setShadow(V: &I, SV: Shadow);
4929 setOriginForNaryOp(I);
4930 }
4931
4932 static SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) {
4933 SmallVector<int, 8> Mask;
4934 for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
4935 Mask.append(NumInputs: 2, Elt: X);
4936 }
4937 return Mask;
4938 }
4939
4940 // Instrument pclmul intrinsics.
4941 // These intrinsics operate either on odd or on even elements of the input
4942 // vectors, depending on the constant in the 3rd argument, ignoring the rest.
4943 // Replace the unused elements with copies of the used ones, ex:
4944 // (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
4945 // or
4946 // (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
4947 // and then apply the usual shadow combining logic.
4948 void handlePclmulIntrinsic(IntrinsicInst &I) {
4949 IRBuilder<> IRB(&I);
4950 unsigned Width =
4951 cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType())->getNumElements();
4952 assert(isa<ConstantInt>(I.getArgOperand(2)) &&
4953 "pclmul 3rd operand must be a constant");
4954 unsigned Imm = cast<ConstantInt>(Val: I.getArgOperand(i: 2))->getZExtValue();
4955 Value *Shuf0 = IRB.CreateShuffleVector(V: getShadow(I: &I, i: 0),
4956 Mask: getPclmulMask(Width, OddElements: Imm & 0x01));
4957 Value *Shuf1 = IRB.CreateShuffleVector(V: getShadow(I: &I, i: 1),
4958 Mask: getPclmulMask(Width, OddElements: Imm & 0x10));
4959 ShadowAndOriginCombiner SOC(this, IRB);
4960 SOC.Add(OpShadow: Shuf0, OpOrigin: getOrigin(I: &I, i: 0));
4961 SOC.Add(OpShadow: Shuf1, OpOrigin: getOrigin(I: &I, i: 1));
4962 SOC.Done(I: &I);
4963 }
4964
4965 // Instrument _mm_*_sd|ss intrinsics
4966 void handleUnarySdSsIntrinsic(IntrinsicInst &I) {
4967 IRBuilder<> IRB(&I);
4968 unsigned Width =
4969 cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType())->getNumElements();
4970 Value *First = getShadow(I: &I, i: 0);
4971 Value *Second = getShadow(I: &I, i: 1);
4972 // First element of second operand, remaining elements of first operand
4973 SmallVector<int, 16> Mask;
4974 Mask.push_back(Elt: Width);
4975 for (unsigned i = 1; i < Width; i++)
4976 Mask.push_back(Elt: i);
4977 Value *Shadow = IRB.CreateShuffleVector(V1: First, V2: Second, Mask);
4978
4979 setShadow(V: &I, SV: Shadow);
4980 setOriginForNaryOp(I);
4981 }
4982
4983 void handleVtestIntrinsic(IntrinsicInst &I) {
4984 IRBuilder<> IRB(&I);
4985 Value *Shadow0 = getShadow(I: &I, i: 0);
4986 Value *Shadow1 = getShadow(I: &I, i: 1);
4987 Value *Or = IRB.CreateOr(LHS: Shadow0, RHS: Shadow1);
4988 Value *NZ = IRB.CreateICmpNE(LHS: Or, RHS: Constant::getNullValue(Ty: Or->getType()));
4989 Value *Scalar = convertShadowToScalar(V: NZ, IRB);
4990 Value *Shadow = IRB.CreateZExt(V: Scalar, DestTy: getShadowTy(V: &I));
4991
4992 setShadow(V: &I, SV: Shadow);
4993 setOriginForNaryOp(I);
4994 }
4995
4996 void handleBinarySdSsIntrinsic(IntrinsicInst &I) {
4997 IRBuilder<> IRB(&I);
4998 unsigned Width =
4999 cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType())->getNumElements();
5000 Value *First = getShadow(I: &I, i: 0);
5001 Value *Second = getShadow(I: &I, i: 1);
5002 Value *OrShadow = IRB.CreateOr(LHS: First, RHS: Second);
5003 // First element of both OR'd together, remaining elements of first operand
5004 SmallVector<int, 16> Mask;
5005 Mask.push_back(Elt: Width);
5006 for (unsigned i = 1; i < Width; i++)
5007 Mask.push_back(Elt: i);
5008 Value *Shadow = IRB.CreateShuffleVector(V1: First, V2: OrShadow, Mask);
5009
5010 setShadow(V: &I, SV: Shadow);
5011 setOriginForNaryOp(I);
5012 }
5013
5014 // _mm_round_ps / _mm_round_ps.
5015 // Similar to maybeHandleSimpleNomemIntrinsic except
5016 // the second argument is guaranteed to be a constant integer.
5017 void handleRoundPdPsIntrinsic(IntrinsicInst &I) {
5018 assert(I.getArgOperand(0)->getType() == I.getType());
5019 assert(I.arg_size() == 2);
5020 assert(isa<ConstantInt>(I.getArgOperand(1)));
5021
5022 IRBuilder<> IRB(&I);
5023 ShadowAndOriginCombiner SC(this, IRB);
5024 SC.Add(V: I.getArgOperand(i: 0));
5025 SC.Done(I: &I);
5026 }
5027
5028 // Instrument @llvm.abs intrinsic.
5029 //
5030 // e.g., i32 @llvm.abs.i32 (i32 <Src>, i1 <is_int_min_poison>)
5031 // <4 x i32> @llvm.abs.v4i32(<4 x i32> <Src>, i1 <is_int_min_poison>)
5032 void handleAbsIntrinsic(IntrinsicInst &I) {
5033 assert(I.arg_size() == 2);
5034 Value *Src = I.getArgOperand(i: 0);
5035 Value *IsIntMinPoison = I.getArgOperand(i: 1);
5036
5037 assert(I.getType()->isIntOrIntVectorTy());
5038
5039 assert(Src->getType() == I.getType());
5040
5041 assert(IsIntMinPoison->getType()->isIntegerTy());
5042 assert(IsIntMinPoison->getType()->getIntegerBitWidth() == 1);
5043
5044 IRBuilder<> IRB(&I);
5045 Value *SrcShadow = getShadow(V: Src);
5046
5047 APInt MinVal =
5048 APInt::getSignedMinValue(numBits: Src->getType()->getScalarSizeInBits());
5049 Value *MinValVec = ConstantInt::get(Ty: Src->getType(), V: MinVal);
5050 Value *SrcIsMin = IRB.CreateICmp(P: CmpInst::ICMP_EQ, LHS: Src, RHS: MinValVec);
5051
5052 Value *PoisonedShadow = getPoisonedShadow(V: Src);
5053 Value *PoisonedIfIntMinShadow =
5054 IRB.CreateSelect(C: SrcIsMin, True: PoisonedShadow, False: SrcShadow);
5055 Value *Shadow =
5056 IRB.CreateSelect(C: IsIntMinPoison, True: PoisonedIfIntMinShadow, False: SrcShadow);
5057
5058 setShadow(V: &I, SV: Shadow);
5059 setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
5060 }
5061
5062 void handleIsFpClass(IntrinsicInst &I) {
5063 IRBuilder<> IRB(&I);
5064 Value *Shadow = getShadow(I: &I, i: 0);
5065 setShadow(V: &I, SV: IRB.CreateICmpNE(LHS: Shadow, RHS: getCleanShadow(V: Shadow)));
5066 setOrigin(V: &I, Origin: getOrigin(I: &I, i: 0));
5067 }
5068
5069 void handleArithmeticWithOverflow(IntrinsicInst &I) {
5070 IRBuilder<> IRB(&I);
5071 Value *Shadow0 = getShadow(I: &I, i: 0);
5072 Value *Shadow1 = getShadow(I: &I, i: 1);
5073 Value *ShadowElt0 = IRB.CreateOr(LHS: Shadow0, RHS: Shadow1);
5074 Value *ShadowElt1 =
5075 IRB.CreateICmpNE(LHS: ShadowElt0, RHS: getCleanShadow(V: ShadowElt0));
5076
5077 Value *Shadow = PoisonValue::get(T: getShadowTy(V: &I));
5078 Shadow = IRB.CreateInsertValue(Agg: Shadow, Val: ShadowElt0, Idxs: 0);
5079 Shadow = IRB.CreateInsertValue(Agg: Shadow, Val: ShadowElt1, Idxs: 1);
5080
5081 setShadow(V: &I, SV: Shadow);
5082 setOriginForNaryOp(I);
5083 }
5084
5085 Value *extractLowerShadow(IRBuilder<> &IRB, Value *V) {
5086 assert(isa<FixedVectorType>(V->getType()));
5087 assert(cast<FixedVectorType>(V->getType())->getNumElements() > 0);
5088 Value *Shadow = getShadow(V);
5089 return IRB.CreateExtractElement(Vec: Shadow,
5090 Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: 0));
5091 }
5092
5093 // Handle llvm.x86.avx512.mask.pmov{,s,us}.*.{128,256,512}
5094 //
5095 // e.g., call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512
5096 // (<8 x i64>, <16 x i8>, i8)
5097 // A WriteThru Mask
5098 //
5099 // call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512
5100 // (<16 x i32>, <16 x i8>, i16)
5101 //
5102 // Dst[i] = Mask[i] ? truncate_or_saturate(A[i]) : WriteThru[i]
5103 // Dst_shadow[i] = Mask[i] ? truncate(A_shadow[i]) : WriteThru_shadow[i]
5104 //
5105 // If Dst has more elements than A, the excess elements are zeroed (and the
5106 // corresponding shadow is initialized).
5107 //
5108 // Note: for PMOV (truncation), handleIntrinsicByApplyingToShadow is precise
5109 // and is much faster than this handler.
5110 void handleAVX512VectorDownConvert(IntrinsicInst &I) {
5111 IRBuilder<> IRB(&I);
5112
5113 assert(I.arg_size() == 3);
5114 Value *A = I.getOperand(i_nocapture: 0);
5115 Value *WriteThrough = I.getOperand(i_nocapture: 1);
5116 Value *Mask = I.getOperand(i_nocapture: 2);
5117
5118 assert(isFixedIntVector(A));
5119 assert(isFixedIntVector(WriteThrough));
5120
5121 unsigned ANumElements =
5122 cast<FixedVectorType>(Val: A->getType())->getNumElements();
5123 unsigned OutputNumElements =
5124 cast<FixedVectorType>(Val: WriteThrough->getType())->getNumElements();
5125 assert(ANumElements == OutputNumElements ||
5126 ANumElements * 2 == OutputNumElements);
5127 // N.B. some PMOV{,S,US} instructions have a 4x or even 8x ratio in the
5128 // number of elements e.g.,
5129 // <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256
5130 // (<4 x i64>, <16 x i8>, i8)
5131 // <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128
5132 // (<2 x i64>, <16 x i8>, i8)
5133 // However, we currently handle those elsewhere.
5134
5135 assert(Mask->getType()->isIntegerTy());
5136 insertCheckShadowOf(Val: Mask, OrigIns: &I);
5137
5138 // The mask has 1 bit per element of A, but a minimum of 8 bits.
5139 if (Mask->getType()->getScalarSizeInBits() == 8 && OutputNumElements < 8)
5140 Mask = IRB.CreateTrunc(V: Mask, DestTy: Type::getIntNTy(C&: *MS.C, N: OutputNumElements));
5141 assert(Mask->getType()->getScalarSizeInBits() == ANumElements);
5142
5143 assert(I.getType() == WriteThrough->getType());
5144
5145 // Widen the mask, if necessary, to have one bit per element of the output
5146 // vector.
5147 // We want the extra bits to have '1's, so that the CreateSelect will
5148 // select the values from AShadow instead of WriteThroughShadow ("maskless"
5149 // versions of the intrinsics are sometimes implemented using an all-1's
5150 // mask and an undefined value for WriteThroughShadow). We accomplish this
5151 // by using bitwise NOT before and after the ZExt.
5152 if (ANumElements != OutputNumElements) {
5153 Mask = IRB.CreateNot(V: Mask);
5154 Mask = IRB.CreateZExt(V: Mask, DestTy: Type::getIntNTy(C&: *MS.C, N: OutputNumElements),
5155 Name: "_ms_widen_mask");
5156 Mask = IRB.CreateNot(V: Mask);
5157 }
5158 Mask = IRB.CreateBitCast(
5159 V: Mask, DestTy: FixedVectorType::get(ElementType: IRB.getInt1Ty(), NumElts: OutputNumElements));
5160
5161 Value *AShadow = getShadow(V: A);
5162
5163 // The return type might have more elements than the input.
5164 // Temporarily shrink the return type's number of elements.
5165 VectorType *ShadowType = maybeShrinkVectorShadowType(Src: A, I);
5166
5167 // PMOV truncates; PMOVS/PMOVUS uses signed/unsigned saturation.
5168 // This handler treats them all as truncation, which leads to some rare
5169 // false positives in the cases where the truncated bytes could
5170 // unambiguously saturate the value e.g., if A = ??????10 ????????
5171 // (big-endian), the unsigned saturated byte conversion is 11111111 i.e.,
5172 // fully defined, but the truncated byte is ????????.
5173 //
5174 // TODO: use GetMinMaxUnsigned() to handle saturation precisely.
5175 AShadow = IRB.CreateTrunc(V: AShadow, DestTy: ShadowType, Name: "_ms_trunc_shadow");
5176 AShadow = maybeExtendVectorShadowWithZeros(Shadow: AShadow, I);
5177
5178 Value *WriteThroughShadow = getShadow(V: WriteThrough);
5179
5180 Value *Shadow = IRB.CreateSelect(C: Mask, True: AShadow, False: WriteThroughShadow);
5181 setShadow(V: &I, SV: Shadow);
5182 setOriginForNaryOp(I);
5183 }
5184
5185 // Handle llvm.x86.avx512.* instructions that take vector(s) of floating-point
5186 // values and perform an operation whose shadow propagation should be handled
5187 // as all-or-nothing [*], with masking provided by a vector and a mask
5188 // supplied as an integer.
5189 //
5190 // [*] if all bits of a vector element are initialized, the output is fully
5191 // initialized; otherwise, the output is fully uninitialized
5192 //
5193 // e.g., <16 x float> @llvm.x86.avx512.rsqrt14.ps.512
5194 // (<16 x float>, <16 x float>, i16)
5195 // A WriteThru Mask
5196 //
5197 // <2 x double> @llvm.x86.avx512.rcp14.pd.128
5198 // (<2 x double>, <2 x double>, i8)
5199 // A WriteThru Mask
5200 //
5201 // <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512
5202 // (<8 x double>, i32, <8 x double>, i8, i32)
5203 // A Imm WriteThru Mask Rounding
5204 //
5205 // <16 x float> @llvm.x86.avx512.mask.scalef.ps.512
5206 // (<16 x float>, <16 x float>, <16 x float>, i16, i32)
5207 // WriteThru A B Mask Rnd
5208 //
5209 // All operands other than A, B, ..., and WriteThru (e.g., Mask, Imm,
5210 // Rounding) must be fully initialized.
5211 //
5212 // Dst[i] = Mask[i] ? some_op(A[i], B[i], ...)
5213 // : WriteThru[i]
5214 // Dst_shadow[i] = Mask[i] ? all_or_nothing(A_shadow[i] | B_shadow[i] | ...)
5215 // : WriteThru_shadow[i]
5216 void handleAVX512VectorGenericMaskedFP(IntrinsicInst &I,
5217 SmallVector<unsigned, 4> DataIndices,
5218 unsigned WriteThruIndex,
5219 unsigned MaskIndex) {
5220 IRBuilder<> IRB(&I);
5221
5222 unsigned NumArgs = I.arg_size();
5223
5224 assert(WriteThruIndex < NumArgs);
5225 assert(MaskIndex < NumArgs);
5226 assert(WriteThruIndex != MaskIndex);
5227 Value *WriteThru = I.getOperand(i_nocapture: WriteThruIndex);
5228
5229 unsigned OutputNumElements =
5230 cast<FixedVectorType>(Val: WriteThru->getType())->getNumElements();
5231
5232 assert(DataIndices.size() > 0);
5233
5234 bool isData[16] = {false};
5235 assert(NumArgs <= 16);
5236 for (unsigned i : DataIndices) {
5237 assert(i < NumArgs);
5238 assert(i != WriteThruIndex);
5239 assert(i != MaskIndex);
5240
5241 isData[i] = true;
5242
5243 Value *A = I.getOperand(i_nocapture: i);
5244 assert(isFixedFPVector(A));
5245 [[maybe_unused]] unsigned ANumElements =
5246 cast<FixedVectorType>(Val: A->getType())->getNumElements();
5247 assert(ANumElements == OutputNumElements);
5248 }
5249
5250 Value *Mask = I.getOperand(i_nocapture: MaskIndex);
5251
5252 assert(isFixedFPVector(WriteThru));
5253
5254 for (unsigned i = 0; i < NumArgs; ++i) {
5255 if (!isData[i] && i != WriteThruIndex) {
5256 // Imm, Mask, Rounding etc. are "control" data, hence we require that
5257 // they be fully initialized.
5258 assert(I.getOperand(i)->getType()->isIntegerTy());
5259 insertCheckShadowOf(Val: I.getOperand(i_nocapture: i), OrigIns: &I);
5260 }
5261 }
5262
5263 // The mask has 1 bit per element of A, but a minimum of 8 bits.
5264 if (Mask->getType()->getScalarSizeInBits() == 8 && OutputNumElements < 8)
5265 Mask = IRB.CreateTrunc(V: Mask, DestTy: Type::getIntNTy(C&: *MS.C, N: OutputNumElements));
5266 assert(Mask->getType()->getScalarSizeInBits() == OutputNumElements);
5267
5268 assert(I.getType() == WriteThru->getType());
5269
5270 Mask = IRB.CreateBitCast(
5271 V: Mask, DestTy: FixedVectorType::get(ElementType: IRB.getInt1Ty(), NumElts: OutputNumElements));
5272
5273 Value *DataShadow = nullptr;
5274 for (unsigned i : DataIndices) {
5275 Value *A = I.getOperand(i_nocapture: i);
5276 if (DataShadow)
5277 DataShadow = IRB.CreateOr(LHS: DataShadow, RHS: getShadow(V: A));
5278 else
5279 DataShadow = getShadow(V: A);
5280 }
5281
5282 // All-or-nothing shadow
5283 DataShadow =
5284 IRB.CreateSExt(V: IRB.CreateICmpNE(LHS: DataShadow, RHS: getCleanShadow(V: DataShadow)),
5285 DestTy: DataShadow->getType());
5286
5287 Value *WriteThruShadow = getShadow(V: WriteThru);
5288
5289 Value *Shadow = IRB.CreateSelect(C: Mask, True: DataShadow, False: WriteThruShadow);
5290 setShadow(V: &I, SV: Shadow);
5291
5292 setOriginForNaryOp(I);
5293 }
5294
5295 // For sh.* compiler intrinsics:
5296 // llvm.x86.avx512fp16.mask.{add/sub/mul/div/max/min}.sh.round
5297 // (<8 x half>, <8 x half>, <8 x half>, i8, i32)
5298 // A B WriteThru Mask RoundingMode
5299 //
5300 // DstShadow[0] = Mask[0] ? (AShadow[0] | BShadow[0]) : WriteThruShadow[0]
5301 // DstShadow[1..7] = AShadow[1..7]
5302 void visitGenericScalarHalfwordInst(IntrinsicInst &I) {
5303 IRBuilder<> IRB(&I);
5304
5305 assert(I.arg_size() == 5);
5306 Value *A = I.getOperand(i_nocapture: 0);
5307 Value *B = I.getOperand(i_nocapture: 1);
5308 Value *WriteThrough = I.getOperand(i_nocapture: 2);
5309 Value *Mask = I.getOperand(i_nocapture: 3);
5310 Value *RoundingMode = I.getOperand(i_nocapture: 4);
5311
5312 // Technically, we could probably just check whether the LSB is
5313 // initialized, but intuitively it feels like a partly uninitialized mask
5314 // is unintended, and we should warn the user immediately.
5315 insertCheckShadowOf(Val: Mask, OrigIns: &I);
5316 insertCheckShadowOf(Val: RoundingMode, OrigIns: &I);
5317
5318 assert(isa<FixedVectorType>(A->getType()));
5319 unsigned NumElements =
5320 cast<FixedVectorType>(Val: A->getType())->getNumElements();
5321 assert(NumElements == 8);
5322 assert(A->getType() == B->getType());
5323 assert(B->getType() == WriteThrough->getType());
5324 assert(Mask->getType()->getPrimitiveSizeInBits() == NumElements);
5325 assert(RoundingMode->getType()->isIntegerTy());
5326
5327 Value *ALowerShadow = extractLowerShadow(IRB, V: A);
5328 Value *BLowerShadow = extractLowerShadow(IRB, V: B);
5329
5330 Value *ABLowerShadow = IRB.CreateOr(LHS: ALowerShadow, RHS: BLowerShadow);
5331
5332 Value *WriteThroughLowerShadow = extractLowerShadow(IRB, V: WriteThrough);
5333
5334 Mask = IRB.CreateBitCast(
5335 V: Mask, DestTy: FixedVectorType::get(ElementType: IRB.getInt1Ty(), NumElts: NumElements));
5336 Value *MaskLower =
5337 IRB.CreateExtractElement(Vec: Mask, Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: 0));
5338
5339 Value *AShadow = getShadow(V: A);
5340 Value *DstLowerShadow =
5341 IRB.CreateSelect(C: MaskLower, True: ABLowerShadow, False: WriteThroughLowerShadow);
5342 Value *DstShadow = IRB.CreateInsertElement(
5343 Vec: AShadow, NewElt: DstLowerShadow, Idx: ConstantInt::get(Ty: IRB.getInt32Ty(), V: 0),
5344 Name: "_msprop");
5345
5346 setShadow(V: &I, SV: DstShadow);
5347 setOriginForNaryOp(I);
5348 }
5349
5350 // Approximately handle AVX Galois Field Affine Transformation
5351 //
5352 // e.g.,
5353 // <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8)
5354 // <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8)
5355 // <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8>, <64 x i8>, i8)
5356 // Out A x b
5357 // where A and x are packed matrices, b is a vector,
5358 // Out = A * x + b in GF(2)
5359 //
5360 // Multiplication in GF(2) is equivalent to bitwise AND. However, the matrix
5361 // computation also includes a parity calculation.
5362 //
5363 // For the bitwise AND of bits V1 and V2, the exact shadow is:
5364 // Out_Shadow = (V1_Shadow & V2_Shadow)
5365 // | (V1 & V2_Shadow)
5366 // | (V1_Shadow & V2 )
5367 //
5368 // We approximate the shadow of gf2p8affineqb using:
5369 // Out_Shadow = gf2p8affineqb(x_Shadow, A_shadow, 0)
5370 // | gf2p8affineqb(x, A_shadow, 0)
5371 // | gf2p8affineqb(x_Shadow, A, 0)
5372 // | set1_epi8(b_Shadow)
5373 //
5374 // This approximation has false negatives: if an intermediate dot-product
5375 // contains an even number of 1's, the parity is 0.
5376 // It has no false positives.
5377 void handleAVXGF2P8Affine(IntrinsicInst &I) {
5378 IRBuilder<> IRB(&I);
5379
5380 assert(I.arg_size() == 3);
5381 Value *A = I.getOperand(i_nocapture: 0);
5382 Value *X = I.getOperand(i_nocapture: 1);
5383 Value *B = I.getOperand(i_nocapture: 2);
5384
5385 assert(isFixedIntVector(A));
5386 assert(cast<VectorType>(A->getType())
5387 ->getElementType()
5388 ->getScalarSizeInBits() == 8);
5389
5390 assert(A->getType() == X->getType());
5391
5392 assert(B->getType()->isIntegerTy());
5393 assert(B->getType()->getScalarSizeInBits() == 8);
5394
5395 assert(I.getType() == A->getType());
5396
5397 Value *AShadow = getShadow(V: A);
5398 Value *XShadow = getShadow(V: X);
5399 Value *BZeroShadow = getCleanShadow(V: B);
5400
5401 Value *AShadowXShadow = IRB.CreateIntrinsic(
5402 RetTy: I.getType(), ID: I.getIntrinsicID(), Args: {XShadow, AShadow, BZeroShadow});
5403 Value *AShadowX = IRB.CreateIntrinsic(RetTy: I.getType(), ID: I.getIntrinsicID(),
5404 Args: {X, AShadow, BZeroShadow});
5405 Value *XShadowA = IRB.CreateIntrinsic(RetTy: I.getType(), ID: I.getIntrinsicID(),
5406 Args: {XShadow, A, BZeroShadow});
5407
5408 unsigned NumElements = cast<FixedVectorType>(Val: I.getType())->getNumElements();
5409 Value *BShadow = getShadow(V: B);
5410 Value *BBroadcastShadow = getCleanShadow(V: AShadow);
5411 // There is no LLVM IR intrinsic for _mm512_set1_epi8.
5412 // This loop generates a lot of LLVM IR, which we expect that CodeGen will
5413 // lower appropriately (e.g., VPBROADCASTB).
5414 // Besides, b is often a constant, in which case it is fully initialized.
5415 for (unsigned i = 0; i < NumElements; i++)
5416 BBroadcastShadow = IRB.CreateInsertElement(Vec: BBroadcastShadow, NewElt: BShadow, Idx: i);
5417
5418 setShadow(V: &I, SV: IRB.CreateOr(
5419 Ops: {AShadowXShadow, AShadowX, XShadowA, BBroadcastShadow}));
5420 setOriginForNaryOp(I);
5421 }
5422
5423 // Handle Arm NEON vector load intrinsics (vld*).
5424 //
5425 // The WithLane instructions (ld[234]lane) are similar to:
5426 // call {<4 x i32>, <4 x i32>, <4 x i32>}
5427 // @llvm.aarch64.neon.ld3lane.v4i32.p0
5428 // (<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 %lane, ptr
5429 // %A)
5430 //
5431 // The non-WithLane instructions (ld[234], ld1x[234], ld[234]r) are similar
5432 // to:
5433 // call {<8 x i8>, <8 x i8>} @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
5434 void handleNEONVectorLoad(IntrinsicInst &I, bool WithLane) {
5435 unsigned int numArgs = I.arg_size();
5436
5437 // Return type is a struct of vectors of integers or floating-point
5438 assert(I.getType()->isStructTy());
5439 [[maybe_unused]] StructType *RetTy = cast<StructType>(Val: I.getType());
5440 assert(RetTy->getNumElements() > 0);
5441 assert(RetTy->getElementType(0)->isIntOrIntVectorTy() ||
5442 RetTy->getElementType(0)->isFPOrFPVectorTy());
5443 for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
5444 assert(RetTy->getElementType(i) == RetTy->getElementType(0));
5445
5446 if (WithLane) {
5447 // 2, 3 or 4 vectors, plus lane number, plus input pointer
5448 assert(4 <= numArgs && numArgs <= 6);
5449
5450 // Return type is a struct of the input vectors
5451 assert(RetTy->getNumElements() + 2 == numArgs);
5452 for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
5453 assert(I.getArgOperand(i)->getType() == RetTy->getElementType(0));
5454 } else {
5455 assert(numArgs == 1);
5456 }
5457
5458 IRBuilder<> IRB(&I);
5459
5460 SmallVector<Value *, 6> ShadowArgs;
5461 if (WithLane) {
5462 for (unsigned int i = 0; i < numArgs - 2; i++)
5463 ShadowArgs.push_back(Elt: getShadow(V: I.getArgOperand(i)));
5464
5465 // Lane number, passed verbatim
5466 Value *LaneNumber = I.getArgOperand(i: numArgs - 2);
5467 ShadowArgs.push_back(Elt: LaneNumber);
5468
5469 // TODO: blend shadow of lane number into output shadow?
5470 insertCheckShadowOf(Val: LaneNumber, OrigIns: &I);
5471 }
5472
5473 Value *Src = I.getArgOperand(i: numArgs - 1);
5474 assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
5475
5476 Type *SrcShadowTy = getShadowTy(V: Src);
5477 auto [SrcShadowPtr, SrcOriginPtr] =
5478 getShadowOriginPtr(Addr: Src, IRB, ShadowTy: SrcShadowTy, Alignment: Align(1), /*isStore*/ false);
5479 ShadowArgs.push_back(Elt: SrcShadowPtr);
5480
5481 // The NEON vector load instructions handled by this function all have
5482 // integer variants. It is easier to use those rather than trying to cast
5483 // a struct of vectors of floats into a struct of vectors of integers.
5484 CallInst *CI = IRB.CreateIntrinsicWithoutFolding(
5485 RetTy: getShadowTy(V: &I), ID: I.getIntrinsicID(), Args: ShadowArgs);
5486 setShadow(V: &I, SV: CI);
5487
5488 if (!MS.TrackOrigins)
5489 return;
5490
5491 Value *PtrSrcOrigin = IRB.CreateLoad(Ty: MS.OriginTy, Ptr: SrcOriginPtr);
5492 setOrigin(V: &I, Origin: PtrSrcOrigin);
5493 }
5494
5495 /// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
5496 /// and vst{2,3,4}lane).
5497 ///
5498 /// Arm NEON vector store intrinsics have the output address (pointer) as the
5499 /// last argument, with the initial arguments being the inputs (and lane
5500 /// number for vst{2,3,4}lane). They return void.
5501 ///
5502 /// - st4 interleaves the output e.g., st4 (inA, inB, inC, inD, outP) writes
5503 /// abcdabcdabcdabcd... into *outP
5504 /// - st1_x4 is non-interleaved e.g., st1_x4 (inA, inB, inC, inD, outP)
5505 /// writes aaaa...bbbb...cccc...dddd... into *outP
5506 /// - st4lane has arguments of (inA, inB, inC, inD, lane, outP)
5507 /// These instructions can all be instrumented with essentially the same
5508 /// MSan logic, simply by applying the corresponding intrinsic to the shadow.
5509 void handleNEONVectorStoreIntrinsic(IntrinsicInst &I, bool useLane) {
5510 IRBuilder<> IRB(&I);
5511
5512 // Don't use getNumOperands() because it includes the callee
5513 int numArgOperands = I.arg_size();
5514
5515 // The last arg operand is the output (pointer)
5516 assert(numArgOperands >= 1);
5517 Value *Addr = I.getArgOperand(i: numArgOperands - 1);
5518 assert(Addr->getType()->isPointerTy());
5519 int skipTrailingOperands = 1;
5520
5521 if (ClCheckAccessAddress)
5522 insertCheckShadowOf(Val: Addr, OrigIns: &I);
5523
5524 // Second-last operand is the lane number (for vst{2,3,4}lane)
5525 if (useLane) {
5526 skipTrailingOperands++;
5527 assert(numArgOperands >= static_cast<int>(skipTrailingOperands));
5528 assert(isa<IntegerType>(
5529 I.getArgOperand(numArgOperands - skipTrailingOperands)->getType()));
5530 }
5531
5532 SmallVector<Value *, 8> ShadowArgs;
5533 // All the initial operands are the inputs
5534 for (int i = 0; i < numArgOperands - skipTrailingOperands; i++) {
5535 assert(isa<FixedVectorType>(I.getArgOperand(i)->getType()));
5536 Value *Shadow = getShadow(I: &I, i);
5537 ShadowArgs.append(NumInputs: 1, Elt: Shadow);
5538 }
5539
5540 // MSan's GetShadowTy assumes the LHS is the type we want the shadow for
5541 // e.g., for:
5542 // [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5543 // we know the type of the output (and its shadow) is <16 x i8>.
5544 //
5545 // Arm NEON VST is unusual because the last argument is the output address:
5546 // define void @st2_16b(<16 x i8> %A, <16 x i8> %B, ptr %P) {
5547 // call void @llvm.aarch64.neon.st2.v16i8.p0
5548 // (<16 x i8> [[A]], <16 x i8> [[B]], ptr [[P]])
5549 // and we have no type information about P's operand. We must manually
5550 // compute the type (<16 x i8> x 2).
5551 FixedVectorType *OutputVectorTy = FixedVectorType::get(
5552 ElementType: cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType())->getElementType(),
5553 NumElts: cast<FixedVectorType>(Val: I.getArgOperand(i: 0)->getType())->getNumElements() *
5554 (numArgOperands - skipTrailingOperands));
5555 Type *OutputShadowTy = getShadowTy(OrigTy: OutputVectorTy);
5556
5557 if (useLane)
5558 ShadowArgs.append(NumInputs: 1,
5559 Elt: I.getArgOperand(i: numArgOperands - skipTrailingOperands));
5560
5561 Value *OutputShadowPtr, *OutputOriginPtr;
5562 // AArch64 NEON does not need alignment (unless OS requires it)
5563 std::tie(args&: OutputShadowPtr, args&: OutputOriginPtr) = getShadowOriginPtr(
5564 Addr, IRB, ShadowTy: OutputShadowTy, Alignment: Align(1), /*isStore*/ true);
5565 ShadowArgs.append(NumInputs: 1, Elt: OutputShadowPtr);
5566
5567 CallInst *CI = IRB.CreateIntrinsicWithoutFolding(
5568 RetTy: IRB.getVoidTy(), ID: I.getIntrinsicID(), Args: ShadowArgs);
5569 setShadow(V: &I, SV: CI);
5570
5571 if (MS.TrackOrigins) {
5572 // TODO: if we modelled the vst* instruction more precisely, we could
5573 // more accurately track the origins (e.g., if both inputs are
5574 // uninitialized for vst2, we currently blame the second input, even
5575 // though part of the output depends only on the first input).
5576 //
5577 // This is particularly imprecise for vst{2,3,4}lane, since only one
5578 // lane of each input is actually copied to the output.
5579 OriginCombiner OC(this, IRB);
5580 for (int i = 0; i < numArgOperands - skipTrailingOperands; i++)
5581 OC.Add(V: I.getArgOperand(i));
5582
5583 const DataLayout &DL = F.getDataLayout();
5584 OC.DoneAndStoreOrigin(TS: DL.getTypeStoreSize(Ty: OutputVectorTy),
5585 OriginPtr: OutputOriginPtr);
5586 }
5587 }
5588
5589 // Integer matrix multiplication:
5590 // - <4 x i32> @llvm.aarch64.neon.{s,u,us}mmla.v4i32.v16i8
5591 // (<4 x i32> %R, <16 x i8> %A, <16 x i8> %B)
5592 // - <4 x i32> is a 2x2 matrix
5593 // - <16 x i8> %A and %B are 2x8 and 8x2 matrices respectively
5594 //
5595 // Floating-point matrix multiplication:
5596 // - <4 x float> @llvm.aarch64.neon.bfmmla
5597 // (<4 x float> %R, <8 x bfloat> %A, <8 x bfloat> %B)
5598 // - <4 x float> is a 2x2 matrix
5599 // - <8 x bfloat> %A and %B are 2x4 and 4x2 matrices respectively
5600 //
5601 // The general shadow propagation approach is:
5602 // 1) get the shadows of the input matrices %A and %B
5603 // 2) map each shadow value to 0x1 if the corresponding value is fully
5604 // initialized, and 0x0 otherwise
5605 // 3) perform a matrix multiplication on the shadows of %A and %B [*].
5606 // The output will be a 2x2 matrix. For each element, a value of 0x8
5607 // (for {s,u,us}mmla) or 0x4 (for bfmmla) means all the corresponding
5608 // inputs were clean; if so, set the shadow to zero, otherwise set to -1.
5609 // 4) blend in the shadow of %R
5610 //
5611 // [*] Since shadows are integral, the obvious approach is to always apply
5612 // ummla to the shadows. Unfortunately, Armv8.2+bf16 supports bfmmla,
5613 // but not ummla. Thus, for bfmmla, our instrumentation reuses bfmmla.
5614 //
5615 // TODO: consider allowing multiplication of zero with an uninitialized value
5616 // to result in an initialized value.
5617 void handleNEONMatrixMultiply(IntrinsicInst &I) {
5618 IRBuilder<> IRB(&I);
5619
5620 assert(I.arg_size() == 3);
5621 Value *R = I.getArgOperand(i: 0);
5622 Value *A = I.getArgOperand(i: 1);
5623 Value *B = I.getArgOperand(i: 2);
5624
5625 assert(I.getType() == R->getType());
5626
5627 assert(isa<FixedVectorType>(R->getType()));
5628 assert(isa<FixedVectorType>(A->getType()));
5629 assert(isa<FixedVectorType>(B->getType()));
5630
5631 FixedVectorType *RTy = cast<FixedVectorType>(Val: R->getType());
5632 FixedVectorType *ATy = cast<FixedVectorType>(Val: A->getType());
5633 FixedVectorType *BTy = cast<FixedVectorType>(Val: B->getType());
5634 assert(ATy->getElementType() == BTy->getElementType());
5635
5636 if (RTy->getElementType()->isIntegerTy()) {
5637 // <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8
5638 // (<4 x i32> %R, <16 x i8> %X, <16 x i8> %Y)
5639 assert(RTy == FixedVectorType::get(IntegerType::get(*MS.C, 32), 4));
5640 assert(ATy == FixedVectorType::get(IntegerType::get(*MS.C, 8), 16));
5641 assert(BTy == FixedVectorType::get(IntegerType::get(*MS.C, 8), 16));
5642 } else {
5643 // <4 x float> @llvm.aarch64.neon.bfmmla
5644 // (<4 x float> %R, <8 x bfloat> %X, <8 x bfloat> %Y)
5645 assert(RTy == FixedVectorType::get(Type::getFloatTy(*MS.C), 4));
5646 assert(ATy == FixedVectorType::get(Type::getBFloatTy(*MS.C), 8));
5647 assert(BTy == FixedVectorType::get(Type::getBFloatTy(*MS.C), 8));
5648 }
5649
5650 Value *ShadowR = getShadow(I: &I, i: 0);
5651 Value *ShadowA = getShadow(I: &I, i: 1);
5652 Value *ShadowB = getShadow(I: &I, i: 2);
5653
5654 Value *ShadowAB;
5655 Value *FullyInit;
5656
5657 if (RTy->getElementType()->isIntegerTy()) {
5658 // If the value is fully initialized, the shadow will be 000...001.
5659 // Otherwise, the shadow will be all zero.
5660 // (This is the opposite of how we typically handle shadows.)
5661 ShadowA = IRB.CreateZExt(V: IRB.CreateICmpEQ(LHS: ShadowA, RHS: getCleanShadow(OrigTy: ATy)),
5662 DestTy: getShadowTy(OrigTy: ATy));
5663 ShadowB = IRB.CreateZExt(V: IRB.CreateICmpEQ(LHS: ShadowB, RHS: getCleanShadow(OrigTy: BTy)),
5664 DestTy: getShadowTy(OrigTy: BTy));
5665 // TODO: the CreateSelect approach used below for floating-point is more
5666 // generic than CreateZExt. Investigate whether it is worthwhile
5667 // unifying the two approaches.
5668
5669 ShadowAB = IRB.CreateIntrinsic(RetTy: RTy, ID: Intrinsic::aarch64_neon_ummla,
5670 Args: {getCleanShadow(OrigTy: RTy), ShadowA, ShadowB});
5671
5672 // ummla multiplies a 2x8 matrix with an 8x2 matrix. If all entries of the
5673 // input matrices are equal to 0x1, all entries of the output matrix will
5674 // be 0x8.
5675 FullyInit = ConstantVector::getSplat(
5676 EC: RTy->getElementCount(), Elt: ConstantInt::get(Ty: RTy->getElementType(), V: 0x8));
5677
5678 ShadowAB = IRB.CreateICmpNE(LHS: ShadowAB, RHS: FullyInit);
5679 } else {
5680 Constant *ABZeros = ConstantVector::getSplat(
5681 EC: ATy->getElementCount(), Elt: ConstantFP::get(Ty: ATy->getElementType(), V: 0));
5682 Constant *ABOnes = ConstantVector::getSplat(
5683 EC: ATy->getElementCount(), Elt: ConstantFP::get(Ty: ATy->getElementType(), V: 1));
5684
5685 // As per the integer case, if the shadow is clean, we store 0x1,
5686 // otherwise we store 0x0 (the opposite of usual shadow arithmetic).
5687 ShadowA = IRB.CreateSelect(C: IRB.CreateICmpEQ(LHS: ShadowA, RHS: getCleanShadow(OrigTy: ATy)),
5688 True: ABOnes, False: ABZeros);
5689 ShadowB = IRB.CreateSelect(C: IRB.CreateICmpEQ(LHS: ShadowB, RHS: getCleanShadow(OrigTy: BTy)),
5690 True: ABOnes, False: ABZeros);
5691
5692 Constant *RZeros = ConstantVector::getSplat(
5693 EC: RTy->getElementCount(), Elt: ConstantFP::get(Ty: RTy->getElementType(), V: 0));
5694
5695 ShadowAB = IRB.CreateIntrinsic(RetTy: RTy, ID: Intrinsic::aarch64_neon_bfmmla,
5696 Args: {RZeros, ShadowA, ShadowB});
5697
5698 // bfmmla multiplies a 2x4 matrix with an 4x2 matrix. If all entries of
5699 // the input matrices are equal to 0x1, all entries of the output matrix
5700 // will be 4.0. (To avoid floating-point error, we check if each entry
5701 // < 3.5.)
5702 FullyInit = ConstantVector::getSplat(
5703 EC: RTy->getElementCount(), Elt: ConstantFP::get(Ty: RTy->getElementType(), V: 3.5));
5704
5705 // FCmpULT: "yields true if either operand is a QNAN or op1 is less than"
5706 // op2"
5707 ShadowAB = IRB.CreateFCmpULT(LHS: ShadowAB, RHS: FullyInit);
5708 }
5709
5710 ShadowR = IRB.CreateICmpNE(LHS: ShadowR, RHS: getCleanShadow(OrigTy: RTy));
5711 ShadowR = IRB.CreateOr(LHS: ShadowAB, RHS: ShadowR);
5712
5713 setShadow(V: &I, SV: IRB.CreateSExt(V: ShadowR, DestTy: getShadowTy(OrigTy: RTy)));
5714
5715 setOriginForNaryOp(I);
5716 }
5717
5718 /// Handle intrinsics by applying the intrinsic to the shadows.
5719 ///
5720 /// The trailing arguments are passed verbatim to the intrinsic, though any
5721 /// uninitialized trailing arguments can also taint the shadow e.g., for an
5722 /// intrinsic with one trailing verbatim argument:
5723 /// out = intrinsic(var1, var2, opType)
5724 /// we compute:
5725 /// shadow[out] =
5726 /// intrinsic(shadow[var1], shadow[var2], opType) | shadow[opType]
5727 ///
5728 /// Typically, shadowIntrinsicID will be specified by the caller to be
5729 /// I.getIntrinsicID(), but the caller can choose to replace it with another
5730 /// intrinsic of the same type.
5731 ///
5732 /// CAUTION: this assumes that the intrinsic will handle arbitrary
5733 /// bit-patterns (for example, if the intrinsic accepts floats for
5734 /// var1, we require that it doesn't care if inputs are NaNs).
5735 ///
5736 /// For example, this can be applied to the Arm NEON vector table intrinsics
5737 /// (tbl{1,2,3,4}).
5738 ///
5739 /// The origin is approximated using setOriginForNaryOp.
5740 void handleIntrinsicByApplyingToShadow(IntrinsicInst &I,
5741 Intrinsic::ID shadowIntrinsicID,
5742 unsigned int trailingVerbatimArgs) {
5743 IRBuilder<> IRB(&I);
5744
5745 assert(trailingVerbatimArgs < I.arg_size());
5746
5747 SmallVector<Value *, 8> ShadowArgs;
5748 // Don't use getNumOperands() because it includes the callee
5749 for (unsigned int i = 0; i < I.arg_size() - trailingVerbatimArgs; i++) {
5750 Value *Shadow = getShadow(I: &I, i);
5751
5752 // Shadows are integer-ish types but some intrinsics require a
5753 // different (e.g., floating-point) type.
5754 ShadowArgs.push_back(
5755 Elt: IRB.CreateBitCast(V: Shadow, DestTy: I.getArgOperand(i)->getType()));
5756 }
5757
5758 for (unsigned int i = I.arg_size() - trailingVerbatimArgs; i < I.arg_size();
5759 i++) {
5760 Value *Arg = I.getArgOperand(i);
5761 ShadowArgs.push_back(Elt: Arg);
5762 }
5763
5764 Value *CI = IRB.CreateIntrinsic(RetTy: I.getType(), ID: shadowIntrinsicID, Args: ShadowArgs);
5765 Value *CombinedShadow = CI;
5766
5767 // Combine the computed shadow with the shadow of trailing args
5768 for (unsigned int i = I.arg_size() - trailingVerbatimArgs; i < I.arg_size();
5769 i++) {
5770 Value *Shadow =
5771 CreateShadowCast(IRB, V: getShadow(I: &I, i), dstTy: CombinedShadow->getType());
5772 CombinedShadow = IRB.CreateOr(LHS: Shadow, RHS: CombinedShadow, Name: "_msprop");
5773 }
5774
5775 setShadow(V: &I, SV: IRB.CreateBitCast(V: CombinedShadow, DestTy: getShadowTy(V: &I)));
5776
5777 setOriginForNaryOp(I);
5778 }
5779
5780 // Approximation only
5781 //
5782 // e.g., <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)
5783 void handleNEONVectorMultiplyIntrinsic(IntrinsicInst &I) {
5784 assert(I.arg_size() == 2);
5785
5786 handleShadowOr(I);
5787 }
5788
5789 bool maybeHandleCrossPlatformIntrinsic(IntrinsicInst &I) {
5790 switch (I.getIntrinsicID()) {
5791 case Intrinsic::uadd_with_overflow:
5792 case Intrinsic::sadd_with_overflow:
5793 case Intrinsic::usub_with_overflow:
5794 case Intrinsic::ssub_with_overflow:
5795 case Intrinsic::umul_with_overflow:
5796 case Intrinsic::smul_with_overflow:
5797 handleArithmeticWithOverflow(I);
5798 break;
5799 case Intrinsic::abs:
5800 handleAbsIntrinsic(I);
5801 break;
5802 case Intrinsic::bitreverse:
5803 handleIntrinsicByApplyingToShadow(I, shadowIntrinsicID: I.getIntrinsicID(),
5804 /*trailingVerbatimArgs*/ 0);
5805 break;
5806 case Intrinsic::is_fpclass:
5807 handleIsFpClass(I);
5808 break;
5809 case Intrinsic::lifetime_start:
5810 handleLifetimeStart(I);
5811 break;
5812 case Intrinsic::launder_invariant_group:
5813 case Intrinsic::strip_invariant_group:
5814 handleInvariantGroup(I);
5815 break;
5816 case Intrinsic::bswap:
5817 handleBswap(I);
5818 break;
5819 case Intrinsic::ctlz:
5820 case Intrinsic::cttz:
5821 handleCountLeadingTrailingZeros(I);
5822 break;
5823 case Intrinsic::masked_compressstore:
5824 handleMaskedCompressStore(I);
5825 break;
5826 case Intrinsic::masked_expandload:
5827 handleMaskedExpandLoad(I);
5828 break;
5829 case Intrinsic::masked_gather:
5830 handleMaskedGather(I);
5831 break;
5832 case Intrinsic::masked_scatter:
5833 handleMaskedScatter(I);
5834 break;
5835 case Intrinsic::masked_store:
5836 handleMaskedStore(I);
5837 break;
5838 case Intrinsic::masked_load:
5839 handleMaskedLoad(I);
5840 break;
5841 case Intrinsic::vector_reduce_and:
5842 handleVectorReduceAndIntrinsic(I);
5843 break;
5844 case Intrinsic::vector_reduce_or:
5845 handleVectorReduceOrIntrinsic(I);
5846 break;
5847
5848 case Intrinsic::vector_reduce_add:
5849 case Intrinsic::vector_reduce_xor:
5850 case Intrinsic::vector_reduce_mul:
5851 // Signed/Unsigned Min/Max
5852 // TODO: handling similarly to AND/OR may be more precise.
5853 case Intrinsic::vector_reduce_smax:
5854 case Intrinsic::vector_reduce_smin:
5855 case Intrinsic::vector_reduce_umax:
5856 case Intrinsic::vector_reduce_umin:
5857 // TODO: this has no false positives, but arguably we should check that all
5858 // the bits are initialized.
5859 case Intrinsic::vector_reduce_fmax:
5860 case Intrinsic::vector_reduce_fmin:
5861 handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/false);
5862 break;
5863
5864 case Intrinsic::vector_reduce_fadd:
5865 case Intrinsic::vector_reduce_fmul:
5866 handleVectorReduceWithStarterIntrinsic(I);
5867 break;
5868
5869 case Intrinsic::scmp:
5870 case Intrinsic::ucmp: {
5871 handleShadowOr(I);
5872 break;
5873 }
5874
5875 case Intrinsic::fshl:
5876 case Intrinsic::fshr:
5877 handleFunnelShift(I);
5878 break;
5879
5880 case Intrinsic::pdep:
5881 case Intrinsic::pext:
5882 handleGenericBitManipulation(I);
5883 break;
5884
5885 case Intrinsic::is_constant:
5886 // The result of llvm.is.constant() is always defined.
5887 setShadow(V: &I, SV: getCleanShadow(V: &I));
5888 setOrigin(V: &I, Origin: getCleanOrigin());
5889 break;
5890
5891 // The non-saturating versions are handled by visitFPTo[US]IInst().
5892 //
5893 // N.B. some platform-specific intrinsics, such as AArch64 fcvtz[us], are
5894 // lowered to these cross-platform intrinsics.
5895 case Intrinsic::fptosi_sat:
5896 case Intrinsic::fptoui_sat:
5897 handleGenericVectorConvertIntrinsic(I, /*FixedPoint=*/false);
5898 break;
5899
5900 default:
5901 return false;
5902 }
5903
5904 return true;
5905 }
5906
5907 bool maybeHandleX86SIMDIntrinsic(IntrinsicInst &I) {
5908 switch (I.getIntrinsicID()) {
5909 case Intrinsic::x86_sse_stmxcsr:
5910 handleStmxcsr(I);
5911 break;
5912 case Intrinsic::x86_sse_ldmxcsr:
5913 handleLdmxcsr(I);
5914 break;
5915
5916 // Convert Scalar Double Precision Floating-Point Value
5917 // to Unsigned Doubleword Integer
5918 // etc.
5919 case Intrinsic::x86_avx512_vcvtsd2usi64:
5920 case Intrinsic::x86_avx512_vcvtsd2usi32:
5921 case Intrinsic::x86_avx512_vcvtss2usi64:
5922 case Intrinsic::x86_avx512_vcvtss2usi32:
5923 case Intrinsic::x86_avx512_cvttss2usi64:
5924 case Intrinsic::x86_avx512_cvttss2usi:
5925 case Intrinsic::x86_avx512_cvttsd2usi64:
5926 case Intrinsic::x86_avx512_cvttsd2usi:
5927 case Intrinsic::x86_avx512_cvtusi2ss:
5928 case Intrinsic::x86_avx512_cvtusi642sd:
5929 case Intrinsic::x86_avx512_cvtusi642ss:
5930 handleSSEVectorConvertIntrinsic(I, NumUsedElements: 1, HasRoundingMode: true);
5931 break;
5932 case Intrinsic::x86_sse2_cvtsd2si64:
5933 case Intrinsic::x86_sse2_cvtsd2si:
5934 case Intrinsic::x86_sse2_cvtsd2ss:
5935 case Intrinsic::x86_sse2_cvttsd2si64:
5936 case Intrinsic::x86_sse2_cvttsd2si:
5937 case Intrinsic::x86_sse_cvtss2si64:
5938 case Intrinsic::x86_sse_cvtss2si:
5939 case Intrinsic::x86_sse_cvttss2si64:
5940 case Intrinsic::x86_sse_cvttss2si:
5941 handleSSEVectorConvertIntrinsic(I, NumUsedElements: 1);
5942 break;
5943 case Intrinsic::x86_sse_cvtps2pi:
5944 case Intrinsic::x86_sse_cvttps2pi:
5945 handleSSEVectorConvertIntrinsic(I, NumUsedElements: 2);
5946 break;
5947
5948 // TODO:
5949 // <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>)
5950 // <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>)
5951 // <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>)
5952
5953 case Intrinsic::x86_vcvtps2ph_128:
5954 case Intrinsic::x86_vcvtps2ph_256: {
5955 handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/true);
5956 break;
5957 }
5958
5959 // Convert Packed Single Precision Floating-Point Values
5960 // to Packed Signed Doubleword Integer Values
5961 //
5962 // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
5963 // (<16 x float>, <16 x i32>, i16, i32)
5964 case Intrinsic::x86_avx512_mask_cvtps2dq_512:
5965 handleAVX512VectorConvertFPToInt(I, /*LastMask=*/false);
5966 break;
5967
5968 // Convert Packed Double Precision Floating-Point Values
5969 // to Packed Single Precision Floating-Point Values
5970 case Intrinsic::x86_sse2_cvtpd2ps:
5971 case Intrinsic::x86_sse2_cvtps2dq:
5972 case Intrinsic::x86_sse2_cvtpd2dq:
5973 case Intrinsic::x86_sse2_cvttps2dq:
5974 case Intrinsic::x86_sse2_cvttpd2dq:
5975 case Intrinsic::x86_avx_cvt_pd2_ps_256:
5976 case Intrinsic::x86_avx_cvt_ps2dq_256:
5977 case Intrinsic::x86_avx_cvt_pd2dq_256:
5978 case Intrinsic::x86_avx_cvtt_ps2dq_256:
5979 case Intrinsic::x86_avx_cvtt_pd2dq_256: {
5980 handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/false);
5981 break;
5982 }
5983
5984 // Convert Single-Precision FP Value to 16-bit FP Value
5985 // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
5986 // (<16 x float>, i32, <16 x i16>, i16)
5987 // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
5988 // (<4 x float>, i32, <8 x i16>, i8)
5989 // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256
5990 // (<8 x float>, i32, <8 x i16>, i8)
5991 case Intrinsic::x86_avx512_mask_vcvtps2ph_512:
5992 case Intrinsic::x86_avx512_mask_vcvtps2ph_256:
5993 case Intrinsic::x86_avx512_mask_vcvtps2ph_128:
5994 handleAVX512VectorConvertFPToInt(I, /*LastMask=*/true);
5995 break;
5996
5997 // Shift Packed Data (Left Logical, Right Arithmetic, Right Logical)
5998 case Intrinsic::x86_avx512_psll_w_512:
5999 case Intrinsic::x86_avx512_psll_d_512:
6000 case Intrinsic::x86_avx512_psll_q_512:
6001 case Intrinsic::x86_avx512_pslli_w_512:
6002 case Intrinsic::x86_avx512_pslli_d_512:
6003 case Intrinsic::x86_avx512_pslli_q_512:
6004 case Intrinsic::x86_avx512_psrl_w_512:
6005 case Intrinsic::x86_avx512_psrl_d_512:
6006 case Intrinsic::x86_avx512_psrl_q_512:
6007 case Intrinsic::x86_avx512_psra_w_512:
6008 case Intrinsic::x86_avx512_psra_d_512:
6009 case Intrinsic::x86_avx512_psra_q_512:
6010 case Intrinsic::x86_avx512_psrli_w_512:
6011 case Intrinsic::x86_avx512_psrli_d_512:
6012 case Intrinsic::x86_avx512_psrli_q_512:
6013 case Intrinsic::x86_avx512_psrai_w_512:
6014 case Intrinsic::x86_avx512_psrai_d_512:
6015 case Intrinsic::x86_avx512_psrai_q_512:
6016 case Intrinsic::x86_avx512_psra_q_256:
6017 case Intrinsic::x86_avx512_psra_q_128:
6018 case Intrinsic::x86_avx512_psrai_q_256:
6019 case Intrinsic::x86_avx512_psrai_q_128:
6020 case Intrinsic::x86_avx2_psll_w:
6021 case Intrinsic::x86_avx2_psll_d:
6022 case Intrinsic::x86_avx2_psll_q:
6023 case Intrinsic::x86_avx2_pslli_w:
6024 case Intrinsic::x86_avx2_pslli_d:
6025 case Intrinsic::x86_avx2_pslli_q:
6026 case Intrinsic::x86_avx2_psrl_w:
6027 case Intrinsic::x86_avx2_psrl_d:
6028 case Intrinsic::x86_avx2_psrl_q:
6029 case Intrinsic::x86_avx2_psra_w:
6030 case Intrinsic::x86_avx2_psra_d:
6031 case Intrinsic::x86_avx2_psrli_w:
6032 case Intrinsic::x86_avx2_psrli_d:
6033 case Intrinsic::x86_avx2_psrli_q:
6034 case Intrinsic::x86_avx2_psrai_w:
6035 case Intrinsic::x86_avx2_psrai_d:
6036 case Intrinsic::x86_sse2_psll_w:
6037 case Intrinsic::x86_sse2_psll_d:
6038 case Intrinsic::x86_sse2_psll_q:
6039 case Intrinsic::x86_sse2_pslli_w:
6040 case Intrinsic::x86_sse2_pslli_d:
6041 case Intrinsic::x86_sse2_pslli_q:
6042 case Intrinsic::x86_sse2_psrl_w:
6043 case Intrinsic::x86_sse2_psrl_d:
6044 case Intrinsic::x86_sse2_psrl_q:
6045 case Intrinsic::x86_sse2_psra_w:
6046 case Intrinsic::x86_sse2_psra_d:
6047 case Intrinsic::x86_sse2_psrli_w:
6048 case Intrinsic::x86_sse2_psrli_d:
6049 case Intrinsic::x86_sse2_psrli_q:
6050 case Intrinsic::x86_sse2_psrai_w:
6051 case Intrinsic::x86_sse2_psrai_d:
6052 case Intrinsic::x86_mmx_psll_w:
6053 case Intrinsic::x86_mmx_psll_d:
6054 case Intrinsic::x86_mmx_psll_q:
6055 case Intrinsic::x86_mmx_pslli_w:
6056 case Intrinsic::x86_mmx_pslli_d:
6057 case Intrinsic::x86_mmx_pslli_q:
6058 case Intrinsic::x86_mmx_psrl_w:
6059 case Intrinsic::x86_mmx_psrl_d:
6060 case Intrinsic::x86_mmx_psrl_q:
6061 case Intrinsic::x86_mmx_psra_w:
6062 case Intrinsic::x86_mmx_psra_d:
6063 case Intrinsic::x86_mmx_psrli_w:
6064 case Intrinsic::x86_mmx_psrli_d:
6065 case Intrinsic::x86_mmx_psrli_q:
6066 case Intrinsic::x86_mmx_psrai_w:
6067 case Intrinsic::x86_mmx_psrai_d:
6068 handleVectorShiftIntrinsic(I, /* Variable */ false);
6069 break;
6070 case Intrinsic::x86_avx2_psllv_d:
6071 case Intrinsic::x86_avx2_psllv_d_256:
6072 case Intrinsic::x86_avx512_psllv_d_512:
6073 case Intrinsic::x86_avx2_psllv_q:
6074 case Intrinsic::x86_avx2_psllv_q_256:
6075 case Intrinsic::x86_avx512_psllv_q_512:
6076 case Intrinsic::x86_avx2_psrlv_d:
6077 case Intrinsic::x86_avx2_psrlv_d_256:
6078 case Intrinsic::x86_avx512_psrlv_d_512:
6079 case Intrinsic::x86_avx2_psrlv_q:
6080 case Intrinsic::x86_avx2_psrlv_q_256:
6081 case Intrinsic::x86_avx512_psrlv_q_512:
6082 case Intrinsic::x86_avx2_psrav_d:
6083 case Intrinsic::x86_avx2_psrav_d_256:
6084 case Intrinsic::x86_avx512_psrav_d_512:
6085 case Intrinsic::x86_avx512_psrav_q_128:
6086 case Intrinsic::x86_avx512_psrav_q_256:
6087 case Intrinsic::x86_avx512_psrav_q_512:
6088 handleVectorShiftIntrinsic(I, /* Variable */ true);
6089 break;
6090
6091 // Pack with Signed/Unsigned Saturation
6092 case Intrinsic::x86_sse2_packsswb_128:
6093 case Intrinsic::x86_sse2_packssdw_128:
6094 case Intrinsic::x86_sse2_packuswb_128:
6095 case Intrinsic::x86_sse41_packusdw:
6096 case Intrinsic::x86_avx2_packsswb:
6097 case Intrinsic::x86_avx2_packssdw:
6098 case Intrinsic::x86_avx2_packuswb:
6099 case Intrinsic::x86_avx2_packusdw:
6100 // e.g., <64 x i8> @llvm.x86.avx512.packsswb.512
6101 // (<32 x i16> %a, <32 x i16> %b)
6102 // <32 x i16> @llvm.x86.avx512.packssdw.512
6103 // (<16 x i32> %a, <16 x i32> %b)
6104 // Note: AVX512 masked variants are auto-upgraded by LLVM.
6105 case Intrinsic::x86_avx512_packsswb_512:
6106 case Intrinsic::x86_avx512_packssdw_512:
6107 case Intrinsic::x86_avx512_packuswb_512:
6108 case Intrinsic::x86_avx512_packusdw_512:
6109 handleVectorPackIntrinsic(I);
6110 break;
6111
6112 case Intrinsic::x86_sse41_pblendvb:
6113 case Intrinsic::x86_sse41_blendvpd:
6114 case Intrinsic::x86_sse41_blendvps:
6115 case Intrinsic::x86_avx_blendv_pd_256:
6116 case Intrinsic::x86_avx_blendv_ps_256:
6117 case Intrinsic::x86_avx2_pblendvb:
6118 handleBlendvIntrinsic(I);
6119 break;
6120
6121 case Intrinsic::x86_avx_dp_ps_256:
6122 case Intrinsic::x86_sse41_dppd:
6123 case Intrinsic::x86_sse41_dpps:
6124 handleDppIntrinsic(I);
6125 break;
6126
6127 case Intrinsic::x86_mmx_packsswb:
6128 case Intrinsic::x86_mmx_packuswb:
6129 handleVectorPackIntrinsic(I, MMXEltSizeInBits: 16);
6130 break;
6131
6132 case Intrinsic::x86_mmx_packssdw:
6133 handleVectorPackIntrinsic(I, MMXEltSizeInBits: 32);
6134 break;
6135
6136 case Intrinsic::x86_mmx_psad_bw:
6137 handleVectorSadIntrinsic(I, IsMMX: true);
6138 break;
6139 case Intrinsic::x86_sse2_psad_bw:
6140 case Intrinsic::x86_avx2_psad_bw:
6141 handleVectorSadIntrinsic(I);
6142 break;
6143
6144 // Multiply and Add Packed Words
6145 // < 4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>)
6146 // < 8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>)
6147 // <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>)
6148 //
6149 // Multiply and Add Packed Signed and Unsigned Bytes
6150 // < 8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>)
6151 // <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>)
6152 // <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>)
6153 //
6154 // These intrinsics are auto-upgraded into non-masked forms:
6155 // < 4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128
6156 // (<8 x i16>, <8 x i16>, <4 x i32>, i8)
6157 // < 8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256
6158 // (<16 x i16>, <16 x i16>, <8 x i32>, i8)
6159 // <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512
6160 // (<32 x i16>, <32 x i16>, <16 x i32>, i16)
6161 // < 8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128
6162 // (<16 x i8>, <16 x i8>, <8 x i16>, i8)
6163 // <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256
6164 // (<32 x i8>, <32 x i8>, <16 x i16>, i16)
6165 // <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512
6166 // (<64 x i8>, <64 x i8>, <32 x i16>, i32)
6167 case Intrinsic::x86_sse2_pmadd_wd:
6168 case Intrinsic::x86_avx2_pmadd_wd:
6169 case Intrinsic::x86_avx512_pmaddw_d_512:
6170 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
6171 case Intrinsic::x86_avx2_pmadd_ub_sw:
6172 case Intrinsic::x86_avx512_pmaddubs_w_512:
6173 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
6174 /*ZeroPurifies=*/true,
6175 /*EltSizeInBits=*/0,
6176 /*Lanes=*/kBothLanes);
6177 break;
6178
6179 // <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>)
6180 case Intrinsic::x86_ssse3_pmadd_ub_sw:
6181 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
6182 /*ZeroPurifies=*/true,
6183 /*EltSizeInBits=*/8,
6184 /*Lanes=*/kBothLanes);
6185 break;
6186
6187 // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>)
6188 case Intrinsic::x86_mmx_pmadd_wd:
6189 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
6190 /*ZeroPurifies=*/true,
6191 /*EltSizeInBits=*/16,
6192 /*Lanes=*/kBothLanes);
6193 break;
6194
6195 // BFloat16 multiply-add to single-precision
6196 // <4 x float> llvm.aarch64.neon.bfmlalt
6197 // (<4 x float>, <8 x bfloat>, <8 x bfloat>)
6198 case Intrinsic::aarch64_neon_bfmlalt:
6199 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
6200 /*ZeroPurifies=*/false,
6201 /*EltSizeInBits=*/0,
6202 /*Lanes=*/kOddLanes);
6203 break;
6204
6205 // <4 x float> llvm.aarch64.neon.bfmlalb
6206 // (<4 x float>, <8 x bfloat>, <8 x bfloat>)
6207 case Intrinsic::aarch64_neon_bfmlalb:
6208 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
6209 /*ZeroPurifies=*/false,
6210 /*EltSizeInBits=*/0,
6211 /*Lanes=*/kEvenLanes);
6212 break;
6213
6214 // AVX Vector Neural Network Instructions: bytes
6215 //
6216 // Multiply and Add Signed Bytes
6217 // < 4 x i32> @llvm.x86.avx2.vpdpbssd.128
6218 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6219 // < 8 x i32> @llvm.x86.avx2.vpdpbssd.256
6220 // (< 8 x i32>, <32 x i8>, <32 x i8>)
6221 // <16 x i32> @llvm.x86.avx10.vpdpbssd.512
6222 // (<16 x i32>, <64 x i8>, <64 x i8>)
6223 //
6224 // Multiply and Add Signed Bytes With Saturation
6225 // < 4 x i32> @llvm.x86.avx2.vpdpbssds.128
6226 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6227 // < 8 x i32> @llvm.x86.avx2.vpdpbssds.256
6228 // (< 8 x i32>, <32 x i8>, <32 x i8>)
6229 // <16 x i32> @llvm.x86.avx10.vpdpbssds.512
6230 // (<16 x i32>, <64 x i8>, <64 x i8>)
6231 //
6232 // Multiply and Add Signed and Unsigned Bytes
6233 // < 4 x i32> @llvm.x86.avx2.vpdpbsud.128
6234 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6235 // < 8 x i32> @llvm.x86.avx2.vpdpbsud.256
6236 // (< 8 x i32>, <32 x i8>, <32 x i8>)
6237 // <16 x i32> @llvm.x86.avx10.vpdpbsud.512
6238 // (<16 x i32>, <64 x i8>, <64 x i8>)
6239 //
6240 // Multiply and Add Signed and Unsigned Bytes With Saturation
6241 // < 4 x i32> @llvm.x86.avx2.vpdpbsuds.128
6242 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6243 // < 8 x i32> @llvm.x86.avx2.vpdpbsuds.256
6244 // (< 8 x i32>, <32 x i8>, <32 x i8>)
// <16 x i32> @llvm.x86.avx10.vpdpbsuds.512
6246 // (<16 x i32>, <64 x i8>, <64 x i8>)
6247 //
6248 // Multiply and Add Unsigned and Signed Bytes
6249 // < 4 x i32> @llvm.x86.avx512.vpdpbusd.128
6250 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6251 // < 8 x i32> @llvm.x86.avx512.vpdpbusd.256
6252 // (< 8 x i32>, <32 x i8>, <32 x i8>)
6253 // <16 x i32> @llvm.x86.avx512.vpdpbusd.512
6254 // (<16 x i32>, <64 x i8>, <64 x i8>)
6255 //
6256 // Multiply and Add Unsigned and Signed Bytes With Saturation
6257 // < 4 x i32> @llvm.x86.avx512.vpdpbusds.128
6258 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6259 // < 8 x i32> @llvm.x86.avx512.vpdpbusds.256
6260 // (< 8 x i32>, <32 x i8>, <32 x i8>)
// <16 x i32> @llvm.x86.avx512.vpdpbusds.512
6262 // (<16 x i32>, <64 x i8>, <64 x i8>)
6263 //
6264 // Multiply and Add Unsigned Bytes
6265 // < 4 x i32> @llvm.x86.avx2.vpdpbuud.128
6266 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6267 // < 8 x i32> @llvm.x86.avx2.vpdpbuud.256
6268 // (< 8 x i32>, <32 x i8>, <32 x i8>)
6269 // <16 x i32> @llvm.x86.avx10.vpdpbuud.512
6270 // (<16 x i32>, <64 x i8>, <64 x i8>)
6271 //
6272 // Multiply and Add Unsigned Bytes With Saturation
6273 // < 4 x i32> @llvm.x86.avx2.vpdpbuuds.128
6274 // (< 4 x i32>, <16 x i8>, <16 x i8>)
6275 // < 8 x i32> @llvm.x86.avx2.vpdpbuuds.256
6276 // (< 8 x i32>, <32 x i8>, <32 x i8>)
6277 // <16 x i32> @llvm.x86.avx10.vpdpbuuds.512
6278 // (<16 x i32>, <64 x i8>, <64 x i8>)
6279 //
6280 // These intrinsics are auto-upgraded into non-masked forms:
6281 // <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128
6282 // (<4 x i32>, <16 x i8>, <16 x i8>, i8)
6283 // <4 x i32> @llvm.x86.avx512.maskz.vpdpbusd.128
6284 // (<4 x i32>, <16 x i8>, <16 x i8>, i8)
6285 // <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256
6286 // (<8 x i32>, <32 x i8>, <32 x i8>, i8)
6287 // <8 x i32> @llvm.x86.avx512.maskz.vpdpbusd.256
6288 // (<8 x i32>, <32 x i8>, <32 x i8>, i8)
6289 // <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512
6290 // (<16 x i32>, <64 x i8>, <64 x i8>, i16)
6291 // <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512
6292 // (<16 x i32>, <64 x i8>, <64 x i8>, i16)
6293 //
6294 // <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128
6295 // (<4 x i32>, <16 x i8>, <16 x i8>, i8)
6296 // <4 x i32> @llvm.x86.avx512.maskz.vpdpbusds.128
6297 // (<4 x i32>, <16 x i8>, <16 x i8>, i8)
6298 // <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256
6299 // (<8 x i32>, <32 x i8>, <32 x i8>, i8)
6300 // <8 x i32> @llvm.x86.avx512.maskz.vpdpbusds.256
6301 // (<8 x i32>, <32 x i8>, <32 x i8>, i8)
6302 // <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512
6303 // (<16 x i32>, <64 x i8>, <64 x i8>, i16)
6304 // <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512
6305 // (<16 x i32>, <64 x i8>, <64 x i8>, i16)
6306 case Intrinsic::x86_avx512_vpdpbusd_128:
6307 case Intrinsic::x86_avx512_vpdpbusd_256:
6308 case Intrinsic::x86_avx512_vpdpbusd_512:
6309 case Intrinsic::x86_avx512_vpdpbusds_128:
6310 case Intrinsic::x86_avx512_vpdpbusds_256:
6311 case Intrinsic::x86_avx512_vpdpbusds_512:
6312 case Intrinsic::x86_avx2_vpdpbssd_128:
6313 case Intrinsic::x86_avx2_vpdpbssd_256:
6314 case Intrinsic::x86_avx10_vpdpbssd_512:
6315 case Intrinsic::x86_avx2_vpdpbssds_128:
6316 case Intrinsic::x86_avx2_vpdpbssds_256:
6317 case Intrinsic::x86_avx10_vpdpbssds_512:
6318 case Intrinsic::x86_avx2_vpdpbsud_128:
6319 case Intrinsic::x86_avx2_vpdpbsud_256:
6320 case Intrinsic::x86_avx10_vpdpbsud_512:
6321 case Intrinsic::x86_avx2_vpdpbsuds_128:
6322 case Intrinsic::x86_avx2_vpdpbsuds_256:
6323 case Intrinsic::x86_avx10_vpdpbsuds_512:
6324 case Intrinsic::x86_avx2_vpdpbuud_128:
6325 case Intrinsic::x86_avx2_vpdpbuud_256:
6326 case Intrinsic::x86_avx10_vpdpbuud_512:
6327 case Intrinsic::x86_avx2_vpdpbuuds_128:
6328 case Intrinsic::x86_avx2_vpdpbuuds_256:
6329 case Intrinsic::x86_avx10_vpdpbuuds_512:
6330 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/4,
6331 /*ZeroPurifies=*/true,
6332 /*EltSizeInBits=*/0,
6333 /*Lanes=*/kBothLanes);
6334 break;
6335
6336 // AVX Vector Neural Network Instructions: words
6337 //
6338 // Multiply and Add Signed Word Integers
6339 // < 4 x i32> @llvm.x86.avx512.vpdpwssd.128
6340 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6341 // < 8 x i32> @llvm.x86.avx512.vpdpwssd.256
6342 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6343 // <16 x i32> @llvm.x86.avx512.vpdpwssd.512
6344 // (<16 x i32>, <32 x i16>, <32 x i16>)
6345 //
6346 // Multiply and Add Signed Word Integers With Saturation
6347 // < 4 x i32> @llvm.x86.avx512.vpdpwssds.128
6348 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6349 // < 8 x i32> @llvm.x86.avx512.vpdpwssds.256
6350 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6351 // <16 x i32> @llvm.x86.avx512.vpdpwssds.512
6352 // (<16 x i32>, <32 x i16>, <32 x i16>)
6353 //
6354 // Multiply and Add Signed and Unsigned Word Integers
6355 // < 4 x i32> @llvm.x86.avx2.vpdpwsud.128
6356 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6357 // < 8 x i32> @llvm.x86.avx2.vpdpwsud.256
6358 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6359 // <16 x i32> @llvm.x86.avx10.vpdpwsud.512
6360 // (<16 x i32>, <32 x i16>, <32 x i16>)
6361 //
6362 // Multiply and Add Signed and Unsigned Word Integers With Saturation
6363 // < 4 x i32> @llvm.x86.avx2.vpdpwsuds.128
6364 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6365 // < 8 x i32> @llvm.x86.avx2.vpdpwsuds.256
6366 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6367 // <16 x i32> @llvm.x86.avx10.vpdpwsuds.512
6368 // (<16 x i32>, <32 x i16>, <32 x i16>)
6369 //
6370 // Multiply and Add Unsigned and Signed Word Integers
6371 // < 4 x i32> @llvm.x86.avx2.vpdpwusd.128
6372 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6373 // < 8 x i32> @llvm.x86.avx2.vpdpwusd.256
6374 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6375 // <16 x i32> @llvm.x86.avx10.vpdpwusd.512
6376 // (<16 x i32>, <32 x i16>, <32 x i16>)
6377 //
6378 // Multiply and Add Unsigned and Signed Word Integers With Saturation
6379 // < 4 x i32> @llvm.x86.avx2.vpdpwusds.128
6380 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6381 // < 8 x i32> @llvm.x86.avx2.vpdpwusds.256
6382 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6383 // <16 x i32> @llvm.x86.avx10.vpdpwusds.512
6384 // (<16 x i32>, <32 x i16>, <32 x i16>)
6385 //
6386 // Multiply and Add Unsigned and Unsigned Word Integers
6387 // < 4 x i32> @llvm.x86.avx2.vpdpwuud.128
6388 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6389 // < 8 x i32> @llvm.x86.avx2.vpdpwuud.256
6390 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6391 // <16 x i32> @llvm.x86.avx10.vpdpwuud.512
6392 // (<16 x i32>, <32 x i16>, <32 x i16>)
6393 //
6394 // Multiply and Add Unsigned and Unsigned Word Integers With Saturation
6395 // < 4 x i32> @llvm.x86.avx2.vpdpwuuds.128
6396 // (< 4 x i32>, < 8 x i16>, < 8 x i16>)
6397 // < 8 x i32> @llvm.x86.avx2.vpdpwuuds.256
6398 // (< 8 x i32>, <16 x i16>, <16 x i16>)
6399 // <16 x i32> @llvm.x86.avx10.vpdpwuuds.512
6400 // (<16 x i32>, <32 x i16>, <32 x i16>)
6401 //
6402 // These intrinsics are auto-upgraded into non-masked forms:
6403 // <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128
6404 // (<4 x i32>, <8 x i16>, <8 x i16>, i8)
6405 // <4 x i32> @llvm.x86.avx512.maskz.vpdpwssd.128
6406 // (<4 x i32>, <8 x i16>, <8 x i16>, i8)
6407 // <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256
6408 // (<8 x i32>, <16 x i16>, <16 x i16>, i8)
6409 // <8 x i32> @llvm.x86.avx512.maskz.vpdpwssd.256
6410 // (<8 x i32>, <16 x i16>, <16 x i16>, i8)
6411 // <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512
6412 // (<16 x i32>, <32 x i16>, <32 x i16>, i16)
6413 // <16 x i32> @llvm.x86.avx512.maskz.vpdpwssd.512
6414 // (<16 x i32>, <32 x i16>, <32 x i16>, i16)
6415 //
6416 // <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128
6417 // (<4 x i32>, <8 x i16>, <8 x i16>, i8)
6418 // <4 x i32> @llvm.x86.avx512.maskz.vpdpwssds.128
6419 // (<4 x i32>, <8 x i16>, <8 x i16>, i8)
6420 // <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256
6421 // (<8 x i32>, <16 x i16>, <16 x i16>, i8)
6422 // <8 x i32> @llvm.x86.avx512.maskz.vpdpwssds.256
6423 // (<8 x i32>, <16 x i16>, <16 x i16>, i8)
6424 // <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512
6425 // (<16 x i32>, <32 x i16>, <32 x i16>, i16)
6426 // <16 x i32> @llvm.x86.avx512.maskz.vpdpwssds.512
6427 // (<16 x i32>, <32 x i16>, <32 x i16>, i16)
6428 case Intrinsic::x86_avx512_vpdpwssd_128:
6429 case Intrinsic::x86_avx512_vpdpwssd_256:
6430 case Intrinsic::x86_avx512_vpdpwssd_512:
6431 case Intrinsic::x86_avx512_vpdpwssds_128:
6432 case Intrinsic::x86_avx512_vpdpwssds_256:
6433 case Intrinsic::x86_avx512_vpdpwssds_512:
6434 case Intrinsic::x86_avx2_vpdpwsud_128:
6435 case Intrinsic::x86_avx2_vpdpwsud_256:
6436 case Intrinsic::x86_avx10_vpdpwsud_512:
6437 case Intrinsic::x86_avx2_vpdpwsuds_128:
6438 case Intrinsic::x86_avx2_vpdpwsuds_256:
6439 case Intrinsic::x86_avx10_vpdpwsuds_512:
6440 case Intrinsic::x86_avx2_vpdpwusd_128:
6441 case Intrinsic::x86_avx2_vpdpwusd_256:
6442 case Intrinsic::x86_avx10_vpdpwusd_512:
6443 case Intrinsic::x86_avx2_vpdpwusds_128:
6444 case Intrinsic::x86_avx2_vpdpwusds_256:
6445 case Intrinsic::x86_avx10_vpdpwusds_512:
6446 case Intrinsic::x86_avx2_vpdpwuud_128:
6447 case Intrinsic::x86_avx2_vpdpwuud_256:
6448 case Intrinsic::x86_avx10_vpdpwuud_512:
6449 case Intrinsic::x86_avx2_vpdpwuuds_128:
6450 case Intrinsic::x86_avx2_vpdpwuuds_256:
6451 case Intrinsic::x86_avx10_vpdpwuuds_512:
6452 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
6453 /*ZeroPurifies=*/true,
6454 /*EltSizeInBits=*/0,
6455 /*Lanes=*/kBothLanes);
6456 break;
6457
6458 // Dot Product of BF16 Pairs Accumulated Into Packed Single
6459 // Precision
6460 // <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128
6461 // (<4 x float>, <8 x bfloat>, <8 x bfloat>)
6462 // <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256
6463 // (<8 x float>, <16 x bfloat>, <16 x bfloat>)
6464 // <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512
6465 // (<16 x float>, <32 x bfloat>, <32 x bfloat>)
6466 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
6467 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
6468 case Intrinsic::x86_avx512bf16_dpbf16ps_512:
6469 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
6470 /*ZeroPurifies=*/false,
6471 /*EltSizeInBits=*/0,
6472 /*Lanes=*/kBothLanes);
6473 break;
6474
6475 case Intrinsic::x86_sse_cmp_ss:
6476 case Intrinsic::x86_sse2_cmp_sd:
6477 case Intrinsic::x86_sse_comieq_ss:
6478 case Intrinsic::x86_sse_comilt_ss:
6479 case Intrinsic::x86_sse_comile_ss:
6480 case Intrinsic::x86_sse_comigt_ss:
6481 case Intrinsic::x86_sse_comige_ss:
6482 case Intrinsic::x86_sse_comineq_ss:
6483 case Intrinsic::x86_sse_ucomieq_ss:
6484 case Intrinsic::x86_sse_ucomilt_ss:
6485 case Intrinsic::x86_sse_ucomile_ss:
6486 case Intrinsic::x86_sse_ucomigt_ss:
6487 case Intrinsic::x86_sse_ucomige_ss:
6488 case Intrinsic::x86_sse_ucomineq_ss:
6489 case Intrinsic::x86_sse2_comieq_sd:
6490 case Intrinsic::x86_sse2_comilt_sd:
6491 case Intrinsic::x86_sse2_comile_sd:
6492 case Intrinsic::x86_sse2_comigt_sd:
6493 case Intrinsic::x86_sse2_comige_sd:
6494 case Intrinsic::x86_sse2_comineq_sd:
6495 case Intrinsic::x86_sse2_ucomieq_sd:
6496 case Intrinsic::x86_sse2_ucomilt_sd:
6497 case Intrinsic::x86_sse2_ucomile_sd:
6498 case Intrinsic::x86_sse2_ucomigt_sd:
6499 case Intrinsic::x86_sse2_ucomige_sd:
6500 case Intrinsic::x86_sse2_ucomineq_sd:
6501 handleVectorCompareScalarIntrinsic(I);
6502 break;
6503
6504 case Intrinsic::x86_avx_cmp_pd_256:
6505 case Intrinsic::x86_avx_cmp_ps_256:
6506 case Intrinsic::x86_sse2_cmp_pd:
6507 case Intrinsic::x86_sse_cmp_ps:
6508 handleVectorComparePackedIntrinsic(I, /*PredicateAsOperand=*/true);
6509 break;
6510
6511 case Intrinsic::x86_bmi_bextr_32:
6512 case Intrinsic::x86_bmi_bextr_64:
6513 case Intrinsic::x86_bmi_bzhi_32:
6514 case Intrinsic::x86_bmi_bzhi_64:
6515 handleGenericBitManipulation(I);
6516 break;
6517
6518 case Intrinsic::x86_pclmulqdq:
6519 case Intrinsic::x86_pclmulqdq_256:
6520 case Intrinsic::x86_pclmulqdq_512:
6521 handlePclmulIntrinsic(I);
6522 break;
6523
6524 case Intrinsic::x86_avx_round_pd_256:
6525 case Intrinsic::x86_avx_round_ps_256:
6526 case Intrinsic::x86_sse41_round_pd:
6527 case Intrinsic::x86_sse41_round_ps:
6528 handleRoundPdPsIntrinsic(I);
6529 break;
6530
6531 case Intrinsic::x86_sse41_round_sd:
6532 case Intrinsic::x86_sse41_round_ss:
6533 handleUnarySdSsIntrinsic(I);
6534 break;
6535
6536 case Intrinsic::x86_sse2_max_sd:
6537 case Intrinsic::x86_sse_max_ss:
6538 case Intrinsic::x86_sse2_min_sd:
6539 case Intrinsic::x86_sse_min_ss:
6540 handleBinarySdSsIntrinsic(I);
6541 break;
6542
6543 case Intrinsic::x86_avx_vtestc_pd:
6544 case Intrinsic::x86_avx_vtestc_pd_256:
6545 case Intrinsic::x86_avx_vtestc_ps:
6546 case Intrinsic::x86_avx_vtestc_ps_256:
6547 case Intrinsic::x86_avx_vtestnzc_pd:
6548 case Intrinsic::x86_avx_vtestnzc_pd_256:
6549 case Intrinsic::x86_avx_vtestnzc_ps:
6550 case Intrinsic::x86_avx_vtestnzc_ps_256:
6551 case Intrinsic::x86_avx_vtestz_pd:
6552 case Intrinsic::x86_avx_vtestz_pd_256:
6553 case Intrinsic::x86_avx_vtestz_ps:
6554 case Intrinsic::x86_avx_vtestz_ps_256:
6555 case Intrinsic::x86_avx_ptestc_256:
6556 case Intrinsic::x86_avx_ptestnzc_256:
6557 case Intrinsic::x86_avx_ptestz_256:
6558 case Intrinsic::x86_sse41_ptestc:
6559 case Intrinsic::x86_sse41_ptestnzc:
6560 case Intrinsic::x86_sse41_ptestz:
6561 handleVtestIntrinsic(I);
6562 break;
6563
6564 // Packed Horizontal Add/Subtract
6565 case Intrinsic::x86_ssse3_phadd_w:
6566 case Intrinsic::x86_ssse3_phadd_w_128:
6567 case Intrinsic::x86_ssse3_phsub_w:
6568 case Intrinsic::x86_ssse3_phsub_w_128:
6569 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1,
6570 /*ReinterpretElemWidth=*/16);
6571 break;
6572
6573 case Intrinsic::x86_avx2_phadd_w:
6574 case Intrinsic::x86_avx2_phsub_w:
6575 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/2,
6576 /*ReinterpretElemWidth=*/16);
6577 break;
6578
6579 // Packed Horizontal Add/Subtract
6580 case Intrinsic::x86_ssse3_phadd_d:
6581 case Intrinsic::x86_ssse3_phadd_d_128:
6582 case Intrinsic::x86_ssse3_phsub_d:
6583 case Intrinsic::x86_ssse3_phsub_d_128:
6584 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1,
6585 /*ReinterpretElemWidth=*/32);
6586 break;
6587
6588 case Intrinsic::x86_avx2_phadd_d:
6589 case Intrinsic::x86_avx2_phsub_d:
6590 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/2,
6591 /*ReinterpretElemWidth=*/32);
6592 break;
6593
6594 // Packed Horizontal Add/Subtract and Saturate
6595 case Intrinsic::x86_ssse3_phadd_sw:
6596 case Intrinsic::x86_ssse3_phadd_sw_128:
6597 case Intrinsic::x86_ssse3_phsub_sw:
6598 case Intrinsic::x86_ssse3_phsub_sw_128:
6599 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1,
6600 /*ReinterpretElemWidth=*/16);
6601 break;
6602
6603 case Intrinsic::x86_avx2_phadd_sw:
6604 case Intrinsic::x86_avx2_phsub_sw:
6605 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/2,
6606 /*ReinterpretElemWidth=*/16);
6607 break;
6608
6609 // Packed Single/Double Precision Floating-Point Horizontal Add/Subtract
6610 case Intrinsic::x86_sse3_hadd_ps:
6611 case Intrinsic::x86_sse3_hadd_pd:
6612 case Intrinsic::x86_sse3_hsub_ps:
6613 case Intrinsic::x86_sse3_hsub_pd:
6614 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1);
6615 break;
6616
6617 case Intrinsic::x86_avx_hadd_pd_256:
6618 case Intrinsic::x86_avx_hadd_ps_256:
6619 case Intrinsic::x86_avx_hsub_pd_256:
6620 case Intrinsic::x86_avx_hsub_ps_256:
6621 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/2);
6622 break;
6623
6624 case Intrinsic::x86_avx_maskstore_ps:
6625 case Intrinsic::x86_avx_maskstore_pd:
6626 case Intrinsic::x86_avx_maskstore_ps_256:
6627 case Intrinsic::x86_avx_maskstore_pd_256:
6628 case Intrinsic::x86_avx2_maskstore_d:
6629 case Intrinsic::x86_avx2_maskstore_q:
6630 case Intrinsic::x86_avx2_maskstore_d_256:
6631 case Intrinsic::x86_avx2_maskstore_q_256: {
6632 handleAVXMaskedStore(I);
6633 break;
6634 }
6635
6636 case Intrinsic::x86_avx_maskload_ps:
6637 case Intrinsic::x86_avx_maskload_pd:
6638 case Intrinsic::x86_avx_maskload_ps_256:
6639 case Intrinsic::x86_avx_maskload_pd_256:
6640 case Intrinsic::x86_avx2_maskload_d:
6641 case Intrinsic::x86_avx2_maskload_q:
6642 case Intrinsic::x86_avx2_maskload_d_256:
6643 case Intrinsic::x86_avx2_maskload_q_256: {
6644 handleAVXMaskedLoad(I);
6645 break;
6646 }
6647
6648 // Packed floating-point arithmetic and min/max (512-bit variants)
6649 case Intrinsic::x86_avx512fp16_add_ph_512:
6650 case Intrinsic::x86_avx512fp16_sub_ph_512:
6651 case Intrinsic::x86_avx512fp16_mul_ph_512:
6652 case Intrinsic::x86_avx512fp16_div_ph_512:
6653 case Intrinsic::x86_avx512fp16_max_ph_512:
6654 case Intrinsic::x86_avx512fp16_min_ph_512:
6655 case Intrinsic::x86_avx512_min_ps_512:
6656 case Intrinsic::x86_avx512_min_pd_512:
6657 case Intrinsic::x86_avx512_max_ps_512:
6658 case Intrinsic::x86_avx512_max_pd_512: {
6659 // These AVX512 variants contain the rounding mode as a trailing flag.
6660 // Earlier variants do not have a trailing flag and are already handled
6661 // by maybeHandleSimpleNomemIntrinsic(I, 0) via
6662 // maybeHandleUnknownIntrinsic.
6663 [[maybe_unused]] bool Success =
6664 maybeHandleSimpleNomemIntrinsic(I, /*trailingFlags=*/1);
6665 assert(Success);
6666 break;
6667 }
6668
6669 case Intrinsic::x86_avx_vpermilvar_pd:
6670 case Intrinsic::x86_avx_vpermilvar_pd_256:
6671 case Intrinsic::x86_avx512_vpermilvar_pd_512:
6672 case Intrinsic::x86_avx_vpermilvar_ps:
6673 case Intrinsic::x86_avx_vpermilvar_ps_256:
6674 case Intrinsic::x86_avx512_vpermilvar_ps_512: {
6675 handleAVXVpermilvar(I);
6676 break;
6677 }
6678
6679 case Intrinsic::x86_avx512_vpermi2var_d_128:
6680 case Intrinsic::x86_avx512_vpermi2var_d_256:
6681 case Intrinsic::x86_avx512_vpermi2var_d_512:
6682 case Intrinsic::x86_avx512_vpermi2var_hi_128:
6683 case Intrinsic::x86_avx512_vpermi2var_hi_256:
6684 case Intrinsic::x86_avx512_vpermi2var_hi_512:
6685 case Intrinsic::x86_avx512_vpermi2var_pd_128:
6686 case Intrinsic::x86_avx512_vpermi2var_pd_256:
6687 case Intrinsic::x86_avx512_vpermi2var_pd_512:
6688 case Intrinsic::x86_avx512_vpermi2var_ps_128:
6689 case Intrinsic::x86_avx512_vpermi2var_ps_256:
6690 case Intrinsic::x86_avx512_vpermi2var_ps_512:
6691 case Intrinsic::x86_avx512_vpermi2var_q_128:
6692 case Intrinsic::x86_avx512_vpermi2var_q_256:
6693 case Intrinsic::x86_avx512_vpermi2var_q_512:
6694 case Intrinsic::x86_avx512_vpermi2var_qi_128:
6695 case Intrinsic::x86_avx512_vpermi2var_qi_256:
6696 case Intrinsic::x86_avx512_vpermi2var_qi_512:
6697 handleAVXVpermi2var(I);
6698 break;
6699
6700 // Packed Shuffle
6701 // llvm.x86.sse.pshuf.w(<1 x i64>, i8)
6702 // llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>)
6703 // llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
6704 // llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)
6705 // llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)
6706 //
6707 // The following intrinsics are auto-upgraded:
6708 // llvm.x86.sse2.pshuf.d(<4 x i32>, i8)
6709 // llvm.x86.sse2.pshufh.w(<8 x i16>, i8)
6710 // llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
6711 case Intrinsic::x86_avx2_pshuf_b:
6712 case Intrinsic::x86_sse_pshuf_w:
6713 case Intrinsic::x86_ssse3_pshuf_b_128:
6714 case Intrinsic::x86_ssse3_pshuf_b:
6715 case Intrinsic::x86_avx512_pshuf_b_512:
6716 handleIntrinsicByApplyingToShadow(I, shadowIntrinsicID: I.getIntrinsicID(),
6717 /*trailingVerbatimArgs=*/1);
6718 break;
6719
6720 // AVX512 PMOV: Packed MOV, with truncation
6721 // Precisely handled by applying the same intrinsic to the shadow
6722 case Intrinsic::x86_avx512_mask_pmov_dw_128:
6723 case Intrinsic::x86_avx512_mask_pmov_db_128:
6724 case Intrinsic::x86_avx512_mask_pmov_qb_128:
6725 case Intrinsic::x86_avx512_mask_pmov_qw_128:
6726 case Intrinsic::x86_avx512_mask_pmov_qd_128:
6727 case Intrinsic::x86_avx512_mask_pmov_wb_128:
6728 case Intrinsic::x86_avx512_mask_pmov_dw_256:
6729 case Intrinsic::x86_avx512_mask_pmov_db_256:
6730 case Intrinsic::x86_avx512_mask_pmov_qb_256:
6731 case Intrinsic::x86_avx512_mask_pmov_qw_256:
6732 case Intrinsic::x86_avx512_mask_pmov_dw_512:
6733 case Intrinsic::x86_avx512_mask_pmov_db_512:
6734 case Intrinsic::x86_avx512_mask_pmov_qb_512:
6735 case Intrinsic::x86_avx512_mask_pmov_qw_512: {
6736 // Intrinsic::x86_avx512_mask_pmov_{qd,wb}_{256,512} were removed in
6737 // f608dc1f5775ee880e8ea30e2d06ab5a4a935c22
6738 handleIntrinsicByApplyingToShadow(I, shadowIntrinsicID: I.getIntrinsicID(),
6739 /*trailingVerbatimArgs=*/1);
6740 break;
6741 }
6742
6743 // AVX512 PMOV{S,US}: Packed MOV, with signed/unsigned saturation
6744 // Approximately handled using the corresponding truncation intrinsic
6745 // TODO: improve handleAVX512VectorDownConvert to precisely model saturation
6746 case Intrinsic::x86_avx512_mask_pmovs_dw_512:
6747 case Intrinsic::x86_avx512_mask_pmovus_dw_512: {
6748 handleIntrinsicByApplyingToShadow(I,
6749 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_dw_512,
6750 /*trailingVerbatimArgs=*/1);
6751 break;
6752 }
6753
6754 case Intrinsic::x86_avx512_mask_pmovs_dw_256:
6755 case Intrinsic::x86_avx512_mask_pmovus_dw_256:
6756 handleIntrinsicByApplyingToShadow(I,
6757 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_dw_256,
6758 /*trailingVerbatimArgs=*/1);
6759 break;
6760
6761 case Intrinsic::x86_avx512_mask_pmovs_dw_128:
6762 case Intrinsic::x86_avx512_mask_pmovus_dw_128:
6763 handleIntrinsicByApplyingToShadow(I,
6764 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_dw_128,
6765 /*trailingVerbatimArgs=*/1);
6766 break;
6767
6768 case Intrinsic::x86_avx512_mask_pmovs_db_512:
6769 case Intrinsic::x86_avx512_mask_pmovus_db_512: {
6770 handleIntrinsicByApplyingToShadow(I,
6771 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_db_512,
6772 /*trailingVerbatimArgs=*/1);
6773 break;
6774 }
6775
6776 case Intrinsic::x86_avx512_mask_pmovs_db_256:
6777 case Intrinsic::x86_avx512_mask_pmovus_db_256:
6778 handleIntrinsicByApplyingToShadow(I,
6779 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_db_256,
6780 /*trailingVerbatimArgs=*/1);
6781 break;
6782
6783 case Intrinsic::x86_avx512_mask_pmovs_db_128:
6784 case Intrinsic::x86_avx512_mask_pmovus_db_128:
6785 handleIntrinsicByApplyingToShadow(I,
6786 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_db_128,
6787 /*trailingVerbatimArgs=*/1);
6788 break;
6789
6790 case Intrinsic::x86_avx512_mask_pmovs_qb_512:
6791 case Intrinsic::x86_avx512_mask_pmovus_qb_512: {
6792 handleIntrinsicByApplyingToShadow(I,
6793 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_qb_512,
6794 /*trailingVerbatimArgs=*/1);
6795 break;
6796 }
6797
6798 case Intrinsic::x86_avx512_mask_pmovs_qb_256:
6799 case Intrinsic::x86_avx512_mask_pmovus_qb_256:
6800 handleIntrinsicByApplyingToShadow(I,
6801 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_qb_256,
6802 /*trailingVerbatimArgs=*/1);
6803 break;
6804
6805 case Intrinsic::x86_avx512_mask_pmovs_qb_128:
6806 case Intrinsic::x86_avx512_mask_pmovus_qb_128:
6807 handleIntrinsicByApplyingToShadow(I,
6808 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_qb_128,
6809 /*trailingVerbatimArgs=*/1);
6810 break;
6811
6812 case Intrinsic::x86_avx512_mask_pmovs_qw_512:
6813 case Intrinsic::x86_avx512_mask_pmovus_qw_512: {
6814 handleIntrinsicByApplyingToShadow(I,
6815 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_qw_512,
6816 /*trailingVerbatimArgs=*/1);
6817 break;
6818 }
6819
6820 case Intrinsic::x86_avx512_mask_pmovs_qw_256:
6821 case Intrinsic::x86_avx512_mask_pmovus_qw_256:
6822 handleIntrinsicByApplyingToShadow(I,
6823 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_qw_256,
6824 /*trailingVerbatimArgs=*/1);
6825 break;
6826
6827 case Intrinsic::x86_avx512_mask_pmovs_qw_128:
6828 case Intrinsic::x86_avx512_mask_pmovus_qw_128:
6829 handleIntrinsicByApplyingToShadow(I,
6830 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_qw_128,
6831 /*trailingVerbatimArgs=*/1);
6832 break;
6833
6834 case Intrinsic::x86_avx512_mask_pmovs_qd_128:
6835 case Intrinsic::x86_avx512_mask_pmovus_qd_128:
6836 handleIntrinsicByApplyingToShadow(I,
6837 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_qd_128,
6838 /*trailingVerbatimArgs=*/1);
6839 break;
6840
6841 case Intrinsic::x86_avx512_mask_pmovs_wb_128:
6842 case Intrinsic::x86_avx512_mask_pmovus_wb_128:
6843 handleIntrinsicByApplyingToShadow(I,
6844 shadowIntrinsicID: Intrinsic::x86_avx512_mask_pmov_wb_128,
6845 /*trailingVerbatimArgs=*/1);
6846 break;
6847
6848 case Intrinsic::x86_avx512_mask_pmovs_qd_256:
6849 case Intrinsic::x86_avx512_mask_pmovus_qd_256:
6850 case Intrinsic::x86_avx512_mask_pmovs_wb_256:
6851 case Intrinsic::x86_avx512_mask_pmovus_wb_256:
6852 case Intrinsic::x86_avx512_mask_pmovs_qd_512:
6853 case Intrinsic::x86_avx512_mask_pmovus_qd_512:
6854 case Intrinsic::x86_avx512_mask_pmovs_wb_512:
6855 case Intrinsic::x86_avx512_mask_pmovus_wb_512: {
6856 // Since Intrinsic::x86_avx512_mask_pmov_{qd,wb}_{256,512} do not exist,
6857 // we cannot use handleIntrinsicByApplyingToShadow. Instead, we call the
6858 // slow-path handler.
6859 handleAVX512VectorDownConvert(I);
6860 break;
6861 }
6862
6863 // AVX512/AVX10 Reciprocal Square Root
6864 // <16 x float> @llvm.x86.avx512.rsqrt14.ps.512
6865 // (<16 x float>, <16 x float>, i16)
6866 // <8 x float> @llvm.x86.avx512.rsqrt14.ps.256
6867 // (<8 x float>, <8 x float>, i8)
6868 // <4 x float> @llvm.x86.avx512.rsqrt14.ps.128
6869 // (<4 x float>, <4 x float>, i8)
6870 //
6871 // <8 x double> @llvm.x86.avx512.rsqrt14.pd.512
6872 // (<8 x double>, <8 x double>, i8)
6873 // <4 x double> @llvm.x86.avx512.rsqrt14.pd.256
6874 // (<4 x double>, <4 x double>, i8)
6875 // <2 x double> @llvm.x86.avx512.rsqrt14.pd.128
6876 // (<2 x double>, <2 x double>, i8)
6877 //
6878 // <32 x bfloat> @llvm.x86.avx10.mask.rsqrt.bf16.512
6879 // (<32 x bfloat>, <32 x bfloat>, i32)
6880 // <16 x bfloat> @llvm.x86.avx10.mask.rsqrt.bf16.256
6881 // (<16 x bfloat>, <16 x bfloat>, i16)
6882 // <8 x bfloat> @llvm.x86.avx10.mask.rsqrt.bf16.128
6883 // (<8 x bfloat>, <8 x bfloat>, i8)
6884 //
6885 // <32 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.512
6886 // (<32 x half>, <32 x half>, i32)
6887 // <16 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.256
6888 // (<16 x half>, <16 x half>, i16)
6889 // <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.ph.128
6890 // (<8 x half>, <8 x half>, i8)
6891 //
6892 // TODO: 3-operand variants are not handled:
6893 // <2 x double> @llvm.x86.avx512.rsqrt14.sd
6894 // (<2 x double>, <2 x double>, <2 x double>, i8)
6895 // <4 x float> @llvm.x86.avx512.rsqrt14.ss
6896 // (<4 x float>, <4 x float>, <4 x float>, i8)
6897 // <8 x half> @llvm.x86.avx512fp16.mask.rsqrt.sh
6898 // (<8 x half>, <8 x half>, <8 x half>, i8)
6899 case Intrinsic::x86_avx512_rsqrt14_ps_512:
6900 case Intrinsic::x86_avx512_rsqrt14_ps_256:
6901 case Intrinsic::x86_avx512_rsqrt14_ps_128:
6902 case Intrinsic::x86_avx512_rsqrt14_pd_512:
6903 case Intrinsic::x86_avx512_rsqrt14_pd_256:
6904 case Intrinsic::x86_avx512_rsqrt14_pd_128:
6905 case Intrinsic::x86_avx10_mask_rsqrt_bf16_512:
6906 case Intrinsic::x86_avx10_mask_rsqrt_bf16_256:
6907 case Intrinsic::x86_avx10_mask_rsqrt_bf16_128:
6908 case Intrinsic::x86_avx512fp16_mask_rsqrt_ph_512:
6909 case Intrinsic::x86_avx512fp16_mask_rsqrt_ph_256:
6910 case Intrinsic::x86_avx512fp16_mask_rsqrt_ph_128:
6911 handleAVX512VectorGenericMaskedFP(I, /*DataIndices=*/{0},
6912 /*WriteThruIndex=*/1,
6913 /*MaskIndex=*/2);
6914 break;
6915
6916 // AVX512/AVX10 Reciprocal
6917 // <16 x float> @llvm.x86.avx512.rcp14.ps.512
6918 // (<16 x float>, <16 x float>, i16)
6919 // <8 x float> @llvm.x86.avx512.rcp14.ps.256
6920 // (<8 x float>, <8 x float>, i8)
6921 // <4 x float> @llvm.x86.avx512.rcp14.ps.128
6922 // (<4 x float>, <4 x float>, i8)
6923 //
6924 // <8 x double> @llvm.x86.avx512.rcp14.pd.512
6925 // (<8 x double>, <8 x double>, i8)
6926 // <4 x double> @llvm.x86.avx512.rcp14.pd.256
6927 // (<4 x double>, <4 x double>, i8)
6928 // <2 x double> @llvm.x86.avx512.rcp14.pd.128
6929 // (<2 x double>, <2 x double>, i8)
6930 //
6931 // <32 x bfloat> @llvm.x86.avx10.mask.rcp.bf16.512
6932 // (<32 x bfloat>, <32 x bfloat>, i32)
6933 // <16 x bfloat> @llvm.x86.avx10.mask.rcp.bf16.256
6934 // (<16 x bfloat>, <16 x bfloat>, i16)
6935 // <8 x bfloat> @llvm.x86.avx10.mask.rcp.bf16.128
6936 // (<8 x bfloat>, <8 x bfloat>, i8)
6937 //
6938 // <32 x half> @llvm.x86.avx512fp16.mask.rcp.ph.512
6939 // (<32 x half>, <32 x half>, i32)
6940 // <16 x half> @llvm.x86.avx512fp16.mask.rcp.ph.256
6941 // (<16 x half>, <16 x half>, i16)
6942 // <8 x half> @llvm.x86.avx512fp16.mask.rcp.ph.128
6943 // (<8 x half>, <8 x half>, i8)
6944 //
6945 // TODO: 3-operand variants are not handled:
6946 // <2 x double> @llvm.x86.avx512.rcp14.sd
6947 // (<2 x double>, <2 x double>, <2 x double>, i8)
6948 // <4 x float> @llvm.x86.avx512.rcp14.ss
6949 // (<4 x float>, <4 x float>, <4 x float>, i8)
6950 // <8 x half> @llvm.x86.avx512fp16.mask.rcp.sh
6951 // (<8 x half>, <8 x half>, <8 x half>, i8)
6952 case Intrinsic::x86_avx512_rcp14_ps_512:
6953 case Intrinsic::x86_avx512_rcp14_ps_256:
6954 case Intrinsic::x86_avx512_rcp14_ps_128:
6955 case Intrinsic::x86_avx512_rcp14_pd_512:
6956 case Intrinsic::x86_avx512_rcp14_pd_256:
6957 case Intrinsic::x86_avx512_rcp14_pd_128:
6958 case Intrinsic::x86_avx10_mask_rcp_bf16_512:
6959 case Intrinsic::x86_avx10_mask_rcp_bf16_256:
6960 case Intrinsic::x86_avx10_mask_rcp_bf16_128:
6961 case Intrinsic::x86_avx512fp16_mask_rcp_ph_512:
6962 case Intrinsic::x86_avx512fp16_mask_rcp_ph_256:
6963 case Intrinsic::x86_avx512fp16_mask_rcp_ph_128:
6964 handleAVX512VectorGenericMaskedFP(I, /*DataIndices=*/{0},
6965 /*WriteThruIndex=*/1,
6966 /*MaskIndex=*/2);
6967 break;
6968
6969 // <32 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.512
6970 // (<32 x half>, i32, <32 x half>, i32, i32)
6971 // <16 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.256
6972 // (<16 x half>, i32, <16 x half>, i32, i16)
6973 // <8 x half> @llvm.x86.avx512fp16.mask.rndscale.ph.128
6974 // (<8 x half>, i32, <8 x half>, i32, i8)
6975 //
6976 // <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512
6977 // (<16 x float>, i32, <16 x float>, i16, i32)
6978 // <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256
6979 // (<8 x float>, i32, <8 x float>, i8)
6980 // <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128
6981 // (<4 x float>, i32, <4 x float>, i8)
6982 //
6983 // <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512
6984 // (<8 x double>, i32, <8 x double>, i8, i32)
6985 // A Imm WriteThru Mask Rounding
6986 // <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256
6987 // (<4 x double>, i32, <4 x double>, i8)
6988 // <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128
6989 // (<2 x double>, i32, <2 x double>, i8)
6990 // A Imm WriteThru Mask
6991 //
6992 // <32 x bfloat> @llvm.x86.avx10.mask.rndscale.bf16.512
6993 // (<32 x bfloat>, i32, <32 x bfloat>, i32)
6994 // <16 x bfloat> @llvm.x86.avx10.mask.rndscale.bf16.256
6995 // (<16 x bfloat>, i32, <16 x bfloat>, i16)
6996 // <8 x bfloat> @llvm.x86.avx10.mask.rndscale.bf16.128
6997 // (<8 x bfloat>, i32, <8 x bfloat>, i8)
6998 //
6999 // Not supported: three vectors
7000 // - <8 x half> @llvm.x86.avx512fp16.mask.rndscale.sh
7001 // (<8 x half>, <8 x half>, <8 x half>, i8, i32, i32)
7002 // - <4 x float> @llvm.x86.avx512.mask.rndscale.ss
7003 // (<4 x float>, <4 x float>, <4 x float>, i8, i32, i32)
7004 // - <2 x double> @llvm.x86.avx512.mask.rndscale.sd
7005 // (<2 x double>, <2 x double>, <2 x double>, i8, i32,
7006 // i32)
7007 // A B WriteThru Mask Imm
7008 // Rounding
7009 case Intrinsic::x86_avx512fp16_mask_rndscale_ph_512:
7010 case Intrinsic::x86_avx512fp16_mask_rndscale_ph_256:
7011 case Intrinsic::x86_avx512fp16_mask_rndscale_ph_128:
7012 case Intrinsic::x86_avx512_mask_rndscale_ps_512:
7013 case Intrinsic::x86_avx512_mask_rndscale_ps_256:
7014 case Intrinsic::x86_avx512_mask_rndscale_ps_128:
7015 case Intrinsic::x86_avx512_mask_rndscale_pd_512:
7016 case Intrinsic::x86_avx512_mask_rndscale_pd_256:
7017 case Intrinsic::x86_avx512_mask_rndscale_pd_128:
7018 case Intrinsic::x86_avx10_mask_rndscale_bf16_512:
7019 case Intrinsic::x86_avx10_mask_rndscale_bf16_256:
7020 case Intrinsic::x86_avx10_mask_rndscale_bf16_128:
7021 handleAVX512VectorGenericMaskedFP(I, /*DataIndices=*/{0},
7022 /*WriteThruIndex=*/2,
7023 /*MaskIndex=*/3);
7024 break;
7025
7026 // AVX512 Vector Scale Float* Packed
7027 //
7028 // < 8 x double> @llvm.x86.avx512.mask.scalef.pd.512
7029 // (<8 x double>, <8 x double>, <8 x double>, i8, i32)
7030 // A B WriteThru Msk Round
7031 // < 4 x double> @llvm.x86.avx512.mask.scalef.pd.256
7032 // (<4 x double>, <4 x double>, <4 x double>, i8)
7033 // < 2 x double> @llvm.x86.avx512.mask.scalef.pd.128
7034 // (<2 x double>, <2 x double>, <2 x double>, i8)
7035 //
7036 // <16 x float> @llvm.x86.avx512.mask.scalef.ps.512
7037 // (<16 x float>, <16 x float>, <16 x float>, i16, i32)
7038 // < 8 x float> @llvm.x86.avx512.mask.scalef.ps.256
7039 // (<8 x float>, <8 x float>, <8 x float>, i8)
7040 // < 4 x float> @llvm.x86.avx512.mask.scalef.ps.128
7041 // (<4 x float>, <4 x float>, <4 x float>, i8)
7042 //
7043 // <32 x half> @llvm.x86.avx512fp16.mask.scalef.ph.512
7044 // (<32 x half>, <32 x half>, <32 x half>, i32, i32)
7045 // <16 x half> @llvm.x86.avx512fp16.mask.scalef.ph.256
7046 // (<16 x half>, <16 x half>, <16 x half>, i16)
7047 // < 8 x half> @llvm.x86.avx512fp16.mask.scalef.ph.128
7048 // (<8 x half>, <8 x half>, <8 x half>, i8)
7049 //
7050 // TODO: AVX10
7051 // <32 x bfloat> @llvm.x86.avx10.mask.scalef.bf16.512
7052 // (<32 x bfloat>, <32 x bfloat>, <32 x bfloat>, i32)
7053 // <16 x bfloat> @llvm.x86.avx10.mask.scalef.bf16.256
7054 // (<16 x bfloat>, <16 x bfloat>, <16 x bfloat>, i16)
7055 // < 8 x bfloat> @llvm.x86.avx10.mask.scalef.bf16.128
7056 // (<8 x bfloat>, <8 x bfloat>, <8 x bfloat>, i8)
7057 case Intrinsic::x86_avx512_mask_scalef_pd_512:
7058 case Intrinsic::x86_avx512_mask_scalef_pd_256:
7059 case Intrinsic::x86_avx512_mask_scalef_pd_128:
7060 case Intrinsic::x86_avx512_mask_scalef_ps_512:
7061 case Intrinsic::x86_avx512_mask_scalef_ps_256:
7062 case Intrinsic::x86_avx512_mask_scalef_ps_128:
7063 case Intrinsic::x86_avx512fp16_mask_scalef_ph_512:
7064 case Intrinsic::x86_avx512fp16_mask_scalef_ph_256:
7065 case Intrinsic::x86_avx512fp16_mask_scalef_ph_128:
7066 // The AVX512 512-bit operand variants have an extra operand (the
7067 // Rounding mode). The extra operand, if present, will be
7068 // automatically checked by the handler.
7069 handleAVX512VectorGenericMaskedFP(I, /*DataIndices=*/{0, 1},
7070 /*WriteThruIndex=*/2,
7071 /*MaskIndex=*/3);
7072 break;
7073
7074 // TODO: AVX512 Vector Scale Float* Scalar
7075 //
7076 // This is different from the Packed variant, because some bits are copied,
7077 // and some bits are zeroed.
7078 //
7079 // < 4 x float> @llvm.x86.avx512.mask.scalef.ss
7080 // (<4 x float>, <4 x float>, <4 x float>, i8, i32)
7081 //
7082 // < 2 x double> @llvm.x86.avx512.mask.scalef.sd
7083 // (<2 x double>, <2 x double>, <2 x double>, i8, i32)
7084 //
7085 // < 8 x half> @llvm.x86.avx512fp16.mask.scalef.sh
7086 // (<8 x half>, <8 x half>, <8 x half>, i8, i32)
7087
7088 // AVX512 FP16 Arithmetic
7089 case Intrinsic::x86_avx512fp16_mask_add_sh_round:
7090 case Intrinsic::x86_avx512fp16_mask_sub_sh_round:
7091 case Intrinsic::x86_avx512fp16_mask_mul_sh_round:
7092 case Intrinsic::x86_avx512fp16_mask_div_sh_round:
7093 case Intrinsic::x86_avx512fp16_mask_max_sh_round:
7094 case Intrinsic::x86_avx512fp16_mask_min_sh_round: {
7095 visitGenericScalarHalfwordInst(I);
7096 break;
7097 }
7098
7099 // AVX Galois Field New Instructions
7100 case Intrinsic::x86_vgf2p8affineqb_128:
7101 case Intrinsic::x86_vgf2p8affineqb_256:
7102 case Intrinsic::x86_vgf2p8affineqb_512:
7103 handleAVXGF2P8Affine(I);
7104 break;
7105
7106 default:
7107 return false;
7108 }
7109
7110 return true;
7111 }
7112
7113 bool maybeHandleArmSIMDIntrinsic(IntrinsicInst &I) {
7114 switch (I.getIntrinsicID()) {
7115 // Two operands e.g.,
7116 // - <8 x i8> @llvm.aarch64.neon.rshrn.v8i8 (<8 x i16>, i32)
7117 // - <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>)
7118 case Intrinsic::aarch64_neon_rshrn:
7119 case Intrinsic::aarch64_neon_sqrshl:
7120 case Intrinsic::aarch64_neon_sqrshrn:
7121 case Intrinsic::aarch64_neon_sqrshrun:
7122 case Intrinsic::aarch64_neon_sqshl:
7123 case Intrinsic::aarch64_neon_sqshlu:
7124 case Intrinsic::aarch64_neon_sqshrn:
7125 case Intrinsic::aarch64_neon_sqshrun:
7126 case Intrinsic::aarch64_neon_srshl:
7127 case Intrinsic::aarch64_neon_sshl:
7128 case Intrinsic::aarch64_neon_uqrshl:
7129 case Intrinsic::aarch64_neon_uqrshrn:
7130 case Intrinsic::aarch64_neon_uqshl:
7131 case Intrinsic::aarch64_neon_uqshrn:
7132 case Intrinsic::aarch64_neon_urshl:
7133 case Intrinsic::aarch64_neon_ushl:
7134 handleVectorShiftIntrinsic(I, /* Variable */ false);
7135 break;
7136
7137 // Vector Shift Left/Right and Insert
7138 //
7139 // Three operands e.g.,
7140 // - <4 x i16> @llvm.aarch64.neon.vsli.v4i16
7141 // (<4 x i16> %a, <4 x i16> %b, i32 %n)
7142 // - <16 x i8> @llvm.aarch64.neon.vsri.v16i8
7143 // (<16 x i8> %a, <16 x i8> %b, i32 %n)
7144 //
7145 // %b is shifted by %n bits, and the "missing" bits are filled in with %a
7146 // (instead of zero-extending/sign-extending).
7147 case Intrinsic::aarch64_neon_vsli:
7148 case Intrinsic::aarch64_neon_vsri:
7149 handleIntrinsicByApplyingToShadow(I, shadowIntrinsicID: I.getIntrinsicID(),
7150 /*trailingVerbatimArgs=*/1);
7151 break;
7152
7153 // TODO: handling max/min similarly to AND/OR may be more precise
7154 // Floating-Point Maximum/Minimum Pairwise
7155 case Intrinsic::aarch64_neon_fmaxp:
7156 case Intrinsic::aarch64_neon_fminp:
7157 // Floating-Point Maximum/Minimum Number Pairwise
7158 case Intrinsic::aarch64_neon_fmaxnmp:
7159 case Intrinsic::aarch64_neon_fminnmp:
7160 // Signed/Unsigned Maximum/Minimum Pairwise
7161 case Intrinsic::aarch64_neon_smaxp:
7162 case Intrinsic::aarch64_neon_sminp:
7163 case Intrinsic::aarch64_neon_umaxp:
7164 case Intrinsic::aarch64_neon_uminp:
7165 // Add Pairwise
7166 case Intrinsic::aarch64_neon_addp:
7167 // Floating-point Add Pairwise
7168 case Intrinsic::aarch64_neon_faddp:
7169 // Add Long Pairwise
7170 case Intrinsic::aarch64_neon_saddlp:
7171 case Intrinsic::aarch64_neon_uaddlp: {
7172 handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1);
7173 break;
7174 }
7175
7176 // Floating-point Convert to integer, rounding to nearest with ties to Away
7177 case Intrinsic::aarch64_neon_fcvtas:
7178 case Intrinsic::aarch64_neon_fcvtau:
7179 // Floating-point convert to integer, rounding toward minus infinity
7180 case Intrinsic::aarch64_neon_fcvtms:
7181 case Intrinsic::aarch64_neon_fcvtmu:
7182 // Floating-point convert to integer, rounding to nearest with ties to even
7183 case Intrinsic::aarch64_neon_fcvtns:
7184 case Intrinsic::aarch64_neon_fcvtnu:
7185 // Floating-point convert to integer, rounding toward plus infinity
7186 case Intrinsic::aarch64_neon_fcvtps:
7187 case Intrinsic::aarch64_neon_fcvtpu:
7188 // Floating-point Convert to integer, rounding toward Zero
7189 case Intrinsic::aarch64_neon_fcvtzs:
7190 case Intrinsic::aarch64_neon_fcvtzu:
7191 // Floating-point convert to lower precision narrow, rounding to odd
7192 case Intrinsic::aarch64_neon_fcvtxn:
7193 handleGenericVectorConvertIntrinsic(I, /*FixedPoint=*/false);
7194 break;
7195
7196 // Vector Conversions Between Fixed-Point and Floating-Point
7197 case Intrinsic::aarch64_neon_vcvtfxs2fp:
7198 case Intrinsic::aarch64_neon_vcvtfp2fxs:
7199 case Intrinsic::aarch64_neon_vcvtfxu2fp:
7200 case Intrinsic::aarch64_neon_vcvtfp2fxu:
7201 handleGenericVectorConvertIntrinsic(I, /*FixedPoint=*/true);
7202 break;
7203
7204 // TODO: bfloat conversions
7205 // - bfloat @llvm.aarch64.neon.bfcvt(float)
7206 // - <8 x bfloat> @llvm.aarch64.neon.bfcvtn(<4 x float>)
7207 // - <8 x bfloat> @llvm.aarch64.neon.bfcvtn2(<8 x bfloat>, <4 x float>)
7208
7209 // Add reduction to scalar
7210 case Intrinsic::aarch64_neon_faddv:
7211 case Intrinsic::aarch64_neon_saddv:
7212 case Intrinsic::aarch64_neon_uaddv:
7213 // Signed/Unsigned min/max (Vector)
7214 // TODO: handling similarly to AND/OR may be more precise.
7215 case Intrinsic::aarch64_neon_smaxv:
7216 case Intrinsic::aarch64_neon_sminv:
7217 case Intrinsic::aarch64_neon_umaxv:
7218 case Intrinsic::aarch64_neon_uminv:
7219 // Floating-point min/max (vector)
7220 // The f{min,max}"nm"v variants handle NaN differently than f{min,max}v,
7221 // but our shadow propagation is the same.
7222 case Intrinsic::aarch64_neon_fmaxv:
7223 case Intrinsic::aarch64_neon_fminv:
7224 case Intrinsic::aarch64_neon_fmaxnmv:
7225 case Intrinsic::aarch64_neon_fminnmv:
7226 // Sum long across vector
7227 case Intrinsic::aarch64_neon_saddlv:
7228 case Intrinsic::aarch64_neon_uaddlv:
7229 handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true);
7230 break;
7231
7232 case Intrinsic::aarch64_neon_ld1x2:
7233 case Intrinsic::aarch64_neon_ld1x3:
7234 case Intrinsic::aarch64_neon_ld1x4:
7235 case Intrinsic::aarch64_neon_ld2:
7236 case Intrinsic::aarch64_neon_ld3:
7237 case Intrinsic::aarch64_neon_ld4:
7238 case Intrinsic::aarch64_neon_ld2r:
7239 case Intrinsic::aarch64_neon_ld3r:
7240 case Intrinsic::aarch64_neon_ld4r: {
7241 handleNEONVectorLoad(I, /*WithLane=*/false);
7242 break;
7243 }
7244
7245 case Intrinsic::aarch64_neon_ld2lane:
7246 case Intrinsic::aarch64_neon_ld3lane:
7247 case Intrinsic::aarch64_neon_ld4lane: {
7248 handleNEONVectorLoad(I, /*WithLane=*/true);
7249 break;
7250 }
7251
7252 // Saturating extract narrow
7253 case Intrinsic::aarch64_neon_sqxtn:
7254 case Intrinsic::aarch64_neon_sqxtun:
7255 case Intrinsic::aarch64_neon_uqxtn:
7256 // These only have one argument, but we (ab)use handleShadowOr because it
7257 // does work on single argument intrinsics and will typecast the shadow
7258 // (and update the origin).
7259 handleShadowOr(I);
7260 break;
7261
7262 case Intrinsic::aarch64_neon_st1x2:
7263 case Intrinsic::aarch64_neon_st1x3:
7264 case Intrinsic::aarch64_neon_st1x4:
7265 case Intrinsic::aarch64_neon_st2:
7266 case Intrinsic::aarch64_neon_st3:
7267 case Intrinsic::aarch64_neon_st4: {
7268 handleNEONVectorStoreIntrinsic(I, useLane: false);
7269 break;
7270 }
7271
7272 case Intrinsic::aarch64_neon_st2lane:
7273 case Intrinsic::aarch64_neon_st3lane:
7274 case Intrinsic::aarch64_neon_st4lane: {
7275 handleNEONVectorStoreIntrinsic(I, useLane: true);
7276 break;
7277 }
7278
7279 // Arm NEON vector table intrinsics have the source/table register(s) as
7280 // arguments, followed by the index register. They return the output.
7281 //
7282 // 'TBL writes a zero if an index is out-of-range, while TBX leaves the
7283 // original value unchanged in the destination register.'
7284 // Conveniently, zero denotes a clean shadow, which means out-of-range
7285 // indices for TBL will initialize the user data with zero and also clean
7286 // the shadow. (For TBX, neither the user data nor the shadow will be
7287 // updated, which is also correct.)
7288 case Intrinsic::aarch64_neon_tbl1:
7289 case Intrinsic::aarch64_neon_tbl2:
7290 case Intrinsic::aarch64_neon_tbl3:
7291 case Intrinsic::aarch64_neon_tbl4:
7292 case Intrinsic::aarch64_neon_tbx1:
7293 case Intrinsic::aarch64_neon_tbx2:
7294 case Intrinsic::aarch64_neon_tbx3:
7295 case Intrinsic::aarch64_neon_tbx4: {
7296 // The last trailing argument (index register) should be handled verbatim
7297 handleIntrinsicByApplyingToShadow(
7298 I, /*shadowIntrinsicID=*/I.getIntrinsicID(),
7299 /*trailingVerbatimArgs*/ 1);
7300 break;
7301 }
7302
7303 case Intrinsic::aarch64_neon_fmulx:
7304 case Intrinsic::aarch64_neon_pmul:
7305 case Intrinsic::aarch64_neon_pmull:
7306 case Intrinsic::aarch64_neon_smull:
7307 case Intrinsic::aarch64_neon_pmull64:
7308 case Intrinsic::aarch64_neon_umull: {
7309 handleNEONVectorMultiplyIntrinsic(I);
7310 break;
7311 }
7312
7313 case Intrinsic::aarch64_neon_smmla:
7314 case Intrinsic::aarch64_neon_ummla:
7315 case Intrinsic::aarch64_neon_usmmla:
7316 case Intrinsic::aarch64_neon_bfmmla:
7317 handleNEONMatrixMultiply(I);
7318 break;
7319
7320 // <2 x i32> @llvm.aarch64.neon.{u,s,us}dot.v2i32.v8i8
7321 // (<2 x i32> %acc, <8 x i8> %a, <8 x i8> %b)
7322 // <4 x i32> @llvm.aarch64.neon.{u,s,us}dot.v4i32.v16i8
7323 // (<4 x i32> %acc, <16 x i8> %a, <16 x i8> %b)
7324 case Intrinsic::aarch64_neon_sdot:
7325 case Intrinsic::aarch64_neon_udot:
7326 case Intrinsic::aarch64_neon_usdot:
7327 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/4,
7328 /*ZeroPurifies=*/true,
7329 /*EltSizeInBits=*/0,
7330 /*Lanes=*/kBothLanes);
7331 break;
7332
7333 // <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16
7334 // (<2 x float> %acc, <4 x bfloat> %a, <4 x bfloat> %b)
7335 // <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v8bf16
7336 // (<4 x float> %acc, <8 x bfloat> %a, <8 x bfloat> %b)
7337 case Intrinsic::aarch64_neon_bfdot:
7338 handleVectorDotProductIntrinsic(I, /*ReductionFactor=*/2,
7339 /*ZeroPurifies=*/false,
7340 /*EltSizeInBits=*/0,
7341 /*Lanes=*/kBothLanes);
7342 break;
7343
7344 // Floating-Point Absolute Compare Greater Than/Equal
7345 case Intrinsic::aarch64_neon_facge:
7346 case Intrinsic::aarch64_neon_facgt:
7347 handleVectorComparePackedIntrinsic(I, /*PredicateAsOperand=*/false);
7348 break;
7349
7350 default:
7351 return false;
7352 }
7353
7354 return true;
7355 }
7356
7357 void visitIntrinsicInst(IntrinsicInst &I) {
7358 if (maybeHandleCrossPlatformIntrinsic(I))
7359 return;
7360
7361 if (maybeHandleX86SIMDIntrinsic(I))
7362 return;
7363
7364 if (maybeHandleArmSIMDIntrinsic(I))
7365 return;
7366
7367 if (maybeHandleUnknownIntrinsic(I))
7368 return;
7369
7370 visitInstruction(I);
7371 }
7372
7373 void visitLibAtomicLoad(CallBase &CB) {
7374 // Since we use getNextNode here, we can't have CB terminate the BB.
7375 assert(isa<CallInst>(CB));
7376
7377 IRBuilder<> IRB(&CB);
7378 Value *Size = CB.getArgOperand(i: 0);
7379 Value *SrcPtr = CB.getArgOperand(i: 1);
7380 Value *DstPtr = CB.getArgOperand(i: 2);
7381 Value *Ordering = CB.getArgOperand(i: 3);
7382 // Convert the call to have at least Acquire ordering to make sure
7383 // the shadow operations aren't reordered before it.
7384 Value *NewOrdering =
7385 IRB.CreateExtractElement(Vec: makeAddAcquireOrderingTable(IRB), Idx: Ordering);
7386 CB.setArgOperand(i: 3, v: NewOrdering);
7387
7388 NextNodeIRBuilder NextIRB(&CB);
7389 Value *SrcShadowPtr, *SrcOriginPtr;
7390 std::tie(args&: SrcShadowPtr, args&: SrcOriginPtr) =
7391 getShadowOriginPtr(Addr: SrcPtr, IRB&: NextIRB, ShadowTy: NextIRB.getInt8Ty(), Alignment: Align(1),
7392 /*isStore*/ false);
7393 Value *DstShadowPtr =
7394 getShadowOriginPtr(Addr: DstPtr, IRB&: NextIRB, ShadowTy: NextIRB.getInt8Ty(), Alignment: Align(1),
7395 /*isStore*/ true)
7396 .first;
7397
7398 NextIRB.CreateMemCpy(Dst: DstShadowPtr, DstAlign: Align(1), Src: SrcShadowPtr, SrcAlign: Align(1), Size);
7399 if (MS.TrackOrigins) {
7400 Value *SrcOrigin = NextIRB.CreateAlignedLoad(Ty: MS.OriginTy, Ptr: SrcOriginPtr,
7401 Align: kMinOriginAlignment);
7402 Value *NewOrigin = updateOrigin(V: SrcOrigin, IRB&: NextIRB);
7403 NextIRB.CreateCall(Callee: MS.MsanSetOriginFn, Args: {DstPtr, Size, NewOrigin});
7404 }
7405 }
7406
7407 void visitLibAtomicStore(CallBase &CB) {
7408 IRBuilder<> IRB(&CB);
7409 Value *Size = CB.getArgOperand(i: 0);
7410 Value *DstPtr = CB.getArgOperand(i: 2);
7411 Value *Ordering = CB.getArgOperand(i: 3);
7412 // Convert the call to have at least Release ordering to make sure
7413 // the shadow operations aren't reordered after it.
7414 Value *NewOrdering =
7415 IRB.CreateExtractElement(Vec: makeAddReleaseOrderingTable(IRB), Idx: Ordering);
7416 CB.setArgOperand(i: 3, v: NewOrdering);
7417
7418 Value *DstShadowPtr =
7419 getShadowOriginPtr(Addr: DstPtr, IRB, ShadowTy: IRB.getInt8Ty(), Alignment: Align(1),
7420 /*isStore*/ true)
7421 .first;
7422
7423 // Atomic store always paints clean shadow/origin. See file header.
7424 IRB.CreateMemSet(Ptr: DstShadowPtr, Val: getCleanShadow(OrigTy: IRB.getInt8Ty()), Size,
7425 Align: Align(1));
7426 }
7427
7428 void visitCallBase(CallBase &CB) {
7429 assert(!CB.getMetadata(LLVMContext::MD_nosanitize));
7430 if (CB.isInlineAsm()) {
7431 // For inline asm (either a call to asm function, or callbr instruction),
7432 // do the usual thing: check argument shadow and mark all outputs as
7433 // clean. Note that any side effects of the inline asm that are not
7434 // immediately visible in its constraints are not handled.
7435 if (ClHandleAsmConservative)
7436 visitAsmInstruction(I&: CB);
7437 else
7438 visitInstruction(I&: CB);
7439 return;
7440 }
7441 LibFunc LF;
7442 if (TLI->getLibFunc(CB, F&: LF)) {
7443 // libatomic.a functions need to have special handling because there isn't
7444 // a good way to intercept them or compile the library with
7445 // instrumentation.
7446 switch (LF) {
7447 case LibFunc_atomic_load:
7448 if (!isa<CallInst>(Val: CB)) {
7449 llvm::errs() << "MSAN -- cannot instrument invoke of libatomic load."
7450 "Ignoring!\n";
7451 break;
7452 }
7453 visitLibAtomicLoad(CB);
7454 return;
7455 case LibFunc_atomic_store:
7456 visitLibAtomicStore(CB);
7457 return;
7458 default:
7459 break;
7460 }
7461 }
7462
7463 if (auto *Call = dyn_cast<CallInst>(Val: &CB)) {
7464 assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere");
7465
7466 // We are going to insert code that relies on the fact that the callee
7467 // will become a non-readonly function after it is instrumented by us. To
7468 // prevent this code from being optimized out, mark that function
7469 // non-readonly in advance.
7470 // TODO: We can likely do better than dropping memory() completely here.
7471 AttributeMask B;
7472 B.addAttribute(Val: Attribute::Memory).addAttribute(Val: Attribute::Speculatable);
7473
7474 Call->removeFnAttrs(AttrsToRemove: B);
7475 if (Function *Func = Call->getCalledFunction()) {
7476 Func->removeFnAttrs(Attrs: B);
7477 }
7478
7479 maybeMarkSanitizerLibraryCallNoBuiltin(CI: Call, TLI);
7480 }
7481 IRBuilder<> IRB(&CB);
7482 bool MayCheckCall = MS.EagerChecks;
7483 if (Function *Func = CB.getCalledFunction()) {
7484 // __sanitizer_unaligned_{load,store} functions may be called by users
7485 // and always expects shadows in the TLS. So don't check them.
7486 MayCheckCall &= !Func->getName().starts_with(Prefix: "__sanitizer_unaligned_");
7487 }
7488
7489 unsigned ArgOffset = 0;
7490 LLVM_DEBUG(dbgs() << " CallSite: " << CB << "\n");
7491 for (const auto &[i, A] : llvm::enumerate(First: CB.args())) {
7492 if (!A->getType()->isSized()) {
7493 LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << CB << "\n");
7494 continue;
7495 }
7496
7497 if (A->getType()->isScalableTy()) {
7498 LLVM_DEBUG(dbgs() << "Arg " << i << " is vscale: " << CB << "\n");
7499 // Handle as noundef, but don't reserve tls slots.
7500 insertCheckShadowOf(Val: A, OrigIns: &CB);
7501 continue;
7502 }
7503
7504 unsigned Size = 0;
7505 const DataLayout &DL = F.getDataLayout();
7506
7507 bool ByVal = CB.paramHasAttr(ArgNo: i, Kind: Attribute::ByVal);
7508 bool NoUndef = CB.paramHasAttr(ArgNo: i, Kind: Attribute::NoUndef);
7509 bool EagerCheck = MayCheckCall && !ByVal && NoUndef;
7510
7511 if (EagerCheck) {
7512 insertCheckShadowOf(Val: A, OrigIns: &CB);
7513 Size = DL.getTypeAllocSize(Ty: A->getType());
7514 } else {
7515 [[maybe_unused]] Value *Store = nullptr;
7516 // Compute the Shadow for arg even if it is ByVal, because
7517 // in that case getShadow() will copy the actual arg shadow to
7518 // __msan_param_tls.
7519 Value *ArgShadow = getShadow(V: A);
7520 Value *ArgShadowBase = getShadowPtrForArgument(IRB, ArgOffset);
7521 LLVM_DEBUG(dbgs() << " Arg#" << i << ": " << *A
7522 << " Shadow: " << *ArgShadow << "\n");
7523 if (ByVal) {
7524 // ByVal requires some special handling as it's too big for a single
7525 // load
7526 assert(A->getType()->isPointerTy() &&
7527 "ByVal argument is not a pointer!");
7528 Size = DL.getTypeAllocSize(Ty: CB.getParamByValType(ArgNo: i));
7529 if (ArgOffset + Size > kParamTLSSize)
7530 break;
7531 const MaybeAlign ParamAlignment(CB.getParamAlign(ArgNo: i));
7532 MaybeAlign Alignment = std::nullopt;
7533 if (ParamAlignment)
7534 Alignment = std::min(a: *ParamAlignment, b: kShadowTLSAlignment);
7535 Value *AShadowPtr, *AOriginPtr;
7536 std::tie(args&: AShadowPtr, args&: AOriginPtr) =
7537 getShadowOriginPtr(Addr: A, IRB, ShadowTy: IRB.getInt8Ty(), Alignment,
7538 /*isStore*/ false);
7539 if (!PropagateShadow) {
7540 Store = IRB.CreateMemSet(Ptr: ArgShadowBase,
7541 Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
7542 Size, Align: Alignment);
7543 } else {
7544 Store = IRB.CreateMemCpy(Dst: ArgShadowBase, DstAlign: Alignment, Src: AShadowPtr,
7545 SrcAlign: Alignment, Size);
7546 if (MS.TrackOrigins) {
7547 Value *ArgOriginBase = getOriginPtrForArgument(IRB, ArgOffset);
7548 // FIXME: OriginSize should be:
7549 // alignTo(A % kMinOriginAlignment + Size, kMinOriginAlignment)
7550 unsigned OriginSize = alignTo(Size, A: kMinOriginAlignment);
7551 IRB.CreateMemCpy(
7552 Dst: ArgOriginBase,
7553 /* by origin_tls[ArgOffset] */ DstAlign: kMinOriginAlignment,
7554 Src: AOriginPtr,
7555 /* by getShadowOriginPtr */ SrcAlign: kMinOriginAlignment, Size: OriginSize);
7556 }
7557 }
7558 } else {
7559 // Any other parameters mean we need bit-grained tracking of uninit
7560 // data
7561 Size = DL.getTypeAllocSize(Ty: A->getType());
7562 if (ArgOffset + Size > kParamTLSSize)
7563 break;
7564 Store = IRB.CreateAlignedStore(Val: ArgShadow, Ptr: ArgShadowBase,
7565 Align: kShadowTLSAlignment);
7566 Constant *Cst = dyn_cast<Constant>(Val: ArgShadow);
7567 if (MS.TrackOrigins && !(Cst && Cst->isNullValue())) {
7568 IRB.CreateStore(Val: getOrigin(V: A),
7569 Ptr: getOriginPtrForArgument(IRB, ArgOffset));
7570 }
7571 }
7572 assert(Store != nullptr);
7573 LLVM_DEBUG(dbgs() << " Param:" << *Store << "\n");
7574 }
7575 assert(Size != 0);
7576 ArgOffset += alignTo(Size, A: kShadowTLSAlignment);
7577 }
7578 LLVM_DEBUG(dbgs() << " done with call args\n");
7579
7580 FunctionType *FT = CB.getFunctionType();
7581 if (FT->isVarArg()) {
7582 VAHelper->visitCallBase(CB, IRB);
7583 }
7584
7585 // Now, get the shadow for the RetVal.
7586 if (!CB.getType()->isSized())
7587 return;
7588 // Don't emit the epilogue for musttail call returns.
7589 if (isa<CallInst>(Val: CB) && cast<CallInst>(Val&: CB).isMustTailCall())
7590 return;
7591
7592 if (MayCheckCall && CB.hasRetAttr(Kind: Attribute::NoUndef)) {
7593 setShadow(V: &CB, SV: getCleanShadow(V: &CB));
7594 setOrigin(V: &CB, Origin: getCleanOrigin());
7595 return;
7596 }
7597
7598 IRBuilder<> IRBBefore(&CB);
7599 // Until we have full dynamic coverage, make sure the retval shadow is 0.
7600 Value *Base = getShadowPtrForRetval(IRB&: IRBBefore);
7601 IRBBefore.CreateAlignedStore(Val: getCleanShadow(V: &CB), Ptr: Base,
7602 Align: kShadowTLSAlignment);
7603 BasicBlock::iterator NextInsn;
7604 if (isa<CallInst>(Val: CB)) {
7605 NextInsn = ++CB.getIterator();
7606 assert(NextInsn != CB.getParent()->end());
7607 } else {
7608 BasicBlock *NormalDest = cast<InvokeInst>(Val&: CB).getNormalDest();
7609 if (!NormalDest->getSinglePredecessor()) {
7610 // FIXME: this case is tricky, so we are just conservative here.
7611 // Perhaps we need to split the edge between this BB and NormalDest,
7612 // but a naive attempt to use SplitEdge leads to a crash.
7613 setShadow(V: &CB, SV: getCleanShadow(V: &CB));
7614 setOrigin(V: &CB, Origin: getCleanOrigin());
7615 return;
7616 }
7617 // FIXME: NextInsn is likely in a basic block that has not been visited
7618 // yet. Anything inserted there will be instrumented by MSan later!
7619 NextInsn = NormalDest->getFirstInsertionPt();
7620 assert(NextInsn != NormalDest->end() &&
7621 "Could not find insertion point for retval shadow load");
7622 }
7623 IRBuilder<> IRBAfter(&*NextInsn);
7624 Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
7625 Ty: getShadowTy(V: &CB), Ptr: getShadowPtrForRetval(IRB&: IRBAfter), Align: kShadowTLSAlignment,
7626 Name: "_msret");
7627 setShadow(V: &CB, SV: RetvalShadow);
7628 if (MS.TrackOrigins)
7629 setOrigin(V: &CB, Origin: IRBAfter.CreateLoad(Ty: MS.OriginTy, Ptr: getOriginPtrForRetval()));
7630 }
7631
7632 bool isAMustTailRetVal(Value *RetVal) {
7633 if (auto *I = dyn_cast<BitCastInst>(Val: RetVal)) {
7634 RetVal = I->getOperand(i_nocapture: 0);
7635 }
7636 if (auto *I = dyn_cast<CallInst>(Val: RetVal)) {
7637 return I->isMustTailCall();
7638 }
7639 return false;
7640 }
7641
7642 void visitReturnInst(ReturnInst &I) {
7643 IRBuilder<> IRB(&I);
7644 Value *RetVal = I.getReturnValue();
7645 if (!RetVal)
7646 return;
7647 // Don't emit the epilogue for musttail call returns.
7648 if (isAMustTailRetVal(RetVal))
7649 return;
7650 Value *ShadowPtr = getShadowPtrForRetval(IRB);
7651 bool HasNoUndef = F.hasRetAttribute(Kind: Attribute::NoUndef);
7652 bool StoreShadow = !(MS.EagerChecks && HasNoUndef);
7653 // FIXME: Consider using SpecialCaseList to specify a list of functions that
7654 // must always return fully initialized values. For now, we hardcode "main".
7655 bool EagerCheck = (MS.EagerChecks && HasNoUndef) || (F.getName() == "main");
7656
7657 Value *Shadow = getShadow(V: RetVal);
7658 bool StoreOrigin = true;
7659 if (EagerCheck) {
7660 insertCheckShadowOf(Val: RetVal, OrigIns: &I);
7661 Shadow = getCleanShadow(V: RetVal);
7662 StoreOrigin = false;
7663 }
7664
7665 // The caller may still expect information passed over TLS if we pass our
7666 // check
7667 if (StoreShadow) {
7668 IRB.CreateAlignedStore(Val: Shadow, Ptr: ShadowPtr, Align: kShadowTLSAlignment);
7669 if (MS.TrackOrigins && StoreOrigin)
7670 IRB.CreateStore(Val: getOrigin(V: RetVal), Ptr: getOriginPtrForRetval());
7671 }
7672 }
7673
7674 void visitPHINode(PHINode &I) {
7675 IRBuilder<> IRB(&I);
7676 if (!PropagateShadow) {
7677 setShadow(V: &I, SV: getCleanShadow(V: &I));
7678 setOrigin(V: &I, Origin: getCleanOrigin());
7679 return;
7680 }
7681
7682 ShadowPHINodes.push_back(Elt: &I);
7683 setShadow(V: &I, SV: IRB.CreatePHI(Ty: getShadowTy(V: &I), NumReservedValues: I.getNumIncomingValues(),
7684 Name: "_msphi_s"));
7685 if (MS.TrackOrigins)
7686 setOrigin(
7687 V: &I, Origin: IRB.CreatePHI(Ty: MS.OriginTy, NumReservedValues: I.getNumIncomingValues(), Name: "_msphi_o"));
7688 }
7689
7690 Value *getLocalVarIdptr(AllocaInst &I) {
7691 ConstantInt *IntConst =
7692 ConstantInt::get(Ty: Type::getInt32Ty(C&: (*F.getParent()).getContext()), V: 0);
7693 return new GlobalVariable(*F.getParent(), IntConst->getType(),
7694 /*isConstant=*/false, GlobalValue::PrivateLinkage,
7695 IntConst);
7696 }
7697
7698 Value *getLocalVarDescription(AllocaInst &I) {
7699 return createPrivateConstGlobalForString(M&: *F.getParent(), Str: I.getName());
7700 }
7701
7702 void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
7703 if (PoisonStack && ClPoisonStackWithCall) {
7704 IRB.CreateCall(Callee: MS.MsanPoisonStackFn, Args: {&I, Len});
7705 } else {
7706 Value *ShadowBase, *OriginBase;
7707 std::tie(args&: ShadowBase, args&: OriginBase) = getShadowOriginPtr(
7708 Addr: &I, IRB, ShadowTy: IRB.getInt8Ty(), Alignment: Align(1), /*isStore*/ true);
7709
7710 Value *PoisonValue = IRB.getInt8(C: PoisonStack ? ClPoisonStackPattern : 0);
7711 IRB.CreateMemSet(Ptr: ShadowBase, Val: PoisonValue, Size: Len, Align: I.getAlign());
7712 }
7713
7714 if (PoisonStack && MS.TrackOrigins) {
7715 Value *Idptr = getLocalVarIdptr(I);
7716 if (ClPrintStackNames) {
7717 Value *Descr = getLocalVarDescription(I);
7718 IRB.CreateCall(Callee: MS.MsanSetAllocaOriginWithDescriptionFn,
7719 Args: {&I, Len, Idptr, Descr});
7720 } else {
7721 IRB.CreateCall(Callee: MS.MsanSetAllocaOriginNoDescriptionFn, Args: {&I, Len, Idptr});
7722 }
7723 }
7724 }
7725
7726 void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
7727 Value *Descr = getLocalVarDescription(I);
7728 if (PoisonStack) {
7729 IRB.CreateCall(Callee: MS.MsanPoisonAllocaFn, Args: {&I, Len, Descr});
7730 } else {
7731 IRB.CreateCall(Callee: MS.MsanUnpoisonAllocaFn, Args: {&I, Len});
7732 }
7733 }
7734
7735 void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
7736 if (!InsPoint)
7737 InsPoint = &I;
7738 NextNodeIRBuilder IRB(InsPoint);
7739 Value *Len = IRB.CreateAllocationSize(DestTy: MS.IntptrTy, AI: &I);
7740
7741 if (MS.CompileKernel)
7742 poisonAllocaKmsan(I, IRB, Len);
7743 else
7744 poisonAllocaUserspace(I, IRB, Len);
7745 }
7746
7747 void visitAllocaInst(AllocaInst &I) {
7748 setShadow(V: &I, SV: getCleanShadow(V: &I));
7749 setOrigin(V: &I, Origin: getCleanOrigin());
7750 // We'll get to this alloca later unless it's poisoned at the corresponding
7751 // llvm.lifetime.start.
7752 AllocaSet.insert(X: &I);
7753 }
7754
7755 void visitSelectInst(SelectInst &I) {
7756 // a = select b, c, d
7757 Value *B = I.getCondition();
7758 Value *C = I.getTrueValue();
7759 Value *D = I.getFalseValue();
7760
7761 handleSelectLikeInst(I, B, C, D);
7762 }
7763
7764 void handleSelectLikeInst(Instruction &I, Value *B, Value *C, Value *D) {
7765 IRBuilder<> IRB(&I);
7766
7767 Value *Sb = getShadow(V: B);
7768 Value *Sc = getShadow(V: C);
7769 Value *Sd = getShadow(V: D);
7770
7771 Value *Ob = MS.TrackOrigins ? getOrigin(V: B) : nullptr;
7772 Value *Oc = MS.TrackOrigins ? getOrigin(V: C) : nullptr;
7773 Value *Od = MS.TrackOrigins ? getOrigin(V: D) : nullptr;
7774
7775 // Result shadow if condition shadow is 0.
7776 Value *Sa0 = IRB.CreateSelect(C: B, True: Sc, False: Sd);
7777 Value *Sa1;
7778 if (I.getType()->isAggregateType()) {
7779 // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
7780 // an extra "select". This results in much more compact IR.
7781 // Sa = select Sb, poisoned, (select b, Sc, Sd)
7782 Sa1 = getPoisonedShadow(ShadowTy: getShadowTy(OrigTy: I.getType()));
7783 } else if (isScalableNonVectorType(Ty: I.getType())) {
7784 // This is intended to handle target("aarch64.svcount"), which can't be
7785 // handled in the else branch because of incompatibility with CreateXor
7786 // ("The supported LLVM operations on this type are limited to load,
7787 // store, phi, select and alloca instructions").
7788
7789 // TODO: this currently underapproximates. Use Arm SVE EOR in the else
7790 // branch as needed instead.
7791 Sa1 = getCleanShadow(OrigTy: getShadowTy(OrigTy: I.getType()));
7792 } else {
7793 // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
7794 // If Sb (condition is poisoned), look for bits in c and d that are equal
7795 // and both unpoisoned.
7796 // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.
7797
7798 // Cast arguments to shadow-compatible type.
7799 C = CreateAppToShadowCast(IRB, V: C);
7800 D = CreateAppToShadowCast(IRB, V: D);
7801
7802 // Result shadow if condition shadow is 1.
7803 Sa1 = IRB.CreateOr(Ops: {IRB.CreateXor(LHS: C, RHS: D), Sc, Sd});
7804 }
7805 Value *Sa = IRB.CreateSelect(C: Sb, True: Sa1, False: Sa0, Name: "_msprop_select");
7806 setShadow(V: &I, SV: Sa);
7807 if (MS.TrackOrigins) {
7808 // Origins are always i32, so any vector conditions must be flattened.
7809 // FIXME: consider tracking vector origins for app vectors?
7810 if (B->getType()->isVectorTy()) {
7811 B = convertToBool(V: B, IRB);
7812 Sb = convertToBool(V: Sb, IRB);
7813 }
7814 // a = select b, c, d
7815 // Oa = Sb ? Ob : (b ? Oc : Od)
7816 setOrigin(V: &I, Origin: IRB.CreateSelect(C: Sb, True: Ob, False: IRB.CreateSelect(C: B, True: Oc, False: Od)));
7817 }
7818 }
7819
7820 void visitLandingPadInst(LandingPadInst &I) {
7821 // Do nothing.
7822 // See https://github.com/google/sanitizers/issues/504
7823 setShadow(V: &I, SV: getCleanShadow(V: &I));
7824 setOrigin(V: &I, Origin: getCleanOrigin());
7825 }
7826
7827 void visitCatchSwitchInst(CatchSwitchInst &I) {
7828 setShadow(V: &I, SV: getCleanShadow(V: &I));
7829 setOrigin(V: &I, Origin: getCleanOrigin());
7830 }
7831
7832 void visitFuncletPadInst(FuncletPadInst &I) {
7833 setShadow(V: &I, SV: getCleanShadow(V: &I));
7834 setOrigin(V: &I, Origin: getCleanOrigin());
7835 }
7836
7837 void visitGetElementPtrInst(GetElementPtrInst &I) { handleShadowOr(I); }
7838
7839 void visitExtractValueInst(ExtractValueInst &I) {
7840 IRBuilder<> IRB(&I);
7841 Value *Agg = I.getAggregateOperand();
7842 LLVM_DEBUG(dbgs() << "ExtractValue: " << I << "\n");
7843 Value *AggShadow = getShadow(V: Agg);
7844 LLVM_DEBUG(dbgs() << " AggShadow: " << *AggShadow << "\n");
7845 Value *ResShadow = IRB.CreateExtractValue(Agg: AggShadow, Idxs: I.getIndices());
7846 LLVM_DEBUG(dbgs() << " ResShadow: " << *ResShadow << "\n");
7847 setShadow(V: &I, SV: ResShadow);
7848 setOriginForNaryOp(I);
7849 }
7850
7851 void visitInsertValueInst(InsertValueInst &I) {
7852 IRBuilder<> IRB(&I);
7853 LLVM_DEBUG(dbgs() << "InsertValue: " << I << "\n");
7854 Value *AggShadow = getShadow(V: I.getAggregateOperand());
7855 Value *InsShadow = getShadow(V: I.getInsertedValueOperand());
7856 LLVM_DEBUG(dbgs() << " AggShadow: " << *AggShadow << "\n");
7857 LLVM_DEBUG(dbgs() << " InsShadow: " << *InsShadow << "\n");
7858 Value *Res = IRB.CreateInsertValue(Agg: AggShadow, Val: InsShadow, Idxs: I.getIndices());
7859 LLVM_DEBUG(dbgs() << " Res: " << *Res << "\n");
7860 setShadow(V: &I, SV: Res);
7861 setOriginForNaryOp(I);
7862 }
7863
7864 void dumpInst(Instruction &I, const Twine &Prefix) {
7865 // Instruction name only
7866 // For intrinsics, the full/overloaded name is used
7867 //
7868 // e.g., "call llvm.aarch64.neon.uqsub.v16i8"
7869 if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) {
7870 errs() << "ZZZ:" << Prefix << " call "
7871 << CI->getCalledFunction()->getName() << "\n";
7872 } else {
7873 errs() << "ZZZ:" << Prefix << " " << I.getOpcodeName() << "\n";
7874 }
7875
7876 // Instruction prototype (including return type and parameter types)
7877 // For intrinsics, we use the base/non-overloaded name
7878 //
7879 // e.g., "call <16 x i8> @llvm.aarch64.neon.uqsub(<16 x i8>, <16 x i8>)"
7880 unsigned NumOperands = I.getNumOperands();
7881 if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) {
7882 errs() << "YYY:" << Prefix << " call " << *I.getType() << " @";
7883
7884 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: CI))
7885 errs() << Intrinsic::getBaseName(id: II->getIntrinsicID());
7886 else
7887 errs() << CI->getCalledFunction()->getName();
7888
7889 errs() << "(";
7890
7891 // The last operand of a CallInst is the function itself.
7892 NumOperands--;
7893 } else
7894 errs() << "YYY:" << Prefix << " " << *I.getType() << " "
7895 << I.getOpcodeName() << "(";
7896
7897 for (size_t i = 0; i < NumOperands; i++) {
7898 if (i > 0)
7899 errs() << ", ";
7900
7901 errs() << *(I.getOperand(i)->getType());
7902 }
7903
7904 errs() << ")\n";
7905
7906 // Full instruction, including types and operand values
7907 // For intrinsics, the full/overloaded name is used
7908 //
7909 // e.g., "%vqsubq_v.i15 = call noundef <16 x i8>
7910 // @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %vext21.i,
7911 // <16 x i8> splat (i8 1)), !dbg !66"
7912 errs() << "QQQ:" << Prefix << " " << I << "\n";
7913 }
7914
7915 void visitResumeInst(ResumeInst &I) {
7916 LLVM_DEBUG(dbgs() << "Resume: " << I << "\n");
7917 // Nothing to do here.
7918 }
7919
7920 void visitCleanupReturnInst(CleanupReturnInst &CRI) {
7921 LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
7922 // Nothing to do here.
7923 }
7924
7925 void visitCatchReturnInst(CatchReturnInst &CRI) {
7926 LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
7927 // Nothing to do here.
7928 }
7929
7930 void instrumentAsmArgument(Value *Operand, Type *ElemTy, Instruction &I,
7931 IRBuilder<> &IRB, const DataLayout &DL,
7932 bool isOutput) {
7933 // For each assembly argument, we check its value for being initialized.
7934 // If the argument is a pointer, we assume it points to a single element
7935 // of the corresponding type (or to a 8-byte word, if the type is unsized).
7936 // Each such pointer is instrumented with a call to the runtime library.
7937 Type *OpType = Operand->getType();
7938 // Check the operand value itself.
7939 insertCheckShadowOf(Val: Operand, OrigIns: &I);
7940 if (!OpType->isPointerTy() || !isOutput) {
7941 assert(!isOutput);
7942 return;
7943 }
7944 if (!ElemTy->isSized())
7945 return;
7946 auto Size = DL.getTypeStoreSize(Ty: ElemTy);
7947 Value *SizeVal = IRB.CreateTypeSize(Ty: MS.IntptrTy, Size);
7948 if (MS.CompileKernel) {
7949 IRB.CreateCall(Callee: MS.MsanInstrumentAsmStoreFn, Args: {Operand, SizeVal});
7950 } else {
7951 // ElemTy, derived from elementtype(), does not encode the alignment of
7952 // the pointer. Conservatively assume that the shadow memory is unaligned.
7953 // When Size is large, avoid StoreInst as it would expand to many
7954 // instructions.
7955 auto [ShadowPtr, _] =
7956 getShadowOriginPtrUserspace(Addr: Operand, IRB, ShadowTy: IRB.getInt8Ty(), Alignment: Align(1));
7957 if (Size <= 32)
7958 IRB.CreateAlignedStore(Val: getCleanShadow(OrigTy: ElemTy), Ptr: ShadowPtr, Align: Align(1));
7959 else
7960 IRB.CreateMemSet(Ptr: ShadowPtr, Val: ConstantInt::getNullValue(Ty: IRB.getInt8Ty()),
7961 Size: SizeVal, Align: Align(1));
7962 }
7963 }
7964
7965 /// Get the number of output arguments returned by pointers.
7966 int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
7967 int NumRetOutputs = 0;
7968 int NumOutputs = 0;
7969 Type *RetTy = cast<Value>(Val: CB)->getType();
7970 if (!RetTy->isVoidTy()) {
7971 // Register outputs are returned via the CallInst return value.
7972 auto *ST = dyn_cast<StructType>(Val: RetTy);
7973 if (ST)
7974 NumRetOutputs = ST->getNumElements();
7975 else
7976 NumRetOutputs = 1;
7977 }
7978 InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
7979 for (const InlineAsm::ConstraintInfo &Info : Constraints) {
7980 switch (Info.Type) {
7981 case InlineAsm::isOutput:
7982 NumOutputs++;
7983 break;
7984 default:
7985 break;
7986 }
7987 }
7988 return NumOutputs - NumRetOutputs;
7989 }
7990
7991 void visitAsmInstruction(Instruction &I) {
7992 // Conservative inline assembly handling: check for poisoned shadow of
7993 // asm() arguments, then unpoison the result and all the memory locations
7994 // pointed to by those arguments.
7995 // An inline asm() statement in C++ contains lists of input and output
7996 // arguments used by the assembly code. These are mapped to operands of the
7997 // CallInst as follows:
7998 // - nR register outputs ("=r) are returned by value in a single structure
7999 // (SSA value of the CallInst);
8000 // - nO other outputs ("=m" and others) are returned by pointer as first
8001 // nO operands of the CallInst;
8002 // - nI inputs ("r", "m" and others) are passed to CallInst as the
8003 // remaining nI operands.
8004 // The total number of asm() arguments in the source is nR+nO+nI, and the
8005 // corresponding CallInst has nO+nI+1 operands (the last operand is the
8006 // function to be called).
8007 const DataLayout &DL = F.getDataLayout();
8008 CallBase *CB = cast<CallBase>(Val: &I);
8009 IRBuilder<> IRB(&I);
8010 InlineAsm *IA = cast<InlineAsm>(Val: CB->getCalledOperand());
8011 int OutputArgs = getNumOutputArgs(IA, CB);
8012 // The last operand of a CallInst is the function itself.
8013 int NumOperands = CB->getNumOperands() - 1;
8014
8015 // Check input arguments. Doing so before unpoisoning output arguments, so
8016 // that we won't overwrite uninit values before checking them.
8017 for (int i = OutputArgs; i < NumOperands; i++) {
8018 Value *Operand = CB->getOperand(i_nocapture: i);
8019 instrumentAsmArgument(Operand, ElemTy: CB->getParamElementType(ArgNo: i), I, IRB, DL,
8020 /*isOutput*/ false);
8021 }
8022 // Unpoison output arguments. This must happen before the actual InlineAsm
8023 // call, so that the shadow for memory published in the asm() statement
8024 // remains valid.
8025 for (int i = 0; i < OutputArgs; i++) {
8026 Value *Operand = CB->getOperand(i_nocapture: i);
8027 instrumentAsmArgument(Operand, ElemTy: CB->getParamElementType(ArgNo: i), I, IRB, DL,
8028 /*isOutput*/ true);
8029 }
8030
8031 setShadow(V: &I, SV: getCleanShadow(V: &I));
8032 setOrigin(V: &I, Origin: getCleanOrigin());
8033 }
8034
8035 void visitFreezeInst(FreezeInst &I) {
8036 // Freeze always returns a fully defined value.
8037 setShadow(V: &I, SV: getCleanShadow(V: &I));
8038 setOrigin(V: &I, Origin: getCleanOrigin());
8039 }
8040
8041 void visitInstruction(Instruction &I) {
8042 // Everything else: stop propagating and check for poisoned shadow.
8043 if (ClDumpStrictInstructions)
8044 dumpInst(I, Prefix: "Strict");
8045 LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
8046 for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
8047 Value *Operand = I.getOperand(i);
8048 if (Operand->getType()->isSized())
8049 insertCheckShadowOf(Val: Operand, OrigIns: &I);
8050 }
8051 setShadow(V: &I, SV: getCleanShadow(V: &I));
8052 setOrigin(V: &I, Origin: getCleanOrigin());
8053 }
8054};
8055
8056struct VarArgHelperBase : public VarArgHelper {
8057 Function &F;
8058 MemorySanitizer &MS;
8059 MemorySanitizerVisitor &MSV;
8060 SmallVector<CallInst *, 16> VAStartInstrumentationList;
8061 const unsigned VAListTagSize;
8062
8063 VarArgHelperBase(Function &F, MemorySanitizer &MS,
8064 MemorySanitizerVisitor &MSV, unsigned VAListTagSize)
8065 : F(F), MS(MS), MSV(MSV), VAListTagSize(VAListTagSize) {}
8066
8067 Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
8068 Value *Base = IRB.CreatePointerCast(V: MS.VAArgTLS, DestTy: MS.IntptrTy);
8069 return IRB.CreateAdd(LHS: Base, RHS: ConstantInt::get(Ty: MS.IntptrTy, V: ArgOffset));
8070 }
8071
8072 /// Compute the shadow address for a given va_arg.
8073 Value *getShadowPtrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
8074 return IRB.CreatePtrAdd(
8075 Ptr: MS.VAArgTLS, Offset: ConstantInt::get(Ty: MS.IntptrTy, V: ArgOffset), Name: "_msarg_va_s");
8076 }
8077
8078 /// Compute the shadow address for a given va_arg.
8079 Value *getShadowPtrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset,
8080 unsigned ArgSize) {
8081 // Make sure we don't overflow __msan_va_arg_tls.
8082 if (ArgOffset + ArgSize > kParamTLSSize)
8083 return nullptr;
8084 return getShadowPtrForVAArgument(IRB, ArgOffset);
8085 }
8086
8087 /// Compute the origin address for a given va_arg.
8088 Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
8089 // getOriginPtrForVAArgument() is always called after
8090 // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
8091 // overflow.
8092 return IRB.CreatePtrAdd(Ptr: MS.VAArgOriginTLS,
8093 Offset: ConstantInt::get(Ty: MS.IntptrTy, V: ArgOffset),
8094 Name: "_msarg_va_o");
8095 }
8096
8097 void CleanUnusedTLS(IRBuilder<> &IRB, Value *ShadowBase,
8098 unsigned BaseOffset) {
8099 // The tails of __msan_va_arg_tls is not large enough to fit full
8100 // value shadow, but it will be copied to backup anyway. Make it
8101 // clean.
8102 if (BaseOffset >= kParamTLSSize)
8103 return;
8104 Value *TailSize =
8105 ConstantInt::getSigned(Ty: IRB.getInt32Ty(), V: kParamTLSSize - BaseOffset);
8106 IRB.CreateMemSet(Ptr: ShadowBase, Val: ConstantInt::getNullValue(Ty: IRB.getInt8Ty()),
8107 Size: TailSize, Align: Align(8));
8108 }
8109
8110 void unpoisonVAListTagForInst(IntrinsicInst &I) {
8111 IRBuilder<> IRB(&I);
8112 Value *VAListTag = I.getArgOperand(i: 0);
8113 const Align Alignment = Align(8);
8114 auto [ShadowPtr, OriginPtr] = MSV.getShadowOriginPtr(
8115 Addr: VAListTag, IRB, ShadowTy: IRB.getInt8Ty(), Alignment, /*isStore*/ true);
8116 // Unpoison the whole __va_list_tag.
8117 IRB.CreateMemSet(Ptr: ShadowPtr, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
8118 Size: VAListTagSize, Align: Alignment, isVolatile: false);
8119 }
8120
8121 void visitVAStartInst(VAStartInst &I) override {
8122 if (F.getCallingConv() == CallingConv::Win64)
8123 return;
8124 VAStartInstrumentationList.push_back(Elt: &I);
8125 unpoisonVAListTagForInst(I);
8126 }
8127
8128 void visitVACopyInst(VACopyInst &I) override {
8129 if (F.getCallingConv() == CallingConv::Win64)
8130 return;
8131 unpoisonVAListTagForInst(I);
8132 }
8133};
8134
8135/// AMD64-specific implementation of VarArgHelper.
8136struct VarArgAMD64Helper : public VarArgHelperBase {
8137 // An unfortunate workaround for asymmetric lowering of va_arg stuff.
8138 // See a comment in visitCallBase for more details.
8139 static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
8140 static const unsigned AMD64FpEndOffsetSSE = 176;
8141 // If SSE is disabled, fp_offset in va_list is zero.
8142 static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;
8143
8144 unsigned AMD64FpEndOffset;
8145 AllocaInst *VAArgTLSCopy = nullptr;
8146 AllocaInst *VAArgTLSOriginCopy = nullptr;
8147 Value *VAArgOverflowSize = nullptr;
8148
8149 enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
8150
8151 VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
8152 MemorySanitizerVisitor &MSV)
8153 : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/24) {
8154 AMD64FpEndOffset = AMD64FpEndOffsetSSE;
8155 for (const auto &Attr : F.getAttributes().getFnAttrs()) {
8156 if (Attr.isStringAttribute() &&
8157 (Attr.getKindAsString() == "target-features")) {
8158 if (Attr.getValueAsString().contains(Other: "-sse"))
8159 AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
8160 break;
8161 }
8162 }
8163 }
8164
8165 ArgKind classifyArgument(Value *arg) {
8166 // A very rough approximation of X86_64 argument classification rules.
8167 Type *T = arg->getType();
8168 if (T->isX86_FP80Ty())
8169 return AK_Memory;
8170 if (T->isFPOrFPVectorTy())
8171 return AK_FloatingPoint;
8172 if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
8173 return AK_GeneralPurpose;
8174 if (T->isPointerTy())
8175 return AK_GeneralPurpose;
8176 return AK_Memory;
8177 }
8178
8179 // For VarArg functions, store the argument shadow in an ABI-specific format
8180 // that corresponds to va_list layout.
8181 // We do this because Clang lowers va_arg in the frontend, and this pass
8182 // only sees the low level code that deals with va_list internals.
8183 // A much easier alternative (provided that Clang emits va_arg instructions)
8184 // would have been to associate each live instance of va_list with a copy of
8185 // MSanParamTLS, and extract shadow on va_arg() call in the argument list
8186 // order.
8187 void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
8188 unsigned GpOffset = 0;
8189 unsigned FpOffset = AMD64GpEndOffset;
8190 unsigned OverflowOffset = AMD64FpEndOffset;
8191 const DataLayout &DL = F.getDataLayout();
8192
8193 for (const auto &[ArgNo, A] : llvm::enumerate(First: CB.args())) {
8194 bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
8195 bool IsByVal = CB.paramHasAttr(ArgNo, Kind: Attribute::ByVal);
8196 if (IsByVal) {
8197 // ByVal arguments always go to the overflow area.
8198 // Fixed arguments passed through the overflow area will be stepped
8199 // over by va_start, so don't count them towards the offset.
8200 if (IsFixed)
8201 continue;
8202 assert(A->getType()->isPointerTy());
8203 Type *RealTy = CB.getParamByValType(ArgNo);
8204 uint64_t ArgSize = DL.getTypeAllocSize(Ty: RealTy);
8205 uint64_t AlignedSize = alignTo(Value: ArgSize, Align: 8);
8206 unsigned BaseOffset = OverflowOffset;
8207 Value *ShadowBase = getShadowPtrForVAArgument(IRB, ArgOffset: OverflowOffset);
8208 Value *OriginBase = nullptr;
8209 if (MS.TrackOrigins)
8210 OriginBase = getOriginPtrForVAArgument(IRB, ArgOffset: OverflowOffset);
8211 OverflowOffset += AlignedSize;
8212
8213 if (OverflowOffset > kParamTLSSize) {
8214 CleanUnusedTLS(IRB, ShadowBase, BaseOffset);
8215 continue; // We have no space to copy shadow there.
8216 }
8217
8218 Value *ShadowPtr, *OriginPtr;
8219 std::tie(args&: ShadowPtr, args&: OriginPtr) =
8220 MSV.getShadowOriginPtr(Addr: A, IRB, ShadowTy: IRB.getInt8Ty(), Alignment: kShadowTLSAlignment,
8221 /*isStore*/ false);
8222 IRB.CreateMemCpy(Dst: ShadowBase, DstAlign: kShadowTLSAlignment, Src: ShadowPtr,
8223 SrcAlign: kShadowTLSAlignment, Size: ArgSize);
8224 if (MS.TrackOrigins)
8225 IRB.CreateMemCpy(Dst: OriginBase, DstAlign: kShadowTLSAlignment, Src: OriginPtr,
8226 SrcAlign: kShadowTLSAlignment, Size: ArgSize);
8227 } else {
8228 ArgKind AK = classifyArgument(arg: A);
8229 if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
8230 AK = AK_Memory;
8231 if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
8232 AK = AK_Memory;
8233 Value *ShadowBase, *OriginBase = nullptr;
8234 switch (AK) {
8235 case AK_GeneralPurpose:
8236 ShadowBase = getShadowPtrForVAArgument(IRB, ArgOffset: GpOffset);
8237 if (MS.TrackOrigins)
8238 OriginBase = getOriginPtrForVAArgument(IRB, ArgOffset: GpOffset);
8239 GpOffset += 8;
8240 assert(GpOffset <= kParamTLSSize);
8241 break;
8242 case AK_FloatingPoint:
8243 ShadowBase = getShadowPtrForVAArgument(IRB, ArgOffset: FpOffset);
8244 if (MS.TrackOrigins)
8245 OriginBase = getOriginPtrForVAArgument(IRB, ArgOffset: FpOffset);
8246 FpOffset += 16;
8247 assert(FpOffset <= kParamTLSSize);
8248 break;
8249 case AK_Memory:
8250 if (IsFixed)
8251 continue;
8252 uint64_t ArgSize = DL.getTypeAllocSize(Ty: A->getType());
8253 uint64_t AlignedSize = alignTo(Value: ArgSize, Align: 8);
8254 unsigned BaseOffset = OverflowOffset;
8255 ShadowBase = getShadowPtrForVAArgument(IRB, ArgOffset: OverflowOffset);
8256 if (MS.TrackOrigins) {
8257 OriginBase = getOriginPtrForVAArgument(IRB, ArgOffset: OverflowOffset);
8258 }
8259 OverflowOffset += AlignedSize;
8260 if (OverflowOffset > kParamTLSSize) {
8261 // We have no space to copy shadow there.
8262 CleanUnusedTLS(IRB, ShadowBase, BaseOffset);
8263 continue;
8264 }
8265 }
8266 // Take fixed arguments into account for GpOffset and FpOffset,
8267 // but don't actually store shadows for them.
8268 // TODO(glider): don't call get*PtrForVAArgument() for them.
8269 if (IsFixed)
8270 continue;
8271 Value *Shadow = MSV.getShadow(V: A);
8272 IRB.CreateAlignedStore(Val: Shadow, Ptr: ShadowBase, Align: kShadowTLSAlignment);
8273 if (MS.TrackOrigins) {
8274 Value *Origin = MSV.getOrigin(V: A);
8275 TypeSize StoreSize = DL.getTypeStoreSize(Ty: Shadow->getType());
8276 MSV.paintOrigin(IRB, Origin, OriginPtr: OriginBase, TS: StoreSize,
8277 Alignment: std::max(a: kShadowTLSAlignment, b: kMinOriginAlignment));
8278 }
8279 }
8280 }
8281 Constant *OverflowSize =
8282 ConstantInt::get(Ty: IRB.getInt64Ty(), V: OverflowOffset - AMD64FpEndOffset);
8283 IRB.CreateStore(Val: OverflowSize, Ptr: MS.VAArgOverflowSizeTLS);
8284 }
8285
8286 void finalizeInstrumentation() override {
8287 assert(!VAArgOverflowSize && !VAArgTLSCopy &&
8288 "finalizeInstrumentation called twice");
8289 if (!VAStartInstrumentationList.empty()) {
8290 // If there is a va_start in this function, make a backup copy of
8291 // va_arg_tls somewhere in the function entry block.
8292 IRBuilder<> IRB(MSV.FnPrologueEnd);
8293 VAArgOverflowSize =
8294 IRB.CreateLoad(Ty: IRB.getInt64Ty(), Ptr: MS.VAArgOverflowSizeTLS);
8295 Value *CopySize = IRB.CreateAdd(
8296 LHS: ConstantInt::get(Ty: MS.IntptrTy, V: AMD64FpEndOffset), RHS: VAArgOverflowSize);
8297 VAArgTLSCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
8298 VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
8299 IRB.CreateMemSet(Ptr: VAArgTLSCopy, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
8300 Size: CopySize, Align: kShadowTLSAlignment, isVolatile: false);
8301
8302 Value *SrcSize = IRB.CreateBinaryIntrinsic(
8303 ID: Intrinsic::umin, LHS: CopySize,
8304 RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kParamTLSSize));
8305 IRB.CreateMemCpy(Dst: VAArgTLSCopy, DstAlign: kShadowTLSAlignment, Src: MS.VAArgTLS,
8306 SrcAlign: kShadowTLSAlignment, Size: SrcSize);
8307 if (MS.TrackOrigins) {
8308 VAArgTLSOriginCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
8309 VAArgTLSOriginCopy->setAlignment(kShadowTLSAlignment);
8310 IRB.CreateMemCpy(Dst: VAArgTLSOriginCopy, DstAlign: kShadowTLSAlignment,
8311 Src: MS.VAArgOriginTLS, SrcAlign: kShadowTLSAlignment, Size: SrcSize);
8312 }
8313 }
8314
8315 // Instrument va_start.
8316 // Copy va_list shadow from the backup copy of the TLS contents.
8317 for (CallInst *OrigInst : VAStartInstrumentationList) {
8318 NextNodeIRBuilder IRB(OrigInst);
8319 Value *VAListTag = OrigInst->getArgOperand(i: 0);
8320
8321 Value *RegSaveAreaPtrPtr =
8322 IRB.CreatePtrAdd(Ptr: VAListTag, Offset: ConstantInt::get(Ty: MS.IntptrTy, V: 16));
8323 Value *RegSaveAreaPtr = IRB.CreateLoad(Ty: MS.PtrTy, Ptr: RegSaveAreaPtrPtr);
8324 Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
8325 const Align Alignment = Align(16);
8326 std::tie(args&: RegSaveAreaShadowPtr, args&: RegSaveAreaOriginPtr) =
8327 MSV.getShadowOriginPtr(Addr: RegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
8328 Alignment, /*isStore*/ true);
8329 IRB.CreateMemCpy(Dst: RegSaveAreaShadowPtr, DstAlign: Alignment, Src: VAArgTLSCopy, SrcAlign: Alignment,
8330 Size: AMD64FpEndOffset);
8331 if (MS.TrackOrigins)
8332 IRB.CreateMemCpy(Dst: RegSaveAreaOriginPtr, DstAlign: Alignment, Src: VAArgTLSOriginCopy,
8333 SrcAlign: Alignment, Size: AMD64FpEndOffset);
8334 Value *OverflowArgAreaPtrPtr =
8335 IRB.CreatePtrAdd(Ptr: VAListTag, Offset: ConstantInt::get(Ty: MS.IntptrTy, V: 8));
8336 Value *OverflowArgAreaPtr =
8337 IRB.CreateLoad(Ty: MS.PtrTy, Ptr: OverflowArgAreaPtrPtr);
8338 Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
8339 std::tie(args&: OverflowArgAreaShadowPtr, args&: OverflowArgAreaOriginPtr) =
8340 MSV.getShadowOriginPtr(Addr: OverflowArgAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
8341 Alignment, /*isStore*/ true);
8342 Value *SrcPtr = IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(), Ptr: VAArgTLSCopy,
8343 Idx0: AMD64FpEndOffset);
8344 IRB.CreateMemCpy(Dst: OverflowArgAreaShadowPtr, DstAlign: Alignment, Src: SrcPtr, SrcAlign: Alignment,
8345 Size: VAArgOverflowSize);
8346 if (MS.TrackOrigins) {
8347 SrcPtr = IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(), Ptr: VAArgTLSOriginCopy,
8348 Idx0: AMD64FpEndOffset);
8349 IRB.CreateMemCpy(Dst: OverflowArgAreaOriginPtr, DstAlign: Alignment, Src: SrcPtr, SrcAlign: Alignment,
8350 Size: VAArgOverflowSize);
8351 }
8352 }
8353 }
8354};
8355
8356/// AArch64-specific implementation of VarArgHelper.
8357struct VarArgAArch64Helper : public VarArgHelperBase {
8358 static const unsigned kAArch64GrArgSize = 64;
8359 static const unsigned kAArch64VrArgSize = 128;
8360
8361 static const unsigned AArch64GrBegOffset = 0;
8362 static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
8363 // Make VR space aligned to 16 bytes.
8364 static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
8365 static const unsigned AArch64VrEndOffset =
8366 AArch64VrBegOffset + kAArch64VrArgSize;
8367 static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
8368
8369 AllocaInst *VAArgTLSCopy = nullptr;
8370 Value *VAArgOverflowSize = nullptr;
8371
8372 enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
8373
8374 VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
8375 MemorySanitizerVisitor &MSV)
8376 : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/32) {}
8377
8378 // A very rough approximation of aarch64 argument classification rules.
8379 std::pair<ArgKind, uint64_t> classifyArgument(Type *T) {
8380 if (T->isIntOrPtrTy() && T->getPrimitiveSizeInBits() <= 64)
8381 return {AK_GeneralPurpose, 1};
8382 if (T->isFloatingPointTy() && T->getPrimitiveSizeInBits() <= 128)
8383 return {AK_FloatingPoint, 1};
8384
8385 if (T->isArrayTy()) {
8386 auto R = classifyArgument(T: T->getArrayElementType());
8387 R.second *= T->getScalarType()->getArrayNumElements();
8388 return R;
8389 }
8390
8391 if (const FixedVectorType *FV = dyn_cast<FixedVectorType>(Val: T)) {
8392 auto R = classifyArgument(T: FV->getScalarType());
8393 R.second *= FV->getNumElements();
8394 return R;
8395 }
8396
8397 LLVM_DEBUG(errs() << "Unknown vararg type: " << *T << "\n");
8398 return {AK_Memory, 0};
8399 }
8400
8401 // The instrumentation stores the argument shadow in a non ABI-specific
8402 // format because it does not know which argument is named (since Clang,
8403 // like x86_64 case, lowers the va_args in the frontend and this pass only
8404 // sees the low level code that deals with va_list internals).
// The first eight GR registers are saved in the first 64 bytes of the
// va_arg TLS array, followed by the first 8 FP/SIMD registers, and then
// the remaining arguments.
8408 // Using constant offset within the va_arg TLS array allows fast copy
8409 // in the finalize instrumentation.
8410 void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
8411 unsigned GrOffset = AArch64GrBegOffset;
8412 unsigned VrOffset = AArch64VrBegOffset;
8413 unsigned OverflowOffset = AArch64VAEndOffset;
8414
8415 const DataLayout &DL = F.getDataLayout();
8416 for (const auto &[ArgNo, A] : llvm::enumerate(First: CB.args())) {
8417 bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
8418 auto [AK, RegNum] = classifyArgument(T: A->getType());
8419 if (AK == AK_GeneralPurpose &&
8420 (GrOffset + RegNum * 8) > AArch64GrEndOffset)
8421 AK = AK_Memory;
8422 if (AK == AK_FloatingPoint &&
8423 (VrOffset + RegNum * 16) > AArch64VrEndOffset)
8424 AK = AK_Memory;
8425 Value *Base;
8426 switch (AK) {
8427 case AK_GeneralPurpose:
8428 Base = getShadowPtrForVAArgument(IRB, ArgOffset: GrOffset);
8429 GrOffset += 8 * RegNum;
8430 break;
8431 case AK_FloatingPoint:
8432 Base = getShadowPtrForVAArgument(IRB, ArgOffset: VrOffset);
8433 VrOffset += 16 * RegNum;
8434 break;
8435 case AK_Memory:
8436 // Don't count fixed arguments in the overflow area - va_start will
8437 // skip right over them.
8438 if (IsFixed)
8439 continue;
8440 uint64_t ArgSize = DL.getTypeAllocSize(Ty: A->getType());
8441 uint64_t AlignedSize = alignTo(Value: ArgSize, Align: 8);
8442 unsigned BaseOffset = OverflowOffset;
8443 Base = getShadowPtrForVAArgument(IRB, ArgOffset: BaseOffset);
8444 OverflowOffset += AlignedSize;
8445 if (OverflowOffset > kParamTLSSize) {
8446 // We have no space to copy shadow there.
8447 CleanUnusedTLS(IRB, ShadowBase: Base, BaseOffset);
8448 continue;
8449 }
8450 break;
8451 }
8452 // Count Gp/Vr fixed arguments to their respective offsets, but don't
8453 // bother to actually store a shadow.
8454 if (IsFixed)
8455 continue;
8456 IRB.CreateAlignedStore(Val: MSV.getShadow(V: A), Ptr: Base, Align: kShadowTLSAlignment);
8457 }
8458 Constant *OverflowSize =
8459 ConstantInt::get(Ty: IRB.getInt64Ty(), V: OverflowOffset - AArch64VAEndOffset);
8460 IRB.CreateStore(Val: OverflowSize, Ptr: MS.VAArgOverflowSizeTLS);
8461 }
8462
8463 // Retrieve a va_list field of 'void*' size.
8464 Value *getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
8465 Value *SaveAreaPtrPtr =
8466 IRB.CreatePtrAdd(Ptr: VAListTag, Offset: ConstantInt::get(Ty: MS.IntptrTy, V: offset));
8467 return IRB.CreateLoad(Ty: Type::getInt64Ty(C&: *MS.C), Ptr: SaveAreaPtrPtr);
8468 }
8469
8470 // Retrieve a va_list field of 'int' size.
8471 Value *getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
8472 Value *SaveAreaPtr =
8473 IRB.CreatePtrAdd(Ptr: VAListTag, Offset: ConstantInt::get(Ty: MS.IntptrTy, V: offset));
8474 Value *SaveArea32 = IRB.CreateLoad(Ty: IRB.getInt32Ty(), Ptr: SaveAreaPtr);
8475 return IRB.CreateSExt(V: SaveArea32, DestTy: MS.IntptrTy);
8476 }
8477
8478 void finalizeInstrumentation() override {
8479 assert(!VAArgOverflowSize && !VAArgTLSCopy &&
8480 "finalizeInstrumentation called twice");
8481 if (!VAStartInstrumentationList.empty()) {
8482 // If there is a va_start in this function, make a backup copy of
8483 // va_arg_tls somewhere in the function entry block.
8484 IRBuilder<> IRB(MSV.FnPrologueEnd);
8485 VAArgOverflowSize =
8486 IRB.CreateLoad(Ty: IRB.getInt64Ty(), Ptr: MS.VAArgOverflowSizeTLS);
8487 Value *CopySize = IRB.CreateAdd(
8488 LHS: ConstantInt::get(Ty: MS.IntptrTy, V: AArch64VAEndOffset), RHS: VAArgOverflowSize);
8489 VAArgTLSCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
8490 VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
8491 IRB.CreateMemSet(Ptr: VAArgTLSCopy, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
8492 Size: CopySize, Align: kShadowTLSAlignment, isVolatile: false);
8493
8494 Value *SrcSize = IRB.CreateBinaryIntrinsic(
8495 ID: Intrinsic::umin, LHS: CopySize,
8496 RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kParamTLSSize));
8497 IRB.CreateMemCpy(Dst: VAArgTLSCopy, DstAlign: kShadowTLSAlignment, Src: MS.VAArgTLS,
8498 SrcAlign: kShadowTLSAlignment, Size: SrcSize);
8499 }
8500
8501 Value *GrArgSize = ConstantInt::get(Ty: MS.IntptrTy, V: kAArch64GrArgSize);
8502 Value *VrArgSize = ConstantInt::get(Ty: MS.IntptrTy, V: kAArch64VrArgSize);
8503
8504 // Instrument va_start, copy va_list shadow from the backup copy of
8505 // the TLS contents.
8506 for (CallInst *OrigInst : VAStartInstrumentationList) {
8507 NextNodeIRBuilder IRB(OrigInst);
8508
8509 Value *VAListTag = OrigInst->getArgOperand(i: 0);
8510
8511 // The variadic ABI for AArch64 creates two areas to save the incoming
8512 // argument registers (one for 64-bit general register xn-x7 and another
8513 // for 128-bit FP/SIMD vn-v7).
8514 // We need then to propagate the shadow arguments on both regions
8515 // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
8516 // The remaining arguments are saved on shadow for 'va::stack'.
8517 // One caveat is it requires only to propagate the non-named arguments,
8518 // however on the call site instrumentation 'all' the arguments are
8519 // saved. So to copy the shadow values from the va_arg TLS array
8520 // we need to adjust the offset for both GR and VR fields based on
8521 // the __{gr,vr}_offs value (since they are stores based on incoming
8522 // named arguments).
8523 Type *RegSaveAreaPtrTy = IRB.getPtrTy();
8524
8525 // Read the stack pointer from the va_list.
8526 Value *StackSaveAreaPtr =
8527 IRB.CreateIntToPtr(V: getVAField64(IRB, VAListTag, offset: 0), DestTy: RegSaveAreaPtrTy);
8528
8529 // Read both the __gr_top and __gr_off and add them up.
8530 Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, offset: 8);
8531 Value *GrOffSaveArea = getVAField32(IRB, VAListTag, offset: 24);
8532
8533 Value *GrRegSaveAreaPtr = IRB.CreateIntToPtr(
8534 V: IRB.CreateAdd(LHS: GrTopSaveAreaPtr, RHS: GrOffSaveArea), DestTy: RegSaveAreaPtrTy);
8535
8536 // Read both the __vr_top and __vr_off and add them up.
8537 Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, offset: 16);
8538 Value *VrOffSaveArea = getVAField32(IRB, VAListTag, offset: 28);
8539
8540 Value *VrRegSaveAreaPtr = IRB.CreateIntToPtr(
8541 V: IRB.CreateAdd(LHS: VrTopSaveAreaPtr, RHS: VrOffSaveArea), DestTy: RegSaveAreaPtrTy);
8542
8543 // It does not know how many named arguments is being used and, on the
8544 // callsite all the arguments were saved. Since __gr_off is defined as
8545 // '0 - ((8 - named_gr) * 8)', the idea is to just propagate the variadic
8546 // argument by ignoring the bytes of shadow from named arguments.
8547 Value *GrRegSaveAreaShadowPtrOff =
8548 IRB.CreateAdd(LHS: GrArgSize, RHS: GrOffSaveArea);
8549
8550 Value *GrRegSaveAreaShadowPtr =
8551 MSV.getShadowOriginPtr(Addr: GrRegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
8552 Alignment: Align(8), /*isStore*/ true)
8553 .first;
8554
8555 Value *GrSrcPtr =
8556 IRB.CreateInBoundsPtrAdd(Ptr: VAArgTLSCopy, Offset: GrRegSaveAreaShadowPtrOff);
8557 Value *GrCopySize = IRB.CreateSub(LHS: GrArgSize, RHS: GrRegSaveAreaShadowPtrOff);
8558
8559 IRB.CreateMemCpy(Dst: GrRegSaveAreaShadowPtr, DstAlign: Align(8), Src: GrSrcPtr, SrcAlign: Align(8),
8560 Size: GrCopySize);
8561
8562 // Again, but for FP/SIMD values.
8563 Value *VrRegSaveAreaShadowPtrOff =
8564 IRB.CreateAdd(LHS: VrArgSize, RHS: VrOffSaveArea);
8565
8566 Value *VrRegSaveAreaShadowPtr =
8567 MSV.getShadowOriginPtr(Addr: VrRegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
8568 Alignment: Align(8), /*isStore*/ true)
8569 .first;
8570
8571 Value *VrSrcPtr = IRB.CreateInBoundsPtrAdd(
8572 Ptr: IRB.CreateInBoundsPtrAdd(Ptr: VAArgTLSCopy,
8573 Offset: IRB.getInt32(C: AArch64VrBegOffset)),
8574 Offset: VrRegSaveAreaShadowPtrOff);
8575 Value *VrCopySize = IRB.CreateSub(LHS: VrArgSize, RHS: VrRegSaveAreaShadowPtrOff);
8576
8577 IRB.CreateMemCpy(Dst: VrRegSaveAreaShadowPtr, DstAlign: Align(8), Src: VrSrcPtr, SrcAlign: Align(8),
8578 Size: VrCopySize);
8579
8580 // And finally for remaining arguments.
8581 Value *StackSaveAreaShadowPtr =
8582 MSV.getShadowOriginPtr(Addr: StackSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
8583 Alignment: Align(16), /*isStore*/ true)
8584 .first;
8585
8586 Value *StackSrcPtr = IRB.CreateInBoundsPtrAdd(
8587 Ptr: VAArgTLSCopy, Offset: IRB.getInt32(C: AArch64VAEndOffset));
8588
8589 IRB.CreateMemCpy(Dst: StackSaveAreaShadowPtr, DstAlign: Align(16), Src: StackSrcPtr,
8590 SrcAlign: Align(16), Size: VAArgOverflowSize);
8591 }
8592 }
8593};
8594
8595/// PowerPC64-specific implementation of VarArgHelper.
8596struct VarArgPowerPC64Helper : public VarArgHelperBase {
8597 AllocaInst *VAArgTLSCopy = nullptr;
8598 Value *VAArgSize = nullptr;
8599
8600 VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
8601 MemorySanitizerVisitor &MSV)
8602 : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/8) {}
8603
8604 void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
8605 // For PowerPC, we need to deal with alignment of stack arguments -
8606 // they are mostly aligned to 8 bytes, but vectors and i128 arrays
8607 // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
8608 // For that reason, we compute current offset from stack pointer (which is
8609 // always properly aligned), and offset for the first vararg, then subtract
8610 // them.
8611 unsigned VAArgBase;
8612 Triple TargetTriple(F.getParent()->getTargetTriple());
8613 // Parameter save area starts at 48 bytes from frame pointer for ABIv1,
8614 // and 32 bytes for ABIv2. This is usually determined by target
8615 // endianness, but in theory could be overridden by function attribute.
8616 if (TargetTriple.isPPC64ELFv2ABI())
8617 VAArgBase = 32;
8618 else
8619 VAArgBase = 48;
8620 unsigned VAArgOffset = VAArgBase;
8621 const DataLayout &DL = F.getDataLayout();
8622 for (const auto &[ArgNo, A] : llvm::enumerate(First: CB.args())) {
8623 bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
8624 bool IsByVal = CB.paramHasAttr(ArgNo, Kind: Attribute::ByVal);
8625 if (IsByVal) {
8626 assert(A->getType()->isPointerTy());
8627 Type *RealTy = CB.getParamByValType(ArgNo);
8628 uint64_t ArgSize = DL.getTypeAllocSize(Ty: RealTy);
8629 Align ArgAlign = CB.getParamAlign(ArgNo).value_or(u: Align(8));
8630 if (ArgAlign < 8)
8631 ArgAlign = Align(8);
8632 VAArgOffset = alignTo(Size: VAArgOffset, A: ArgAlign);
8633 if (!IsFixed) {
8634 Value *Base =
8635 getShadowPtrForVAArgument(IRB, ArgOffset: VAArgOffset - VAArgBase, ArgSize);
8636 if (Base) {
8637 Value *AShadowPtr, *AOriginPtr;
8638 std::tie(args&: AShadowPtr, args&: AOriginPtr) =
8639 MSV.getShadowOriginPtr(Addr: A, IRB, ShadowTy: IRB.getInt8Ty(),
8640 Alignment: kShadowTLSAlignment, /*isStore*/ false);
8641
8642 IRB.CreateMemCpy(Dst: Base, DstAlign: kShadowTLSAlignment, Src: AShadowPtr,
8643 SrcAlign: kShadowTLSAlignment, Size: ArgSize);
8644 }
8645 }
8646 VAArgOffset += alignTo(Size: ArgSize, A: Align(8));
8647 } else {
8648 Value *Base;
8649 uint64_t ArgSize = DL.getTypeAllocSize(Ty: A->getType());
8650 Align ArgAlign = Align(8);
8651 if (A->getType()->isArrayTy()) {
8652 // Arrays are aligned to element size, except for long double
8653 // arrays, which are aligned to 8 bytes.
8654 Type *ElementTy = A->getType()->getArrayElementType();
8655 if (!ElementTy->isPPC_FP128Ty())
8656 ArgAlign = Align(DL.getTypeAllocSize(Ty: ElementTy));
8657 } else if (A->getType()->isVectorTy()) {
8658 // Vectors are naturally aligned.
8659 ArgAlign = Align(ArgSize);
8660 }
8661 if (ArgAlign < 8)
8662 ArgAlign = Align(8);
8663 VAArgOffset = alignTo(Size: VAArgOffset, A: ArgAlign);
8664 if (DL.isBigEndian()) {
8665 // Adjusting the shadow for argument with size < 8 to match the
8666 // placement of bits in big endian system
8667 if (ArgSize < 8)
8668 VAArgOffset += (8 - ArgSize);
8669 }
8670 if (!IsFixed) {
8671 Base =
8672 getShadowPtrForVAArgument(IRB, ArgOffset: VAArgOffset - VAArgBase, ArgSize);
8673 if (Base)
8674 IRB.CreateAlignedStore(Val: MSV.getShadow(V: A), Ptr: Base, Align: kShadowTLSAlignment);
8675 }
8676 VAArgOffset += ArgSize;
8677 VAArgOffset = alignTo(Size: VAArgOffset, A: Align(8));
8678 }
8679 if (IsFixed)
8680 VAArgBase = VAArgOffset;
8681 }
8682
8683 Constant *TotalVAArgSize =
8684 ConstantInt::get(Ty: MS.IntptrTy, V: VAArgOffset - VAArgBase);
8685 // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
8686 // a new class member i.e. it is the total size of all VarArgs.
8687 IRB.CreateStore(Val: TotalVAArgSize, Ptr: MS.VAArgOverflowSizeTLS);
8688 }
8689
8690 void finalizeInstrumentation() override {
8691 assert(!VAArgSize && !VAArgTLSCopy &&
8692 "finalizeInstrumentation called twice");
8693 IRBuilder<> IRB(MSV.FnPrologueEnd);
8694 VAArgSize = IRB.CreateLoad(Ty: IRB.getInt64Ty(), Ptr: MS.VAArgOverflowSizeTLS);
8695 Value *CopySize = VAArgSize;
8696
8697 if (!VAStartInstrumentationList.empty()) {
8698 // If there is a va_start in this function, make a backup copy of
8699 // va_arg_tls somewhere in the function entry block.
8700
8701 VAArgTLSCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
8702 VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
8703 IRB.CreateMemSet(Ptr: VAArgTLSCopy, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
8704 Size: CopySize, Align: kShadowTLSAlignment, isVolatile: false);
8705
8706 Value *SrcSize = IRB.CreateBinaryIntrinsic(
8707 ID: Intrinsic::umin, LHS: CopySize,
8708 RHS: ConstantInt::get(Ty: IRB.getInt64Ty(), V: kParamTLSSize));
8709 IRB.CreateMemCpy(Dst: VAArgTLSCopy, DstAlign: kShadowTLSAlignment, Src: MS.VAArgTLS,
8710 SrcAlign: kShadowTLSAlignment, Size: SrcSize);
8711 }
8712
8713 // Instrument va_start.
8714 // Copy va_list shadow from the backup copy of the TLS contents.
8715 for (CallInst *OrigInst : VAStartInstrumentationList) {
8716 NextNodeIRBuilder IRB(OrigInst);
8717 Value *VAListTag = OrigInst->getArgOperand(i: 0);
8718 Value *RegSaveAreaPtrPtr = IRB.CreatePtrToInt(V: VAListTag, DestTy: MS.IntptrTy);
8719
8720 RegSaveAreaPtrPtr = IRB.CreateIntToPtr(V: RegSaveAreaPtrPtr, DestTy: MS.PtrTy);
8721
8722 Value *RegSaveAreaPtr = IRB.CreateLoad(Ty: MS.PtrTy, Ptr: RegSaveAreaPtrPtr);
8723 Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
8724 const DataLayout &DL = F.getDataLayout();
8725 unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
8726 const Align Alignment = Align(IntptrSize);
8727 std::tie(args&: RegSaveAreaShadowPtr, args&: RegSaveAreaOriginPtr) =
8728 MSV.getShadowOriginPtr(Addr: RegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
8729 Alignment, /*isStore*/ true);
8730 IRB.CreateMemCpy(Dst: RegSaveAreaShadowPtr, DstAlign: Alignment, Src: VAArgTLSCopy, SrcAlign: Alignment,
8731 Size: CopySize);
8732 }
8733 }
8734};
8735
8736/// PowerPC32-specific implementation of VarArgHelper.
8737struct VarArgPowerPC32Helper : public VarArgHelperBase {
8738 AllocaInst *VAArgTLSCopy = nullptr;
8739 Value *VAArgSize = nullptr;
8740
8741 VarArgPowerPC32Helper(Function &F, MemorySanitizer &MS,
8742 MemorySanitizerVisitor &MSV)
8743 : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/12) {}
8744
8745 void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
8746 unsigned VAArgBase;
8747 // Parameter save area is 8 bytes from frame pointer in PPC32
8748 VAArgBase = 8;
8749 unsigned VAArgOffset = VAArgBase;
8750 const DataLayout &DL = F.getDataLayout();
8751 unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
8752 for (const auto &[ArgNo, A] : llvm::enumerate(First: CB.args())) {
8753 bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
8754 bool IsByVal = CB.paramHasAttr(ArgNo, Kind: Attribute::ByVal);
8755 if (IsByVal) {
8756 assert(A->getType()->isPointerTy());
8757 Type *RealTy = CB.getParamByValType(ArgNo);
8758 uint64_t ArgSize = DL.getTypeAllocSize(Ty: RealTy);
8759 Align ArgAlign = CB.getParamAlign(ArgNo).value_or(u: Align(IntptrSize));
8760 if (ArgAlign < IntptrSize)
8761 ArgAlign = Align(IntptrSize);
8762 VAArgOffset = alignTo(Size: VAArgOffset, A: ArgAlign);
8763 if (!IsFixed) {
8764 Value *Base =
8765 getShadowPtrForVAArgument(IRB, ArgOffset: VAArgOffset - VAArgBase, ArgSize);
8766 if (Base) {
8767 Value *AShadowPtr, *AOriginPtr;
8768 std::tie(args&: AShadowPtr, args&: AOriginPtr) =
8769 MSV.getShadowOriginPtr(Addr: A, IRB, ShadowTy: IRB.getInt8Ty(),
8770 Alignment: kShadowTLSAlignment, /*isStore*/ false);
8771
8772 IRB.CreateMemCpy(Dst: Base, DstAlign: kShadowTLSAlignment, Src: AShadowPtr,
8773 SrcAlign: kShadowTLSAlignment, Size: ArgSize);
8774 }
8775 }
8776 VAArgOffset += alignTo(Size: ArgSize, A: Align(IntptrSize));
8777 } else {
8778 Value *Base;
8779 Type *ArgTy = A->getType();
8780
8781 // On PPC 32 floating point variable arguments are stored in separate
8782 // area: fp_save_area = reg_save_area + 4*8. We do not copy shaodow for
8783 // them as they will be found when checking call arguments.
8784 if (!ArgTy->isFloatingPointTy()) {
8785 uint64_t ArgSize = DL.getTypeAllocSize(Ty: ArgTy);
8786 Align ArgAlign = Align(IntptrSize);
8787 if (ArgTy->isArrayTy()) {
8788 // Arrays are aligned to element size, except for long double
8789 // arrays, which are aligned to 8 bytes.
8790 Type *ElementTy = ArgTy->getArrayElementType();
8791 if (!ElementTy->isPPC_FP128Ty())
8792 ArgAlign = Align(DL.getTypeAllocSize(Ty: ElementTy));
8793 } else if (ArgTy->isVectorTy()) {
8794 // Vectors are naturally aligned.
8795 ArgAlign = Align(ArgSize);
8796 }
8797 if (ArgAlign < IntptrSize)
8798 ArgAlign = Align(IntptrSize);
8799 VAArgOffset = alignTo(Size: VAArgOffset, A: ArgAlign);
8800 if (DL.isBigEndian()) {
8801 // Adjusting the shadow for argument with size < IntptrSize to match
8802 // the placement of bits in big endian system
8803 if (ArgSize < IntptrSize)
8804 VAArgOffset += (IntptrSize - ArgSize);
8805 }
8806 if (!IsFixed) {
8807 Base = getShadowPtrForVAArgument(IRB, ArgOffset: VAArgOffset - VAArgBase,
8808 ArgSize);
8809 if (Base)
8810 IRB.CreateAlignedStore(Val: MSV.getShadow(V: A), Ptr: Base,
8811 Align: kShadowTLSAlignment);
8812 }
8813 VAArgOffset += ArgSize;
8814 VAArgOffset = alignTo(Size: VAArgOffset, A: Align(IntptrSize));
8815 }
8816 }
8817 }
8818
8819 Constant *TotalVAArgSize =
8820 ConstantInt::get(Ty: MS.IntptrTy, V: VAArgOffset - VAArgBase);
8821 // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
8822 // a new class member i.e. it is the total size of all VarArgs.
8823 IRB.CreateStore(Val: TotalVAArgSize, Ptr: MS.VAArgOverflowSizeTLS);
8824 }
8825
8826 void finalizeInstrumentation() override {
8827 assert(!VAArgSize && !VAArgTLSCopy &&
8828 "finalizeInstrumentation called twice");
8829 IRBuilder<> IRB(MSV.FnPrologueEnd);
8830 VAArgSize = IRB.CreateLoad(Ty: MS.IntptrTy, Ptr: MS.VAArgOverflowSizeTLS);
8831 Value *CopySize = VAArgSize;
8832
8833 if (!VAStartInstrumentationList.empty()) {
8834 // If there is a va_start in this function, make a backup copy of
8835 // va_arg_tls somewhere in the function entry block.
8836
8837 VAArgTLSCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
8838 VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
8839 IRB.CreateMemSet(Ptr: VAArgTLSCopy, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
8840 Size: CopySize, Align: kShadowTLSAlignment, isVolatile: false);
8841
8842 Value *SrcSize = IRB.CreateBinaryIntrinsic(
8843 ID: Intrinsic::umin, LHS: CopySize,
8844 RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kParamTLSSize));
8845 IRB.CreateMemCpy(Dst: VAArgTLSCopy, DstAlign: kShadowTLSAlignment, Src: MS.VAArgTLS,
8846 SrcAlign: kShadowTLSAlignment, Size: SrcSize);
8847 }
8848
8849 // Instrument va_start.
8850 // Copy va_list shadow from the backup copy of the TLS contents.
8851 for (CallInst *OrigInst : VAStartInstrumentationList) {
8852 NextNodeIRBuilder IRB(OrigInst);
8853 Value *VAListTag = OrigInst->getArgOperand(i: 0);
8854 Value *RegSaveAreaPtrPtr = IRB.CreatePtrToInt(V: VAListTag, DestTy: MS.IntptrTy);
8855 Value *RegSaveAreaSize = CopySize;
8856
8857 // In PPC32 va_list_tag is a struct
8858 RegSaveAreaPtrPtr =
8859 IRB.CreateAdd(LHS: RegSaveAreaPtrPtr, RHS: ConstantInt::get(Ty: MS.IntptrTy, V: 8));
8860
8861 // On PPC 32 reg_save_area can only hold 32 bytes of data
8862 RegSaveAreaSize = IRB.CreateBinaryIntrinsic(
8863 ID: Intrinsic::umin, LHS: CopySize, RHS: ConstantInt::get(Ty: MS.IntptrTy, V: 32));
8864
8865 RegSaveAreaPtrPtr = IRB.CreateIntToPtr(V: RegSaveAreaPtrPtr, DestTy: MS.PtrTy);
8866 Value *RegSaveAreaPtr = IRB.CreateLoad(Ty: MS.PtrTy, Ptr: RegSaveAreaPtrPtr);
8867
8868 const DataLayout &DL = F.getDataLayout();
8869 unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
8870 const Align Alignment = Align(IntptrSize);
8871
8872 { // Copy reg save area
8873 Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
8874 std::tie(args&: RegSaveAreaShadowPtr, args&: RegSaveAreaOriginPtr) =
8875 MSV.getShadowOriginPtr(Addr: RegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
8876 Alignment, /*isStore*/ true);
8877 IRB.CreateMemCpy(Dst: RegSaveAreaShadowPtr, DstAlign: Alignment, Src: VAArgTLSCopy,
8878 SrcAlign: Alignment, Size: RegSaveAreaSize);
8879
8880 RegSaveAreaShadowPtr =
8881 IRB.CreatePtrToInt(V: RegSaveAreaShadowPtr, DestTy: MS.IntptrTy);
8882 Value *FPSaveArea = IRB.CreateAdd(LHS: RegSaveAreaShadowPtr,
8883 RHS: ConstantInt::get(Ty: MS.IntptrTy, V: 32));
8884 FPSaveArea = IRB.CreateIntToPtr(V: FPSaveArea, DestTy: MS.PtrTy);
8885 // We fill fp shadow with zeroes as uninitialized fp args should have
8886 // been found during call base check
8887 IRB.CreateMemSet(Ptr: FPSaveArea, Val: ConstantInt::getNullValue(Ty: IRB.getInt8Ty()),
8888 Size: ConstantInt::get(Ty: MS.IntptrTy, V: 32), Align: Alignment);
8889 }
8890
8891 { // Copy overflow area
8892 // RegSaveAreaSize is min(CopySize, 32) -> no overflow can occur
8893 Value *OverflowAreaSize = IRB.CreateSub(LHS: CopySize, RHS: RegSaveAreaSize);
8894
8895 Value *OverflowAreaPtrPtr = IRB.CreatePtrToInt(V: VAListTag, DestTy: MS.IntptrTy);
8896 OverflowAreaPtrPtr =
8897 IRB.CreateAdd(LHS: OverflowAreaPtrPtr, RHS: ConstantInt::get(Ty: MS.IntptrTy, V: 4));
8898 OverflowAreaPtrPtr = IRB.CreateIntToPtr(V: OverflowAreaPtrPtr, DestTy: MS.PtrTy);
8899
8900 Value *OverflowAreaPtr = IRB.CreateLoad(Ty: MS.PtrTy, Ptr: OverflowAreaPtrPtr);
8901
8902 Value *OverflowAreaShadowPtr, *OverflowAreaOriginPtr;
8903 std::tie(args&: OverflowAreaShadowPtr, args&: OverflowAreaOriginPtr) =
8904 MSV.getShadowOriginPtr(Addr: OverflowAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
8905 Alignment, /*isStore*/ true);
8906
8907 Value *OverflowVAArgTLSCopyPtr =
8908 IRB.CreatePtrToInt(V: VAArgTLSCopy, DestTy: MS.IntptrTy);
8909 OverflowVAArgTLSCopyPtr =
8910 IRB.CreateAdd(LHS: OverflowVAArgTLSCopyPtr, RHS: RegSaveAreaSize);
8911
8912 OverflowVAArgTLSCopyPtr =
8913 IRB.CreateIntToPtr(V: OverflowVAArgTLSCopyPtr, DestTy: MS.PtrTy);
8914 IRB.CreateMemCpy(Dst: OverflowAreaShadowPtr, DstAlign: Alignment,
8915 Src: OverflowVAArgTLSCopyPtr, SrcAlign: Alignment, Size: OverflowAreaSize);
8916 }
8917 }
8918 }
8919};
8920
8921 /// SystemZ-specific implementation of VarArgHelper.
     ///
     /// visitCallBase() lays out vararg shadow in the VAArg TLS using the
     /// offsets below: general-purpose slots start at SystemZGpOffset,
     /// floating-point slots at SystemZFpOffset, and arguments classified as
     /// Memory start at SystemZOverflowOffset. finalizeInstrumentation()
     /// snapshots that TLS and, after each va_start, copies the snapshot over
     /// the shadow of the register save area and of the overflow argument area.
8922 struct VarArgSystemZHelper : public VarArgHelperBase {
     // Offsets and sizes in bytes; SystemZMaxVrArgs is a count of arguments.
     // The *PtrOffset constants are added to the va_list address to locate the
     // pointers loaded by copyOverflowArea() and copyRegSaveArea().
8923 static const unsigned SystemZGpOffset = 16;
8924 static const unsigned SystemZGpEndOffset = 56;
8925 static const unsigned SystemZFpOffset = 128;
8926 static const unsigned SystemZFpEndOffset = 160;
8927 static const unsigned SystemZMaxVrArgs = 8;
8928 static const unsigned SystemZRegSaveAreaSize = 160;
8929 static const unsigned SystemZOverflowOffset = 160;
8930 static const unsigned SystemZVAListTagSize = 32;
8931 static const unsigned SystemZOverflowArgAreaPtrOffset = 16;
8932 static const unsigned SystemZRegSaveAreaPtrOffset = 24;
8933
     // Value of the function's "use-soft-float" attribute.
8934 bool IsSoftFloatABI;
     // Stack copies of the VAArg shadow / origin TLS, created in
     // finalizeInstrumentation() when the function contains a va_start.
8935 AllocaInst *VAArgTLSCopy = nullptr;
8936 AllocaInst *VAArgTLSOriginCopy = nullptr;
     // Overflow size loaded from VAArgOverflowSizeTLS in the prologue.
8937 Value *VAArgOverflowSize = nullptr;
8938
     // Where an argument is passed, as decided by classifyArgument() and
     // refined in visitCallBase().
8939 enum class ArgKind {
8940 GeneralPurpose,
8941 FloatingPoint,
8942 Vector,
8943 Memory,
8944 Indirect,
8945 };
8946
     // How the shadow of an argument is widened to 64 bits before being stored.
8947 enum class ShadowExtension { None, Zero, Sign };
8948
8949 VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
8950 MemorySanitizerVisitor &MSV)
8951 : VarArgHelperBase(F, MS, MSV, SystemZVAListTagSize),
8952 IsSoftFloatABI(F.getFnAttribute(Kind: "use-soft-float").getValueAsBool()) {}
8953
     /// Maps the IR type of an argument to the kind of location it is passed in.
8954 ArgKind classifyArgument(Type *T) {
8955 // T is a SystemZABIInfo::classifyArgumentType() output, and there are
8956 // only a few possibilities of what it can be. In particular, enums, single
8957 // element structs and large types have already been taken care of.
8958
8959 // Some i128 and fp128 arguments are converted to pointers only in the
8960 // back end.
8961 if (T->isIntegerTy(BitWidth: 128) || T->isFP128Ty())
8962 return ArgKind::Indirect;
8963 if (T->isFloatingPointTy())
8964 return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint;
8965 if (T->isIntegerTy() || T->isPointerTy())
8966 return ArgKind::GeneralPurpose;
8967 if (T->isVectorTy())
8968 return ArgKind::Vector;
8969 return ArgKind::Memory;
8970 }
8971
     /// Returns how the shadow of call argument \p ArgNo has to be extended,
     /// based on the ZExt / SExt attributes of that call-site parameter.
8972 ShadowExtension getShadowExtension(const CallBase &CB, unsigned ArgNo) {
8973 // ABI says: "One of the simple integer types no more than 64 bits wide.
8974 // ... If such an argument is shorter than 64 bits, replace it by a full
8975 // 64-bit integer representing the same number, using sign or zero
8976 // extension". Shadow for an integer argument has the same type as the
8977 // argument itself, so it can be sign or zero extended as well.
8978 bool ZExt = CB.paramHasAttr(ArgNo, Kind: Attribute::ZExt);
8979 bool SExt = CB.paramHasAttr(ArgNo, Kind: Attribute::SExt);
8980 if (ZExt) {
     // Both attributes on the same argument are not expected.
8981 assert(!SExt);
8982 return ShadowExtension::Zero;
8983 }
8984 if (SExt) {
     // Always holds here: the ZExt case has already returned above.
8985 assert(!ZExt);
8986 return ShadowExtension::Sign;
8987 }
8988 return ShadowExtension::None;
8989 }
8990
     /// Walks the arguments of \p CB, tracks the GP / FP / vector / overflow
     /// positions for all of them, and stores shadow (plus origin when
     /// MS.TrackOrigins is set) into the VAArg TLS for the variadic ones. The
     /// number of overflow bytes used is stored to VAArgOverflowSizeTLS.
8991 void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
8992 unsigned GpOffset = SystemZGpOffset;
8993 unsigned FpOffset = SystemZFpOffset;
8994 unsigned VrIndex = 0;
8995 unsigned OverflowOffset = SystemZOverflowOffset;
8996 const DataLayout &DL = F.getDataLayout();
8997 for (const auto &[ArgNo, A] : llvm::enumerate(First: CB.args())) {
8998 bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
8999 // SystemZABIInfo does not produce ByVal parameters.
9000 assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal));
9001 Type *T = A->getType();
9002 ArgKind AK = classifyArgument(T);
     // Indirect arguments are treated as a pointer in a general-purpose slot.
9003 if (AK == ArgKind::Indirect) {
9004 T = MS.PtrTy;
9005 AK = ArgKind::GeneralPurpose;
9006 }
     // Once a register class is exhausted the argument goes to memory; vector
     // varargs always do.
9007 if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset)
9008 AK = ArgKind::Memory;
9009 if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset)
9010 AK = ArgKind::Memory;
9011 if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed))
9012 AK = ArgKind::Memory;
     // ShadowBase stays null when no shadow has to be stored for this argument.
9013 Value *ShadowBase = nullptr;
9014 Value *OriginBase = nullptr;
9015 ShadowExtension SE = ShadowExtension::None;
9016 switch (AK) {
9017 case ArgKind::GeneralPurpose: {
9018 // Always keep track of GpOffset, but store shadow only for varargs.
9019 uint64_t ArgSize = 8;
9020 if (GpOffset + ArgSize <= kParamTLSSize) {
9021 if (!IsFixed) {
9022 SE = getShadowExtension(CB, ArgNo);
9023 uint64_t GapSize = 0;
     // Without extension the shadow is placed at the end of the 8-byte slot.
9024 if (SE == ShadowExtension::None) {
9025 uint64_t ArgAllocSize = DL.getTypeAllocSize(Ty: T);
9026 assert(ArgAllocSize <= ArgSize);
9027 GapSize = ArgSize - ArgAllocSize;
9028 }
9029 ShadowBase = getShadowAddrForVAArgument(IRB, ArgOffset: GpOffset + GapSize);
9030 if (MS.TrackOrigins)
9031 OriginBase = getOriginPtrForVAArgument(IRB, ArgOffset: GpOffset + GapSize);
9032 }
9033 GpOffset += ArgSize;
9034 } else {
9035 GpOffset = kParamTLSSize;
9036 }
9037 break;
9038 }
9039 case ArgKind::FloatingPoint: {
9040 // Always keep track of FpOffset, but store shadow only for varargs.
9041 uint64_t ArgSize = 8;
9042 if (FpOffset + ArgSize <= kParamTLSSize) {
9043 if (!IsFixed) {
9044 // PoP says: "A short floating-point datum requires only the
9045 // left-most 32 bit positions of a floating-point register".
9046 // Therefore, in contrast to ArgKind::GeneralPurpose and
     // ArgKind::Memory,
9047 // don't extend shadow and don't mind the gap.
9048 ShadowBase = getShadowAddrForVAArgument(IRB, ArgOffset: FpOffset);
9049 if (MS.TrackOrigins)
9050 OriginBase = getOriginPtrForVAArgument(IRB, ArgOffset: FpOffset);
9051 }
9052 FpOffset += ArgSize;
9053 } else {
9054 FpOffset = kParamTLSSize;
9055 }
9056 break;
9057 }
9058 case ArgKind::Vector: {
9059 // Keep track of VrIndex. No need to store shadow, since vector varargs
9060 // go through ArgKind::Memory.
9061 assert(IsFixed);
9062 VrIndex++;
9063 break;
9064 }
9065 case ArgKind::Memory: {
9066 // Keep track of OverflowOffset and store shadow only for varargs.
9067 // Ignore fixed args, since we need to copy only the vararg portion of
9068 // the overflow area shadow.
9069 if (!IsFixed) {
9070 uint64_t ArgAllocSize = DL.getTypeAllocSize(Ty: T);
9071 uint64_t ArgSize = alignTo(Value: ArgAllocSize, Align: 8);
9072 if (OverflowOffset + ArgSize <= kParamTLSSize) {
9073 SE = getShadowExtension(CB, ArgNo);
9074 uint64_t GapSize =
9075 SE == ShadowExtension::None ? ArgSize - ArgAllocSize : 0;
9076 ShadowBase =
9077 getShadowAddrForVAArgument(IRB, ArgOffset: OverflowOffset + GapSize);
9078 if (MS.TrackOrigins)
9079 OriginBase =
9080 getOriginPtrForVAArgument(IRB, ArgOffset: OverflowOffset + GapSize);
9081 OverflowOffset += ArgSize;
9082 } else {
9083 OverflowOffset = kParamTLSSize;
9084 }
9085 }
9086 break;
9087 }
9088 case ArgKind::Indirect:
9089 llvm_unreachable("Indirect must be converted to GeneralPurpose");
9090 }
9091 if (ShadowBase == nullptr)
9092 continue;
9093 Value *Shadow = MSV.getShadow(V: A);
     // Extended arguments get their shadow widened to 64 bits in the same way.
9094 if (SE != ShadowExtension::None)
9095 Shadow = MSV.CreateShadowCast(IRB, V: Shadow, dstTy: IRB.getInt64Ty(),
9096 /*Signed*/ SE == ShadowExtension::Sign);
9097 ShadowBase = IRB.CreateIntToPtr(V: ShadowBase, DestTy: MS.PtrTy, Name: "_msarg_va_s");
9098 IRB.CreateStore(Val: Shadow, Ptr: ShadowBase);
9099 if (MS.TrackOrigins) {
9100 Value *Origin = MSV.getOrigin(V: A);
9101 TypeSize StoreSize = DL.getTypeStoreSize(Ty: Shadow->getType());
9102 MSV.paintOrigin(IRB, Origin, OriginPtr: OriginBase, TS: StoreSize,
9103 Alignment: kMinOriginAlignment);
9104 }
9105 }
     // Only the bytes used beyond SystemZOverflowOffset are reported.
9106 Constant *OverflowSize = ConstantInt::get(
9107 Ty: IRB.getInt64Ty(), V: OverflowOffset - SystemZOverflowOffset);
9108 IRB.CreateStore(Val: OverflowSize, Ptr: MS.VAArgOverflowSizeTLS);
9109 }
9110
     /// Copies the leading part of the TLS backup (and of the origin backup when
     /// MS.TrackOrigins is set) over the shadow of the register save area whose
     /// pointer is loaded from \p VAListTag + SystemZRegSaveAreaPtrOffset.
9111 void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) {
9112 Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
9113 V: IRB.CreateAdd(
9114 LHS: IRB.CreatePtrToInt(V: VAListTag, DestTy: MS.IntptrTy),
9115 RHS: ConstantInt::get(Ty: MS.IntptrTy, V: SystemZRegSaveAreaPtrOffset)),
9116 DestTy: MS.PtrTy);
9117 Value *RegSaveAreaPtr = IRB.CreateLoad(Ty: MS.PtrTy, Ptr: RegSaveAreaPtrPtr);
9118 Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
9119 const Align Alignment = Align(8);
9120 std::tie(args&: RegSaveAreaShadowPtr, args&: RegSaveAreaOriginPtr) =
9121 MSV.getShadowOriginPtr(Addr: RegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(), Alignment,
9122 /*isStore*/ true);
9123 // TODO(iii): copy only fragments filled by visitCallBase()
9124 // TODO(iii): support packed-stack && !use-soft-float
9125 // For use-soft-float functions, it is enough to copy just the GPRs.
9126 unsigned RegSaveAreaSize =
9127 IsSoftFloatABI ? SystemZGpEndOffset : SystemZRegSaveAreaSize;
9128 IRB.CreateMemCpy(Dst: RegSaveAreaShadowPtr, DstAlign: Alignment, Src: VAArgTLSCopy, SrcAlign: Alignment,
9129 Size: RegSaveAreaSize);
9130 if (MS.TrackOrigins)
9131 IRB.CreateMemCpy(Dst: RegSaveAreaOriginPtr, DstAlign: Alignment, Src: VAArgTLSOriginCopy,
9132 SrcAlign: Alignment, Size: RegSaveAreaSize);
9133 }
9134
9135 // FIXME: This implementation limits OverflowOffset to kParamTLSSize, so we
9136 // don't know real overflow size and can't clear shadow beyond kParamTLSSize.
     /// Copies VAArgOverflowSize bytes, starting SystemZOverflowOffset bytes into
     /// the TLS backup (and the origin backup when MS.TrackOrigins is set), over
     /// the shadow of the overflow argument area whose pointer is loaded from
     /// \p VAListTag + SystemZOverflowArgAreaPtrOffset.
9137 void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
9138 Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
9139 V: IRB.CreateAdd(
9140 LHS: IRB.CreatePtrToInt(V: VAListTag, DestTy: MS.IntptrTy),
9141 RHS: ConstantInt::get(Ty: MS.IntptrTy, V: SystemZOverflowArgAreaPtrOffset)),
9142 DestTy: MS.PtrTy);
9143 Value *OverflowArgAreaPtr = IRB.CreateLoad(Ty: MS.PtrTy, Ptr: OverflowArgAreaPtrPtr);
9144 Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
9145 const Align Alignment = Align(8);
9146 std::tie(args&: OverflowArgAreaShadowPtr, args&: OverflowArgAreaOriginPtr) =
9147 MSV.getShadowOriginPtr(Addr: OverflowArgAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
9148 Alignment, /*isStore*/ true);
9149 Value *SrcPtr = IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(), Ptr: VAArgTLSCopy,
9150 Idx0: SystemZOverflowOffset);
9151 IRB.CreateMemCpy(Dst: OverflowArgAreaShadowPtr, DstAlign: Alignment, Src: SrcPtr, SrcAlign: Alignment,
9152 Size: VAArgOverflowSize);
9153 if (MS.TrackOrigins) {
9154 SrcPtr = IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(), Ptr: VAArgTLSOriginCopy,
9155 Idx0: SystemZOverflowOffset);
9156 IRB.CreateMemCpy(Dst: OverflowArgAreaOriginPtr, DstAlign: Alignment, Src: SrcPtr, SrcAlign: Alignment,
9157 Size: VAArgOverflowSize);
9158 }
9159 }
9160
     /// When the function contains a va_start, backs up the VAArg TLS (and the
     /// origin TLS when MS.TrackOrigins is set) in the function prologue, then
     /// patches the shadow of the save areas after every va_start.
9161 void finalizeInstrumentation() override {
9162 assert(!VAArgOverflowSize && !VAArgTLSCopy &&
9163 "finalizeInstrumentation called twice");
9164 if (!VAStartInstrumentationList.empty()) {
9165 // If there is a va_start in this function, make a backup copy of
9166 // va_arg_tls somewhere in the function entry block.
9167 IRBuilder<> IRB(MSV.FnPrologueEnd);
9168 VAArgOverflowSize =
9169 IRB.CreateLoad(Ty: IRB.getInt64Ty(), Ptr: MS.VAArgOverflowSizeTLS);
     // The backup holds SystemZOverflowOffset bytes plus the overflow part.
9170 Value *CopySize =
9171 IRB.CreateAdd(LHS: ConstantInt::get(Ty: MS.IntptrTy, V: SystemZOverflowOffset),
9172 RHS: VAArgOverflowSize);
9173 VAArgTLSCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
9174 VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
9175 IRB.CreateMemSet(Ptr: VAArgTLSCopy, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
9176 Size: CopySize, Align: kShadowTLSAlignment, isVolatile: false);
9177
     // Only min(CopySize, kParamTLSSize) bytes are taken from the TLS.
9178 Value *SrcSize = IRB.CreateBinaryIntrinsic(
9179 ID: Intrinsic::umin, LHS: CopySize,
9180 RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kParamTLSSize));
9181 IRB.CreateMemCpy(Dst: VAArgTLSCopy, DstAlign: kShadowTLSAlignment, Src: MS.VAArgTLS,
9182 SrcAlign: kShadowTLSAlignment, Size: SrcSize);
9183 if (MS.TrackOrigins) {
     // Unlike the shadow backup, the origin backup is not zero-filled first.
9184 VAArgTLSOriginCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
9185 VAArgTLSOriginCopy->setAlignment(kShadowTLSAlignment);
9186 IRB.CreateMemCpy(Dst: VAArgTLSOriginCopy, DstAlign: kShadowTLSAlignment,
9187 Src: MS.VAArgOriginTLS, SrcAlign: kShadowTLSAlignment, Size: SrcSize);
9188 }
9189 }
9190
9191 // Instrument va_start.
9192 // Copy va_list shadow from the backup copy of the TLS contents.
9193 for (CallInst *OrigInst : VAStartInstrumentationList) {
9194 NextNodeIRBuilder IRB(OrigInst);
9195 Value *VAListTag = OrigInst->getArgOperand(i: 0);
9196 copyRegSaveArea(IRB, VAListTag);
9197 copyOverflowArea(IRB, VAListTag);
9198 }
9199 }
9200 };
9201
9202/// i386-specific implementation of VarArgHelper.
9203struct VarArgI386Helper : public VarArgHelperBase {
9204 AllocaInst *VAArgTLSCopy = nullptr;
9205 Value *VAArgSize = nullptr;
9206
9207 VarArgI386Helper(Function &F, MemorySanitizer &MS,
9208 MemorySanitizerVisitor &MSV)
9209 : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/4) {}
9210
9211 void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
9212 const DataLayout &DL = F.getDataLayout();
9213 unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
9214 unsigned VAArgOffset = 0;
9215 for (const auto &[ArgNo, A] : llvm::enumerate(First: CB.args())) {
9216 bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
9217 bool IsByVal = CB.paramHasAttr(ArgNo, Kind: Attribute::ByVal);
9218 if (IsByVal) {
9219 assert(A->getType()->isPointerTy());
9220 Type *RealTy = CB.getParamByValType(ArgNo);
9221 uint64_t ArgSize = DL.getTypeAllocSize(Ty: RealTy);
9222 Align ArgAlign = CB.getParamAlign(ArgNo).value_or(u: Align(IntptrSize));
9223 if (ArgAlign < IntptrSize)
9224 ArgAlign = Align(IntptrSize);
9225 VAArgOffset = alignTo(Size: VAArgOffset, A: ArgAlign);
9226 if (!IsFixed) {
9227 Value *Base = getShadowPtrForVAArgument(IRB, ArgOffset: VAArgOffset, ArgSize);
9228 if (Base) {
9229 Value *AShadowPtr, *AOriginPtr;
9230 std::tie(args&: AShadowPtr, args&: AOriginPtr) =
9231 MSV.getShadowOriginPtr(Addr: A, IRB, ShadowTy: IRB.getInt8Ty(),
9232 Alignment: kShadowTLSAlignment, /*isStore*/ false);
9233
9234 IRB.CreateMemCpy(Dst: Base, DstAlign: kShadowTLSAlignment, Src: AShadowPtr,
9235 SrcAlign: kShadowTLSAlignment, Size: ArgSize);
9236 }
9237 VAArgOffset += alignTo(Size: ArgSize, A: Align(IntptrSize));
9238 }
9239 } else {
9240 Value *Base;
9241 uint64_t ArgSize = DL.getTypeAllocSize(Ty: A->getType());
9242 Align ArgAlign = Align(IntptrSize);
9243 VAArgOffset = alignTo(Size: VAArgOffset, A: ArgAlign);
9244 if (DL.isBigEndian()) {
9245 // Adjusting the shadow for argument with size < IntptrSize to match
9246 // the placement of bits in big endian system
9247 if (ArgSize < IntptrSize)
9248 VAArgOffset += (IntptrSize - ArgSize);
9249 }
9250 if (!IsFixed) {
9251 Base = getShadowPtrForVAArgument(IRB, ArgOffset: VAArgOffset, ArgSize);
9252 if (Base)
9253 IRB.CreateAlignedStore(Val: MSV.getShadow(V: A), Ptr: Base, Align: kShadowTLSAlignment);
9254 VAArgOffset += ArgSize;
9255 VAArgOffset = alignTo(Size: VAArgOffset, A: Align(IntptrSize));
9256 }
9257 }
9258 }
9259
9260 Constant *TotalVAArgSize = ConstantInt::get(Ty: MS.IntptrTy, V: VAArgOffset);
9261 // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
9262 // a new class member i.e. it is the total size of all VarArgs.
9263 IRB.CreateStore(Val: TotalVAArgSize, Ptr: MS.VAArgOverflowSizeTLS);
9264 }
9265
9266 void finalizeInstrumentation() override {
9267 assert(!VAArgSize && !VAArgTLSCopy &&
9268 "finalizeInstrumentation called twice");
9269 IRBuilder<> IRB(MSV.FnPrologueEnd);
9270 VAArgSize = IRB.CreateLoad(Ty: MS.IntptrTy, Ptr: MS.VAArgOverflowSizeTLS);
9271 Value *CopySize = VAArgSize;
9272
9273 if (!VAStartInstrumentationList.empty()) {
9274 // If there is a va_start in this function, make a backup copy of
9275 // va_arg_tls somewhere in the function entry block.
9276 VAArgTLSCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
9277 VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
9278 IRB.CreateMemSet(Ptr: VAArgTLSCopy, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
9279 Size: CopySize, Align: kShadowTLSAlignment, isVolatile: false);
9280
9281 Value *SrcSize = IRB.CreateBinaryIntrinsic(
9282 ID: Intrinsic::umin, LHS: CopySize,
9283 RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kParamTLSSize));
9284 IRB.CreateMemCpy(Dst: VAArgTLSCopy, DstAlign: kShadowTLSAlignment, Src: MS.VAArgTLS,
9285 SrcAlign: kShadowTLSAlignment, Size: SrcSize);
9286 }
9287
9288 // Instrument va_start.
9289 // Copy va_list shadow from the backup copy of the TLS contents.
9290 for (CallInst *OrigInst : VAStartInstrumentationList) {
9291 NextNodeIRBuilder IRB(OrigInst);
9292 Value *VAListTag = OrigInst->getArgOperand(i: 0);
9293 Type *RegSaveAreaPtrTy = PointerType::getUnqual(C&: *MS.C);
9294 Value *RegSaveAreaPtrPtr =
9295 IRB.CreateIntToPtr(V: IRB.CreatePtrToInt(V: VAListTag, DestTy: MS.IntptrTy),
9296 DestTy: PointerType::get(C&: *MS.C, AddressSpace: 0));
9297 Value *RegSaveAreaPtr =
9298 IRB.CreateLoad(Ty: RegSaveAreaPtrTy, Ptr: RegSaveAreaPtrPtr);
9299 Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
9300 const DataLayout &DL = F.getDataLayout();
9301 unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
9302 const Align Alignment = Align(IntptrSize);
9303 std::tie(args&: RegSaveAreaShadowPtr, args&: RegSaveAreaOriginPtr) =
9304 MSV.getShadowOriginPtr(Addr: RegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
9305 Alignment, /*isStore*/ true);
9306 IRB.CreateMemCpy(Dst: RegSaveAreaShadowPtr, DstAlign: Alignment, Src: VAArgTLSCopy, SrcAlign: Alignment,
9307 Size: CopySize);
9308 }
9309 }
9310};
9311
9312/// Implementation of VarArgHelper that is used for ARM32, MIPS, RISCV,
9313/// LoongArch64.
9314struct VarArgGenericHelper : public VarArgHelperBase {
9315 AllocaInst *VAArgTLSCopy = nullptr;
9316 Value *VAArgSize = nullptr;
9317
9318 VarArgGenericHelper(Function &F, MemorySanitizer &MS,
9319 MemorySanitizerVisitor &MSV, const unsigned VAListTagSize)
9320 : VarArgHelperBase(F, MS, MSV, VAListTagSize) {}
9321
9322 void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
9323 unsigned VAArgOffset = 0;
9324 const DataLayout &DL = F.getDataLayout();
9325 unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
9326 for (const auto &[ArgNo, A] : llvm::enumerate(First: CB.args())) {
9327 bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
9328 if (IsFixed)
9329 continue;
9330 uint64_t ArgSize = DL.getTypeAllocSize(Ty: A->getType());
9331 if (DL.isBigEndian()) {
9332 // Adjusting the shadow for argument with size < IntptrSize to match the
9333 // placement of bits in big endian system
9334 if (ArgSize < IntptrSize)
9335 VAArgOffset += (IntptrSize - ArgSize);
9336 }
9337 Value *Base = getShadowPtrForVAArgument(IRB, ArgOffset: VAArgOffset, ArgSize);
9338 VAArgOffset += ArgSize;
9339 VAArgOffset = alignTo(Value: VAArgOffset, Align: IntptrSize);
9340 if (!Base)
9341 continue;
9342 IRB.CreateAlignedStore(Val: MSV.getShadow(V: A), Ptr: Base, Align: kShadowTLSAlignment);
9343 }
9344
9345 Constant *TotalVAArgSize = ConstantInt::get(Ty: MS.IntptrTy, V: VAArgOffset);
9346 // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
9347 // a new class member i.e. it is the total size of all VarArgs.
9348 IRB.CreateStore(Val: TotalVAArgSize, Ptr: MS.VAArgOverflowSizeTLS);
9349 }
9350
9351 void finalizeInstrumentation() override {
9352 assert(!VAArgSize && !VAArgTLSCopy &&
9353 "finalizeInstrumentation called twice");
9354 IRBuilder<> IRB(MSV.FnPrologueEnd);
9355 VAArgSize = IRB.CreateLoad(Ty: MS.IntptrTy, Ptr: MS.VAArgOverflowSizeTLS);
9356 Value *CopySize = VAArgSize;
9357
9358 if (!VAStartInstrumentationList.empty()) {
9359 // If there is a va_start in this function, make a backup copy of
9360 // va_arg_tls somewhere in the function entry block.
9361 VAArgTLSCopy = IRB.CreateAlloca(Ty: Type::getInt8Ty(C&: *MS.C), ArraySize: CopySize);
9362 VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
9363 IRB.CreateMemSet(Ptr: VAArgTLSCopy, Val: Constant::getNullValue(Ty: IRB.getInt8Ty()),
9364 Size: CopySize, Align: kShadowTLSAlignment, isVolatile: false);
9365
9366 Value *SrcSize = IRB.CreateBinaryIntrinsic(
9367 ID: Intrinsic::umin, LHS: CopySize,
9368 RHS: ConstantInt::get(Ty: MS.IntptrTy, V: kParamTLSSize));
9369 IRB.CreateMemCpy(Dst: VAArgTLSCopy, DstAlign: kShadowTLSAlignment, Src: MS.VAArgTLS,
9370 SrcAlign: kShadowTLSAlignment, Size: SrcSize);
9371 }
9372
9373 // Instrument va_start.
9374 // Copy va_list shadow from the backup copy of the TLS contents.
9375 for (CallInst *OrigInst : VAStartInstrumentationList) {
9376 NextNodeIRBuilder IRB(OrigInst);
9377 Value *VAListTag = OrigInst->getArgOperand(i: 0);
9378 Type *RegSaveAreaPtrTy = PointerType::getUnqual(C&: *MS.C);
9379 Value *RegSaveAreaPtrPtr =
9380 IRB.CreateIntToPtr(V: IRB.CreatePtrToInt(V: VAListTag, DestTy: MS.IntptrTy),
9381 DestTy: PointerType::get(C&: *MS.C, AddressSpace: 0));
9382 Value *RegSaveAreaPtr =
9383 IRB.CreateLoad(Ty: RegSaveAreaPtrTy, Ptr: RegSaveAreaPtrPtr);
9384 Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
9385 const DataLayout &DL = F.getDataLayout();
9386 unsigned IntptrSize = DL.getTypeStoreSize(Ty: MS.IntptrTy);
9387 const Align Alignment = Align(IntptrSize);
9388 std::tie(args&: RegSaveAreaShadowPtr, args&: RegSaveAreaOriginPtr) =
9389 MSV.getShadowOriginPtr(Addr: RegSaveAreaPtr, IRB, ShadowTy: IRB.getInt8Ty(),
9390 Alignment, /*isStore*/ true);
9391 IRB.CreateMemCpy(Dst: RegSaveAreaShadowPtr, DstAlign: Alignment, Src: VAArgTLSCopy, SrcAlign: Alignment,
9392 Size: CopySize);
9393 }
9394 }
9395};
9396
9397// ARM32, Loongarch64, MIPS and RISCV share the same calling conventions
9398// regarding VAArgs.
9399using VarArgARM32Helper = VarArgGenericHelper;
9400using VarArgRISCVHelper = VarArgGenericHelper;
9401using VarArgMIPSHelper = VarArgGenericHelper;
9402using VarArgLoongArch64Helper = VarArgGenericHelper;
9403using VarArgHexagonHelper = VarArgGenericHelper;
9404
9405/// A no-op implementation of VarArgHelper.
9406struct VarArgNoOpHelper : public VarArgHelper {
9407 VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
9408 MemorySanitizerVisitor &MSV) {}
9409
9410 void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {}
9411
9412 void visitVAStartInst(VAStartInst &I) override {}
9413
9414 void visitVACopyInst(VACopyInst &I) override {}
9415
9416 void finalizeInstrumentation() override {}
9417};
9418
9419} // end anonymous namespace
9420
9421static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
9422 MemorySanitizerVisitor &Visitor) {
9423 // VarArg handling is only implemented on AMD64. False positives are possible
9424 // on other platforms.
9425 Triple TargetTriple(Func.getParent()->getTargetTriple());
9426
9427 if (TargetTriple.getArch() == Triple::x86)
9428 return new VarArgI386Helper(Func, Msan, Visitor);
9429
9430 if (TargetTriple.getArch() == Triple::x86_64)
9431 return new VarArgAMD64Helper(Func, Msan, Visitor);
9432
9433 if (TargetTriple.isARM())
9434 return new VarArgARM32Helper(Func, Msan, Visitor, /*VAListTagSize=*/4);
9435
9436 if (TargetTriple.isAArch64())
9437 return new VarArgAArch64Helper(Func, Msan, Visitor);
9438
9439 if (TargetTriple.isSystemZ())
9440 return new VarArgSystemZHelper(Func, Msan, Visitor);
9441
9442 // On PowerPC32 VAListTag is a struct
9443 // {char, char, i16 padding, char *, char *}
9444 if (TargetTriple.isPPC32())
9445 return new VarArgPowerPC32Helper(Func, Msan, Visitor);
9446
9447 if (TargetTriple.isPPC64())
9448 return new VarArgPowerPC64Helper(Func, Msan, Visitor);
9449
9450 if (TargetTriple.isRISCV32())
9451 return new VarArgRISCVHelper(Func, Msan, Visitor, /*VAListTagSize=*/4);
9452
9453 if (TargetTriple.isRISCV64())
9454 return new VarArgRISCVHelper(Func, Msan, Visitor, /*VAListTagSize=*/8);
9455
9456 if (TargetTriple.isMIPS32())
9457 return new VarArgMIPSHelper(Func, Msan, Visitor, /*VAListTagSize=*/4);
9458
9459 if (TargetTriple.isMIPS64())
9460 return new VarArgMIPSHelper(Func, Msan, Visitor, /*VAListTagSize=*/8);
9461
9462 if (TargetTriple.isLoongArch64())
9463 return new VarArgLoongArch64Helper(Func, Msan, Visitor,
9464 /*VAListTagSize=*/8);
9465
9466 if (TargetTriple.getArch() == Triple::hexagon)
9467 return new VarArgHexagonHelper(Func, Msan, Visitor, /*VAListTagSize=*/12);
9468
9469 return new VarArgNoOpHelper(Func, Msan, Visitor);
9470}
9471
9472bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
9473 if (!CompileKernel && F.getName() == kMsanModuleCtorName)
9474 return false;
9475
9476 if (F.hasFnAttribute(Kind: Attribute::DisableSanitizerInstrumentation))
9477 return false;
9478
9479 MemorySanitizerVisitor Visitor(F, *this, TLI);
9480
9481 // Clear out memory attributes.
9482 AttributeMask B;
9483 B.addAttribute(Val: Attribute::Memory).addAttribute(Val: Attribute::Speculatable);
9484 F.removeFnAttrs(Attrs: B);
9485
9486 return Visitor.runOnFunction();
9487}
9488