1//===- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements IR lowering for the llvm.memcpy, llvm.memmove,
10// llvm.memset, llvm.load.relative and llvm.objc.* intrinsics.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/Analysis/ObjCARCInstKind.h"
17#include "llvm/Analysis/ObjCARCUtil.h"
18#include "llvm/Analysis/TargetLibraryInfo.h"
19#include "llvm/Analysis/TargetTransformInfo.h"
20#include "llvm/CodeGen/ExpandVectorPredication.h"
21#include "llvm/CodeGen/LibcallLoweringInfo.h"
22#include "llvm/CodeGen/Passes.h"
23#include "llvm/CodeGen/TargetLowering.h"
24#include "llvm/CodeGen/TargetPassConfig.h"
25#include "llvm/IR/Function.h"
26#include "llvm/IR/GlobalValue.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/Instructions.h"
29#include "llvm/IR/IntrinsicInst.h"
30#include "llvm/IR/Metadata.h"
31#include "llvm/IR/Module.h"
32#include "llvm/IR/ProfDataUtils.h"
33#include "llvm/IR/RuntimeLibcalls.h"
34#include "llvm/IR/Type.h"
35#include "llvm/IR/Use.h"
36#include "llvm/InitializePasses.h"
37#include "llvm/Pass.h"
38#include "llvm/Support/Casting.h"
39#include "llvm/Target/TargetMachine.h"
40#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
41#include "llvm/Transforms/Utils/BasicBlockUtils.h"
42#include "llvm/Transforms/Utils/BuildLibCalls.h"
43#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
44#include "llvm/Transforms/Utils/LowerVectorIntrinsics.h"
45
46using namespace llvm;
47
48#define DEBUG_TYPE "pre-isel-intrinsic-lowering"
49
50/// Threshold to leave statically sized memory intrinsic calls. Calls of known
51/// size larger than this will be expanded by the pass. Calls of unknown or
52/// lower size will be left for expansion in codegen.
53static cl::opt<int64_t> MemIntrinsicExpandSizeThresholdOpt(
54 "mem-intrinsic-expand-size",
55 cl::desc("Set minimum mem intrinsic size to expand in IR"), cl::init(Val: -1),
56 cl::Hidden);
57
58namespace {
59
60struct PreISelIntrinsicLowering {
61 const TargetMachine *TM;
62 const LibcallLoweringModuleAnalysisResult &ModuleLibcalls;
63 const function_ref<TargetTransformInfo &(Function &)> LookupTTI;
64 const function_ref<TargetLibraryInfo &(Function &)> LookupTLI;
65
66 /// If this is true, assume it's preferably to leave memory intrinsic calls
67 /// for replacement with a library call later. Otherwise this depends on
68 /// TargetLoweringInfo availability of the corresponding function.
69 const bool UseMemIntrinsicLibFunc;
70
71 explicit PreISelIntrinsicLowering(
72 const TargetMachine *TM_,
73 const LibcallLoweringModuleAnalysisResult &ModuleLibcalls_,
74 function_ref<TargetTransformInfo &(Function &)> LookupTTI_,
75 function_ref<TargetLibraryInfo &(Function &)> LookupTLI_,
76 bool UseMemIntrinsicLibFunc_ = true)
77 : TM(TM_), ModuleLibcalls(ModuleLibcalls_), LookupTTI(LookupTTI_),
78 LookupTLI(LookupTLI_), UseMemIntrinsicLibFunc(UseMemIntrinsicLibFunc_) {
79 }
80
81 static bool shouldExpandMemIntrinsicWithSize(Value *Size,
82 const TargetTransformInfo &TTI);
83 bool
84 expandMemIntrinsicUses(Function &F,
85 DenseMap<Constant *, GlobalVariable *> &CMap) const;
86 bool lowerIntrinsics(Module &M) const;
87};
88
89} // namespace
90
91template <class T> static bool forEachCall(Function &Intrin, T Callback) {
92 // Lowering all intrinsics in a function will delete multiple uses, so we
93 // can't use an early-inc-range. In case some remain, we don't want to look
94 // at them again. Unfortunately, Value::UseList is private, so we can't use a
95 // simple Use**. If LastUse is null, the next use to consider is
96 // Intrin.use_begin(), otherwise it's LastUse->getNext().
97 Use *LastUse = nullptr;
98 bool Changed = false;
99 while (!Intrin.use_empty() && (!LastUse || LastUse->getNext())) {
100 Use *U = LastUse ? LastUse->getNext() : &*Intrin.use_begin();
101 bool Removed = false;
102 // An intrinsic cannot have its address taken, so it cannot be an argument
103 // operand. It might be used as operand in debug metadata, though.
104 if (auto CI = dyn_cast<CallInst>(Val: U->getUser()))
105 Changed |= Removed = Callback(CI);
106 if (!Removed)
107 LastUse = U;
108 }
109 return Changed;
110}
111
112static bool lowerLoadRelative(Function &F) {
113 if (F.use_empty())
114 return false;
115
116 bool Changed = false;
117 Type *Int32Ty = Type::getInt32Ty(C&: F.getContext());
118
119 for (Use &U : llvm::make_early_inc_range(Range: F.uses())) {
120 auto CI = dyn_cast<CallInst>(Val: U.getUser());
121 if (!CI || CI->getCalledOperand() != &F)
122 continue;
123
124 IRBuilder<> B(CI);
125 Value *OffsetPtr =
126 B.CreatePtrAdd(Ptr: CI->getArgOperand(i: 0), Offset: CI->getArgOperand(i: 1));
127 Value *OffsetI32 = B.CreateAlignedLoad(Ty: Int32Ty, Ptr: OffsetPtr, Align: Align(4));
128
129 Value *ResultPtr = B.CreatePtrAdd(Ptr: CI->getArgOperand(i: 0), Offset: OffsetI32);
130
131 CI->replaceAllUsesWith(V: ResultPtr);
132 CI->eraseFromParent();
133 Changed = true;
134 }
135
136 return Changed;
137}
138
139// ObjCARC has knowledge about whether an obj-c runtime function needs to be
140// always tail-called or never tail-called.
141static CallInst::TailCallKind getOverridingTailCallKind(const Function &F) {
142 objcarc::ARCInstKind Kind = objcarc::GetFunctionClass(F: &F);
143 if (objcarc::IsAlwaysTail(Class: Kind))
144 return CallInst::TCK_Tail;
145 else if (objcarc::IsNeverTail(Class: Kind))
146 return CallInst::TCK_NoTail;
147 return CallInst::TCK_None;
148}
149
150static bool lowerObjCCall(Function &F, RTLIB::LibcallImpl NewFn,
151 bool setNonLazyBind = false) {
152 assert(IntrinsicInst::mayLowerToFunctionCall(F.getIntrinsicID()) &&
153 "Pre-ISel intrinsics do lower into regular function calls");
154 if (F.use_empty())
155 return false;
156
157 // FIXME: When RuntimeLibcalls is an analysis, check if the function is really
158 // supported, and go through RTLIB::Libcall.
159 StringRef NewFnName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(CallImpl: NewFn);
160
161 // If we haven't already looked up this function, check to see if the
162 // program already contains a function with this name.
163 Module *M = F.getParent();
164 FunctionCallee FCache =
165 M->getOrInsertFunction(Name: NewFnName, T: F.getFunctionType());
166
167 if (Function *Fn = dyn_cast<Function>(Val: FCache.getCallee())) {
168 Fn->setLinkage(F.getLinkage());
169 if (setNonLazyBind && !Fn->isWeakForLinker()) {
170 // If we have Native ARC, set nonlazybind attribute for these APIs for
171 // performance.
172 Fn->addFnAttr(Kind: Attribute::NonLazyBind);
173 }
174 }
175
176 CallInst::TailCallKind OverridingTCK = getOverridingTailCallKind(F);
177
178 for (Use &U : llvm::make_early_inc_range(Range: F.uses())) {
179 auto *CB = cast<CallBase>(Val: U.getUser());
180
181 if (CB->getCalledFunction() != &F) {
182 assert(objcarc::getAttachedARCFunction(CB) == &F &&
183 "use expected to be the argument of operand bundle "
184 "\"clang.arc.attachedcall\"");
185 U.set(FCache.getCallee());
186 continue;
187 }
188
189 auto *CI = cast<CallInst>(Val: CB);
190 assert(CI->getCalledFunction() && "Cannot lower an indirect call!");
191
192 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
193 SmallVector<Value *, 8> Args(CI->args());
194 SmallVector<llvm::OperandBundleDef, 1> BundleList;
195 CI->getOperandBundlesAsDefs(Defs&: BundleList);
196 CallInst *NewCI = Builder.CreateCall(Callee: FCache, Args, OpBundles: BundleList);
197 NewCI->setName(CI->getName());
198
199 // Try to set the most appropriate TailCallKind based on both the current
200 // attributes and the ones that we could get from ObjCARC's special
201 // knowledge of the runtime functions.
202 //
203 // std::max respects both requirements of notail and tail here:
204 // * notail on either the call or from ObjCARC becomes notail
205 // * tail on either side is stronger than none, but not notail
206 CallInst::TailCallKind TCK = CI->getTailCallKind();
207 NewCI->setTailCallKind(std::max(a: TCK, b: OverridingTCK));
208
209 // Transfer the 'returned' attribute from the intrinsic to the call site.
210 // By applying this only to intrinsic call sites, we avoid applying it to
211 // non-ARC explicit calls to things like objc_retain which have not been
212 // auto-upgraded to use the intrinsics.
213 unsigned Index;
214 if (F.getAttributes().hasAttrSomewhere(Kind: Attribute::Returned, Index: &Index) &&
215 Index)
216 NewCI->addParamAttr(ArgNo: Index - AttributeList::FirstArgIndex,
217 Kind: Attribute::Returned);
218
219 if (!CI->use_empty())
220 CI->replaceAllUsesWith(V: NewCI);
221 CI->eraseFromParent();
222 }
223
224 return true;
225}
226
227// TODO: Should refine based on estimated number of accesses (e.g. does it
228// require splitting based on alignment)
229bool PreISelIntrinsicLowering::shouldExpandMemIntrinsicWithSize(
230 Value *Size, const TargetTransformInfo &TTI) {
231 ConstantInt *CI = dyn_cast<ConstantInt>(Val: Size);
232 if (!CI)
233 return true;
234 uint64_t Threshold = MemIntrinsicExpandSizeThresholdOpt.getNumOccurrences()
235 ? MemIntrinsicExpandSizeThresholdOpt
236 : TTI.getMaxMemIntrinsicInlineSizeThreshold();
237 uint64_t SizeVal = CI->getZExtValue();
238
239 // Treat a threshold of 0 as a special case to force expansion of all
240 // intrinsics, including size 0.
241 return SizeVal > Threshold || Threshold == 0;
242}
243
244static bool
245canEmitLibcall(const LibcallLoweringModuleAnalysisResult &ModuleLowering,
246 const TargetMachine *TM, Function *F, RTLIB::Libcall LC) {
247 // TODO: Should this consider the address space of the memcpy?
248 if (!TM)
249 return true;
250 const LibcallLoweringInfo &Lowering =
251 ModuleLowering.getLibcallLowering(Subtarget: *TM->getSubtargetImpl(*F));
252 return Lowering.getLibcallImpl(Call: LC) != RTLIB::Unsupported;
253}
254
255static bool
256canEmitMemcpy(const LibcallLoweringModuleAnalysisResult &ModuleLowering,
257 const TargetMachine *TM, Function *F) {
258 // TODO: Should this consider the address space of the memcpy?
259 if (!TM)
260 return true;
261 const LibcallLoweringInfo &Lowering =
262 ModuleLowering.getLibcallLowering(Subtarget: *TM->getSubtargetImpl(*F));
263 return Lowering.getMemcpyImpl() != RTLIB::Unsupported;
264}
265
266// Return a value appropriate for use with the memset_pattern16 libcall, if
267// possible and if we know how. (Adapted from equivalent helper in
268// LoopIdiomRecognize).
269static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
270 const TargetLibraryInfo &TLI) {
271 // TODO: This could check for UndefValue because it can be merged into any
272 // other valid pattern.
273
274 // Don't emit libcalls if a non-default address space is being used.
275 if (Inst->getRawDest()->getType()->getPointerAddressSpace() != 0)
276 return nullptr;
277
278 Value *V = Inst->getValue();
279 Type *VTy = V->getType();
280 const DataLayout &DL = Inst->getDataLayout();
281 Module *M = Inst->getModule();
282
283 if (!isLibFuncEmittable(M, TLI: &TLI, TheLibFunc: LibFunc_memset_pattern16))
284 return nullptr;
285
286 // If the value isn't a constant, we can't promote it to being in a constant
287 // array. We could theoretically do a store to an alloca or something, but
288 // that doesn't seem worthwhile.
289 Constant *C = dyn_cast<Constant>(Val: V);
290 if (!C || isa<ConstantExpr>(Val: C))
291 return nullptr;
292
293 // Only handle simple values that are a power of two bytes in size.
294 uint64_t Size = DL.getTypeSizeInBits(Ty: VTy);
295 if (!DL.typeSizeEqualsStoreSize(Ty: VTy) || !isPowerOf2_64(Value: Size))
296 return nullptr;
297
298 // Don't care enough about darwin/ppc to implement this.
299 if (DL.isBigEndian())
300 return nullptr;
301
302 // Convert to size in bytes.
303 Size /= 8;
304
305 // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
306 // if the top and bottom are the same (e.g. for vectors and large integers).
307 if (Size > 16)
308 return nullptr;
309
310 // If the constant is exactly 16 bytes, just use it.
311 if (Size == 16)
312 return C;
313
314 // Otherwise, we'll use an array of the constants.
315 uint64_t ArraySize = 16 / Size;
316 ArrayType *AT = ArrayType::get(ElementType: V->getType(), NumElements: ArraySize);
317 return ConstantArray::get(T: AT, V: std::vector<Constant *>(ArraySize, C));
318}
319
320// TODO: Handle atomic memcpy and memcpy.inline
321// TODO: Pass ScalarEvolution
322bool PreISelIntrinsicLowering::expandMemIntrinsicUses(
323 Function &F, DenseMap<Constant *, GlobalVariable *> &CMap) const {
324 Intrinsic::ID ID = F.getIntrinsicID();
325 bool Changed = false;
326
327 for (User *U : llvm::make_early_inc_range(Range: F.users())) {
328 Instruction *Inst = cast<Instruction>(Val: U);
329
330 switch (ID) {
331 case Intrinsic::memcpy: {
332 auto *Memcpy = cast<MemCpyInst>(Val: Inst);
333 Function *ParentFunc = Memcpy->getFunction();
334 const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
335 if (shouldExpandMemIntrinsicWithSize(Size: Memcpy->getLength(), TTI)) {
336 if (UseMemIntrinsicLibFunc &&
337 canEmitMemcpy(ModuleLowering: ModuleLibcalls, TM, F: ParentFunc))
338 break;
339
340 // TODO: For optsize, emit the loop into a separate function
341 expandMemCpyAsLoop(MemCpy: Memcpy, TTI);
342 Changed = true;
343 Memcpy->eraseFromParent();
344 }
345
346 break;
347 }
348 case Intrinsic::memcpy_inline: {
349 // Only expand llvm.memcpy.inline with non-constant length in this
350 // codepath, leaving the current SelectionDAG expansion for constant
351 // length memcpy intrinsics undisturbed.
352 auto *Memcpy = cast<MemCpyInst>(Val: Inst);
353 if (isa<ConstantInt>(Val: Memcpy->getLength()))
354 break;
355
356 Function *ParentFunc = Memcpy->getFunction();
357 const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
358 expandMemCpyAsLoop(MemCpy: Memcpy, TTI);
359 Changed = true;
360 Memcpy->eraseFromParent();
361 break;
362 }
363 case Intrinsic::memmove: {
364 auto *Memmove = cast<MemMoveInst>(Val: Inst);
365 Function *ParentFunc = Memmove->getFunction();
366 const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
367 if (shouldExpandMemIntrinsicWithSize(Size: Memmove->getLength(), TTI)) {
368 if (UseMemIntrinsicLibFunc &&
369 canEmitLibcall(ModuleLowering: ModuleLibcalls, TM, F: ParentFunc, LC: RTLIB::MEMMOVE))
370 break;
371
372 if (expandMemMoveAsLoop(MemMove: Memmove, TTI)) {
373 Changed = true;
374 Memmove->eraseFromParent();
375 }
376 }
377
378 break;
379 }
380 case Intrinsic::memset: {
381 auto *Memset = cast<MemSetInst>(Val: Inst);
382 Function *ParentFunc = Memset->getFunction();
383 const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
384 if (shouldExpandMemIntrinsicWithSize(Size: Memset->getLength(), TTI)) {
385 if (UseMemIntrinsicLibFunc &&
386 canEmitLibcall(ModuleLowering: ModuleLibcalls, TM, F: ParentFunc, LC: RTLIB::MEMSET))
387 break;
388
389 expandMemSetAsLoop(MemSet: Memset, TTI);
390 Changed = true;
391 Memset->eraseFromParent();
392 }
393
394 break;
395 }
396 case Intrinsic::memset_inline: {
397 // Only expand llvm.memset.inline with non-constant length in this
398 // codepath, leaving the current SelectionDAG expansion for constant
399 // length memset intrinsics undisturbed.
400 auto *Memset = cast<MemSetInst>(Val: Inst);
401 if (isa<ConstantInt>(Val: Memset->getLength()))
402 break;
403
404 Function *ParentFunc = Memset->getFunction();
405 const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
406 expandMemSetAsLoop(MemSet: Memset, TTI);
407 Changed = true;
408 Memset->eraseFromParent();
409 break;
410 }
411 case Intrinsic::experimental_memset_pattern: {
412 auto *Memset = cast<MemSetPatternInst>(Val: Inst);
413 Function *ParentFunc = Memset->getFunction();
414 const TargetLibraryInfo &TLI = LookupTLI(*ParentFunc);
415 Constant *PatternValue = getMemSetPattern16Value(Inst: Memset, TLI);
416 if (!PatternValue) {
417 // If it isn't possible to emit a memset_pattern16 libcall, expand to
418 // a loop instead.
419 const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
420 expandMemSetPatternAsLoop(MemSet: Memset, TTI);
421 Changed = true;
422 Memset->eraseFromParent();
423 break;
424 }
425 // FIXME: There is currently no profitability calculation for emitting
426 // the libcall vs expanding the memset.pattern directly.
427 IRBuilder<> Builder(Inst);
428 Module *M = Memset->getModule();
429 const DataLayout &DL = Memset->getDataLayout();
430
431 Type *DestPtrTy = Memset->getRawDest()->getType();
432 Type *SizeTTy = TLI.getSizeTType(M: *M);
433 StringRef FuncName = "memset_pattern16";
434 FunctionCallee MSP = getOrInsertLibFunc(M, TLI, TheLibFunc: LibFunc_memset_pattern16,
435 RetTy: Builder.getVoidTy(), Args: DestPtrTy,
436 Args: Builder.getPtrTy(), Args: SizeTTy);
437 inferNonMandatoryLibFuncAttrs(M, Name: FuncName, TLI);
438
439 // Otherwise we should form a memset_pattern16. PatternValue is known
440 // to be an constant array of 16-bytes. Put the value into a mergable
441 // global.
442 assert(Memset->getRawDest()->getType()->getPointerAddressSpace() == 0 &&
443 "Should have skipped if non-zero AS");
444 GlobalVariable *GV;
445 auto It = CMap.find(Val: PatternValue);
446 if (It != CMap.end()) {
447 GV = It->second;
448 } else {
449 GV = new GlobalVariable(
450 *M, PatternValue->getType(), /*isConstant=*/true,
451 GlobalValue::PrivateLinkage, PatternValue, ".memset_pattern");
452 GV->setUnnamedAddr(
453 GlobalValue::UnnamedAddr::Global); // Ok to merge these.
454 // TODO: Consider relaxing alignment requirement.
455 GV->setAlignment(Align(16));
456 CMap[PatternValue] = GV;
457 }
458 Value *PatternPtr = GV;
459 Value *NumBytes = Builder.CreateMul(
460 LHS: TLI.getAsSizeT(V: DL.getTypeAllocSize(Ty: Memset->getValue()->getType()),
461 M: *M),
462 RHS: Builder.CreateZExtOrTrunc(V: Memset->getLength(), DestTy: SizeTTy));
463 CallInst *MemsetPattern16Call =
464 Builder.CreateCall(Callee: MSP, Args: {Memset->getRawDest(), PatternPtr, NumBytes});
465 MemsetPattern16Call->setAAMetadata(Memset->getAAMetadata());
466 // Preserve any call site attributes on the destination pointer
467 // argument (e.g. alignment).
468 AttrBuilder ArgAttrs(Memset->getContext(),
469 Memset->getAttributes().getParamAttrs(ArgNo: 0));
470 MemsetPattern16Call->setAttributes(
471 MemsetPattern16Call->getAttributes().addParamAttributes(
472 C&: Memset->getContext(), ArgNo: 0, B: ArgAttrs));
473 Changed = true;
474 Memset->eraseFromParent();
475 break;
476 }
477 default:
478 llvm_unreachable("unhandled intrinsic");
479 }
480 }
481
482 return Changed;
483}
484
485static GlobalValue *getDeactivationSymbol(CallInst *Call) {
486 if (auto Bundle = Call->getOperandBundle(ID: LLVMContext::OB_deactivation_symbol))
487 return cast<GlobalValue>(Val: Bundle->Inputs[0]);
488 return nullptr;
489}
490
491static bool expandPtrauthForEmuPAC(Function &Intr) {
492 Module &M = *Intr.getParent();
493 if (Triple(M.getTargetTriple()).isArm64e())
494 return false;
495
496 Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
497
498 assert(Intr.getIntrinsicID() == Intrinsic::ptrauth_sign ||
499 Intr.getIntrinsicID() == Intrinsic::ptrauth_auth);
500 auto *EmuFnTy = FunctionType::get(Result: Int64Ty, Params: {Int64Ty, Int64Ty}, isVarArg: false);
501 FunctionCallee EmuIntr = M.getOrInsertFunction(
502 Name: Intr.getIntrinsicID() == Intrinsic::ptrauth_auth ? "__emupac_autda"
503 : "__emupac_pacda",
504 T: EmuFnTy);
505
506 for (User *U : llvm::make_early_inc_range(Range: Intr.users())) {
507 auto *Call = cast<CallInst>(Val: U);
508 // We only support the DA key for now.
509 if (auto *Key = dyn_cast<ConstantInt>(Val: Call->getArgOperand(i: 1));
510 !Key || Key->getZExtValue() != /*AArch64PACKey::DA*/ 2)
511 continue;
512
513 Function *F = Call->getParent()->getParent();
514 Attribute FSAttr = F->getFnAttribute(Kind: "target-features");
515 if (FSAttr.isValid() && FSAttr.getValueAsString().contains(Other: "+pauth"))
516 continue;
517
518 std::vector<OperandBundleDef> DSBundle;
519 if (auto *DS = getDeactivationSymbol(Call))
520 DSBundle.push_back(x: OperandBundleDef("deactivation-symbol", DS));
521
522 IRBuilder<> B(Call);
523 auto *EmuCall = B.CreateCall(
524 Callee: EmuIntr, Args: {Call->getArgOperand(i: 0), Call->getArgOperand(i: 2)}, OpBundles: DSBundle);
525 Call->replaceAllUsesWith(V: EmuCall);
526 Call->eraseFromParent();
527 }
528 return true;
529}
530
531static bool expandProtectedFieldPtr(Function &Intr) {
532 Module &M = *Intr.getParent();
533
534 SmallPtrSet<GlobalValue *, 2> DSsToDeactivate;
535
536 Type *Int8Ty = Type::getInt8Ty(C&: M.getContext());
537 Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
538 PointerType *PtrTy = PointerType::get(C&: M.getContext(), AddressSpace: 0);
539
540 for (User *U : llvm::make_early_inc_range(Range: Intr.users())) {
541 auto *Call = cast<CallInst>(Val: U);
542
543 auto *Pointer = Call->getArgOperand(i: 0);
544 bool UseHWEncoding =
545 cast<ConstantInt>(Val: Call->getArgOperand(i: 2))->getZExtValue();
546 if (!UseHWEncoding)
547 reportFatalUsageError(reason: "software encoding currently unsupported");
548
549 auto *DS = getDeactivationSymbol(Call);
550 OperandBundleDef DSBundle("deactivation-symbol", DS);
551
552 for (Use &U : llvm::make_early_inc_range(Range: Call->uses())) {
553 // Comparisons against null cannot be used to recover the original
554 // pointer so we replace them with comparisons against the original
555 // pointer.
556 if (auto *CI = dyn_cast<ICmpInst>(Val: U.getUser())) {
557 if (auto *Op = dyn_cast<Constant>(Val: CI->getOperand(i_nocapture: 0))) {
558 if (Op->isNullValue()) {
559 CI->setOperand(i_nocapture: 1, Val_nocapture: Pointer);
560 continue;
561 }
562 }
563 if (auto *Op = dyn_cast<Constant>(Val: CI->getOperand(i_nocapture: 1))) {
564 if (Op->isNullValue()) {
565 CI->setOperand(i_nocapture: 0, Val_nocapture: Pointer);
566 continue;
567 }
568 }
569 }
570
571 // If we are here, this means that we couldn't rewrite away this use of
572 // the intrinsic. Any load or store uses were removed by InstCombine, and
573 // in general, we can't rewrite away non-load/store uses of
574 // llvm.protected.field.ptr because doing so could expose the encoded
575 // pointer value to the program. Replace it with the pointer operand, and
576 // arrange to define a deactivation symbol.
577 U.set(Pointer);
578 if (DS)
579 DSsToDeactivate.insert(Ptr: DS);
580 }
581
582 Call->eraseFromParent();
583 }
584
585 if (!DSsToDeactivate.empty()) {
586 // This is an AArch64 NOP instruction. When the deactivation symbol support
587 // is expanded to more architectures, there will likely need to be an API
588 // for retrieving this constant.
589 Constant *Nop =
590 ConstantExpr::getIntToPtr(C: ConstantInt::get(Ty: Int64Ty, V: 0xd503201f), Ty: PtrTy);
591 for (GlobalValue *OldDS : DSsToDeactivate) {
592 GlobalValue *DS = GlobalAlias::create(
593 Ty: Int8Ty, AddressSpace: 0, Linkage: GlobalValue::ExternalLinkage, Name: OldDS->getName(), Aliasee: Nop, Parent: &M);
594 DS->setVisibility(GlobalValue::HiddenVisibility);
595 DS->takeName(V: OldDS);
596 OldDS->replaceAllUsesWith(V: DS);
597 OldDS->eraseFromParent();
598 }
599 }
600 return true;
601}
602
603static bool expandCondLoop(Function &Intr) {
604 for (User *U : llvm::make_early_inc_range(Range: Intr.users())) {
605 auto *Call = cast<CallInst>(Val: U);
606
607 auto *Br = cast<UncondBrInst>(
608 Val: SplitBlockAndInsertIfThen(Cond: Call->getArgOperand(i: 0), SplitBefore: Call, Unreachable: false,
609 BranchWeights: getExplicitlyUnknownBranchWeightsIfProfiled(
610 F&: *Call->getFunction(), DEBUG_TYPE)));
611 Br->setSuccessor(Br->getParent());
612 Call->eraseFromParent();
613 }
614 return true;
615}
616
617static bool expandLoopTrap(Function &Intr) {
618 for (User *U : make_early_inc_range(Range: Intr.users())) {
619 auto *Call = cast<CallInst>(Val: U);
620 if (!Call->getParent()->isEntryBlock() &&
621 std::all_of(first: Call->getParent()->begin(), last: BasicBlock::iterator(Call),
622 pred: [](Instruction &I) { return !I.mayHaveSideEffects(); })) {
623 for (auto *BB : predecessors(BB: Call->getParent())) {
624 auto *BI = dyn_cast<CondBrInst>(Val: BB->getTerminator());
625 if (!BI)
626 continue;
627 IRBuilder<> B(BI);
628 Value *Cond;
629 // The looptrap can either be on the true branch or the false branch.
630 // We insert the cond loop before the branch, which uses the branch's
631 // original condition for going to the looptrap as its condition, and
632 // force the branch to take whichever path does not lead to the
633 // looptrap, as the original path to the looptrap is now unreachable
634 // thanks to the cond loop. The codegenprepare pass will clean up our
635 // "unconditional conditional branch" by combining the two basic blocks
636 // if possible, or replacing it with an unconditional branch.
637 if (BI->getSuccessor(i: 0) == Call->getParent()) {
638 // The looptrap is on the true branch.
639 Cond = BI->getCondition();
640 BI->setCondition(ConstantInt::getFalse(Context&: BI->getContext()));
641 } else {
642 // The looptrap is on the false branch, which means that we need to
643 // invert the condition.
644 Cond = B.CreateNot(V: BI->getCondition());
645 BI->setCondition(ConstantInt::getTrue(Context&: BI->getContext()));
646 }
647 B.CreateIntrinsic(ID: Intrinsic::cond_loop, Args: Cond);
648 }
649 }
650 IRBuilder<> B(Call);
651 B.CreateIntrinsic(ID: Intrinsic::cond_loop,
652 Args: ConstantInt::getTrue(Context&: Call->getContext()));
653 Call->eraseFromParent();
654 }
655 return true;
656}
657
658bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
659 // Map unique constants to globals.
660 DenseMap<Constant *, GlobalVariable *> CMap;
661 bool Changed = false;
662 for (Function &F : M) {
663 switch (F.getIntrinsicID()) {
664 default:
665 break;
666 case Intrinsic::memcpy:
667 case Intrinsic::memcpy_inline:
668 case Intrinsic::memmove:
669 case Intrinsic::memset:
670 case Intrinsic::memset_inline:
671 case Intrinsic::experimental_memset_pattern:
672 Changed |= expandMemIntrinsicUses(F, CMap);
673 break;
674 case Intrinsic::load_relative:
675 Changed |= lowerLoadRelative(F);
676 break;
677 case Intrinsic::is_constant:
678 case Intrinsic::objectsize:
679 Changed |= forEachCall(Intrin&: F, Callback: [&](CallInst *CI) {
680 Function *Parent = CI->getParent()->getParent();
681 TargetLibraryInfo &TLI = LookupTLI(*Parent);
682 // Intrinsics in unreachable code are not lowered.
683 bool Changed = lowerConstantIntrinsics(F&: *Parent, TLI, /*DT=*/nullptr);
684 return Changed;
685 });
686 break;
687#define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
688 case Intrinsic::VPID:
689#include "llvm/IR/VPIntrinsics.def"
690 forEachCall(Intrin&: F, Callback: [&](CallInst *CI) {
691 Function *Parent = CI->getParent()->getParent();
692 const TargetTransformInfo &TTI = LookupTTI(*Parent);
693 auto *VPI = cast<VPIntrinsic>(Val: CI);
694 VPExpansionDetails ED = expandVectorPredicationIntrinsic(VPI&: *VPI, TTI);
695 // Expansion of VP intrinsics may change the IR but not actually
696 // replace the intrinsic, so update Changed for the pass
697 // and compute Removed for forEachCall.
698 Changed |= ED != VPExpansionDetails::IntrinsicUnchanged;
699 bool Removed = ED == VPExpansionDetails::IntrinsicReplaced;
700 return Removed;
701 });
702 break;
703 case Intrinsic::objc_autorelease:
704 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_autorelease);
705 break;
706 case Intrinsic::objc_autoreleasePoolPop:
707 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_autoreleasePoolPop);
708 break;
709 case Intrinsic::objc_autoreleasePoolPush:
710 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_autoreleasePoolPush);
711 break;
712 case Intrinsic::objc_autoreleaseReturnValue:
713 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_autoreleaseReturnValue);
714 break;
715 case Intrinsic::objc_copyWeak:
716 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_copyWeak);
717 break;
718 case Intrinsic::objc_destroyWeak:
719 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_destroyWeak);
720 break;
721 case Intrinsic::objc_initWeak:
722 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_initWeak);
723 break;
724 case Intrinsic::objc_loadWeak:
725 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_loadWeak);
726 break;
727 case Intrinsic::objc_loadWeakRetained:
728 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_loadWeakRetained);
729 break;
730 case Intrinsic::objc_moveWeak:
731 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_moveWeak);
732 break;
733 case Intrinsic::objc_release:
734 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_release, setNonLazyBind: true);
735 break;
736 case Intrinsic::objc_retain:
737 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_retain, setNonLazyBind: true);
738 break;
739 case Intrinsic::objc_retainAutorelease:
740 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_retainAutorelease);
741 break;
742 case Intrinsic::objc_retainAutoreleaseReturnValue:
743 Changed |=
744 lowerObjCCall(F, NewFn: RTLIB::impl_objc_retainAutoreleaseReturnValue);
745 break;
746 case Intrinsic::objc_retainAutoreleasedReturnValue:
747 Changed |=
748 lowerObjCCall(F, NewFn: RTLIB::impl_objc_retainAutoreleasedReturnValue);
749 break;
750 case Intrinsic::objc_claimAutoreleasedReturnValue:
751 Changed |=
752 lowerObjCCall(F, NewFn: RTLIB::impl_objc_claimAutoreleasedReturnValue);
753 break;
754 case Intrinsic::objc_retainBlock:
755 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_retainBlock);
756 break;
757 case Intrinsic::objc_storeStrong:
758 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_storeStrong);
759 break;
760 case Intrinsic::objc_storeWeak:
761 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_storeWeak);
762 break;
763 case Intrinsic::objc_unsafeClaimAutoreleasedReturnValue:
764 Changed |=
765 lowerObjCCall(F, NewFn: RTLIB::impl_objc_unsafeClaimAutoreleasedReturnValue);
766 break;
767 case Intrinsic::objc_retainedObject:
768 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_retainedObject);
769 break;
770 case Intrinsic::objc_unretainedObject:
771 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_unretainedObject);
772 break;
773 case Intrinsic::objc_unretainedPointer:
774 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_unretainedPointer);
775 break;
776 case Intrinsic::objc_retain_autorelease:
777 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_retain_autorelease);
778 break;
779 case Intrinsic::objc_sync_enter:
780 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_sync_enter);
781 break;
782 case Intrinsic::objc_sync_exit:
783 Changed |= lowerObjCCall(F, NewFn: RTLIB::impl_objc_sync_exit);
784 break;
785 case Intrinsic::acos:
786 case Intrinsic::asin:
787 case Intrinsic::atan:
788 case Intrinsic::cos:
789 case Intrinsic::cosh:
790 case Intrinsic::exp:
791 case Intrinsic::exp2:
792 case Intrinsic::exp10:
793 case Intrinsic::log:
794 case Intrinsic::log2:
795 case Intrinsic::log10:
796 case Intrinsic::sin:
797 case Intrinsic::sinh:
798 case Intrinsic::tan:
799 case Intrinsic::tanh:
800 Changed |= forEachCall(Intrin&: F, Callback: [&](CallInst *CI) {
801 Type *Ty = CI->getArgOperand(i: 0)->getType();
802 if (!TM || !isa<ScalableVectorType>(Val: Ty))
803 return false;
804 const TargetLowering *TL = TM->getSubtargetImpl(F)->getTargetLowering();
805 unsigned Op = TL->IntrinsicIDToISD(ID: F.getIntrinsicID());
806 assert(Op != ISD::DELETED_NODE && "unsupported intrinsic");
807 if (!TL->isOperationExpand(Op, VT: EVT::getEVT(Ty)))
808 return false;
809 return lowerUnaryVectorIntrinsicAsLoop(M, CI);
810 });
811 break;
812 case Intrinsic::ptrauth_sign:
813 case Intrinsic::ptrauth_auth:
814 Changed |= expandPtrauthForEmuPAC(Intr&: F);
815 break;
816 case Intrinsic::protected_field_ptr:
817 Changed |= expandProtectedFieldPtr(Intr&: F);
818 break;
819 case Intrinsic::cond_loop:
820 if (!TM->canLowerCondLoop())
821 Changed |= expandCondLoop(Intr&: F);
822 break;
823 case Intrinsic::looptrap:
824 Changed |= expandLoopTrap(Intr&: F);
825 if (!TM->canLowerCondLoop())
826 if (auto *CondLoop = M.getFunction(Name: "llvm.cond.loop"))
827 Changed |= expandCondLoop(Intr&: *CondLoop);
828 break;
829 }
830 }
831 return Changed;
832}
833
834namespace {
835
836class PreISelIntrinsicLoweringLegacyPass : public ModulePass {
837public:
838 static char ID;
839
840 PreISelIntrinsicLoweringLegacyPass() : ModulePass(ID) {}
841
842 void getAnalysisUsage(AnalysisUsage &AU) const override {
843 AU.addRequired<TargetTransformInfoWrapperPass>();
844 AU.addRequired<TargetLibraryInfoWrapperPass>();
845 AU.addRequired<LibcallLoweringInfoWrapper>();
846 AU.addRequired<TargetPassConfig>();
847 }
848
849 bool runOnModule(Module &M) override {
850 const LibcallLoweringModuleAnalysisResult &ModuleLibcalls =
851 getAnalysis<LibcallLoweringInfoWrapper>().getResult(M);
852
853 auto LookupTTI = [this](Function &F) -> TargetTransformInfo & {
854 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
855 };
856 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
857 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
858 };
859
860 const auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
861 PreISelIntrinsicLowering Lowering(TM, ModuleLibcalls, LookupTTI, LookupTLI);
862 return Lowering.lowerIntrinsics(M);
863 }
864};
865
866} // end anonymous namespace
867
868char PreISelIntrinsicLoweringLegacyPass::ID;
869
870INITIALIZE_PASS_BEGIN(PreISelIntrinsicLoweringLegacyPass,
871 "pre-isel-intrinsic-lowering",
872 "Pre-ISel Intrinsic Lowering", false, false)
873INITIALIZE_PASS_DEPENDENCY(LibcallLoweringInfoWrapper)
874INITIALIZE_PASS_DEPENDENCY(RuntimeLibraryInfoWrapper)
875INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
876INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
877INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
878INITIALIZE_PASS_END(PreISelIntrinsicLoweringLegacyPass,
879 "pre-isel-intrinsic-lowering",
880 "Pre-ISel Intrinsic Lowering", false, false)
881
882ModulePass *llvm::createPreISelIntrinsicLoweringPass() {
883 return new PreISelIntrinsicLoweringLegacyPass();
884}
885
886PreservedAnalyses
887PreISelIntrinsicLoweringPass::run(Module &M, ModuleAnalysisManager &MAM) {
888 const LibcallLoweringModuleAnalysisResult &LibcallLowering =
889 MAM.getResult<LibcallLoweringModuleAnalysis>(IR&: M);
890
891 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
892
893 auto LookupTTI = [&FAM](Function &F) -> TargetTransformInfo & {
894 return FAM.getResult<TargetIRAnalysis>(IR&: F);
895 };
896 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
897 return FAM.getResult<TargetLibraryAnalysis>(IR&: F);
898 };
899
900 PreISelIntrinsicLowering Lowering(TM, LibcallLowering, LookupTTI, LookupTLI);
901 if (!Lowering.lowerIntrinsics(M))
902 return PreservedAnalyses::all();
903 else
904 return PreservedAnalyses::none();
905}
906