1//===-- AArch64Arm64ECCallLowering.cpp - Lower Arm64EC calls ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the IR transform to lower external or indirect calls for
11/// the ARM64EC calling convention. Such calls must go through the runtime, so
12/// we can translate the calling convention for calls into the emulator.
13///
14/// This subsumes Control Flow Guard handling.
15///
16//===----------------------------------------------------------------------===//
17
18#include "AArch64.h"
19#include "llvm/ADT/SetVector.h"
20#include "llvm/ADT/SmallString.h"
21#include "llvm/ADT/SmallVector.h"
22#include "llvm/ADT/Statistic.h"
23#include "llvm/IR/CallingConv.h"
24#include "llvm/IR/DiagnosticInfo.h"
25#include "llvm/IR/GlobalAlias.h"
26#include "llvm/IR/IRBuilder.h"
27#include "llvm/IR/Instruction.h"
28#include "llvm/IR/Mangler.h"
29#include "llvm/IR/Module.h"
30#include "llvm/Object/COFF.h"
31#include "llvm/Pass.h"
32#include "llvm/Support/CommandLine.h"
33#include "llvm/TargetParser/Triple.h"
34
35using namespace llvm;
36using namespace llvm::COFF;
37
38using OperandBundleDef = OperandBundleDefT<Value *>;
39
40#define DEBUG_TYPE "arm64eccalllowering"
41
42STATISTIC(Arm64ECCallsLowered, "Number of Arm64EC calls lowered");
43
44static cl::opt<bool> LowerDirectToIndirect("arm64ec-lower-direct-to-indirect",
45 cl::Hidden, cl::init(Val: true));
46static cl::opt<bool> GenerateThunks("arm64ec-generate-thunks", cl::Hidden,
47 cl::init(Val: true));
48
49namespace {
50
/// Describes how one thunk argument is converted between its Arm64 type and
/// its x64 type.
enum ThunkArgTranslation : uint8_t {
  /// Both sides use the same type; the value is forwarded unchanged.
  Direct = 0,
  /// The x64 side uses an integer of the same size; the value is
  /// reinterpreted by storing it to a stack slot and loading it back.
  Bitcast = 1,
  /// The x64 side receives a pointer to the value instead of the value.
  PointerIndirection = 2,
};
56
57struct ThunkArgInfo {
58 Type *Arm64Ty;
59 Type *X64Ty;
60 ThunkArgTranslation Translation;
61};
62
63class AArch64Arm64ECCallLowering : public ModulePass {
64public:
65 static char ID;
66 AArch64Arm64ECCallLowering() : ModulePass(ID) {}
67
68 Function *buildExitThunk(FunctionType *FnTy, AttributeList Attrs);
69 Function *buildEntryThunk(Function *F);
70 void lowerCall(CallBase *CB);
71 Function *buildGuestExitThunk(Function *F);
72 Function *buildPatchableThunk(GlobalAlias *UnmangledAlias,
73 GlobalAlias *MangledAlias);
74 bool processFunction(Function &F, SetVector<GlobalValue *> &DirectCalledFns,
75 DenseMap<GlobalAlias *, GlobalAlias *> &FnsMap);
76 bool runOnModule(Module &M) override;
77
78private:
79 ControlFlowGuardMode CFGuardModuleFlag = ControlFlowGuardMode::Disabled;
80 FunctionType *GuardFnType = nullptr;
81 FunctionType *DispatchFnType = nullptr;
82 Constant *GuardFnCFGlobal = nullptr;
83 Constant *GuardFnGlobal = nullptr;
84 Constant *DispatchFnGlobal = nullptr;
85 Module *M = nullptr;
86
87 Type *PtrTy;
88 Type *I64Ty;
89 Type *VoidTy;
90
91 void getThunkType(FunctionType *FT, AttributeList AttrList,
92 Arm64ECThunkType TT, raw_ostream &Out,
93 FunctionType *&Arm64Ty, FunctionType *&X64Ty,
94 SmallVector<ThunkArgTranslation> &ArgTranslations);
95 void getThunkRetType(FunctionType *FT, AttributeList AttrList,
96 raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy,
97 SmallVectorImpl<Type *> &Arm64ArgTypes,
98 SmallVectorImpl<Type *> &X64ArgTypes,
99 SmallVector<ThunkArgTranslation> &ArgTranslations,
100 bool &HasSretPtr);
101 void getThunkArgTypes(FunctionType *FT, AttributeList AttrList,
102 Arm64ECThunkType TT, raw_ostream &Out,
103 SmallVectorImpl<Type *> &Arm64ArgTypes,
104 SmallVectorImpl<Type *> &X64ArgTypes,
105 SmallVectorImpl<ThunkArgTranslation> &ArgTranslations,
106 bool HasSretPtr);
107 ThunkArgInfo canonicalizeThunkType(Type *T, Align Alignment, bool Ret,
108 uint64_t ArgSizeBytes, raw_ostream &Out);
109};
110
111} // end anonymous namespace
112
113void AArch64Arm64ECCallLowering::getThunkType(
114 FunctionType *FT, AttributeList AttrList, Arm64ECThunkType TT,
115 raw_ostream &Out, FunctionType *&Arm64Ty, FunctionType *&X64Ty,
116 SmallVector<ThunkArgTranslation> &ArgTranslations) {
117 Out << (TT == Arm64ECThunkType::Entry ? "$ientry_thunk$cdecl$"
118 : "$iexit_thunk$cdecl$");
119
120 Type *Arm64RetTy;
121 Type *X64RetTy;
122
123 SmallVector<Type *> Arm64ArgTypes;
124 SmallVector<Type *> X64ArgTypes;
125
126 // The first argument to a thunk is the called function, stored in x9.
127 // For exit thunks, we pass the called function down to the emulator;
128 // for entry/guest exit thunks, we just call the Arm64 function directly.
129 if (TT == Arm64ECThunkType::Exit)
130 Arm64ArgTypes.push_back(Elt: PtrTy);
131 X64ArgTypes.push_back(Elt: PtrTy);
132
133 bool HasSretPtr = false;
134 getThunkRetType(FT, AttrList, Out, Arm64RetTy, X64RetTy, Arm64ArgTypes,
135 X64ArgTypes, ArgTranslations, HasSretPtr);
136
137 getThunkArgTypes(FT, AttrList, TT, Out, Arm64ArgTypes, X64ArgTypes,
138 ArgTranslations, HasSretPtr);
139
140 Arm64Ty = FunctionType::get(Result: Arm64RetTy, Params: Arm64ArgTypes, isVarArg: false);
141
142 X64Ty = FunctionType::get(Result: X64RetTy, Params: X64ArgTypes, isVarArg: false);
143}
144
145void AArch64Arm64ECCallLowering::getThunkArgTypes(
146 FunctionType *FT, AttributeList AttrList, Arm64ECThunkType TT,
147 raw_ostream &Out, SmallVectorImpl<Type *> &Arm64ArgTypes,
148 SmallVectorImpl<Type *> &X64ArgTypes,
149 SmallVectorImpl<ThunkArgTranslation> &ArgTranslations, bool HasSretPtr) {
150
151 Out << "$";
152 if (FT->isVarArg()) {
153 // We treat the variadic function's thunk as a normal function
154 // with the following type on the ARM side:
155 // rettype exitthunk(
156 // ptr x9, ptr x0, i64 x1, i64 x2, i64 x3, ptr x4, i64 x5)
157 //
158 // that can coverage all types of variadic function.
159 // x9 is similar to normal exit thunk, store the called function.
160 // x0-x3 is the arguments be stored in registers.
161 // x4 is the address of the arguments on the stack.
162 // x5 is the size of the arguments on the stack.
163 //
164 // On the x64 side, it's the same except that x5 isn't set.
165 //
166 // If both the ARM and X64 sides are sret, there are only three
167 // arguments in registers.
168 //
169 // If the X64 side is sret, but the ARM side isn't, we pass an extra value
170 // to/from the X64 side, and let SelectionDAG transform it into a memory
171 // location.
172 Out << "varargs";
173
174 // x0-x3
175 for (int i = HasSretPtr ? 1 : 0; i < 4; i++) {
176 Arm64ArgTypes.push_back(Elt: I64Ty);
177 X64ArgTypes.push_back(Elt: I64Ty);
178 ArgTranslations.push_back(Elt: ThunkArgTranslation::Direct);
179 }
180
181 // x4
182 Arm64ArgTypes.push_back(Elt: PtrTy);
183 X64ArgTypes.push_back(Elt: PtrTy);
184 ArgTranslations.push_back(Elt: ThunkArgTranslation::Direct);
185 // x5
186 Arm64ArgTypes.push_back(Elt: I64Ty);
187 if (TT != Arm64ECThunkType::Entry) {
188 // FIXME: x5 isn't actually used by the x64 side; revisit once we
189 // have proper isel for varargs
190 X64ArgTypes.push_back(Elt: I64Ty);
191 ArgTranslations.push_back(Elt: ThunkArgTranslation::Direct);
192 }
193 return;
194 }
195
196 unsigned I = 0;
197 if (HasSretPtr)
198 I++;
199
200 if (I == FT->getNumParams()) {
201 Out << "v";
202 return;
203 }
204
205 for (unsigned E = FT->getNumParams(); I != E; ++I) {
206#if 0
207 // FIXME: Need more information about argument size; see
208 // https://reviews.llvm.org/D132926
209 uint64_t ArgSizeBytes = AttrList.getParamArm64ECArgSizeBytes(I);
210 Align ParamAlign = AttrList.getParamAlignment(I).valueOrOne();
211#else
212 uint64_t ArgSizeBytes = 0;
213 Align ParamAlign = Align();
214#endif
215 auto [Arm64Ty, X64Ty, ArgTranslation] =
216 canonicalizeThunkType(T: FT->getParamType(i: I), Alignment: ParamAlign,
217 /*Ret*/ false, ArgSizeBytes, Out);
218 Arm64ArgTypes.push_back(Elt: Arm64Ty);
219 X64ArgTypes.push_back(Elt: X64Ty);
220 ArgTranslations.push_back(Elt: ArgTranslation);
221 }
222}
223
224void AArch64Arm64ECCallLowering::getThunkRetType(
225 FunctionType *FT, AttributeList AttrList, raw_ostream &Out,
226 Type *&Arm64RetTy, Type *&X64RetTy, SmallVectorImpl<Type *> &Arm64ArgTypes,
227 SmallVectorImpl<Type *> &X64ArgTypes,
228 SmallVector<ThunkArgTranslation> &ArgTranslations, bool &HasSretPtr) {
229 Type *T = FT->getReturnType();
230#if 0
231 // FIXME: Need more information about argument size; see
232 // https://reviews.llvm.org/D132926
233 uint64_t ArgSizeBytes = AttrList.getRetArm64ECArgSizeBytes();
234#else
235 int64_t ArgSizeBytes = 0;
236#endif
237 if (T->isVoidTy()) {
238 if (FT->getNumParams()) {
239 Attribute SRetAttr0 = AttrList.getParamAttr(ArgNo: 0, Kind: Attribute::StructRet);
240 Attribute InRegAttr0 = AttrList.getParamAttr(ArgNo: 0, Kind: Attribute::InReg);
241 Attribute SRetAttr1, InRegAttr1;
242 if (FT->getNumParams() > 1) {
243 // Also check the second parameter (for class methods, the first
244 // parameter is "this", and the second parameter is the sret pointer.)
245 // It doesn't matter which one is sret.
246 SRetAttr1 = AttrList.getParamAttr(ArgNo: 1, Kind: Attribute::StructRet);
247 InRegAttr1 = AttrList.getParamAttr(ArgNo: 1, Kind: Attribute::InReg);
248 }
249 if ((SRetAttr0.isValid() && InRegAttr0.isValid()) ||
250 (SRetAttr1.isValid() && InRegAttr1.isValid())) {
251 // sret+inreg indicates a call that returns a C++ class value. This is
252 // actually equivalent to just passing and returning a void* pointer
253 // as the first or second argument. Translate it that way, instead of
254 // trying to model "inreg" in the thunk's calling convention; this
255 // simplfies the rest of the code, and matches MSVC mangling.
256 Out << "i8";
257 Arm64RetTy = I64Ty;
258 X64RetTy = I64Ty;
259 return;
260 }
261 if (SRetAttr0.isValid()) {
262 // FIXME: Sanity-check the sret type; if it's an integer or pointer,
263 // we'll get screwy mangling/codegen.
264 // FIXME: For large struct types, mangle as an integer argument and
265 // integer return, so we can reuse more thunks, instead of "m" syntax.
266 // (MSVC mangles this case as an integer return with no argument, but
267 // that's a miscompile.)
268 Type *SRetType = SRetAttr0.getValueAsType();
269 Align SRetAlign = AttrList.getParamAlignment(ArgNo: 0).valueOrOne();
270 canonicalizeThunkType(T: SRetType, Alignment: SRetAlign, /*Ret*/ true, ArgSizeBytes,
271 Out);
272 Arm64RetTy = VoidTy;
273 X64RetTy = VoidTy;
274 Arm64ArgTypes.push_back(Elt: FT->getParamType(i: 0));
275 X64ArgTypes.push_back(Elt: FT->getParamType(i: 0));
276 ArgTranslations.push_back(Elt: ThunkArgTranslation::Direct);
277 HasSretPtr = true;
278 return;
279 }
280 }
281
282 Out << "v";
283 Arm64RetTy = VoidTy;
284 X64RetTy = VoidTy;
285 return;
286 }
287
288 auto info =
289 canonicalizeThunkType(T, Alignment: Align(), /*Ret*/ true, ArgSizeBytes, Out);
290 Arm64RetTy = info.Arm64Ty;
291 X64RetTy = info.X64Ty;
292 if (X64RetTy->isPointerTy()) {
293 // If the X64 type is canonicalized to a pointer, that means it's
294 // passed/returned indirectly. For a return value, that means it's an
295 // sret pointer.
296 X64ArgTypes.push_back(Elt: X64RetTy);
297 X64RetTy = VoidTy;
298 }
299}
300
301ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
302 Type *T, Align Alignment, bool Ret, uint64_t ArgSizeBytes,
303 raw_ostream &Out) {
304
305 auto direct = [](Type *T) {
306 return ThunkArgInfo{.Arm64Ty: T, .X64Ty: T, .Translation: ThunkArgTranslation::Direct};
307 };
308
309 auto bitcast = [this](Type *Arm64Ty, uint64_t SizeInBytes) {
310 return ThunkArgInfo{.Arm64Ty: Arm64Ty,
311 .X64Ty: llvm::Type::getIntNTy(C&: M->getContext(), N: SizeInBytes * 8),
312 .Translation: ThunkArgTranslation::Bitcast};
313 };
314
315 auto pointerIndirection = [this](Type *Arm64Ty) {
316 return ThunkArgInfo{.Arm64Ty: Arm64Ty, .X64Ty: PtrTy,
317 .Translation: ThunkArgTranslation::PointerIndirection};
318 };
319
320 if (T->isHalfTy()) {
321 // Prefix with `llvm` since MSVC doesn't specify `_Float16`
322 Out << "__llvm_h__";
323 return direct(T);
324 }
325
326 if (T->isFloatTy()) {
327 Out << "f";
328 return direct(T);
329 }
330
331 if (T->isDoubleTy()) {
332 Out << "d";
333 return direct(T);
334 }
335
336 if (T->isFloatingPointTy()) {
337 report_fatal_error(reason: "Only 16, 32, and 64 bit floating points are supported "
338 "for ARM64EC thunks");
339 }
340
341 auto &DL = M->getDataLayout();
342
343 if (auto *StructTy = dyn_cast<StructType>(Val: T))
344 if (StructTy->getNumElements() == 1)
345 T = StructTy->getElementType(N: 0);
346
347 if (T->isArrayTy()) {
348 Type *ElementTy = T->getArrayElementType();
349 uint64_t ElementCnt = T->getArrayNumElements();
350 uint64_t ElementSizePerBytes = DL.getTypeSizeInBits(Ty: ElementTy) / 8;
351 uint64_t TotalSizeBytes = ElementCnt * ElementSizePerBytes;
352 if (ElementTy->isHalfTy() || ElementTy->isFloatTy() ||
353 ElementTy->isDoubleTy()) {
354 if (ElementTy->isHalfTy())
355 // Prefix with `llvm` since MSVC doesn't specify `_Float16`
356 Out << "__llvm_H__";
357 else if (ElementTy->isFloatTy())
358 Out << "F";
359 else if (ElementTy->isDoubleTy())
360 Out << "D";
361 Out << TotalSizeBytes;
362 if (Alignment.value() >= 16 && !Ret)
363 Out << "a" << Alignment.value();
364 if (TotalSizeBytes <= 8) {
365 // Arm64 returns small structs of float/double in float registers;
366 // X64 uses RAX.
367 return bitcast(T, TotalSizeBytes);
368 } else {
369 // Struct is passed directly on Arm64, but indirectly on X64.
370 return pointerIndirection(T);
371 }
372 } else if (T->isFloatingPointTy()) {
373 report_fatal_error(
374 reason: "Only 16, 32, and 64 bit floating points are supported "
375 "for ARM64EC thunks");
376 }
377 }
378
379 if ((T->isIntegerTy() || T->isPointerTy()) && DL.getTypeSizeInBits(Ty: T) <= 64) {
380 Out << "i8";
381 return direct(I64Ty);
382 }
383
384 unsigned TypeSize = ArgSizeBytes;
385 if (TypeSize == 0)
386 TypeSize = DL.getTypeSizeInBits(Ty: T) / 8;
387 Out << "m";
388 if (TypeSize != 4)
389 Out << TypeSize;
390 if (Alignment.value() >= 16 && !Ret)
391 Out << "a" << Alignment.value();
392 // FIXME: Try to canonicalize Arm64Ty more thoroughly?
393 if (TypeSize == 1 || TypeSize == 2 || TypeSize == 4 || TypeSize == 8) {
394 // Pass directly in an integer register
395 return bitcast(T, TypeSize);
396 } else {
397 // Passed directly on Arm64, but indirectly on X64.
398 return pointerIndirection(T);
399 }
400}
401
402// This function builds the "exit thunk", a function which translates
403// arguments and return values when calling x64 code from AArch64 code.
404Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT,
405 AttributeList Attrs) {
406 SmallString<256> ExitThunkName;
407 llvm::raw_svector_ostream ExitThunkStream(ExitThunkName);
408 FunctionType *Arm64Ty, *X64Ty;
409 SmallVector<ThunkArgTranslation> ArgTranslations;
410 getThunkType(FT, AttrList: Attrs, TT: Arm64ECThunkType::Exit, Out&: ExitThunkStream, Arm64Ty,
411 X64Ty, ArgTranslations);
412 if (Function *F = M->getFunction(Name: ExitThunkName))
413 return F;
414
415 Function *F = Function::Create(Ty: Arm64Ty, Linkage: GlobalValue::LinkOnceODRLinkage, AddrSpace: 0,
416 N: ExitThunkName, M);
417 F->setCallingConv(CallingConv::ARM64EC_Thunk_Native);
418 F->setSection(".wowthk$aa");
419 F->setComdat(M->getOrInsertComdat(Name: ExitThunkName));
420 // Copy MSVC, and always set up a frame pointer. (Maybe this isn't necessary.)
421 F->addFnAttr(Kind: "frame-pointer", Val: "all");
422 // Only copy sret from the first argument. For C++ instance methods, clang can
423 // stick an sret marking on a later argument, but it doesn't actually affect
424 // the ABI, so we can omit it. This avoids triggering a verifier assertion.
425 if (FT->getNumParams()) {
426 auto SRet = Attrs.getParamAttr(ArgNo: 0, Kind: Attribute::StructRet);
427 auto InReg = Attrs.getParamAttr(ArgNo: 0, Kind: Attribute::InReg);
428 if (SRet.isValid() && !InReg.isValid())
429 F->addParamAttr(ArgNo: 1, Attr: SRet);
430 }
431 // FIXME: Copy anything other than sret? Shouldn't be necessary for normal
432 // C ABI, but might show up in other cases.
433 BasicBlock *BB = BasicBlock::Create(Context&: M->getContext(), Name: "", Parent: F);
434 IRBuilder<> IRB(BB);
435 Value *CalleePtr =
436 M->getOrInsertGlobal(Name: "__os_arm64x_dispatch_call_no_redirect", Ty: PtrTy);
437 Value *Callee = IRB.CreateLoad(Ty: PtrTy, Ptr: CalleePtr);
438 auto &DL = M->getDataLayout();
439 SmallVector<Value *> Args;
440
441 // Pass the called function in x9.
442 auto X64TyOffset = 1;
443 Args.push_back(Elt: F->arg_begin());
444
445 Type *RetTy = Arm64Ty->getReturnType();
446 if (RetTy != X64Ty->getReturnType()) {
447 // If the return type is an array or struct, translate it. Values of size
448 // 8 or less go into RAX; bigger values go into memory, and we pass a
449 // pointer.
450 if (DL.getTypeStoreSize(Ty: RetTy) > 8) {
451 Args.push_back(Elt: IRB.CreateAlloca(Ty: RetTy));
452 X64TyOffset++;
453 }
454 }
455
456 for (auto [Arg, X64ArgType, ArgTranslation] : llvm::zip_equal(
457 t: make_range(x: F->arg_begin() + 1, y: F->arg_end()),
458 u: make_range(x: X64Ty->param_begin() + X64TyOffset, y: X64Ty->param_end()),
459 args&: ArgTranslations)) {
460 // Translate arguments from AArch64 calling convention to x86 calling
461 // convention.
462 //
463 // For simple types, we don't need to do any translation: they're
464 // represented the same way. (Implicit sign extension is not part of
465 // either convention.)
466 //
467 // The big thing we have to worry about is struct types... but
468 // fortunately AArch64 clang is pretty friendly here: the cases that need
469 // translation are always passed as a struct or array. (If we run into
470 // some cases where this doesn't work, we can teach clang to mark it up
471 // with an attribute.)
472 //
473 // The first argument is the called function, stored in x9.
474 if (ArgTranslation != ThunkArgTranslation::Direct) {
475 Value *Mem = IRB.CreateAlloca(Ty: Arg.getType());
476 IRB.CreateStore(Val: &Arg, Ptr: Mem);
477 if (ArgTranslation == ThunkArgTranslation::Bitcast) {
478 Type *IntTy = IRB.getIntNTy(N: DL.getTypeStoreSizeInBits(Ty: Arg.getType()));
479 Args.push_back(Elt: IRB.CreateLoad(Ty: IntTy, Ptr: Mem));
480 } else {
481 assert(ArgTranslation == ThunkArgTranslation::PointerIndirection);
482 Args.push_back(Elt: Mem);
483 }
484 } else {
485 Args.push_back(Elt: &Arg);
486 }
487 assert(Args.back()->getType() == X64ArgType);
488 }
489 // FIXME: Transfer necessary attributes? sret? anything else?
490
491 CallInst *Call = IRB.CreateCall(FTy: X64Ty, Callee, Args);
492 Call->setCallingConv(CallingConv::ARM64EC_Thunk_X64);
493
494 Value *RetVal = Call;
495 if (RetTy != X64Ty->getReturnType()) {
496 // If we rewrote the return type earlier, convert the return value to
497 // the proper type.
498 if (DL.getTypeStoreSize(Ty: RetTy) > 8) {
499 RetVal = IRB.CreateLoad(Ty: RetTy, Ptr: Args[1]);
500 } else {
501 Value *CastAlloca = IRB.CreateAlloca(Ty: RetTy);
502 IRB.CreateStore(Val: Call, Ptr: CastAlloca);
503 RetVal = IRB.CreateLoad(Ty: RetTy, Ptr: CastAlloca);
504 }
505 }
506
507 if (RetTy->isVoidTy())
508 IRB.CreateRetVoid();
509 else
510 IRB.CreateRet(V: RetVal);
511 return F;
512}
513
514// This function builds the "entry thunk", a function which translates
515// arguments and return values when calling AArch64 code from x64 code.
516Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
517 SmallString<256> EntryThunkName;
518 llvm::raw_svector_ostream EntryThunkStream(EntryThunkName);
519 FunctionType *Arm64Ty, *X64Ty;
520 SmallVector<ThunkArgTranslation> ArgTranslations;
521 getThunkType(FT: F->getFunctionType(), AttrList: F->getAttributes(),
522 TT: Arm64ECThunkType::Entry, Out&: EntryThunkStream, Arm64Ty, X64Ty,
523 ArgTranslations);
524 if (Function *F = M->getFunction(Name: EntryThunkName))
525 return F;
526
527 Function *Thunk = Function::Create(Ty: X64Ty, Linkage: GlobalValue::LinkOnceODRLinkage, AddrSpace: 0,
528 N: EntryThunkName, M);
529 Thunk->setCallingConv(CallingConv::ARM64EC_Thunk_X64);
530 Thunk->setSection(".wowthk$aa");
531 Thunk->setComdat(M->getOrInsertComdat(Name: EntryThunkName));
532 // Copy MSVC, and always set up a frame pointer. (Maybe this isn't necessary.)
533 Thunk->addFnAttr(Kind: "frame-pointer", Val: "all");
534
535 BasicBlock *BB = BasicBlock::Create(Context&: M->getContext(), Name: "", Parent: Thunk);
536 IRBuilder<> IRB(BB);
537
538 Type *RetTy = Arm64Ty->getReturnType();
539 Type *X64RetType = X64Ty->getReturnType();
540
541 bool TransformDirectToSRet = X64RetType->isVoidTy() && !RetTy->isVoidTy();
542 unsigned ThunkArgOffset = TransformDirectToSRet ? 2 : 1;
543 unsigned PassthroughArgSize =
544 (F->isVarArg() ? 5 : Thunk->arg_size()) - ThunkArgOffset;
545 assert(ArgTranslations.size() == (F->isVarArg() ? 5 : PassthroughArgSize));
546
547 // Translate arguments to call.
548 SmallVector<Value *> Args;
549 for (unsigned i = 0; i != PassthroughArgSize; ++i) {
550 Value *Arg = Thunk->getArg(i: i + ThunkArgOffset);
551 Type *ArgTy = Arm64Ty->getParamType(i);
552 ThunkArgTranslation ArgTranslation = ArgTranslations[i];
553 if (ArgTranslation != ThunkArgTranslation::Direct) {
554 // Translate array/struct arguments to the expected type.
555 if (ArgTranslation == ThunkArgTranslation::Bitcast) {
556 Value *CastAlloca = IRB.CreateAlloca(Ty: ArgTy);
557 IRB.CreateStore(Val: Arg, Ptr: CastAlloca);
558 Arg = IRB.CreateLoad(Ty: ArgTy, Ptr: CastAlloca);
559 } else {
560 assert(ArgTranslation == ThunkArgTranslation::PointerIndirection);
561 Arg = IRB.CreateLoad(Ty: ArgTy, Ptr: Arg);
562 }
563 }
564 assert(Arg->getType() == ArgTy);
565 Args.push_back(Elt: Arg);
566 }
567
568 if (F->isVarArg()) {
569 // The 5th argument to variadic entry thunks is used to model the x64 sp
570 // which is passed to the thunk in x4, this can be passed to the callee as
571 // the variadic argument start address after skipping over the 32 byte
572 // shadow store.
573
574 // The EC thunk CC will assign any argument marked as InReg to x4.
575 Thunk->addParamAttr(ArgNo: 5, Kind: Attribute::InReg);
576 Value *Arg = Thunk->getArg(i: 5);
577 Arg = IRB.CreatePtrAdd(Ptr: Arg, Offset: IRB.getInt64(C: 0x20));
578 Args.push_back(Elt: Arg);
579
580 // Pass in a zero variadic argument size (in x5).
581 Args.push_back(Elt: IRB.getInt64(C: 0));
582 }
583
584 // Call the function passed to the thunk.
585 Value *Callee = Thunk->getArg(i: 0);
586 CallInst *Call = IRB.CreateCall(FTy: Arm64Ty, Callee, Args);
587
588 auto SRetAttr = F->getAttributes().getParamAttr(ArgNo: 0, Kind: Attribute::StructRet);
589 auto InRegAttr = F->getAttributes().getParamAttr(ArgNo: 0, Kind: Attribute::InReg);
590 if (SRetAttr.isValid() && !InRegAttr.isValid()) {
591 Thunk->addParamAttr(ArgNo: 1, Attr: SRetAttr);
592 Call->addParamAttr(ArgNo: 0, Attr: SRetAttr);
593 }
594
595 Value *RetVal = Call;
596 if (TransformDirectToSRet) {
597 // The x64 side returns this value indirectly via a hidden pointer (sret).
598 // Mark the thunk's pointer arg with sret so that ISel saves it and copies
599 // it into x8 (RAX) on return, matching the x64 calling convention.
600 Thunk->addParamAttr(
601 ArgNo: 1, Attr: Attribute::getWithStructRetType(Context&: M->getContext(), Ty: RetTy));
602 IRB.CreateStore(Val: RetVal, Ptr: Thunk->getArg(i: 1));
603 } else if (X64RetType != RetTy) {
604 Value *CastAlloca = IRB.CreateAlloca(Ty: X64RetType);
605 IRB.CreateStore(Val: Call, Ptr: CastAlloca);
606 RetVal = IRB.CreateLoad(Ty: X64RetType, Ptr: CastAlloca);
607 }
608
609 // Return to the caller. Note that the isel has code to translate this
610 // "ret" to a tail call to __os_arm64x_dispatch_ret. (Alternatively, we
611 // could emit a tail call here, but that would require a dedicated calling
612 // convention, which seems more complicated overall.)
613 if (X64RetType->isVoidTy())
614 IRB.CreateRetVoid();
615 else
616 IRB.CreateRet(V: RetVal);
617
618 return Thunk;
619}
620
621std::optional<std::string> getArm64ECMangledFunctionName(GlobalValue &GV) {
622 if (!GV.hasName()) {
623 GV.setName("__unnamed");
624 }
625
626 return llvm::getArm64ECMangledFunctionName(Name: GV.getName());
627}
628
629// Builds the "guest exit thunk", a helper to call a function which may or may
630// not be an exit thunk. (We optimistically assume non-dllimport function
631// declarations refer to functions defined in AArch64 code; if the linker
632// can't prove that, we use this routine instead.)
633Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {
634 llvm::raw_null_ostream NullThunkName;
635 FunctionType *Arm64Ty, *X64Ty;
636 SmallVector<ThunkArgTranslation> ArgTranslations;
637 getThunkType(FT: F->getFunctionType(), AttrList: F->getAttributes(),
638 TT: Arm64ECThunkType::GuestExit, Out&: NullThunkName, Arm64Ty, X64Ty,
639 ArgTranslations);
640 auto MangledName = getArm64ECMangledFunctionName(GV&: *F);
641 assert(MangledName && "Can't guest exit to function that's already native");
642 std::string ThunkName = *MangledName;
643 if (ThunkName[0] == '?' && ThunkName.find(s: "@") != std::string::npos) {
644 ThunkName.insert(pos: ThunkName.find(s: "@"), s: "$exit_thunk");
645 } else {
646 ThunkName.append(s: "$exit_thunk");
647 }
648 Function *GuestExit =
649 Function::Create(Ty: Arm64Ty, Linkage: GlobalValue::WeakODRLinkage, AddrSpace: 0, N: ThunkName, M);
650 GuestExit->setComdat(M->getOrInsertComdat(Name: ThunkName));
651 GuestExit->setSection(".wowthk$aa");
652 GuestExit->addMetadata(
653 Kind: "arm64ec_unmangled_name",
654 MD&: *MDNode::get(Context&: M->getContext(),
655 MDs: MDString::get(Context&: M->getContext(), Str: F->getName())));
656 GuestExit->setMetadata(
657 Kind: "arm64ec_ecmangled_name",
658 Node: MDNode::get(Context&: M->getContext(),
659 MDs: MDString::get(Context&: M->getContext(), Str: *MangledName)));
660 F->setMetadata(Kind: "arm64ec_hasguestexit", Node: MDNode::get(Context&: M->getContext(), MDs: {}));
661 BasicBlock *BB = BasicBlock::Create(Context&: M->getContext(), Name: "", Parent: GuestExit);
662 IRBuilder<> B(BB);
663
664 // Create new call instruction. The call check should always be a call,
665 // even if the original CallBase is an Invoke or CallBr instructio.
666 // This is treated as a direct call, so do not use GuardFnCFGlobal.
667 LoadInst *GuardCheckLoad = B.CreateLoad(Ty: PtrTy, Ptr: GuardFnGlobal);
668 Function *Thunk = buildExitThunk(FT: F->getFunctionType(), Attrs: F->getAttributes());
669 CallInst *GuardCheck = B.CreateCall(
670 FTy: GuardFnType, Callee: GuardCheckLoad, Args: {F, Thunk});
671 Value *GuardCheckDest = B.CreateExtractValue(Agg: GuardCheck, Idxs: 0);
672 Value *GuardFinalDest = B.CreateExtractValue(Agg: GuardCheck, Idxs: 1);
673
674 // Ensure that the first argument is passed in the correct register.
675 GuardCheck->setCallingConv(CallingConv::CFGuard_Check);
676
677 SmallVector<Value *> Args(llvm::make_pointer_range(Range: GuestExit->args()));
678 OperandBundleDef OB("cfguardtarget", GuardFinalDest);
679 CallInst *Call = B.CreateCall(FTy: Arm64Ty, Callee: GuardCheckDest, Args, OpBundles: OB);
680 Call->setTailCallKind(llvm::CallInst::TCK_MustTail);
681
682 if (Call->getType()->isVoidTy())
683 B.CreateRetVoid();
684 else
685 B.CreateRet(V: Call);
686
687 auto SRetAttr = F->getAttributes().getParamAttr(ArgNo: 0, Kind: Attribute::StructRet);
688 auto InRegAttr = F->getAttributes().getParamAttr(ArgNo: 0, Kind: Attribute::InReg);
689 if (SRetAttr.isValid() && !InRegAttr.isValid()) {
690 GuestExit->addParamAttr(ArgNo: 0, Attr: SRetAttr);
691 Call->addParamAttr(ArgNo: 0, Attr: SRetAttr);
692 }
693
694 return GuestExit;
695}
696
697Function *
698AArch64Arm64ECCallLowering::buildPatchableThunk(GlobalAlias *UnmangledAlias,
699 GlobalAlias *MangledAlias) {
700 llvm::raw_null_ostream NullThunkName;
701 FunctionType *Arm64Ty, *X64Ty;
702 Function *F = cast<Function>(Val: MangledAlias->getAliasee());
703 SmallVector<ThunkArgTranslation> ArgTranslations;
704 getThunkType(FT: F->getFunctionType(), AttrList: F->getAttributes(),
705 TT: Arm64ECThunkType::GuestExit, Out&: NullThunkName, Arm64Ty, X64Ty,
706 ArgTranslations);
707 std::string ThunkName(MangledAlias->getName());
708 if (ThunkName[0] == '?' && ThunkName.find(s: "@") != std::string::npos) {
709 ThunkName.insert(pos: ThunkName.find(s: "@"), s: "$hybpatch_thunk");
710 } else {
711 ThunkName.append(s: "$hybpatch_thunk");
712 }
713
714 Function *GuestExit =
715 Function::Create(Ty: Arm64Ty, Linkage: GlobalValue::WeakODRLinkage, AddrSpace: 0, N: ThunkName, M);
716 GuestExit->setComdat(M->getOrInsertComdat(Name: ThunkName));
717 GuestExit->setSection(".wowthk$aa");
718 BasicBlock *BB = BasicBlock::Create(Context&: M->getContext(), Name: "", Parent: GuestExit);
719 IRBuilder<> B(BB);
720
721 // Load the global symbol as a pointer to the check function.
722 LoadInst *DispatchLoad = B.CreateLoad(Ty: PtrTy, Ptr: DispatchFnGlobal);
723
724 // Create new dispatch call instruction.
725 Function *ExitThunk =
726 buildExitThunk(FT: F->getFunctionType(), Attrs: F->getAttributes());
727 CallInst *Dispatch =
728 B.CreateCall(FTy: DispatchFnType, Callee: DispatchLoad,
729 Args: {UnmangledAlias, ExitThunk, UnmangledAlias->getAliasee()});
730
731 // Ensure that the first arguments are passed in the correct registers.
732 Dispatch->setCallingConv(CallingConv::CFGuard_Check);
733
734 SmallVector<Value *> Args(llvm::make_pointer_range(Range: GuestExit->args()));
735 CallInst *Call = B.CreateCall(FTy: Arm64Ty, Callee: Dispatch, Args);
736 Call->setTailCallKind(llvm::CallInst::TCK_MustTail);
737
738 if (Call->getType()->isVoidTy())
739 B.CreateRetVoid();
740 else
741 B.CreateRet(V: Call);
742
743 auto SRetAttr = F->getAttributes().getParamAttr(ArgNo: 0, Kind: Attribute::StructRet);
744 auto InRegAttr = F->getAttributes().getParamAttr(ArgNo: 0, Kind: Attribute::InReg);
745 if (SRetAttr.isValid() && !InRegAttr.isValid()) {
746 GuestExit->addParamAttr(ArgNo: 0, Attr: SRetAttr);
747 Call->addParamAttr(ArgNo: 0, Attr: SRetAttr);
748 }
749
750 MangledAlias->setAliasee(GuestExit);
751 return GuestExit;
752}
753
754// Lower an indirect call with inline code.
755void AArch64Arm64ECCallLowering::lowerCall(CallBase *CB) {
756 IRBuilder<> B(CB);
757 Value *CalledOperand = CB->getCalledOperand();
758
759 // If the indirect call is called within catchpad or cleanuppad,
760 // we need to copy "funclet" bundle of the call.
761 SmallVector<llvm::OperandBundleDef, 1> Bundles;
762 if (auto Bundle = CB->getOperandBundle(ID: LLVMContext::OB_funclet))
763 Bundles.push_back(Elt: OperandBundleDef(*Bundle));
764
765 // Load the global symbol as a pointer to the check function.
766 Value *GuardFn;
767 if ((CFGuardModuleFlag == ControlFlowGuardMode::Enabled) &&
768 !CB->hasFnAttr(Kind: "guard_nocf"))
769 GuardFn = GuardFnCFGlobal;
770 else
771 GuardFn = GuardFnGlobal;
772 LoadInst *GuardCheckLoad = B.CreateLoad(Ty: PtrTy, Ptr: GuardFn);
773
774 // Create new call instruction. The CFGuard check should always be a call,
775 // even if the original CallBase is an Invoke or CallBr instruction.
776 Function *Thunk = buildExitThunk(FT: CB->getFunctionType(), Attrs: CB->getAttributes());
777 CallInst *GuardCheck =
778 B.CreateCall(FTy: GuardFnType, Callee: GuardCheckLoad, Args: {CalledOperand, Thunk},
779 OpBundles: Bundles);
780 Value *GuardCheckDest = B.CreateExtractValue(Agg: GuardCheck, Idxs: 0);
781 Value *GuardFinalDest = B.CreateExtractValue(Agg: GuardCheck, Idxs: 1);
782
783 // Ensure that the first argument is passed in the correct register.
784 GuardCheck->setCallingConv(CallingConv::CFGuard_Check);
785
786 // Update the call: set the callee, and add a bundle with the final
787 // destination,
788 CB->setCalledOperand(GuardCheckDest);
789 OperandBundleDef OB("cfguardtarget", GuardFinalDest);
790 auto *NewCall = CallBase::addOperandBundle(CB, ID: LLVMContext::OB_cfguardtarget,
791 OB, InsertPt: CB->getIterator());
792 NewCall->copyMetadata(SrcInst: *CB);
793 CB->replaceAllUsesWith(V: NewCall);
794 CB->eraseFromParent();
795}
796
797bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) {
798 if (!GenerateThunks)
799 return false;
800
801 M = &Mod;
802
803 // Check if this module has the cfguard flag and read its value.
804 CFGuardModuleFlag = M->getControlFlowGuardMode();
805
806 // Warn if the module flag requests an unsupported CFGuard mechanism.
807 if (CFGuardModuleFlag == ControlFlowGuardMode::Enabled) {
808 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(
809 MD: Mod.getModuleFlag(Key: "cfguard-mechanism"))) {
810 auto MechanismOverride =
811 static_cast<ControlFlowGuardMechanism>(CI->getZExtValue());
812 if (MechanismOverride != ControlFlowGuardMechanism::Automatic &&
813 MechanismOverride != ControlFlowGuardMechanism::Check)
814 Mod.getContext().diagnose(
815 DI: DiagnosticInfoGeneric("only the Check Control Flow Guard mechanism "
816 "is supported for Arm64EC",
817 DS_Warning));
818 }
819 }
820
821 PtrTy = PointerType::getUnqual(C&: M->getContext());
822 I64Ty = Type::getInt64Ty(C&: M->getContext());
823 VoidTy = Type::getVoidTy(C&: M->getContext());
824
825 GuardFnType =
826 FunctionType::get(Result: StructType::get(elt1: PtrTy, elts: PtrTy), Params: {PtrTy, PtrTy}, isVarArg: false);
827 DispatchFnType = FunctionType::get(Result: PtrTy, Params: {PtrTy, PtrTy, PtrTy}, isVarArg: false);
828 GuardFnCFGlobal = M->getOrInsertGlobal(Name: "__os_arm64x_check_icall_cfg", Ty: PtrTy);
829 GuardFnGlobal = M->getOrInsertGlobal(Name: "__os_arm64x_check_icall", Ty: PtrTy);
830 DispatchFnGlobal = M->getOrInsertGlobal(Name: "__os_arm64x_dispatch_call", Ty: PtrTy);
831
832 // Mangle names of function aliases and add the alias name to
833 // arm64ec_unmangled_name metadata to ensure a weak anti-dependency symbol is
834 // emitted for the alias as well. Do this early, before handling
835 // hybrid_patchable functions, to avoid mangling their aliases.
836 for (GlobalAlias &A : Mod.aliases()) {
837 auto F = dyn_cast_or_null<Function>(Val: A.getAliaseeObject());
838 if (!F)
839 continue;
840 if (std::optional<std::string> MangledName =
841 getArm64ECMangledFunctionName(GV&: A)) {
842 F->addMetadata(Kind: "arm64ec_unmangled_name",
843 MD&: *MDNode::get(Context&: M->getContext(),
844 MDs: MDString::get(Context&: M->getContext(), Str: A.getName())));
845 A.setName(MangledName.value());
846 }
847 }
848
849 DenseMap<GlobalAlias *, GlobalAlias *> FnsMap;
850 SetVector<GlobalAlias *> PatchableFns;
851
852 for (Function &F : Mod) {
853 if (F.hasPersonalityFn()) {
854 GlobalValue *PersFn =
855 cast<GlobalValue>(Val: F.getPersonalityFn()->stripPointerCasts());
856 if (PersFn->getValueType() && PersFn->getValueType()->isFunctionTy()) {
857 if (std::optional<std::string> MangledName =
858 getArm64ECMangledFunctionName(GV&: *PersFn)) {
859 PersFn->setName(MangledName.value());
860 }
861 }
862 }
863
864 if (!F.hasFnAttribute(Kind: Attribute::HybridPatchable) ||
865 F.isDeclarationForLinker() || F.hasLocalLinkage() ||
866 F.getName().ends_with(Suffix: HybridPatchableTargetSuffix))
867 continue;
868
869 // Rename hybrid patchable functions and change callers to use a global
870 // alias instead.
871 if (std::optional<std::string> MangledName =
872 getArm64ECMangledFunctionName(GV&: F)) {
873 std::string OrigName(F.getName());
874 F.setName(MangledName.value() + HybridPatchableTargetSuffix);
875
876 // The unmangled symbol is a weak alias to an undefined symbol with the
877 // "EXP+" prefix. This undefined symbol is resolved by the linker by
878 // creating an x86 thunk that jumps back to the actual EC target. Since we
879 // can't represent that in IR, we create an alias to the target instead.
880 // The "EXP+" symbol is set as metadata, which is then used by
881 // emitGlobalAlias to emit the right alias.
882 auto *A =
883 GlobalAlias::create(Linkage: GlobalValue::LinkOnceODRLinkage, Name: OrigName, Aliasee: &F);
884 auto *AM = GlobalAlias::create(Linkage: GlobalValue::LinkOnceODRLinkage,
885 Name: MangledName.value(), Aliasee: &F);
886 F.replaceUsesWithIf(New: AM,
887 ShouldReplace: [](Use &U) { return isa<GlobalAlias>(Val: U.getUser()); });
888 F.replaceAllUsesWith(V: A);
889 F.setMetadata(Kind: "arm64ec_exp_name",
890 Node: MDNode::get(Context&: M->getContext(),
891 MDs: MDString::get(Context&: M->getContext(),
892 Str: "EXP+" + MangledName.value())));
893 A->setAliasee(&F);
894 AM->setAliasee(&F);
895
896 if (F.hasDLLExportStorageClass()) {
897 A->setDLLStorageClass(GlobalValue::DLLExportStorageClass);
898 F.setDLLStorageClass(GlobalValue::DefaultStorageClass);
899 }
900
901 FnsMap[A] = AM;
902 PatchableFns.insert(X: A);
903 }
904 }
905
906 SetVector<GlobalValue *> DirectCalledFns;
907 for (Function &F : Mod)
908 if (!F.isDeclarationForLinker() &&
909 F.getCallingConv() != CallingConv::ARM64EC_Thunk_Native &&
910 F.getCallingConv() != CallingConv::ARM64EC_Thunk_X64)
911 processFunction(F, DirectCalledFns, FnsMap);
912
913 struct ThunkInfo {
914 Constant *Src;
915 Constant *Dst;
916 Arm64ECThunkType Kind;
917 };
918 SmallVector<ThunkInfo> ThunkMapping;
919 for (Function &F : Mod) {
920 if (!F.isDeclarationForLinker() &&
921 (!F.hasLocalLinkage() || F.hasAddressTaken()) &&
922 F.getCallingConv() != CallingConv::ARM64EC_Thunk_Native &&
923 F.getCallingConv() != CallingConv::ARM64EC_Thunk_X64) {
924 if (!F.hasComdat())
925 F.setComdat(Mod.getOrInsertComdat(Name: F.getName()));
926 ThunkMapping.push_back(
927 Elt: {.Src: &F, .Dst: buildEntryThunk(F: &F), .Kind: Arm64ECThunkType::Entry});
928 }
929 }
930 for (GlobalValue *O : DirectCalledFns) {
931 auto GA = dyn_cast<GlobalAlias>(Val: O);
932 auto F = dyn_cast<Function>(Val: GA ? GA->getAliasee() : O);
933 ThunkMapping.push_back(
934 Elt: {.Src: O, .Dst: buildExitThunk(FT: F->getFunctionType(), Attrs: F->getAttributes()),
935 .Kind: Arm64ECThunkType::Exit});
936 if (!GA && !F->hasDLLImportStorageClass())
937 ThunkMapping.push_back(
938 Elt: {.Src: buildGuestExitThunk(F), .Dst: F, .Kind: Arm64ECThunkType::GuestExit});
939 }
940 for (GlobalAlias *A : PatchableFns) {
941 Function *Thunk = buildPatchableThunk(UnmangledAlias: A, MangledAlias: FnsMap[A]);
942 ThunkMapping.push_back(Elt: {.Src: Thunk, .Dst: A, .Kind: Arm64ECThunkType::GuestExit});
943 }
944
945 if (!ThunkMapping.empty()) {
946 SmallVector<Constant *> ThunkMappingArrayElems;
947 for (ThunkInfo &Thunk : ThunkMapping) {
948 ThunkMappingArrayElems.push_back(Elt: ConstantStruct::getAnon(
949 V: {Thunk.Src, Thunk.Dst,
950 ConstantInt::get(Context&: M->getContext(), V: APInt(32, uint8_t(Thunk.Kind)))}));
951 }
952 Constant *ThunkMappingArray = ConstantArray::get(
953 T: llvm::ArrayType::get(ElementType: ThunkMappingArrayElems[0]->getType(),
954 NumElements: ThunkMappingArrayElems.size()),
955 V: ThunkMappingArrayElems);
956 new GlobalVariable(Mod, ThunkMappingArray->getType(), /*isConstant*/ false,
957 GlobalValue::ExternalLinkage, ThunkMappingArray,
958 "llvm.arm64ec.symbolmap");
959 }
960
961 return true;
962}
963
964bool AArch64Arm64ECCallLowering::processFunction(
965 Function &F, SetVector<GlobalValue *> &DirectCalledFns,
966 DenseMap<GlobalAlias *, GlobalAlias *> &FnsMap) {
967 SmallVector<CallBase *, 8> IndirectCalls;
968
969 // For ARM64EC targets, a function definition's name is mangled differently
970 // from the normal symbol. We currently have no representation of this sort
971 // of symbol in IR, so we change the name to the mangled name, then store
972 // the unmangled name as metadata. Later passes that need the unmangled
973 // name (emitting the definition) can grab it from the metadata.
974 //
975 // FIXME: Handle functions with weak linkage?
976 if (!F.hasLocalLinkage() || F.hasAddressTaken()) {
977 if (std::optional<std::string> MangledName =
978 getArm64ECMangledFunctionName(GV&: F)) {
979 F.addMetadata(Kind: "arm64ec_unmangled_name",
980 MD&: *MDNode::get(Context&: M->getContext(),
981 MDs: MDString::get(Context&: M->getContext(), Str: F.getName())));
982 if (F.hasComdat() && F.getComdat()->getName() == F.getName()) {
983 Comdat *MangledComdat = M->getOrInsertComdat(Name: MangledName.value());
984 SmallVector<GlobalObject *> ComdatUsers =
985 to_vector(Range: F.getComdat()->getUsers());
986 for (GlobalObject *User : ComdatUsers)
987 User->setComdat(MangledComdat);
988 }
989 F.setName(MangledName.value());
990 }
991 }
992
993 // Iterate over the instructions to find all indirect call/invoke/callbr
994 // instructions. Make a separate list of pointers to indirect
995 // call/invoke/callbr instructions because the original instructions will be
996 // deleted as the checks are added.
997 for (BasicBlock &BB : F) {
998 for (Instruction &I : BB) {
999 auto *CB = dyn_cast<CallBase>(Val: &I);
1000 if (!CB || CB->getCallingConv() == CallingConv::ARM64EC_Thunk_X64 ||
1001 CB->isInlineAsm())
1002 continue;
1003
1004 // We need to instrument any call that isn't directly calling an
1005 // ARM64 function.
1006 //
1007 // FIXME: getCalledFunction() fails if there's a bitcast (e.g.
1008 // unprototyped functions in C)
1009 if (Function *F = CB->getCalledFunction()) {
1010 if (!LowerDirectToIndirect || F->hasLocalLinkage() ||
1011 F->isIntrinsic() || !F->isDeclarationForLinker())
1012 continue;
1013
1014 DirectCalledFns.insert(X: F);
1015 continue;
1016 }
1017
1018 // Use mangled global alias for direct calls to patchable functions.
1019 if (GlobalAlias *A = dyn_cast<GlobalAlias>(Val: CB->getCalledOperand())) {
1020 auto I = FnsMap.find(Val: A);
1021 if (I != FnsMap.end()) {
1022 CB->setCalledOperand(I->second);
1023 DirectCalledFns.insert(X: I->first);
1024 continue;
1025 }
1026 }
1027
1028 IndirectCalls.push_back(Elt: CB);
1029 ++Arm64ECCallsLowered;
1030 }
1031 }
1032
1033 if (IndirectCalls.empty())
1034 return false;
1035
1036 for (CallBase *CB : IndirectCalls)
1037 lowerCall(CB);
1038
1039 return true;
1040}
1041
1042char AArch64Arm64ECCallLowering::ID = 0;
1043INITIALIZE_PASS(AArch64Arm64ECCallLowering, "Arm64ECCallLowering",
1044 "AArch64Arm64ECCallLowering", false, false)
1045
1046ModulePass *llvm::createAArch64Arm64ECCallLoweringPass() {
1047 return new AArch64Arm64ECCallLowering;
1048}
1049