| 1 | //===-- AArch64Arm64ECCallLowering.cpp - Lower Arm64EC calls ----*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// This file contains the IR transform to lower external or indirect calls for |
| 11 | /// the ARM64EC calling convention. Such calls must go through the runtime, so |
| 12 | /// we can translate the calling convention for calls into the emulator. |
| 13 | /// |
| 14 | /// This subsumes Control Flow Guard handling. |
| 15 | /// |
| 16 | //===----------------------------------------------------------------------===// |
| 17 | |
| 18 | #include "AArch64.h" |
| 19 | #include "llvm/ADT/SetVector.h" |
| 20 | #include "llvm/ADT/SmallString.h" |
| 21 | #include "llvm/ADT/SmallVector.h" |
| 22 | #include "llvm/ADT/Statistic.h" |
| 23 | #include "llvm/IR/CallingConv.h" |
| 24 | #include "llvm/IR/GlobalAlias.h" |
| 25 | #include "llvm/IR/IRBuilder.h" |
| 26 | #include "llvm/IR/Instruction.h" |
| 27 | #include "llvm/IR/Mangler.h" |
| 28 | #include "llvm/IR/Module.h" |
| 29 | #include "llvm/Object/COFF.h" |
| 30 | #include "llvm/Pass.h" |
| 31 | #include "llvm/Support/CommandLine.h" |
| 32 | #include "llvm/TargetParser/Triple.h" |
| 33 | |
| 34 | using namespace llvm; |
| 35 | using namespace llvm::COFF; |
| 36 | |
| 37 | using OperandBundleDef = OperandBundleDefT<Value *>; |
| 38 | |
| 39 | #define DEBUG_TYPE "arm64eccalllowering" |
| 40 | |
| 41 | STATISTIC(Arm64ECCallsLowered, "Number of Arm64EC calls lowered" ); |
| 42 | |
| 43 | static cl::opt<bool> LowerDirectToIndirect("arm64ec-lower-direct-to-indirect" , |
| 44 | cl::Hidden, cl::init(Val: true)); |
| 45 | static cl::opt<bool> GenerateThunks("arm64ec-generate-thunks" , cl::Hidden, |
| 46 | cl::init(Val: true)); |
| 47 | |
namespace {

/// How a thunk translates a single argument (or return value) between the
/// Arm64 and X64 sides of an ARM64EC boundary crossing.
enum ThunkArgTranslation : uint8_t {
  /// Same representation on both sides; passed through unchanged.
  Direct,
  /// Same byte size but different representation (e.g. small homogeneous
  /// float/double aggregates, which x64 passes/returns as an integer);
  /// the thunk round-trips the value through a stack slot.
  Bitcast,
  /// Passed by value on Arm64 but by pointer on X64; the thunk spills the
  /// value and passes its address (or loads through the incoming pointer).
  PointerIndirection,
};

/// The canonicalized Arm64-side type, X64-side type, and translation
/// strategy for one thunk argument or return value.
struct ThunkArgInfo {
  Type *Arm64Ty;
  Type *X64Ty;
  ThunkArgTranslation Translation;
};

/// Module pass implementing the ARM64EC call-lowering transform described in
/// the file header: builds entry/exit/guest-exit/patchable thunks and lowers
/// calls so Arm64<->x64 transitions go through the OS dispatch helpers.
class AArch64Arm64ECCallLowering : public ModulePass {
public:
  static char ID;
  AArch64Arm64ECCallLowering() : ModulePass(ID) {}

  /// Build (or reuse) the thunk used when Arm64 code calls x64 code of the
  /// given type.
  Function *buildExitThunk(FunctionType *FnTy, AttributeList Attrs);
  /// Build (or reuse) the thunk used when x64 code calls the Arm64
  /// function F.
  Function *buildEntryThunk(Function *F);
  /// Lower an indirect call with inline dispatch code.
  void lowerCall(CallBase *CB);
  /// Build a helper that calls F either directly or through an exit thunk,
  /// depending on what the symbol resolves to at runtime.
  Function *buildGuestExitThunk(Function *F);
  /// Build a patchable thunk for the given alias pair (the "$hybpatch_thunk"
  /// flavor); repoints MangledAlias at the new thunk.
  Function *buildPatchableThunk(GlobalAlias *UnmangledAlias,
                                GlobalAlias *MangledAlias);
  bool processFunction(Function &F, SetVector<GlobalValue *> &DirectCalledFns,
                       DenseMap<GlobalAlias *, GlobalAlias *> &FnsMap);
  bool runOnModule(Module &M) override;

private:
  // Module's Control Flow Guard flag; 2 means CFG checks are enabled (see
  // buildGuestExitThunk). Presumably read from module flags in runOnModule.
  int cfguard_module_flag = 0;
  FunctionType *GuardFnType = nullptr;
  FunctionType *DispatchFnType = nullptr;
  // Globals holding pointers to the OS-provided check/dispatch helpers.
  Constant *GuardFnCFGlobal = nullptr;
  Constant *GuardFnGlobal = nullptr;
  Constant *DispatchFnGlobal = nullptr;
  Module *M = nullptr;

  // Cached frequently-used types; presumably initialized in runOnModule
  // (not visible in this chunk).
  Type *PtrTy;
  Type *I64Ty;
  Type *VoidTy;

  /// Compute the mangled thunk name plus the Arm64-side and X64-side
  /// signatures (and per-argument translations) for a thunk of kind TT.
  void getThunkType(FunctionType *FT, AttributeList AttrList,
                    Arm64ECThunkType TT, raw_ostream &Out,
                    FunctionType *&Arm64Ty, FunctionType *&X64Ty,
                    SmallVector<ThunkArgTranslation> &ArgTranslations);
  /// Mangle and canonicalize the return type; may inject an sret pointer
  /// argument (setting HasSretPtr).
  void getThunkRetType(FunctionType *FT, AttributeList AttrList,
                       raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy,
                       SmallVectorImpl<Type *> &Arm64ArgTypes,
                       SmallVectorImpl<Type *> &X64ArgTypes,
                       SmallVector<ThunkArgTranslation> &ArgTranslations,
                       bool &HasSretPtr);
  /// Mangle and canonicalize the parameter types.
  void getThunkArgTypes(FunctionType *FT, AttributeList AttrList,
                        Arm64ECThunkType TT, raw_ostream &Out,
                        SmallVectorImpl<Type *> &Arm64ArgTypes,
                        SmallVectorImpl<Type *> &X64ArgTypes,
                        SmallVectorImpl<ThunkArgTranslation> &ArgTranslations,
                        bool HasSretPtr);
  /// Canonicalize one type to its Arm64/X64 passing forms and append its
  /// mangling to Out.
  ThunkArgInfo canonicalizeThunkType(Type *T, Align Alignment, bool Ret,
                                     uint64_t ArgSizeBytes, raw_ostream &Out);
};

} // end anonymous namespace
| 111 | |
| 112 | void AArch64Arm64ECCallLowering::getThunkType( |
| 113 | FunctionType *FT, AttributeList AttrList, Arm64ECThunkType TT, |
| 114 | raw_ostream &Out, FunctionType *&Arm64Ty, FunctionType *&X64Ty, |
| 115 | SmallVector<ThunkArgTranslation> &ArgTranslations) { |
| 116 | Out << (TT == Arm64ECThunkType::Entry ? "$ientry_thunk$cdecl$" |
| 117 | : "$iexit_thunk$cdecl$" ); |
| 118 | |
| 119 | Type *Arm64RetTy; |
| 120 | Type *X64RetTy; |
| 121 | |
| 122 | SmallVector<Type *> Arm64ArgTypes; |
| 123 | SmallVector<Type *> X64ArgTypes; |
| 124 | |
| 125 | // The first argument to a thunk is the called function, stored in x9. |
| 126 | // For exit thunks, we pass the called function down to the emulator; |
| 127 | // for entry/guest exit thunks, we just call the Arm64 function directly. |
| 128 | if (TT == Arm64ECThunkType::Exit) |
| 129 | Arm64ArgTypes.push_back(Elt: PtrTy); |
| 130 | X64ArgTypes.push_back(Elt: PtrTy); |
| 131 | |
| 132 | bool HasSretPtr = false; |
| 133 | getThunkRetType(FT, AttrList, Out, Arm64RetTy, X64RetTy, Arm64ArgTypes, |
| 134 | X64ArgTypes, ArgTranslations, HasSretPtr); |
| 135 | |
| 136 | getThunkArgTypes(FT, AttrList, TT, Out, Arm64ArgTypes, X64ArgTypes, |
| 137 | ArgTranslations, HasSretPtr); |
| 138 | |
| 139 | Arm64Ty = FunctionType::get(Result: Arm64RetTy, Params: Arm64ArgTypes, isVarArg: false); |
| 140 | |
| 141 | X64Ty = FunctionType::get(Result: X64RetTy, Params: X64ArgTypes, isVarArg: false); |
| 142 | } |
| 143 | |
| 144 | void AArch64Arm64ECCallLowering::getThunkArgTypes( |
| 145 | FunctionType *FT, AttributeList AttrList, Arm64ECThunkType TT, |
| 146 | raw_ostream &Out, SmallVectorImpl<Type *> &Arm64ArgTypes, |
| 147 | SmallVectorImpl<Type *> &X64ArgTypes, |
| 148 | SmallVectorImpl<ThunkArgTranslation> &ArgTranslations, bool HasSretPtr) { |
| 149 | |
| 150 | Out << "$" ; |
| 151 | if (FT->isVarArg()) { |
| 152 | // We treat the variadic function's thunk as a normal function |
| 153 | // with the following type on the ARM side: |
| 154 | // rettype exitthunk( |
| 155 | // ptr x9, ptr x0, i64 x1, i64 x2, i64 x3, ptr x4, i64 x5) |
| 156 | // |
| 157 | // that can coverage all types of variadic function. |
| 158 | // x9 is similar to normal exit thunk, store the called function. |
| 159 | // x0-x3 is the arguments be stored in registers. |
| 160 | // x4 is the address of the arguments on the stack. |
| 161 | // x5 is the size of the arguments on the stack. |
| 162 | // |
| 163 | // On the x64 side, it's the same except that x5 isn't set. |
| 164 | // |
| 165 | // If both the ARM and X64 sides are sret, there are only three |
| 166 | // arguments in registers. |
| 167 | // |
| 168 | // If the X64 side is sret, but the ARM side isn't, we pass an extra value |
| 169 | // to/from the X64 side, and let SelectionDAG transform it into a memory |
| 170 | // location. |
| 171 | Out << "varargs" ; |
| 172 | |
| 173 | // x0-x3 |
| 174 | for (int i = HasSretPtr ? 1 : 0; i < 4; i++) { |
| 175 | Arm64ArgTypes.push_back(Elt: I64Ty); |
| 176 | X64ArgTypes.push_back(Elt: I64Ty); |
| 177 | ArgTranslations.push_back(Elt: ThunkArgTranslation::Direct); |
| 178 | } |
| 179 | |
| 180 | // x4 |
| 181 | Arm64ArgTypes.push_back(Elt: PtrTy); |
| 182 | X64ArgTypes.push_back(Elt: PtrTy); |
| 183 | ArgTranslations.push_back(Elt: ThunkArgTranslation::Direct); |
| 184 | // x5 |
| 185 | Arm64ArgTypes.push_back(Elt: I64Ty); |
| 186 | if (TT != Arm64ECThunkType::Entry) { |
| 187 | // FIXME: x5 isn't actually used by the x64 side; revisit once we |
| 188 | // have proper isel for varargs |
| 189 | X64ArgTypes.push_back(Elt: I64Ty); |
| 190 | ArgTranslations.push_back(Elt: ThunkArgTranslation::Direct); |
| 191 | } |
| 192 | return; |
| 193 | } |
| 194 | |
| 195 | unsigned I = 0; |
| 196 | if (HasSretPtr) |
| 197 | I++; |
| 198 | |
| 199 | if (I == FT->getNumParams()) { |
| 200 | Out << "v" ; |
| 201 | return; |
| 202 | } |
| 203 | |
| 204 | for (unsigned E = FT->getNumParams(); I != E; ++I) { |
| 205 | #if 0 |
| 206 | // FIXME: Need more information about argument size; see |
| 207 | // https://reviews.llvm.org/D132926 |
| 208 | uint64_t ArgSizeBytes = AttrList.getParamArm64ECArgSizeBytes(I); |
| 209 | Align ParamAlign = AttrList.getParamAlignment(I).valueOrOne(); |
| 210 | #else |
| 211 | uint64_t ArgSizeBytes = 0; |
| 212 | Align ParamAlign = Align(); |
| 213 | #endif |
| 214 | auto [Arm64Ty, X64Ty, ArgTranslation] = |
| 215 | canonicalizeThunkType(T: FT->getParamType(i: I), Alignment: ParamAlign, |
| 216 | /*Ret*/ false, ArgSizeBytes, Out); |
| 217 | Arm64ArgTypes.push_back(Elt: Arm64Ty); |
| 218 | X64ArgTypes.push_back(Elt: X64Ty); |
| 219 | ArgTranslations.push_back(Elt: ArgTranslation); |
| 220 | } |
| 221 | } |
| 222 | |
| 223 | void AArch64Arm64ECCallLowering::getThunkRetType( |
| 224 | FunctionType *FT, AttributeList AttrList, raw_ostream &Out, |
| 225 | Type *&Arm64RetTy, Type *&X64RetTy, SmallVectorImpl<Type *> &Arm64ArgTypes, |
| 226 | SmallVectorImpl<Type *> &X64ArgTypes, |
| 227 | SmallVector<ThunkArgTranslation> &ArgTranslations, bool &HasSretPtr) { |
| 228 | Type *T = FT->getReturnType(); |
| 229 | #if 0 |
| 230 | // FIXME: Need more information about argument size; see |
| 231 | // https://reviews.llvm.org/D132926 |
| 232 | uint64_t ArgSizeBytes = AttrList.getRetArm64ECArgSizeBytes(); |
| 233 | #else |
| 234 | int64_t ArgSizeBytes = 0; |
| 235 | #endif |
| 236 | if (T->isVoidTy()) { |
| 237 | if (FT->getNumParams()) { |
| 238 | Attribute SRetAttr0 = AttrList.getParamAttr(ArgNo: 0, Kind: Attribute::StructRet); |
| 239 | Attribute InRegAttr0 = AttrList.getParamAttr(ArgNo: 0, Kind: Attribute::InReg); |
| 240 | Attribute SRetAttr1, InRegAttr1; |
| 241 | if (FT->getNumParams() > 1) { |
| 242 | // Also check the second parameter (for class methods, the first |
| 243 | // parameter is "this", and the second parameter is the sret pointer.) |
| 244 | // It doesn't matter which one is sret. |
| 245 | SRetAttr1 = AttrList.getParamAttr(ArgNo: 1, Kind: Attribute::StructRet); |
| 246 | InRegAttr1 = AttrList.getParamAttr(ArgNo: 1, Kind: Attribute::InReg); |
| 247 | } |
| 248 | if ((SRetAttr0.isValid() && InRegAttr0.isValid()) || |
| 249 | (SRetAttr1.isValid() && InRegAttr1.isValid())) { |
| 250 | // sret+inreg indicates a call that returns a C++ class value. This is |
| 251 | // actually equivalent to just passing and returning a void* pointer |
| 252 | // as the first or second argument. Translate it that way, instead of |
| 253 | // trying to model "inreg" in the thunk's calling convention; this |
| 254 | // simplfies the rest of the code, and matches MSVC mangling. |
| 255 | Out << "i8" ; |
| 256 | Arm64RetTy = I64Ty; |
| 257 | X64RetTy = I64Ty; |
| 258 | return; |
| 259 | } |
| 260 | if (SRetAttr0.isValid()) { |
| 261 | // FIXME: Sanity-check the sret type; if it's an integer or pointer, |
| 262 | // we'll get screwy mangling/codegen. |
| 263 | // FIXME: For large struct types, mangle as an integer argument and |
| 264 | // integer return, so we can reuse more thunks, instead of "m" syntax. |
| 265 | // (MSVC mangles this case as an integer return with no argument, but |
| 266 | // that's a miscompile.) |
| 267 | Type *SRetType = SRetAttr0.getValueAsType(); |
| 268 | Align SRetAlign = AttrList.getParamAlignment(ArgNo: 0).valueOrOne(); |
| 269 | canonicalizeThunkType(T: SRetType, Alignment: SRetAlign, /*Ret*/ true, ArgSizeBytes, |
| 270 | Out); |
| 271 | Arm64RetTy = VoidTy; |
| 272 | X64RetTy = VoidTy; |
| 273 | Arm64ArgTypes.push_back(Elt: FT->getParamType(i: 0)); |
| 274 | X64ArgTypes.push_back(Elt: FT->getParamType(i: 0)); |
| 275 | ArgTranslations.push_back(Elt: ThunkArgTranslation::Direct); |
| 276 | HasSretPtr = true; |
| 277 | return; |
| 278 | } |
| 279 | } |
| 280 | |
| 281 | Out << "v" ; |
| 282 | Arm64RetTy = VoidTy; |
| 283 | X64RetTy = VoidTy; |
| 284 | return; |
| 285 | } |
| 286 | |
| 287 | auto info = |
| 288 | canonicalizeThunkType(T, Alignment: Align(), /*Ret*/ true, ArgSizeBytes, Out); |
| 289 | Arm64RetTy = info.Arm64Ty; |
| 290 | X64RetTy = info.X64Ty; |
| 291 | if (X64RetTy->isPointerTy()) { |
| 292 | // If the X64 type is canonicalized to a pointer, that means it's |
| 293 | // passed/returned indirectly. For a return value, that means it's an |
| 294 | // sret pointer. |
| 295 | X64ArgTypes.push_back(Elt: X64RetTy); |
| 296 | X64RetTy = VoidTy; |
| 297 | } |
| 298 | } |
| 299 | |
| 300 | ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType( |
| 301 | Type *T, Align Alignment, bool Ret, uint64_t ArgSizeBytes, |
| 302 | raw_ostream &Out) { |
| 303 | |
| 304 | auto direct = [](Type *T) { |
| 305 | return ThunkArgInfo{.Arm64Ty: T, .X64Ty: T, .Translation: ThunkArgTranslation::Direct}; |
| 306 | }; |
| 307 | |
| 308 | auto bitcast = [this](Type *Arm64Ty, uint64_t SizeInBytes) { |
| 309 | return ThunkArgInfo{.Arm64Ty: Arm64Ty, |
| 310 | .X64Ty: llvm::Type::getIntNTy(C&: M->getContext(), N: SizeInBytes * 8), |
| 311 | .Translation: ThunkArgTranslation::Bitcast}; |
| 312 | }; |
| 313 | |
| 314 | auto pointerIndirection = [this](Type *Arm64Ty) { |
| 315 | return ThunkArgInfo{.Arm64Ty: Arm64Ty, .X64Ty: PtrTy, |
| 316 | .Translation: ThunkArgTranslation::PointerIndirection}; |
| 317 | }; |
| 318 | |
| 319 | if (T->isFloatTy()) { |
| 320 | Out << "f" ; |
| 321 | return direct(T); |
| 322 | } |
| 323 | |
| 324 | if (T->isDoubleTy()) { |
| 325 | Out << "d" ; |
| 326 | return direct(T); |
| 327 | } |
| 328 | |
| 329 | if (T->isFloatingPointTy()) { |
| 330 | report_fatal_error( |
| 331 | reason: "Only 32 and 64 bit floating points are supported for ARM64EC thunks" ); |
| 332 | } |
| 333 | |
| 334 | auto &DL = M->getDataLayout(); |
| 335 | |
| 336 | if (auto *StructTy = dyn_cast<StructType>(Val: T)) |
| 337 | if (StructTy->getNumElements() == 1) |
| 338 | T = StructTy->getElementType(N: 0); |
| 339 | |
| 340 | if (T->isArrayTy()) { |
| 341 | Type *ElementTy = T->getArrayElementType(); |
| 342 | uint64_t ElementCnt = T->getArrayNumElements(); |
| 343 | uint64_t ElementSizePerBytes = DL.getTypeSizeInBits(Ty: ElementTy) / 8; |
| 344 | uint64_t TotalSizeBytes = ElementCnt * ElementSizePerBytes; |
| 345 | if (ElementTy->isFloatTy() || ElementTy->isDoubleTy()) { |
| 346 | Out << (ElementTy->isFloatTy() ? "F" : "D" ) << TotalSizeBytes; |
| 347 | if (Alignment.value() >= 16 && !Ret) |
| 348 | Out << "a" << Alignment.value(); |
| 349 | if (TotalSizeBytes <= 8) { |
| 350 | // Arm64 returns small structs of float/double in float registers; |
| 351 | // X64 uses RAX. |
| 352 | return bitcast(T, TotalSizeBytes); |
| 353 | } else { |
| 354 | // Struct is passed directly on Arm64, but indirectly on X64. |
| 355 | return pointerIndirection(T); |
| 356 | } |
| 357 | } else if (T->isFloatingPointTy()) { |
| 358 | report_fatal_error(reason: "Only 32 and 64 bit floating points are supported for " |
| 359 | "ARM64EC thunks" ); |
| 360 | } |
| 361 | } |
| 362 | |
| 363 | if ((T->isIntegerTy() || T->isPointerTy()) && DL.getTypeSizeInBits(Ty: T) <= 64) { |
| 364 | Out << "i8" ; |
| 365 | return direct(I64Ty); |
| 366 | } |
| 367 | |
| 368 | unsigned TypeSize = ArgSizeBytes; |
| 369 | if (TypeSize == 0) |
| 370 | TypeSize = DL.getTypeSizeInBits(Ty: T) / 8; |
| 371 | Out << "m" ; |
| 372 | if (TypeSize != 4) |
| 373 | Out << TypeSize; |
| 374 | if (Alignment.value() >= 16 && !Ret) |
| 375 | Out << "a" << Alignment.value(); |
| 376 | // FIXME: Try to canonicalize Arm64Ty more thoroughly? |
| 377 | if (TypeSize == 1 || TypeSize == 2 || TypeSize == 4 || TypeSize == 8) { |
| 378 | // Pass directly in an integer register |
| 379 | return bitcast(T, TypeSize); |
| 380 | } else { |
| 381 | // Passed directly on Arm64, but indirectly on X64. |
| 382 | return pointerIndirection(T); |
| 383 | } |
| 384 | } |
| 385 | |
| 386 | // This function builds the "exit thunk", a function which translates |
| 387 | // arguments and return values when calling x64 code from AArch64 code. |
| 388 | Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT, |
| 389 | AttributeList Attrs) { |
| 390 | SmallString<256> ExitThunkName; |
| 391 | llvm::raw_svector_ostream ExitThunkStream(ExitThunkName); |
| 392 | FunctionType *Arm64Ty, *X64Ty; |
| 393 | SmallVector<ThunkArgTranslation> ArgTranslations; |
| 394 | getThunkType(FT, AttrList: Attrs, TT: Arm64ECThunkType::Exit, Out&: ExitThunkStream, Arm64Ty, |
| 395 | X64Ty, ArgTranslations); |
| 396 | if (Function *F = M->getFunction(Name: ExitThunkName)) |
| 397 | return F; |
| 398 | |
| 399 | Function *F = Function::Create(Ty: Arm64Ty, Linkage: GlobalValue::LinkOnceODRLinkage, AddrSpace: 0, |
| 400 | N: ExitThunkName, M); |
| 401 | F->setCallingConv(CallingConv::ARM64EC_Thunk_Native); |
| 402 | F->setSection(".wowthk$aa" ); |
| 403 | F->setComdat(M->getOrInsertComdat(Name: ExitThunkName)); |
| 404 | // Copy MSVC, and always set up a frame pointer. (Maybe this isn't necessary.) |
| 405 | F->addFnAttr(Kind: "frame-pointer" , Val: "all" ); |
| 406 | // Only copy sret from the first argument. For C++ instance methods, clang can |
| 407 | // stick an sret marking on a later argument, but it doesn't actually affect |
| 408 | // the ABI, so we can omit it. This avoids triggering a verifier assertion. |
| 409 | if (FT->getNumParams()) { |
| 410 | auto SRet = Attrs.getParamAttr(ArgNo: 0, Kind: Attribute::StructRet); |
| 411 | auto InReg = Attrs.getParamAttr(ArgNo: 0, Kind: Attribute::InReg); |
| 412 | if (SRet.isValid() && !InReg.isValid()) |
| 413 | F->addParamAttr(ArgNo: 1, Attr: SRet); |
| 414 | } |
| 415 | // FIXME: Copy anything other than sret? Shouldn't be necessary for normal |
| 416 | // C ABI, but might show up in other cases. |
| 417 | BasicBlock *BB = BasicBlock::Create(Context&: M->getContext(), Name: "" , Parent: F); |
| 418 | IRBuilder<> IRB(BB); |
| 419 | Value *CalleePtr = |
| 420 | M->getOrInsertGlobal(Name: "__os_arm64x_dispatch_call_no_redirect" , Ty: PtrTy); |
| 421 | Value *Callee = IRB.CreateLoad(Ty: PtrTy, Ptr: CalleePtr); |
| 422 | auto &DL = M->getDataLayout(); |
| 423 | SmallVector<Value *> Args; |
| 424 | |
| 425 | // Pass the called function in x9. |
| 426 | auto X64TyOffset = 1; |
| 427 | Args.push_back(Elt: F->arg_begin()); |
| 428 | |
| 429 | Type *RetTy = Arm64Ty->getReturnType(); |
| 430 | if (RetTy != X64Ty->getReturnType()) { |
| 431 | // If the return type is an array or struct, translate it. Values of size |
| 432 | // 8 or less go into RAX; bigger values go into memory, and we pass a |
| 433 | // pointer. |
| 434 | if (DL.getTypeStoreSize(Ty: RetTy) > 8) { |
| 435 | Args.push_back(Elt: IRB.CreateAlloca(Ty: RetTy)); |
| 436 | X64TyOffset++; |
| 437 | } |
| 438 | } |
| 439 | |
| 440 | for (auto [Arg, X64ArgType, ArgTranslation] : llvm::zip_equal( |
| 441 | t: make_range(x: F->arg_begin() + 1, y: F->arg_end()), |
| 442 | u: make_range(x: X64Ty->param_begin() + X64TyOffset, y: X64Ty->param_end()), |
| 443 | args&: ArgTranslations)) { |
| 444 | // Translate arguments from AArch64 calling convention to x86 calling |
| 445 | // convention. |
| 446 | // |
| 447 | // For simple types, we don't need to do any translation: they're |
| 448 | // represented the same way. (Implicit sign extension is not part of |
| 449 | // either convention.) |
| 450 | // |
| 451 | // The big thing we have to worry about is struct types... but |
| 452 | // fortunately AArch64 clang is pretty friendly here: the cases that need |
| 453 | // translation are always passed as a struct or array. (If we run into |
| 454 | // some cases where this doesn't work, we can teach clang to mark it up |
| 455 | // with an attribute.) |
| 456 | // |
| 457 | // The first argument is the called function, stored in x9. |
| 458 | if (ArgTranslation != ThunkArgTranslation::Direct) { |
| 459 | Value *Mem = IRB.CreateAlloca(Ty: Arg.getType()); |
| 460 | IRB.CreateStore(Val: &Arg, Ptr: Mem); |
| 461 | if (ArgTranslation == ThunkArgTranslation::Bitcast) { |
| 462 | Type *IntTy = IRB.getIntNTy(N: DL.getTypeStoreSizeInBits(Ty: Arg.getType())); |
| 463 | Args.push_back(Elt: IRB.CreateLoad(Ty: IntTy, Ptr: Mem)); |
| 464 | } else { |
| 465 | assert(ArgTranslation == ThunkArgTranslation::PointerIndirection); |
| 466 | Args.push_back(Elt: Mem); |
| 467 | } |
| 468 | } else { |
| 469 | Args.push_back(Elt: &Arg); |
| 470 | } |
| 471 | assert(Args.back()->getType() == X64ArgType); |
| 472 | } |
| 473 | // FIXME: Transfer necessary attributes? sret? anything else? |
| 474 | |
| 475 | CallInst *Call = IRB.CreateCall(FTy: X64Ty, Callee, Args); |
| 476 | Call->setCallingConv(CallingConv::ARM64EC_Thunk_X64); |
| 477 | |
| 478 | Value *RetVal = Call; |
| 479 | if (RetTy != X64Ty->getReturnType()) { |
| 480 | // If we rewrote the return type earlier, convert the return value to |
| 481 | // the proper type. |
| 482 | if (DL.getTypeStoreSize(Ty: RetTy) > 8) { |
| 483 | RetVal = IRB.CreateLoad(Ty: RetTy, Ptr: Args[1]); |
| 484 | } else { |
| 485 | Value *CastAlloca = IRB.CreateAlloca(Ty: RetTy); |
| 486 | IRB.CreateStore(Val: Call, Ptr: CastAlloca); |
| 487 | RetVal = IRB.CreateLoad(Ty: RetTy, Ptr: CastAlloca); |
| 488 | } |
| 489 | } |
| 490 | |
| 491 | if (RetTy->isVoidTy()) |
| 492 | IRB.CreateRetVoid(); |
| 493 | else |
| 494 | IRB.CreateRet(V: RetVal); |
| 495 | return F; |
| 496 | } |
| 497 | |
| 498 | // This function builds the "entry thunk", a function which translates |
| 499 | // arguments and return values when calling AArch64 code from x64 code. |
| 500 | Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) { |
| 501 | SmallString<256> EntryThunkName; |
| 502 | llvm::raw_svector_ostream EntryThunkStream(EntryThunkName); |
| 503 | FunctionType *Arm64Ty, *X64Ty; |
| 504 | SmallVector<ThunkArgTranslation> ArgTranslations; |
| 505 | getThunkType(FT: F->getFunctionType(), AttrList: F->getAttributes(), |
| 506 | TT: Arm64ECThunkType::Entry, Out&: EntryThunkStream, Arm64Ty, X64Ty, |
| 507 | ArgTranslations); |
| 508 | if (Function *F = M->getFunction(Name: EntryThunkName)) |
| 509 | return F; |
| 510 | |
| 511 | Function *Thunk = Function::Create(Ty: X64Ty, Linkage: GlobalValue::LinkOnceODRLinkage, AddrSpace: 0, |
| 512 | N: EntryThunkName, M); |
| 513 | Thunk->setCallingConv(CallingConv::ARM64EC_Thunk_X64); |
| 514 | Thunk->setSection(".wowthk$aa" ); |
| 515 | Thunk->setComdat(M->getOrInsertComdat(Name: EntryThunkName)); |
| 516 | // Copy MSVC, and always set up a frame pointer. (Maybe this isn't necessary.) |
| 517 | Thunk->addFnAttr(Kind: "frame-pointer" , Val: "all" ); |
| 518 | |
| 519 | BasicBlock *BB = BasicBlock::Create(Context&: M->getContext(), Name: "" , Parent: Thunk); |
| 520 | IRBuilder<> IRB(BB); |
| 521 | |
| 522 | Type *RetTy = Arm64Ty->getReturnType(); |
| 523 | Type *X64RetType = X64Ty->getReturnType(); |
| 524 | |
| 525 | bool TransformDirectToSRet = X64RetType->isVoidTy() && !RetTy->isVoidTy(); |
| 526 | unsigned ThunkArgOffset = TransformDirectToSRet ? 2 : 1; |
| 527 | unsigned PassthroughArgSize = |
| 528 | (F->isVarArg() ? 5 : Thunk->arg_size()) - ThunkArgOffset; |
| 529 | assert(ArgTranslations.size() == (F->isVarArg() ? 5 : PassthroughArgSize)); |
| 530 | |
| 531 | // Translate arguments to call. |
| 532 | SmallVector<Value *> Args; |
| 533 | for (unsigned i = 0; i != PassthroughArgSize; ++i) { |
| 534 | Value *Arg = Thunk->getArg(i: i + ThunkArgOffset); |
| 535 | Type *ArgTy = Arm64Ty->getParamType(i); |
| 536 | ThunkArgTranslation ArgTranslation = ArgTranslations[i]; |
| 537 | if (ArgTranslation != ThunkArgTranslation::Direct) { |
| 538 | // Translate array/struct arguments to the expected type. |
| 539 | if (ArgTranslation == ThunkArgTranslation::Bitcast) { |
| 540 | Value *CastAlloca = IRB.CreateAlloca(Ty: ArgTy); |
| 541 | IRB.CreateStore(Val: Arg, Ptr: CastAlloca); |
| 542 | Arg = IRB.CreateLoad(Ty: ArgTy, Ptr: CastAlloca); |
| 543 | } else { |
| 544 | assert(ArgTranslation == ThunkArgTranslation::PointerIndirection); |
| 545 | Arg = IRB.CreateLoad(Ty: ArgTy, Ptr: Arg); |
| 546 | } |
| 547 | } |
| 548 | assert(Arg->getType() == ArgTy); |
| 549 | Args.push_back(Elt: Arg); |
| 550 | } |
| 551 | |
| 552 | if (F->isVarArg()) { |
| 553 | // The 5th argument to variadic entry thunks is used to model the x64 sp |
| 554 | // which is passed to the thunk in x4, this can be passed to the callee as |
| 555 | // the variadic argument start address after skipping over the 32 byte |
| 556 | // shadow store. |
| 557 | |
| 558 | // The EC thunk CC will assign any argument marked as InReg to x4. |
| 559 | Thunk->addParamAttr(ArgNo: 5, Kind: Attribute::InReg); |
| 560 | Value *Arg = Thunk->getArg(i: 5); |
| 561 | Arg = IRB.CreatePtrAdd(Ptr: Arg, Offset: IRB.getInt64(C: 0x20)); |
| 562 | Args.push_back(Elt: Arg); |
| 563 | |
| 564 | // Pass in a zero variadic argument size (in x5). |
| 565 | Args.push_back(Elt: IRB.getInt64(C: 0)); |
| 566 | } |
| 567 | |
| 568 | // Call the function passed to the thunk. |
| 569 | Value *Callee = Thunk->getArg(i: 0); |
| 570 | CallInst *Call = IRB.CreateCall(FTy: Arm64Ty, Callee, Args); |
| 571 | |
| 572 | auto SRetAttr = F->getAttributes().getParamAttr(ArgNo: 0, Kind: Attribute::StructRet); |
| 573 | auto InRegAttr = F->getAttributes().getParamAttr(ArgNo: 0, Kind: Attribute::InReg); |
| 574 | if (SRetAttr.isValid() && !InRegAttr.isValid()) { |
| 575 | Thunk->addParamAttr(ArgNo: 1, Attr: SRetAttr); |
| 576 | Call->addParamAttr(ArgNo: 0, Attr: SRetAttr); |
| 577 | } |
| 578 | |
| 579 | Value *RetVal = Call; |
| 580 | if (TransformDirectToSRet) { |
| 581 | IRB.CreateStore(Val: RetVal, Ptr: Thunk->getArg(i: 1)); |
| 582 | } else if (X64RetType != RetTy) { |
| 583 | Value *CastAlloca = IRB.CreateAlloca(Ty: X64RetType); |
| 584 | IRB.CreateStore(Val: Call, Ptr: CastAlloca); |
| 585 | RetVal = IRB.CreateLoad(Ty: X64RetType, Ptr: CastAlloca); |
| 586 | } |
| 587 | |
| 588 | // Return to the caller. Note that the isel has code to translate this |
| 589 | // "ret" to a tail call to __os_arm64x_dispatch_ret. (Alternatively, we |
| 590 | // could emit a tail call here, but that would require a dedicated calling |
| 591 | // convention, which seems more complicated overall.) |
| 592 | if (X64RetType->isVoidTy()) |
| 593 | IRB.CreateRetVoid(); |
| 594 | else |
| 595 | IRB.CreateRet(V: RetVal); |
| 596 | |
| 597 | return Thunk; |
| 598 | } |
| 599 | |
| 600 | // Builds the "guest exit thunk", a helper to call a function which may or may |
| 601 | // not be an exit thunk. (We optimistically assume non-dllimport function |
| 602 | // declarations refer to functions defined in AArch64 code; if the linker |
| 603 | // can't prove that, we use this routine instead.) |
| 604 | Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) { |
| 605 | llvm::raw_null_ostream NullThunkName; |
| 606 | FunctionType *Arm64Ty, *X64Ty; |
| 607 | SmallVector<ThunkArgTranslation> ArgTranslations; |
| 608 | getThunkType(FT: F->getFunctionType(), AttrList: F->getAttributes(), |
| 609 | TT: Arm64ECThunkType::GuestExit, Out&: NullThunkName, Arm64Ty, X64Ty, |
| 610 | ArgTranslations); |
| 611 | auto MangledName = getArm64ECMangledFunctionName(Name: F->getName().str()); |
| 612 | assert(MangledName && "Can't guest exit to function that's already native" ); |
| 613 | std::string ThunkName = *MangledName; |
| 614 | if (ThunkName[0] == '?' && ThunkName.find(s: "@" ) != std::string::npos) { |
| 615 | ThunkName.insert(pos: ThunkName.find(s: "@" ), s: "$exit_thunk" ); |
| 616 | } else { |
| 617 | ThunkName.append(s: "$exit_thunk" ); |
| 618 | } |
| 619 | Function *GuestExit = |
| 620 | Function::Create(Ty: Arm64Ty, Linkage: GlobalValue::WeakODRLinkage, AddrSpace: 0, N: ThunkName, M); |
| 621 | GuestExit->setComdat(M->getOrInsertComdat(Name: ThunkName)); |
| 622 | GuestExit->setSection(".wowthk$aa" ); |
| 623 | GuestExit->addMetadata( |
| 624 | Kind: "arm64ec_unmangled_name" , |
| 625 | MD&: *MDNode::get(Context&: M->getContext(), |
| 626 | MDs: MDString::get(Context&: M->getContext(), Str: F->getName()))); |
| 627 | GuestExit->setMetadata( |
| 628 | Kind: "arm64ec_ecmangled_name" , |
| 629 | Node: MDNode::get(Context&: M->getContext(), |
| 630 | MDs: MDString::get(Context&: M->getContext(), Str: *MangledName))); |
| 631 | F->setMetadata(Kind: "arm64ec_hasguestexit" , Node: MDNode::get(Context&: M->getContext(), MDs: {})); |
| 632 | BasicBlock *BB = BasicBlock::Create(Context&: M->getContext(), Name: "" , Parent: GuestExit); |
| 633 | IRBuilder<> B(BB); |
| 634 | |
| 635 | // Load the global symbol as a pointer to the check function. |
| 636 | Value *GuardFn; |
| 637 | if (cfguard_module_flag == 2 && !F->hasFnAttribute(Kind: "guard_nocf" )) |
| 638 | GuardFn = GuardFnCFGlobal; |
| 639 | else |
| 640 | GuardFn = GuardFnGlobal; |
| 641 | LoadInst *GuardCheckLoad = B.CreateLoad(Ty: PtrTy, Ptr: GuardFn); |
| 642 | |
| 643 | // Create new call instruction. The CFGuard check should always be a call, |
| 644 | // even if the original CallBase is an Invoke or CallBr instruction. |
| 645 | Function *Thunk = buildExitThunk(FT: F->getFunctionType(), Attrs: F->getAttributes()); |
| 646 | CallInst *GuardCheck = B.CreateCall( |
| 647 | FTy: GuardFnType, Callee: GuardCheckLoad, Args: {F, Thunk}); |
| 648 | |
| 649 | // Ensure that the first argument is passed in the correct register. |
| 650 | GuardCheck->setCallingConv(CallingConv::CFGuard_Check); |
| 651 | |
| 652 | SmallVector<Value *> Args(llvm::make_pointer_range(Range: GuestExit->args())); |
| 653 | CallInst *Call = B.CreateCall(FTy: Arm64Ty, Callee: GuardCheck, Args); |
| 654 | Call->setTailCallKind(llvm::CallInst::TCK_MustTail); |
| 655 | |
| 656 | if (Call->getType()->isVoidTy()) |
| 657 | B.CreateRetVoid(); |
| 658 | else |
| 659 | B.CreateRet(V: Call); |
| 660 | |
| 661 | auto SRetAttr = F->getAttributes().getParamAttr(ArgNo: 0, Kind: Attribute::StructRet); |
| 662 | auto InRegAttr = F->getAttributes().getParamAttr(ArgNo: 0, Kind: Attribute::InReg); |
| 663 | if (SRetAttr.isValid() && !InRegAttr.isValid()) { |
| 664 | GuestExit->addParamAttr(ArgNo: 0, Attr: SRetAttr); |
| 665 | Call->addParamAttr(ArgNo: 0, Attr: SRetAttr); |
| 666 | } |
| 667 | |
| 668 | return GuestExit; |
| 669 | } |
| 670 | |
// Build the "$hybpatch_thunk" for a hybrid_patchable function. The thunk is
// a WeakODR definition that loads __os_arm64x_dispatch_call, asks the
// dispatcher which implementation should actually run (the original EC body
// or a patched replacement), and then musttail-calls the returned function.
// On return, MangledAlias is re-pointed at the thunk so direct callers of
// the mangled symbol also go through the dispatch.
Function *
AArch64Arm64ECCallLowering::buildPatchableThunk(GlobalAlias *UnmangledAlias,
                                                GlobalAlias *MangledAlias) {
  // Only the derived Arm64/x64 function types are needed here; the thunk
  // name computed by getThunkType is discarded into a null stream.
  llvm::raw_null_ostream NullThunkName;
  FunctionType *Arm64Ty, *X64Ty;
  Function *F = cast<Function>(Val: MangledAlias->getAliasee());
  SmallVector<ThunkArgTranslation> ArgTranslations;
  getThunkType(FT: F->getFunctionType(), AttrList: F->getAttributes(),
               TT: Arm64ECThunkType::GuestExit, Out&: NullThunkName, Arm64Ty, X64Ty,
               ArgTranslations);
  // For MSVC C++-decorated names ("?...@..."), splice the marker in before
  // the first '@' so the result stays a valid decorated name; otherwise
  // simply append it.
  std::string ThunkName(MangledAlias->getName());
  if (ThunkName[0] == '?' && ThunkName.find(s: "@" ) != std::string::npos) {
    ThunkName.insert(pos: ThunkName.find(s: "@" ), s: "$hybpatch_thunk" );
  } else {
    ThunkName.append(s: "$hybpatch_thunk" );
  }

  // WeakODR + a comdat keyed on the thunk name lets identical thunks from
  // different TUs fold; ".wowthk$aa" is the section used for these thunks.
  Function *GuestExit =
      Function::Create(Ty: Arm64Ty, Linkage: GlobalValue::WeakODRLinkage, AddrSpace: 0, N: ThunkName, M);
  GuestExit->setComdat(M->getOrInsertComdat(Name: ThunkName));
  GuestExit->setSection(".wowthk$aa" );
  BasicBlock *BB = BasicBlock::Create(Context&: M->getContext(), Name: "" , Parent: GuestExit);
  IRBuilder<> B(BB);

  // Load the global symbol as a pointer to the check function.
  LoadInst *DispatchLoad = B.CreateLoad(Ty: PtrTy, Ptr: DispatchFnGlobal);

  // Create new dispatch call instruction. The dispatcher receives the
  // unmangled alias, an exit thunk (used if the target is x64 code), and the
  // current EC implementation, and returns the function to actually call.
  Function *ExitThunk =
      buildExitThunk(FT: F->getFunctionType(), Attrs: F->getAttributes());
  CallInst *Dispatch =
      B.CreateCall(FTy: DispatchFnType, Callee: DispatchLoad,
                   Args: {UnmangledAlias, ExitThunk, UnmangledAlias->getAliasee()});

  // Ensure that the first arguments are passed in the correct registers.
  Dispatch->setCallingConv(CallingConv::CFGuard_Check);

  // Forward the thunk's own arguments unchanged to the resolved target;
  // musttail guarantees a genuine tail call, keeping the thunk frame
  // transparent to the callee.
  SmallVector<Value *> Args(llvm::make_pointer_range(Range: GuestExit->args()));
  CallInst *Call = B.CreateCall(FTy: Arm64Ty, Callee: Dispatch, Args);
  Call->setTailCallKind(llvm::CallInst::TCK_MustTail);

  if (Call->getType()->isVoidTy())
    B.CreateRetVoid();
  else
    B.CreateRet(V: Call);

  // Propagate a first-parameter sret attribute onto the thunk and the tail
  // call, unless that parameter is also marked inreg.
  auto SRetAttr = F->getAttributes().getParamAttr(ArgNo: 0, Kind: Attribute::StructRet);
  auto InRegAttr = F->getAttributes().getParamAttr(ArgNo: 0, Kind: Attribute::InReg);
  if (SRetAttr.isValid() && !InRegAttr.isValid()) {
    GuestExit->addParamAttr(ArgNo: 0, Attr: SRetAttr);
    Call->addParamAttr(ArgNo: 0, Attr: SRetAttr);
  }

  // Redirect the mangled alias at the thunk so direct calls dispatch too.
  MangledAlias->setAliasee(GuestExit);
  return GuestExit;
}
| 727 | |
| 728 | // Lower an indirect call with inline code. |
| 729 | void AArch64Arm64ECCallLowering::lowerCall(CallBase *CB) { |
| 730 | assert(CB->getModule()->getTargetTriple().isOSWindows() && |
| 731 | "Only applicable for Windows targets" ); |
| 732 | |
| 733 | IRBuilder<> B(CB); |
| 734 | Value *CalledOperand = CB->getCalledOperand(); |
| 735 | |
| 736 | // If the indirect call is called within catchpad or cleanuppad, |
| 737 | // we need to copy "funclet" bundle of the call. |
| 738 | SmallVector<llvm::OperandBundleDef, 1> Bundles; |
| 739 | if (auto Bundle = CB->getOperandBundle(ID: LLVMContext::OB_funclet)) |
| 740 | Bundles.push_back(Elt: OperandBundleDef(*Bundle)); |
| 741 | |
| 742 | // Load the global symbol as a pointer to the check function. |
| 743 | Value *GuardFn; |
| 744 | if (cfguard_module_flag == 2 && !CB->hasFnAttr(Kind: "guard_nocf" )) |
| 745 | GuardFn = GuardFnCFGlobal; |
| 746 | else |
| 747 | GuardFn = GuardFnGlobal; |
| 748 | LoadInst *GuardCheckLoad = B.CreateLoad(Ty: PtrTy, Ptr: GuardFn); |
| 749 | |
| 750 | // Create new call instruction. The CFGuard check should always be a call, |
| 751 | // even if the original CallBase is an Invoke or CallBr instruction. |
| 752 | Function *Thunk = buildExitThunk(FT: CB->getFunctionType(), Attrs: CB->getAttributes()); |
| 753 | CallInst *GuardCheck = |
| 754 | B.CreateCall(FTy: GuardFnType, Callee: GuardCheckLoad, Args: {CalledOperand, Thunk}, |
| 755 | OpBundles: Bundles); |
| 756 | |
| 757 | // Ensure that the first argument is passed in the correct register. |
| 758 | GuardCheck->setCallingConv(CallingConv::CFGuard_Check); |
| 759 | |
| 760 | CB->setCalledOperand(GuardCheck); |
| 761 | } |
| 762 | |
// Module entry point. Sets up per-module state, mangles EC symbol names,
// rewrites hybrid_patchable functions to dispatch through aliases, lowers
// indirect calls in every function, and finally emits entry/exit/guest-exit
// thunks plus the llvm.arm64ec.symbolmap table that describes them.
bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) {
  if (!GenerateThunks)
    return false;

  M = &Mod;

  // Check if this module has the cfguard flag and read its value.
  if (auto *MD =
          mdconst::extract_or_null<ConstantInt>(MD: M->getModuleFlag(Key: "cfguard" )))
    cfguard_module_flag = MD->getZExtValue();

  PtrTy = PointerType::getUnqual(C&: M->getContext());
  I64Ty = Type::getInt64Ty(C&: M->getContext());
  VoidTy = Type::getVoidTy(C&: M->getContext());

  // Signatures and pointer globals for the OS-provided icall checkers and
  // the hybrid-patch dispatcher used by the lowering below.
  GuardFnType = FunctionType::get(Result: PtrTy, Params: {PtrTy, PtrTy}, isVarArg: false);
  DispatchFnType = FunctionType::get(Result: PtrTy, Params: {PtrTy, PtrTy, PtrTy}, isVarArg: false);
  GuardFnCFGlobal = M->getOrInsertGlobal(Name: "__os_arm64x_check_icall_cfg" , Ty: PtrTy);
  GuardFnGlobal = M->getOrInsertGlobal(Name: "__os_arm64x_check_icall" , Ty: PtrTy);
  DispatchFnGlobal = M->getOrInsertGlobal(Name: "__os_arm64x_dispatch_call" , Ty: PtrTy);

  // Mangle names of function aliases and add the alias name to
  // arm64ec_unmangled_name metadata to ensure a weak anti-dependency symbol is
  // emitted for the alias as well. Do this early, before handling
  // hybrid_patchable functions, to avoid mangling their aliases.
  for (GlobalAlias &A : Mod.aliases()) {
    auto F = dyn_cast_or_null<Function>(Val: A.getAliaseeObject());
    if (!F)
      continue;
    if (std::optional<std::string> MangledName =
            getArm64ECMangledFunctionName(Name: A.getName().str())) {
      F->addMetadata(Kind: "arm64ec_unmangled_name" ,
                     MD&: *MDNode::get(Context&: M->getContext(),
                                  MDs: MDString::get(Context&: M->getContext(), Str: A.getName())));
      A.setName(MangledName.value());
    }
  }

  // FnsMap maps each hybrid_patchable function's unmangled alias to its
  // mangled alias; PatchableFns records which need a patchable thunk.
  DenseMap<GlobalAlias *, GlobalAlias *> FnsMap;
  SetVector<GlobalAlias *> PatchableFns;

  for (Function &F : Mod) {
    // Personality routines are referenced by name from unwind data, so give
    // them the EC-mangled name too (when they are real function symbols).
    if (F.hasPersonalityFn()) {
      GlobalValue *PersFn =
          cast<GlobalValue>(Val: F.getPersonalityFn()->stripPointerCasts());
      if (PersFn->getValueType() && PersFn->getValueType()->isFunctionTy()) {
        if (std::optional<std::string> MangledName =
                getArm64ECMangledFunctionName(Name: PersFn->getName().str())) {
          PersFn->setName(MangledName.value());
        }
      }
    }

    // Only defined, non-local hybrid_patchable functions that have not
    // already received the target suffix are rewritten below.
    if (!F.hasFnAttribute(Kind: Attribute::HybridPatchable) || F.isDeclaration() ||
        F.hasLocalLinkage() ||
        F.getName().ends_with(Suffix: HybridPatchableTargetSuffix))
      continue;

    // Rename hybrid patchable functions and change callers to use a global
    // alias instead.
    if (std::optional<std::string> MangledName =
            getArm64ECMangledFunctionName(Name: F.getName().str())) {
      std::string OrigName(F.getName());
      F.setName(MangledName.value() + HybridPatchableTargetSuffix);

      // The unmangled symbol is a weak alias to an undefined symbol with the
      // "EXP+" prefix. This undefined symbol is resolved by the linker by
      // creating an x86 thunk that jumps back to the actual EC target. Since we
      // can't represent that in IR, we create an alias to the target instead.
      // The "EXP+" symbol is set as metadata, which is then used by
      // emitGlobalAlias to emit the right alias.
      auto *A =
          GlobalAlias::create(Linkage: GlobalValue::LinkOnceODRLinkage, Name: OrigName, Aliasee: &F);
      auto *AM = GlobalAlias::create(Linkage: GlobalValue::LinkOnceODRLinkage,
                                     Name: MangledName.value(), Aliasee: &F);
      // Uses from other aliases go to the mangled alias; every remaining use
      // of F is redirected to the unmangled alias.
      F.replaceUsesWithIf(New: AM,
                          ShouldReplace: [](Use &U) { return isa<GlobalAlias>(Val: U.getUser()); });
      F.replaceAllUsesWith(V: A);
      F.setMetadata(Kind: "arm64ec_exp_name" ,
                    Node: MDNode::get(Context&: M->getContext(),
                                MDs: MDString::get(Context&: M->getContext(),
                                              Str: "EXP+" + MangledName.value())));
      // The RAUW above also rewrote the two aliasees; point them back at F.
      A->setAliasee(&F);
      AM->setAliasee(&F);

      // Move any dllexport storage class from F to the unmangled alias, so
      // the exported symbol is the alias rather than the renamed function.
      if (F.hasDLLExportStorageClass()) {
        A->setDLLStorageClass(GlobalValue::DLLExportStorageClass);
        F.setDLLStorageClass(GlobalValue::DefaultStorageClass);
      }

      FnsMap[A] = AM;
      PatchableFns.insert(X: A);
    }
  }

  // Process every ordinary function body (thunks themselves are skipped),
  // collecting declarations that are called directly into DirectCalledFns.
  SetVector<GlobalValue *> DirectCalledFns;
  for (Function &F : Mod)
    if (!F.isDeclaration() &&
        F.getCallingConv() != CallingConv::ARM64EC_Thunk_Native &&
        F.getCallingConv() != CallingConv::ARM64EC_Thunk_X64)
      processFunction(F, DirectCalledFns, FnsMap);

  // One (Src, Dst, Kind) record per generated thunk; emitted below as the
  // llvm.arm64ec.symbolmap global.
  struct ThunkInfo {
    Constant *Src;
    Constant *Dst;
    Arm64ECThunkType Kind;
  };
  SmallVector<ThunkInfo> ThunkMapping;
  // Every defined function that is externally visible or address-taken gets
  // an entry thunk (and a comdat keyed on its name if it had none).
  for (Function &F : Mod) {
    if (!F.isDeclaration() && (!F.hasLocalLinkage() || F.hasAddressTaken()) &&
        F.getCallingConv() != CallingConv::ARM64EC_Thunk_Native &&
        F.getCallingConv() != CallingConv::ARM64EC_Thunk_X64) {
      if (!F.hasComdat())
        F.setComdat(Mod.getOrInsertComdat(Name: F.getName()));
      ThunkMapping.push_back(
          Elt: {.Src: &F, .Dst: buildEntryThunk(F: &F), .Kind: Arm64ECThunkType::Entry});
    }
  }
  // Directly-called declarations get an exit thunk; non-alias, non-dllimport
  // targets additionally get a guest-exit thunk.
  for (GlobalValue *O : DirectCalledFns) {
    auto GA = dyn_cast<GlobalAlias>(Val: O);
    auto F = dyn_cast<Function>(Val: GA ? GA->getAliasee() : O);
    ThunkMapping.push_back(
        Elt: {.Src: O, .Dst: buildExitThunk(FT: F->getFunctionType(), Attrs: F->getAttributes()),
         .Kind: Arm64ECThunkType::Exit});
    if (!GA && !F->hasDLLImportStorageClass())
      ThunkMapping.push_back(
          Elt: {.Src: buildGuestExitThunk(F), .Dst: F, .Kind: Arm64ECThunkType::GuestExit});
  }
  for (GlobalAlias *A : PatchableFns) {
    Function *Thunk = buildPatchableThunk(UnmangledAlias: A, MangledAlias: FnsMap[A]);
    ThunkMapping.push_back(Elt: {.Src: Thunk, .Dst: A, .Kind: Arm64ECThunkType::GuestExit});
  }

  // Materialize the thunk records as an array-of-structs global named
  // llvm.arm64ec.symbolmap (only when at least one thunk was generated).
  if (!ThunkMapping.empty()) {
    SmallVector<Constant *> ThunkMappingArrayElems;
    for (ThunkInfo &Thunk : ThunkMapping) {
      ThunkMappingArrayElems.push_back(Elt: ConstantStruct::getAnon(
          V: {Thunk.Src, Thunk.Dst,
           ConstantInt::get(Context&: M->getContext(), V: APInt(32, uint8_t(Thunk.Kind)))}));
    }
    Constant *ThunkMappingArray = ConstantArray::get(
        T: llvm::ArrayType::get(ElementType: ThunkMappingArrayElems[0]->getType(),
                             NumElements: ThunkMappingArrayElems.size()),
        V: ThunkMappingArrayElems);
    new GlobalVariable(Mod, ThunkMappingArray->getType(), /*isConstant*/ false,
                       GlobalValue::ExternalLinkage, ThunkMappingArray,
                       "llvm.arm64ec.symbolmap" );
  }

  return true;
}
| 914 | |
// Process one function body: mangle its own symbol name (stashing the
// unmangled one in metadata), redirect direct calls through hybrid_patchable
// aliases, record directly-called declarations in DirectCalledFns, and lower
// all remaining indirect call sites. Returns true if any call was lowered.
bool AArch64Arm64ECCallLowering::processFunction(
    Function &F, SetVector<GlobalValue *> &DirectCalledFns,
    DenseMap<GlobalAlias *, GlobalAlias *> &FnsMap) {
  SmallVector<CallBase *, 8> IndirectCalls;

  // For ARM64EC targets, a function definition's name is mangled differently
  // from the normal symbol. We currently have no representation of this sort
  // of symbol in IR, so we change the name to the mangled name, then store
  // the unmangled name as metadata. Later passes that need the unmangled
  // name (emitting the definition) can grab it from the metadata.
  //
  // FIXME: Handle functions with weak linkage?
  if (!F.hasLocalLinkage() || F.hasAddressTaken()) {
    if (std::optional<std::string> MangledName =
            getArm64ECMangledFunctionName(Name: F.getName().str())) {
      F.addMetadata(Kind: "arm64ec_unmangled_name" ,
                    MD&: *MDNode::get(Context&: M->getContext(),
                                 MDs: MDString::get(Context&: M->getContext(), Str: F.getName())));
      // If F's comdat was keyed on its old name, create a comdat under the
      // mangled name and migrate every member to it before renaming.
      if (F.hasComdat() && F.getComdat()->getName() == F.getName()) {
        Comdat *MangledComdat = M->getOrInsertComdat(Name: MangledName.value());
        SmallVector<GlobalObject *> ComdatUsers =
            to_vector(Range: F.getComdat()->getUsers());
        for (GlobalObject *User : ComdatUsers)
          User->setComdat(MangledComdat);
      }
      F.setName(MangledName.value());
    }
  }

  // Iterate over the instructions to find all indirect call/invoke/callbr
  // instructions. Make a separate list of pointers to indirect
  // call/invoke/callbr instructions because the original instructions will be
  // deleted as the checks are added.
  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      auto *CB = dyn_cast<CallBase>(Val: &I);
      if (!CB || CB->getCallingConv() == CallingConv::ARM64EC_Thunk_X64 ||
          CB->isInlineAsm())
        continue;

      // We need to instrument any call that isn't directly calling an
      // ARM64 function.
      //
      // FIXME: getCalledFunction() fails if there's a bitcast (e.g.
      // unprototyped functions in C)
      if (Function *F = CB->getCalledFunction()) {
        // Skip when direct-to-indirect lowering is disabled, or when the
        // callee is local, an intrinsic, or defined in this module;
        // otherwise record the extern declaration for thunk generation.
        if (!LowerDirectToIndirect || F->hasLocalLinkage() ||
            F->isIntrinsic() || !F->isDeclaration())
          continue;

        DirectCalledFns.insert(X: F);
        continue;
      }

      // Use mangled global alias for direct calls to patchable functions.
      if (GlobalAlias *A = dyn_cast<GlobalAlias>(Val: CB->getCalledOperand())) {
        auto I = FnsMap.find(Val: A);
        if (I != FnsMap.end()) {
          CB->setCalledOperand(I->second);
          DirectCalledFns.insert(X: I->first);
          continue;
        }
      }

      IndirectCalls.push_back(Elt: CB);
      ++Arm64ECCallsLowered;
    }
  }

  if (IndirectCalls.empty())
    return false;

  for (CallBase *CB : IndirectCalls)
    lowerCall(CB);

  return true;
}
| 992 | |
// Legacy pass-manager registration: the pass identity anchor and the
// INITIALIZE_PASS boilerplate (not a CFG-only, not an analysis pass).
char AArch64Arm64ECCallLowering::ID = 0;
INITIALIZE_PASS(AArch64Arm64ECCallLowering, "Arm64ECCallLowering" ,
                "AArch64Arm64ECCallLowering" , false, false)
| 996 | |
| 997 | ModulePass *llvm::createAArch64Arm64ECCallLoweringPass() { |
| 998 | return new AArch64Arm64ECCallLowering; |
| 999 | } |
| 1000 | |