| 1 | //===-- Target.cpp ----------------------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | #include "../Target.h" |
| 9 | |
| 10 | #include "../Error.h" |
| 11 | #include "../MmapUtils.h" |
| 12 | #include "../ParallelSnippetGenerator.h" |
| 13 | #include "../SerialSnippetGenerator.h" |
| 14 | #include "../SnippetGenerator.h" |
| 15 | #include "../SubprocessMemory.h" |
| 16 | #include "MCTargetDesc/X86BaseInfo.h" |
| 17 | #include "MCTargetDesc/X86MCTargetDesc.h" |
| 18 | #include "X86.h" |
| 19 | #include "X86Counter.h" |
| 20 | #include "X86RegisterInfo.h" |
| 21 | #include "llvm/ADT/Sequence.h" |
| 22 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 23 | #include "llvm/MC/MCInstBuilder.h" |
| 24 | #include "llvm/Support/Errc.h" |
| 25 | #include "llvm/Support/Error.h" |
| 26 | #include "llvm/Support/ErrorHandling.h" |
| 27 | #include "llvm/Support/FormatVariadic.h" |
| 28 | #include "llvm/TargetParser/Host.h" |
| 29 | |
| 30 | #include <memory> |
| 31 | #include <string> |
| 32 | #include <vector> |
| 33 | #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) |
| 34 | #include <immintrin.h> |
| 35 | #include <intrin.h> |
| 36 | #endif |
| 37 | #if defined(_MSC_VER) && defined(_M_X64) |
| 38 | #include <float.h> // For _clearfp in ~X86SavedState(). |
| 39 | #endif |
| 40 | |
| 41 | #ifdef __linux__ |
| 42 | #ifdef __x86_64__ |
| 43 | #include <asm/prctl.h> |
| 44 | #endif // __x86_64__ |
| 45 | #include <sys/mman.h> |
| 46 | #include <sys/syscall.h> |
| 47 | #include <unistd.h> |
| 48 | #ifdef HAVE_LIBPFM |
| 49 | #include <perfmon/perf_event.h> |
| 50 | #endif // HAVE_LIBPFM |
| 51 | #endif |
| 52 | |
| 53 | #define GET_AVAILABLE_OPCODE_CHECKER |
| 54 | #include "X86GenInstrInfo.inc" |
| 55 | |
| 56 | namespace llvm { |
| 57 | namespace exegesis { |
| 58 | |
| 59 | // If a positive value is specified, we are going to use the LBR in |
| 60 | // latency-mode. |
| 61 | // |
| 62 | // Note: |
| 63 | // - A small value is preferred, but too low a value could result in |
| 64 | // throttling. |
| 65 | // - A prime number is preferred to avoid always skipping certain blocks. |
| 66 | // |
| 67 | static cl::opt<unsigned> LbrSamplingPeriod( |
| 68 | "x86-lbr-sample-period" , |
| 69 | cl::desc("The sample period (nbranches/sample), used for LBR sampling" ), |
| 70 | cl::cat(BenchmarkOptions), cl::init(Val: 0)); |
| 71 | |
| 72 | static cl::opt<bool> |
| 73 | ("x86-disable-upper-sse-registers" , |
| 74 | cl::desc("Disable XMM8-XMM15 register usage" ), |
| 75 | cl::cat(BenchmarkOptions), cl::init(Val: false)); |
| 76 | |
// FIXME: Validate that repetition-mode is loop if LBR is requested.
| 78 | |
| 79 | // Returns a non-null reason if we cannot handle the memory references in this |
| 80 | // instruction. |
| 81 | static const char *isInvalidMemoryInstr(const Instruction &Instr) { |
| 82 | switch (Instr.Description.TSFlags & X86II::FormMask) { |
| 83 | default: |
| 84 | return "Unknown FormMask value" ; |
| 85 | // These have no memory access. |
| 86 | case X86II::Pseudo: |
| 87 | case X86II::RawFrm: |
| 88 | case X86II::AddCCFrm: |
| 89 | case X86II::PrefixByte: |
| 90 | case X86II::MRMDestReg: |
| 91 | case X86II::MRMSrcReg: |
| 92 | case X86II::MRMSrcReg4VOp3: |
| 93 | case X86II::MRMSrcRegOp4: |
| 94 | case X86II::MRMSrcRegCC: |
| 95 | case X86II::MRMXrCC: |
| 96 | case X86II::MRMr0: |
| 97 | case X86II::MRMXr: |
| 98 | case X86II::MRM0r: |
| 99 | case X86II::MRM1r: |
| 100 | case X86II::MRM2r: |
| 101 | case X86II::MRM3r: |
| 102 | case X86II::MRM4r: |
| 103 | case X86II::MRM5r: |
| 104 | case X86II::MRM6r: |
| 105 | case X86II::MRM7r: |
| 106 | case X86II::MRM0X: |
| 107 | case X86II::MRM1X: |
| 108 | case X86II::MRM2X: |
| 109 | case X86II::MRM3X: |
| 110 | case X86II::MRM4X: |
| 111 | case X86II::MRM5X: |
| 112 | case X86II::MRM6X: |
| 113 | case X86II::MRM7X: |
| 114 | case X86II::MRM_C0: |
| 115 | case X86II::MRM_C1: |
| 116 | case X86II::MRM_C2: |
| 117 | case X86II::MRM_C3: |
| 118 | case X86II::MRM_C4: |
| 119 | case X86II::MRM_C5: |
| 120 | case X86II::MRM_C6: |
| 121 | case X86II::MRM_C7: |
| 122 | case X86II::MRM_C8: |
| 123 | case X86II::MRM_C9: |
| 124 | case X86II::MRM_CA: |
| 125 | case X86II::MRM_CB: |
| 126 | case X86II::MRM_CC: |
| 127 | case X86II::MRM_CD: |
| 128 | case X86II::MRM_CE: |
| 129 | case X86II::MRM_CF: |
| 130 | case X86II::MRM_D0: |
| 131 | case X86II::MRM_D1: |
| 132 | case X86II::MRM_D2: |
| 133 | case X86II::MRM_D3: |
| 134 | case X86II::MRM_D4: |
| 135 | case X86II::MRM_D5: |
| 136 | case X86II::MRM_D6: |
| 137 | case X86II::MRM_D7: |
| 138 | case X86II::MRM_D8: |
| 139 | case X86II::MRM_D9: |
| 140 | case X86II::MRM_DA: |
| 141 | case X86II::MRM_DB: |
| 142 | case X86II::MRM_DC: |
| 143 | case X86II::MRM_DD: |
| 144 | case X86II::MRM_DE: |
| 145 | case X86II::MRM_DF: |
| 146 | case X86II::MRM_E0: |
| 147 | case X86II::MRM_E1: |
| 148 | case X86II::MRM_E2: |
| 149 | case X86II::MRM_E3: |
| 150 | case X86II::MRM_E4: |
| 151 | case X86II::MRM_E5: |
| 152 | case X86II::MRM_E6: |
| 153 | case X86II::MRM_E7: |
| 154 | case X86II::MRM_E8: |
| 155 | case X86II::MRM_E9: |
| 156 | case X86II::MRM_EA: |
| 157 | case X86II::MRM_EB: |
| 158 | case X86II::MRM_EC: |
| 159 | case X86II::MRM_ED: |
| 160 | case X86II::MRM_EE: |
| 161 | case X86II::MRM_EF: |
| 162 | case X86II::MRM_F0: |
| 163 | case X86II::MRM_F1: |
| 164 | case X86II::MRM_F2: |
| 165 | case X86II::MRM_F3: |
| 166 | case X86II::MRM_F4: |
| 167 | case X86II::MRM_F5: |
| 168 | case X86II::MRM_F6: |
| 169 | case X86II::MRM_F7: |
| 170 | case X86II::MRM_F8: |
| 171 | case X86II::MRM_F9: |
| 172 | case X86II::MRM_FA: |
| 173 | case X86II::MRM_FB: |
| 174 | case X86II::MRM_FC: |
| 175 | case X86II::MRM_FD: |
| 176 | case X86II::MRM_FE: |
| 177 | case X86II::MRM_FF: |
| 178 | case X86II::RawFrmImm8: |
| 179 | return nullptr; |
| 180 | case X86II::AddRegFrm: |
| 181 | return (Instr.Description.Opcode == X86::POP16r || |
| 182 | Instr.Description.Opcode == X86::POP32r || |
| 183 | Instr.Description.Opcode == X86::PUSH16r || |
| 184 | Instr.Description.Opcode == X86::PUSH32r) |
| 185 | ? "unsupported opcode: unsupported memory access" |
| 186 | : nullptr; |
| 187 | // These access memory and are handled. |
| 188 | case X86II::MRMDestMem: |
| 189 | case X86II::MRMSrcMem: |
| 190 | case X86II::MRMSrcMem4VOp3: |
| 191 | case X86II::MRMSrcMemOp4: |
| 192 | case X86II::MRMSrcMemCC: |
| 193 | case X86II::MRMXmCC: |
| 194 | case X86II::MRMXm: |
| 195 | case X86II::MRM0m: |
| 196 | case X86II::MRM1m: |
| 197 | case X86II::MRM2m: |
| 198 | case X86II::MRM3m: |
| 199 | case X86II::MRM4m: |
| 200 | case X86II::MRM5m: |
| 201 | case X86II::MRM6m: |
| 202 | case X86II::MRM7m: |
| 203 | return nullptr; |
| 204 | // These access memory and are not handled yet. |
| 205 | case X86II::RawFrmImm16: |
| 206 | case X86II::RawFrmMemOffs: |
| 207 | case X86II::RawFrmSrc: |
| 208 | case X86II::RawFrmDst: |
| 209 | case X86II::RawFrmDstSrc: |
| 210 | return "unsupported opcode: non uniform memory access" ; |
| 211 | } |
| 212 | } |
| 213 | |
| 214 | // If the opcode is invalid, returns a pointer to a character literal indicating |
| 215 | // the reason. nullptr indicates a valid opcode. |
| 216 | static const char *isInvalidOpcode(const Instruction &Instr) { |
| 217 | const auto OpcodeName = Instr.Name; |
| 218 | if ((Instr.Description.TSFlags & X86II::FormMask) == X86II::Pseudo) |
| 219 | return "unsupported opcode: pseudo instruction" ; |
| 220 | if ((OpcodeName.starts_with(Prefix: "POP" ) && !OpcodeName.starts_with(Prefix: "POPCNT" )) || |
| 221 | OpcodeName.starts_with(Prefix: "PUSH" ) || |
| 222 | OpcodeName.starts_with(Prefix: "ADJCALLSTACK" ) || OpcodeName.starts_with(Prefix: "LEAVE" )) |
| 223 | return "unsupported opcode: Push/Pop/AdjCallStack/Leave" ; |
| 224 | switch (Instr.Description.Opcode) { |
| 225 | case X86::LFS16rm: |
| 226 | case X86::LFS32rm: |
| 227 | case X86::LFS64rm: |
| 228 | case X86::LGS16rm: |
| 229 | case X86::LGS32rm: |
| 230 | case X86::LGS64rm: |
| 231 | case X86::LSS16rm: |
| 232 | case X86::LSS32rm: |
| 233 | case X86::LSS64rm: |
| 234 | case X86::SYSENTER: |
| 235 | case X86::WRFSBASE: |
| 236 | case X86::WRFSBASE64: |
| 237 | return "unsupported opcode" ; |
| 238 | default: |
| 239 | break; |
| 240 | } |
| 241 | if (const auto reason = isInvalidMemoryInstr(Instr)) |
| 242 | return reason; |
| 243 | // We do not handle instructions with OPERAND_PCREL. |
| 244 | for (const Operand &Op : Instr.Operands) |
| 245 | if (Op.isExplicit() && |
| 246 | Op.getExplicitOperandInfo().OperandType == MCOI::OPERAND_PCREL) |
| 247 | return "unsupported opcode: PC relative operand" ; |
| 248 | // We do not handle second-form X87 instructions. We only handle first-form |
| 249 | // ones (_Fp), see comment in X86InstrFPStack.td. |
| 250 | for (const Operand &Op : Instr.Operands) |
| 251 | if (Op.isReg() && Op.isExplicit() && |
| 252 | Op.getExplicitOperandInfo().RegClass == X86::RSTRegClassID) |
| 253 | return "unsupported second-form X87 instruction" ; |
| 254 | return nullptr; |
| 255 | } |
| 256 | |
| 257 | static unsigned getX86FPFlags(const Instruction &Instr) { |
| 258 | return Instr.Description.TSFlags & X86II::FPTypeMask; |
| 259 | } |
| 260 | |
| 261 | // Helper to fill a memory operand with a value. |
| 262 | static void setMemOp(InstructionTemplate &IT, int OpIdx, |
| 263 | const MCOperand &OpVal) { |
| 264 | const auto Op = IT.getInstr().Operands[OpIdx]; |
| 265 | assert(Op.isExplicit() && "invalid memory pattern" ); |
| 266 | IT.getValueFor(Op) = OpVal; |
| 267 | } |
| 268 | |
| 269 | // Common (latency, uops) code for LEA templates. `GetDestReg` takes the |
| 270 | // addressing base and index registers and returns the LEA destination register. |
| 271 | static Expected<std::vector<CodeTemplate>> generateLEATemplatesCommon( |
| 272 | const Instruction &Instr, const BitVector &ForbiddenRegisters, |
| 273 | const LLVMState &State, const SnippetGenerator::Options &Opts, |
| 274 | std::function<void(unsigned, unsigned, BitVector &CandidateDestRegs)> |
| 275 | RestrictDestRegs) { |
| 276 | assert(Instr.Operands.size() == 6 && "invalid LEA" ); |
| 277 | assert(X86II::getMemoryOperandNo(Instr.Description.TSFlags) == 1 && |
| 278 | "invalid LEA" ); |
| 279 | |
| 280 | constexpr const int kDestOp = 0; |
| 281 | constexpr const int kBaseOp = 1; |
| 282 | constexpr const int kIndexOp = 3; |
| 283 | auto PossibleDestRegs = |
| 284 | Instr.Operands[kDestOp].getRegisterAliasing().sourceBits(); |
| 285 | remove(A&: PossibleDestRegs, B: ForbiddenRegisters); |
| 286 | auto PossibleBaseRegs = |
| 287 | Instr.Operands[kBaseOp].getRegisterAliasing().sourceBits(); |
| 288 | remove(A&: PossibleBaseRegs, B: ForbiddenRegisters); |
| 289 | auto PossibleIndexRegs = |
| 290 | Instr.Operands[kIndexOp].getRegisterAliasing().sourceBits(); |
| 291 | remove(A&: PossibleIndexRegs, B: ForbiddenRegisters); |
| 292 | |
| 293 | const auto &RegInfo = State.getRegInfo(); |
| 294 | std::vector<CodeTemplate> Result; |
| 295 | for (const unsigned BaseReg : PossibleBaseRegs.set_bits()) { |
| 296 | for (const unsigned IndexReg : PossibleIndexRegs.set_bits()) { |
| 297 | for (int LogScale = 0; LogScale <= 3; ++LogScale) { |
| 298 | // FIXME: Add an option for controlling how we explore immediates. |
| 299 | for (const int Disp : {0, 42}) { |
| 300 | InstructionTemplate IT(&Instr); |
| 301 | const int64_t Scale = 1ull << LogScale; |
| 302 | setMemOp(IT, OpIdx: 1, OpVal: MCOperand::createReg(Reg: BaseReg)); |
| 303 | setMemOp(IT, OpIdx: 2, OpVal: MCOperand::createImm(Val: Scale)); |
| 304 | setMemOp(IT, OpIdx: 3, OpVal: MCOperand::createReg(Reg: IndexReg)); |
| 305 | setMemOp(IT, OpIdx: 4, OpVal: MCOperand::createImm(Val: Disp)); |
| 306 | // SegmentReg must be 0 for LEA. |
| 307 | setMemOp(IT, OpIdx: 5, OpVal: MCOperand::createReg(Reg: 0)); |
| 308 | |
| 309 | // Output reg candidates are selected by the caller. |
| 310 | auto PossibleDestRegsNow = PossibleDestRegs; |
| 311 | RestrictDestRegs(BaseReg, IndexReg, PossibleDestRegsNow); |
| 312 | assert(PossibleDestRegsNow.set_bits().begin() != |
| 313 | PossibleDestRegsNow.set_bits().end() && |
| 314 | "no remaining registers" ); |
| 315 | setMemOp( |
| 316 | IT, OpIdx: 0, |
| 317 | OpVal: MCOperand::createReg(Reg: *PossibleDestRegsNow.set_bits().begin())); |
| 318 | |
| 319 | CodeTemplate CT; |
| 320 | CT.Instructions.push_back(x: std::move(IT)); |
| 321 | CT.Config = formatv(Fmt: "{3}(%{0}, %{1}, {2})" , Vals: RegInfo.getName(RegNo: BaseReg), |
| 322 | Vals: RegInfo.getName(RegNo: IndexReg), Vals: Scale, Vals: Disp) |
| 323 | .str(); |
| 324 | Result.push_back(x: std::move(CT)); |
| 325 | if (Result.size() >= Opts.MaxConfigsPerOpcode) |
| 326 | return std::move(Result); |
| 327 | } |
| 328 | } |
| 329 | } |
| 330 | } |
| 331 | |
| 332 | return std::move(Result); |
| 333 | } |
| 334 | |
| 335 | namespace { |
| 336 | class X86SerialSnippetGenerator : public SerialSnippetGenerator { |
| 337 | public: |
| 338 | using SerialSnippetGenerator::SerialSnippetGenerator; |
| 339 | |
| 340 | Expected<std::vector<CodeTemplate>> |
| 341 | generateCodeTemplates(InstructionTemplate Variant, |
| 342 | const BitVector &ForbiddenRegisters) const override; |
| 343 | }; |
| 344 | } // namespace |
| 345 | |
| 346 | Expected<std::vector<CodeTemplate>> |
| 347 | X86SerialSnippetGenerator::generateCodeTemplates( |
| 348 | InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const { |
| 349 | const Instruction &Instr = Variant.getInstr(); |
| 350 | |
| 351 | if (const auto reason = isInvalidOpcode(Instr)) |
| 352 | return make_error<Failure>(Args: reason); |
| 353 | |
| 354 | // LEA gets special attention. |
| 355 | const auto Opcode = Instr.Description.getOpcode(); |
| 356 | if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) { |
| 357 | return generateLEATemplatesCommon( |
| 358 | Instr, ForbiddenRegisters, State, Opts, |
| 359 | RestrictDestRegs: [this](unsigned BaseReg, unsigned IndexReg, |
| 360 | BitVector &CandidateDestRegs) { |
| 361 | // We just select a destination register that aliases the base |
| 362 | // register. |
| 363 | CandidateDestRegs &= |
| 364 | State.getRATC().getRegister(Reg: BaseReg).aliasedBits(); |
| 365 | }); |
| 366 | } |
| 367 | |
| 368 | if (Instr.hasMemoryOperands()) |
| 369 | return make_error<Failure>( |
| 370 | Args: "unsupported memory operand in latency measurements" ); |
| 371 | |
| 372 | switch (getX86FPFlags(Instr)) { |
| 373 | case X86II::NotFP: |
| 374 | return SerialSnippetGenerator::generateCodeTemplates(Variant, |
| 375 | ForbiddenRegisters); |
| 376 | case X86II::ZeroArgFP: |
| 377 | case X86II::OneArgFP: |
| 378 | case X86II::SpecialFP: |
| 379 | case X86II::CompareFP: |
| 380 | case X86II::CondMovFP: |
| 381 | return make_error<Failure>(Args: "Unsupported x87 Instruction" ); |
| 382 | case X86II::OneArgFPRW: |
| 383 | case X86II::TwoArgFP: |
| 384 | // These are instructions like |
| 385 | // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW) |
| 386 | // - `ST(0) = ST(0) + ST(i)` (TwoArgFP) |
| 387 | // They are intrinsically serial and do not modify the state of the stack. |
| 388 | return generateSelfAliasingCodeTemplates(Variant, ForbiddenRegisters); |
| 389 | default: |
| 390 | llvm_unreachable("Unknown FP Type!" ); |
| 391 | } |
| 392 | } |
| 393 | |
| 394 | namespace { |
| 395 | class X86ParallelSnippetGenerator : public ParallelSnippetGenerator { |
| 396 | public: |
| 397 | using ParallelSnippetGenerator::ParallelSnippetGenerator; |
| 398 | |
| 399 | Expected<std::vector<CodeTemplate>> |
| 400 | generateCodeTemplates(InstructionTemplate Variant, |
| 401 | const BitVector &ForbiddenRegisters) const override; |
| 402 | }; |
| 403 | |
| 404 | } // namespace |
| 405 | |
| 406 | Expected<std::vector<CodeTemplate>> |
| 407 | X86ParallelSnippetGenerator::generateCodeTemplates( |
| 408 | InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const { |
| 409 | const Instruction &Instr = Variant.getInstr(); |
| 410 | |
| 411 | if (const auto reason = isInvalidOpcode(Instr)) |
| 412 | return make_error<Failure>(Args: reason); |
| 413 | |
| 414 | // LEA gets special attention. |
| 415 | const auto Opcode = Instr.Description.getOpcode(); |
| 416 | if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) { |
| 417 | return generateLEATemplatesCommon( |
| 418 | Instr, ForbiddenRegisters, State, Opts, |
| 419 | RestrictDestRegs: [this](unsigned BaseReg, unsigned IndexReg, |
| 420 | BitVector &CandidateDestRegs) { |
| 421 | // Any destination register that is not used for addressing is fine. |
| 422 | remove(A&: CandidateDestRegs, |
| 423 | B: State.getRATC().getRegister(Reg: BaseReg).aliasedBits()); |
| 424 | remove(A&: CandidateDestRegs, |
| 425 | B: State.getRATC().getRegister(Reg: IndexReg).aliasedBits()); |
| 426 | }); |
| 427 | } |
| 428 | |
| 429 | switch (getX86FPFlags(Instr)) { |
| 430 | case X86II::NotFP: |
| 431 | return ParallelSnippetGenerator::generateCodeTemplates(Variant, |
| 432 | ForbiddenRegisters); |
| 433 | case X86II::ZeroArgFP: |
| 434 | case X86II::OneArgFP: |
| 435 | case X86II::SpecialFP: |
| 436 | return make_error<Failure>(Args: "Unsupported x87 Instruction" ); |
| 437 | case X86II::OneArgFPRW: |
| 438 | case X86II::TwoArgFP: |
| 439 | // These are instructions like |
| 440 | // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW) |
| 441 | // - `ST(0) = ST(0) + ST(i)` (TwoArgFP) |
| 442 | // They are intrinsically serial and do not modify the state of the stack. |
| 443 | // We generate the same code for latency and uops. |
| 444 | return generateSelfAliasingCodeTemplates(Variant, ForbiddenRegisters); |
| 445 | case X86II::CompareFP: |
| 446 | case X86II::CondMovFP: |
| 447 | // We can compute uops for any FP instruction that does not grow or shrink |
| 448 | // the stack (either do not touch the stack or push as much as they pop). |
| 449 | return generateUnconstrainedCodeTemplates( |
| 450 | Variant, Msg: "instruction does not grow/shrink the FP stack" ); |
| 451 | default: |
| 452 | llvm_unreachable("Unknown FP Type!" ); |
| 453 | } |
| 454 | } |
| 455 | |
| 456 | static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) { |
| 457 | switch (RegBitWidth) { |
| 458 | case 8: |
| 459 | return X86::MOV8ri; |
| 460 | case 16: |
| 461 | return X86::MOV16ri; |
| 462 | case 32: |
| 463 | return X86::MOV32ri; |
| 464 | case 64: |
| 465 | return X86::MOV64ri; |
| 466 | } |
| 467 | llvm_unreachable("Invalid Value Width" ); |
| 468 | } |
| 469 | |
| 470 | // Generates instruction to load an immediate value into a register. |
| 471 | static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth, |
| 472 | const APInt &Value) { |
| 473 | if (Value.getBitWidth() > RegBitWidth) |
| 474 | llvm_unreachable("Value must fit in the Register" ); |
| 475 | return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth)) |
| 476 | .addReg(Reg) |
| 477 | .addImm(Val: Value.getZExtValue()); |
| 478 | } |
| 479 | |
| 480 | // Allocates scratch memory on the stack. |
| 481 | static MCInst allocateStackSpace(unsigned Bytes) { |
| 482 | return MCInstBuilder(X86::SUB64ri8) |
| 483 | .addReg(Reg: X86::RSP) |
| 484 | .addReg(Reg: X86::RSP) |
| 485 | .addImm(Val: Bytes); |
| 486 | } |
| 487 | |
| 488 | // Fills scratch memory at offset `OffsetBytes` with value `Imm`. |
| 489 | static MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes, |
| 490 | uint64_t Imm) { |
| 491 | return MCInstBuilder(MovOpcode) |
| 492 | // Address = ESP |
| 493 | .addReg(Reg: X86::RSP) // BaseReg |
| 494 | .addImm(Val: 1) // ScaleAmt |
| 495 | .addReg(Reg: 0) // IndexReg |
| 496 | .addImm(Val: OffsetBytes) // Disp |
| 497 | .addReg(Reg: 0) // Segment |
| 498 | // Immediate. |
| 499 | .addImm(Val: Imm); |
| 500 | } |
| 501 | |
| 502 | // Loads scratch memory into register `Reg` using opcode `RMOpcode`. |
| 503 | static MCInst loadToReg(MCRegister Reg, unsigned RMOpcode) { |
| 504 | return MCInstBuilder(RMOpcode) |
| 505 | .addReg(Reg) |
| 506 | // Address = ESP |
| 507 | .addReg(Reg: X86::RSP) // BaseReg |
| 508 | .addImm(Val: 1) // ScaleAmt |
| 509 | .addReg(Reg: 0) // IndexReg |
| 510 | .addImm(Val: 0) // Disp |
| 511 | .addReg(Reg: 0); // Segment |
| 512 | } |
| 513 | |
| 514 | // Releases scratch memory. |
| 515 | static MCInst releaseStackSpace(unsigned Bytes) { |
| 516 | return MCInstBuilder(X86::ADD64ri8) |
| 517 | .addReg(Reg: X86::RSP) |
| 518 | .addReg(Reg: X86::RSP) |
| 519 | .addImm(Val: Bytes); |
| 520 | } |
| 521 | |
| 522 | // Reserves some space on the stack, fills it with the content of the provided |
| 523 | // constant and provide methods to load the stack value into a register. |
| 524 | namespace { |
| 525 | struct ConstantInliner { |
| 526 | explicit ConstantInliner(const APInt &Constant) : Constant_(Constant) {} |
| 527 | |
| 528 | std::vector<MCInst> loadAndFinalize(MCRegister Reg, unsigned RegBitWidth, |
| 529 | unsigned Opcode); |
| 530 | |
| 531 | std::vector<MCInst> loadX87STAndFinalize(MCRegister Reg); |
| 532 | |
| 533 | std::vector<MCInst> loadX87FPAndFinalize(MCRegister Reg); |
| 534 | |
| 535 | std::vector<MCInst> popFlagAndFinalize(); |
| 536 | |
| 537 | std::vector<MCInst> loadImplicitRegAndFinalize(unsigned Opcode, |
| 538 | unsigned Value); |
| 539 | |
| 540 | std::vector<MCInst> loadDirectionFlagAndFinalize(); |
| 541 | |
| 542 | private: |
| 543 | ConstantInliner &add(const MCInst &Inst) { |
| 544 | Instructions.push_back(x: Inst); |
| 545 | return *this; |
| 546 | } |
| 547 | |
| 548 | void initStack(unsigned Bytes); |
| 549 | |
| 550 | static constexpr const unsigned kF80Bytes = 10; // 80 bits. |
| 551 | |
| 552 | APInt Constant_; |
| 553 | std::vector<MCInst> Instructions; |
| 554 | }; |
| 555 | } // namespace |
| 556 | |
| 557 | std::vector<MCInst> ConstantInliner::loadAndFinalize(MCRegister Reg, |
| 558 | unsigned RegBitWidth, |
| 559 | unsigned Opcode) { |
| 560 | assert((RegBitWidth & 7) == 0 && "RegBitWidth must be a multiple of 8 bits" ); |
| 561 | initStack(Bytes: RegBitWidth / 8); |
| 562 | add(Inst: loadToReg(Reg, RMOpcode: Opcode)); |
| 563 | add(Inst: releaseStackSpace(Bytes: RegBitWidth / 8)); |
| 564 | return std::move(Instructions); |
| 565 | } |
| 566 | |
| 567 | std::vector<MCInst> ConstantInliner::loadX87STAndFinalize(MCRegister Reg) { |
| 568 | initStack(Bytes: kF80Bytes); |
| 569 | add(Inst: MCInstBuilder(X86::LD_F80m) |
| 570 | // Address = ESP |
| 571 | .addReg(Reg: X86::RSP) // BaseReg |
| 572 | .addImm(Val: 1) // ScaleAmt |
| 573 | .addReg(Reg: 0) // IndexReg |
| 574 | .addImm(Val: 0) // Disp |
| 575 | .addReg(Reg: 0)); // Segment |
| 576 | if (Reg != X86::ST0) |
| 577 | add(Inst: MCInstBuilder(X86::ST_Frr).addReg(Reg)); |
| 578 | add(Inst: releaseStackSpace(Bytes: kF80Bytes)); |
| 579 | return std::move(Instructions); |
| 580 | } |
| 581 | |
| 582 | std::vector<MCInst> ConstantInliner::loadX87FPAndFinalize(MCRegister Reg) { |
| 583 | initStack(Bytes: kF80Bytes); |
| 584 | add(Inst: MCInstBuilder(X86::LD_Fp80m) |
| 585 | .addReg(Reg) |
| 586 | // Address = ESP |
| 587 | .addReg(Reg: X86::RSP) // BaseReg |
| 588 | .addImm(Val: 1) // ScaleAmt |
| 589 | .addReg(Reg: 0) // IndexReg |
| 590 | .addImm(Val: 0) // Disp |
| 591 | .addReg(Reg: 0)); // Segment |
| 592 | add(Inst: releaseStackSpace(Bytes: kF80Bytes)); |
| 593 | return std::move(Instructions); |
| 594 | } |
| 595 | |
| 596 | std::vector<MCInst> ConstantInliner::popFlagAndFinalize() { |
| 597 | initStack(Bytes: 8); |
| 598 | add(Inst: MCInstBuilder(X86::POPF64)); |
| 599 | return std::move(Instructions); |
| 600 | } |
| 601 | |
| 602 | std::vector<MCInst> |
| 603 | ConstantInliner::loadImplicitRegAndFinalize(unsigned Opcode, unsigned Value) { |
| 604 | add(Inst: allocateStackSpace(Bytes: 4)); |
| 605 | add(Inst: fillStackSpace(MovOpcode: X86::MOV32mi, OffsetBytes: 0, Imm: Value)); // Mask all FP exceptions |
| 606 | add(Inst: MCInstBuilder(Opcode) |
| 607 | // Address = ESP |
| 608 | .addReg(Reg: X86::RSP) // BaseReg |
| 609 | .addImm(Val: 1) // ScaleAmt |
| 610 | .addReg(Reg: 0) // IndexReg |
| 611 | .addImm(Val: 0) // Disp |
| 612 | .addReg(Reg: 0)); // Segment |
| 613 | add(Inst: releaseStackSpace(Bytes: 4)); |
| 614 | return std::move(Instructions); |
| 615 | } |
| 616 | |
| 617 | std::vector<MCInst> ConstantInliner::loadDirectionFlagAndFinalize() { |
| 618 | if (Constant_.isZero()) |
| 619 | add(Inst: MCInstBuilder(X86::CLD)); |
| 620 | else if (Constant_.isOne()) |
| 621 | add(Inst: MCInstBuilder(X86::STD)); |
| 622 | |
| 623 | return std::move(Instructions); |
| 624 | } |
| 625 | |
| 626 | void ConstantInliner::initStack(unsigned Bytes) { |
| 627 | assert(Constant_.getBitWidth() <= Bytes * 8 && |
| 628 | "Value does not have the correct size" ); |
| 629 | const APInt WideConstant = Constant_.getBitWidth() < Bytes * 8 |
| 630 | ? Constant_.sext(width: Bytes * 8) |
| 631 | : Constant_; |
| 632 | add(Inst: allocateStackSpace(Bytes)); |
| 633 | size_t ByteOffset = 0; |
| 634 | for (; Bytes - ByteOffset >= 4; ByteOffset += 4) |
| 635 | add(Inst: fillStackSpace( |
| 636 | MovOpcode: X86::MOV32mi, OffsetBytes: ByteOffset, |
| 637 | Imm: WideConstant.extractBits(numBits: 32, bitPosition: ByteOffset * 8).getZExtValue())); |
| 638 | if (Bytes - ByteOffset >= 2) { |
| 639 | add(Inst: fillStackSpace( |
| 640 | MovOpcode: X86::MOV16mi, OffsetBytes: ByteOffset, |
| 641 | Imm: WideConstant.extractBits(numBits: 16, bitPosition: ByteOffset * 8).getZExtValue())); |
| 642 | ByteOffset += 2; |
| 643 | } |
| 644 | if (Bytes - ByteOffset >= 1) |
| 645 | add(Inst: fillStackSpace( |
| 646 | MovOpcode: X86::MOV8mi, OffsetBytes: ByteOffset, |
| 647 | Imm: WideConstant.extractBits(numBits: 8, bitPosition: ByteOffset * 8).getZExtValue())); |
| 648 | } |
| 649 | |
| 650 | #include "X86GenExegesis.inc" |
| 651 | |
| 652 | namespace { |
| 653 | |
| 654 | class X86SavedState : public ExegesisTarget::SavedState { |
| 655 | public: |
| 656 | X86SavedState() { |
| 657 | #if defined(_MSC_VER) && defined(_M_X64) |
| 658 | _fxsave64(FPState); |
| 659 | Eflags = __readeflags(); |
| 660 | #elif defined(__GNUC__) && defined(__x86_64__) |
| 661 | __builtin_ia32_fxsave64(FPState); |
| 662 | Eflags = __builtin_ia32_readeflags_u64(); |
| 663 | #else |
| 664 | report_fatal_error("X86 exegesis running on unsupported target" ); |
| 665 | #endif |
| 666 | } |
| 667 | |
| 668 | ~X86SavedState() { |
| 669 | // Restoring the X87 state does not flush pending exceptions, make sure |
| 670 | // these exceptions are flushed now. |
| 671 | #if defined(_MSC_VER) && defined(_M_X64) |
| 672 | _clearfp(); |
| 673 | _fxrstor64(FPState); |
| 674 | __writeeflags(Eflags); |
| 675 | #elif defined(__GNUC__) && defined(__x86_64__) |
| 676 | asm volatile("fwait" ); |
| 677 | __builtin_ia32_fxrstor64(FPState); |
| 678 | __builtin_ia32_writeeflags_u64(Eflags); |
| 679 | #else |
| 680 | report_fatal_error("X86 exegesis running on unsupported target" ); |
| 681 | #endif |
| 682 | } |
| 683 | |
| 684 | private: |
| 685 | #if defined(__x86_64__) || defined(_M_X64) |
| 686 | alignas(16) char FPState[512]; |
| 687 | uint64_t Eflags; |
| 688 | #endif |
| 689 | }; |
| 690 | |
| 691 | class ExegesisX86Target : public ExegesisTarget { |
| 692 | public: |
| 693 | ExegesisX86Target() |
| 694 | : ExegesisTarget(X86CpuPfmCounters, X86_MC::isOpcodeAvailable) {} |
| 695 | |
| 696 | Expected<std::unique_ptr<pfm::CounterGroup>> |
| 697 | createCounter(StringRef CounterName, const LLVMState &State, |
| 698 | ArrayRef<const char *> ValidationCounters, |
| 699 | const pid_t ProcessID) const override { |
| 700 | // If LbrSamplingPeriod was provided, then ignore the |
| 701 | // CounterName because we only have one for LBR. |
| 702 | if (LbrSamplingPeriod > 0) { |
| 703 | // Can't use LBR without HAVE_LIBPFM, LIBPFM_HAS_FIELD_CYCLES, or without |
| 704 | // __linux__ (for now) |
| 705 | #if defined(HAVE_LIBPFM) && defined(LIBPFM_HAS_FIELD_CYCLES) && \ |
| 706 | defined(__linux__) |
| 707 | // TODO(boomanaiden154): Add in support for using validation counters when |
| 708 | // using LBR counters. |
| 709 | if (ValidationCounters.size() > 0) |
| 710 | return make_error<StringError>( |
| 711 | "Using LBR is not currently supported with validation counters" , |
| 712 | errc::invalid_argument); |
| 713 | |
| 714 | return std::make_unique<X86LbrCounter>( |
| 715 | X86LbrPerfEvent(LbrSamplingPeriod)); |
| 716 | #else |
| 717 | return make_error<StringError>( |
| 718 | Args: "LBR counter requested without HAVE_LIBPFM, LIBPFM_HAS_FIELD_CYCLES, " |
| 719 | "or running on Linux." , |
| 720 | Args: errc::invalid_argument); |
| 721 | #endif |
| 722 | } |
| 723 | return ExegesisTarget::createCounter(CounterName, State, ValidationCounters, |
| 724 | ProcessID); |
| 725 | } |
| 726 | |
| 727 | enum ArgumentRegisters { CodeSize = X86::R12, AuxiliaryMemoryFD = X86::R13 }; |
| 728 | |
| 729 | private: |
| 730 | void addTargetSpecificPasses(PassManagerBase &PM) const override; |
| 731 | |
| 732 | MCRegister getScratchMemoryRegister(const Triple &TT) const override; |
| 733 | |
| 734 | MCRegister getDefaultLoopCounterRegister(const Triple &) const override; |
| 735 | |
| 736 | unsigned getMaxMemoryAccessSize() const override { return 64; } |
| 737 | |
| 738 | Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var, |
| 739 | MCOperand &AssignedValue, |
| 740 | const BitVector &ForbiddenRegs) const override; |
| 741 | |
| 742 | void fillMemoryOperands(InstructionTemplate &IT, MCRegister Reg, |
| 743 | unsigned Offset) const override; |
| 744 | |
| 745 | void decrementLoopCounterAndJump(MachineBasicBlock &MBB, |
| 746 | MachineBasicBlock &TargetMBB, |
| 747 | const MCInstrInfo &MII, |
| 748 | MCRegister LoopRegister) const override; |
| 749 | |
| 750 | std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg, |
| 751 | const APInt &Value) const override; |
| 752 | |
| 753 | #ifdef __linux__ |
| 754 | void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const override; |
| 755 | |
| 756 | void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const override; |
| 757 | |
| 758 | std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const override; |
| 759 | |
| 760 | std::vector<MCInst> |
| 761 | generateMmap(uintptr_t Address, size_t Length, |
| 762 | uintptr_t FileDescriptorAddress) const override; |
| 763 | |
| 764 | void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const override; |
| 765 | |
| 766 | void moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const override; |
| 767 | |
| 768 | std::vector<MCInst> generateMemoryInitialSetup() const override; |
| 769 | |
| 770 | std::vector<MCInst> setStackRegisterToAuxMem() const override; |
| 771 | |
| 772 | uintptr_t getAuxiliaryMemoryStartAddress() const override; |
| 773 | |
| 774 | std::vector<MCInst> configurePerfCounter(long Request, bool SaveRegisters) const override; |
| 775 | |
| 776 | std::vector<MCRegister> getArgumentRegisters() const override; |
| 777 | |
| 778 | std::vector<MCRegister> getRegistersNeedSaving() const override; |
| 779 | #endif // __linux__ |
| 780 | |
| 781 | ArrayRef<MCPhysReg> getUnavailableRegisters() const override { |
| 782 | if (DisableUpperSSERegisters) |
| 783 | return ArrayRef(kUnavailableRegistersSSE); |
| 784 | |
| 785 | return ArrayRef(kUnavailableRegisters); |
| 786 | } |
| 787 | |
| 788 | bool allowAsBackToBack(const Instruction &Instr) const override { |
| 789 | const unsigned Opcode = Instr.Description.Opcode; |
| 790 | return !isInvalidOpcode(Instr) && Opcode != X86::LEA64r && |
| 791 | Opcode != X86::LEA64_32r && Opcode != X86::LEA16r; |
| 792 | } |
| 793 | |
| 794 | std::vector<InstructionTemplate> |
| 795 | generateInstructionVariants(const Instruction &Instr, |
| 796 | unsigned MaxConfigsPerOpcode) const override; |
| 797 | |
| 798 | std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator( |
| 799 | const LLVMState &State, |
| 800 | const SnippetGenerator::Options &Opts) const override { |
| 801 | return std::make_unique<X86SerialSnippetGenerator>(args: State, args: Opts); |
| 802 | } |
| 803 | |
| 804 | std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator( |
| 805 | const LLVMState &State, |
| 806 | const SnippetGenerator::Options &Opts) const override { |
| 807 | return std::make_unique<X86ParallelSnippetGenerator>(args: State, args: Opts); |
| 808 | } |
| 809 | |
| 810 | bool matchesArch(Triple::ArchType Arch) const override { |
| 811 | return Arch == Triple::x86_64 || Arch == Triple::x86; |
| 812 | } |
| 813 | |
| 814 | Error checkFeatureSupport() const override { |
| 815 | // LBR is the only feature we conditionally support now. |
| 816 | // So if LBR is not requested, then we should be able to run the benchmarks. |
| 817 | if (LbrSamplingPeriod == 0) |
| 818 | return Error::success(); |
| 819 | |
| 820 | #if defined(__linux__) && defined(HAVE_LIBPFM) && \ |
| 821 | defined(LIBPFM_HAS_FIELD_CYCLES) |
| 822 | // FIXME: Fix this. |
| 823 | // https://bugs.llvm.org/show_bug.cgi?id=48918 |
| 824 | // For now, only do the check if we see an Intel machine because |
| 825 | // the counter uses some intel-specific magic and it could |
| 826 | // be confuse and think an AMD machine actually has LBR support. |
| 827 | #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ |
| 828 | defined(_M_X64) |
| 829 | using namespace sys::detail::x86; |
| 830 | |
| 831 | if (getVendorSignature() == VendorSignatures::GENUINE_INTEL) |
| 832 | // If the kernel supports it, the hardware still may not have it. |
| 833 | return X86LbrCounter::checkLbrSupport(); |
| 834 | #else |
| 835 | report_fatal_error("Running X86 exegesis on unsupported target" ); |
| 836 | #endif |
| 837 | #endif |
| 838 | return make_error<StringError>( |
| 839 | Args: "LBR not supported on this kernel and/or platform" , |
| 840 | Args: errc::not_supported); |
| 841 | } |
| 842 | |
| 843 | std::unique_ptr<SavedState> withSavedState() const override { |
| 844 | return std::make_unique<X86SavedState>(); |
| 845 | } |
| 846 | |
| 847 | static const MCPhysReg kUnavailableRegisters[4]; |
| 848 | static const MCPhysReg [12]; |
| 849 | }; |
| 850 | |
| 851 | // We disable a few registers that cannot be encoded on instructions with a REX |
| 852 | // prefix. |
| 853 | const MCPhysReg ExegesisX86Target::kUnavailableRegisters[4] = { |
| 854 | X86::AH, X86::BH, X86::CH, X86::DH}; |
| 855 | |
| 856 | // Optionally, also disable the upper (x86_64) SSE registers to reduce frontend |
| 857 | // decoder load. |
| 858 | const MCPhysReg ExegesisX86Target::[12] = { |
| 859 | X86::AH, X86::BH, X86::CH, X86::DH, X86::XMM8, X86::XMM9, |
| 860 | X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13, X86::XMM14, X86::XMM15}; |
| 861 | |
| 862 | // We're using one of R8-R15 because these registers are never hardcoded in |
| 863 | // instructions (e.g. MOVS writes to EDI, ESI, EDX), so they have less |
| 864 | // conflicts. |
| 865 | constexpr const MCPhysReg kDefaultLoopCounterReg = X86::R8; |
| 866 | |
| 867 | } // namespace |
| 868 | |
| 869 | void ExegesisX86Target::addTargetSpecificPasses(PassManagerBase &PM) const { |
| 870 | // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F. |
| 871 | PM.add(P: createX86FloatingPointStackifierPass()); |
| 872 | } |
| 873 | |
| 874 | MCRegister ExegesisX86Target::getScratchMemoryRegister(const Triple &TT) const { |
| 875 | if (!TT.isArch64Bit()) { |
| 876 | // FIXME: This would require popping from the stack, so we would have to |
| 877 | // add some additional setup code. |
| 878 | return MCRegister(); |
| 879 | } |
| 880 | return TT.isOSWindows() ? X86::RCX : X86::RDI; |
| 881 | } |
| 882 | |
| 883 | MCRegister |
| 884 | ExegesisX86Target::getDefaultLoopCounterRegister(const Triple &TT) const { |
| 885 | if (!TT.isArch64Bit()) { |
| 886 | return MCRegister(); |
| 887 | } |
| 888 | return kDefaultLoopCounterReg; |
| 889 | } |
| 890 | |
| 891 | Error ExegesisX86Target::randomizeTargetMCOperand( |
| 892 | const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue, |
| 893 | const BitVector &ForbiddenRegs) const { |
| 894 | const Operand &Op = Instr.getPrimaryOperand(Var); |
| 895 | switch (Op.getExplicitOperandInfo().OperandType) { |
| 896 | case X86::OperandType::OPERAND_COND_CODE: |
| 897 | AssignedValue = |
| 898 | MCOperand::createImm(Val: randomIndex(Max: X86::CondCode::LAST_VALID_COND)); |
| 899 | return Error::success(); |
| 900 | case X86::OperandType::OPERAND_ROUNDING_CONTROL: |
| 901 | AssignedValue = |
| 902 | MCOperand::createImm(Val: randomIndex(Max: X86::STATIC_ROUNDING::TO_ZERO)); |
| 903 | return Error::success(); |
| 904 | default: |
| 905 | break; |
| 906 | } |
| 907 | return make_error<Failure>( |
| 908 | Args: Twine("unimplemented operand type " ) |
| 909 | .concat(Suffix: Twine(Op.getExplicitOperandInfo().OperandType))); |
| 910 | } |
| 911 | |
| 912 | void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT, |
| 913 | MCRegister Reg, |
| 914 | unsigned Offset) const { |
| 915 | assert(!isInvalidMemoryInstr(IT.getInstr()) && |
| 916 | "fillMemoryOperands requires a valid memory instruction" ); |
| 917 | int MemOpIdx = X86II::getMemoryOperandNo(TSFlags: IT.getInstr().Description.TSFlags); |
| 918 | assert(MemOpIdx >= 0 && "invalid memory operand index" ); |
| 919 | // getMemoryOperandNo() ignores tied operands, so we have to add them back. |
| 920 | MemOpIdx += X86II::getOperandBias(Desc: IT.getInstr().Description); |
| 921 | setMemOp(IT, OpIdx: MemOpIdx + 0, OpVal: MCOperand::createReg(Reg)); // BaseReg |
| 922 | setMemOp(IT, OpIdx: MemOpIdx + 1, OpVal: MCOperand::createImm(Val: 1)); // ScaleAmt |
| 923 | setMemOp(IT, OpIdx: MemOpIdx + 2, OpVal: MCOperand::createReg(Reg: 0)); // IndexReg |
| 924 | setMemOp(IT, OpIdx: MemOpIdx + 3, OpVal: MCOperand::createImm(Val: Offset)); // Disp |
| 925 | setMemOp(IT, OpIdx: MemOpIdx + 4, OpVal: MCOperand::createReg(Reg: 0)); // Segment |
| 926 | } |
| 927 | |
| 928 | void ExegesisX86Target::decrementLoopCounterAndJump( |
| 929 | MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB, |
| 930 | const MCInstrInfo &MII, MCRegister LoopRegister) const { |
| 931 | BuildMI(BB: &MBB, MIMD: DebugLoc(), MCID: MII.get(Opcode: X86::ADD64ri8)) |
| 932 | .addDef(RegNo: LoopRegister) |
| 933 | .addUse(RegNo: LoopRegister) |
| 934 | .addImm(Val: -1); |
| 935 | BuildMI(BB: &MBB, MIMD: DebugLoc(), MCID: MII.get(Opcode: X86::JCC_1)) |
| 936 | .addMBB(MBB: &TargetMBB) |
| 937 | .addImm(Val: X86::COND_NE); |
| 938 | } |
| 939 | |
| 940 | void generateRegisterStackPush(unsigned int Register, |
| 941 | std::vector<MCInst> &GeneratedCode) { |
| 942 | GeneratedCode.push_back(x: MCInstBuilder(X86::PUSH64r).addReg(Reg: Register)); |
| 943 | } |
| 944 | |
| 945 | void generateRegisterStackPop(unsigned int Register, |
| 946 | std::vector<MCInst> &GeneratedCode) { |
| 947 | GeneratedCode.push_back(x: MCInstBuilder(X86::POP64r).addReg(Reg: Register)); |
| 948 | } |
| 949 | |
| 950 | void generateSyscall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) { |
| 951 | GeneratedCode.push_back( |
| 952 | x: loadImmediate(Reg: X86::RAX, RegBitWidth: 64, Value: APInt(64, SyscallNumber))); |
| 953 | GeneratedCode.push_back(x: MCInstBuilder(X86::SYSCALL)); |
| 954 | } |
| 955 | |
| 956 | // The functions below for saving and restoring system call registers are only |
| 957 | // used when llvm-exegesis is built on Linux. |
| 958 | #ifdef __linux__ |
// Registers used to pass arguments to a system call, indexed by argument
// position. saveSyscallRegisters() and restoreSyscallRegisters() push and pop
// the first ArgumentCount entries of this table.
constexpr std::array<unsigned, 6> SyscallArgumentRegisters{
    X86::RDI, X86::RSI, X86::RDX, X86::R10, X86::R8, X86::R9};
| 961 | |
| 962 | static void saveSyscallRegisters(std::vector<MCInst> &GeneratedCode, |
| 963 | unsigned ArgumentCount) { |
| 964 | assert(ArgumentCount <= 6 && |
| 965 | "System calls only X86-64 Linux can only take six arguments" ); |
| 966 | // Preserve RCX and R11 (Clobbered by the system call). |
| 967 | generateRegisterStackPush(Register: X86::RCX, GeneratedCode); |
| 968 | generateRegisterStackPush(Register: X86::R11, GeneratedCode); |
| 969 | // Preserve RAX (used for the syscall number/return value). |
| 970 | generateRegisterStackPush(Register: X86::RAX, GeneratedCode); |
| 971 | // Preserve the registers used to pass arguments to the system call. |
| 972 | for (unsigned I = 0; I < ArgumentCount; ++I) |
| 973 | generateRegisterStackPush(Register: SyscallArgumentRegisters[I], GeneratedCode); |
| 974 | } |
| 975 | |
| 976 | static void restoreSyscallRegisters(std::vector<MCInst> &GeneratedCode, |
| 977 | unsigned ArgumentCount) { |
| 978 | assert(ArgumentCount <= 6 && |
| 979 | "System calls only X86-64 Linux can only take six arguments" ); |
| 980 | // Restore the argument registers, in the opposite order of the way they are |
| 981 | // saved. |
| 982 | for (unsigned I = ArgumentCount; I > 0; --I) { |
| 983 | generateRegisterStackPop(Register: SyscallArgumentRegisters[I - 1], GeneratedCode); |
| 984 | } |
| 985 | generateRegisterStackPop(Register: X86::RAX, GeneratedCode); |
| 986 | generateRegisterStackPop(Register: X86::R11, GeneratedCode); |
| 987 | generateRegisterStackPop(Register: X86::RCX, GeneratedCode); |
| 988 | } |
| 989 | #endif // __linux__ |
| 990 | |
| 991 | static std::vector<MCInst> loadImmediateSegmentRegister(MCRegister Reg, |
| 992 | const APInt &Value) { |
| 993 | #if defined(__x86_64__) && defined(__linux__) |
| 994 | assert(Value.getBitWidth() <= 64 && "Value must fit in the register." ); |
| 995 | std::vector<MCInst> loadSegmentRegisterCode; |
| 996 | // Preserve the syscall registers here as we don't |
| 997 | // want to make any assumptions about the ordering of what registers are |
| 998 | // loaded in first, and we might have already loaded in registers that we are |
| 999 | // going to be clobbering here. |
| 1000 | saveSyscallRegisters(GeneratedCode&: loadSegmentRegisterCode, ArgumentCount: 2); |
| 1001 | // Generate the instructions to make the arch_prctl system call to set |
| 1002 | // the registers. |
| 1003 | int SyscallCode = 0; |
| 1004 | if (Reg == X86::FS) |
| 1005 | SyscallCode = ARCH_SET_FS; |
| 1006 | else if (Reg == X86::GS) |
| 1007 | SyscallCode = ARCH_SET_GS; |
| 1008 | else |
| 1009 | llvm_unreachable("Only the segment registers GS and FS are supported" ); |
| 1010 | loadSegmentRegisterCode.push_back( |
| 1011 | x: loadImmediate(Reg: X86::RDI, RegBitWidth: 64, Value: APInt(64, SyscallCode))); |
| 1012 | loadSegmentRegisterCode.push_back(x: loadImmediate(Reg: X86::RSI, RegBitWidth: 64, Value)); |
| 1013 | generateSyscall(SYS_arch_prctl, GeneratedCode&: loadSegmentRegisterCode); |
| 1014 | // Restore the registers in reverse order |
| 1015 | restoreSyscallRegisters(GeneratedCode&: loadSegmentRegisterCode, ArgumentCount: 2); |
| 1016 | return loadSegmentRegisterCode; |
| 1017 | #else |
| 1018 | llvm_unreachable("Loading immediate segment registers is only supported with " |
| 1019 | "x86-64 llvm-exegesis" ); |
| 1020 | #endif // defined(__x86_64__) && defined(__linux__) |
| 1021 | } |
| 1022 | |
| 1023 | std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI, |
| 1024 | MCRegister Reg, |
| 1025 | const APInt &Value) const { |
| 1026 | if (X86::SEGMENT_REGRegClass.contains(Reg)) |
| 1027 | return loadImmediateSegmentRegister(Reg, Value); |
| 1028 | if (X86::GR8RegClass.contains(Reg)) |
| 1029 | return {loadImmediate(Reg, RegBitWidth: 8, Value)}; |
| 1030 | if (X86::GR16RegClass.contains(Reg)) |
| 1031 | return {loadImmediate(Reg, RegBitWidth: 16, Value)}; |
| 1032 | if (X86::GR32RegClass.contains(Reg)) |
| 1033 | return {loadImmediate(Reg, RegBitWidth: 32, Value)}; |
| 1034 | if (X86::GR64RegClass.contains(Reg)) |
| 1035 | return {loadImmediate(Reg, RegBitWidth: 64, Value)}; |
| 1036 | if (X86::VK8RegClass.contains(Reg) || X86::VK16RegClass.contains(Reg) || |
| 1037 | X86::VK32RegClass.contains(Reg) || X86::VK64RegClass.contains(Reg)) { |
| 1038 | switch (Value.getBitWidth()) { |
| 1039 | case 8: |
| 1040 | if (STI.getFeatureBits()[X86::FeatureDQI]) { |
| 1041 | ConstantInliner CI(Value); |
| 1042 | return CI.loadAndFinalize(Reg, RegBitWidth: Value.getBitWidth(), Opcode: X86::KMOVBkm); |
| 1043 | } |
| 1044 | [[fallthrough]]; |
| 1045 | case 16: |
| 1046 | if (STI.getFeatureBits()[X86::FeatureAVX512]) { |
| 1047 | ConstantInliner CI(Value.zextOrTrunc(width: 16)); |
| 1048 | return CI.loadAndFinalize(Reg, RegBitWidth: 16, Opcode: X86::KMOVWkm); |
| 1049 | } |
| 1050 | break; |
| 1051 | case 32: |
| 1052 | if (STI.getFeatureBits()[X86::FeatureBWI]) { |
| 1053 | ConstantInliner CI(Value); |
| 1054 | return CI.loadAndFinalize(Reg, RegBitWidth: Value.getBitWidth(), Opcode: X86::KMOVDkm); |
| 1055 | } |
| 1056 | break; |
| 1057 | case 64: |
| 1058 | if (STI.getFeatureBits()[X86::FeatureBWI]) { |
| 1059 | ConstantInliner CI(Value); |
| 1060 | return CI.loadAndFinalize(Reg, RegBitWidth: Value.getBitWidth(), Opcode: X86::KMOVQkm); |
| 1061 | } |
| 1062 | break; |
| 1063 | } |
| 1064 | } |
| 1065 | ConstantInliner CI(Value); |
| 1066 | if (X86::VR64RegClass.contains(Reg)) |
| 1067 | return CI.loadAndFinalize(Reg, RegBitWidth: 64, Opcode: X86::MMX_MOVQ64rm); |
| 1068 | if (X86::VR128RegClass.contains(Reg)) { |
| 1069 | if (STI.getFeatureBits()[X86::FeatureAVX]) |
| 1070 | return CI.loadAndFinalize(Reg, RegBitWidth: 128, Opcode: X86::VMOVDQUrm); |
| 1071 | return CI.loadAndFinalize(Reg, RegBitWidth: 128, Opcode: X86::MOVDQUrm); |
| 1072 | } |
| 1073 | if (X86::VR128XRegClass.contains(Reg)) { |
| 1074 | if (STI.getFeatureBits()[X86::FeatureAVX512]) |
| 1075 | return CI.loadAndFinalize(Reg, RegBitWidth: 128, Opcode: X86::VMOVDQU32Z128rm); |
| 1076 | } |
| 1077 | if (X86::VR256RegClass.contains(Reg)) { |
| 1078 | if (STI.getFeatureBits()[X86::FeatureAVX]) |
| 1079 | return CI.loadAndFinalize(Reg, RegBitWidth: 256, Opcode: X86::VMOVDQUYrm); |
| 1080 | } |
| 1081 | if (X86::VR256XRegClass.contains(Reg)) { |
| 1082 | if (STI.getFeatureBits()[X86::FeatureAVX512]) |
| 1083 | return CI.loadAndFinalize(Reg, RegBitWidth: 256, Opcode: X86::VMOVDQU32Z256rm); |
| 1084 | } |
| 1085 | if (X86::VR512RegClass.contains(Reg)) |
| 1086 | if (STI.getFeatureBits()[X86::FeatureAVX512]) |
| 1087 | return CI.loadAndFinalize(Reg, RegBitWidth: 512, Opcode: X86::VMOVDQU32Zrm); |
| 1088 | if (X86::RSTRegClass.contains(Reg)) { |
| 1089 | return CI.loadX87STAndFinalize(Reg); |
| 1090 | } |
| 1091 | if (X86::RFP32RegClass.contains(Reg) || X86::RFP64RegClass.contains(Reg) || |
| 1092 | X86::RFP80RegClass.contains(Reg)) { |
| 1093 | return CI.loadX87FPAndFinalize(Reg); |
| 1094 | } |
| 1095 | if (Reg == X86::EFLAGS) |
| 1096 | return CI.popFlagAndFinalize(); |
| 1097 | if (Reg == X86::MXCSR) |
| 1098 | return CI.loadImplicitRegAndFinalize( |
| 1099 | Opcode: STI.getFeatureBits()[X86::FeatureAVX] ? X86::VLDMXCSR : X86::LDMXCSR, |
| 1100 | Value: 0x1f80); |
| 1101 | if (Reg == X86::FPCW) |
| 1102 | return CI.loadImplicitRegAndFinalize(Opcode: X86::FLDCW16m, Value: 0x37f); |
| 1103 | if (Reg == X86::DF) |
| 1104 | return CI.loadDirectionFlagAndFinalize(); |
| 1105 | return {}; // Not yet implemented. |
| 1106 | } |
| 1107 | |
| 1108 | #ifdef __linux__ |
| 1109 | |
// Upper bound of the virtual address range used by the munmap code and when
// placing the auxiliary memory (see generateUpperMunmap() and
// getAuxiliaryMemoryStartAddress()).
// NOTE(review): the __arm__ branch selects a 32-bit ceiling; confirm whether
// it is still needed in the X86 target.
#ifdef __arm__
static constexpr const uintptr_t VAddressSpaceCeiling = 0xC0000000;
#else
static constexpr const uintptr_t VAddressSpaceCeiling = 0x0000800000000000;
#endif
| 1115 | |
| 1116 | void generateRoundToNearestPage(unsigned int Register, |
| 1117 | std::vector<MCInst> &GeneratedCode) { |
| 1118 | int PageSizeShift = static_cast<int>(round(x: log2(x: getpagesize()))); |
| 1119 | // Round down to the nearest page by getting rid of the least significant bits |
| 1120 | // representing location in the page. Shift right to get rid of this info and |
| 1121 | // then shift back left. |
| 1122 | GeneratedCode.push_back(x: MCInstBuilder(X86::SHR64ri) |
| 1123 | .addReg(Reg: Register) |
| 1124 | .addReg(Reg: Register) |
| 1125 | .addImm(Val: PageSizeShift)); |
| 1126 | GeneratedCode.push_back(x: MCInstBuilder(X86::SHL64ri) |
| 1127 | .addReg(Reg: Register) |
| 1128 | .addReg(Reg: Register) |
| 1129 | .addImm(Val: PageSizeShift)); |
| 1130 | } |
| 1131 | |
| 1132 | void generateGetInstructionPointer(unsigned int ResultRegister, |
| 1133 | std::vector<MCInst> &GeneratedCode) { |
| 1134 | // Use a load effective address to get the current instruction pointer and put |
| 1135 | // it into the result register. |
| 1136 | GeneratedCode.push_back(x: MCInstBuilder(X86::LEA64r) |
| 1137 | .addReg(Reg: ResultRegister) |
| 1138 | .addReg(Reg: X86::RIP) |
| 1139 | .addImm(Val: 1) |
| 1140 | .addReg(Reg: 0) |
| 1141 | .addImm(Val: 0) |
| 1142 | .addReg(Reg: 0)); |
| 1143 | } |
| 1144 | |
| 1145 | void ExegesisX86Target::generateLowerMunmap( |
| 1146 | std::vector<MCInst> &GeneratedCode) const { |
| 1147 | // Unmap starting at address zero |
| 1148 | GeneratedCode.push_back(x: loadImmediate(Reg: X86::RDI, RegBitWidth: 64, Value: APInt(64, 0))); |
| 1149 | // Get the current instruction pointer so we know where to unmap up to. |
| 1150 | generateGetInstructionPointer(ResultRegister: X86::RSI, GeneratedCode); |
| 1151 | generateRoundToNearestPage(Register: X86::RSI, GeneratedCode); |
| 1152 | // Subtract a page from the end of the unmap so we don't unmap the currently |
| 1153 | // executing section. |
| 1154 | GeneratedCode.push_back(x: MCInstBuilder(X86::SUB64ri32) |
| 1155 | .addReg(Reg: X86::RSI) |
| 1156 | .addReg(Reg: X86::RSI) |
| 1157 | .addImm(Val: getpagesize())); |
| 1158 | generateSyscall(SYS_munmap, GeneratedCode); |
| 1159 | } |
| 1160 | |
| 1161 | void ExegesisX86Target::generateUpperMunmap( |
| 1162 | std::vector<MCInst> &GeneratedCode) const { |
| 1163 | generateGetInstructionPointer(ResultRegister: X86::R8, GeneratedCode); |
| 1164 | // Load in the size of the snippet to RDI from from the argument register. |
| 1165 | GeneratedCode.push_back(x: MCInstBuilder(X86::MOV64rr) |
| 1166 | .addReg(Reg: X86::RDI) |
| 1167 | .addReg(Reg: ArgumentRegisters::CodeSize)); |
| 1168 | // Add the length of the snippet (in %RDI) to the current instruction pointer |
| 1169 | // (%R8) to get the address where we should start unmapping at. |
| 1170 | GeneratedCode.push_back(x: MCInstBuilder(X86::ADD64rr) |
| 1171 | .addReg(Reg: X86::RDI) |
| 1172 | .addReg(Reg: X86::RDI) |
| 1173 | .addReg(Reg: X86::R8)); |
| 1174 | generateRoundToNearestPage(Register: X86::RDI, GeneratedCode); |
| 1175 | // Add a one page to the start address to ensure that we're above the snippet |
| 1176 | // since the above function rounds down. |
| 1177 | GeneratedCode.push_back(x: MCInstBuilder(X86::ADD64ri32) |
| 1178 | .addReg(Reg: X86::RDI) |
| 1179 | .addReg(Reg: X86::RDI) |
| 1180 | .addImm(Val: getpagesize())); |
| 1181 | // Unmap to just one page under the ceiling of the address space. |
| 1182 | GeneratedCode.push_back(x: loadImmediate( |
| 1183 | Reg: X86::RSI, RegBitWidth: 64, Value: APInt(64, VAddressSpaceCeiling - getpagesize()))); |
| 1184 | GeneratedCode.push_back(x: MCInstBuilder(X86::SUB64rr) |
| 1185 | .addReg(Reg: X86::RSI) |
| 1186 | .addReg(Reg: X86::RSI) |
| 1187 | .addReg(Reg: X86::RDI)); |
| 1188 | generateSyscall(SYS_munmap, GeneratedCode); |
| 1189 | } |
| 1190 | |
| 1191 | std::vector<MCInst> |
| 1192 | ExegesisX86Target::generateExitSyscall(unsigned ExitCode) const { |
| 1193 | std::vector<MCInst> ExitCallCode; |
| 1194 | ExitCallCode.push_back(x: loadImmediate(Reg: X86::RDI, RegBitWidth: 64, Value: APInt(64, ExitCode))); |
| 1195 | generateSyscall(SYS_exit, GeneratedCode&: ExitCallCode); |
| 1196 | return ExitCallCode; |
| 1197 | } |
| 1198 | |
| 1199 | std::vector<MCInst> |
| 1200 | ExegesisX86Target::generateMmap(uintptr_t Address, size_t Length, |
| 1201 | uintptr_t FileDescriptorAddress) const { |
| 1202 | std::vector<MCInst> MmapCode; |
| 1203 | MmapCode.push_back(x: loadImmediate(Reg: X86::RDI, RegBitWidth: 64, Value: APInt(64, Address))); |
| 1204 | MmapCode.push_back(x: loadImmediate(Reg: X86::RSI, RegBitWidth: 64, Value: APInt(64, Length))); |
| 1205 | MmapCode.push_back( |
| 1206 | x: loadImmediate(Reg: X86::RDX, RegBitWidth: 64, Value: APInt(64, PROT_READ | PROT_WRITE))); |
| 1207 | MmapCode.push_back( |
| 1208 | x: loadImmediate(Reg: X86::R10, RegBitWidth: 64, Value: APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE))); |
| 1209 | // Copy file descriptor location from aux memory into R8 |
| 1210 | MmapCode.push_back( |
| 1211 | x: loadImmediate(Reg: X86::R8, RegBitWidth: 64, Value: APInt(64, FileDescriptorAddress))); |
| 1212 | // Dereference file descriptor into FD argument register |
| 1213 | MmapCode.push_back(x: MCInstBuilder(X86::MOV32rm) |
| 1214 | .addReg(Reg: X86::R8D) |
| 1215 | .addReg(Reg: X86::R8) |
| 1216 | .addImm(Val: 1) |
| 1217 | .addReg(Reg: 0) |
| 1218 | .addImm(Val: 0) |
| 1219 | .addReg(Reg: 0)); |
| 1220 | MmapCode.push_back(x: loadImmediate(Reg: X86::R9, RegBitWidth: 64, Value: APInt(64, 0))); |
| 1221 | generateSyscall(SYS_mmap, GeneratedCode&: MmapCode); |
| 1222 | return MmapCode; |
| 1223 | } |
| 1224 | |
| 1225 | void ExegesisX86Target::generateMmapAuxMem( |
| 1226 | std::vector<MCInst> &GeneratedCode) const { |
| 1227 | GeneratedCode.push_back( |
| 1228 | x: loadImmediate(Reg: X86::RDI, RegBitWidth: 64, Value: APInt(64, getAuxiliaryMemoryStartAddress()))); |
| 1229 | GeneratedCode.push_back(x: loadImmediate( |
| 1230 | Reg: X86::RSI, RegBitWidth: 64, Value: APInt(64, SubprocessMemory::AuxiliaryMemorySize))); |
| 1231 | GeneratedCode.push_back( |
| 1232 | x: loadImmediate(Reg: X86::RDX, RegBitWidth: 64, Value: APInt(64, PROT_READ | PROT_WRITE))); |
| 1233 | GeneratedCode.push_back( |
| 1234 | x: loadImmediate(Reg: X86::R10, RegBitWidth: 64, Value: APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE))); |
| 1235 | GeneratedCode.push_back(x: MCInstBuilder(X86::MOV64rr) |
| 1236 | .addReg(Reg: X86::R8) |
| 1237 | .addReg(Reg: ArgumentRegisters::AuxiliaryMemoryFD)); |
| 1238 | GeneratedCode.push_back(x: loadImmediate(Reg: X86::R9, RegBitWidth: 64, Value: APInt(64, 0))); |
| 1239 | generateSyscall(SYS_mmap, GeneratedCode); |
| 1240 | } |
| 1241 | |
| 1242 | void ExegesisX86Target::moveArgumentRegisters( |
| 1243 | std::vector<MCInst> &GeneratedCode) const { |
| 1244 | GeneratedCode.push_back(x: MCInstBuilder(X86::MOV64rr) |
| 1245 | .addReg(Reg: ArgumentRegisters::CodeSize) |
| 1246 | .addReg(Reg: X86::RDI)); |
| 1247 | GeneratedCode.push_back(x: MCInstBuilder(X86::MOV64rr) |
| 1248 | .addReg(Reg: ArgumentRegisters::AuxiliaryMemoryFD) |
| 1249 | .addReg(Reg: X86::RSI)); |
| 1250 | } |
| 1251 | |
| 1252 | std::vector<MCInst> ExegesisX86Target::generateMemoryInitialSetup() const { |
| 1253 | std::vector<MCInst> MemoryInitialSetupCode; |
| 1254 | moveArgumentRegisters(GeneratedCode&: MemoryInitialSetupCode); |
| 1255 | generateLowerMunmap(GeneratedCode&: MemoryInitialSetupCode); |
| 1256 | generateUpperMunmap(GeneratedCode&: MemoryInitialSetupCode); |
| 1257 | generateMmapAuxMem(GeneratedCode&: MemoryInitialSetupCode); |
| 1258 | return MemoryInitialSetupCode; |
| 1259 | } |
| 1260 | |
| 1261 | std::vector<MCInst> ExegesisX86Target::setStackRegisterToAuxMem() const { |
| 1262 | // Moves %rsp to the end of the auxiliary memory |
| 1263 | return {MCInstBuilder(X86::MOV64ri) |
| 1264 | .addReg(Reg: X86::RSP) |
| 1265 | .addImm(Val: getAuxiliaryMemoryStartAddress() + |
| 1266 | SubprocessMemory::AuxiliaryMemorySize)}; |
| 1267 | } |
| 1268 | |
| 1269 | uintptr_t ExegesisX86Target::getAuxiliaryMemoryStartAddress() const { |
| 1270 | // Return the second to last page in the virtual address space to try and |
| 1271 | // prevent interference with memory annotations in the snippet |
| 1272 | return VAddressSpaceCeiling - 2 * getpagesize(); |
| 1273 | } |
| 1274 | |
| 1275 | std::vector<MCInst> |
| 1276 | ExegesisX86Target::configurePerfCounter(long Request, bool SaveRegisters) const { |
| 1277 | std::vector<MCInst> ConfigurePerfCounterCode; |
| 1278 | if (SaveRegisters) |
| 1279 | saveSyscallRegisters(GeneratedCode&: ConfigurePerfCounterCode, ArgumentCount: 3); |
| 1280 | ConfigurePerfCounterCode.push_back( |
| 1281 | x: loadImmediate(Reg: X86::RDI, RegBitWidth: 64, Value: APInt(64, getAuxiliaryMemoryStartAddress()))); |
| 1282 | ConfigurePerfCounterCode.push_back(x: MCInstBuilder(X86::MOV32rm) |
| 1283 | .addReg(Reg: X86::EDI) |
| 1284 | .addReg(Reg: X86::RDI) |
| 1285 | .addImm(Val: 1) |
| 1286 | .addReg(Reg: 0) |
| 1287 | .addImm(Val: 0) |
| 1288 | .addReg(Reg: 0)); |
| 1289 | ConfigurePerfCounterCode.push_back( |
| 1290 | x: loadImmediate(Reg: X86::RSI, RegBitWidth: 64, Value: APInt(64, Request))); |
| 1291 | #ifdef HAVE_LIBPFM |
| 1292 | ConfigurePerfCounterCode.push_back( |
| 1293 | loadImmediate(X86::RDX, 64, APInt(64, PERF_IOC_FLAG_GROUP))); |
| 1294 | #endif // HAVE_LIBPFM |
| 1295 | generateSyscall(SYS_ioctl, GeneratedCode&: ConfigurePerfCounterCode); |
| 1296 | if (SaveRegisters) |
| 1297 | restoreSyscallRegisters(GeneratedCode&: ConfigurePerfCounterCode, ArgumentCount: 3); |
| 1298 | return ConfigurePerfCounterCode; |
| 1299 | } |
| 1300 | |
| 1301 | std::vector<MCRegister> ExegesisX86Target::getArgumentRegisters() const { |
| 1302 | return {X86::RDI, X86::RSI}; |
| 1303 | } |
| 1304 | |
| 1305 | std::vector<MCRegister> ExegesisX86Target::getRegistersNeedSaving() const { |
| 1306 | return {X86::RAX, X86::RDI, X86::RSI, X86::RCX, X86::R11}; |
| 1307 | } |
| 1308 | |
| 1309 | #endif // __linux__ |
| 1310 | |
| 1311 | // Instruction can have some variable operands, and we may want to see how |
| 1312 | // different operands affect performance. So for each operand position, |
| 1313 | // precompute all the possible choices we might care about, |
| 1314 | // and greedily generate all the possible combinations of choices. |
| 1315 | std::vector<InstructionTemplate> ExegesisX86Target::generateInstructionVariants( |
| 1316 | const Instruction &Instr, unsigned MaxConfigsPerOpcode) const { |
| 1317 | bool Exploration = false; |
| 1318 | SmallVector<SmallVector<MCOperand, 1>, 4> VariableChoices; |
| 1319 | VariableChoices.resize(N: Instr.Variables.size()); |
| 1320 | for (auto I : zip(t: Instr.Variables, u&: VariableChoices)) { |
| 1321 | const Variable &Var = std::get<0>(t&: I); |
| 1322 | SmallVectorImpl<MCOperand> &Choices = std::get<1>(t&: I); |
| 1323 | |
| 1324 | switch (Instr.getPrimaryOperand(Var).getExplicitOperandInfo().OperandType) { |
| 1325 | default: |
| 1326 | // We don't wish to explicitly explore this variable. |
| 1327 | Choices.emplace_back(); // But add invalid MCOperand to simplify logic. |
| 1328 | continue; |
| 1329 | case X86::OperandType::OPERAND_COND_CODE: { |
| 1330 | Exploration = true; |
| 1331 | auto CondCodes = enum_seq_inclusive(Begin: X86::CondCode::COND_O, |
| 1332 | End: X86::CondCode::LAST_VALID_COND, |
| 1333 | force_iteration_on_noniterable_enum); |
| 1334 | Choices.reserve(N: CondCodes.size()); |
| 1335 | for (int CondCode : CondCodes) |
| 1336 | Choices.emplace_back(Args: MCOperand::createImm(Val: CondCode)); |
| 1337 | break; |
| 1338 | } |
| 1339 | } |
| 1340 | } |
| 1341 | |
| 1342 | // If we don't wish to explore any variables, defer to the baseline method. |
| 1343 | if (!Exploration) |
| 1344 | return ExegesisTarget::generateInstructionVariants(Instr, |
| 1345 | MaxConfigsPerOpcode); |
| 1346 | |
| 1347 | std::vector<InstructionTemplate> Variants; |
| 1348 | size_t NumVariants; |
| 1349 | CombinationGenerator<MCOperand, decltype(VariableChoices)::value_type, 4> G( |
| 1350 | VariableChoices); |
| 1351 | |
| 1352 | // How many operand combinations can we produce, within the limit? |
| 1353 | NumVariants = std::min(a: G.numCombinations(), b: (size_t)MaxConfigsPerOpcode); |
| 1354 | // And actually produce all the wanted operand combinations. |
| 1355 | Variants.reserve(n: NumVariants); |
| 1356 | G.generate(Callback: [&](ArrayRef<MCOperand> State) -> bool { |
| 1357 | Variants.emplace_back(args: &Instr); |
| 1358 | Variants.back().setVariableValues(State); |
| 1359 | // Did we run out of space for variants? |
| 1360 | return Variants.size() >= NumVariants; |
| 1361 | }); |
| 1362 | |
| 1363 | assert(Variants.size() == NumVariants && |
| 1364 | Variants.size() <= MaxConfigsPerOpcode && |
| 1365 | "Should not produce too many variants" ); |
| 1366 | return Variants; |
| 1367 | } |
| 1368 | |
| 1369 | static ExegesisTarget *getTheExegesisX86Target() { |
| 1370 | static ExegesisX86Target Target; |
| 1371 | return &Target; |
| 1372 | } |
| 1373 | |
| 1374 | void InitializeX86ExegesisTarget() { |
| 1375 | ExegesisTarget::registerTarget(T: getTheExegesisX86Target()); |
| 1376 | } |
| 1377 | |
| 1378 | } // namespace exegesis |
| 1379 | } // namespace llvm |
| 1380 | |