| 1 | //===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "SystemZTargetMachine.h" |
| 10 | #include "MCTargetDesc/SystemZMCTargetDesc.h" |
| 11 | #include "SystemZ.h" |
| 12 | #include "SystemZMachineFunctionInfo.h" |
| 13 | #include "SystemZMachineScheduler.h" |
| 14 | #include "SystemZTargetObjectFile.h" |
| 15 | #include "SystemZTargetTransformInfo.h" |
| 16 | #include "TargetInfo/SystemZTargetInfo.h" |
| 17 | #include "llvm/ADT/StringRef.h" |
| 18 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 19 | #include "llvm/CodeGen/Passes.h" |
| 20 | #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" |
| 21 | #include "llvm/CodeGen/TargetPassConfig.h" |
| 22 | #include "llvm/IR/DataLayout.h" |
| 23 | #include "llvm/MC/TargetRegistry.h" |
| 24 | #include "llvm/Support/CodeGen.h" |
| 25 | #include "llvm/Support/Compiler.h" |
| 26 | #include "llvm/Target/TargetLoweringObjectFile.h" |
| 27 | #include "llvm/Transforms/Scalar.h" |
| 28 | #include <memory> |
| 29 | #include <optional> |
| 30 | #include <string> |
| 31 | |
| 32 | using namespace llvm; |
| 33 | |
| 34 | static cl::opt<bool> EnableMachineCombinerPass( |
| 35 | "systemz-machine-combiner" , |
| 36 | cl::desc("Enable the machine combiner pass" ), |
| 37 | cl::init(Val: true), cl::Hidden); |
| 38 | |
| 39 | // NOLINTNEXTLINE(readability-identifier-naming) |
| 40 | extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void |
| 41 | LLVMInitializeSystemZTarget() { |
| 42 | // Register the target. |
| 43 | RegisterTargetMachine<SystemZTargetMachine> X(getTheSystemZTarget()); |
| 44 | auto &PR = *PassRegistry::getPassRegistry(); |
| 45 | initializeSystemZAsmPrinterPass(PR); |
| 46 | initializeSystemZElimComparePass(PR); |
| 47 | initializeSystemZShortenInstPass(PR); |
| 48 | initializeSystemZLongBranchPass(PR); |
| 49 | initializeSystemZLDCleanupPass(PR); |
| 50 | initializeSystemZShortenInstPass(PR); |
| 51 | initializeSystemZPostRewritePass(PR); |
| 52 | initializeSystemZTDCPassPass(PR); |
| 53 | initializeSystemZDAGToDAGISelLegacyPass(PR); |
| 54 | initializeSystemZCopyPhysRegsPass(PR); |
| 55 | } |
| 56 | |
| 57 | static std::string computeDataLayout(const Triple &TT) { |
| 58 | std::string Ret; |
| 59 | |
| 60 | // Big endian. |
| 61 | Ret += "E" ; |
| 62 | |
| 63 | // Data mangling. |
| 64 | Ret += DataLayout::getManglingComponent(T: TT); |
| 65 | |
| 66 | // Special features for z/OS. |
| 67 | if (TT.isOSzOS()) { |
| 68 | if (TT.isArch64Bit()) { |
| 69 | // Custom address space for ptr32. |
| 70 | Ret += "-p1:32:32" ; |
| 71 | } |
| 72 | } |
| 73 | |
| 74 | // Make sure that global data has at least 16 bits of alignment by |
| 75 | // default, so that we can refer to it using LARL. We don't have any |
| 76 | // special requirements for stack variables though. |
| 77 | Ret += "-i1:8:16-i8:8:16" ; |
| 78 | |
| 79 | // 64-bit integers are naturally aligned. |
| 80 | Ret += "-i64:64" ; |
| 81 | |
| 82 | // 128-bit floats are aligned only to 64 bits. |
| 83 | Ret += "-f128:64" ; |
| 84 | |
| 85 | // The DataLayout string always holds a vector alignment of 64 bits, see |
| 86 | // comment in clang/lib/Basic/Targets/SystemZ.h. |
| 87 | Ret += "-v128:64" ; |
| 88 | |
| 89 | // We prefer 16 bits of aligned for all globals; see above. |
| 90 | Ret += "-a:8:16" ; |
| 91 | |
| 92 | // Integer registers are 32 or 64 bits. |
| 93 | Ret += "-n32:64" ; |
| 94 | |
| 95 | return Ret; |
| 96 | } |
| 97 | |
| 98 | static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { |
| 99 | if (TT.isOSzOS()) |
| 100 | return std::make_unique<TargetLoweringObjectFileGOFF>(); |
| 101 | |
| 102 | // Note: Some times run with -triple s390x-unknown. |
| 103 | // In this case, default to ELF unless z/OS specifically provided. |
| 104 | return std::make_unique<SystemZELFTargetObjectFile>(); |
| 105 | } |
| 106 | |
| 107 | static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) { |
| 108 | // Static code is suitable for use in a dynamic executable; there is no |
| 109 | // separate DynamicNoPIC model. |
| 110 | if (!RM || *RM == Reloc::DynamicNoPIC) |
| 111 | return Reloc::Static; |
| 112 | return *RM; |
| 113 | } |
| 114 | |
| 115 | // For SystemZ we define the models as follows: |
| 116 | // |
| 117 | // Small: BRASL can call any function and will use a stub if necessary. |
| 118 | // Locally-binding symbols will always be in range of LARL. |
| 119 | // |
| 120 | // Medium: BRASL can call any function and will use a stub if necessary. |
| 121 | // GOT slots and locally-defined text will always be in range |
| 122 | // of LARL, but other symbols might not be. |
| 123 | // |
| 124 | // Large: Equivalent to Medium for now. |
| 125 | // |
| 126 | // Kernel: Equivalent to Medium for now. |
| 127 | // |
| 128 | // This means that any PIC module smaller than 4GB meets the |
| 129 | // requirements of Small, so Small seems like the best default there. |
| 130 | // |
| 131 | // All symbols bind locally in a non-PIC module, so the choice is less |
| 132 | // obvious. There are two cases: |
| 133 | // |
| 134 | // - When creating an executable, PLTs and copy relocations allow |
| 135 | // us to treat external symbols as part of the executable. |
| 136 | // Any executable smaller than 4GB meets the requirements of Small, |
| 137 | // so that seems like the best default. |
| 138 | // |
| 139 | // - When creating JIT code, stubs will be in range of BRASL if the |
| 140 | // image is less than 4GB in size. GOT entries will likewise be |
| 141 | // in range of LARL. However, the JIT environment has no equivalent |
| 142 | // of copy relocs, so locally-binding data symbols might not be in |
| 143 | // the range of LARL. We need the Medium model in that case. |
| 144 | static CodeModel::Model |
| 145 | getEffectiveSystemZCodeModel(std::optional<CodeModel::Model> CM, |
| 146 | Reloc::Model RM, bool JIT) { |
| 147 | if (CM) { |
| 148 | if (*CM == CodeModel::Tiny) |
| 149 | report_fatal_error(reason: "Target does not support the tiny CodeModel" , gen_crash_diag: false); |
| 150 | if (*CM == CodeModel::Kernel) |
| 151 | report_fatal_error(reason: "Target does not support the kernel CodeModel" , gen_crash_diag: false); |
| 152 | return *CM; |
| 153 | } |
| 154 | if (JIT) |
| 155 | return RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium; |
| 156 | return CodeModel::Small; |
| 157 | } |
| 158 | |
| 159 | SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT, |
| 160 | StringRef CPU, StringRef FS, |
| 161 | const TargetOptions &Options, |
| 162 | std::optional<Reloc::Model> RM, |
| 163 | std::optional<CodeModel::Model> CM, |
| 164 | CodeGenOptLevel OL, bool JIT) |
| 165 | : CodeGenTargetMachineImpl( |
| 166 | T, computeDataLayout(TT), TT, CPU, FS, Options, |
| 167 | getEffectiveRelocModel(RM), |
| 168 | getEffectiveSystemZCodeModel(CM, RM: getEffectiveRelocModel(RM), JIT), |
| 169 | OL), |
| 170 | TLOF(createTLOF(TT: getTargetTriple())) { |
| 171 | initAsmInfo(); |
| 172 | } |
| 173 | |
| 174 | SystemZTargetMachine::~SystemZTargetMachine() = default; |
| 175 | |
| 176 | const SystemZSubtarget * |
| 177 | SystemZTargetMachine::getSubtargetImpl(const Function &F) const { |
| 178 | Attribute CPUAttr = F.getFnAttribute(Kind: "target-cpu" ); |
| 179 | Attribute TuneAttr = F.getFnAttribute(Kind: "tune-cpu" ); |
| 180 | Attribute FSAttr = F.getFnAttribute(Kind: "target-features" ); |
| 181 | |
| 182 | std::string CPU = |
| 183 | CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; |
| 184 | std::string TuneCPU = |
| 185 | TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU; |
| 186 | std::string FS = |
| 187 | FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; |
| 188 | |
| 189 | // FIXME: This is related to the code below to reset the target options, |
| 190 | // we need to know whether the soft float and backchain flags are set on the |
| 191 | // function, so we can enable them as subtarget features. |
| 192 | bool SoftFloat = F.getFnAttribute(Kind: "use-soft-float" ).getValueAsBool(); |
| 193 | if (SoftFloat) |
| 194 | FS += FS.empty() ? "+soft-float" : ",+soft-float" ; |
| 195 | bool BackChain = F.hasFnAttribute(Kind: "backchain" ); |
| 196 | if (BackChain) |
| 197 | FS += FS.empty() ? "+backchain" : ",+backchain" ; |
| 198 | |
| 199 | auto &I = SubtargetMap[CPU + TuneCPU + FS]; |
| 200 | if (!I) { |
| 201 | // This needs to be done before we create a new subtarget since any |
| 202 | // creation will depend on the TM and the code generation flags on the |
| 203 | // function that reside in TargetOptions. |
| 204 | resetTargetOptions(F); |
| 205 | I = std::make_unique<SystemZSubtarget>(args: TargetTriple, args&: CPU, args&: TuneCPU, args&: FS, |
| 206 | args: *this); |
| 207 | } |
| 208 | |
| 209 | return I.get(); |
| 210 | } |
| 211 | |
| 212 | ScheduleDAGInstrs * |
| 213 | SystemZTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const { |
| 214 | return createSchedPostRA<SystemZPostRASchedStrategy>(C); |
| 215 | } |
| 216 | |
| 217 | namespace { |
| 218 | |
| 219 | /// SystemZ Code Generator Pass Configuration Options. |
| 220 | class SystemZPassConfig : public TargetPassConfig { |
| 221 | public: |
| 222 | SystemZPassConfig(SystemZTargetMachine &TM, PassManagerBase &PM) |
| 223 | : TargetPassConfig(TM, PM) {} |
| 224 | |
| 225 | SystemZTargetMachine &getSystemZTargetMachine() const { |
| 226 | return getTM<SystemZTargetMachine>(); |
| 227 | } |
| 228 | |
| 229 | void addIRPasses() override; |
| 230 | bool addInstSelector() override; |
| 231 | bool addILPOpts() override; |
| 232 | void addPreRegAlloc() override; |
| 233 | void addPostRewrite() override; |
| 234 | void addPostRegAlloc() override; |
| 235 | void addPreSched2() override; |
| 236 | void addPreEmitPass() override; |
| 237 | }; |
| 238 | |
| 239 | } // end anonymous namespace |
| 240 | |
| 241 | void SystemZPassConfig::addIRPasses() { |
| 242 | if (getOptLevel() != CodeGenOptLevel::None) { |
| 243 | addPass(P: createSystemZTDCPass()); |
| 244 | addPass(P: createLoopDataPrefetchPass()); |
| 245 | } |
| 246 | |
| 247 | addPass(P: createAtomicExpandLegacyPass()); |
| 248 | |
| 249 | TargetPassConfig::addIRPasses(); |
| 250 | } |
| 251 | |
| 252 | bool SystemZPassConfig::addInstSelector() { |
| 253 | addPass(P: createSystemZISelDag(TM&: getSystemZTargetMachine(), OptLevel: getOptLevel())); |
| 254 | |
| 255 | if (getOptLevel() != CodeGenOptLevel::None) |
| 256 | addPass(P: createSystemZLDCleanupPass(TM&: getSystemZTargetMachine())); |
| 257 | |
| 258 | return false; |
| 259 | } |
| 260 | |
| 261 | bool SystemZPassConfig::addILPOpts() { |
| 262 | addPass(PassID: &EarlyIfConverterLegacyID); |
| 263 | |
| 264 | if (EnableMachineCombinerPass) |
| 265 | addPass(PassID: &MachineCombinerID); |
| 266 | |
| 267 | return true; |
| 268 | } |
| 269 | |
| 270 | void SystemZPassConfig::addPreRegAlloc() { |
| 271 | addPass(P: createSystemZCopyPhysRegsPass(TM&: getSystemZTargetMachine())); |
| 272 | } |
| 273 | |
| 274 | void SystemZPassConfig::addPostRewrite() { |
| 275 | addPass(P: createSystemZPostRewritePass(TM&: getSystemZTargetMachine())); |
| 276 | } |
| 277 | |
| 278 | void SystemZPassConfig::addPostRegAlloc() { |
| 279 | // PostRewrite needs to be run at -O0 also (in which case addPostRewrite() |
| 280 | // is not called). |
| 281 | if (getOptLevel() == CodeGenOptLevel::None) |
| 282 | addPass(P: createSystemZPostRewritePass(TM&: getSystemZTargetMachine())); |
| 283 | } |
| 284 | |
| 285 | void SystemZPassConfig::addPreSched2() { |
| 286 | if (getOptLevel() != CodeGenOptLevel::None) |
| 287 | addPass(PassID: &IfConverterID); |
| 288 | } |
| 289 | |
| 290 | void SystemZPassConfig::addPreEmitPass() { |
| 291 | // Do instruction shortening before compare elimination because some |
| 292 | // vector instructions will be shortened into opcodes that compare |
| 293 | // elimination recognizes. |
| 294 | if (getOptLevel() != CodeGenOptLevel::None) |
| 295 | addPass(P: createSystemZShortenInstPass(TM&: getSystemZTargetMachine())); |
| 296 | |
| 297 | // We eliminate comparisons here rather than earlier because some |
| 298 | // transformations can change the set of available CC values and we |
| 299 | // generally want those transformations to have priority. This is |
| 300 | // especially true in the commonest case where the result of the comparison |
| 301 | // is used by a single in-range branch instruction, since we will then |
| 302 | // be able to fuse the compare and the branch instead. |
| 303 | // |
| 304 | // For example, two-address NILF can sometimes be converted into |
| 305 | // three-address RISBLG. NILF produces a CC value that indicates whether |
| 306 | // the low word is zero, but RISBLG does not modify CC at all. On the |
| 307 | // other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG. |
| 308 | // The CC value produced by NILL isn't useful for our purposes, but the |
| 309 | // value produced by RISBG can be used for any comparison with zero |
| 310 | // (not just equality). So there are some transformations that lose |
| 311 | // CC values (while still being worthwhile) and others that happen to make |
| 312 | // the CC result more useful than it was originally. |
| 313 | // |
| 314 | // Another reason is that we only want to use BRANCH ON COUNT in cases |
| 315 | // where we know that the count register is not going to be spilled. |
| 316 | // |
| 317 | // Doing it so late makes it more likely that a register will be reused |
| 318 | // between the comparison and the branch, but it isn't clear whether |
| 319 | // preventing that would be a win or not. |
| 320 | if (getOptLevel() != CodeGenOptLevel::None) |
| 321 | addPass(P: createSystemZElimComparePass(TM&: getSystemZTargetMachine())); |
| 322 | addPass(P: createSystemZLongBranchPass(TM&: getSystemZTargetMachine())); |
| 323 | |
| 324 | // Do final scheduling after all other optimizations, to get an |
| 325 | // optimal input for the decoder (branch relaxation must happen |
| 326 | // after block placement). |
| 327 | if (getOptLevel() != CodeGenOptLevel::None) |
| 328 | addPass(PassID: &PostMachineSchedulerID); |
| 329 | } |
| 330 | |
| 331 | TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { |
| 332 | return new SystemZPassConfig(*this, PM); |
| 333 | } |
| 334 | |
| 335 | TargetTransformInfo |
| 336 | SystemZTargetMachine::getTargetTransformInfo(const Function &F) const { |
| 337 | return TargetTransformInfo(std::make_unique<SystemZTTIImpl>(args: this, args: F)); |
| 338 | } |
| 339 | |
| 340 | MachineFunctionInfo *SystemZTargetMachine::createMachineFunctionInfo( |
| 341 | BumpPtrAllocator &Allocator, const Function &F, |
| 342 | const TargetSubtargetInfo *STI) const { |
| 343 | return SystemZMachineFunctionInfo::create<SystemZMachineFunctionInfo>( |
| 344 | Allocator, F, STI); |
| 345 | } |
| 346 | |