| 1 | //===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "SystemZTargetMachine.h" |
| 10 | #include "MCTargetDesc/SystemZMCTargetDesc.h" |
| 11 | #include "SystemZ.h" |
| 12 | #include "SystemZMachineFunctionInfo.h" |
| 13 | #include "SystemZMachineScheduler.h" |
| 14 | #include "SystemZTargetObjectFile.h" |
| 15 | #include "SystemZTargetTransformInfo.h" |
| 16 | #include "TargetInfo/SystemZTargetInfo.h" |
| 17 | #include "llvm/ADT/StringRef.h" |
| 18 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 19 | #include "llvm/CodeGen/Passes.h" |
| 20 | #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" |
| 21 | #include "llvm/CodeGen/TargetPassConfig.h" |
| 22 | #include "llvm/IR/DataLayout.h" |
| 23 | #include "llvm/MC/TargetRegistry.h" |
| 24 | #include "llvm/Support/CodeGen.h" |
| 25 | #include "llvm/Support/Compiler.h" |
| 26 | #include "llvm/Target/TargetLoweringObjectFile.h" |
| 27 | #include "llvm/Transforms/Scalar.h" |
| 28 | #include <memory> |
| 29 | #include <optional> |
| 30 | #include <string> |
| 31 | |
| 32 | using namespace llvm; |
| 33 | |
| 34 | static cl::opt<bool> EnableMachineCombinerPass( |
| 35 | "systemz-machine-combiner" , |
| 36 | cl::desc("Enable the machine combiner pass" ), |
| 37 | cl::init(Val: true), cl::Hidden); |
| 38 | |
| 39 | // NOLINTNEXTLINE(readability-identifier-naming) |
| 40 | extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void |
| 41 | LLVMInitializeSystemZTarget() { |
| 42 | // Register the target. |
| 43 | RegisterTargetMachine<SystemZTargetMachine> X(getTheSystemZTarget()); |
| 44 | auto &PR = *PassRegistry::getPassRegistry(); |
| 45 | initializeSystemZAsmPrinterPass(PR); |
| 46 | initializeSystemZElimComparePass(PR); |
| 47 | initializeSystemZShortenInstPass(PR); |
| 48 | initializeSystemZLongBranchPass(PR); |
| 49 | initializeSystemZLDCleanupPass(PR); |
| 50 | initializeSystemZShortenInstPass(PR); |
| 51 | initializeSystemZPostRewritePass(PR); |
| 52 | initializeSystemZTDCPassPass(PR); |
| 53 | initializeSystemZDAGToDAGISelLegacyPass(PR); |
| 54 | initializeSystemZCopyPhysRegsPass(PR); |
| 55 | } |
| 56 | |
| 57 | static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { |
| 58 | if (TT.isOSzOS()) |
| 59 | return std::make_unique<TargetLoweringObjectFileGOFF>(); |
| 60 | |
| 61 | // Note: Some times run with -triple s390x-unknown. |
| 62 | // In this case, default to ELF unless z/OS specifically provided. |
| 63 | return std::make_unique<SystemZELFTargetObjectFile>(); |
| 64 | } |
| 65 | |
| 66 | static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) { |
| 67 | // Static code is suitable for use in a dynamic executable; there is no |
| 68 | // separate DynamicNoPIC model. |
| 69 | if (!RM || *RM == Reloc::DynamicNoPIC) |
| 70 | return Reloc::Static; |
| 71 | return *RM; |
| 72 | } |
| 73 | |
| 74 | // For SystemZ we define the models as follows: |
| 75 | // |
| 76 | // Small: BRASL can call any function and will use a stub if necessary. |
| 77 | // Locally-binding symbols will always be in range of LARL. |
| 78 | // |
| 79 | // Medium: BRASL can call any function and will use a stub if necessary. |
| 80 | // GOT slots and locally-defined text will always be in range |
| 81 | // of LARL, but other symbols might not be. |
| 82 | // |
| 83 | // Large: Equivalent to Medium for now. |
| 84 | // |
| 85 | // Kernel: Equivalent to Medium for now. |
| 86 | // |
| 87 | // This means that any PIC module smaller than 4GB meets the |
| 88 | // requirements of Small, so Small seems like the best default there. |
| 89 | // |
| 90 | // All symbols bind locally in a non-PIC module, so the choice is less |
| 91 | // obvious. There are two cases: |
| 92 | // |
| 93 | // - When creating an executable, PLTs and copy relocations allow |
| 94 | // us to treat external symbols as part of the executable. |
| 95 | // Any executable smaller than 4GB meets the requirements of Small, |
| 96 | // so that seems like the best default. |
| 97 | // |
| 98 | // - When creating JIT code, stubs will be in range of BRASL if the |
| 99 | // image is less than 4GB in size. GOT entries will likewise be |
| 100 | // in range of LARL. However, the JIT environment has no equivalent |
| 101 | // of copy relocs, so locally-binding data symbols might not be in |
| 102 | // the range of LARL. We need the Medium model in that case. |
| 103 | static CodeModel::Model |
| 104 | getEffectiveSystemZCodeModel(std::optional<CodeModel::Model> CM, |
| 105 | Reloc::Model RM, bool JIT) { |
| 106 | if (CM) { |
| 107 | if (*CM == CodeModel::Tiny) |
| 108 | report_fatal_error(reason: "Target does not support the tiny CodeModel" , gen_crash_diag: false); |
| 109 | if (*CM == CodeModel::Kernel) |
| 110 | report_fatal_error(reason: "Target does not support the kernel CodeModel" , gen_crash_diag: false); |
| 111 | return *CM; |
| 112 | } |
| 113 | if (JIT) |
| 114 | return RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium; |
| 115 | return CodeModel::Small; |
| 116 | } |
| 117 | |
| 118 | SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT, |
| 119 | StringRef CPU, StringRef FS, |
| 120 | const TargetOptions &Options, |
| 121 | std::optional<Reloc::Model> RM, |
| 122 | std::optional<CodeModel::Model> CM, |
| 123 | CodeGenOptLevel OL, bool JIT) |
| 124 | : CodeGenTargetMachineImpl( |
| 125 | T, TT.computeDataLayout(), TT, CPU, FS, Options, |
| 126 | getEffectiveRelocModel(RM), |
| 127 | getEffectiveSystemZCodeModel(CM, RM: getEffectiveRelocModel(RM), JIT), |
| 128 | OL), |
| 129 | TLOF(createTLOF(TT: getTargetTriple())) { |
| 130 | initAsmInfo(); |
| 131 | } |
| 132 | |
| 133 | SystemZTargetMachine::~SystemZTargetMachine() = default; |
| 134 | |
| 135 | const SystemZSubtarget * |
| 136 | SystemZTargetMachine::getSubtargetImpl(const Function &F) const { |
| 137 | Attribute CPUAttr = F.getFnAttribute(Kind: "target-cpu" ); |
| 138 | Attribute TuneAttr = F.getFnAttribute(Kind: "tune-cpu" ); |
| 139 | Attribute FSAttr = F.getFnAttribute(Kind: "target-features" ); |
| 140 | |
| 141 | std::string CPU = |
| 142 | CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; |
| 143 | std::string TuneCPU = |
| 144 | TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU; |
| 145 | std::string FS = |
| 146 | FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; |
| 147 | |
| 148 | // FIXME: This is related to the code below to reset the target options, |
| 149 | // we need to know whether the soft float and backchain flags are set on the |
| 150 | // function, so we can enable them as subtarget features. |
| 151 | bool SoftFloat = F.getFnAttribute(Kind: "use-soft-float" ).getValueAsBool(); |
| 152 | if (SoftFloat) |
| 153 | FS += FS.empty() ? "+soft-float" : ",+soft-float" ; |
| 154 | bool BackChain = F.hasFnAttribute(Kind: "backchain" ); |
| 155 | if (BackChain) |
| 156 | FS += FS.empty() ? "+backchain" : ",+backchain" ; |
| 157 | |
| 158 | auto &I = SubtargetMap[CPU + TuneCPU + FS]; |
| 159 | if (!I) { |
| 160 | // This needs to be done before we create a new subtarget since any |
| 161 | // creation will depend on the TM and the code generation flags on the |
| 162 | // function that reside in TargetOptions. |
| 163 | resetTargetOptions(F); |
| 164 | I = std::make_unique<SystemZSubtarget>(args: TargetTriple, args&: CPU, args&: TuneCPU, args&: FS, |
| 165 | args: *this); |
| 166 | } |
| 167 | |
| 168 | return I.get(); |
| 169 | } |
| 170 | |
| 171 | ScheduleDAGInstrs * |
| 172 | SystemZTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const { |
| 173 | return createSchedPostRA<SystemZPostRASchedStrategy>(C); |
| 174 | } |
| 175 | |
| 176 | namespace { |
| 177 | |
| 178 | /// SystemZ Code Generator Pass Configuration Options. |
| 179 | class SystemZPassConfig : public TargetPassConfig { |
| 180 | public: |
| 181 | SystemZPassConfig(SystemZTargetMachine &TM, PassManagerBase &PM) |
| 182 | : TargetPassConfig(TM, PM) {} |
| 183 | |
| 184 | SystemZTargetMachine &getSystemZTargetMachine() const { |
| 185 | return getTM<SystemZTargetMachine>(); |
| 186 | } |
| 187 | |
| 188 | void addIRPasses() override; |
| 189 | bool addInstSelector() override; |
| 190 | bool addILPOpts() override; |
| 191 | void addPreRegAlloc() override; |
| 192 | void addPostRewrite() override; |
| 193 | void addPostRegAlloc() override; |
| 194 | void addPreSched2() override; |
| 195 | void addPreEmitPass() override; |
| 196 | }; |
| 197 | |
| 198 | } // end anonymous namespace |
| 199 | |
| 200 | void SystemZPassConfig::addIRPasses() { |
| 201 | if (getOptLevel() != CodeGenOptLevel::None) { |
| 202 | addPass(P: createSystemZTDCPass()); |
| 203 | addPass(P: createLoopDataPrefetchPass()); |
| 204 | } |
| 205 | |
| 206 | addPass(P: createAtomicExpandLegacyPass()); |
| 207 | |
| 208 | TargetPassConfig::addIRPasses(); |
| 209 | } |
| 210 | |
| 211 | bool SystemZPassConfig::addInstSelector() { |
| 212 | addPass(P: createSystemZISelDag(TM&: getSystemZTargetMachine(), OptLevel: getOptLevel())); |
| 213 | |
| 214 | if (getOptLevel() != CodeGenOptLevel::None) |
| 215 | addPass(P: createSystemZLDCleanupPass(TM&: getSystemZTargetMachine())); |
| 216 | |
| 217 | return false; |
| 218 | } |
| 219 | |
| 220 | bool SystemZPassConfig::addILPOpts() { |
| 221 | addPass(PassID: &EarlyIfConverterLegacyID); |
| 222 | |
| 223 | if (EnableMachineCombinerPass) |
| 224 | addPass(PassID: &MachineCombinerID); |
| 225 | |
| 226 | return true; |
| 227 | } |
| 228 | |
| 229 | void SystemZPassConfig::addPreRegAlloc() { |
| 230 | addPass(P: createSystemZCopyPhysRegsPass(TM&: getSystemZTargetMachine())); |
| 231 | } |
| 232 | |
| 233 | void SystemZPassConfig::addPostRewrite() { |
| 234 | addPass(P: createSystemZPostRewritePass(TM&: getSystemZTargetMachine())); |
| 235 | } |
| 236 | |
| 237 | void SystemZPassConfig::addPostRegAlloc() { |
| 238 | // PostRewrite needs to be run at -O0 also (in which case addPostRewrite() |
| 239 | // is not called). |
| 240 | if (getOptLevel() == CodeGenOptLevel::None) |
| 241 | addPass(P: createSystemZPostRewritePass(TM&: getSystemZTargetMachine())); |
| 242 | } |
| 243 | |
| 244 | void SystemZPassConfig::addPreSched2() { |
| 245 | if (getOptLevel() != CodeGenOptLevel::None) |
| 246 | addPass(PassID: &IfConverterID); |
| 247 | } |
| 248 | |
| 249 | void SystemZPassConfig::addPreEmitPass() { |
| 250 | // Do instruction shortening before compare elimination because some |
| 251 | // vector instructions will be shortened into opcodes that compare |
| 252 | // elimination recognizes. |
| 253 | if (getOptLevel() != CodeGenOptLevel::None) |
| 254 | addPass(P: createSystemZShortenInstPass(TM&: getSystemZTargetMachine())); |
| 255 | |
| 256 | // We eliminate comparisons here rather than earlier because some |
| 257 | // transformations can change the set of available CC values and we |
| 258 | // generally want those transformations to have priority. This is |
| 259 | // especially true in the commonest case where the result of the comparison |
| 260 | // is used by a single in-range branch instruction, since we will then |
| 261 | // be able to fuse the compare and the branch instead. |
| 262 | // |
| 263 | // For example, two-address NILF can sometimes be converted into |
| 264 | // three-address RISBLG. NILF produces a CC value that indicates whether |
| 265 | // the low word is zero, but RISBLG does not modify CC at all. On the |
| 266 | // other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG. |
| 267 | // The CC value produced by NILL isn't useful for our purposes, but the |
| 268 | // value produced by RISBG can be used for any comparison with zero |
| 269 | // (not just equality). So there are some transformations that lose |
| 270 | // CC values (while still being worthwhile) and others that happen to make |
| 271 | // the CC result more useful than it was originally. |
| 272 | // |
| 273 | // Another reason is that we only want to use BRANCH ON COUNT in cases |
| 274 | // where we know that the count register is not going to be spilled. |
| 275 | // |
| 276 | // Doing it so late makes it more likely that a register will be reused |
| 277 | // between the comparison and the branch, but it isn't clear whether |
| 278 | // preventing that would be a win or not. |
| 279 | if (getOptLevel() != CodeGenOptLevel::None) |
| 280 | addPass(P: createSystemZElimComparePass(TM&: getSystemZTargetMachine())); |
| 281 | addPass(P: createSystemZLongBranchPass(TM&: getSystemZTargetMachine())); |
| 282 | |
| 283 | // Do final scheduling after all other optimizations, to get an |
| 284 | // optimal input for the decoder (branch relaxation must happen |
| 285 | // after block placement). |
| 286 | if (getOptLevel() != CodeGenOptLevel::None) |
| 287 | addPass(PassID: &PostMachineSchedulerID); |
| 288 | } |
| 289 | |
| 290 | TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { |
| 291 | return new SystemZPassConfig(*this, PM); |
| 292 | } |
| 293 | |
| 294 | TargetTransformInfo |
| 295 | SystemZTargetMachine::getTargetTransformInfo(const Function &F) const { |
| 296 | return TargetTransformInfo(std::make_unique<SystemZTTIImpl>(args: this, args: F)); |
| 297 | } |
| 298 | |
| 299 | MachineFunctionInfo *SystemZTargetMachine::createMachineFunctionInfo( |
| 300 | BumpPtrAllocator &Allocator, const Function &F, |
| 301 | const TargetSubtargetInfo *STI) const { |
| 302 | return SystemZMachineFunctionInfo::create<SystemZMachineFunctionInfo>( |
| 303 | Allocator, F, STI); |
| 304 | } |
| 305 | |