1 | //===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "SystemZTargetMachine.h" |
10 | #include "MCTargetDesc/SystemZMCTargetDesc.h" |
11 | #include "SystemZ.h" |
12 | #include "SystemZMachineFunctionInfo.h" |
13 | #include "SystemZMachineScheduler.h" |
14 | #include "SystemZTargetObjectFile.h" |
15 | #include "SystemZTargetTransformInfo.h" |
16 | #include "TargetInfo/SystemZTargetInfo.h" |
17 | #include "llvm/ADT/StringRef.h" |
18 | #include "llvm/Analysis/TargetTransformInfo.h" |
19 | #include "llvm/CodeGen/Passes.h" |
20 | #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" |
21 | #include "llvm/CodeGen/TargetPassConfig.h" |
22 | #include "llvm/IR/DataLayout.h" |
23 | #include "llvm/MC/TargetRegistry.h" |
24 | #include "llvm/Support/CodeGen.h" |
25 | #include "llvm/Target/TargetLoweringObjectFile.h" |
26 | #include "llvm/Transforms/Scalar.h" |
27 | #include <memory> |
28 | #include <optional> |
29 | #include <string> |
30 | |
31 | using namespace llvm; |
32 | |
33 | static cl::opt<bool> EnableMachineCombinerPass( |
34 | "systemz-machine-combiner" , |
35 | cl::desc("Enable the machine combiner pass" ), |
36 | cl::init(Val: true), cl::Hidden); |
37 | |
38 | // NOLINTNEXTLINE(readability-identifier-naming) |
39 | extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() { |
40 | // Register the target. |
41 | RegisterTargetMachine<SystemZTargetMachine> X(getTheSystemZTarget()); |
42 | auto &PR = *PassRegistry::getPassRegistry(); |
43 | initializeSystemZElimComparePass(PR); |
44 | initializeSystemZShortenInstPass(PR); |
45 | initializeSystemZLongBranchPass(PR); |
46 | initializeSystemZLDCleanupPass(PR); |
47 | initializeSystemZShortenInstPass(PR); |
48 | initializeSystemZPostRewritePass(PR); |
49 | initializeSystemZTDCPassPass(PR); |
50 | initializeSystemZDAGToDAGISelLegacyPass(PR); |
51 | } |
52 | |
53 | static std::string computeDataLayout(const Triple &TT) { |
54 | std::string Ret; |
55 | |
56 | // Big endian. |
57 | Ret += "E" ; |
58 | |
59 | // Data mangling. |
60 | Ret += DataLayout::getManglingComponent(T: TT); |
61 | |
62 | // Make sure that global data has at least 16 bits of alignment by |
63 | // default, so that we can refer to it using LARL. We don't have any |
64 | // special requirements for stack variables though. |
65 | Ret += "-i1:8:16-i8:8:16" ; |
66 | |
67 | // 64-bit integers are naturally aligned. |
68 | Ret += "-i64:64" ; |
69 | |
70 | // 128-bit floats are aligned only to 64 bits. |
71 | Ret += "-f128:64" ; |
72 | |
73 | // The DataLayout string always holds a vector alignment of 64 bits, see |
74 | // comment in clang/lib/Basic/Targets/SystemZ.h. |
75 | Ret += "-v128:64" ; |
76 | |
77 | // We prefer 16 bits of aligned for all globals; see above. |
78 | Ret += "-a:8:16" ; |
79 | |
80 | // Integer registers are 32 or 64 bits. |
81 | Ret += "-n32:64" ; |
82 | |
83 | return Ret; |
84 | } |
85 | |
86 | static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { |
87 | if (TT.isOSzOS()) |
88 | return std::make_unique<TargetLoweringObjectFileGOFF>(); |
89 | |
90 | // Note: Some times run with -triple s390x-unknown. |
91 | // In this case, default to ELF unless z/OS specifically provided. |
92 | return std::make_unique<SystemZELFTargetObjectFile>(); |
93 | } |
94 | |
95 | static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) { |
96 | // Static code is suitable for use in a dynamic executable; there is no |
97 | // separate DynamicNoPIC model. |
98 | if (!RM || *RM == Reloc::DynamicNoPIC) |
99 | return Reloc::Static; |
100 | return *RM; |
101 | } |
102 | |
103 | // For SystemZ we define the models as follows: |
104 | // |
105 | // Small: BRASL can call any function and will use a stub if necessary. |
106 | // Locally-binding symbols will always be in range of LARL. |
107 | // |
108 | // Medium: BRASL can call any function and will use a stub if necessary. |
109 | // GOT slots and locally-defined text will always be in range |
110 | // of LARL, but other symbols might not be. |
111 | // |
112 | // Large: Equivalent to Medium for now. |
113 | // |
114 | // Kernel: Not supported; explicitly requesting it is a fatal error. |
115 | // |
116 | // This means that any PIC module smaller than 4GB meets the |
117 | // requirements of Small, so Small seems like the best default there. |
118 | // |
119 | // All symbols bind locally in a non-PIC module, so the choice is less |
120 | // obvious. There are two cases: |
121 | // |
122 | // - When creating an executable, PLTs and copy relocations allow |
123 | // us to treat external symbols as part of the executable. |
124 | // Any executable smaller than 4GB meets the requirements of Small, |
125 | // so that seems like the best default. |
126 | // |
127 | // - When creating JIT code, stubs will be in range of BRASL if the |
128 | // image is less than 4GB in size. GOT entries will likewise be |
129 | // in range of LARL. However, the JIT environment has no equivalent |
130 | // of copy relocs, so locally-binding data symbols might not be in |
131 | // the range of LARL. We need the Medium model in that case. |
132 | static CodeModel::Model |
133 | getEffectiveSystemZCodeModel(std::optional<CodeModel::Model> CM, |
134 | Reloc::Model RM, bool JIT) { |
135 | if (CM) { |
136 | if (*CM == CodeModel::Tiny) |
137 | report_fatal_error(reason: "Target does not support the tiny CodeModel" , gen_crash_diag: false); |
138 | if (*CM == CodeModel::Kernel) |
139 | report_fatal_error(reason: "Target does not support the kernel CodeModel" , gen_crash_diag: false); |
140 | return *CM; |
141 | } |
142 | if (JIT) |
143 | return RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium; |
144 | return CodeModel::Small; |
145 | } |
146 | |
147 | SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT, |
148 | StringRef CPU, StringRef FS, |
149 | const TargetOptions &Options, |
150 | std::optional<Reloc::Model> RM, |
151 | std::optional<CodeModel::Model> CM, |
152 | CodeGenOptLevel OL, bool JIT) |
153 | : LLVMTargetMachine( |
154 | T, computeDataLayout(TT), TT, CPU, FS, Options, |
155 | getEffectiveRelocModel(RM), |
156 | getEffectiveSystemZCodeModel(CM, RM: getEffectiveRelocModel(RM), JIT), |
157 | OL), |
158 | TLOF(createTLOF(TT: getTargetTriple())) { |
159 | initAsmInfo(); |
160 | } |
161 | |
162 | SystemZTargetMachine::~SystemZTargetMachine() = default; |
163 | |
164 | const SystemZSubtarget * |
165 | SystemZTargetMachine::getSubtargetImpl(const Function &F) const { |
166 | Attribute CPUAttr = F.getFnAttribute(Kind: "target-cpu" ); |
167 | Attribute TuneAttr = F.getFnAttribute(Kind: "tune-cpu" ); |
168 | Attribute FSAttr = F.getFnAttribute(Kind: "target-features" ); |
169 | |
170 | std::string CPU = |
171 | CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; |
172 | std::string TuneCPU = |
173 | TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU; |
174 | std::string FS = |
175 | FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; |
176 | |
177 | // FIXME: This is related to the code below to reset the target options, |
178 | // we need to know whether the soft float and backchain flags are set on the |
179 | // function, so we can enable them as subtarget features. |
180 | bool SoftFloat = F.getFnAttribute(Kind: "use-soft-float" ).getValueAsBool(); |
181 | if (SoftFloat) |
182 | FS += FS.empty() ? "+soft-float" : ",+soft-float" ; |
183 | bool BackChain = F.hasFnAttribute(Kind: "backchain" ); |
184 | if (BackChain) |
185 | FS += FS.empty() ? "+backchain" : ",+backchain" ; |
186 | |
187 | auto &I = SubtargetMap[CPU + TuneCPU + FS]; |
188 | if (!I) { |
189 | // This needs to be done before we create a new subtarget since any |
190 | // creation will depend on the TM and the code generation flags on the |
191 | // function that reside in TargetOptions. |
192 | resetTargetOptions(F); |
193 | I = std::make_unique<SystemZSubtarget>(args: TargetTriple, args&: CPU, args&: TuneCPU, args&: FS, |
194 | args: *this); |
195 | } |
196 | |
197 | return I.get(); |
198 | } |
199 | |
200 | namespace { |
201 | |
202 | /// SystemZ Code Generator Pass Configuration Options. |
203 | class SystemZPassConfig : public TargetPassConfig { |
204 | public: |
205 | SystemZPassConfig(SystemZTargetMachine &TM, PassManagerBase &PM) |
206 | : TargetPassConfig(TM, PM) {} |
207 | |
208 | SystemZTargetMachine &getSystemZTargetMachine() const { |
209 | return getTM<SystemZTargetMachine>(); |
210 | } |
211 | |
212 | ScheduleDAGInstrs * |
213 | createPostMachineScheduler(MachineSchedContext *C) const override { |
214 | return new ScheduleDAGMI(C, |
215 | std::make_unique<SystemZPostRASchedStrategy>(args&: C), |
216 | /*RemoveKillFlags=*/true); |
217 | } |
218 | |
219 | void addIRPasses() override; |
220 | bool addInstSelector() override; |
221 | bool addILPOpts() override; |
222 | void addPreRegAlloc() override; |
223 | void addPostRewrite() override; |
224 | void addPostRegAlloc() override; |
225 | void addPreSched2() override; |
226 | void addPreEmitPass() override; |
227 | }; |
228 | |
229 | } // end anonymous namespace |
230 | |
231 | void SystemZPassConfig::addIRPasses() { |
232 | if (getOptLevel() != CodeGenOptLevel::None) { |
233 | addPass(P: createSystemZTDCPass()); |
234 | addPass(P: createLoopDataPrefetchPass()); |
235 | } |
236 | |
237 | addPass(P: createAtomicExpandLegacyPass()); |
238 | |
239 | TargetPassConfig::addIRPasses(); |
240 | } |
241 | |
242 | bool SystemZPassConfig::addInstSelector() { |
243 | addPass(P: createSystemZISelDag(TM&: getSystemZTargetMachine(), OptLevel: getOptLevel())); |
244 | |
245 | if (getOptLevel() != CodeGenOptLevel::None) |
246 | addPass(P: createSystemZLDCleanupPass(TM&: getSystemZTargetMachine())); |
247 | |
248 | return false; |
249 | } |
250 | |
251 | bool SystemZPassConfig::addILPOpts() { |
252 | addPass(PassID: &EarlyIfConverterID); |
253 | |
254 | if (EnableMachineCombinerPass) |
255 | addPass(PassID: &MachineCombinerID); |
256 | |
257 | return true; |
258 | } |
259 | |
260 | void SystemZPassConfig::addPreRegAlloc() { |
261 | addPass(P: createSystemZCopyPhysRegsPass(TM&: getSystemZTargetMachine())); |
262 | } |
263 | |
264 | void SystemZPassConfig::addPostRewrite() { |
265 | addPass(P: createSystemZPostRewritePass(TM&: getSystemZTargetMachine())); |
266 | } |
267 | |
268 | void SystemZPassConfig::addPostRegAlloc() { |
269 | // PostRewrite needs to be run at -O0 also (in which case addPostRewrite() |
270 | // is not called). |
271 | if (getOptLevel() == CodeGenOptLevel::None) |
272 | addPass(P: createSystemZPostRewritePass(TM&: getSystemZTargetMachine())); |
273 | } |
274 | |
275 | void SystemZPassConfig::addPreSched2() { |
276 | if (getOptLevel() != CodeGenOptLevel::None) |
277 | addPass(PassID: &IfConverterID); |
278 | } |
279 | |
280 | void SystemZPassConfig::addPreEmitPass() { |
281 | // Do instruction shortening before compare elimination because some |
282 | // vector instructions will be shortened into opcodes that compare |
283 | // elimination recognizes. |
284 | if (getOptLevel() != CodeGenOptLevel::None) |
285 | addPass(P: createSystemZShortenInstPass(TM&: getSystemZTargetMachine())); |
286 | |
287 | // We eliminate comparisons here rather than earlier because some |
288 | // transformations can change the set of available CC values and we |
289 | // generally want those transformations to have priority. This is |
290 | // especially true in the commonest case where the result of the comparison |
291 | // is used by a single in-range branch instruction, since we will then |
292 | // be able to fuse the compare and the branch instead. |
293 | // |
294 | // For example, two-address NILF can sometimes be converted into |
295 | // three-address RISBLG. NILF produces a CC value that indicates whether |
296 | // the low word is zero, but RISBLG does not modify CC at all. On the |
297 | // other hand, 64-bit ANDs like NILL can sometimes be converted to RISBG. |
298 | // The CC value produced by NILL isn't useful for our purposes, but the |
299 | // value produced by RISBG can be used for any comparison with zero |
300 | // (not just equality). So there are some transformations that lose |
301 | // CC values (while still being worthwhile) and others that happen to make |
302 | // the CC result more useful than it was originally. |
303 | // |
304 | // Another reason is that we only want to use BRANCH ON COUNT in cases |
305 | // where we know that the count register is not going to be spilled. |
306 | // |
307 | // Doing it so late makes it more likely that a register will be reused |
308 | // between the comparison and the branch, but it isn't clear whether |
309 | // preventing that would be a win or not. |
310 | if (getOptLevel() != CodeGenOptLevel::None) |
311 | addPass(P: createSystemZElimComparePass(TM&: getSystemZTargetMachine())); |
312 | addPass(P: createSystemZLongBranchPass(TM&: getSystemZTargetMachine())); |
313 | |
314 | // Do final scheduling after all other optimizations, to get an |
315 | // optimal input for the decoder (branch relaxation must happen |
316 | // after block placement). |
317 | if (getOptLevel() != CodeGenOptLevel::None) |
318 | addPass(PassID: &PostMachineSchedulerID); |
319 | } |
320 | |
321 | TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { |
322 | return new SystemZPassConfig(*this, PM); |
323 | } |
324 | |
325 | TargetTransformInfo |
326 | SystemZTargetMachine::getTargetTransformInfo(const Function &F) const { |
327 | return TargetTransformInfo(SystemZTTIImpl(this, F)); |
328 | } |
329 | |
330 | MachineFunctionInfo *SystemZTargetMachine::createMachineFunctionInfo( |
331 | BumpPtrAllocator &Allocator, const Function &F, |
332 | const TargetSubtargetInfo *STI) const { |
333 | return SystemZMachineFunctionInfo::create<SystemZMachineFunctionInfo>( |
334 | Allocator, F, STI); |
335 | } |
336 | |