//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include <cctype>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "systemz-lower"

// Temporarily let this be disabled by default until all known problems
// related to argument extensions are fixed.
static cl::opt<bool> EnableIntArgExtCheck(
    "argext-abi-check", cl::init(false),
    cl::desc("Verify that narrow int args are properly extended per the "
             "SystemZ ABI."));

namespace {
// Represents information about a comparison.
struct Comparison {
  Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
      : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
        Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // Chain if this is a strict floating-point comparison.
  SDValue Chain;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode;

  // A SystemZICMP value. Only used for integer comparisons.
  unsigned ICmpType;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask;
};
} // end anonymous namespace

// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}

// Return a version of MachineOperand that can be safely used before the
// final use.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}

SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                                             const SystemZSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));

  auto *Regs = STI.getSpecialRegisters();

  // Set up the register classes.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  if (!useSoftFloat()) {
    if (Subtarget.hasVector()) {
      addRegisterClass(MVT::f16, &SystemZ::VR16BitRegClass);
      addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
      addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
    } else {
      addRegisterClass(MVT::f16, &SystemZ::FP16BitRegClass);
      addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
      addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
    }
    if (Subtarget.hasVectorEnhancements1())
      addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
    else
      addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

    if (Subtarget.hasVector()) {
      addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
    }

    if (Subtarget.hasVector())
      addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());

  // TODO: It may be better to default to latency-oriented scheduling, however
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(128);
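  // (The z/Architecture provides 128-bit atomic operations directly:
  // LPQ/STPQ for atomic load/store and CDSG for compare-and-swap.)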

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(Align(2));
  // For performance reasons we prefer 16-byte alignment.
  setPrefFunctionAlignment(Align(16));

  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);
      setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
      setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC, VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types except i128.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT) && VT != MVT::i128) {
      setOperationAction(ISD::ABS, VT, Legal);

      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Support addition/subtraction with overflow.
      setOperationAction(ISD::SADDO, VT, Custom);
      setOperationAction(ISD::SSUBO, VT, Custom);

      // Support addition/subtraction with carry.
      setOperationAction(ISD::UADDO, VT, Custom);
      setOperationAction(ISD::USUBO, VT, Custom);

      // Support carry in as value rather than glue.
      setOperationAction(ISD::UADDO_CARRY, VT, Custom);
      setOperationAction(ISD::USUBO_CARRY, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // The fp<=>i32/i64 conversions are all Legal except for f16 and for
      // unsigned on z10 (only z196 and above have native support for
      // unsigned conversions).
      for (auto Op : {ISD::FP_TO_SINT, ISD::STRICT_FP_TO_SINT,
                      ISD::SINT_TO_FP, ISD::STRICT_SINT_TO_FP})
        setOperationAction(Op, VT, Custom);
      for (auto Op : {ISD::FP_TO_UINT, ISD::STRICT_FP_TO_UINT})
        setOperationAction(Op, VT, Custom);
      for (auto Op : {ISD::UINT_TO_FP, ISD::STRICT_UINT_TO_FP}) {
        // Handle unsigned 32-bit input types as signed 64-bit types on z10.
        auto OpAction =
            (!Subtarget.hasFPExtension() && VT == MVT::i32) ? Promote : Custom;
        setOperationAction(Op, VT, OpAction);
      }
    }
  }

  // Handle i128 if legal.
  if (isTypeLegal(MVT::i128)) {
    // No special instructions for these.
    setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i128, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i128, Expand);
    setOperationAction(ISD::ROTR, MVT::i128, Expand);
    setOperationAction(ISD::ROTL, MVT::i128, Expand);

    // We may be able to use VSLDB/VSLD/VSRD for these.
    setOperationAction(ISD::FSHL, MVT::i128, Custom);
    setOperationAction(ISD::FSHR, MVT::i128, Custom);

    // No special instructions for these before z17.
    if (!Subtarget.hasVectorEnhancements3()) {
      setOperationAction(ISD::MUL, MVT::i128, Expand);
      setOperationAction(ISD::MULHS, MVT::i128, Expand);
      setOperationAction(ISD::MULHU, MVT::i128, Expand);
      setOperationAction(ISD::SDIV, MVT::i128, Expand);
      setOperationAction(ISD::UDIV, MVT::i128, Expand);
      setOperationAction(ISD::SREM, MVT::i128, Expand);
      setOperationAction(ISD::UREM, MVT::i128, Expand);
      setOperationAction(ISD::CTLZ, MVT::i128, Expand);
      setOperationAction(ISD::CTTZ, MVT::i128, Expand);
    } else {
      // Even if we do have a legal 128-bit multiply, we do not
      // want 64-bit multiply-high operations to use it.
      setOperationAction(ISD::MULHS, MVT::i64, Custom);
      setOperationAction(ISD::MULHU, MVT::i64, Custom);
    }

    // Support addition/subtraction with carry.
    setOperationAction(ISD::UADDO, MVT::i128, Custom);
    setOperationAction(ISD::USUBO, MVT::i128, Custom);
    setOperationAction(ISD::UADDO_CARRY, MVT::i128, Custom);
    setOperationAction(ISD::USUBO_CARRY, MVT::i128, Custom);

    // Use VPOPCT and add up partial results.
    setOperationAction(ISD::CTPOP, MVT::i128, Custom);

    // Additional instructions available with z17.
    if (Subtarget.hasVectorEnhancements3()) {
      setOperationAction(ISD::ABS, MVT::i128, Legal);
    }
  }

  // These need custom handling in order to handle the f16 conversions.
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
  setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
  setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
  setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
  setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);

  // Whether or not i128 is a legal type, we need to custom lower
  // the atomic operations in order to exploit SystemZ instructions.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::f128, Custom);

  // Mark sign/zero extending atomic loads as legal, which will make
  // DAGCombiner fold extensions into atomic loads if possible.
  setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64,
                         {MVT::i8, MVT::i16, MVT::i32}, Legal);
  setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
                         {MVT::i8, MVT::i16}, Legal);
  setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i16,
                         MVT::i8, Legal);

  // We can use the CC result of compare-and-swap to implement
  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Traps are legal, as we will convert them to "j .+2".
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // On z17 we have native support for a 64-bit CTTZ.
  if (Subtarget.hasMiscellaneousExtensions4()) {
    setOperationAction(ISD::CTTZ, MVT::i32, Promote);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Promote);
    setOperationAction(ISD::CTTZ, MVT::i64, Legal);
  }

  // On z15 we have native support for a 64-bit CTPOP.
  if (Subtarget.hasMiscellaneousExtensions3()) {
    setOperationAction(ISD::CTPOP, MVT::i32, Promote);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  }

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // Expand 128-bit shifts without using a libcall.
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);

  // Also expand 256-bit shifts if i128 is a legal type.
  if (isTypeLegal(MVT::i128)) {
    setOperationAction(ISD::SRL_PARTS, MVT::i128, Expand);
    setOperationAction(ISD::SHL_PARTS, MVT::i128, Expand);
    setOperationAction(ISD::SRA_PARTS, MVT::i128, Expand);
  }

  // Handle bitcast from fp128 to i128.
  if (!isTypeLegal(MVT::i128))
    setOperationAction(ISD::BITCAST, MVT::i128, Custom);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
  }

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // Handle readcyclecounter with STCKF.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such. In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }

  // Handle integer vector types.
  for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3()) {
        setOperationAction(ISD::MUL, VT, Legal);
        setOperationAction(ISD::MULHS, VT, Legal);
        setOperationAction(ISD::MULHU, VT, Legal);
      }
      if (Subtarget.hasVectorEnhancements3() &&
          VT != MVT::v16i8 && VT != MVT::v8i16) {
        setOperationAction(ISD::SDIV, VT, Legal);
        setOperationAction(ISD::UDIV, VT, Legal);
        setOperationAction(ISD::SREM, VT, Legal);
        setOperationAction(ISD::UREM, VT, Legal);
      }
      setOperationAction(ISD::ABS, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      if (Subtarget.hasVectorEnhancements1())
        setOperationAction(ISD::CTPOP, VT, Legal);
      else
        setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts/rotates by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);
      setOperationAction(ISD::ROTL, VT, Custom);

      // Add ISD::VECREDUCE_ADD as custom in order to implement
      // it with VZERO+VSUM.
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
    }
  }

  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
  }

  if (Subtarget.hasVectorEnhancements2()) {
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
  }

  // Handle floating-point types.
  if (!useSoftFloat()) {
    // Promote all f16 operations to float, with some exceptions below.
    for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
      setOperationAction(Opc, MVT::f16, Promote);
    setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
    for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
      setTruncStoreAction(VT, MVT::f16, Expand);
    }
    for (auto Op : {ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE})
      setOperationAction(Op, MVT::f16, Subtarget.hasVector() ? Legal : Custom);
    setOperationAction(ISD::FP_ROUND, MVT::f16, LibCall);
    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, LibCall);
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
    setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
    for (auto Op : {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN})
      setOperationAction(Op, MVT::f16, Legal);
  }

  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT) && VT != MVT::f16) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
        setOperationAction(ISD::FROUNDEVEN, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);

      // Special treatment.
      setOperationAction(ISD::IS_FPCLASS, VT, Custom);

      // Handle constrained floating-point operations.
      setOperationAction(ISD::STRICT_FADD, VT, Legal);
      setOperationAction(ISD::STRICT_FSUB, VT, Legal);
      setOperationAction(ISD::STRICT_FMUL, VT, Legal);
      setOperationAction(ISD::STRICT_FDIV, VT, Legal);
      setOperationAction(ISD::STRICT_FMA, VT, Legal);
      setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
      setOperationAction(ISD::STRICT_FRINT, VT, Legal);
      setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
        setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
        setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
        setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
        setOperationAction(ISD::STRICT_FROUND, VT, Legal);
        setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
      }

      // Extension from f16 needs libcall.
      setOperationAction(ISD::FP_EXTEND, VT, Custom);
      setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom);
    }
  }

  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUNDEVEN, MVT::v2f64, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::v2f64, Legal);

    setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
    setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
    setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom);
    setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom);
    if (Subtarget.hasVectorEnhancements1()) {
      setOperationAction(ISD::STRICT_FSETCCS, MVT::v2f64, Custom);
      setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f32, Custom);
    }
  }

  // The vector enhancements facility 1 has instructions for these.
  if (Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FABS, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::v4f32, Legal);
    for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
                     MVT::v4f32, MVT::v2f64 }) {
      setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
    }
  }

  // We only have fused f128 multiply-addition on vector registers.
  if (!Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FMA, MVT::f128, Expand);
    setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
  }

  // We don't have a copysign instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of an f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // We don't have extending load instructions on vector registers.
  if (Subtarget.hasVectorEnhancements1()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
  }

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  if (Subtarget.isTargetzOS()) {
    // Handle address space casts between mixed sized pointers.
    setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
    setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
  }

  setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);

  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine({ISD::ZERO_EXTEND,
                       ISD::SIGN_EXTEND,
                       ISD::SIGN_EXTEND_INREG,
                       ISD::LOAD,
                       ISD::STORE,
                       ISD::VECTOR_SHUFFLE,
                       ISD::EXTRACT_VECTOR_ELT,
                       ISD::FP_ROUND,
                       ISD::STRICT_FP_ROUND,
                       ISD::FP_EXTEND,
                       ISD::SINT_TO_FP,
                       ISD::UINT_TO_FP,
                       ISD::STRICT_FP_EXTEND,
                       ISD::FCOPYSIGN,
                       ISD::BSWAP,
                       ISD::SETCC,
                       ISD::SRL,
                       ISD::SRA,
                       ISD::MUL,
                       ISD::SDIV,
                       ISD::UDIV,
                       ISD::SREM,
                       ISD::UREM,
                       ISD::INTRINSIC_VOID,
                       ISD::INTRINSIC_W_CHAIN});

  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We're not using SJLJ for exception handling, but these nodes are
  // implemented solely to support use of __builtin_setjmp / __builtin_longjmp.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
  MaxStoresPerMemcpyOptSize = 0;
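  // (A limit of 0 disables the generic store-based memcpy expansion
  // entirely, leaving the lowering to the target-specific MVC code.)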

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC". Handle the choice in target-specific code instead.
  MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
  MaxStoresPerMemsetOptSize = 0;

  // Default to having -disable-strictnode-mutation on.
  IsStrictFPEnabled = true;
}

bool SystemZTargetLowering::useSoftFloat() const {
  return Subtarget.hasSoftFloat();
}

EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
                                              LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
    const MachineFunction &MF, EVT VT) const {
  if (useSoftFloat())
    return false;

  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return Subtarget.hasVectorEnhancements1();
  default:
    break;
  }

  return false;
}

// Return true if the constant can be generated with a vector instruction,
// such as VGM, VGMB or VREPI.
bool SystemZVectorConstantInfo::isVectorConstantLegal(
    const SystemZSubtarget &Subtarget) {
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  if (!Subtarget.hasVector() ||
      (isFP128 && !Subtarget.hasVectorEnhancements1()))
    return false;

  // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
  // preferred way of creating all-zero and all-one vectors so give it
  // priority over other methods below.
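  // Each bit of Mask selects one byte of the result: a set bit denotes an
  // all-ones byte, so every byte of the constant must be 0x00 or 0xff.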
  unsigned Mask = 0;
  unsigned I = 0;
  for (; I < SystemZ::VectorBytes; ++I) {
    uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
    if (Byte == 0xff)
      Mask |= 1ULL << I;
    else if (Byte != 0)
      break;
  }
  if (I == SystemZ::VectorBytes) {
    Opcode = SystemZISD::BYTE_MASK;
    OpVals.push_back(Mask);
    VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
    return true;
  }

  if (SplatBitSize > 64)
    return false;

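  // Try to materialize Value with a single instruction, either as a
  // sign-extended 16-bit immediate (VECTOR REPLICATE IMMEDIATE) or as a
  // rotate-style bit mask (VECTOR GENERATE MASK).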
  auto tryValue = [&](uint64_t Value) -> bool {
    // Try VECTOR REPLICATE IMMEDIATE
    int64_t SignedValue = SignExtend64(Value, SplatBitSize);
    if (isInt<16>(SignedValue)) {
      OpVals.push_back(((unsigned) SignedValue));
      Opcode = SystemZISD::REPLICATE;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    // Try VECTOR GENERATE MASK
    unsigned Start, End;
    if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
      // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
      // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
      // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
      OpVals.push_back(Start - (64 - SplatBitSize));
      OpVals.push_back(End - (64 - SplatBitSize));
      Opcode = SystemZISD::ROTATE_MASK;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    return false;
  };

  // First try assuming that any undefined bits above the highest set bit
  // and below the lowest set bit are 1s. This increases the likelihood of
  // being able to use a sign-extended element value in VECTOR REPLICATE
  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
  uint64_t SplatBitsZ = SplatBits.getZExtValue();
  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
  unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
  unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
  uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
  uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
  if (tryValue(SplatBitsZ | Upper | Lower))
    return true;

  // Now try assuming that any undefined bits between the first and
  // last defined set bits are set. This increases the chances of
  // using a non-wraparound mask.
  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
  return tryValue(SplatBitsZ | Middle);
}

SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) {
  if (IntImm.isSingleWord()) {
    IntBits = APInt(128, IntImm.getZExtValue());
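    // Left-justify the value so that it occupies the high end of the
    // 128-bit vector register image.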
947 | IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth()); |
948 | } else |
949 | IntBits = IntImm; |
950 | assert(IntBits.getBitWidth() == 128 && "Unsupported APInt." ); |
951 | |
952 | // Find the smallest splat. |
953 | SplatBits = IntImm; |
954 | unsigned Width = SplatBits.getBitWidth(); |
955 | while (Width > 8) { |
956 | unsigned HalfSize = Width / 2; |
957 | APInt HighValue = SplatBits.lshr(shiftAmt: HalfSize).trunc(width: HalfSize); |
958 | APInt LowValue = SplatBits.trunc(width: HalfSize); |
959 | |
960 | // If the two halves do not match, stop here. |
961 | if (HighValue != LowValue || 8 > HalfSize) |
962 | break; |
963 | |
964 | SplatBits = HighValue; |
965 | Width = HalfSize; |
966 | } |
967 | SplatUndef = 0; |
968 | SplatBitSize = Width; |
969 | } |
970 | |
971 | SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) { |
972 | assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR" ); |
973 | bool HasAnyUndefs; |
974 | |
975 | // Get IntBits by finding the 128 bit splat. |
976 | BVN->isConstantSplat(SplatValue&: IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, MinSplatBits: 128, |
977 | isBigEndian: true); |
978 | |
979 | // Get SplatBits by finding the 8 bit or greater splat. |
980 | BVN->isConstantSplat(SplatValue&: SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, MinSplatBits: 8, |
981 | isBigEndian: true); |
982 | } |
983 | |
984 | bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
985 | bool ForCodeSize) const { |
986 | // We can load zero using LZ?R and negative zero using LZ?R;LC?BR. |
987 | if (Imm.isZero() || Imm.isNegZero()) |
988 | return true; |
989 | |
990 | return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget); |
991 | } |
992 | |
993 | MachineBasicBlock * |
994 | SystemZTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, |
995 | MachineBasicBlock *MBB) const { |
996 | DebugLoc DL = MI.getDebugLoc(); |
997 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
998 | const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
999 | |
1000 | MachineFunction *MF = MBB->getParent(); |
1001 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
1002 | |
1003 | const BasicBlock *BB = MBB->getBasicBlock(); |
1004 | MachineFunction::iterator I = ++MBB->getIterator(); |
1005 | |
1006 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1007 | const TargetRegisterClass *RC = MRI.getRegClass(Reg: DstReg); |
1008 | assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!" ); |
1009 | (void)TRI; |
1010 | Register mainDstReg = MRI.createVirtualRegister(RegClass: RC); |
1011 | Register restoreDstReg = MRI.createVirtualRegister(RegClass: RC); |
1012 | |
1013 | MVT PVT = getPointerTy(DL: MF->getDataLayout()); |
1014 | assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!" ); |
1015 | // For v = setjmp(buf), we generate. |
1016 | // Algorithm: |
1017 | // |
1018 | // --------- |
1019 | // | thisMBB | |
1020 | // --------- |
1021 | // | |
1022 | // ------------------------ |
1023 | // | | |
1024 | // ---------- --------------- |
1025 | // | mainMBB | | restoreMBB | |
1026 | // | v = 0 | | v = 1 | |
1027 | // ---------- --------------- |
1028 | // | | |
1029 | // ------------------------- |
1030 | // | |
1031 | // ----------------------------- |
1032 | // | sinkMBB | |
1033 | // | phi(v_mainMBB,v_restoreMBB) | |
1034 | // ----------------------------- |
1035 | // thisMBB: |
1036 | // buf[FPOffset] = Frame Pointer if hasFP. |
1037 | // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB. |
1038 | // buf[BCOffset] = Backchain value if building with -mbackchain. |
1039 | // buf[SPOffset] = Stack Pointer. |
1040 | // buf[LPOffset] = We never write this slot with R13, gcc stores R13 always. |
1041 | // SjLjSetup restoreMBB |
1042 | // mainMBB: |
1043 | // v_main = 0 |
1044 | // sinkMBB: |
1045 | // v = phi(v_main, v_restore) |
1046 | // restoreMBB: |
1047 | // v_restore = 1 |
1048 | |
1049 | MachineBasicBlock *thisMBB = MBB; |
1050 | MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); |
1051 | MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); |
1052 | MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB); |
1053 | |
1054 | MF->insert(MBBI: I, MBB: mainMBB); |
1055 | MF->insert(MBBI: I, MBB: sinkMBB); |
1056 | MF->push_back(MBB: restoreMBB); |
1057 | restoreMBB->setMachineBlockAddressTaken(); |
1058 | |
1059 | MachineInstrBuilder MIB; |
1060 | |
1061 | // Transfer the remainder of BB and its successor edges to sinkMBB. |
1062 | sinkMBB->splice(Where: sinkMBB->begin(), Other: MBB, |
1063 | From: std::next(x: MachineBasicBlock::iterator(MI)), To: MBB->end()); |
1064 | sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB); |
1065 | |
1066 | // thisMBB: |
1067 | const int64_t FPOffset = 0; // Slot 1. |
1068 | const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2. |
1069 | const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3. |
1070 | const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4. |
1071 | |
1072 | // Buf address. |
1073 | Register BufReg = MI.getOperand(i: 1).getReg(); |
1074 | |
1075 | const TargetRegisterClass *PtrRC = getRegClassFor(VT: PVT); |
1076 | unsigned LabelReg = MRI.createVirtualRegister(RegClass: PtrRC); |
1077 | |
1078 | // Prepare IP for longjmp. |
1079 | BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LARL), DestReg: LabelReg) |
1080 | .addMBB(MBB: restoreMBB); |
1081 | // Store IP for return from jmp, slot 2, offset = 1. |
1082 | BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::STG)) |
1083 | .addReg(RegNo: LabelReg) |
1084 | .addReg(RegNo: BufReg) |
1085 | .addImm(Val: LabelOffset) |
1086 | .addReg(RegNo: 0); |
1087 | |
1088 | auto *SpecialRegs = Subtarget.getSpecialRegisters(); |
1089 | bool HasFP = Subtarget.getFrameLowering()->hasFP(MF: *MF); |
1090 | if (HasFP) { |
1091 | BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::STG)) |
1092 | .addReg(RegNo: SpecialRegs->getFramePointerRegister()) |
1093 | .addReg(RegNo: BufReg) |
1094 | .addImm(Val: FPOffset) |
1095 | .addReg(RegNo: 0); |
1096 | } |
1097 | |
1098 | // Store SP. |
1099 | BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::STG)) |
1100 | .addReg(RegNo: SpecialRegs->getStackPointerRegister()) |
1101 | .addReg(RegNo: BufReg) |
1102 | .addImm(Val: SPOffset) |
1103 | .addReg(RegNo: 0); |
1104 | |
1105 | // Slot 3(Offset = 2) Backchain value (if building with -mbackchain). |
1106 | bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain(); |
1107 | if (BackChain) { |
1108 | Register BCReg = MRI.createVirtualRegister(RegClass: PtrRC); |
1109 | auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); |
1110 | MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LG), DestReg: BCReg) |
1111 | .addReg(RegNo: SpecialRegs->getStackPointerRegister()) |
1112 | .addImm(Val: TFL->getBackchainOffset(MF&: *MF)) |
1113 | .addReg(RegNo: 0); |
1114 | |
1115 | BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::STG)) |
1116 | .addReg(RegNo: BCReg) |
1117 | .addReg(RegNo: BufReg) |
1118 | .addImm(Val: BCOffset) |
1119 | .addReg(RegNo: 0); |
1120 | } |
1121 | |
1122 | // Setup. |
1123 | MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::EH_SjLj_Setup)) |
1124 | .addMBB(MBB: restoreMBB); |
1125 | |
1126 | const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
1127 | MIB.addRegMask(Mask: RegInfo->getNoPreservedMask()); |
1128 | |
1129 | thisMBB->addSuccessor(Succ: mainMBB); |
1130 | thisMBB->addSuccessor(Succ: restoreMBB); |
1131 | |
1132 | // mainMBB: |
1133 | BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LHI), DestReg: mainDstReg).addImm(Val: 0); |
1134 | mainMBB->addSuccessor(Succ: sinkMBB); |
1135 | |
1136 | // sinkMBB: |
1137 | BuildMI(BB&: *sinkMBB, I: sinkMBB->begin(), MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: DstReg) |
1138 | .addReg(RegNo: mainDstReg) |
1139 | .addMBB(MBB: mainMBB) |
1140 | .addReg(RegNo: restoreDstReg) |
1141 | .addMBB(MBB: restoreMBB); |
1142 | |
  // restoreMBB:
1144 | BuildMI(BB: restoreMBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LHI), DestReg: restoreDstReg).addImm(Val: 1); |
1145 | BuildMI(BB: restoreMBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::J)).addMBB(MBB: sinkMBB); |
1146 | restoreMBB->addSuccessor(Succ: sinkMBB); |
1147 | |
1148 | MI.eraseFromParent(); |
1149 | |
1150 | return sinkMBB; |
1151 | } |
1152 | |
1153 | MachineBasicBlock * |
1154 | SystemZTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, |
1155 | MachineBasicBlock *MBB) const { |
1156 | |
1157 | DebugLoc DL = MI.getDebugLoc(); |
1158 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
1159 | |
1160 | MachineFunction *MF = MBB->getParent(); |
1161 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
1162 | |
1163 | MVT PVT = getPointerTy(DL: MF->getDataLayout()); |
  assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1165 | Register BufReg = MI.getOperand(i: 0).getReg(); |
1166 | const TargetRegisterClass *RC = MRI.getRegClass(Reg: BufReg); |
1167 | auto *SpecialRegs = Subtarget.getSpecialRegisters(); |
1168 | |
1169 | Register Tmp = MRI.createVirtualRegister(RegClass: RC); |
1170 | Register BCReg = MRI.createVirtualRegister(RegClass: RC); |
1171 | |
1172 | MachineInstrBuilder MIB; |
1173 | |
1174 | const int64_t FPOffset = 0; |
1175 | const int64_t LabelOffset = 1 * PVT.getStoreSize(); |
1176 | const int64_t BCOffset = 2 * PVT.getStoreSize(); |
1177 | const int64_t SPOffset = 3 * PVT.getStoreSize(); |
1178 | const int64_t LPOffset = 4 * PVT.getStoreSize(); |
1179 | |
1180 | MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LG), DestReg: Tmp) |
1181 | .addReg(RegNo: BufReg) |
1182 | .addImm(Val: LabelOffset) |
1183 | .addReg(RegNo: 0); |
1184 | |
1185 | MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LG), |
1186 | DestReg: SpecialRegs->getFramePointerRegister()) |
1187 | .addReg(RegNo: BufReg) |
1188 | .addImm(Val: FPOffset) |
1189 | .addReg(RegNo: 0); |
1190 | |
  // We restore R13 even though we never stored it in setjmp from LLVM,
  // because GCC always stores R13 in __builtin_setjmp. We may be running
  // mixed code, with setjmp compiled by GCC and longjmp compiled by LLVM.
1194 | MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LG), DestReg: SystemZ::R13D) |
1195 | .addReg(RegNo: BufReg) |
1196 | .addImm(Val: LPOffset) |
1197 | .addReg(RegNo: 0); |
1198 | |
1199 | bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain(); |
1200 | if (BackChain) { |
1201 | MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LG), DestReg: BCReg) |
1202 | .addReg(RegNo: BufReg) |
1203 | .addImm(Val: BCOffset) |
1204 | .addReg(RegNo: 0); |
1205 | } |
1206 | |
1207 | MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LG), |
1208 | DestReg: SpecialRegs->getStackPointerRegister()) |
1209 | .addReg(RegNo: BufReg) |
1210 | .addImm(Val: SPOffset) |
1211 | .addReg(RegNo: 0); |
1212 | |
1213 | if (BackChain) { |
1214 | auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); |
1215 | BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::STG)) |
1216 | .addReg(RegNo: BCReg) |
1217 | .addReg(RegNo: SpecialRegs->getStackPointerRegister()) |
1218 | .addImm(Val: TFL->getBackchainOffset(MF&: *MF)) |
1219 | .addReg(RegNo: 0); |
1220 | } |
1221 | |
1222 | MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BR)).addReg(RegNo: Tmp); |
1223 | |
1224 | MI.eraseFromParent(); |
1225 | return MBB; |
1226 | } |
1227 | |
1228 | /// Returns true if stack probing through inline assembly is requested. |
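/// For example, a function defined in IR as
///   define void @f() "probe-stack"="inline-asm" { ... }
/// requests inline probes; any other attribute value (or no attribute at
/// all) leaves stack probing to the default mechanism.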
bool SystemZTargetLowering::hasInlineStackProbe(
    const MachineFunction &MF) const {
1230 | // If the function specifically requests inline stack probes, emit them. |
  if (MF.getFunction().hasFnAttribute(Kind: "probe-stack"))
    return MF.getFunction().getFnAttribute(Kind: "probe-stack").getValueAsString() ==
           "inline-asm";
1234 | return false; |
1235 | } |
1236 | |
1237 | TargetLowering::AtomicExpansionKind |
1238 | SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const { |
1239 | return AtomicExpansionKind::None; |
1240 | } |
1241 | |
1242 | TargetLowering::AtomicExpansionKind |
1243 | SystemZTargetLowering::shouldCastAtomicStoreInIR(StoreInst *SI) const { |
1244 | return AtomicExpansionKind::None; |
1245 | } |
1246 | |
1247 | TargetLowering::AtomicExpansionKind |
1248 | SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { |
1249 | // Don't expand subword operations as they require special treatment. |
1250 | if (RMW->getType()->isIntegerTy(Bitwidth: 8) || RMW->getType()->isIntegerTy(Bitwidth: 16)) |
1251 | return AtomicExpansionKind::None; |
1252 | |
1253 | // Don't expand if there is a target instruction available. |
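  // For example, with interlocked-access facility 1 (z196 and later)
  //   %old = atomicrmw add ptr %p, i32 1 seq_cst
  // can be selected directly (e.g. to LAA), so it is left unexpanded,
  // whereas an 'atomicrmw nand' still falls back to the CmpXChg loop.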
1254 | if (Subtarget.hasInterlockedAccess1() && |
1255 | (RMW->getType()->isIntegerTy(Bitwidth: 32) || RMW->getType()->isIntegerTy(Bitwidth: 64)) && |
1256 | (RMW->getOperation() == AtomicRMWInst::BinOp::Add || |
1257 | RMW->getOperation() == AtomicRMWInst::BinOp::Sub || |
1258 | RMW->getOperation() == AtomicRMWInst::BinOp::And || |
1259 | RMW->getOperation() == AtomicRMWInst::BinOp::Or || |
1260 | RMW->getOperation() == AtomicRMWInst::BinOp::Xor)) |
1261 | return AtomicExpansionKind::None; |
1262 | |
1263 | return AtomicExpansionKind::CmpXChg; |
1264 | } |
1265 | |
1266 | bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
1267 | // We can use CGFI or CLGFI. |
1268 | return isInt<32>(x: Imm) || isUInt<32>(x: Imm); |
1269 | } |
1270 | |
1271 | bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { |
1272 | // We can use ALGFI or SLGFI. |
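  // For example, Imm == -1 is legal: -Imm == 1 fits in an unsigned 32-bit
  // field, so the addition can be done as a subtraction with SLGFI.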
1273 | return isUInt<32>(x: Imm) || isUInt<32>(x: -Imm); |
1274 | } |
1275 | |
1276 | bool SystemZTargetLowering::allowsMisalignedMemoryAccesses( |
1277 | EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { |
1278 | // Unaligned accesses should never be slower than the expanded version. |
1279 | // We check specifically for aligned accesses in the few cases where |
1280 | // they are required. |
1281 | if (Fast) |
1282 | *Fast = 1; |
1283 | return true; |
1284 | } |
1285 | |
1286 | bool SystemZTargetLowering::hasAndNot(SDValue Y) const { |
1287 | EVT VT = Y.getValueType(); |
1288 | |
1289 | // We can use NC(G)RK for types in GPRs ... |
1290 | if (VT == MVT::i32 || VT == MVT::i64) |
1291 | return Subtarget.hasMiscellaneousExtensions3(); |
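  // (For example, on z15 a pattern like (and i64 %x, (xor i64 %y, -1))
  // then selects to a single NCGRK instead of separate XOR and AND.)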
1292 | |
1293 | // ... or VNC for types in VRs. |
1294 | if (VT.isVector() || VT == MVT::i128) |
1295 | return Subtarget.hasVector(); |
1296 | |
1297 | return false; |
1298 | } |
1299 | |
1300 | // Information about the addressing mode for a memory access. |
1301 | struct AddressingMode { |
1302 | // True if a long displacement is supported. |
1303 | bool LongDisplacement; |
1304 | |
1305 | // True if use of index register is supported. |
1306 | bool IndexReg; |
1307 | |
1308 | AddressingMode(bool LongDispl, bool IdxReg) : |
1309 | LongDisplacement(LongDispl), IndexReg(IdxReg) {} |
1310 | }; |
1311 | |
// Return the desired addressing mode for a Load whose only use (in the
// same block) is a Store.
1314 | static AddressingMode getLoadStoreAddrMode(bool HasVector, |
1315 | Type *Ty) { |
  // With vector support a Load->Store combination may be combined into
  // either an MVC or vector operations, and it seems to work best to
  // allow the vector addressing mode.
1319 | if (HasVector) |
1320 | return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); |
1321 | |
1322 | // Otherwise only the MVC case is special. |
1323 | bool MVC = Ty->isIntegerTy(Bitwidth: 8); |
1324 | return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/); |
1325 | } |
1326 | |
1327 | // Return the addressing mode which seems most desirable given an LLVM |
1328 | // Instruction pointer. |
1329 | static AddressingMode |
1330 | supportedAddressingMode(Instruction *I, bool HasVector) { |
1331 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
1332 | switch (II->getIntrinsicID()) { |
1333 | default: break; |
1334 | case Intrinsic::memset: |
1335 | case Intrinsic::memmove: |
1336 | case Intrinsic::memcpy: |
1337 | return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); |
1338 | } |
1339 | } |
1340 | |
1341 | if (isa<LoadInst>(Val: I) && I->hasOneUse()) { |
1342 | auto *SingleUser = cast<Instruction>(Val: *I->user_begin()); |
1343 | if (SingleUser->getParent() == I->getParent()) { |
1344 | if (isa<ICmpInst>(Val: SingleUser)) { |
1345 | if (auto *C = dyn_cast<ConstantInt>(Val: SingleUser->getOperand(i: 1))) |
1346 | if (C->getBitWidth() <= 64 && |
1347 | (isInt<16>(x: C->getSExtValue()) || isUInt<16>(x: C->getZExtValue()))) |
            // Comparison of memory with 16-bit signed / unsigned immediate
1349 | return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); |
1350 | } else if (isa<StoreInst>(Val: SingleUser)) |
1351 | // Load->Store |
1352 | return getLoadStoreAddrMode(HasVector, Ty: I->getType()); |
1353 | } |
1354 | } else if (auto *StoreI = dyn_cast<StoreInst>(Val: I)) { |
1355 | if (auto *LoadI = dyn_cast<LoadInst>(Val: StoreI->getValueOperand())) |
1356 | if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent()) |
1357 | // Load->Store |
1358 | return getLoadStoreAddrMode(HasVector, Ty: LoadI->getType()); |
1359 | } |
1360 | |
1361 | if (HasVector && (isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I))) { |
1362 | |
1363 | // * Use LDE instead of LE/LEY for z13 to avoid partial register |
1364 | // dependencies (LDE only supports small offsets). |
1365 | // * Utilize the vector registers to hold floating point |
1366 | // values (vector load / store instructions only support small |
1367 | // offsets). |
1368 | |
1369 | Type *MemAccessTy = (isa<LoadInst>(Val: I) ? I->getType() : |
1370 | I->getOperand(i: 0)->getType()); |
1371 | bool IsFPAccess = MemAccessTy->isFloatingPointTy(); |
1372 | bool IsVectorAccess = MemAccessTy->isVectorTy(); |
1373 | |
1374 | // A store of an extracted vector element will be combined into a VSTE type |
1375 | // instruction. |
1376 | if (!IsVectorAccess && isa<StoreInst>(Val: I)) { |
1377 | Value *DataOp = I->getOperand(i: 0); |
1378 | if (isa<ExtractElementInst>(Val: DataOp)) |
1379 | IsVectorAccess = true; |
1380 | } |
1381 | |
1382 | // A load which gets inserted into a vector element will be combined into a |
1383 | // VLE type instruction. |
1384 | if (!IsVectorAccess && isa<LoadInst>(Val: I) && I->hasOneUse()) { |
1385 | User *LoadUser = *I->user_begin(); |
1386 | if (isa<InsertElementInst>(Val: LoadUser)) |
1387 | IsVectorAccess = true; |
1388 | } |
1389 | |
1390 | if (IsFPAccess || IsVectorAccess) |
1391 | return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); |
1392 | } |
1393 | |
1394 | return AddressingMode(true/*LongDispl*/, true/*IdxReg*/); |
1395 | } |
1396 | |
1397 | bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, |
1398 | const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { |
1399 | // Punt on globals for now, although they can be used in limited |
1400 | // RELATIVE LONG cases. |
1401 | if (AM.BaseGV) |
1402 | return false; |
1403 | |
1404 | // Require a 20-bit signed offset. |
1405 | if (!isInt<20>(x: AM.BaseOffs)) |
1406 | return false; |
1407 | |
1408 | bool RequireD12 = |
1409 | Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(Bitwidth: 128)); |
1410 | AddressingMode SupportedAM(!RequireD12, true); |
1411 | if (I != nullptr) |
1412 | SupportedAM = supportedAddressingMode(I, HasVector: Subtarget.hasVector()); |
1413 | |
1414 | if (!SupportedAM.LongDisplacement && !isUInt<12>(x: AM.BaseOffs)) |
1415 | return false; |
1416 | |
1417 | if (!SupportedAM.IndexReg) |
1418 | // No indexing allowed. |
1419 | return AM.Scale == 0; |
1420 | else |
1421 | // Indexing is OK but no scale factor can be applied. |
1422 | return AM.Scale == 0 || AM.Scale == 1; |
1423 | } |
1424 | |
1425 | bool SystemZTargetLowering::findOptimalMemOpLowering( |
1426 | std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, |
1427 | unsigned SrcAS, const AttributeList &FuncAttributes) const { |
1428 | const int MVCFastLen = 16; |
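
  // For example, a 16-byte memcpy is then not expanded here and instead
  // becomes a single MVC, which can copy up to 256 bytes in one
  // storage-to-storage operation.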
1429 | |
1430 | if (Limit != ~unsigned(0)) { |
1431 | // Don't expand Op into scalar loads/stores in these cases: |
1432 | if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen) |
1433 | return false; // Small memcpy: Use MVC |
1434 | if (Op.isMemset() && Op.size() - 1 <= MVCFastLen) |
1435 | return false; // Small memset (first byte with STC/MVI): Use MVC |
1436 | if (Op.isZeroMemset()) |
1437 | return false; // Memset zero: Use XC |
1438 | } |
1439 | |
1440 | return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS, |
1441 | SrcAS, FuncAttributes); |
1442 | } |
1443 | |
1444 | EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op, |
1445 | const AttributeList &FuncAttributes) const { |
1446 | return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other; |
1447 | } |
1448 | |
1449 | bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const { |
1450 | if (!FromType->isIntegerTy() || !ToType->isIntegerTy()) |
1451 | return false; |
1452 | unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue(); |
1453 | unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue(); |
1454 | return FromBits > ToBits; |
1455 | } |
1456 | |
1457 | bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { |
1458 | if (!FromVT.isInteger() || !ToVT.isInteger()) |
1459 | return false; |
1460 | unsigned FromBits = FromVT.getFixedSizeInBits(); |
1461 | unsigned ToBits = ToVT.getFixedSizeInBits(); |
1462 | return FromBits > ToBits; |
1463 | } |
1464 | |
1465 | //===----------------------------------------------------------------------===// |
1466 | // Inline asm support |
1467 | //===----------------------------------------------------------------------===// |
1468 | |
1469 | TargetLowering::ConstraintType |
1470 | SystemZTargetLowering::getConstraintType(StringRef Constraint) const { |
1471 | if (Constraint.size() == 1) { |
1472 | switch (Constraint[0]) { |
1473 | case 'a': // Address register |
1474 | case 'd': // Data register (equivalent to 'r') |
1475 | case 'f': // Floating-point register |
1476 | case 'h': // High-part register |
1477 | case 'r': // General-purpose register |
1478 | case 'v': // Vector register |
1479 | return C_RegisterClass; |
1480 | |
1481 | case 'Q': // Memory with base and unsigned 12-bit displacement |
1482 | case 'R': // Likewise, plus an index |
1483 | case 'S': // Memory with base and signed 20-bit displacement |
1484 | case 'T': // Likewise, plus an index |
1485 | case 'm': // Equivalent to 'T'. |
1486 | return C_Memory; |
1487 | |
1488 | case 'I': // Unsigned 8-bit constant |
1489 | case 'J': // Unsigned 12-bit constant |
1490 | case 'K': // Signed 16-bit constant |
1491 | case 'L': // Signed 20-bit displacement (on all targets we support) |
1492 | case 'M': // 0x7fffffff |
1493 | return C_Immediate; |
1494 | |
1495 | default: |
1496 | break; |
1497 | } |
1498 | } else if (Constraint.size() == 2 && Constraint[0] == 'Z') { |
1499 | switch (Constraint[1]) { |
1500 | case 'Q': // Address with base and unsigned 12-bit displacement |
1501 | case 'R': // Likewise, plus an index |
1502 | case 'S': // Address with base and signed 20-bit displacement |
1503 | case 'T': // Likewise, plus an index |
1504 | return C_Address; |
1505 | |
1506 | default: |
1507 | break; |
1508 | } |
1509 | } |
1510 | return TargetLowering::getConstraintType(Constraint); |
1511 | } |
1512 | |
1513 | TargetLowering::ConstraintWeight SystemZTargetLowering:: |
1514 | getSingleConstraintMatchWeight(AsmOperandInfo &info, |
1515 | const char *constraint) const { |
1516 | ConstraintWeight weight = CW_Invalid; |
1517 | Value *CallOperandVal = info.CallOperandVal; |
1518 | // If we don't have a value, we can't do a match, |
1519 | // but allow it at the lowest weight. |
1520 | if (!CallOperandVal) |
1521 | return CW_Default; |
1522 | Type *type = CallOperandVal->getType(); |
1523 | // Look at the constraint type. |
1524 | switch (*constraint) { |
1525 | default: |
1526 | weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); |
1527 | break; |
1528 | |
1529 | case 'a': // Address register |
1530 | case 'd': // Data register (equivalent to 'r') |
1531 | case 'h': // High-part register |
1532 | case 'r': // General-purpose register |
    weight = type->isIntegerTy() ? CW_Register : CW_Default;
1534 | break; |
1535 | |
1536 | case 'f': // Floating-point register |
1537 | if (!useSoftFloat()) |
1538 | weight = type->isFloatingPointTy() ? CW_Register : CW_Default; |
1539 | break; |
1540 | |
1541 | case 'v': // Vector register |
1542 | if (Subtarget.hasVector()) |
1543 | weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register |
1544 | : CW_Default; |
1545 | break; |
1546 | |
1547 | case 'I': // Unsigned 8-bit constant |
1548 | if (auto *C = dyn_cast<ConstantInt>(Val: CallOperandVal)) |
1549 | if (isUInt<8>(x: C->getZExtValue())) |
1550 | weight = CW_Constant; |
1551 | break; |
1552 | |
1553 | case 'J': // Unsigned 12-bit constant |
1554 | if (auto *C = dyn_cast<ConstantInt>(Val: CallOperandVal)) |
1555 | if (isUInt<12>(x: C->getZExtValue())) |
1556 | weight = CW_Constant; |
1557 | break; |
1558 | |
1559 | case 'K': // Signed 16-bit constant |
1560 | if (auto *C = dyn_cast<ConstantInt>(Val: CallOperandVal)) |
1561 | if (isInt<16>(x: C->getSExtValue())) |
1562 | weight = CW_Constant; |
1563 | break; |
1564 | |
1565 | case 'L': // Signed 20-bit displacement (on all targets we support) |
1566 | if (auto *C = dyn_cast<ConstantInt>(Val: CallOperandVal)) |
1567 | if (isInt<20>(x: C->getSExtValue())) |
1568 | weight = CW_Constant; |
1569 | break; |
1570 | |
1571 | case 'M': // 0x7fffffff |
1572 | if (auto *C = dyn_cast<ConstantInt>(Val: CallOperandVal)) |
1573 | if (C->getZExtValue() == 0x7fffffff) |
1574 | weight = CW_Constant; |
1575 | break; |
1576 | } |
1577 | return weight; |
1578 | } |
1579 | |
1580 | // Parse a "{tNNN}" register constraint for which the register type "t" |
1581 | // has already been verified. MC is the class associated with "t" and |
1582 | // Map maps 0-based register numbers to LLVM register numbers. |
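// For example, the constraint "{r12}" with the GR64 map yields
// SystemZ::R12D and the GR64 register class.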
1583 | static std::pair<unsigned, const TargetRegisterClass *> |
1584 | parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, |
1585 | const unsigned *Map, unsigned Size) { |
  assert(Constraint.back() == '}' && "Missing '}'");
1587 | if (isdigit(Constraint[2])) { |
1588 | unsigned Index; |
1589 | bool Failed = |
1590 | Constraint.slice(Start: 2, End: Constraint.size() - 1).getAsInteger(Radix: 10, Result&: Index); |
1591 | if (!Failed && Index < Size && Map[Index]) |
1592 | return std::make_pair(x: Map[Index], y&: RC); |
1593 | } |
1594 | return std::make_pair(x: 0U, y: nullptr); |
1595 | } |
1596 | |
1597 | std::pair<unsigned, const TargetRegisterClass *> |
1598 | SystemZTargetLowering::getRegForInlineAsmConstraint( |
1599 | const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { |
1600 | if (Constraint.size() == 1) { |
1601 | // GCC Constraint Letters |
1602 | switch (Constraint[0]) { |
1603 | default: break; |
1604 | case 'd': // Data register (equivalent to 'r') |
1605 | case 'r': // General-purpose register |
1606 | if (VT.getSizeInBits() == 64) |
1607 | return std::make_pair(x: 0U, y: &SystemZ::GR64BitRegClass); |
1608 | else if (VT.getSizeInBits() == 128) |
1609 | return std::make_pair(x: 0U, y: &SystemZ::GR128BitRegClass); |
1610 | return std::make_pair(x: 0U, y: &SystemZ::GR32BitRegClass); |
1611 | |
1612 | case 'a': // Address register |
1613 | if (VT == MVT::i64) |
1614 | return std::make_pair(x: 0U, y: &SystemZ::ADDR64BitRegClass); |
1615 | else if (VT == MVT::i128) |
1616 | return std::make_pair(x: 0U, y: &SystemZ::ADDR128BitRegClass); |
1617 | return std::make_pair(x: 0U, y: &SystemZ::ADDR32BitRegClass); |
1618 | |
1619 | case 'h': // High-part register (an LLVM extension) |
1620 | return std::make_pair(x: 0U, y: &SystemZ::GRH32BitRegClass); |
1621 | |
1622 | case 'f': // Floating-point register |
1623 | if (!useSoftFloat()) { |
1624 | if (VT.getSizeInBits() == 16) |
1625 | return std::make_pair(x: 0U, y: &SystemZ::FP16BitRegClass); |
1626 | else if (VT.getSizeInBits() == 64) |
1627 | return std::make_pair(x: 0U, y: &SystemZ::FP64BitRegClass); |
1628 | else if (VT.getSizeInBits() == 128) |
1629 | return std::make_pair(x: 0U, y: &SystemZ::FP128BitRegClass); |
1630 | return std::make_pair(x: 0U, y: &SystemZ::FP32BitRegClass); |
1631 | } |
1632 | break; |
1633 | |
1634 | case 'v': // Vector register |
1635 | if (Subtarget.hasVector()) { |
1636 | if (VT.getSizeInBits() == 16) |
1637 | return std::make_pair(x: 0U, y: &SystemZ::VR16BitRegClass); |
1638 | if (VT.getSizeInBits() == 32) |
1639 | return std::make_pair(x: 0U, y: &SystemZ::VR32BitRegClass); |
1640 | if (VT.getSizeInBits() == 64) |
1641 | return std::make_pair(x: 0U, y: &SystemZ::VR64BitRegClass); |
1642 | return std::make_pair(x: 0U, y: &SystemZ::VR128BitRegClass); |
1643 | } |
1644 | break; |
1645 | } |
1646 | } |
  if (Constraint.starts_with(Prefix: "{")) {
1648 | |
    // A clobber constraint (e.g. ~{f0}) will have MVT::Other, whose size
    // cannot be queried directly.
1651 | auto getVTSizeInBits = [&VT]() { |
1652 | return VT == MVT::Other ? 0 : VT.getSizeInBits(); |
1653 | }; |
1654 | |
1655 | // We need to override the default register parsing for GPRs and FPRs |
1656 | // because the interpretation depends on VT. The internal names of |
1657 | // the registers are also different from the external names |
1658 | // (F0D and F0S instead of F0, etc.). |
1659 | if (Constraint[1] == 'r') { |
1660 | if (getVTSizeInBits() == 32) |
1661 | return parseRegisterNumber(Constraint, RC: &SystemZ::GR32BitRegClass, |
1662 | Map: SystemZMC::GR32Regs, Size: 16); |
1663 | if (getVTSizeInBits() == 128) |
1664 | return parseRegisterNumber(Constraint, RC: &SystemZ::GR128BitRegClass, |
1665 | Map: SystemZMC::GR128Regs, Size: 16); |
1666 | return parseRegisterNumber(Constraint, RC: &SystemZ::GR64BitRegClass, |
1667 | Map: SystemZMC::GR64Regs, Size: 16); |
1668 | } |
1669 | if (Constraint[1] == 'f') { |
1670 | if (useSoftFloat()) |
1671 | return std::make_pair( |
1672 | x: 0u, y: static_cast<const TargetRegisterClass *>(nullptr)); |
1673 | if (getVTSizeInBits() == 16) |
1674 | return parseRegisterNumber(Constraint, RC: &SystemZ::FP16BitRegClass, |
1675 | Map: SystemZMC::FP16Regs, Size: 16); |
1676 | if (getVTSizeInBits() == 32) |
1677 | return parseRegisterNumber(Constraint, RC: &SystemZ::FP32BitRegClass, |
1678 | Map: SystemZMC::FP32Regs, Size: 16); |
1679 | if (getVTSizeInBits() == 128) |
1680 | return parseRegisterNumber(Constraint, RC: &SystemZ::FP128BitRegClass, |
1681 | Map: SystemZMC::FP128Regs, Size: 16); |
1682 | return parseRegisterNumber(Constraint, RC: &SystemZ::FP64BitRegClass, |
1683 | Map: SystemZMC::FP64Regs, Size: 16); |
1684 | } |
1685 | if (Constraint[1] == 'v') { |
1686 | if (!Subtarget.hasVector()) |
1687 | return std::make_pair( |
1688 | x: 0u, y: static_cast<const TargetRegisterClass *>(nullptr)); |
1689 | if (getVTSizeInBits() == 16) |
1690 | return parseRegisterNumber(Constraint, RC: &SystemZ::VR16BitRegClass, |
1691 | Map: SystemZMC::VR16Regs, Size: 32); |
1692 | if (getVTSizeInBits() == 32) |
1693 | return parseRegisterNumber(Constraint, RC: &SystemZ::VR32BitRegClass, |
1694 | Map: SystemZMC::VR32Regs, Size: 32); |
1695 | if (getVTSizeInBits() == 64) |
1696 | return parseRegisterNumber(Constraint, RC: &SystemZ::VR64BitRegClass, |
1697 | Map: SystemZMC::VR64Regs, Size: 32); |
1698 | return parseRegisterNumber(Constraint, RC: &SystemZ::VR128BitRegClass, |
1699 | Map: SystemZMC::VR128Regs, Size: 32); |
1700 | } |
1701 | } |
1702 | return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); |
1703 | } |
1704 | |
1705 | // FIXME? Maybe this could be a TableGen attribute on some registers and |
1706 | // this table could be generated automatically from RegInfo. |
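// For example, on ELF targets @llvm.read_register.i64(metadata !"r15")
// resolves to SystemZ::R15D (the stack pointer); names not matched here
// yield NoRegister and are rejected by the generic code.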
1707 | Register |
1708 | SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT, |
1709 | const MachineFunction &MF) const { |
1710 | Register Reg = |
1711 | StringSwitch<Register>(RegName) |
1712 | .Case(S: "r4" , Value: Subtarget.isTargetXPLINK64() ? SystemZ::R4D |
1713 | : SystemZ::NoRegister) |
1714 | .Case(S: "r15" , |
1715 | Value: Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister) |
1716 | .Default(Value: Register()); |
1717 | |
1718 | return Reg; |
1719 | } |
1720 | |
1721 | Register SystemZTargetLowering::getExceptionPointerRegister( |
1722 | const Constant *PersonalityFn) const { |
1723 | return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D; |
1724 | } |
1725 | |
1726 | Register SystemZTargetLowering::getExceptionSelectorRegister( |
1727 | const Constant *PersonalityFn) const { |
1728 | return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D; |
1729 | } |
1730 | |
1731 | void SystemZTargetLowering::LowerAsmOperandForConstraint( |
1732 | SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, |
1733 | SelectionDAG &DAG) const { |
1734 | // Only support length 1 constraints for now. |
1735 | if (Constraint.size() == 1) { |
1736 | switch (Constraint[0]) { |
1737 | case 'I': // Unsigned 8-bit constant |
1738 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) |
1739 | if (isUInt<8>(x: C->getZExtValue())) |
1740 | Ops.push_back(x: DAG.getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(Op), |
1741 | VT: Op.getValueType())); |
1742 | return; |
1743 | |
1744 | case 'J': // Unsigned 12-bit constant |
1745 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) |
1746 | if (isUInt<12>(x: C->getZExtValue())) |
1747 | Ops.push_back(x: DAG.getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(Op), |
1748 | VT: Op.getValueType())); |
1749 | return; |
1750 | |
1751 | case 'K': // Signed 16-bit constant |
1752 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) |
1753 | if (isInt<16>(x: C->getSExtValue())) |
1754 | Ops.push_back(x: DAG.getSignedTargetConstant( |
1755 | Val: C->getSExtValue(), DL: SDLoc(Op), VT: Op.getValueType())); |
1756 | return; |
1757 | |
1758 | case 'L': // Signed 20-bit displacement (on all targets we support) |
1759 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) |
1760 | if (isInt<20>(x: C->getSExtValue())) |
1761 | Ops.push_back(x: DAG.getSignedTargetConstant( |
1762 | Val: C->getSExtValue(), DL: SDLoc(Op), VT: Op.getValueType())); |
1763 | return; |
1764 | |
1765 | case 'M': // 0x7fffffff |
1766 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) |
1767 | if (C->getZExtValue() == 0x7fffffff) |
1768 | Ops.push_back(x: DAG.getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(Op), |
1769 | VT: Op.getValueType())); |
1770 | return; |
1771 | } |
1772 | } |
1773 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); |
1774 | } |
1775 | |
1776 | //===----------------------------------------------------------------------===// |
1777 | // Calling conventions |
1778 | //===----------------------------------------------------------------------===// |
1779 | |
1780 | #include "SystemZGenCallingConv.inc" |
1781 | |
1782 | const MCPhysReg *SystemZTargetLowering::getScratchRegisters( |
1783 | CallingConv::ID) const { |
1784 | static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D, |
1785 | SystemZ::R14D, 0 }; |
1786 | return ScratchRegs; |
1787 | } |
1788 | |
1789 | bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, |
1790 | Type *ToType) const { |
1791 | return isTruncateFree(FromType, ToType); |
1792 | } |
1793 | |
1794 | bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
1795 | return CI->isTailCall(); |
1796 | } |
1797 | |
1798 | // Value is a value that has been passed to us in the location described by VA |
1799 | // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining |
1800 | // any loads onto Chain. |
1801 | static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL, |
1802 | CCValAssign &VA, SDValue Chain, |
1803 | SDValue Value) { |
1804 | // If the argument has been promoted from a smaller type, insert an |
1805 | // assertion to capture this. |
1806 | if (VA.getLocInfo() == CCValAssign::SExt) |
1807 | Value = DAG.getNode(Opcode: ISD::AssertSext, DL, VT: VA.getLocVT(), N1: Value, |
1808 | N2: DAG.getValueType(VA.getValVT())); |
1809 | else if (VA.getLocInfo() == CCValAssign::ZExt) |
1810 | Value = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: VA.getLocVT(), N1: Value, |
1811 | N2: DAG.getValueType(VA.getValVT())); |
1812 | |
1813 | if (VA.isExtInLoc()) |
1814 | Value = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VA.getValVT(), Operand: Value); |
1815 | else if (VA.getLocInfo() == CCValAssign::BCvt) { |
1816 | // If this is a short vector argument loaded from the stack, |
1817 | // extend from i64 to full vector size and then bitcast. |
1818 | assert(VA.getLocVT() == MVT::i64); |
1819 | assert(VA.getValVT().isVector()); |
1820 | Value = DAG.getBuildVector(VT: MVT::v2i64, DL, Ops: {Value, DAG.getUNDEF(VT: MVT::i64)}); |
1821 | Value = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Value); |
1822 | } else |
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1824 | return Value; |
1825 | } |
1826 | |
// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA. Return a copy of Value converted to
// VA.getLocVT(). The caller is responsible for handling indirect values.
1830 | static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL, |
1831 | CCValAssign &VA, SDValue Value) { |
1832 | switch (VA.getLocInfo()) { |
1833 | case CCValAssign::SExt: |
1834 | return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: VA.getLocVT(), Operand: Value); |
1835 | case CCValAssign::ZExt: |
1836 | return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: VA.getLocVT(), Operand: Value); |
1837 | case CCValAssign::AExt: |
1838 | return DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: VA.getLocVT(), Operand: Value); |
1839 | case CCValAssign::BCvt: { |
1840 | assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128); |
1841 | assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 || |
1842 | VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128); |
1843 | // For an f32 vararg we need to first promote it to an f64 and then |
1844 | // bitcast it to an i64. |
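    // (E.g. the f32 value 1.0 is extended to the f64 value 1.0, whose bit
    // pattern 0x3FF0000000000000 is what ends up in the i64 location.)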
1845 | if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64) |
1846 | Value = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f64, Operand: Value); |
1847 | MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64 |
1848 | ? MVT::v2i64 |
1849 | : VA.getLocVT(); |
1850 | Value = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: BitCastToType, Operand: Value); |
    // For ELF, this is a short vector argument to be stored to the stack:
    // bitcast it to v2i64 and then extract the first element.
1853 | if (BitCastToType == MVT::v2i64) |
1854 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: VA.getLocVT(), N1: Value, |
1855 | N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32)); |
1856 | return Value; |
1857 | } |
1858 | case CCValAssign::Full: |
1859 | return Value; |
1860 | default: |
1861 | llvm_unreachable("Unhandled getLocInfo()" ); |
1862 | } |
1863 | } |
1864 | |
1865 | static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) { |
1866 | SDLoc DL(In); |
1867 | SDValue Lo, Hi; |
1868 | if (DAG.getTargetLoweringInfo().isTypeLegal(VT: MVT::i128)) { |
1869 | Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i64, Operand: In); |
1870 | Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i64, |
1871 | Operand: DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i128, N1: In, |
1872 | N2: DAG.getConstant(Val: 64, DL, VT: MVT::i32))); |
1873 | } else { |
1874 | std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: In, DL, LoVT: MVT::i64, HiVT: MVT::i64); |
1875 | } |
1876 | |
1877 | // FIXME: If v2i64 were a legal type, we could use it instead of |
1878 | // Untyped here. This might enable improved folding. |
1879 | SDNode *Pair = DAG.getMachineNode(Opcode: SystemZ::PAIR128, dl: DL, |
1880 | VT: MVT::Untyped, Op1: Hi, Op2: Lo); |
1881 | return SDValue(Pair, 0); |
1882 | } |
1883 | |
1884 | static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) { |
1885 | SDLoc DL(In); |
1886 | SDValue Hi = DAG.getTargetExtractSubreg(SRIdx: SystemZ::subreg_h64, |
1887 | DL, VT: MVT::i64, Operand: In); |
1888 | SDValue Lo = DAG.getTargetExtractSubreg(SRIdx: SystemZ::subreg_l64, |
1889 | DL, VT: MVT::i64, Operand: In); |
1890 | |
1891 | if (DAG.getTargetLoweringInfo().isTypeLegal(VT: MVT::i128)) { |
1892 | Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i128, Operand: Lo); |
1893 | Hi = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i128, Operand: Hi); |
1894 | Hi = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i128, N1: Hi, |
1895 | N2: DAG.getConstant(Val: 64, DL, VT: MVT::i32)); |
1896 | return DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i128, N1: Lo, N2: Hi); |
1897 | } else { |
1898 | return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i128, N1: Lo, N2: Hi); |
1899 | } |
1900 | } |
1901 | |
1902 | bool SystemZTargetLowering::splitValueIntoRegisterParts( |
1903 | SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, |
1904 | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { |
1905 | EVT ValueVT = Val.getValueType(); |
  if (ValueVT.getSizeInBits() == 128 && NumParts == 1 &&
      PartVT == MVT::Untyped) {
1907 | // Inline assembly operand. |
1908 | Parts[0] = lowerI128ToGR128(DAG, In: DAG.getBitcast(VT: MVT::i128, V: Val)); |
1909 | return true; |
1910 | } |
1911 | |
1912 | return false; |
1913 | } |
1914 | |
1915 | SDValue SystemZTargetLowering::joinRegisterPartsIntoValue( |
1916 | SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, |
1917 | MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { |
  if (ValueVT.getSizeInBits() == 128 && NumParts == 1 &&
      PartVT == MVT::Untyped) {
1919 | // Inline assembly operand. |
1920 | SDValue Res = lowerGR128ToI128(DAG, In: Parts[0]); |
1921 | return DAG.getBitcast(VT: ValueVT, V: Res); |
1922 | } |
1923 | |
1924 | return SDValue(); |
1925 | } |
1926 | |
1927 | SDValue SystemZTargetLowering::LowerFormalArguments( |
1928 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
1929 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, |
1930 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
1931 | MachineFunction &MF = DAG.getMachineFunction(); |
1932 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1933 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
1934 | SystemZMachineFunctionInfo *FuncInfo = |
1935 | MF.getInfo<SystemZMachineFunctionInfo>(); |
1936 | auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>(); |
1937 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
1938 | |
1939 | // Assign locations to all of the incoming arguments. |
1940 | SmallVector<CCValAssign, 16> ArgLocs; |
1941 | SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
1942 | CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_SystemZ); |
1943 | FuncInfo->setSizeOfFnParams(CCInfo.getStackSize()); |
1944 | |
1945 | unsigned NumFixedGPRs = 0; |
1946 | unsigned NumFixedFPRs = 0; |
1947 | for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { |
1948 | SDValue ArgValue; |
1949 | CCValAssign &VA = ArgLocs[I]; |
1950 | EVT LocVT = VA.getLocVT(); |
1951 | if (VA.isRegLoc()) { |
1952 | // Arguments passed in registers |
1953 | const TargetRegisterClass *RC; |
1954 | switch (LocVT.getSimpleVT().SimpleTy) { |
1955 | default: |
1956 | // Integers smaller than i64 should be promoted to i64. |
1957 | llvm_unreachable("Unexpected argument type" ); |
1958 | case MVT::i32: |
1959 | NumFixedGPRs += 1; |
1960 | RC = &SystemZ::GR32BitRegClass; |
1961 | break; |
1962 | case MVT::i64: |
1963 | NumFixedGPRs += 1; |
1964 | RC = &SystemZ::GR64BitRegClass; |
1965 | break; |
1966 | case MVT::f16: |
1967 | NumFixedFPRs += 1; |
1968 | RC = &SystemZ::FP16BitRegClass; |
1969 | break; |
1970 | case MVT::f32: |
1971 | NumFixedFPRs += 1; |
1972 | RC = &SystemZ::FP32BitRegClass; |
1973 | break; |
1974 | case MVT::f64: |
1975 | NumFixedFPRs += 1; |
1976 | RC = &SystemZ::FP64BitRegClass; |
1977 | break; |
1978 | case MVT::f128: |
1979 | NumFixedFPRs += 2; |
1980 | RC = &SystemZ::FP128BitRegClass; |
1981 | break; |
1982 | case MVT::v16i8: |
1983 | case MVT::v8i16: |
1984 | case MVT::v4i32: |
1985 | case MVT::v2i64: |
1986 | case MVT::v4f32: |
1987 | case MVT::v2f64: |
1988 | RC = &SystemZ::VR128BitRegClass; |
1989 | break; |
1990 | } |
1991 | |
1992 | Register VReg = MRI.createVirtualRegister(RegClass: RC); |
1993 | MRI.addLiveIn(Reg: VA.getLocReg(), vreg: VReg); |
1994 | ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT); |
1995 | } else { |
      assert(VA.isMemLoc() && "Argument not register or memory");
1997 | |
1998 | // Create the frame index object for this incoming parameter. |
1999 | // FIXME: Pre-include call frame size in the offset, should not |
2000 | // need to manually add it here. |
2001 | int64_t ArgSPOffset = VA.getLocMemOffset(); |
2002 | if (Subtarget.isTargetXPLINK64()) { |
2003 | auto &XPRegs = |
2004 | Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); |
2005 | ArgSPOffset += XPRegs.getCallFrameSize(); |
2006 | } |
2007 | int FI = |
2008 | MFI.CreateFixedObject(Size: LocVT.getSizeInBits() / 8, SPOffset: ArgSPOffset, IsImmutable: true); |
2009 | |
2010 | // Create the SelectionDAG nodes corresponding to a load |
2011 | // from this parameter. Unpromoted ints and floats are |
2012 | // passed as right-justified 8-byte values. |
2013 | SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT); |
2014 | if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32 || |
2015 | VA.getLocVT() == MVT::f16) { |
2016 | unsigned SlotOffs = VA.getLocVT() == MVT::f16 ? 6 : 4; |
2017 | FIN = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: FIN, |
2018 | N2: DAG.getIntPtrConstant(Val: SlotOffs, DL)); |
2019 | } |
2020 | ArgValue = DAG.getLoad(VT: LocVT, dl: DL, Chain, Ptr: FIN, |
2021 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)); |
2022 | } |
2023 | |
2024 | // Convert the value of the argument register into the value that's |
2025 | // being passed. |
2026 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
2027 | InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue, |
2028 | PtrInfo: MachinePointerInfo())); |
2029 | // If the original argument was split (e.g. i128), we need |
2030 | // to load all parts of it here (using the same address). |
2031 | unsigned ArgIndex = Ins[I].OrigArgIndex; |
      assert(Ins[I].PartOffset == 0);
2033 | while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) { |
2034 | CCValAssign &PartVA = ArgLocs[I + 1]; |
2035 | unsigned PartOffset = Ins[I + 1].PartOffset; |
2036 | SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue, |
2037 | N2: DAG.getIntPtrConstant(Val: PartOffset, DL)); |
2038 | InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address, |
2039 | PtrInfo: MachinePointerInfo())); |
2040 | ++I; |
2041 | } |
2042 | } else |
2043 | InVals.push_back(Elt: convertLocVTToValVT(DAG, DL, VA, Chain, Value: ArgValue)); |
2044 | } |
2045 | |
2046 | if (IsVarArg && Subtarget.isTargetXPLINK64()) { |
2047 | // Save the number of non-varargs registers for later use by va_start, etc. |
2048 | FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); |
2049 | FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); |
2050 | |
2051 | auto *Regs = static_cast<SystemZXPLINK64Registers *>( |
2052 | Subtarget.getSpecialRegisters()); |
2053 | |
2054 | // Likewise the address (in the form of a frame index) of where the |
2055 | // first stack vararg would be. The 1-byte size here is arbitrary. |
2056 | // FIXME: Pre-include call frame size in the offset, should not |
2057 | // need to manually add it here. |
2058 | int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize(); |
2059 | int FI = MFI.CreateFixedObject(Size: 1, SPOffset: VarArgOffset, IsImmutable: true); |
2060 | FuncInfo->setVarArgsFrameIndex(FI); |
2061 | } |
2062 | |
2063 | if (IsVarArg && Subtarget.isTargetELF()) { |
2064 | // Save the number of non-varargs registers for later use by va_start, etc. |
2065 | FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); |
2066 | FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); |
2067 | |
2068 | // Likewise the address (in the form of a frame index) of where the |
2069 | // first stack vararg would be. The 1-byte size here is arbitrary. |
2070 | int64_t VarArgsOffset = CCInfo.getStackSize(); |
2071 | FuncInfo->setVarArgsFrameIndex( |
2072 | MFI.CreateFixedObject(Size: 1, SPOffset: VarArgsOffset, IsImmutable: true)); |
2073 | |
2074 | // ...and a similar frame index for the caller-allocated save area |
2075 | // that will be used to store the incoming registers. |
2076 | int64_t RegSaveOffset = |
2077 | -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, Reg: SystemZ::R2D) - 16; |
2078 | unsigned RegSaveIndex = MFI.CreateFixedObject(Size: 1, SPOffset: RegSaveOffset, IsImmutable: true); |
2079 | FuncInfo->setRegSaveFrameIndex(RegSaveIndex); |
2080 | |
2081 | // Store the FPR varargs in the reserved frame slots. (We store the |
2082 | // GPRs as part of the prologue.) |
2083 | if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) { |
2084 | SDValue MemOps[SystemZ::ELFNumArgFPRs]; |
2085 | for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) { |
2086 | unsigned Offset = TFL->getRegSpillOffset(MF, Reg: SystemZ::ELFArgFPRs[I]); |
2087 | int FI = |
2088 | MFI.CreateFixedObject(Size: 8, SPOffset: -SystemZMC::ELFCallFrameSize + Offset, IsImmutable: true); |
2089 | SDValue FIN = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
2090 | Register VReg = MF.addLiveIn(PReg: SystemZ::ELFArgFPRs[I], |
2091 | RC: &SystemZ::FP64BitRegClass); |
2092 | SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: MVT::f64); |
2093 | MemOps[I] = DAG.getStore(Chain: ArgValue.getValue(R: 1), dl: DL, Val: ArgValue, Ptr: FIN, |
2094 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)); |
2095 | } |
2096 | // Join the stores, which are independent of one another. |
2097 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, |
2098 | Ops: ArrayRef(&MemOps[NumFixedFPRs], |
2099 | SystemZ::ELFNumArgFPRs - NumFixedFPRs)); |
2100 | } |
2101 | } |
2102 | |
2103 | if (Subtarget.isTargetXPLINK64()) { |
    // Create a virtual register for handling the incoming "ADA" special
    // register (R5).
2105 | const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; |
2106 | Register ADAvReg = MRI.createVirtualRegister(RegClass: RC); |
2107 | auto *Regs = static_cast<SystemZXPLINK64Registers *>( |
2108 | Subtarget.getSpecialRegisters()); |
2109 | MRI.addLiveIn(Reg: Regs->getADARegister(), vreg: ADAvReg); |
2110 | FuncInfo->setADAVirtualRegister(ADAvReg); |
2111 | } |
2112 | return Chain; |
2113 | } |
2114 | |
2115 | static bool canUseSiblingCall(const CCState &ArgCCInfo, |
2116 | SmallVectorImpl<CCValAssign> &ArgLocs, |
2117 | SmallVectorImpl<ISD::OutputArg> &Outs) { |
2118 | // Punt if there are any indirect or stack arguments, or if the call |
2119 | // needs the callee-saved argument register R6, or if the call uses |
2120 | // the callee-saved register arguments SwiftSelf and SwiftError. |
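  // (R6 is special because the ELF ABI passes the fifth integer argument
  // in it while also treating it as callee-saved, so a sibling call could
  // not both pass a value in it and preserve it for our own caller.)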
2121 | for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { |
2122 | CCValAssign &VA = ArgLocs[I]; |
2123 | if (VA.getLocInfo() == CCValAssign::Indirect) |
2124 | return false; |
2125 | if (!VA.isRegLoc()) |
2126 | return false; |
2127 | Register Reg = VA.getLocReg(); |
2128 | if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) |
2129 | return false; |
2130 | if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError()) |
2131 | return false; |
2132 | } |
2133 | return true; |
2134 | } |
2135 | |
2136 | static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, |
2137 | unsigned Offset, bool LoadAdr = false) { |
2138 | MachineFunction &MF = DAG.getMachineFunction(); |
2139 | SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
2140 | unsigned ADAvReg = MFI->getADAVirtualRegister(); |
2141 | EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout()); |
2142 | |
2143 | SDValue Reg = DAG.getRegister(Reg: ADAvReg, VT: PtrVT); |
2144 | SDValue Ofs = DAG.getTargetConstant(Val: Offset, DL, VT: PtrVT); |
2145 | |
2146 | SDValue Result = DAG.getNode(Opcode: SystemZISD::ADA_ENTRY, DL, VT: PtrVT, N1: Val, N2: Reg, N3: Ofs); |
2147 | if (!LoadAdr) |
2148 | Result = DAG.getLoad( |
2149 | VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Result, PtrInfo: MachinePointerInfo(), Alignment: Align(8), |
2150 | MMOFlags: MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); |
2151 | |
2152 | return Result; |
2153 | } |
2154 | |
// ADA access using a GlobalValue.
// Note: for functions, the address of the descriptor is returned.
2157 | static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL, |
2158 | EVT PtrVT) { |
2159 | unsigned ADAtype; |
2160 | bool LoadAddr = false; |
2161 | const GlobalAlias *GA = dyn_cast<GlobalAlias>(Val: GV); |
2162 | bool IsFunction = |
2163 | (isa<Function>(Val: GV)) || (GA && isa<Function>(Val: GA->getAliaseeObject())); |
2164 | bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage()); |
2165 | |
2166 | if (IsFunction) { |
2167 | if (IsInternal) { |
2168 | ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC; |
2169 | LoadAddr = true; |
2170 | } else |
2171 | ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC; |
2172 | } else { |
2173 | ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR; |
2174 | } |
2175 | SDValue Val = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0, TargetFlags: ADAtype); |
2176 | |
2177 | return getADAEntry(DAG, Val, DL, Offset: 0, LoadAdr: LoadAddr); |
2178 | } |
2179 | |
2180 | static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, |
2181 | SDLoc &DL, SDValue &Chain) { |
2182 | unsigned ADADelta = 0; // ADA offset in desc. |
2183 | unsigned EPADelta = 8; // EPA offset in desc. |
2184 | MachineFunction &MF = DAG.getMachineFunction(); |
2185 | EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout()); |
2186 | |
2187 | // XPLink calling convention. |
2188 | if (auto *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) { |
2189 | bool IsInternal = (G->getGlobal()->hasInternalLinkage() || |
2190 | G->getGlobal()->hasPrivateLinkage()); |
2191 | if (IsInternal) { |
2192 | SystemZMachineFunctionInfo *MFI = |
2193 | MF.getInfo<SystemZMachineFunctionInfo>(); |
2194 | unsigned ADAvReg = MFI->getADAVirtualRegister(); |
2195 | ADA = DAG.getCopyFromReg(Chain, dl: DL, Reg: ADAvReg, VT: PtrVT); |
2196 | Callee = DAG.getTargetGlobalAddress(GV: G->getGlobal(), DL, VT: PtrVT); |
2197 | Callee = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Callee); |
2198 | return true; |
2199 | } else { |
2200 | SDValue GA = DAG.getTargetGlobalAddress( |
2201 | GV: G->getGlobal(), DL, VT: PtrVT, offset: 0, TargetFlags: SystemZII::MO_ADA_DIRECT_FUNC_DESC); |
2202 | ADA = getADAEntry(DAG, Val: GA, DL, Offset: ADADelta); |
2203 | Callee = getADAEntry(DAG, Val: GA, DL, Offset: EPADelta); |
2204 | } |
2205 | } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) { |
2206 | SDValue ES = DAG.getTargetExternalSymbol( |
2207 | Sym: E->getSymbol(), VT: PtrVT, TargetFlags: SystemZII::MO_ADA_DIRECT_FUNC_DESC); |
2208 | ADA = getADAEntry(DAG, Val: ES, DL, Offset: ADADelta); |
2209 | Callee = getADAEntry(DAG, Val: ES, DL, Offset: EPADelta); |
2210 | } else { |
2211 | // Function pointer case |
2212 | ADA = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Callee, |
2213 | N2: DAG.getConstant(Val: ADADelta, DL, VT: PtrVT)); |
2214 | ADA = DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: ADA, |
2215 | PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction())); |
2216 | Callee = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Callee, |
2217 | N2: DAG.getConstant(Val: EPADelta, DL, VT: PtrVT)); |
2218 | Callee = DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Callee, |
2219 | PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction())); |
2220 | } |
2221 | return false; |
2222 | } |
2223 | |
2224 | SDValue |
2225 | SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, |
2226 | SmallVectorImpl<SDValue> &InVals) const { |
2227 | SelectionDAG &DAG = CLI.DAG; |
2228 | SDLoc &DL = CLI.DL; |
2229 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
2230 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
2231 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
2232 | SDValue Chain = CLI.Chain; |
2233 | SDValue Callee = CLI.Callee; |
2234 | bool &IsTailCall = CLI.IsTailCall; |
2235 | CallingConv::ID CallConv = CLI.CallConv; |
2236 | bool IsVarArg = CLI.IsVarArg; |
2237 | MachineFunction &MF = DAG.getMachineFunction(); |
2238 | EVT PtrVT = getPointerTy(DL: MF.getDataLayout()); |
2239 | LLVMContext &Ctx = *DAG.getContext(); |
2240 | SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters(); |
2241 | |
  // FIXME: z/OS support to be added later.
2243 | if (Subtarget.isTargetXPLINK64()) |
2244 | IsTailCall = false; |
2245 | |
2246 | // Integer args <=32 bits should have an extension attribute. |
2247 | verifyNarrowIntegerArgs_Call(Outs, F: &MF.getFunction(), Callee); |
2248 | |
2249 | // Analyze the operands of the call, assigning locations to each operand. |
2250 | SmallVector<CCValAssign, 16> ArgLocs; |
2251 | SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx); |
2252 | ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_SystemZ); |
2253 | |
2254 | // We don't support GuaranteedTailCallOpt, only automatically-detected |
2255 | // sibling calls. |
2256 | if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs)) |
2257 | IsTailCall = false; |
2258 | |
2259 | // Get a count of how many bytes are to be pushed on the stack. |
2260 | unsigned NumBytes = ArgCCInfo.getStackSize(); |
2261 | |
2262 | // Mark the start of the call. |
2263 | if (!IsTailCall) |
2264 | Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL); |
2265 | |
2266 | // Copy argument values to their designated locations. |
2267 | SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass; |
2268 | SmallVector<SDValue, 8> MemOpChains; |
2269 | SDValue StackPtr; |
2270 | for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { |
2271 | CCValAssign &VA = ArgLocs[I]; |
2272 | SDValue ArgValue = OutVals[I]; |
2273 | |
2274 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
2275 | // Store the argument in a stack slot and pass its address. |
2276 | unsigned ArgIndex = Outs[I].OrigArgIndex; |
2277 | EVT SlotVT; |
2278 | if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { |
2279 | // Allocate the full stack space for a promoted (and split) argument. |
2280 | Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty; |
2281 | EVT OrigArgVT = getValueType(DL: MF.getDataLayout(), Ty: OrigArgType); |
2282 | MVT PartVT = getRegisterTypeForCallingConv(Context&: Ctx, CC: CLI.CallConv, VT: OrigArgVT); |
2283 | unsigned N = getNumRegistersForCallingConv(Context&: Ctx, CC: CLI.CallConv, VT: OrigArgVT); |
2284 | SlotVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: PartVT.getSizeInBits() * N); |
2285 | } else { |
2286 | SlotVT = Outs[I].VT; |
2287 | } |
2288 | SDValue SpillSlot = DAG.CreateStackTemporary(VT: SlotVT); |
2289 | int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex(); |
2290 | MemOpChains.push_back( |
2291 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot, |
2292 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
2293 | // If the original argument was split (e.g. i128), we need |
2294 | // to store all parts of it here (and pass just one address). |
      assert(Outs[I].PartOffset == 0);
2296 | while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { |
2297 | SDValue PartValue = OutVals[I + 1]; |
2298 | unsigned PartOffset = Outs[I + 1].PartOffset; |
2299 | SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot, |
2300 | N2: DAG.getIntPtrConstant(Val: PartOffset, DL)); |
2301 | MemOpChains.push_back( |
2302 | Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address, |
2303 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI))); |
        assert((PartOffset + PartValue.getValueType().getStoreSize() <=
                SlotVT.getStoreSize()) && "Not enough space for argument part!");
2306 | ++I; |
2307 | } |
2308 | ArgValue = SpillSlot; |
2309 | } else |
2310 | ArgValue = convertValVTToLocVT(DAG, DL, VA, Value: ArgValue); |
2311 | |
2312 | if (VA.isRegLoc()) { |
      // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcast to an
      // MVT::i128 value. We decompose the 128-bit type into a pair of its
      // high and low values.
2316 | if (VA.getLocVT() == MVT::i128) |
2317 | ArgValue = lowerI128ToGR128(DAG, In: ArgValue); |
2318 | // Queue up the argument copies and emit them at the end. |
2319 | RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue)); |
2320 | } else { |
      assert(VA.isMemLoc() && "Argument not register or memory");
2322 | |
2323 | // Work out the address of the stack slot. Unpromoted ints and |
2324 | // floats are passed as right-justified 8-byte values. |
2325 | if (!StackPtr.getNode()) |
2326 | StackPtr = DAG.getCopyFromReg(Chain, dl: DL, |
2327 | Reg: Regs->getStackPointerRegister(), VT: PtrVT); |
2328 | unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() + |
2329 | VA.getLocMemOffset(); |
2330 | if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) |
2331 | Offset += 4; |
2332 | else if (VA.getLocVT() == MVT::f16) |
2333 | Offset += 6; |
2334 | SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, |
2335 | N2: DAG.getIntPtrConstant(Val: Offset, DL)); |
2336 | |
2337 | // Emit the store. |
2338 | MemOpChains.push_back( |
2339 | Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo())); |
2340 | |
2341 | // Although long doubles or vectors are passed through the stack when |
2342 | // they are vararg (non-fixed arguments), if a long double or vector |
      // occupies the third and fourth slot of the argument list, GPR3 should
2344 | // still shadow the third slot of the argument list. |
2345 | if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) { |
2346 | SDValue ShadowArgValue = |
2347 | DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i64, N1: ArgValue, |
2348 | N2: DAG.getIntPtrConstant(Val: 1, DL)); |
2349 | RegsToPass.push_back(Elt: std::make_pair(x: SystemZ::R3D, y&: ShadowArgValue)); |
2350 | } |
2351 | } |
2352 | } |
2353 | |
2354 | // Join the stores, which are independent of one another. |
2355 | if (!MemOpChains.empty()) |
2356 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains); |
2357 | |
2358 | // Accept direct calls by converting symbolic call addresses to the |
2359 | // associated Target* opcodes. Force %r1 to be used for indirect |
2360 | // tail calls. |
2361 | SDValue Glue; |
2362 | |
2363 | if (Subtarget.isTargetXPLINK64()) { |
2364 | SDValue ADA; |
2365 | bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain); |
2366 | if (!IsBRASL) { |
2367 | unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs) |
2368 | ->getAddressOfCalleeRegister(); |
2369 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: CalleeReg, N: Callee, Glue); |
2370 | Glue = Chain.getValue(R: 1); |
2371 | Callee = DAG.getRegister(Reg: CalleeReg, VT: Callee.getValueType()); |
2372 | } |
2373 | RegsToPass.push_back(Elt: std::make_pair( |
2374 | x: static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), y&: ADA)); |
2375 | } else { |
2376 | if (auto *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) { |
2377 | Callee = DAG.getTargetGlobalAddress(GV: G->getGlobal(), DL, VT: PtrVT); |
2378 | Callee = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Callee); |
2379 | } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) { |
2380 | Callee = DAG.getTargetExternalSymbol(Sym: E->getSymbol(), VT: PtrVT); |
2381 | Callee = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Callee); |
2382 | } else if (IsTailCall) { |
2383 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: SystemZ::R1D, N: Callee, Glue); |
2384 | Glue = Chain.getValue(R: 1); |
2385 | Callee = DAG.getRegister(Reg: SystemZ::R1D, VT: Callee.getValueType()); |
2386 | } |
2387 | } |
2388 | |
2389 | // Build a sequence of copy-to-reg nodes, chained and glued together. |
2390 | for (const auto &[Reg, N] : RegsToPass) { |
2391 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg, N, Glue); |
2392 | Glue = Chain.getValue(R: 1); |
2393 | } |
2394 | |
2395 | // The first call operand is the chain and the second is the target address. |
2396 | SmallVector<SDValue, 8> Ops; |
2397 | Ops.push_back(Elt: Chain); |
2398 | Ops.push_back(Elt: Callee); |
2399 | |
2400 | // Add argument registers to the end of the list so that they are |
2401 | // known live into the call. |
2402 | for (const auto &[Reg, N] : RegsToPass) |
2403 | Ops.push_back(Elt: DAG.getRegister(Reg, VT: N.getValueType())); |
2404 | |
2405 | // Add a register mask operand representing the call-preserved registers. |
2406 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
2407 | const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); |
2408 | assert(Mask && "Missing call preserved mask for calling convention" ); |
2409 | Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask)); |
2410 | |
2411 | // Glue the call to the argument copies, if any. |
2412 | if (Glue.getNode()) |
2413 | Ops.push_back(Elt: Glue); |
2414 | |
2415 | // Emit the call. |
2416 | SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue); |
2417 | if (IsTailCall) { |
2418 | SDValue Ret = DAG.getNode(Opcode: SystemZISD::SIBCALL, DL, VTList: NodeTys, Ops); |
2419 | DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge); |
2420 | return Ret; |
2421 | } |
2422 | Chain = DAG.getNode(Opcode: SystemZISD::CALL, DL, VTList: NodeTys, Ops); |
2423 | DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge); |
2424 | Glue = Chain.getValue(R: 1); |
2425 | |
2426 | // Mark the end of the call, which is glued to the call itself. |
2427 | Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL); |
2428 | Glue = Chain.getValue(R: 1); |
2429 | |
2430 | // Assign locations to each value returned by this call. |
2431 | SmallVector<CCValAssign, 16> RetLocs; |
2432 | CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx); |
2433 | RetCCInfo.AnalyzeCallResult(Ins, Fn: RetCC_SystemZ); |
2434 | |
2435 | // Copy all of the result registers out of their specified physreg. |
2436 | for (CCValAssign &VA : RetLocs) { |
2437 | // Copy the value out, gluing the copy to the end of the call sequence. |
2438 | SDValue RetValue = DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(), |
2439 | VT: VA.getLocVT(), Glue); |
2440 | Chain = RetValue.getValue(R: 1); |
2441 | Glue = RetValue.getValue(R: 2); |
2442 | |
2443 | // Convert the value of the return register into the value that's |
2444 | // being returned. |
2445 | InVals.push_back(Elt: convertLocVTToValVT(DAG, DL, VA, Chain, Value: RetValue)); |
2446 | } |
2447 | |
2448 | return Chain; |
2449 | } |
2450 | |
2451 | // Generate a call taking the given operands as arguments and returning a |
2452 | // result of type RetVT. |
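// A hypothetical usage sketch (the callee name and operand list here are
// made up for illustration):
//   std::pair<SDValue, SDValue> CallResult = makeExternalCall(
//       Chain, DAG, "__some_helper", MVT::i64, Ops, CallingConv::C,
//       /*IsSigned=*/true, DL, /*DoesNotReturn=*/false,
//       /*IsReturnValueUsed=*/true);
//   // CallResult.first is the result value, CallResult.second the chain.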
2453 | std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall( |
2454 | SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, |
2455 | ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, |
2456 | bool DoesNotReturn, bool IsReturnValueUsed) const { |
2457 | TargetLowering::ArgListTy Args; |
2458 | Args.reserve(n: Ops.size()); |
2459 | |
2460 | TargetLowering::ArgListEntry Entry; |
2461 | for (SDValue Op : Ops) { |
2462 | Entry.Node = Op; |
2463 | Entry.Ty = Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext()); |
2464 | Entry.IsSExt = shouldSignExtendTypeInLibCall(Ty: Entry.Ty, IsSigned); |
2465 | Entry.IsZExt = !Entry.IsSExt; |
2466 | Args.push_back(x: Entry); |
2467 | } |
2468 | |
2469 | SDValue Callee = |
2470 | DAG.getExternalSymbol(Sym: CalleeName, VT: getPointerTy(DL: DAG.getDataLayout())); |
2471 | |
2472 | Type *RetTy = RetVT.getTypeForEVT(Context&: *DAG.getContext()); |
2473 | TargetLowering::CallLoweringInfo CLI(DAG); |
2474 | bool SignExtend = shouldSignExtendTypeInLibCall(Ty: RetTy, IsSigned); |
2475 | CLI.setDebugLoc(DL) |
2476 | .setChain(Chain) |
2477 | .setCallee(CC: CallConv, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args)) |
2478 | .setNoReturn(DoesNotReturn) |
2479 | .setDiscardResult(!IsReturnValueUsed) |
2480 | .setSExtResult(SignExtend) |
2481 | .setZExtResult(!SignExtend); |
2482 | return LowerCallTo(CLI); |
2483 | } |
2484 | |
2485 | bool SystemZTargetLowering:: |
2486 | CanLowerReturn(CallingConv::ID CallConv, |
2487 | MachineFunction &MF, bool isVarArg, |
2488 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
2489 | LLVMContext &Context, |
2490 | const Type *RetTy) const { |
2491 | // Special case that we cannot easily detect in RetCC_SystemZ since |
2492 | // i128 may not be a legal type. |
2493 | for (auto &Out : Outs) |
2494 | if (Out.ArgVT == MVT::i128) |
2495 | return false; |
2496 | |
2497 | SmallVector<CCValAssign, 16> RetLocs; |
2498 | CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context); |
2499 | return RetCCInfo.CheckReturn(Outs, Fn: RetCC_SystemZ); |
2500 | } |
2501 | |
2502 | SDValue |
2503 | SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, |
2504 | bool IsVarArg, |
2505 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
2506 | const SmallVectorImpl<SDValue> &OutVals, |
2507 | const SDLoc &DL, SelectionDAG &DAG) const { |
2508 | MachineFunction &MF = DAG.getMachineFunction(); |
2509 | |
2510 | // Integer args <=32 bits should have an extension attribute. |
2511 | verifyNarrowIntegerArgs_Ret(Outs, F: &MF.getFunction()); |
2512 | |
2513 | // Assign locations to each returned value. |
2514 | SmallVector<CCValAssign, 16> RetLocs; |
2515 | CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); |
2516 | RetCCInfo.AnalyzeReturn(Outs, Fn: RetCC_SystemZ); |
2517 | |
2518 | // Quick exit for void returns |
2519 | if (RetLocs.empty()) |
2520 | return DAG.getNode(Opcode: SystemZISD::RET_GLUE, DL, VT: MVT::Other, Operand: Chain); |
2521 | |
2522 | if (CallConv == CallingConv::GHC) |
2523 | report_fatal_error(reason: "GHC functions return void only" ); |
2524 | |
2525 | // Copy the result values into the output registers. |
2526 | SDValue Glue; |
2527 | SmallVector<SDValue, 4> RetOps; |
2528 | RetOps.push_back(Elt: Chain); |
2529 | for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { |
2530 | CCValAssign &VA = RetLocs[I]; |
2531 | SDValue RetValue = OutVals[I]; |
2532 | |
2533 | // Make the return register live on exit. |
2534 | assert(VA.isRegLoc() && "Can only return in registers!" ); |
2535 | |
2536 | // Promote the value as required. |
2537 | RetValue = convertValVTToLocVT(DAG, DL, VA, Value: RetValue); |
2538 | |
2539 | // Chain and glue the copies together. |
2540 | Register Reg = VA.getLocReg(); |
2541 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg, N: RetValue, Glue); |
2542 | Glue = Chain.getValue(R: 1); |
2543 | RetOps.push_back(Elt: DAG.getRegister(Reg, VT: VA.getLocVT())); |
2544 | } |
2545 | |
2546 | // Update chain and glue. |
2547 | RetOps[0] = Chain; |
2548 | if (Glue.getNode()) |
2549 | RetOps.push_back(Elt: Glue); |
2550 | |
2551 | return DAG.getNode(Opcode: SystemZISD::RET_GLUE, DL, VT: MVT::Other, Ops: RetOps); |
2552 | } |
2553 | |
// Return true if Op is an intrinsic node with chain that returns the CC value
// as its only (other) result. Provide the associated SystemZISD opcode and
// the mask of valid CC values if so.
2557 | static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, |
2558 | unsigned &CCValid) { |
2559 | unsigned Id = Op.getConstantOperandVal(i: 1); |
2560 | switch (Id) { |
2561 | case Intrinsic::s390_tbegin: |
2562 | Opcode = SystemZISD::TBEGIN; |
2563 | CCValid = SystemZ::CCMASK_TBEGIN; |
2564 | return true; |
2565 | |
2566 | case Intrinsic::s390_tbegin_nofloat: |
2567 | Opcode = SystemZISD::TBEGIN_NOFLOAT; |
2568 | CCValid = SystemZ::CCMASK_TBEGIN; |
2569 | return true; |
2570 | |
2571 | case Intrinsic::s390_tend: |
2572 | Opcode = SystemZISD::TEND; |
2573 | CCValid = SystemZ::CCMASK_TEND; |
2574 | return true; |
2575 | |
2576 | default: |
2577 | return false; |
2578 | } |
2579 | } |
2580 | |
// Return true if Op is an intrinsic node without chain that returns the
// CC value as its final result. Provide the associated SystemZISD
// opcode and the mask of valid CC values if so.
2584 | static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { |
2585 | unsigned Id = Op.getConstantOperandVal(i: 0); |
2586 | switch (Id) { |
2587 | case Intrinsic::s390_vpkshs: |
2588 | case Intrinsic::s390_vpksfs: |
2589 | case Intrinsic::s390_vpksgs: |
2590 | Opcode = SystemZISD::PACKS_CC; |
2591 | CCValid = SystemZ::CCMASK_VCMP; |
2592 | return true; |
2593 | |
2594 | case Intrinsic::s390_vpklshs: |
2595 | case Intrinsic::s390_vpklsfs: |
2596 | case Intrinsic::s390_vpklsgs: |
2597 | Opcode = SystemZISD::PACKLS_CC; |
2598 | CCValid = SystemZ::CCMASK_VCMP; |
2599 | return true; |
2600 | |
2601 | case Intrinsic::s390_vceqbs: |
2602 | case Intrinsic::s390_vceqhs: |
2603 | case Intrinsic::s390_vceqfs: |
2604 | case Intrinsic::s390_vceqgs: |
2605 | case Intrinsic::s390_vceqqs: |
2606 | Opcode = SystemZISD::VICMPES; |
2607 | CCValid = SystemZ::CCMASK_VCMP; |
2608 | return true; |
2609 | |
2610 | case Intrinsic::s390_vchbs: |
2611 | case Intrinsic::s390_vchhs: |
2612 | case Intrinsic::s390_vchfs: |
2613 | case Intrinsic::s390_vchgs: |
2614 | case Intrinsic::s390_vchqs: |
2615 | Opcode = SystemZISD::VICMPHS; |
2616 | CCValid = SystemZ::CCMASK_VCMP; |
2617 | return true; |
2618 | |
2619 | case Intrinsic::s390_vchlbs: |
2620 | case Intrinsic::s390_vchlhs: |
2621 | case Intrinsic::s390_vchlfs: |
2622 | case Intrinsic::s390_vchlgs: |
2623 | case Intrinsic::s390_vchlqs: |
2624 | Opcode = SystemZISD::VICMPHLS; |
2625 | CCValid = SystemZ::CCMASK_VCMP; |
2626 | return true; |
2627 | |
2628 | case Intrinsic::s390_vtm: |
2629 | Opcode = SystemZISD::VTM; |
2630 | CCValid = SystemZ::CCMASK_VCMP; |
2631 | return true; |
2632 | |
2633 | case Intrinsic::s390_vfaebs: |
2634 | case Intrinsic::s390_vfaehs: |
2635 | case Intrinsic::s390_vfaefs: |
2636 | Opcode = SystemZISD::VFAE_CC; |
2637 | CCValid = SystemZ::CCMASK_ANY; |
2638 | return true; |
2639 | |
2640 | case Intrinsic::s390_vfaezbs: |
2641 | case Intrinsic::s390_vfaezhs: |
2642 | case Intrinsic::s390_vfaezfs: |
2643 | Opcode = SystemZISD::VFAEZ_CC; |
2644 | CCValid = SystemZ::CCMASK_ANY; |
2645 | return true; |
2646 | |
2647 | case Intrinsic::s390_vfeebs: |
2648 | case Intrinsic::s390_vfeehs: |
2649 | case Intrinsic::s390_vfeefs: |
2650 | Opcode = SystemZISD::VFEE_CC; |
2651 | CCValid = SystemZ::CCMASK_ANY; |
2652 | return true; |
2653 | |
2654 | case Intrinsic::s390_vfeezbs: |
2655 | case Intrinsic::s390_vfeezhs: |
2656 | case Intrinsic::s390_vfeezfs: |
2657 | Opcode = SystemZISD::VFEEZ_CC; |
2658 | CCValid = SystemZ::CCMASK_ANY; |
2659 | return true; |
2660 | |
2661 | case Intrinsic::s390_vfenebs: |
2662 | case Intrinsic::s390_vfenehs: |
2663 | case Intrinsic::s390_vfenefs: |
2664 | Opcode = SystemZISD::VFENE_CC; |
2665 | CCValid = SystemZ::CCMASK_ANY; |
2666 | return true; |
2667 | |
2668 | case Intrinsic::s390_vfenezbs: |
2669 | case Intrinsic::s390_vfenezhs: |
2670 | case Intrinsic::s390_vfenezfs: |
2671 | Opcode = SystemZISD::VFENEZ_CC; |
2672 | CCValid = SystemZ::CCMASK_ANY; |
2673 | return true; |
2674 | |
2675 | case Intrinsic::s390_vistrbs: |
2676 | case Intrinsic::s390_vistrhs: |
2677 | case Intrinsic::s390_vistrfs: |
2678 | Opcode = SystemZISD::VISTR_CC; |
2679 | CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3; |
2680 | return true; |
2681 | |
2682 | case Intrinsic::s390_vstrcbs: |
2683 | case Intrinsic::s390_vstrchs: |
2684 | case Intrinsic::s390_vstrcfs: |
2685 | Opcode = SystemZISD::VSTRC_CC; |
2686 | CCValid = SystemZ::CCMASK_ANY; |
2687 | return true; |
2688 | |
2689 | case Intrinsic::s390_vstrczbs: |
2690 | case Intrinsic::s390_vstrczhs: |
2691 | case Intrinsic::s390_vstrczfs: |
2692 | Opcode = SystemZISD::VSTRCZ_CC; |
2693 | CCValid = SystemZ::CCMASK_ANY; |
2694 | return true; |
2695 | |
2696 | case Intrinsic::s390_vstrsb: |
2697 | case Intrinsic::s390_vstrsh: |
2698 | case Intrinsic::s390_vstrsf: |
2699 | Opcode = SystemZISD::VSTRS_CC; |
2700 | CCValid = SystemZ::CCMASK_ANY; |
2701 | return true; |
2702 | |
2703 | case Intrinsic::s390_vstrszb: |
2704 | case Intrinsic::s390_vstrszh: |
2705 | case Intrinsic::s390_vstrszf: |
2706 | Opcode = SystemZISD::VSTRSZ_CC; |
2707 | CCValid = SystemZ::CCMASK_ANY; |
2708 | return true; |
2709 | |
2710 | case Intrinsic::s390_vfcedbs: |
2711 | case Intrinsic::s390_vfcesbs: |
2712 | Opcode = SystemZISD::VFCMPES; |
2713 | CCValid = SystemZ::CCMASK_VCMP; |
2714 | return true; |
2715 | |
2716 | case Intrinsic::s390_vfchdbs: |
2717 | case Intrinsic::s390_vfchsbs: |
2718 | Opcode = SystemZISD::VFCMPHS; |
2719 | CCValid = SystemZ::CCMASK_VCMP; |
2720 | return true; |
2721 | |
2722 | case Intrinsic::s390_vfchedbs: |
2723 | case Intrinsic::s390_vfchesbs: |
2724 | Opcode = SystemZISD::VFCMPHES; |
2725 | CCValid = SystemZ::CCMASK_VCMP; |
2726 | return true; |
2727 | |
2728 | case Intrinsic::s390_vftcidb: |
2729 | case Intrinsic::s390_vftcisb: |
2730 | Opcode = SystemZISD::VFTCI; |
2731 | CCValid = SystemZ::CCMASK_VCMP; |
2732 | return true; |
2733 | |
2734 | case Intrinsic::s390_tdc: |
2735 | Opcode = SystemZISD::TDC; |
2736 | CCValid = SystemZ::CCMASK_TDC; |
2737 | return true; |
2738 | |
2739 | default: |
2740 | return false; |
2741 | } |
2742 | } |
2743 | |
2744 | // Emit an intrinsic with chain and an explicit CC register result. |
2745 | static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, |
2746 | unsigned Opcode) { |
2747 | // Copy all operands except the intrinsic ID. |
2748 | unsigned NumOps = Op.getNumOperands(); |
2749 | SmallVector<SDValue, 6> Ops; |
2750 | Ops.reserve(N: NumOps - 1); |
2751 | Ops.push_back(Elt: Op.getOperand(i: 0)); |
2752 | for (unsigned I = 2; I < NumOps; ++I) |
2753 | Ops.push_back(Elt: Op.getOperand(i: I)); |
2754 | |
2755 | assert(Op->getNumValues() == 2 && "Expected only CC result and chain" ); |
2756 | SDVTList RawVTs = DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other); |
2757 | SDValue Intr = DAG.getNode(Opcode, DL: SDLoc(Op), VTList: RawVTs, Ops); |
2758 | SDValue OldChain = SDValue(Op.getNode(), 1); |
2759 | SDValue NewChain = SDValue(Intr.getNode(), 1); |
2760 | DAG.ReplaceAllUsesOfValueWith(From: OldChain, To: NewChain); |
2761 | return Intr.getNode(); |
2762 | } |
2763 | |
2764 | // Emit an intrinsic with an explicit CC register result. |
2765 | static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, |
2766 | unsigned Opcode) { |
2767 | // Copy all operands except the intrinsic ID. |
2768 | SDLoc DL(Op); |
2769 | unsigned NumOps = Op.getNumOperands(); |
2770 | SmallVector<SDValue, 6> Ops; |
2771 | Ops.reserve(N: NumOps - 1); |
2772 | for (unsigned I = 1; I < NumOps; ++I) { |
2773 | SDValue CurrOper = Op.getOperand(i: I); |
2774 | if (CurrOper.getValueType() == MVT::f16) { |
2775 | assert((Op.getConstantOperandVal(0) == Intrinsic::s390_tdc && I == 1) && |
2776 | "Unhandled intrinsic with f16 operand." ); |
2777 | CurrOper = DAG.getFPExtendOrRound(Op: CurrOper, DL, VT: MVT::f32); |
2778 | } |
2779 | Ops.push_back(Elt: CurrOper); |
2780 | } |
2781 | |
2782 | SDValue Intr = DAG.getNode(Opcode, DL, VTList: Op->getVTList(), Ops); |
2783 | return Intr.getNode(); |
2784 | } |
2785 | |
2786 | // CC is a comparison that will be implemented using an integer or |
2787 | // floating-point comparison. Return the condition code mask for |
2788 | // a branch on true. In the integer case, CCMASK_CMP_UO is set for |
2789 | // unsigned comparisons and clear for signed ones. In the floating-point |
2790 | // case, CCMASK_CMP_UO has its normal mask meaning (unordered). |
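// For example, SETGT and SETOGT both map to CCMASK_CMP_GT, while SETUGT maps
// to CCMASK_CMP_UO | CCMASK_CMP_GT.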
2791 | static unsigned CCMaskForCondCode(ISD::CondCode CC) { |
2792 | #define CONV(X) \ |
2793 | case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \ |
2794 | case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \ |
2795 | case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X |
2796 | |
2797 | switch (CC) { |
2798 | default: |
2799 | llvm_unreachable("Invalid integer condition!" ); |
2800 | |
2801 | CONV(EQ); |
2802 | CONV(NE); |
2803 | CONV(GT); |
2804 | CONV(GE); |
2805 | CONV(LT); |
2806 | CONV(LE); |
2807 | |
2808 | case ISD::SETO: return SystemZ::CCMASK_CMP_O; |
2809 | case ISD::SETUO: return SystemZ::CCMASK_CMP_UO; |
2810 | } |
2811 | #undef CONV |
2812 | } |
2813 | |
2814 | // If C can be converted to a comparison against zero, adjust the operands |
2815 | // as necessary. |
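// For example, "X > -1" is equivalent to "X >= 0": XOR-ing CCMASK_CMP_EQ
// into the mask turns GT into GE (and LE into LT), and Op1 becomes 0.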
2816 | static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { |
2817 | if (C.ICmpType == SystemZICMP::UnsignedOnly) |
2818 | return; |
2819 | |
2820 | auto *ConstOp1 = dyn_cast<ConstantSDNode>(Val: C.Op1.getNode()); |
2821 | if (!ConstOp1 || ConstOp1->getValueSizeInBits(ResNo: 0) > 64) |
2822 | return; |
2823 | |
2824 | int64_t Value = ConstOp1->getSExtValue(); |
2825 | if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) || |
2826 | (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) || |
2827 | (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) || |
2828 | (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) { |
2829 | C.CCMask ^= SystemZ::CCMASK_CMP_EQ; |
2830 | C.Op1 = DAG.getConstant(Val: 0, DL, VT: C.Op1.getValueType()); |
2831 | } |
2832 | } |
2833 | |
2834 | // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI, |
2835 | // adjust the operands as necessary. |
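// For example, an i8 zero-extending load compared for equality against a
// constant in [0, 255] can be handled by CLI directly on the memory byte,
// e.g. "cli 0(%r1), 200".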
2836 | static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, |
2837 | Comparison &C) { |
  // For us to make any changes, it must be a comparison between a single-use
  // load and a constant.
2840 | if (!C.Op0.hasOneUse() || |
2841 | C.Op0.getOpcode() != ISD::LOAD || |
2842 | C.Op1.getOpcode() != ISD::Constant) |
2843 | return; |
2844 | |
2845 | // We must have an 8- or 16-bit load. |
2846 | auto *Load = cast<LoadSDNode>(Val&: C.Op0); |
2847 | unsigned NumBits = Load->getMemoryVT().getSizeInBits(); |
2848 | if ((NumBits != 8 && NumBits != 16) || |
2849 | NumBits != Load->getMemoryVT().getStoreSizeInBits()) |
2850 | return; |
2851 | |
2852 | // The load must be an extending one and the constant must be within the |
2853 | // range of the unextended value. |
2854 | auto *ConstOp1 = cast<ConstantSDNode>(Val&: C.Op1); |
2855 | if (!ConstOp1 || ConstOp1->getValueSizeInBits(ResNo: 0) > 64) |
2856 | return; |
2857 | uint64_t Value = ConstOp1->getZExtValue(); |
2858 | uint64_t Mask = (1 << NumBits) - 1; |
2859 | if (Load->getExtensionType() == ISD::SEXTLOAD) { |
2860 | // Make sure that ConstOp1 is in range of C.Op0. |
2861 | int64_t SignedValue = ConstOp1->getSExtValue(); |
2862 | if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask) |
2863 | return; |
2864 | if (C.ICmpType != SystemZICMP::SignedOnly) { |
2865 | // Unsigned comparison between two sign-extended values is equivalent |
2866 | // to unsigned comparison between two zero-extended values. |
2867 | Value &= Mask; |
2868 | } else if (NumBits == 8) { |
2869 | // Try to treat the comparison as unsigned, so that we can use CLI. |
2870 | // Adjust CCMask and Value as necessary. |
2871 | if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT) |
2872 | // Test whether the high bit of the byte is set. |
2873 | Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT; |
2874 | else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE) |
2875 | // Test whether the high bit of the byte is clear. |
2876 | Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT; |
2877 | else |
2878 | // No instruction exists for this combination. |
2879 | return; |
2880 | C.ICmpType = SystemZICMP::UnsignedOnly; |
2881 | } |
2882 | } else if (Load->getExtensionType() == ISD::ZEXTLOAD) { |
2883 | if (Value > Mask) |
2884 | return; |
2885 | // If the constant is in range, we can use any comparison. |
2886 | C.ICmpType = SystemZICMP::Any; |
2887 | } else |
2888 | return; |
2889 | |
2890 | // Make sure that the first operand is an i32 of the right extension type. |
2891 | ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ? |
2892 | ISD::SEXTLOAD : |
2893 | ISD::ZEXTLOAD); |
2894 | if (C.Op0.getValueType() != MVT::i32 || |
2895 | Load->getExtensionType() != ExtType) { |
2896 | C.Op0 = DAG.getExtLoad(ExtType, dl: SDLoc(Load), VT: MVT::i32, Chain: Load->getChain(), |
2897 | Ptr: Load->getBasePtr(), PtrInfo: Load->getPointerInfo(), |
2898 | MemVT: Load->getMemoryVT(), Alignment: Load->getAlign(), |
2899 | MMOFlags: Load->getMemOperand()->getFlags()); |
2900 | // Update the chain uses. |
2901 | DAG.ReplaceAllUsesOfValueWith(From: SDValue(Load, 1), To: C.Op0.getValue(R: 1)); |
2902 | } |
2903 | |
2904 | // Make sure that the second operand is an i32 with the right value. |
2905 | if (C.Op1.getValueType() != MVT::i32 || |
2906 | Value != ConstOp1->getZExtValue()) |
2907 | C.Op1 = DAG.getConstant(Val: (uint32_t)Value, DL, VT: MVT::i32); |
2908 | } |
2909 | |
2910 | // Return true if Op is either an unextended load, or a load suitable |
2911 | // for integer register-memory comparisons of type ICmpType. |
2912 | static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) { |
2913 | auto *Load = dyn_cast<LoadSDNode>(Val: Op.getNode()); |
2914 | if (Load) { |
2915 | // There are no instructions to compare a register with a memory byte. |
2916 | if (Load->getMemoryVT() == MVT::i8) |
2917 | return false; |
2918 | // Otherwise decide on extension type. |
2919 | switch (Load->getExtensionType()) { |
2920 | case ISD::NON_EXTLOAD: |
2921 | return true; |
2922 | case ISD::SEXTLOAD: |
2923 | return ICmpType != SystemZICMP::UnsignedOnly; |
2924 | case ISD::ZEXTLOAD: |
2925 | return ICmpType != SystemZICMP::SignedOnly; |
2926 | default: |
2927 | break; |
2928 | } |
2929 | } |
2930 | return false; |
2931 | } |
2932 | |
2933 | // Return true if it is better to swap the operands of C. |
2934 | static bool shouldSwapCmpOperands(const Comparison &C) { |
2935 | // Leave i128 and f128 comparisons alone, since they have no memory forms. |
2936 | if (C.Op0.getValueType() == MVT::i128) |
2937 | return false; |
2938 | if (C.Op0.getValueType() == MVT::f128) |
2939 | return false; |
2940 | |
2941 | // Always keep a floating-point constant second, since comparisons with |
2942 | // zero can use LOAD TEST and comparisons with other constants make a |
2943 | // natural memory operand. |
2944 | if (isa<ConstantFPSDNode>(Val: C.Op1)) |
2945 | return false; |
2946 | |
2947 | // Never swap comparisons with zero since there are many ways to optimize |
2948 | // those later. |
2949 | auto *ConstOp1 = dyn_cast<ConstantSDNode>(Val: C.Op1); |
2950 | if (ConstOp1 && ConstOp1->getZExtValue() == 0) |
2951 | return false; |
2952 | |
2953 | // Also keep natural memory operands second if the loaded value is |
2954 | // only used here. Several comparisons have memory forms. |
2955 | if (isNaturalMemoryOperand(Op: C.Op1, ICmpType: C.ICmpType) && C.Op1.hasOneUse()) |
2956 | return false; |
2957 | |
  // Look for cases where C.Op0 is a single-use load and C.Op1 isn't.
2959 | // In that case we generally prefer the memory to be second. |
2960 | if (isNaturalMemoryOperand(Op: C.Op0, ICmpType: C.ICmpType) && C.Op0.hasOneUse()) { |
2961 | // The only exceptions are when the second operand is a constant and |
2962 | // we can use things like CHHSI. |
2963 | if (!ConstOp1) |
2964 | return true; |
2965 | // The unsigned memory-immediate instructions can handle 16-bit |
2966 | // unsigned integers. |
2967 | if (C.ICmpType != SystemZICMP::SignedOnly && |
2968 | isUInt<16>(x: ConstOp1->getZExtValue())) |
2969 | return false; |
2970 | // The signed memory-immediate instructions can handle 16-bit |
2971 | // signed integers. |
2972 | if (C.ICmpType != SystemZICMP::UnsignedOnly && |
2973 | isInt<16>(x: ConstOp1->getSExtValue())) |
2974 | return false; |
2975 | return true; |
2976 | } |
2977 | |
2978 | // Try to promote the use of CGFR and CLGFR. |
2979 | unsigned Opcode0 = C.Op0.getOpcode(); |
2980 | if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND) |
2981 | return true; |
2982 | if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND) |
2983 | return true; |
2984 | if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND && |
2985 | C.Op0.getOperand(i: 1).getOpcode() == ISD::Constant && |
2986 | C.Op0.getConstantOperandVal(i: 1) == 0xffffffff) |
2987 | return true; |
2988 | |
2989 | return false; |
2990 | } |
2991 | |
2992 | // Check whether C tests for equality between X and Y and whether X - Y |
2993 | // or Y - X is also computed. In that case it's better to compare the |
2994 | // result of the subtraction against zero. |
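// For example, if a function computes both "a - b" and "a == b", the SUB can
// set CC itself and the separate comparison with zero disappears.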
2995 | static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, |
2996 | Comparison &C) { |
2997 | if (C.CCMask == SystemZ::CCMASK_CMP_EQ || |
2998 | C.CCMask == SystemZ::CCMASK_CMP_NE) { |
2999 | for (SDNode *N : C.Op0->users()) { |
3000 | if (N->getOpcode() == ISD::SUB && |
3001 | ((N->getOperand(Num: 0) == C.Op0 && N->getOperand(Num: 1) == C.Op1) || |
3002 | (N->getOperand(Num: 0) == C.Op1 && N->getOperand(Num: 1) == C.Op0))) { |
3003 | // Disable the nsw and nuw flags: the backend needs to handle |
3004 | // overflow as well during comparison elimination. |
3005 | N->dropFlags(Mask: SDNodeFlags::NoWrap); |
3006 | C.Op0 = SDValue(N, 0); |
3007 | C.Op1 = DAG.getConstant(Val: 0, DL, VT: N->getValueType(ResNo: 0)); |
3008 | return; |
3009 | } |
3010 | } |
3011 | } |
3012 | } |
3013 | |
// Check whether C compares a floating-point value with zero and if that
// floating-point value is also negated. In this case we can use the
// negation to set CC, avoiding the need for separate LOAD AND TEST and
// LOAD (NEGATIVE/COMPLEMENT) instructions.
3018 | static void adjustForFNeg(Comparison &C) { |
3019 | // This optimization is invalid for strict comparisons, since FNEG |
3020 | // does not raise any exceptions. |
3021 | if (C.Chain) |
3022 | return; |
3023 | auto *C1 = dyn_cast<ConstantFPSDNode>(Val&: C.Op1); |
3024 | if (C1 && C1->isZero()) { |
3025 | for (SDNode *N : C.Op0->users()) { |
3026 | if (N->getOpcode() == ISD::FNEG) { |
3027 | C.Op0 = SDValue(N, 0); |
3028 | C.CCMask = SystemZ::reverseCCMask(CCMask: C.CCMask); |
3029 | return; |
3030 | } |
3031 | } |
3032 | } |
3033 | } |
3034 | |
3035 | // Check whether C compares (shl X, 32) with 0 and whether X is |
3036 | // also sign-extended. In that case it is better to test the result |
3037 | // of the sign extension using LTGFR. |
3038 | // |
3039 | // This case is important because InstCombine transforms a comparison |
3040 | // with (sext (trunc X)) into a comparison with (shl X, 32). |
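// The equivalence holds because the sign and zeroness of (shl X, 32) depend
// only on the low 32 bits of X, exactly as for (sext_inreg X, i32).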
3041 | static void adjustForLTGFR(Comparison &C) { |
3042 | // Check for a comparison between (shl X, 32) and 0. |
3043 | if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 && |
3044 | C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) { |
3045 | auto *C1 = dyn_cast<ConstantSDNode>(Val: C.Op0.getOperand(i: 1)); |
3046 | if (C1 && C1->getZExtValue() == 32) { |
3047 | SDValue ShlOp0 = C.Op0.getOperand(i: 0); |
3048 | // See whether X has any SIGN_EXTEND_INREG uses. |
3049 | for (SDNode *N : ShlOp0->users()) { |
3050 | if (N->getOpcode() == ISD::SIGN_EXTEND_INREG && |
3051 | cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT() == MVT::i32) { |
3052 | C.Op0 = SDValue(N, 0); |
3053 | return; |
3054 | } |
3055 | } |
3056 | } |
3057 | } |
3058 | } |
3059 | |
3060 | // If C compares the truncation of an extending load, try to compare |
3061 | // the untruncated value instead. This exposes more opportunities to |
3062 | // reuse CC. |
3063 | static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, |
3064 | Comparison &C) { |
3065 | if (C.Op0.getOpcode() == ISD::TRUNCATE && |
3066 | C.Op0.getOperand(i: 0).getOpcode() == ISD::LOAD && |
3067 | C.Op1.getOpcode() == ISD::Constant && |
3068 | cast<ConstantSDNode>(Val&: C.Op1)->getValueSizeInBits(ResNo: 0) <= 64 && |
3069 | C.Op1->getAsZExtVal() == 0) { |
3070 | auto *L = cast<LoadSDNode>(Val: C.Op0.getOperand(i: 0)); |
3071 | if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <= |
3072 | C.Op0.getValueSizeInBits().getFixedValue()) { |
3073 | unsigned Type = L->getExtensionType(); |
3074 | if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) || |
3075 | (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) { |
3076 | C.Op0 = C.Op0.getOperand(i: 0); |
3077 | C.Op1 = DAG.getConstant(Val: 0, DL, VT: C.Op0.getValueType()); |
3078 | } |
3079 | } |
3080 | } |
3081 | } |
3082 | |
3083 | // Return true if shift operation N has an in-range constant shift value. |
3084 | // Store it in ShiftVal if so. |
3085 | static bool isSimpleShift(SDValue N, unsigned &ShiftVal) { |
3086 | auto *Shift = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1)); |
3087 | if (!Shift) |
3088 | return false; |
3089 | |
3090 | uint64_t Amount = Shift->getZExtValue(); |
3091 | if (Amount >= N.getValueSizeInBits()) |
3092 | return false; |
3093 | |
3094 | ShiftVal = Amount; |
3095 | return true; |
3096 | } |
3097 | |
// Check whether an AND with Mask is suitable for a TEST UNDER MASK
// instruction and whether the CC value is descriptive enough to handle
// a comparison of type ICmpType between the AND result and CmpVal.
// CCMask says which comparison result is being tested and BitSize is
// the number of bits in the operands. If TEST UNDER MASK can be used,
// return the corresponding CC mask, otherwise return 0.
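// For example, testing "(X & 0xff00) == 0" selects mask 0xff00 (a TMLL
// immediate) and yields CCMASK_TM_ALL_0, the CC for "all selected bits zero".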
3104 | static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, |
3105 | uint64_t Mask, uint64_t CmpVal, |
3106 | unsigned ICmpType) { |
3107 | assert(Mask != 0 && "ANDs with zero should have been removed by now" ); |
3108 | |
3109 | // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL. |
3110 | if (!SystemZ::isImmLL(Val: Mask) && !SystemZ::isImmLH(Val: Mask) && |
3111 | !SystemZ::isImmHL(Val: Mask) && !SystemZ::isImmHH(Val: Mask)) |
3112 | return 0; |
3113 | |
3114 | // Work out the masks for the lowest and highest bits. |
3115 | uint64_t High = llvm::bit_floor(Value: Mask); |
3116 | uint64_t Low = uint64_t(1) << llvm::countr_zero(Val: Mask); |
3117 | |
3118 | // Signed ordered comparisons are effectively unsigned if the sign |
3119 | // bit is dropped. |
3120 | bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly); |
3121 | |
3122 | // Check for equality comparisons with 0, or the equivalent. |
3123 | if (CmpVal == 0) { |
3124 | if (CCMask == SystemZ::CCMASK_CMP_EQ) |
3125 | return SystemZ::CCMASK_TM_ALL_0; |
3126 | if (CCMask == SystemZ::CCMASK_CMP_NE) |
3127 | return SystemZ::CCMASK_TM_SOME_1; |
3128 | } |
3129 | if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) { |
3130 | if (CCMask == SystemZ::CCMASK_CMP_LT) |
3131 | return SystemZ::CCMASK_TM_ALL_0; |
3132 | if (CCMask == SystemZ::CCMASK_CMP_GE) |
3133 | return SystemZ::CCMASK_TM_SOME_1; |
3134 | } |
3135 | if (EffectivelyUnsigned && CmpVal < Low) { |
3136 | if (CCMask == SystemZ::CCMASK_CMP_LE) |
3137 | return SystemZ::CCMASK_TM_ALL_0; |
3138 | if (CCMask == SystemZ::CCMASK_CMP_GT) |
3139 | return SystemZ::CCMASK_TM_SOME_1; |
3140 | } |
3141 | |
3142 | // Check for equality comparisons with the mask, or the equivalent. |
3143 | if (CmpVal == Mask) { |
3144 | if (CCMask == SystemZ::CCMASK_CMP_EQ) |
3145 | return SystemZ::CCMASK_TM_ALL_1; |
3146 | if (CCMask == SystemZ::CCMASK_CMP_NE) |
3147 | return SystemZ::CCMASK_TM_SOME_0; |
3148 | } |
3149 | if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) { |
3150 | if (CCMask == SystemZ::CCMASK_CMP_GT) |
3151 | return SystemZ::CCMASK_TM_ALL_1; |
3152 | if (CCMask == SystemZ::CCMASK_CMP_LE) |
3153 | return SystemZ::CCMASK_TM_SOME_0; |
3154 | } |
3155 | if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) { |
3156 | if (CCMask == SystemZ::CCMASK_CMP_GE) |
3157 | return SystemZ::CCMASK_TM_ALL_1; |
3158 | if (CCMask == SystemZ::CCMASK_CMP_LT) |
3159 | return SystemZ::CCMASK_TM_SOME_0; |
3160 | } |
3161 | |
3162 | // Check for ordered comparisons with the top bit. |
3163 | if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) { |
3164 | if (CCMask == SystemZ::CCMASK_CMP_LE) |
3165 | return SystemZ::CCMASK_TM_MSB_0; |
3166 | if (CCMask == SystemZ::CCMASK_CMP_GT) |
3167 | return SystemZ::CCMASK_TM_MSB_1; |
3168 | } |
3169 | if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) { |
3170 | if (CCMask == SystemZ::CCMASK_CMP_LT) |
3171 | return SystemZ::CCMASK_TM_MSB_0; |
3172 | if (CCMask == SystemZ::CCMASK_CMP_GE) |
3173 | return SystemZ::CCMASK_TM_MSB_1; |
3174 | } |
3175 | |
3176 | // If there are just two bits, we can do equality checks for Low and High |
3177 | // as well. |
3178 | if (Mask == Low + High) { |
3179 | if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low) |
3180 | return SystemZ::CCMASK_TM_MIXED_MSB_0; |
3181 | if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low) |
3182 | return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY; |
3183 | if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High) |
3184 | return SystemZ::CCMASK_TM_MIXED_MSB_1; |
3185 | if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High) |
3186 | return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY; |
3187 | } |
3188 | |
3189 | // Looks like we've exhausted our options. |
3190 | return 0; |
3191 | } |
3192 | |
3193 | // See whether C can be implemented as a TEST UNDER MASK instruction. |
3194 | // Update the arguments with the TM version if so. |
3195 | static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, |
3196 | Comparison &C) { |
3197 | // Use VECTOR TEST UNDER MASK for i128 operations. |
3198 | if (C.Op0.getValueType() == MVT::i128) { |
3199 | // We can use VTM for EQ/NE comparisons of x & y against 0. |
3200 | if (C.Op0.getOpcode() == ISD::AND && |
3201 | (C.CCMask == SystemZ::CCMASK_CMP_EQ || |
3202 | C.CCMask == SystemZ::CCMASK_CMP_NE)) { |
3203 | auto *Mask = dyn_cast<ConstantSDNode>(Val&: C.Op1); |
3204 | if (Mask && Mask->getAPIntValue() == 0) { |
3205 | C.Opcode = SystemZISD::VTM; |
3206 | C.Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v16i8, Operand: C.Op0.getOperand(i: 1)); |
3207 | C.Op0 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v16i8, Operand: C.Op0.getOperand(i: 0)); |
3208 | C.CCValid = SystemZ::CCMASK_VCMP; |
3209 | if (C.CCMask == SystemZ::CCMASK_CMP_EQ) |
3210 | C.CCMask = SystemZ::CCMASK_VCMP_ALL; |
3211 | else |
3212 | C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid; |
3213 | } |
3214 | } |
3215 | return; |
3216 | } |
3217 | |
3218 | // Check that we have a comparison with a constant. |
3219 | auto *ConstOp1 = dyn_cast<ConstantSDNode>(Val&: C.Op1); |
3220 | if (!ConstOp1) |
3221 | return; |
3222 | uint64_t CmpVal = ConstOp1->getZExtValue(); |
3223 | |
3224 | // Check whether the nonconstant input is an AND with a constant mask. |
3225 | Comparison NewC(C); |
3226 | uint64_t MaskVal; |
3227 | ConstantSDNode *Mask = nullptr; |
3228 | if (C.Op0.getOpcode() == ISD::AND) { |
3229 | NewC.Op0 = C.Op0.getOperand(i: 0); |
3230 | NewC.Op1 = C.Op0.getOperand(i: 1); |
3231 | Mask = dyn_cast<ConstantSDNode>(Val&: NewC.Op1); |
3232 | if (!Mask) |
3233 | return; |
3234 | MaskVal = Mask->getZExtValue(); |
3235 | } else { |
3236 | // There is no instruction to compare with a 64-bit immediate |
3237 | // so use TMHH instead if possible. We need an unsigned ordered |
3238 | // comparison with an i64 immediate. |
3239 | if (NewC.Op0.getValueType() != MVT::i64 || |
3240 | NewC.CCMask == SystemZ::CCMASK_CMP_EQ || |
3241 | NewC.CCMask == SystemZ::CCMASK_CMP_NE || |
3242 | NewC.ICmpType == SystemZICMP::SignedOnly) |
3243 | return; |
3244 | // Convert LE and GT comparisons into LT and GE. |
3245 | if (NewC.CCMask == SystemZ::CCMASK_CMP_LE || |
3246 | NewC.CCMask == SystemZ::CCMASK_CMP_GT) { |
3247 | if (CmpVal == uint64_t(-1)) |
3248 | return; |
3249 | CmpVal += 1; |
3250 | NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ; |
3251 | } |
    // If the low N bits of Op1 are zero, then the low N bits of Op0 can
    // be masked off without changing the result.
3254 | MaskVal = -(CmpVal & -CmpVal); |
3255 | NewC.ICmpType = SystemZICMP::UnsignedOnly; |
3256 | } |
3257 | if (!MaskVal) |
3258 | return; |
3259 | |
3260 | // Check whether the combination of mask, comparison value and comparison |
3261 | // type are suitable. |
3262 | unsigned BitSize = NewC.Op0.getValueSizeInBits(); |
3263 | unsigned NewCCMask, ShiftVal; |
3264 | if (NewC.ICmpType != SystemZICMP::SignedOnly && |
3265 | NewC.Op0.getOpcode() == ISD::SHL && |
3266 | isSimpleShift(N: NewC.Op0, ShiftVal) && |
3267 | (MaskVal >> ShiftVal != 0) && |
3268 | ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal && |
3269 | (NewCCMask = getTestUnderMaskCond(BitSize, CCMask: NewC.CCMask, |
3270 | Mask: MaskVal >> ShiftVal, |
3271 | CmpVal: CmpVal >> ShiftVal, |
3272 | ICmpType: SystemZICMP::Any))) { |
3273 | NewC.Op0 = NewC.Op0.getOperand(i: 0); |
3274 | MaskVal >>= ShiftVal; |
3275 | } else if (NewC.ICmpType != SystemZICMP::SignedOnly && |
3276 | NewC.Op0.getOpcode() == ISD::SRL && |
3277 | isSimpleShift(N: NewC.Op0, ShiftVal) && |
3278 | (MaskVal << ShiftVal != 0) && |
3279 | ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal && |
3280 | (NewCCMask = getTestUnderMaskCond(BitSize, CCMask: NewC.CCMask, |
3281 | Mask: MaskVal << ShiftVal, |
3282 | CmpVal: CmpVal << ShiftVal, |
3283 | ICmpType: SystemZICMP::UnsignedOnly))) { |
3284 | NewC.Op0 = NewC.Op0.getOperand(i: 0); |
3285 | MaskVal <<= ShiftVal; |
3286 | } else { |
3287 | NewCCMask = getTestUnderMaskCond(BitSize, CCMask: NewC.CCMask, Mask: MaskVal, CmpVal, |
3288 | ICmpType: NewC.ICmpType); |
3289 | if (!NewCCMask) |
3290 | return; |
3291 | } |
3292 | |
3293 | // Go ahead and make the change. |
3294 | C.Opcode = SystemZISD::TM; |
3295 | C.Op0 = NewC.Op0; |
3296 | if (Mask && Mask->getZExtValue() == MaskVal) |
3297 | C.Op1 = SDValue(Mask, 0); |
3298 | else |
3299 | C.Op1 = DAG.getConstant(Val: MaskVal, DL, VT: C.Op0.getValueType()); |
3300 | C.CCValid = SystemZ::CCMASK_TM; |
3301 | C.CCMask = NewCCMask; |
3302 | } |
3303 | |
3304 | // Implement i128 comparison in vector registers. |
3305 | static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, |
3306 | Comparison &C) { |
3307 | if (C.Opcode != SystemZISD::ICMP) |
3308 | return; |
3309 | if (C.Op0.getValueType() != MVT::i128) |
3310 | return; |
3311 | |
3312 | // Recognize vector comparison reductions. |
3313 | if ((C.CCMask == SystemZ::CCMASK_CMP_EQ || |
3314 | C.CCMask == SystemZ::CCMASK_CMP_NE) && |
3315 | (isNullConstant(V: C.Op1) || isAllOnesConstant(V: C.Op1))) { |
3316 | bool CmpEq = C.CCMask == SystemZ::CCMASK_CMP_EQ; |
3317 | bool CmpNull = isNullConstant(V: C.Op1); |
3318 | SDValue Src = peekThroughBitcasts(V: C.Op0); |
3319 | if (Src.hasOneUse() && isBitwiseNot(V: Src)) { |
3320 | Src = Src.getOperand(i: 0); |
3321 | CmpNull = !CmpNull; |
3322 | } |
3323 | unsigned Opcode = 0; |
3324 | if (Src.hasOneUse()) { |
3325 | switch (Src.getOpcode()) { |
3326 | case SystemZISD::VICMPE: Opcode = SystemZISD::VICMPES; break; |
3327 | case SystemZISD::VICMPH: Opcode = SystemZISD::VICMPHS; break; |
3328 | case SystemZISD::VICMPHL: Opcode = SystemZISD::VICMPHLS; break; |
3329 | case SystemZISD::VFCMPE: Opcode = SystemZISD::VFCMPES; break; |
3330 | case SystemZISD::VFCMPH: Opcode = SystemZISD::VFCMPHS; break; |
3331 | case SystemZISD::VFCMPHE: Opcode = SystemZISD::VFCMPHES; break; |
3332 | default: break; |
3333 | } |
3334 | } |
3335 | if (Opcode) { |
3336 | C.Opcode = Opcode; |
3337 | C.Op0 = Src->getOperand(Num: 0); |
3338 | C.Op1 = Src->getOperand(Num: 1); |
3339 | C.CCValid = SystemZ::CCMASK_VCMP; |
3340 | C.CCMask = CmpNull ? SystemZ::CCMASK_VCMP_NONE : SystemZ::CCMASK_VCMP_ALL; |
3341 | if (!CmpEq) |
3342 | C.CCMask ^= C.CCValid; |
3343 | return; |
3344 | } |
3345 | } |
3346 | |
3347 | // Everything below here is not useful if we have native i128 compares. |
3348 | if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3()) |
3349 | return; |
3350 | |
3351 | // (In-)Equality comparisons can be implemented via VCEQGS. |
3352 | if (C.CCMask == SystemZ::CCMASK_CMP_EQ || |
3353 | C.CCMask == SystemZ::CCMASK_CMP_NE) { |
3354 | C.Opcode = SystemZISD::VICMPES; |
3355 | C.Op0 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v2i64, Operand: C.Op0); |
3356 | C.Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v2i64, Operand: C.Op1); |
3357 | C.CCValid = SystemZ::CCMASK_VCMP; |
3358 | if (C.CCMask == SystemZ::CCMASK_CMP_EQ) |
3359 | C.CCMask = SystemZ::CCMASK_VCMP_ALL; |
3360 | else |
3361 | C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid; |
3362 | return; |
3363 | } |
3364 | |
3365 | // Normalize other comparisons to GT. |
3366 | bool Swap = false, Invert = false; |
3367 | switch (C.CCMask) { |
3368 | case SystemZ::CCMASK_CMP_GT: break; |
3369 | case SystemZ::CCMASK_CMP_LT: Swap = true; break; |
3370 | case SystemZ::CCMASK_CMP_LE: Invert = true; break; |
3371 | case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break; |
3372 | default: llvm_unreachable("Invalid integer condition!" ); |
3373 | } |
3374 | if (Swap) |
3375 | std::swap(a&: C.Op0, b&: C.Op1); |
3376 | |
3377 | if (C.ICmpType == SystemZICMP::UnsignedOnly) |
3378 | C.Opcode = SystemZISD::UCMP128HI; |
3379 | else |
3380 | C.Opcode = SystemZISD::SCMP128HI; |
3381 | C.CCValid = SystemZ::CCMASK_ANY; |
3382 | C.CCMask = SystemZ::CCMASK_1; |
3383 | |
3384 | if (Invert) |
3385 | C.CCMask ^= C.CCValid; |
3386 | } |
3387 | |
3388 | // See whether the comparison argument contains a redundant AND |
3389 | // and remove it if so. This sometimes happens due to the generic |
3390 | // BRCOND expansion. |
3391 | static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, |
3392 | Comparison &C) { |
3393 | if (C.Op0.getOpcode() != ISD::AND) |
3394 | return; |
3395 | auto *Mask = dyn_cast<ConstantSDNode>(Val: C.Op0.getOperand(i: 1)); |
3396 | if (!Mask || Mask->getValueSizeInBits(ResNo: 0) > 64) |
3397 | return; |
3398 | KnownBits Known = DAG.computeKnownBits(Op: C.Op0.getOperand(i: 0)); |
3399 | if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue()) |
3400 | return; |
3401 | |
3402 | C.Op0 = C.Op0.getOperand(i: 0); |
3403 | } |
3404 | |
3405 | // Return a Comparison that tests the condition-code result of intrinsic |
3406 | // node Call against constant integer CC using comparison code Cond. |
3407 | // Opcode is the opcode of the SystemZISD operation for the intrinsic |
3408 | // and CCValid is the set of possible condition-code results. |
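// For example, with Cond == ISD::SETEQ and CC == 2, the mask is
// 1 << (3 - 2), the single bit that represents condition code 2.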
3409 | static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, |
3410 | SDValue Call, unsigned CCValid, uint64_t CC, |
3411 | ISD::CondCode Cond) { |
3412 | Comparison C(Call, SDValue(), SDValue()); |
3413 | C.Opcode = Opcode; |
3414 | C.CCValid = CCValid; |
3415 | if (Cond == ISD::SETEQ) |
3416 | // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. |
3417 | C.CCMask = CC < 4 ? 1 << (3 - CC) : 0; |
3418 | else if (Cond == ISD::SETNE) |
3419 | // ...and the inverse of that. |
3420 | C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; |
3421 | else if (Cond == ISD::SETLT || Cond == ISD::SETULT) |
3422 | // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, |
3423 | // always true for CC>3. |
3424 | C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1; |
3425 | else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) |
3426 | // ...and the inverse of that. |
3427 | C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0; |
3428 | else if (Cond == ISD::SETLE || Cond == ISD::SETULE) |
3429 | // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), |
3430 | // always true for CC>3. |
3431 | C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1; |
3432 | else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) |
3433 | // ...and the inverse of that. |
3434 | C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0; |
3435 | else |
3436 | llvm_unreachable("Unexpected integer comparison type" ); |
3437 | C.CCMask &= CCValid; |
3438 | return C; |
3439 | } |
3440 | |
// Decide how to implement a comparison of type Cond between CmpOp0 and
// CmpOp1.
3442 | static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, |
3443 | ISD::CondCode Cond, const SDLoc &DL, |
3444 | SDValue Chain = SDValue(), |
3445 | bool IsSignaling = false) { |
3446 | if (CmpOp1.getOpcode() == ISD::Constant) { |
3447 | assert(!Chain); |
3448 | unsigned Opcode, CCValid; |
3449 | if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && |
3450 | CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(NUses: 1, Value: 0) && |
3451 | isIntrinsicWithCCAndChain(Op: CmpOp0, Opcode, CCValid)) |
3452 | return getIntrinsicCmp(DAG, Opcode, Call: CmpOp0, CCValid, |
3453 | CC: CmpOp1->getAsZExtVal(), Cond); |
3454 | if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && |
3455 | CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 && |
3456 | isIntrinsicWithCC(Op: CmpOp0, Opcode, CCValid)) |
3457 | return getIntrinsicCmp(DAG, Opcode, Call: CmpOp0, CCValid, |
3458 | CC: CmpOp1->getAsZExtVal(), Cond); |
3459 | } |
3460 | Comparison C(CmpOp0, CmpOp1, Chain); |
3461 | C.CCMask = CCMaskForCondCode(CC: Cond); |
3462 | if (C.Op0.getValueType().isFloatingPoint()) { |
3463 | C.CCValid = SystemZ::CCMASK_FCMP; |
3464 | if (!C.Chain) |
3465 | C.Opcode = SystemZISD::FCMP; |
3466 | else if (!IsSignaling) |
3467 | C.Opcode = SystemZISD::STRICT_FCMP; |
3468 | else |
3469 | C.Opcode = SystemZISD::STRICT_FCMPS; |
3470 | adjustForFNeg(C); |
3471 | } else { |
3472 | assert(!C.Chain); |
3473 | C.CCValid = SystemZ::CCMASK_ICMP; |
3474 | C.Opcode = SystemZISD::ICMP; |
3475 | // Choose the type of comparison. Equality and inequality tests can |
3476 | // use either signed or unsigned comparisons. The choice also doesn't |
3477 | // matter if both sign bits are known to be clear. In those cases we |
3478 | // want to give the main isel code the freedom to choose whichever |
3479 | // form fits best. |
3480 | if (C.CCMask == SystemZ::CCMASK_CMP_EQ || |
3481 | C.CCMask == SystemZ::CCMASK_CMP_NE || |
3482 | (DAG.SignBitIsZero(Op: C.Op0) && DAG.SignBitIsZero(Op: C.Op1))) |
3483 | C.ICmpType = SystemZICMP::Any; |
3484 | else if (C.CCMask & SystemZ::CCMASK_CMP_UO) |
3485 | C.ICmpType = SystemZICMP::UnsignedOnly; |
3486 | else |
3487 | C.ICmpType = SystemZICMP::SignedOnly; |
3488 | C.CCMask &= ~SystemZ::CCMASK_CMP_UO; |
3489 | adjustForRedundantAnd(DAG, DL, C); |
3490 | adjustZeroCmp(DAG, DL, C); |
3491 | adjustSubwordCmp(DAG, DL, C); |
3492 | adjustForSubtraction(DAG, DL, C); |
3493 | adjustForLTGFR(C); |
3494 | adjustICmpTruncate(DAG, DL, C); |
3495 | } |
3496 | |
3497 | if (shouldSwapCmpOperands(C)) { |
3498 | std::swap(a&: C.Op0, b&: C.Op1); |
3499 | C.CCMask = SystemZ::reverseCCMask(CCMask: C.CCMask); |
3500 | } |
3501 | |
3502 | adjustForTestUnderMask(DAG, DL, C); |
3503 | adjustICmp128(DAG, DL, C); |
3504 | return C; |
3505 | } |
3506 | |
3507 | // Emit the comparison instruction described by C. |
3508 | static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { |
3509 | if (!C.Op1.getNode()) { |
3510 | SDNode *Node; |
3511 | switch (C.Op0.getOpcode()) { |
3512 | case ISD::INTRINSIC_W_CHAIN: |
3513 | Node = emitIntrinsicWithCCAndChain(DAG, Op: C.Op0, Opcode: C.Opcode); |
3514 | return SDValue(Node, 0); |
3515 | case ISD::INTRINSIC_WO_CHAIN: |
3516 | Node = emitIntrinsicWithCC(DAG, Op: C.Op0, Opcode: C.Opcode); |
3517 | return SDValue(Node, Node->getNumValues() - 1); |
3518 | default: |
3519 | llvm_unreachable("Invalid comparison operands" ); |
3520 | } |
3521 | } |
3522 | if (C.Opcode == SystemZISD::ICMP) |
3523 | return DAG.getNode(Opcode: SystemZISD::ICMP, DL, VT: MVT::i32, N1: C.Op0, N2: C.Op1, |
3524 | N3: DAG.getTargetConstant(Val: C.ICmpType, DL, VT: MVT::i32)); |
3525 | if (C.Opcode == SystemZISD::TM) { |
3526 | bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != |
3527 | bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); |
3528 | return DAG.getNode(Opcode: SystemZISD::TM, DL, VT: MVT::i32, N1: C.Op0, N2: C.Op1, |
3529 | N3: DAG.getTargetConstant(Val: RegisterOnly, DL, VT: MVT::i32)); |
3530 | } |
3531 | if (C.Opcode == SystemZISD::VICMPES || |
3532 | C.Opcode == SystemZISD::VICMPHS || |
3533 | C.Opcode == SystemZISD::VICMPHLS || |
3534 | C.Opcode == SystemZISD::VFCMPES || |
3535 | C.Opcode == SystemZISD::VFCMPHS || |
3536 | C.Opcode == SystemZISD::VFCMPHES) { |
3537 | EVT IntVT = C.Op0.getValueType().changeVectorElementTypeToInteger(); |
3538 | SDVTList VTs = DAG.getVTList(VT1: IntVT, VT2: MVT::i32); |
3539 | SDValue Val = DAG.getNode(Opcode: C.Opcode, DL, VTList: VTs, N1: C.Op0, N2: C.Op1); |
3540 | return SDValue(Val.getNode(), 1); |
3541 | } |
3542 | if (C.Chain) { |
3543 | SDVTList VTs = DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other); |
3544 | return DAG.getNode(Opcode: C.Opcode, DL, VTList: VTs, N1: C.Chain, N2: C.Op0, N3: C.Op1); |
3545 | } |
3546 | return DAG.getNode(Opcode: C.Opcode, DL, VT: MVT::i32, N1: C.Op0, N2: C.Op1); |
3547 | } |
3548 | |
3549 | // Implement a 32-bit *MUL_LOHI operation by extending both operands to |
3550 | // 64 bits. Extend is the extension type to use. Store the high part |
3551 | // in Hi and the low part in Lo. |
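// For example, for umul_lohi both inputs are zero-extended; the full
// 32x32->64-bit product then fits in a single i64, so Hi is simply the
// upper 32 bits of the 64-bit MUL result.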
3552 | static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, |
3553 | SDValue Op0, SDValue Op1, SDValue &Hi, |
3554 | SDValue &Lo) { |
3555 | Op0 = DAG.getNode(Opcode: Extend, DL, VT: MVT::i64, Operand: Op0); |
3556 | Op1 = DAG.getNode(Opcode: Extend, DL, VT: MVT::i64, Operand: Op1); |
3557 | SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::i64, N1: Op0, N2: Op1); |
3558 | Hi = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: Mul, |
3559 | N2: DAG.getConstant(Val: 32, DL, VT: MVT::i64)); |
3560 | Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Hi); |
3561 | Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Mul); |
3562 | } |
3563 | |
3564 | // Lower a binary operation that produces two VT results, one in each |
3565 | // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, |
3566 | // and Opcode performs the GR128 operation. Store the even register result |
3567 | // in Even and the odd register result in Odd. |
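// For example, the divide-and-remainder operations leave the remainder in
// the even register and the quotient in the odd register of the pair.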
3568 | static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, |
3569 | unsigned Opcode, SDValue Op0, SDValue Op1, |
3570 | SDValue &Even, SDValue &Odd) { |
3571 | SDValue Result = DAG.getNode(Opcode, DL, VT: MVT::Untyped, N1: Op0, N2: Op1); |
3572 | bool Is32Bit = is32Bit(VT); |
3573 | Even = DAG.getTargetExtractSubreg(SRIdx: SystemZ::even128(Is32bit: Is32Bit), DL, VT, Operand: Result); |
3574 | Odd = DAG.getTargetExtractSubreg(SRIdx: SystemZ::odd128(Is32bit: Is32Bit), DL, VT, Operand: Result); |
3575 | } |
3576 | |
3577 | // Return an i32 value that is 1 if the CC value produced by CCReg is |
3578 | // in the mask CCMask and 0 otherwise. CC is known to have a value |
3579 | // in CCValid, so other values can be ignored. |
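// For example, with CCValid == CCMASK_ICMP and CCMask == CCMASK_CMP_EQ, the
// SELECT_CCMASK yields 1 exactly when the comparison set CC 0.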
3580 | static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, |
3581 | unsigned CCValid, unsigned CCMask) { |
3582 | SDValue Ops[] = {DAG.getConstant(Val: 1, DL, VT: MVT::i32), |
3583 | DAG.getConstant(Val: 0, DL, VT: MVT::i32), |
3584 | DAG.getTargetConstant(Val: CCValid, DL, VT: MVT::i32), |
3585 | DAG.getTargetConstant(Val: CCMask, DL, VT: MVT::i32), CCReg}; |
3586 | return DAG.getNode(Opcode: SystemZISD::SELECT_CCMASK, DL, VT: MVT::i32, Ops); |
3587 | } |
3588 | |
// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3590 | // be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP |
3591 | // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet) |
3592 | // floating-point comparisons, and CmpMode::SignalingFP for strict signaling |
3593 | // floating-point comparisons. |
3594 | enum class CmpMode { Int, FP, StrictFP, SignalingFP }; |
3595 | static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) { |
3596 | switch (CC) { |
3597 | case ISD::SETOEQ: |
3598 | case ISD::SETEQ: |
3599 | switch (Mode) { |
3600 | case CmpMode::Int: return SystemZISD::VICMPE; |
3601 | case CmpMode::FP: return SystemZISD::VFCMPE; |
3602 | case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE; |
3603 | case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES; |
3604 | } |
3605 | llvm_unreachable("Bad mode" ); |
3606 | |
3607 | case ISD::SETOGE: |
3608 | case ISD::SETGE: |
3609 | switch (Mode) { |
3610 | case CmpMode::Int: return 0; |
3611 | case CmpMode::FP: return SystemZISD::VFCMPHE; |
3612 | case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE; |
3613 | case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES; |
3614 | } |
3615 | llvm_unreachable("Bad mode" ); |
3616 | |
3617 | case ISD::SETOGT: |
3618 | case ISD::SETGT: |
3619 | switch (Mode) { |
3620 | case CmpMode::Int: return SystemZISD::VICMPH; |
3621 | case CmpMode::FP: return SystemZISD::VFCMPH; |
3622 | case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH; |
3623 | case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS; |
3624 | } |
3625 | llvm_unreachable("Bad mode" ); |
3626 | |
3627 | case ISD::SETUGT: |
3628 | switch (Mode) { |
3629 | case CmpMode::Int: return SystemZISD::VICMPHL; |
3630 | case CmpMode::FP: return 0; |
3631 | case CmpMode::StrictFP: return 0; |
3632 | case CmpMode::SignalingFP: return 0; |
3633 | } |
3634 | llvm_unreachable("Bad mode" ); |
3635 | |
3636 | default: |
3637 | return 0; |
3638 | } |
3639 | } |
3640 | |
3641 | // Return the SystemZISD vector comparison operation for CC or its inverse, |
3642 | // or 0 if neither can be done directly. Indicate in Invert whether the |
3643 | // result is for the inverse of CC. Mode is as above. |
3644 | static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, |
3645 | bool &Invert) { |
3646 | if (unsigned Opcode = getVectorComparison(CC, Mode)) { |
3647 | Invert = false; |
3648 | return Opcode; |
3649 | } |
3650 | |
3651 | CC = ISD::getSetCCInverse(Operation: CC, Type: Mode == CmpMode::Int ? MVT::i32 : MVT::f32); |
3652 | if (unsigned Opcode = getVectorComparison(CC, Mode)) { |
3653 | Invert = true; |
3654 | return Opcode; |
3655 | } |
3656 | |
3657 | return 0; |
3658 | } |
3659 | |
3660 | // Return a v2f64 that contains the extended form of elements Start and Start+1 |
3661 | // of v4f32 value Op. If Chain is nonnull, return the strict form. |
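// The shuffle below places the two requested elements in lanes 0 and 2, the
// even lanes that VEXTEND widens to f64.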
3662 | static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, |
3663 | SDValue Op, SDValue Chain) { |
3664 | int Mask[] = { Start, -1, Start + 1, -1 }; |
3665 | Op = DAG.getVectorShuffle(VT: MVT::v4f32, dl: DL, N1: Op, N2: DAG.getUNDEF(VT: MVT::v4f32), Mask); |
3666 | if (Chain) { |
3667 | SDVTList VTs = DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other); |
3668 | return DAG.getNode(Opcode: SystemZISD::STRICT_VEXTEND, DL, VTList: VTs, N1: Chain, N2: Op); |
3669 | } |
3670 | return DAG.getNode(Opcode: SystemZISD::VEXTEND, DL, VT: MVT::v2f64, Operand: Op); |
3671 | } |
3672 | |
3673 | // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, |
3674 | // producing a result of type VT. If Chain is nonnull, return the strict form. |
3675 | SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode, |
3676 | const SDLoc &DL, EVT VT, |
3677 | SDValue CmpOp0, |
3678 | SDValue CmpOp1, |
3679 | SDValue Chain) const { |
3680 | // There is no hardware support for v4f32 (unless we have the vector |
3681 | // enhancements facility 1), so extend the vector into two v2f64s |
3682 | // and compare those. |
3683 | if (CmpOp0.getValueType() == MVT::v4f32 && |
3684 | !Subtarget.hasVectorEnhancements1()) { |
3685 | SDValue H0 = expandV4F32ToV2F64(DAG, Start: 0, DL, Op: CmpOp0, Chain); |
3686 | SDValue L0 = expandV4F32ToV2F64(DAG, Start: 2, DL, Op: CmpOp0, Chain); |
3687 | SDValue H1 = expandV4F32ToV2F64(DAG, Start: 0, DL, Op: CmpOp1, Chain); |
3688 | SDValue L1 = expandV4F32ToV2F64(DAG, Start: 2, DL, Op: CmpOp1, Chain); |
3689 | if (Chain) { |
3690 | SDVTList VTs = DAG.getVTList(VT1: MVT::v2i64, VT2: MVT::Other); |
3691 | SDValue HRes = DAG.getNode(Opcode, DL, VTList: VTs, N1: Chain, N2: H0, N3: H1); |
3692 | SDValue LRes = DAG.getNode(Opcode, DL, VTList: VTs, N1: Chain, N2: L0, N3: L1); |
3693 | SDValue Res = DAG.getNode(Opcode: SystemZISD::PACK, DL, VT, N1: HRes, N2: LRes); |
3694 | SDValue Chains[6] = { H0.getValue(R: 1), L0.getValue(R: 1), |
3695 | H1.getValue(R: 1), L1.getValue(R: 1), |
3696 | HRes.getValue(R: 1), LRes.getValue(R: 1) }; |
3697 | SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: Chains); |
3698 | SDValue Ops[2] = { Res, NewChain }; |
3699 | return DAG.getMergeValues(Ops, dl: DL); |
3700 | } |
3701 | SDValue HRes = DAG.getNode(Opcode, DL, VT: MVT::v2i64, N1: H0, N2: H1); |
3702 | SDValue LRes = DAG.getNode(Opcode, DL, VT: MVT::v2i64, N1: L0, N2: L1); |
3703 | return DAG.getNode(Opcode: SystemZISD::PACK, DL, VT, N1: HRes, N2: LRes); |
3704 | } |
3705 | if (Chain) { |
3706 | SDVTList VTs = DAG.getVTList(VT1: VT, VT2: MVT::Other); |
3707 | return DAG.getNode(Opcode, DL, VTList: VTs, N1: Chain, N2: CmpOp0, N3: CmpOp1); |
3708 | } |
3709 | return DAG.getNode(Opcode, DL, VT, N1: CmpOp0, N2: CmpOp1); |
3710 | } |
3711 | |
3712 | // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing |
3713 | // an integer mask of type VT. If Chain is nonnull, we have a strict |
3714 | // floating-point comparison. If in addition IsSignaling is true, we have |
3715 | // a strict signaling floating-point comparison. |
3716 | SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, |
3717 | const SDLoc &DL, EVT VT, |
3718 | ISD::CondCode CC, |
3719 | SDValue CmpOp0, |
3720 | SDValue CmpOp1, |
3721 | SDValue Chain, |
3722 | bool IsSignaling) const { |
3723 | bool IsFP = CmpOp0.getValueType().isFloatingPoint(); |
3724 | assert (!Chain || IsFP); |
3725 | assert (!IsSignaling || Chain); |
3726 | CmpMode Mode = IsSignaling ? CmpMode::SignalingFP : |
3727 | Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int; |
3728 | bool Invert = false; |
3729 | SDValue Cmp; |
3730 | switch (CC) { |
3731 | // Handle tests for order using (or (ogt y x) (oge x y)). |
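// For ordered (non-NaN) values exactly one of y > x and x >= y holds,
// while a NaN operand makes both ordered compares false, so the OR
// computes the "ordered" mask (inverted below to get SETUO).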
3732 | case ISD::SETUO: |
3733 | Invert = true; |
3734 | [[fallthrough]]; |
3735 | case ISD::SETO: { |
assert(IsFP && "Unexpected integer comparison");
3737 | SDValue LT = getVectorCmp(DAG, Opcode: getVectorComparison(CC: ISD::SETOGT, Mode), |
3738 | DL, VT, CmpOp0: CmpOp1, CmpOp1: CmpOp0, Chain); |
3739 | SDValue GE = getVectorCmp(DAG, Opcode: getVectorComparison(CC: ISD::SETOGE, Mode), |
3740 | DL, VT, CmpOp0, CmpOp1, Chain); |
3741 | Cmp = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: LT, N2: GE); |
3742 | if (Chain) |
3743 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, |
3744 | N1: LT.getValue(R: 1), N2: GE.getValue(R: 1)); |
3745 | break; |
3746 | } |
3747 | |
3748 | // Handle <> tests using (or (ogt y x) (ogt x y)). |
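// Exactly one of y > x and x > y holds when the operands are ordered and
// unequal; NaNs and equal values fail both compares, so the OR computes
// the ONE mask (inverted below to get SETUEQ).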
3749 | case ISD::SETUEQ: |
3750 | Invert = true; |
3751 | [[fallthrough]]; |
3752 | case ISD::SETONE: { |
assert(IsFP && "Unexpected integer comparison");
3754 | SDValue LT = getVectorCmp(DAG, Opcode: getVectorComparison(CC: ISD::SETOGT, Mode), |
3755 | DL, VT, CmpOp0: CmpOp1, CmpOp1: CmpOp0, Chain); |
3756 | SDValue GT = getVectorCmp(DAG, Opcode: getVectorComparison(CC: ISD::SETOGT, Mode), |
3757 | DL, VT, CmpOp0, CmpOp1, Chain); |
3758 | Cmp = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: LT, N2: GT); |
3759 | if (Chain) |
3760 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, |
3761 | N1: LT.getValue(R: 1), N2: GT.getValue(R: 1)); |
3762 | break; |
3763 | } |
3764 | |
3765 | // Otherwise a single comparison is enough. It doesn't really |
3766 | // matter whether we try the inversion or the swap first, since |
3767 | // there are no cases where both work. |
3768 | default: |
3769 | // Optimize sign-bit comparisons to signed compares. |
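// For example, (setcc (and X, SignMaskSplat), 0, seteq) becomes
// (setcc X, 0, setge), since the sign bit is clear exactly when each
// element is non-negative; setne likewise becomes setlt.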
3770 | if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) && |
3771 | ISD::isConstantSplatVectorAllZeros(N: CmpOp1.getNode())) { |
3772 | unsigned EltSize = VT.getVectorElementType().getSizeInBits(); |
3773 | APInt Mask; |
3774 | if (CmpOp0.getOpcode() == ISD::AND |
3775 | && ISD::isConstantSplatVector(N: CmpOp0.getOperand(i: 1).getNode(), SplatValue&: Mask) |
3776 | && Mask == APInt::getSignMask(BitWidth: EltSize)) { |
3777 | CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; |
3778 | CmpOp0 = CmpOp0.getOperand(i: 0); |
3779 | } |
3780 | } |
3781 | if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) |
3782 | Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain); |
3783 | else { |
3784 | CC = ISD::getSetCCSwappedOperands(Operation: CC); |
3785 | if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) |
3786 | Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0: CmpOp1, CmpOp1: CmpOp0, Chain); |
3787 | else |
llvm_unreachable("Unhandled comparison");
3789 | } |
3790 | if (Chain) |
3791 | Chain = Cmp.getValue(R: 1); |
3792 | break; |
3793 | } |
3794 | if (Invert) { |
3795 | SDValue Mask = |
3796 | DAG.getSplatBuildVector(VT, DL, Op: DAG.getAllOnesConstant(DL, VT: MVT::i64)); |
3797 | Cmp = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Cmp, N2: Mask); |
3798 | } |
3799 | if (Chain && Chain.getNode() != Cmp.getNode()) { |
3800 | SDValue Ops[2] = { Cmp, Chain }; |
3801 | Cmp = DAG.getMergeValues(Ops, dl: DL); |
3802 | } |
3803 | return Cmp; |
3804 | } |
3805 | |
3806 | SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, |
3807 | SelectionDAG &DAG) const { |
3808 | SDValue CmpOp0 = Op.getOperand(i: 0); |
3809 | SDValue CmpOp1 = Op.getOperand(i: 1); |
3810 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get(); |
3811 | SDLoc DL(Op); |
3812 | EVT VT = Op.getValueType(); |
3813 | if (VT.isVector()) |
3814 | return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); |
3815 | |
3816 | Comparison C(getCmp(DAG, CmpOp0, CmpOp1, Cond: CC, DL)); |
3817 | SDValue CCReg = emitCmp(DAG, DL, C); |
3818 | return emitSETCC(DAG, DL, CCReg, CCValid: C.CCValid, CCMask: C.CCMask); |
3819 | } |
3820 | |
3821 | SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op, |
3822 | SelectionDAG &DAG, |
3823 | bool IsSignaling) const { |
3824 | SDValue Chain = Op.getOperand(i: 0); |
3825 | SDValue CmpOp0 = Op.getOperand(i: 1); |
3826 | SDValue CmpOp1 = Op.getOperand(i: 2); |
3827 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 3))->get(); |
3828 | SDLoc DL(Op); |
3829 | EVT VT = Op.getNode()->getValueType(ResNo: 0); |
3830 | if (VT.isVector()) { |
3831 | SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1, |
3832 | Chain, IsSignaling); |
3833 | return Res.getValue(R: Op.getResNo()); |
3834 | } |
3835 | |
3836 | Comparison C(getCmp(DAG, CmpOp0, CmpOp1, Cond: CC, DL, Chain, IsSignaling)); |
3837 | SDValue CCReg = emitCmp(DAG, DL, C); |
3838 | CCReg->setFlags(Op->getFlags()); |
3839 | SDValue Result = emitSETCC(DAG, DL, CCReg, CCValid: C.CCValid, CCMask: C.CCMask); |
3840 | SDValue Ops[2] = { Result, CCReg.getValue(R: 1) }; |
3841 | return DAG.getMergeValues(Ops, dl: DL); |
3842 | } |
3843 | |
3844 | SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { |
3845 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 1))->get(); |
3846 | SDValue CmpOp0 = Op.getOperand(i: 2); |
3847 | SDValue CmpOp1 = Op.getOperand(i: 3); |
3848 | SDValue Dest = Op.getOperand(i: 4); |
3849 | SDLoc DL(Op); |
3850 | |
3851 | Comparison C(getCmp(DAG, CmpOp0, CmpOp1, Cond: CC, DL)); |
3852 | SDValue CCReg = emitCmp(DAG, DL, C); |
3853 | return DAG.getNode( |
3854 | Opcode: SystemZISD::BR_CCMASK, DL, VT: Op.getValueType(), N1: Op.getOperand(i: 0), |
3855 | N2: DAG.getTargetConstant(Val: C.CCValid, DL, VT: MVT::i32), |
3856 | N3: DAG.getTargetConstant(Val: C.CCMask, DL, VT: MVT::i32), N4: Dest, N5: CCReg); |
3857 | } |
3858 | |
3859 | // Return true if Pos is CmpOp and Neg is the negative of CmpOp, |
3860 | // allowing Pos and Neg to be wider than CmpOp. |
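// This matches the canonical selection form of abs(), e.g.
// (select_cc X, 0, X, (sub 0, X), setgt), possibly with the select
// operands sign-extended from X.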
3861 | static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) { |
3862 | return (Neg.getOpcode() == ISD::SUB && |
3863 | Neg.getOperand(i: 0).getOpcode() == ISD::Constant && |
3864 | Neg.getConstantOperandVal(i: 0) == 0 && Neg.getOperand(i: 1) == Pos && |
3865 | (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND && |
3866 | Pos.getOperand(i: 0) == CmpOp))); |
3867 | } |
3868 | |
3869 | // Return the absolute or negative absolute of Op; IsNegative decides which. |
3870 | static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, |
3871 | bool IsNegative) { |
3872 | Op = DAG.getNode(Opcode: ISD::ABS, DL, VT: Op.getValueType(), Operand: Op); |
3873 | if (IsNegative) |
3874 | Op = DAG.getNode(Opcode: ISD::SUB, DL, VT: Op.getValueType(), |
3875 | N1: DAG.getConstant(Val: 0, DL, VT: Op.getValueType()), N2: Op); |
3876 | return Op; |
3877 | } |
3878 | |
3879 | static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL, |
3880 | Comparison C, SDValue TrueOp, SDValue FalseOp) { |
3881 | EVT VT = MVT::i128; |
3882 | unsigned Op; |
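
// Canonicalize the comparison so that only EQ and GT/GTU remain: NE, GE
// and LE are handled by swapping the select operands and inverting the CC
// mask, and LT by swapping the compare operands. The survivors map
// directly onto the VICMPE/VICMPH/VICMPHL nodes below.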
3883 | |
3884 | if (C.CCMask == SystemZ::CCMASK_CMP_NE || |
3885 | C.CCMask == SystemZ::CCMASK_CMP_GE || |
3886 | C.CCMask == SystemZ::CCMASK_CMP_LE) { |
3887 | std::swap(a&: TrueOp, b&: FalseOp); |
3888 | C.CCMask ^= C.CCValid; |
3889 | } |
3890 | if (C.CCMask == SystemZ::CCMASK_CMP_LT) { |
3891 | std::swap(a&: C.Op0, b&: C.Op1); |
3892 | C.CCMask = SystemZ::CCMASK_CMP_GT; |
3893 | } |
3894 | switch (C.CCMask) { |
3895 | case SystemZ::CCMASK_CMP_EQ: |
3896 | Op = SystemZISD::VICMPE; |
3897 | break; |
3898 | case SystemZ::CCMASK_CMP_GT: |
3899 | if (C.ICmpType == SystemZICMP::UnsignedOnly) |
3900 | Op = SystemZISD::VICMPHL; |
3901 | else |
3902 | Op = SystemZISD::VICMPH; |
3903 | break; |
3904 | default: |
llvm_unreachable("Unhandled comparison");
3906 | break; |
3907 | } |
3908 | |
3909 | SDValue Mask = DAG.getNode(Opcode: Op, DL, VT, N1: C.Op0, N2: C.Op1); |
3910 | TrueOp = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: TrueOp, N2: Mask); |
3911 | FalseOp = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: FalseOp, N2: DAG.getNOT(DL, Val: Mask, VT)); |
3912 | return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: TrueOp, N2: FalseOp); |
3913 | } |
3914 | |
3915 | SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, |
3916 | SelectionDAG &DAG) const { |
3917 | SDValue CmpOp0 = Op.getOperand(i: 0); |
3918 | SDValue CmpOp1 = Op.getOperand(i: 1); |
3919 | SDValue TrueOp = Op.getOperand(i: 2); |
3920 | SDValue FalseOp = Op.getOperand(i: 3); |
3921 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 4))->get(); |
3922 | SDLoc DL(Op); |
3923 | |
// A SELECT_CC involving f16 will not have its compare operands promoted by
// the legalizer, since the node is legalized according to the type of the
// resulting value. Extend them here if needed.
3927 | if (CmpOp0.getSimpleValueType() == MVT::f16) { |
3928 | CmpOp0 = DAG.getFPExtendOrRound(Op: CmpOp0, DL: SDLoc(CmpOp0), VT: MVT::f32); |
3929 | CmpOp1 = DAG.getFPExtendOrRound(Op: CmpOp1, DL: SDLoc(CmpOp1), VT: MVT::f32); |
3930 | } |
3931 | |
3932 | Comparison C(getCmp(DAG, CmpOp0, CmpOp1, Cond: CC, DL)); |
3933 | |
3934 | // Check for absolute and negative-absolute selections, including those |
3935 | // where the comparison value is sign-extended (for LPGFR and LNGFR). |
3936 | // This check supplements the one in DAGCombiner. |
3937 | if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ && |
3938 | C.CCMask != SystemZ::CCMASK_CMP_NE && |
3939 | C.Op1.getOpcode() == ISD::Constant && |
3940 | cast<ConstantSDNode>(Val&: C.Op1)->getValueSizeInBits(ResNo: 0) <= 64 && |
3941 | C.Op1->getAsZExtVal() == 0) { |
3942 | if (isAbsolute(CmpOp: C.Op0, Pos: TrueOp, Neg: FalseOp)) |
3943 | return getAbsolute(DAG, DL, Op: TrueOp, IsNegative: C.CCMask & SystemZ::CCMASK_CMP_LT); |
3944 | if (isAbsolute(CmpOp: C.Op0, Pos: FalseOp, Neg: TrueOp)) |
3945 | return getAbsolute(DAG, DL, Op: FalseOp, IsNegative: C.CCMask & SystemZ::CCMASK_CMP_GT); |
3946 | } |
3947 | |
3948 | if (Subtarget.hasVectorEnhancements3() && |
3949 | C.Opcode == SystemZISD::ICMP && |
3950 | C.Op0.getValueType() == MVT::i128 && |
3951 | TrueOp.getValueType() == MVT::i128) { |
3952 | return getI128Select(DAG, DL, C, TrueOp, FalseOp); |
3953 | } |
3954 | |
3955 | SDValue CCReg = emitCmp(DAG, DL, C); |
3956 | SDValue Ops[] = {TrueOp, FalseOp, |
3957 | DAG.getTargetConstant(Val: C.CCValid, DL, VT: MVT::i32), |
3958 | DAG.getTargetConstant(Val: C.CCMask, DL, VT: MVT::i32), CCReg}; |
3959 | |
3960 | return DAG.getNode(Opcode: SystemZISD::SELECT_CCMASK, DL, VT: Op.getValueType(), Ops); |
3961 | } |
3962 | |
3963 | SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, |
3964 | SelectionDAG &DAG) const { |
3965 | SDLoc DL(Node); |
3966 | const GlobalValue *GV = Node->getGlobal(); |
3967 | int64_t Offset = Node->getOffset(); |
3968 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
3969 | CodeModel::Model CM = DAG.getTarget().getCodeModel(); |
3970 | |
3971 | SDValue Result; |
3972 | if (Subtarget.isPC32DBLSymbol(GV, CM)) { |
3973 | if (isInt<32>(x: Offset)) { |
3974 | // Assign anchors at 1<<12 byte boundaries. |
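// For example, an offset of 0x12346 gets the anchor 0x12000; the
// remaining 0x346 is halfword-aligned and can be folded into the
// address below.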
3975 | uint64_t Anchor = Offset & ~uint64_t(0xfff); |
3976 | Result = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: Anchor); |
3977 | Result = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result); |
3978 | |
3979 | // The offset can be folded into the address if it is aligned to a |
3980 | // halfword. |
3981 | Offset -= Anchor; |
3982 | if (Offset != 0 && (Offset & 1) == 0) { |
3983 | SDValue Full = |
3984 | DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: Anchor + Offset); |
3985 | Result = DAG.getNode(Opcode: SystemZISD::PCREL_OFFSET, DL, VT: PtrVT, N1: Full, N2: Result); |
3986 | Offset = 0; |
3987 | } |
3988 | } else { |
3989 | // Conservatively load a constant offset greater than 32 bits into a |
3990 | // register below. |
3991 | Result = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT); |
3992 | Result = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result); |
3993 | } |
3994 | } else if (Subtarget.isTargetELF()) { |
3995 | Result = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0, TargetFlags: SystemZII::MO_GOT); |
3996 | Result = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result); |
3997 | Result = DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Result, |
3998 | PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction())); |
3999 | } else if (Subtarget.isTargetzOS()) { |
4000 | Result = getADAEntry(DAG, GV, DL, PtrVT); |
4001 | } else |
llvm_unreachable("Unexpected Subtarget");
4003 | |
4004 | // If there was a non-zero offset that we didn't fold, create an explicit |
4005 | // addition for it. |
4006 | if (Offset != 0) |
4007 | Result = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Result, |
4008 | N2: DAG.getSignedConstant(Val: Offset, DL, VT: PtrVT)); |
4009 | |
4010 | return Result; |
4011 | } |
4012 | |
4013 | SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, |
4014 | SelectionDAG &DAG, |
4015 | unsigned Opcode, |
4016 | SDValue GOTOffset) const { |
4017 | SDLoc DL(Node); |
4018 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
4019 | SDValue Chain = DAG.getEntryNode(); |
4020 | SDValue Glue; |
4021 | |
4022 | if (DAG.getMachineFunction().getFunction().getCallingConv() == |
4023 | CallingConv::GHC) |
report_fatal_error(reason: "In GHC calling convention TLS is not supported");
4025 | |
4026 | // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. |
4027 | SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(VT: PtrVT); |
4028 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: SystemZ::R12D, N: GOT, Glue); |
4029 | Glue = Chain.getValue(R: 1); |
4030 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: SystemZ::R2D, N: GOTOffset, Glue); |
4031 | Glue = Chain.getValue(R: 1); |
4032 | |
4033 | // The first call operand is the chain and the second is the TLS symbol. |
4034 | SmallVector<SDValue, 8> Ops; |
4035 | Ops.push_back(Elt: Chain); |
4036 | Ops.push_back(Elt: DAG.getTargetGlobalAddress(GV: Node->getGlobal(), DL, |
4037 | VT: Node->getValueType(ResNo: 0), |
4038 | offset: 0, TargetFlags: 0)); |
4039 | |
4040 | // Add argument registers to the end of the list so that they are |
4041 | // known live into the call. |
4042 | Ops.push_back(Elt: DAG.getRegister(Reg: SystemZ::R2D, VT: PtrVT)); |
4043 | Ops.push_back(Elt: DAG.getRegister(Reg: SystemZ::R12D, VT: PtrVT)); |
4044 | |
4045 | // Add a register mask operand representing the call-preserved registers. |
4046 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
4047 | const uint32_t *Mask = |
4048 | TRI->getCallPreservedMask(MF: DAG.getMachineFunction(), CallingConv::C); |
assert(Mask && "Missing call preserved mask for calling convention");
4050 | Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask)); |
4051 | |
4052 | // Glue the call to the argument copies. |
4053 | Ops.push_back(Elt: Glue); |
4054 | |
4055 | // Emit the call. |
4056 | SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue); |
4057 | Chain = DAG.getNode(Opcode, DL, VTList: NodeTys, Ops); |
4058 | Glue = Chain.getValue(R: 1); |
4059 | |
4060 | // Copy the return value from %r2. |
4061 | return DAG.getCopyFromReg(Chain, dl: DL, Reg: SystemZ::R2D, VT: PtrVT, Glue); |
4062 | } |
4063 | |
4064 | SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL, |
4065 | SelectionDAG &DAG) const { |
4066 | SDValue Chain = DAG.getEntryNode(); |
4067 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
4068 | |
4069 | // The high part of the thread pointer is in access register 0. |
4070 | SDValue TPHi = DAG.getCopyFromReg(Chain, dl: DL, Reg: SystemZ::A0, VT: MVT::i32); |
4071 | TPHi = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: PtrVT, Operand: TPHi); |
4072 | |
4073 | // The low part of the thread pointer is in access register 1. |
4074 | SDValue TPLo = DAG.getCopyFromReg(Chain, dl: DL, Reg: SystemZ::A1, VT: MVT::i32); |
4075 | TPLo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: PtrVT, Operand: TPLo); |
4076 | |
4077 | // Merge them into a single 64-bit address. |
4078 | SDValue TPHiShifted = DAG.getNode(Opcode: ISD::SHL, DL, VT: PtrVT, N1: TPHi, |
4079 | N2: DAG.getConstant(Val: 32, DL, VT: PtrVT)); |
4080 | return DAG.getNode(Opcode: ISD::OR, DL, VT: PtrVT, N1: TPHiShifted, N2: TPLo); |
4081 | } |
4082 | |
4083 | SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, |
4084 | SelectionDAG &DAG) const { |
4085 | if (DAG.getTarget().useEmulatedTLS()) |
4086 | return LowerToTLSEmulatedModel(GA: Node, DAG); |
4087 | SDLoc DL(Node); |
4088 | const GlobalValue *GV = Node->getGlobal(); |
4089 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
4090 | TLSModel::Model model = DAG.getTarget().getTLSModel(GV); |
4091 | |
4092 | if (DAG.getMachineFunction().getFunction().getCallingConv() == |
4093 | CallingConv::GHC) |
report_fatal_error(reason: "In GHC calling convention TLS is not supported");
4095 | |
4096 | SDValue TP = lowerThreadPointer(DL, DAG); |
4097 | |
4098 | // Get the offset of GA from the thread pointer, based on the TLS model. |
4099 | SDValue Offset; |
4100 | switch (model) { |
4101 | case TLSModel::GeneralDynamic: { |
4102 | // Load the GOT offset of the tls_index (module ID / per-symbol offset). |
4103 | SystemZConstantPoolValue *CPV = |
4104 | SystemZConstantPoolValue::Create(GV, Modifier: SystemZCP::TLSGD); |
4105 | |
4106 | Offset = DAG.getConstantPool(C: CPV, VT: PtrVT, Align: Align(8)); |
4107 | Offset = DAG.getLoad( |
4108 | VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Offset, |
4109 | PtrInfo: MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction())); |
4110 | |
4111 | // Call __tls_get_offset to retrieve the offset. |
4112 | Offset = lowerTLSGetOffset(Node, DAG, Opcode: SystemZISD::TLS_GDCALL, GOTOffset: Offset); |
4113 | break; |
4114 | } |
4115 | |
4116 | case TLSModel::LocalDynamic: { |
4117 | // Load the GOT offset of the module ID. |
4118 | SystemZConstantPoolValue *CPV = |
4119 | SystemZConstantPoolValue::Create(GV, Modifier: SystemZCP::TLSLDM); |
4120 | |
4121 | Offset = DAG.getConstantPool(C: CPV, VT: PtrVT, Align: Align(8)); |
4122 | Offset = DAG.getLoad( |
4123 | VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Offset, |
4124 | PtrInfo: MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction())); |
4125 | |
4126 | // Call __tls_get_offset to retrieve the module base offset. |
4127 | Offset = lowerTLSGetOffset(Node, DAG, Opcode: SystemZISD::TLS_LDCALL, GOTOffset: Offset); |
4128 | |
// Note: The SystemZLDCleanupPass will remove redundant computations
// of the module base offset. Count the total number of local-dynamic
// accesses to trigger execution of that pass.
4132 | SystemZMachineFunctionInfo* MFI = |
4133 | DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>(); |
4134 | MFI->incNumLocalDynamicTLSAccesses(); |
4135 | |
4136 | // Add the per-symbol offset. |
4137 | CPV = SystemZConstantPoolValue::Create(GV, Modifier: SystemZCP::DTPOFF); |
4138 | |
4139 | SDValue DTPOffset = DAG.getConstantPool(C: CPV, VT: PtrVT, Align: Align(8)); |
4140 | DTPOffset = DAG.getLoad( |
4141 | VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: DTPOffset, |
4142 | PtrInfo: MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction())); |
4143 | |
4144 | Offset = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Offset, N2: DTPOffset); |
4145 | break; |
4146 | } |
4147 | |
4148 | case TLSModel::InitialExec: { |
4149 | // Load the offset from the GOT. |
4150 | Offset = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0, |
4151 | TargetFlags: SystemZII::MO_INDNTPOFF); |
4152 | Offset = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Offset); |
4153 | Offset = |
4154 | DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Offset, |
4155 | PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction())); |
4156 | break; |
4157 | } |
4158 | |
4159 | case TLSModel::LocalExec: { |
4160 | // Force the offset into the constant pool and load it from there. |
4161 | SystemZConstantPoolValue *CPV = |
4162 | SystemZConstantPoolValue::Create(GV, Modifier: SystemZCP::NTPOFF); |
4163 | |
4164 | Offset = DAG.getConstantPool(C: CPV, VT: PtrVT, Align: Align(8)); |
4165 | Offset = DAG.getLoad( |
4166 | VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Offset, |
4167 | PtrInfo: MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction())); |
4168 | break; |
4169 | } |
4170 | } |
4171 | |
4172 | // Add the base and offset together. |
4173 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: TP, N2: Offset); |
4174 | } |
4175 | |
4176 | SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, |
4177 | SelectionDAG &DAG) const { |
4178 | SDLoc DL(Node); |
4179 | const BlockAddress *BA = Node->getBlockAddress(); |
4180 | int64_t Offset = Node->getOffset(); |
4181 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
4182 | |
4183 | SDValue Result = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset); |
4184 | Result = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result); |
4185 | return Result; |
4186 | } |
4187 | |
4188 | SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, |
4189 | SelectionDAG &DAG) const { |
4190 | SDLoc DL(JT); |
4191 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
4192 | SDValue Result = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT); |
4193 | |
4194 | // Use LARL to load the address of the table. |
4195 | return DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result); |
4196 | } |
4197 | |
4198 | SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, |
4199 | SelectionDAG &DAG) const { |
4200 | SDLoc DL(CP); |
4201 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
4202 | |
4203 | SDValue Result; |
4204 | if (CP->isMachineConstantPoolEntry()) |
4205 | Result = |
4206 | DAG.getTargetConstantPool(C: CP->getMachineCPVal(), VT: PtrVT, Align: CP->getAlign()); |
4207 | else |
4208 | Result = DAG.getTargetConstantPool(C: CP->getConstVal(), VT: PtrVT, Align: CP->getAlign(), |
4209 | Offset: CP->getOffset()); |
4210 | |
4211 | // Use LARL to load the address of the constant pool entry. |
4212 | return DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result); |
4213 | } |
4214 | |
4215 | SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, |
4216 | SelectionDAG &DAG) const { |
4217 | auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); |
4218 | MachineFunction &MF = DAG.getMachineFunction(); |
4219 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
4220 | MFI.setFrameAddressIsTaken(true); |
4221 | |
4222 | SDLoc DL(Op); |
4223 | unsigned Depth = Op.getConstantOperandVal(i: 0); |
4224 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
4225 | |
// By definition, the frame address is the address of the back chain. (In
// the case of a packed stack without backchain, return the address where
// the backchain would have been stored. That slot is either unused or
// holds a saved register.)
4230 | int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF); |
4231 | SDValue BackChain = DAG.getFrameIndex(FI: BackChainIdx, VT: PtrVT); |
4232 | |
4233 | if (Depth > 0) { |
4234 | // FIXME The frontend should detect this case. |
4235 | if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain()) |
report_fatal_error(reason: "Unsupported stack frame traversal count");
4237 | |
4238 | SDValue Offset = DAG.getConstant(Val: TFL->getBackchainOffset(MF), DL, VT: PtrVT); |
4239 | while (Depth--) { |
4240 | BackChain = DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: BackChain, |
4241 | PtrInfo: MachinePointerInfo()); |
4242 | BackChain = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: BackChain, N2: Offset); |
4243 | } |
4244 | } |
4245 | |
4246 | return BackChain; |
4247 | } |
4248 | |
4249 | SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op, |
4250 | SelectionDAG &DAG) const { |
4251 | MachineFunction &MF = DAG.getMachineFunction(); |
4252 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
4253 | MFI.setReturnAddressIsTaken(true); |
4254 | |
4255 | if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
4256 | return SDValue(); |
4257 | |
4258 | SDLoc DL(Op); |
4259 | unsigned Depth = Op.getConstantOperandVal(i: 0); |
4260 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
4261 | |
4262 | if (Depth > 0) { |
4263 | // FIXME The frontend should detect this case. |
4264 | if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain()) |
report_fatal_error(reason: "Unsupported stack frame traversal count");
4266 | |
4267 | SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); |
4268 | const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); |
4269 | int Offset = TFL->getReturnAddressOffset(MF); |
4270 | SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: FrameAddr, |
4271 | N2: DAG.getSignedConstant(Val: Offset, DL, VT: PtrVT)); |
4272 | return DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr, |
4273 | PtrInfo: MachinePointerInfo()); |
4274 | } |
4275 | |
// Return R14D (ELF) / R7D (XPLINK), which has the return address. Mark it
// as an implicit live-in.
4278 | SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters(); |
4279 | Register LinkReg = MF.addLiveIn(PReg: CCR->getReturnFunctionAddressRegister(), |
4280 | RC: &SystemZ::GR64BitRegClass); |
4281 | return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: LinkReg, VT: PtrVT); |
4282 | } |
4283 | |
4284 | SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, |
4285 | SelectionDAG &DAG) const { |
4286 | SDLoc DL(Op); |
4287 | SDValue In = Op.getOperand(i: 0); |
4288 | EVT InVT = In.getValueType(); |
4289 | EVT ResVT = Op.getValueType(); |
4290 | |
4291 | // Convert loads directly. This is normally done by DAGCombiner, |
4292 | // but we need this case for bitcasts that are created during lowering |
4293 | // and which are then lowered themselves. |
4294 | if (auto *LoadN = dyn_cast<LoadSDNode>(Val&: In)) |
4295 | if (ISD::isNormalLoad(N: LoadN)) { |
4296 | SDValue NewLoad = DAG.getLoad(VT: ResVT, dl: DL, Chain: LoadN->getChain(), |
4297 | Ptr: LoadN->getBasePtr(), MMO: LoadN->getMemOperand()); |
4298 | // Update the chain uses. |
4299 | DAG.ReplaceAllUsesOfValueWith(From: SDValue(LoadN, 1), To: NewLoad.getValue(R: 1)); |
4300 | return NewLoad; |
4301 | } |
4302 | |
4303 | if (InVT == MVT::i32 && ResVT == MVT::f32) { |
4304 | SDValue In64; |
4305 | if (Subtarget.hasHighWord()) { |
4306 | SDNode *U64 = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, |
4307 | VT: MVT::i64); |
4308 | In64 = DAG.getTargetInsertSubreg(SRIdx: SystemZ::subreg_h32, DL, |
4309 | VT: MVT::i64, Operand: SDValue(U64, 0), Subreg: In); |
4310 | } else { |
4311 | In64 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: In); |
4312 | In64 = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, N1: In64, |
4313 | N2: DAG.getConstant(Val: 32, DL, VT: MVT::i64)); |
4314 | } |
4315 | SDValue Out64 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f64, Operand: In64); |
4316 | return DAG.getTargetExtractSubreg(SRIdx: SystemZ::subreg_h32, |
4317 | DL, VT: MVT::f32, Operand: Out64); |
4318 | } |
4319 | if (InVT == MVT::f32 && ResVT == MVT::i32) { |
4320 | SDNode *U64 = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::f64); |
4321 | SDValue In64 = DAG.getTargetInsertSubreg(SRIdx: SystemZ::subreg_h32, DL, |
4322 | VT: MVT::f64, Operand: SDValue(U64, 0), Subreg: In); |
4323 | SDValue Out64 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i64, Operand: In64); |
4324 | if (Subtarget.hasHighWord()) |
4325 | return DAG.getTargetExtractSubreg(SRIdx: SystemZ::subreg_h32, DL, |
4326 | VT: MVT::i32, Operand: Out64); |
4327 | SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: Out64, |
4328 | N2: DAG.getConstant(Val: 32, DL, VT: MVT::i64)); |
4329 | return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Shift); |
4330 | } |
llvm_unreachable("Unexpected bitcast combination");
4332 | } |
4333 | |
4334 | SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, |
4335 | SelectionDAG &DAG) const { |
4336 | |
4337 | if (Subtarget.isTargetXPLINK64()) |
4338 | return lowerVASTART_XPLINK(Op, DAG); |
4339 | else |
4340 | return lowerVASTART_ELF(Op, DAG); |
4341 | } |
4342 | |
4343 | SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op, |
4344 | SelectionDAG &DAG) const { |
4345 | MachineFunction &MF = DAG.getMachineFunction(); |
4346 | SystemZMachineFunctionInfo *FuncInfo = |
4347 | MF.getInfo<SystemZMachineFunctionInfo>(); |
4348 | |
4349 | SDLoc DL(Op); |
4350 | |
4351 | // vastart just stores the address of the VarArgsFrameIndex slot into the |
4352 | // memory location argument. |
4353 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
4354 | SDValue FR = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT); |
4355 | const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue(); |
4356 | return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FR, Ptr: Op.getOperand(i: 1), |
4357 | PtrInfo: MachinePointerInfo(SV)); |
4358 | } |
4359 | |
4360 | SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op, |
4361 | SelectionDAG &DAG) const { |
4362 | MachineFunction &MF = DAG.getMachineFunction(); |
4363 | SystemZMachineFunctionInfo *FuncInfo = |
4364 | MF.getInfo<SystemZMachineFunctionInfo>(); |
4365 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
4366 | |
4367 | SDValue Chain = Op.getOperand(i: 0); |
4368 | SDValue Addr = Op.getOperand(i: 1); |
4369 | const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue(); |
4370 | SDLoc DL(Op); |
4371 | |
4372 | // The initial values of each field. |
4373 | const unsigned NumFields = 4; |
4374 | SDValue Fields[NumFields] = { |
4375 | DAG.getConstant(Val: FuncInfo->getVarArgsFirstGPR(), DL, VT: PtrVT), |
4376 | DAG.getConstant(Val: FuncInfo->getVarArgsFirstFPR(), DL, VT: PtrVT), |
4377 | DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT), |
4378 | DAG.getFrameIndex(FI: FuncInfo->getRegSaveFrameIndex(), VT: PtrVT) |
4379 | }; |
4380 | |
4381 | // Store each field into its respective slot. |
4382 | SDValue MemOps[NumFields]; |
4383 | unsigned Offset = 0; |
4384 | for (unsigned I = 0; I < NumFields; ++I) { |
4385 | SDValue FieldAddr = Addr; |
4386 | if (Offset != 0) |
4387 | FieldAddr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: FieldAddr, |
4388 | N2: DAG.getIntPtrConstant(Val: Offset, DL)); |
4389 | MemOps[I] = DAG.getStore(Chain, dl: DL, Val: Fields[I], Ptr: FieldAddr, |
4390 | PtrInfo: MachinePointerInfo(SV, Offset)); |
4391 | Offset += 8; |
4392 | } |
4393 | return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOps); |
4394 | } |
4395 | |
4396 | SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, |
4397 | SelectionDAG &DAG) const { |
4398 | SDValue Chain = Op.getOperand(i: 0); |
4399 | SDValue DstPtr = Op.getOperand(i: 1); |
4400 | SDValue SrcPtr = Op.getOperand(i: 2); |
4401 | const Value *DstSV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 3))->getValue(); |
4402 | const Value *SrcSV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 4))->getValue(); |
4403 | SDLoc DL(Op); |
4404 | |
4405 | uint32_t Sz = |
4406 | Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(AS: 0) : 32; |
4407 | return DAG.getMemcpy(Chain, dl: DL, Dst: DstPtr, Src: SrcPtr, Size: DAG.getIntPtrConstant(Val: Sz, DL), |
4408 | Alignment: Align(8), /*isVolatile*/ isVol: false, /*AlwaysInline*/ false, |
4409 | /*CI=*/nullptr, OverrideTailCall: std::nullopt, DstPtrInfo: MachinePointerInfo(DstSV), |
4410 | SrcPtrInfo: MachinePointerInfo(SrcSV)); |
4411 | } |
4412 | |
4413 | SDValue |
4414 | SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, |
4415 | SelectionDAG &DAG) const { |
4416 | if (Subtarget.isTargetXPLINK64()) |
4417 | return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG); |
4418 | else |
4419 | return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG); |
4420 | } |
4421 | |
4422 | SDValue |
4423 | SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op, |
4424 | SelectionDAG &DAG) const { |
4425 | const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); |
4426 | MachineFunction &MF = DAG.getMachineFunction(); |
bool RealignOpt = !MF.getFunction().hasFnAttribute(Kind: "no-realign-stack");
4428 | SDValue Chain = Op.getOperand(i: 0); |
4429 | SDValue Size = Op.getOperand(i: 1); |
4430 | SDValue Align = Op.getOperand(i: 2); |
4431 | SDLoc DL(Op); |
4432 | |
// If the user has set the "no-realign-stack" function attribute, ignore
// alloca alignments.
4435 | uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); |
4436 | |
4437 | uint64_t StackAlign = TFI->getStackAlignment(); |
4438 | uint64_t RequiredAlign = std::max(a: AlignVal, b: StackAlign); |
uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4440 | |
4441 | SDValue NeededSpace = Size; |
4442 | |
4443 | // Add extra space for alignment if needed. |
4444 | EVT PtrVT = getPointerTy(DL: MF.getDataLayout()); |
4445 | if (ExtraAlignSpace) |
4446 | NeededSpace = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: NeededSpace, |
4447 | N2: DAG.getConstant(Val: ExtraAlignSpace, DL, VT: PtrVT)); |
4448 | |
4449 | bool IsSigned = false; |
4450 | bool DoesNotReturn = false; |
4451 | bool IsReturnValueUsed = false; |
4452 | EVT VT = Op.getValueType(); |
4453 | SDValue AllocaCall = |
makeExternalCall(Chain, DAG, CalleeName: "@@ALCAXP", RetVT: VT, Ops: ArrayRef(NeededSpace),
4455 | CallConv: CallingConv::C, IsSigned, DL, DoesNotReturn, |
4456 | IsReturnValueUsed) |
4457 | .first; |
4458 | |
// Perform a CopyFromReg from %GPR4 (the stack pointer register). Chain and
// glue it to the end of the call in order to ensure it isn't broken up
// from the call sequence.
4462 | auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); |
4463 | Register SPReg = Regs.getStackPointerRegister(); |
4464 | Chain = AllocaCall.getValue(R: 1); |
4465 | SDValue Glue = AllocaCall.getValue(R: 2); |
4466 | SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, dl: DL, Reg: SPReg, VT: PtrVT, Glue); |
4467 | Chain = NewSPRegNode.getValue(R: 1); |
4468 | |
4469 | MVT PtrMVT = getPointerMemTy(DL: MF.getDataLayout()); |
4470 | SDValue ArgAdjust = DAG.getNode(Opcode: SystemZISD::ADJDYNALLOC, DL, VT: PtrMVT); |
4471 | SDValue Result = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrMVT, N1: NewSPRegNode, N2: ArgAdjust); |
4472 | |
4473 | // Dynamically realign if needed. |
4474 | if (ExtraAlignSpace) { |
4475 | Result = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Result, |
4476 | N2: DAG.getConstant(Val: ExtraAlignSpace, DL, VT: PtrVT)); |
4477 | Result = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, N1: Result, |
4478 | N2: DAG.getConstant(Val: ~(RequiredAlign - 1), DL, VT: PtrVT)); |
4479 | } |
4480 | |
4481 | SDValue Ops[2] = {Result, Chain}; |
4482 | return DAG.getMergeValues(Ops, dl: DL); |
4483 | } |
4484 | |
4485 | SDValue |
4486 | SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op, |
4487 | SelectionDAG &DAG) const { |
4488 | const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); |
4489 | MachineFunction &MF = DAG.getMachineFunction(); |
bool RealignOpt = !MF.getFunction().hasFnAttribute(Kind: "no-realign-stack");
4491 | bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); |
4492 | |
4493 | SDValue Chain = Op.getOperand(i: 0); |
4494 | SDValue Size = Op.getOperand(i: 1); |
4495 | SDValue Align = Op.getOperand(i: 2); |
4496 | SDLoc DL(Op); |
4497 | |
// If the user has set the "no-realign-stack" function attribute, ignore
// alloca alignments.
4500 | uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); |
4501 | |
4502 | uint64_t StackAlign = TFI->getStackAlignment(); |
4503 | uint64_t RequiredAlign = std::max(a: AlignVal, b: StackAlign); |
uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4505 | |
4506 | Register SPReg = getStackPointerRegisterToSaveRestore(); |
4507 | SDValue NeededSpace = Size; |
4508 | |
4509 | // Get a reference to the stack pointer. |
4510 | SDValue OldSP = DAG.getCopyFromReg(Chain, dl: DL, Reg: SPReg, VT: MVT::i64); |
4511 | |
4512 | // If we need a backchain, save it now. |
4513 | SDValue Backchain; |
4514 | if (StoreBackchain) |
4515 | Backchain = DAG.getLoad(VT: MVT::i64, dl: DL, Chain, Ptr: getBackchainAddress(SP: OldSP, DAG), |
4516 | PtrInfo: MachinePointerInfo()); |
4517 | |
4518 | // Add extra space for alignment if needed. |
4519 | if (ExtraAlignSpace) |
4520 | NeededSpace = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: NeededSpace, |
4521 | N2: DAG.getConstant(Val: ExtraAlignSpace, DL, VT: MVT::i64)); |
4522 | |
4523 | // Get the new stack pointer value. |
4524 | SDValue NewSP; |
4525 | if (hasInlineStackProbe(MF)) { |
4526 | NewSP = DAG.getNode(Opcode: SystemZISD::PROBED_ALLOCA, DL, |
4527 | VTList: DAG.getVTList(VT1: MVT::i64, VT2: MVT::Other), N1: Chain, N2: OldSP, N3: NeededSpace); |
4528 | Chain = NewSP.getValue(R: 1); |
4529 | } |
4530 | else { |
4531 | NewSP = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: OldSP, N2: NeededSpace); |
4532 | // Copy the new stack pointer back. |
4533 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: SPReg, N: NewSP); |
4534 | } |
4535 | |
4536 | // The allocated data lives above the 160 bytes allocated for the standard |
4537 | // frame, plus any outgoing stack arguments. We don't know how much that |
4538 | // amounts to yet, so emit a special ADJDYNALLOC placeholder. |
4539 | SDValue ArgAdjust = DAG.getNode(Opcode: SystemZISD::ADJDYNALLOC, DL, VT: MVT::i64); |
4540 | SDValue Result = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: NewSP, N2: ArgAdjust); |
4541 | |
4542 | // Dynamically realign if needed. |
4543 | if (RequiredAlign > StackAlign) { |
4544 | Result = |
4545 | DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: Result, |
4546 | N2: DAG.getConstant(Val: ExtraAlignSpace, DL, VT: MVT::i64)); |
4547 | Result = |
4548 | DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, N1: Result, |
4549 | N2: DAG.getConstant(Val: ~(RequiredAlign - 1), DL, VT: MVT::i64)); |
4550 | } |
4551 | |
4552 | if (StoreBackchain) |
4553 | Chain = DAG.getStore(Chain, dl: DL, Val: Backchain, Ptr: getBackchainAddress(SP: NewSP, DAG), |
4554 | PtrInfo: MachinePointerInfo()); |
4555 | |
4556 | SDValue Ops[2] = { Result, Chain }; |
4557 | return DAG.getMergeValues(Ops, dl: DL); |
4558 | } |
4559 | |
4560 | SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET( |
4561 | SDValue Op, SelectionDAG &DAG) const { |
4562 | SDLoc DL(Op); |
4563 | |
4564 | return DAG.getNode(Opcode: SystemZISD::ADJDYNALLOC, DL, VT: MVT::i64); |
4565 | } |
4566 | |
4567 | SDValue SystemZTargetLowering::lowerMULH(SDValue Op, |
4568 | SelectionDAG &DAG, |
4569 | unsigned Opcode) const { |
4570 | EVT VT = Op.getValueType(); |
4571 | SDLoc DL(Op); |
4572 | SDValue Even, Odd; |
4573 | |
4574 | // This custom expander is only used on z17 and later for 64-bit types. |
4575 | assert(!is32Bit(VT)); |
4576 | assert(Subtarget.hasMiscellaneousExtensions2()); |
4577 | |
4578 | // SystemZISD::xMUL_LOHI returns the low result in the odd register and |
4579 | // the high result in the even register. Return the latter. |
4580 | lowerGR128Binary(DAG, DL, VT, Opcode, |
4581 | Op0: Op.getOperand(i: 0), Op1: Op.getOperand(i: 1), Even, Odd); |
4582 | return Even; |
4583 | } |
4584 | |
4585 | SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, |
4586 | SelectionDAG &DAG) const { |
4587 | EVT VT = Op.getValueType(); |
4588 | SDLoc DL(Op); |
4589 | SDValue Ops[2]; |
4590 | if (is32Bit(VT)) |
4591 | // Just do a normal 64-bit multiplication and extract the results. |
4592 | // We define this so that it can be used for constant division. |
4593 | lowerMUL_LOHI32(DAG, DL, Extend: ISD::SIGN_EXTEND, Op0: Op.getOperand(i: 0), |
4594 | Op1: Op.getOperand(i: 1), Hi&: Ops[1], Lo&: Ops[0]); |
4595 | else if (Subtarget.hasMiscellaneousExtensions2()) |
4596 | // SystemZISD::SMUL_LOHI returns the low result in the odd register and |
4597 | // the high result in the even register. ISD::SMUL_LOHI is defined to |
4598 | // return the low half first, so the results are in reverse order. |
4599 | lowerGR128Binary(DAG, DL, VT, Opcode: SystemZISD::SMUL_LOHI, |
4600 | Op0: Op.getOperand(i: 0), Op1: Op.getOperand(i: 1), Even&: Ops[1], Odd&: Ops[0]); |
4601 | else { |
4602 | // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI: |
4603 | // |
4604 | // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) |
4605 | // |
4606 | // but using the fact that the upper halves are either all zeros |
4607 | // or all ones: |
4608 | // |
4609 | // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64) |
4610 | // |
4611 | // and grouping the right terms together since they are quicker than the |
4612 | // multiplication: |
4613 | // |
4614 | // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) |
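//
// This relies on lh being either 0 or all ones, so the 64-bit product
// lh * rl equals -(lh & rl); the same applies to ll * rh.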
4615 | SDValue C63 = DAG.getConstant(Val: 63, DL, VT: MVT::i64); |
4616 | SDValue LL = Op.getOperand(i: 0); |
4617 | SDValue RL = Op.getOperand(i: 1); |
4618 | SDValue LH = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: LL, N2: C63); |
4619 | SDValue RH = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: RL, N2: C63); |
4620 | // SystemZISD::UMUL_LOHI returns the low result in the odd register and |
4621 | // the high result in the even register. ISD::SMUL_LOHI is defined to |
4622 | // return the low half first, so the results are in reverse order. |
4623 | lowerGR128Binary(DAG, DL, VT, Opcode: SystemZISD::UMUL_LOHI, |
4624 | Op0: LL, Op1: RL, Even&: Ops[1], Odd&: Ops[0]); |
4625 | SDValue NegLLTimesRH = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: LL, N2: RH); |
4626 | SDValue NegLHTimesRL = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: LH, N2: RL); |
4627 | SDValue NegSum = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: NegLLTimesRH, N2: NegLHTimesRL); |
4628 | Ops[1] = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Ops[1], N2: NegSum); |
4629 | } |
4630 | return DAG.getMergeValues(Ops, dl: DL); |
4631 | } |
4632 | |
4633 | SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, |
4634 | SelectionDAG &DAG) const { |
4635 | EVT VT = Op.getValueType(); |
4636 | SDLoc DL(Op); |
4637 | SDValue Ops[2]; |
4638 | if (is32Bit(VT)) |
4639 | // Just do a normal 64-bit multiplication and extract the results. |
4640 | // We define this so that it can be used for constant division. |
4641 | lowerMUL_LOHI32(DAG, DL, Extend: ISD::ZERO_EXTEND, Op0: Op.getOperand(i: 0), |
4642 | Op1: Op.getOperand(i: 1), Hi&: Ops[1], Lo&: Ops[0]); |
4643 | else |
4644 | // SystemZISD::UMUL_LOHI returns the low result in the odd register and |
4645 | // the high result in the even register. ISD::UMUL_LOHI is defined to |
4646 | // return the low half first, so the results are in reverse order. |
4647 | lowerGR128Binary(DAG, DL, VT, Opcode: SystemZISD::UMUL_LOHI, |
4648 | Op0: Op.getOperand(i: 0), Op1: Op.getOperand(i: 1), Even&: Ops[1], Odd&: Ops[0]); |
4649 | return DAG.getMergeValues(Ops, dl: DL); |
4650 | } |
4651 | |
4652 | SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, |
4653 | SelectionDAG &DAG) const { |
4654 | SDValue Op0 = Op.getOperand(i: 0); |
4655 | SDValue Op1 = Op.getOperand(i: 1); |
4656 | EVT VT = Op.getValueType(); |
4657 | SDLoc DL(Op); |
4658 | |
4659 | // We use DSGF for 32-bit division. This means the first operand must |
4660 | // always be 64-bit, and the second operand should be 32-bit whenever |
4661 | // that is possible, to improve performance. |
4662 | if (is32Bit(VT)) |
4663 | Op0 = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: Op0); |
4664 | else if (DAG.ComputeNumSignBits(Op: Op1) > 32) |
4665 | Op1 = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Op1); |
4666 | |
4667 | // DSG(F) returns the remainder in the even register and the |
4668 | // quotient in the odd register. |
4669 | SDValue Ops[2]; |
4670 | lowerGR128Binary(DAG, DL, VT, Opcode: SystemZISD::SDIVREM, Op0, Op1, Even&: Ops[1], Odd&: Ops[0]); |
4671 | return DAG.getMergeValues(Ops, dl: DL); |
4672 | } |
4673 | |
4674 | SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, |
4675 | SelectionDAG &DAG) const { |
4676 | EVT VT = Op.getValueType(); |
4677 | SDLoc DL(Op); |
4678 | |
4679 | // DL(G) returns the remainder in the even register and the |
4680 | // quotient in the odd register. |
4681 | SDValue Ops[2]; |
4682 | lowerGR128Binary(DAG, DL, VT, Opcode: SystemZISD::UDIVREM, |
4683 | Op0: Op.getOperand(i: 0), Op1: Op.getOperand(i: 1), Even&: Ops[1], Odd&: Ops[0]); |
4684 | return DAG.getMergeValues(Ops, dl: DL); |
4685 | } |
4686 | |
4687 | SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { |
assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4689 | |
4690 | // Get the known-zero masks for each operand. |
4691 | SDValue Ops[] = {Op.getOperand(i: 0), Op.getOperand(i: 1)}; |
4692 | KnownBits Known[2] = {DAG.computeKnownBits(Op: Ops[0]), |
4693 | DAG.computeKnownBits(Op: Ops[1])}; |
4694 | |
4695 | // See if the upper 32 bits of one operand and the lower 32 bits of the |
4696 | // other are known zero. They are the low and high operands respectively. |
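// A typical case is combining two 32-bit halves into a 64-bit value,
// e.g. (or (shl (anyext X), 32), (zext Y)), which can be done by
// inserting Y into the low 32-bit subregister of the high half.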
4697 | uint64_t Masks[] = { Known[0].Zero.getZExtValue(), |
4698 | Known[1].Zero.getZExtValue() }; |
4699 | unsigned High, Low; |
4700 | if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff) |
4701 | High = 1, Low = 0; |
4702 | else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff) |
4703 | High = 0, Low = 1; |
4704 | else |
4705 | return Op; |
4706 | |
4707 | SDValue LowOp = Ops[Low]; |
4708 | SDValue HighOp = Ops[High]; |
4709 | |
4710 | // If the high part is a constant, we're better off using IILH. |
4711 | if (HighOp.getOpcode() == ISD::Constant) |
4712 | return Op; |
4713 | |
4714 | // If the low part is a constant that is outside the range of LHI, |
4715 | // then we're better off using IILF. |
4716 | if (LowOp.getOpcode() == ISD::Constant) { |
4717 | int64_t Value = int32_t(LowOp->getAsZExtVal()); |
4718 | if (!isInt<16>(x: Value)) |
4719 | return Op; |
4720 | } |
4721 | |
4722 | // Check whether the high part is an AND that doesn't change the |
4723 | // high 32 bits and just masks out low bits. We can skip it if so. |
4724 | if (HighOp.getOpcode() == ISD::AND && |
4725 | HighOp.getOperand(i: 1).getOpcode() == ISD::Constant) { |
4726 | SDValue HighOp0 = HighOp.getOperand(i: 0); |
4727 | uint64_t Mask = HighOp.getConstantOperandVal(i: 1); |
4728 | if (DAG.MaskedValueIsZero(Op: HighOp0, Mask: APInt(64, ~(Mask | 0xffffffff)))) |
4729 | HighOp = HighOp0; |
4730 | } |
4731 | |
4732 | // Take advantage of the fact that all GR32 operations only change the |
4733 | // low 32 bits by truncating Low to an i32 and inserting it directly |
4734 | // using a subreg. The interesting cases are those where the truncation |
4735 | // can be folded. |
4736 | SDLoc DL(Op); |
4737 | SDValue Low32 = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: LowOp); |
4738 | return DAG.getTargetInsertSubreg(SRIdx: SystemZ::subreg_l32, DL, |
4739 | VT: MVT::i64, Operand: HighOp, Subreg: Low32); |
4740 | } |
4741 | |
4742 | // Lower SADDO/SSUBO/UADDO/USUBO nodes. |
4743 | SDValue SystemZTargetLowering::lowerXALUO(SDValue Op, |
4744 | SelectionDAG &DAG) const { |
4745 | SDNode *N = Op.getNode(); |
4746 | SDValue LHS = N->getOperand(Num: 0); |
4747 | SDValue RHS = N->getOperand(Num: 1); |
4748 | SDLoc DL(N); |
4749 | |
4750 | if (N->getValueType(ResNo: 0) == MVT::i128) { |
4751 | unsigned BaseOp = 0; |
4752 | unsigned FlagOp = 0; |
4753 | bool IsBorrow = false; |
4754 | switch (Op.getOpcode()) { |
default: llvm_unreachable("Unknown instruction!");
4756 | case ISD::UADDO: |
4757 | BaseOp = ISD::ADD; |
4758 | FlagOp = SystemZISD::VACC; |
4759 | break; |
4760 | case ISD::USUBO: |
4761 | BaseOp = ISD::SUB; |
4762 | FlagOp = SystemZISD::VSCBI; |
4763 | IsBorrow = true; |
4764 | break; |
4765 | } |
4766 | SDValue Result = DAG.getNode(Opcode: BaseOp, DL, VT: MVT::i128, N1: LHS, N2: RHS); |
4767 | SDValue Flag = DAG.getNode(Opcode: FlagOp, DL, VT: MVT::i128, N1: LHS, N2: RHS); |
4768 | Flag = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: MVT::i128, N1: Flag, |
4769 | N2: DAG.getValueType(MVT::i1)); |
4770 | Flag = DAG.getZExtOrTrunc(Op: Flag, DL, VT: N->getValueType(ResNo: 1)); |
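// VSCBI computes a carry-style indication that is 1 when no borrow
// occurred, so invert it to get the ISD borrow flag.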
4771 | if (IsBorrow) |
4772 | Flag = DAG.getNode(Opcode: ISD::XOR, DL, VT: Flag.getValueType(), |
4773 | N1: Flag, N2: DAG.getConstant(Val: 1, DL, VT: Flag.getValueType())); |
4774 | return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Result, N2: Flag); |
4775 | } |
4776 | |
4777 | unsigned BaseOp = 0; |
4778 | unsigned CCValid = 0; |
4779 | unsigned CCMask = 0; |
4780 | |
4781 | switch (Op.getOpcode()) { |
default: llvm_unreachable("Unknown instruction!");
4783 | case ISD::SADDO: |
4784 | BaseOp = SystemZISD::SADDO; |
4785 | CCValid = SystemZ::CCMASK_ARITH; |
4786 | CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; |
4787 | break; |
4788 | case ISD::SSUBO: |
4789 | BaseOp = SystemZISD::SSUBO; |
4790 | CCValid = SystemZ::CCMASK_ARITH; |
4791 | CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; |
4792 | break; |
4793 | case ISD::UADDO: |
4794 | BaseOp = SystemZISD::UADDO; |
4795 | CCValid = SystemZ::CCMASK_LOGICAL; |
4796 | CCMask = SystemZ::CCMASK_LOGICAL_CARRY; |
4797 | break; |
4798 | case ISD::USUBO: |
4799 | BaseOp = SystemZISD::USUBO; |
4800 | CCValid = SystemZ::CCMASK_LOGICAL; |
4801 | CCMask = SystemZ::CCMASK_LOGICAL_BORROW; |
4802 | break; |
4803 | } |
4804 | |
4805 | SDVTList VTs = DAG.getVTList(VT1: N->getValueType(ResNo: 0), VT2: MVT::i32); |
4806 | SDValue Result = DAG.getNode(Opcode: BaseOp, DL, VTList: VTs, N1: LHS, N2: RHS); |
4807 | |
4808 | SDValue SetCC = emitSETCC(DAG, DL, CCReg: Result.getValue(R: 1), CCValid, CCMask); |
4809 | if (N->getValueType(ResNo: 1) == MVT::i1) |
4810 | SetCC = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: SetCC); |
4811 | |
4812 | return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Result, N2: SetCC); |
4813 | } |
4814 | |
4815 | static bool isAddCarryChain(SDValue Carry) { |
4816 | while (Carry.getOpcode() == ISD::UADDO_CARRY && |
4817 | Carry->getValueType(ResNo: 0) != MVT::i128) |
4818 | Carry = Carry.getOperand(i: 2); |
4819 | return Carry.getOpcode() == ISD::UADDO && |
4820 | Carry->getValueType(ResNo: 0) != MVT::i128; |
4821 | } |
4822 | |
4823 | static bool isSubBorrowChain(SDValue Carry) { |
4824 | while (Carry.getOpcode() == ISD::USUBO_CARRY && |
4825 | Carry->getValueType(ResNo: 0) != MVT::i128) |
4826 | Carry = Carry.getOperand(i: 2); |
4827 | return Carry.getOpcode() == ISD::USUBO && |
4828 | Carry->getValueType(ResNo: 0) != MVT::i128; |
4829 | } |
4830 | |
4831 | // Lower UADDO_CARRY/USUBO_CARRY nodes. |
4832 | SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op, |
4833 | SelectionDAG &DAG) const { |
4834 | |
4835 | SDNode *N = Op.getNode(); |
4836 | MVT VT = N->getSimpleValueType(ResNo: 0); |
4837 | |
4838 | // Let legalize expand this if it isn't a legal type yet. |
4839 | if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) |
4840 | return SDValue(); |
4841 | |
4842 | SDValue LHS = N->getOperand(Num: 0); |
4843 | SDValue RHS = N->getOperand(Num: 1); |
4844 | SDValue Carry = Op.getOperand(i: 2); |
4845 | SDLoc DL(N); |
4846 | |
4847 | if (VT == MVT::i128) { |
4848 | unsigned BaseOp = 0; |
4849 | unsigned FlagOp = 0; |
4850 | bool IsBorrow = false; |
4851 | switch (Op.getOpcode()) { |
default: llvm_unreachable("Unknown instruction!");
4853 | case ISD::UADDO_CARRY: |
4854 | BaseOp = SystemZISD::VAC; |
4855 | FlagOp = SystemZISD::VACCC; |
4856 | break; |
4857 | case ISD::USUBO_CARRY: |
4858 | BaseOp = SystemZISD::VSBI; |
4859 | FlagOp = SystemZISD::VSBCBI; |
4860 | IsBorrow = true; |
4861 | break; |
4862 | } |
4863 | if (IsBorrow) |
4864 | Carry = DAG.getNode(Opcode: ISD::XOR, DL, VT: Carry.getValueType(), |
4865 | N1: Carry, N2: DAG.getConstant(Val: 1, DL, VT: Carry.getValueType())); |
4866 | Carry = DAG.getZExtOrTrunc(Op: Carry, DL, VT: MVT::i128); |
4867 | SDValue Result = DAG.getNode(Opcode: BaseOp, DL, VT: MVT::i128, N1: LHS, N2: RHS, N3: Carry); |
4868 | SDValue Flag = DAG.getNode(Opcode: FlagOp, DL, VT: MVT::i128, N1: LHS, N2: RHS, N3: Carry); |
4869 | Flag = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: MVT::i128, N1: Flag, |
4870 | N2: DAG.getValueType(MVT::i1)); |
4871 | Flag = DAG.getZExtOrTrunc(Op: Flag, DL, VT: N->getValueType(ResNo: 1)); |
4872 | if (IsBorrow) |
4873 | Flag = DAG.getNode(Opcode: ISD::XOR, DL, VT: Flag.getValueType(), |
4874 | N1: Flag, N2: DAG.getConstant(Val: 1, DL, VT: Flag.getValueType())); |
4875 | return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Result, N2: Flag); |
4876 | } |
4877 | |
4878 | unsigned BaseOp = 0; |
4879 | unsigned CCValid = 0; |
4880 | unsigned CCMask = 0; |
4881 | |
4882 | switch (Op.getOpcode()) { |
default: llvm_unreachable("Unknown instruction!");
4884 | case ISD::UADDO_CARRY: |
4885 | if (!isAddCarryChain(Carry)) |
4886 | return SDValue(); |
4887 | |
4888 | BaseOp = SystemZISD::ADDCARRY; |
4889 | CCValid = SystemZ::CCMASK_LOGICAL; |
4890 | CCMask = SystemZ::CCMASK_LOGICAL_CARRY; |
4891 | break; |
4892 | case ISD::USUBO_CARRY: |
4893 | if (!isSubBorrowChain(Carry)) |
4894 | return SDValue(); |
4895 | |
4896 | BaseOp = SystemZISD::SUBCARRY; |
4897 | CCValid = SystemZ::CCMASK_LOGICAL; |
4898 | CCMask = SystemZ::CCMASK_LOGICAL_BORROW; |
4899 | break; |
4900 | } |
4901 | |
4902 | // Set the condition code from the carry flag. |
4903 | Carry = DAG.getNode(Opcode: SystemZISD::GET_CCMASK, DL, VT: MVT::i32, N1: Carry, |
4904 | N2: DAG.getConstant(Val: CCValid, DL, VT: MVT::i32), |
4905 | N3: DAG.getConstant(Val: CCMask, DL, VT: MVT::i32)); |
4906 | |
4907 | SDVTList VTs = DAG.getVTList(VT1: VT, VT2: MVT::i32); |
4908 | SDValue Result = DAG.getNode(Opcode: BaseOp, DL, VTList: VTs, N1: LHS, N2: RHS, N3: Carry); |
4909 | |
4910 | SDValue SetCC = emitSETCC(DAG, DL, CCReg: Result.getValue(R: 1), CCValid, CCMask); |
4911 | if (N->getValueType(ResNo: 1) == MVT::i1) |
4912 | SetCC = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: SetCC); |
4913 | |
4914 | return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Result, N2: SetCC); |
4915 | } |
4916 | |
4917 | SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, |
4918 | SelectionDAG &DAG) const { |
4919 | EVT VT = Op.getValueType(); |
4920 | SDLoc DL(Op); |
4921 | Op = Op.getOperand(i: 0); |
4922 | |
4923 | if (VT.getScalarSizeInBits() == 128) { |
4924 | Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v2i64, Operand: Op); |
4925 | Op = DAG.getNode(Opcode: ISD::CTPOP, DL, VT: MVT::v2i64, Operand: Op); |
4926 | SDValue Tmp = DAG.getSplatBuildVector(VT: MVT::v2i64, DL, |
4927 | Op: DAG.getConstant(Val: 0, DL, VT: MVT::i64)); |
4928 | Op = DAG.getNode(Opcode: SystemZISD::VSUM, DL, VT, N1: Op, N2: Tmp); |
4929 | return Op; |
4930 | } |
4931 | |
4932 | // Handle vector types via VPOPCT. |
4933 | if (VT.isVector()) { |
4934 | Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v16i8, Operand: Op); |
4935 | Op = DAG.getNode(Opcode: SystemZISD::POPCNT, DL, VT: MVT::v16i8, Operand: Op); |
4936 | switch (VT.getScalarSizeInBits()) { |
4937 | case 8: |
4938 | break; |
4939 | case 16: { |
4940 | Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op); |
4941 | SDValue Shift = DAG.getConstant(Val: 8, DL, VT: MVT::i32); |
4942 | SDValue Tmp = DAG.getNode(Opcode: SystemZISD::VSHL_BY_SCALAR, DL, VT, N1: Op, N2: Shift); |
4943 | Op = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op, N2: Tmp); |
4944 | Op = DAG.getNode(Opcode: SystemZISD::VSRL_BY_SCALAR, DL, VT, N1: Op, N2: Shift); |
4945 | break; |
4946 | } |
4947 | case 32: { |
4948 | SDValue Tmp = DAG.getSplatBuildVector(VT: MVT::v16i8, DL, |
4949 | Op: DAG.getConstant(Val: 0, DL, VT: MVT::i32)); |
4950 | Op = DAG.getNode(Opcode: SystemZISD::VSUM, DL, VT, N1: Op, N2: Tmp); |
4951 | break; |
4952 | } |
4953 | case 64: { |
4954 | SDValue Tmp = DAG.getSplatBuildVector(VT: MVT::v16i8, DL, |
4955 | Op: DAG.getConstant(Val: 0, DL, VT: MVT::i32)); |
4956 | Op = DAG.getNode(Opcode: SystemZISD::VSUM, DL, VT: MVT::v4i32, N1: Op, N2: Tmp); |
4957 | Op = DAG.getNode(Opcode: SystemZISD::VSUM, DL, VT, N1: Op, N2: Tmp); |
4958 | break; |
4959 | } |
4960 | default: |
4961 | llvm_unreachable("Unexpected type" ); |
4962 | } |
4963 | return Op; |
4964 | } |
4965 | |
4966 | // Get the known-zero mask for the operand. |
4967 | KnownBits Known = DAG.computeKnownBits(Op); |
4968 | unsigned NumSignificantBits = Known.getMaxValue().getActiveBits(); |
4969 | if (NumSignificantBits == 0) |
4970 | return DAG.getConstant(Val: 0, DL, VT); |
4971 | |
4972 | // Skip known-zero high parts of the operand. |
4973 | int64_t OrigBitSize = VT.getSizeInBits(); |
4974 | int64_t BitSize = llvm::bit_ceil(Value: NumSignificantBits); |
4975 | BitSize = std::min(a: BitSize, b: OrigBitSize); |
4976 | |
// The POPCNT instruction counts the number of set bits in each byte.
4978 | Op = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op); |
4979 | Op = DAG.getNode(Opcode: SystemZISD::POPCNT, DL, VT: MVT::i64, Operand: Op); |
4980 | Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Op); |
4981 | |
4982 | // Add up per-byte counts in a binary tree. All bits of Op at |
4983 | // position larger than BitSize remain zero throughout. |
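// For example (illustrative), with BitSize == 32 and per-byte counts
// [b3, b2, b1, b0] after POPCNT (b3 most significant):
//   I == 16:  Op += Op << 16  -> the two high bytes hold b3+b1 and b2+b0
//   I ==  8:  Op += Op <<  8  -> the high byte holds b3+b2+b1+b0
// The SRL below then extracts the total from the high byte.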
4984 | for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { |
4985 | SDValue Tmp = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Op, N2: DAG.getConstant(Val: I, DL, VT)); |
4986 | if (BitSize != OrigBitSize) |
4987 | Tmp = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Tmp, |
4988 | N2: DAG.getConstant(Val: ((uint64_t)1 << BitSize) - 1, DL, VT)); |
4989 | Op = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op, N2: Tmp); |
4990 | } |
4991 | |
4992 | // Extract overall result from high byte. |
4993 | if (BitSize > 8) |
4994 | Op = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Op, |
4995 | N2: DAG.getConstant(Val: BitSize - 8, DL, VT)); |
4996 | |
4997 | return Op; |
4998 | } |
4999 | |
5000 | SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, |
5001 | SelectionDAG &DAG) const { |
5002 | SDLoc DL(Op); |
5003 | AtomicOrdering FenceOrdering = |
5004 | static_cast<AtomicOrdering>(Op.getConstantOperandVal(i: 1)); |
5005 | SyncScope::ID FenceSSID = |
5006 | static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2)); |
5007 | |
5008 | // The only fence that needs an instruction is a sequentially-consistent |
5009 | // cross-thread fence. |
5010 | if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && |
5011 | FenceSSID == SyncScope::System) { |
5012 | return SDValue(DAG.getMachineNode(Opcode: SystemZ::Serialize, dl: DL, VT: MVT::Other, |
5013 | Op1: Op.getOperand(i: 0)), |
5014 | 0); |
5015 | } |
5016 | |
5017 | // MEMBARRIER is a compiler barrier; it codegens to a no-op. |
5018 | return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0)); |
5019 | } |
5020 | |
5021 | SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, |
5022 | SelectionDAG &DAG) const { |
5023 | EVT RegVT = Op.getValueType(); |
5024 | if (RegVT.getSizeInBits() == 128) |
5025 | return lowerATOMIC_LDST_I128(Op, DAG); |
5026 | return lowerLoadF16(Op, DAG); |
5027 | } |
5028 | |
5029 | SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op, |
5030 | SelectionDAG &DAG) const { |
5031 | auto *Node = cast<AtomicSDNode>(Val: Op.getNode()); |
5032 | if (Node->getMemoryVT().getSizeInBits() == 128) |
5033 | return lowerATOMIC_LDST_I128(Op, DAG); |
5034 | return lowerStoreF16(Op, DAG); |
5035 | } |
5036 | |
5037 | SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op, |
5038 | SelectionDAG &DAG) const { |
5039 | auto *Node = cast<AtomicSDNode>(Val: Op.getNode()); |
5040 | assert( |
5041 | (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) && |
5042 | "Only custom lowering i128 or f128." ); |
// Use the same code to handle both legal and illegal i128 types.
5044 | SmallVector<SDValue, 2> Results; |
5045 | LowerOperationWrapper(N: Node, Results, DAG); |
5046 | return DAG.getMergeValues(Ops: Results, dl: SDLoc(Op)); |
5047 | } |
5048 | |
// Prepare for a Compare And Swap for a subword operation. The operation is
// performed on the naturally aligned 4-byte word in memory that contains
// the subword.
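// For example (illustrative), for a halfword at address A = 0x1006:
//   AlignedAddr = A & -4 = 0x1004
//   BitShift = 8 * (A % 4) = 16 (modulo 32, as a rotate amount)
//   NegBitShift = 0 - BitShift = 16 (modulo 32)
// Rotating the containing word left by BitShift brings the halfword to the
// top bits of a GR32; rotating by NegBitShift moves it back.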
5051 | static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, |
5052 | SDValue &AlignedAddr, SDValue &BitShift, |
5053 | SDValue &NegBitShift) { |
5054 | EVT PtrVT = Addr.getValueType(); |
5055 | EVT WideVT = MVT::i32; |
5056 | |
5057 | // Get the address of the containing word. |
5058 | AlignedAddr = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, N1: Addr, |
5059 | N2: DAG.getSignedConstant(Val: -4, DL, VT: PtrVT)); |
5060 | |
5061 | // Get the number of bits that the word must be rotated left in order |
5062 | // to bring the field to the top bits of a GR32. |
5063 | BitShift = DAG.getNode(Opcode: ISD::SHL, DL, VT: PtrVT, N1: Addr, |
5064 | N2: DAG.getConstant(Val: 3, DL, VT: PtrVT)); |
5065 | BitShift = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: WideVT, Operand: BitShift); |
5066 | |
5067 | // Get the complementing shift amount, for rotating a field in the top |
5068 | // bits back to its proper position. |
NegBitShift = DAG.getNode(Opcode: ISD::SUB, DL, VT: WideVT,
N1: DAG.getConstant(Val: 0, DL, VT: WideVT), N2: BitShift);
}
5073 | |
// Op is an 8-, 16- or 32-bit ATOMIC_LOAD_* operation. Lower the 8- and
// 16-bit forms into the fullword ATOMIC_LOADW_* operation given by Opcode.
5076 | SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, |
5077 | SelectionDAG &DAG, |
5078 | unsigned Opcode) const { |
5079 | auto *Node = cast<AtomicSDNode>(Val: Op.getNode()); |
5080 | |
5081 | // 32-bit operations need no special handling. |
5082 | EVT NarrowVT = Node->getMemoryVT(); |
5083 | EVT WideVT = MVT::i32; |
5084 | if (NarrowVT == WideVT) |
5085 | return Op; |
5086 | |
5087 | int64_t BitSize = NarrowVT.getSizeInBits(); |
5088 | SDValue ChainIn = Node->getChain(); |
5089 | SDValue Addr = Node->getBasePtr(); |
5090 | SDValue Src2 = Node->getVal(); |
5091 | MachineMemOperand *MMO = Node->getMemOperand(); |
5092 | SDLoc DL(Node); |
5093 | |
5094 | // Convert atomic subtracts of constants into additions. |
5095 | if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) |
5096 | if (auto *Const = dyn_cast<ConstantSDNode>(Val&: Src2)) { |
5097 | Opcode = SystemZISD::ATOMIC_LOADW_ADD; |
5098 | Src2 = DAG.getSignedConstant(Val: -Const->getSExtValue(), DL, |
5099 | VT: Src2.getValueType()); |
5100 | } |
5101 | |
5102 | SDValue AlignedAddr, BitShift, NegBitShift; |
5103 | getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift); |
5104 | |
5105 | // Extend the source operand to 32 bits and prepare it for the inner loop. |
5106 | // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other |
5107 | // operations require the source to be shifted in advance. (This shift |
5108 | // can be folded if the source is constant.) For AND and NAND, the lower |
5109 | // bits must be set, while for other opcodes they should be left clear. |
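// For example (illustrative), an i8 ATOMIC_LOAD_AND with Src2 = 0x7f
// becomes Src2 = 0x7f000000 | 0x00ffffff: the value sits in the high byte
// and the remaining bits are set so that ANDing leaves the rest of the
// containing word unchanged.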
5110 | if (Opcode != SystemZISD::ATOMIC_SWAPW) |
5111 | Src2 = DAG.getNode(Opcode: ISD::SHL, DL, VT: WideVT, N1: Src2, |
5112 | N2: DAG.getConstant(Val: 32 - BitSize, DL, VT: WideVT)); |
5113 | if (Opcode == SystemZISD::ATOMIC_LOADW_AND || |
5114 | Opcode == SystemZISD::ATOMIC_LOADW_NAND) |
5115 | Src2 = DAG.getNode(Opcode: ISD::OR, DL, VT: WideVT, N1: Src2, |
5116 | N2: DAG.getConstant(Val: uint32_t(-1) >> BitSize, DL, VT: WideVT)); |
5117 | |
5118 | // Construct the ATOMIC_LOADW_* node. |
5119 | SDVTList VTList = DAG.getVTList(VT1: WideVT, VT2: MVT::Other); |
5120 | SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, |
5121 | DAG.getConstant(Val: BitSize, DL, VT: WideVT) }; |
5122 | SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, dl: DL, VTList, Ops, |
5123 | MemVT: NarrowVT, MMO); |
5124 | |
5125 | // Rotate the result of the final CS so that the field is in the lower |
5126 | // bits of a GR32, then truncate it. |
5127 | SDValue ResultShift = DAG.getNode(Opcode: ISD::ADD, DL, VT: WideVT, N1: BitShift, |
5128 | N2: DAG.getConstant(Val: BitSize, DL, VT: WideVT)); |
5129 | SDValue Result = DAG.getNode(Opcode: ISD::ROTL, DL, VT: WideVT, N1: AtomicOp, N2: ResultShift); |
5130 | |
5131 | SDValue RetOps[2] = { Result, AtomicOp.getValue(R: 1) }; |
5132 | return DAG.getMergeValues(Ops: RetOps, dl: DL); |
5133 | } |
5134 | |
5135 | // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into |
5136 | // ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions. |
5137 | SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, |
5138 | SelectionDAG &DAG) const { |
5139 | auto *Node = cast<AtomicSDNode>(Val: Op.getNode()); |
5140 | EVT MemVT = Node->getMemoryVT(); |
5141 | if (MemVT == MVT::i32 || MemVT == MVT::i64) { |
5142 | // A full-width operation: negate and use LAA(G). |
5143 | assert(Op.getValueType() == MemVT && "Mismatched VTs" ); |
5144 | assert(Subtarget.hasInterlockedAccess1() && |
5145 | "Should have been expanded by AtomicExpand pass." ); |
5146 | SDValue Src2 = Node->getVal(); |
5147 | SDLoc DL(Src2); |
5148 | SDValue NegSrc2 = |
5149 | DAG.getNode(Opcode: ISD::SUB, DL, VT: MemVT, N1: DAG.getConstant(Val: 0, DL, VT: MemVT), N2: Src2); |
5150 | return DAG.getAtomic(Opcode: ISD::ATOMIC_LOAD_ADD, dl: DL, MemVT, |
5151 | Chain: Node->getChain(), Ptr: Node->getBasePtr(), Val: NegSrc2, |
5152 | MMO: Node->getMemOperand()); |
5153 | } |
5154 | |
5155 | return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_SUB); |
5156 | } |
5157 | |
5158 | // Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node. |
5159 | SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, |
5160 | SelectionDAG &DAG) const { |
5161 | auto *Node = cast<AtomicSDNode>(Val: Op.getNode()); |
5162 | SDValue ChainIn = Node->getOperand(Num: 0); |
5163 | SDValue Addr = Node->getOperand(Num: 1); |
5164 | SDValue CmpVal = Node->getOperand(Num: 2); |
5165 | SDValue SwapVal = Node->getOperand(Num: 3); |
5166 | MachineMemOperand *MMO = Node->getMemOperand(); |
5167 | SDLoc DL(Node); |
5168 | |
5169 | if (Node->getMemoryVT() == MVT::i128) { |
// Use the same code to handle both legal and illegal i128 types.
5171 | SmallVector<SDValue, 3> Results; |
5172 | LowerOperationWrapper(N: Node, Results, DAG); |
5173 | return DAG.getMergeValues(Ops: Results, dl: DL); |
5174 | } |
5175 | |
// We have native support for 32-bit and 64-bit compare and swap, but we
// still need to expand the extraction of the "success" result from the CC.
5178 | EVT NarrowVT = Node->getMemoryVT(); |
5179 | EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32; |
5180 | if (NarrowVT == WideVT) { |
5181 | SDVTList Tys = DAG.getVTList(VT1: WideVT, VT2: MVT::i32, VT3: MVT::Other); |
5182 | SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal }; |
5183 | SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode: SystemZISD::ATOMIC_CMP_SWAP, |
5184 | dl: DL, VTList: Tys, Ops, MemVT: NarrowVT, MMO); |
5185 | SDValue Success = emitSETCC(DAG, DL, CCReg: AtomicOp.getValue(R: 1), |
5186 | CCValid: SystemZ::CCMASK_CS, CCMask: SystemZ::CCMASK_CS_EQ); |
5187 | |
5188 | DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 0), To: AtomicOp.getValue(R: 0)); |
5189 | DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 1), To: Success); |
5190 | DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 2), To: AtomicOp.getValue(R: 2)); |
5191 | return SDValue(); |
5192 | } |
5193 | |
5194 | // Convert 8-bit and 16-bit compare and swap to a loop, implemented |
5195 | // via a fullword ATOMIC_CMP_SWAPW operation. |
5196 | int64_t BitSize = NarrowVT.getSizeInBits(); |
5197 | |
5198 | SDValue AlignedAddr, BitShift, NegBitShift; |
5199 | getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift); |
5200 | |
5201 | // Construct the ATOMIC_CMP_SWAPW node. |
5202 | SDVTList VTList = DAG.getVTList(VT1: WideVT, VT2: MVT::i32, VT3: MVT::Other); |
5203 | SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, |
5204 | NegBitShift, DAG.getConstant(Val: BitSize, DL, VT: WideVT) }; |
5205 | SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode: SystemZISD::ATOMIC_CMP_SWAPW, dl: DL, |
5206 | VTList, Ops, MemVT: NarrowVT, MMO); |
5207 | SDValue Success = emitSETCC(DAG, DL, CCReg: AtomicOp.getValue(R: 1), |
5208 | CCValid: SystemZ::CCMASK_ICMP, CCMask: SystemZ::CCMASK_CMP_EQ); |
5209 | |
// emitAtomicCmpSwapW() will zero-extend the result (the original value).
5211 | SDValue OrigVal = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: WideVT, N1: AtomicOp.getValue(R: 0), |
5212 | N2: DAG.getValueType(NarrowVT)); |
5213 | DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 0), To: OrigVal); |
5214 | DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 1), To: Success); |
5215 | DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 2), To: AtomicOp.getValue(R: 2)); |
5216 | return SDValue(); |
5217 | } |
5218 | |
5219 | MachineMemOperand::Flags |
5220 | SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const { |
// Because of how we convert atomic_load and atomic_store to normal loads and
// stores in the DAG, we need to ensure that the MMOs are marked volatile
// since DAGCombine hasn't been updated to account for atomic but
// non-volatile loads. (See D57601)
5225 | if (auto *SI = dyn_cast<StoreInst>(Val: &I)) |
5226 | if (SI->isAtomic()) |
5227 | return MachineMemOperand::MOVolatile; |
5228 | if (auto *LI = dyn_cast<LoadInst>(Val: &I)) |
5229 | if (LI->isAtomic()) |
5230 | return MachineMemOperand::MOVolatile; |
5231 | if (auto *AI = dyn_cast<AtomicRMWInst>(Val: &I)) |
5232 | if (AI->isAtomic()) |
5233 | return MachineMemOperand::MOVolatile; |
5234 | if (auto *AI = dyn_cast<AtomicCmpXchgInst>(Val: &I)) |
5235 | if (AI->isAtomic()) |
5236 | return MachineMemOperand::MOVolatile; |
5237 | return MachineMemOperand::MONone; |
5238 | } |
5239 | |
5240 | SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, |
5241 | SelectionDAG &DAG) const { |
5242 | MachineFunction &MF = DAG.getMachineFunction(); |
5243 | auto *Regs = Subtarget.getSpecialRegisters(); |
5244 | if (MF.getFunction().getCallingConv() == CallingConv::GHC) |
5245 | report_fatal_error(reason: "Variable-sized stack allocations are not supported " |
5246 | "in GHC calling convention" ); |
5247 | return DAG.getCopyFromReg(Chain: Op.getOperand(i: 0), dl: SDLoc(Op), |
5248 | Reg: Regs->getStackPointerRegister(), VT: Op.getValueType()); |
5249 | } |
5250 | |
5251 | SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, |
5252 | SelectionDAG &DAG) const { |
5253 | MachineFunction &MF = DAG.getMachineFunction(); |
5254 | auto *Regs = Subtarget.getSpecialRegisters(); |
5255 | bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); |
5256 | |
5257 | if (MF.getFunction().getCallingConv() == CallingConv::GHC) |
5258 | report_fatal_error(reason: "Variable-sized stack allocations are not supported " |
5259 | "in GHC calling convention" ); |
5260 | |
5261 | SDValue Chain = Op.getOperand(i: 0); |
5262 | SDValue NewSP = Op.getOperand(i: 1); |
5263 | SDValue Backchain; |
5264 | SDLoc DL(Op); |
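// If the backchain is in use, preserve it across the stack pointer update:
// load it via the old stack pointer before the copy, then store it to the
// backchain slot off the new stack pointer.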
5265 | |
5266 | if (StoreBackchain) { |
5267 | SDValue OldSP = DAG.getCopyFromReg( |
5268 | Chain, dl: DL, Reg: Regs->getStackPointerRegister(), VT: MVT::i64); |
5269 | Backchain = DAG.getLoad(VT: MVT::i64, dl: DL, Chain, Ptr: getBackchainAddress(SP: OldSP, DAG), |
5270 | PtrInfo: MachinePointerInfo()); |
5271 | } |
5272 | |
5273 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Regs->getStackPointerRegister(), N: NewSP); |
5274 | |
5275 | if (StoreBackchain) |
5276 | Chain = DAG.getStore(Chain, dl: DL, Val: Backchain, Ptr: getBackchainAddress(SP: NewSP, DAG), |
5277 | PtrInfo: MachinePointerInfo()); |
5278 | |
5279 | return Chain; |
5280 | } |
5281 | |
5282 | SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, |
5283 | SelectionDAG &DAG) const { |
5284 | bool IsData = Op.getConstantOperandVal(i: 4); |
5285 | if (!IsData) |
5286 | // Just preserve the chain. |
5287 | return Op.getOperand(i: 0); |
5288 | |
5289 | SDLoc DL(Op); |
5290 | bool IsWrite = Op.getConstantOperandVal(i: 2); |
5291 | unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; |
5292 | auto *Node = cast<MemIntrinsicSDNode>(Val: Op.getNode()); |
5293 | SDValue Ops[] = {Op.getOperand(i: 0), DAG.getTargetConstant(Val: Code, DL, VT: MVT::i32), |
5294 | Op.getOperand(i: 1)}; |
5295 | return DAG.getMemIntrinsicNode(Opcode: SystemZISD::PREFETCH, dl: DL, |
5296 | VTList: Node->getVTList(), Ops, |
5297 | MemVT: Node->getMemoryVT(), MMO: Node->getMemOperand()); |
5298 | } |
5299 | |
5300 | // Convert condition code in CCReg to an i32 value. |
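// IPM inserts the CC into bits 28-29 of the i32 result (with the program
// mask in the bits below it), so shifting right by SystemZ::IPM_CC (28)
// leaves the CC as an integer in the range 0-3.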
5301 | static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { |
5302 | SDLoc DL(CCReg); |
5303 | SDValue IPM = DAG.getNode(Opcode: SystemZISD::IPM, DL, VT: MVT::i32, Operand: CCReg); |
5304 | return DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i32, N1: IPM, |
5305 | N2: DAG.getConstant(Val: SystemZ::IPM_CC, DL, VT: MVT::i32)); |
5306 | } |
5307 | |
5308 | SDValue |
5309 | SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, |
5310 | SelectionDAG &DAG) const { |
5311 | unsigned Opcode, CCValid; |
5312 | if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { |
5313 | assert(Op->getNumValues() == 2 && "Expected only CC result and chain" ); |
5314 | SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode); |
5315 | SDValue CC = getCCResult(DAG, CCReg: SDValue(Node, 0)); |
5316 | DAG.ReplaceAllUsesOfValueWith(From: SDValue(Op.getNode(), 0), To: CC); |
5317 | return SDValue(); |
5318 | } |
5319 | |
5320 | return SDValue(); |
5321 | } |
5322 | |
5323 | SDValue |
5324 | SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, |
5325 | SelectionDAG &DAG) const { |
5326 | unsigned Opcode, CCValid; |
5327 | if (isIntrinsicWithCC(Op, Opcode, CCValid)) { |
5328 | SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode); |
5329 | if (Op->getNumValues() == 1) |
5330 | return getCCResult(DAG, CCReg: SDValue(Node, 0)); |
5331 | assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result" ); |
5332 | return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: SDLoc(Op), VTList: Op->getVTList(), |
5333 | N1: SDValue(Node, 0), N2: getCCResult(DAG, CCReg: SDValue(Node, 1))); |
5334 | } |
5335 | |
5336 | unsigned Id = Op.getConstantOperandVal(i: 0); |
5337 | switch (Id) { |
5338 | case Intrinsic::thread_pointer: |
5339 | return lowerThreadPointer(DL: SDLoc(Op), DAG); |
5340 | |
5341 | case Intrinsic::s390_vpdi: |
5342 | return DAG.getNode(Opcode: SystemZISD::PERMUTE_DWORDS, DL: SDLoc(Op), VT: Op.getValueType(), |
5343 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3)); |
5344 | |
5345 | case Intrinsic::s390_vperm: |
5346 | return DAG.getNode(Opcode: SystemZISD::PERMUTE, DL: SDLoc(Op), VT: Op.getValueType(), |
5347 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3)); |
5348 | |
5349 | case Intrinsic::s390_vuphb: |
5350 | case Intrinsic::s390_vuphh: |
5351 | case Intrinsic::s390_vuphf: |
5352 | case Intrinsic::s390_vuphg: |
5353 | return DAG.getNode(Opcode: SystemZISD::UNPACK_HIGH, DL: SDLoc(Op), VT: Op.getValueType(), |
5354 | Operand: Op.getOperand(i: 1)); |
5355 | |
5356 | case Intrinsic::s390_vuplhb: |
5357 | case Intrinsic::s390_vuplhh: |
5358 | case Intrinsic::s390_vuplhf: |
5359 | case Intrinsic::s390_vuplhg: |
5360 | return DAG.getNode(Opcode: SystemZISD::UNPACKL_HIGH, DL: SDLoc(Op), VT: Op.getValueType(), |
5361 | Operand: Op.getOperand(i: 1)); |
5362 | |
5363 | case Intrinsic::s390_vuplb: |
5364 | case Intrinsic::s390_vuplhw: |
5365 | case Intrinsic::s390_vuplf: |
5366 | case Intrinsic::s390_vuplg: |
5367 | return DAG.getNode(Opcode: SystemZISD::UNPACK_LOW, DL: SDLoc(Op), VT: Op.getValueType(), |
5368 | Operand: Op.getOperand(i: 1)); |
5369 | |
5370 | case Intrinsic::s390_vupllb: |
5371 | case Intrinsic::s390_vupllh: |
5372 | case Intrinsic::s390_vupllf: |
5373 | case Intrinsic::s390_vupllg: |
5374 | return DAG.getNode(Opcode: SystemZISD::UNPACKL_LOW, DL: SDLoc(Op), VT: Op.getValueType(), |
5375 | Operand: Op.getOperand(i: 1)); |
5376 | |
5377 | case Intrinsic::s390_vsumb: |
5378 | case Intrinsic::s390_vsumh: |
5379 | case Intrinsic::s390_vsumgh: |
5380 | case Intrinsic::s390_vsumgf: |
5381 | case Intrinsic::s390_vsumqf: |
5382 | case Intrinsic::s390_vsumqg: |
5383 | return DAG.getNode(Opcode: SystemZISD::VSUM, DL: SDLoc(Op), VT: Op.getValueType(), |
5384 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)); |
5385 | |
5386 | case Intrinsic::s390_vaq: |
5387 | return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: Op.getValueType(), |
5388 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)); |
5389 | case Intrinsic::s390_vaccb: |
5390 | case Intrinsic::s390_vacch: |
5391 | case Intrinsic::s390_vaccf: |
5392 | case Intrinsic::s390_vaccg: |
5393 | case Intrinsic::s390_vaccq: |
5394 | return DAG.getNode(Opcode: SystemZISD::VACC, DL: SDLoc(Op), VT: Op.getValueType(), |
5395 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)); |
5396 | case Intrinsic::s390_vacq: |
5397 | return DAG.getNode(Opcode: SystemZISD::VAC, DL: SDLoc(Op), VT: Op.getValueType(), |
5398 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3)); |
5399 | case Intrinsic::s390_vacccq: |
5400 | return DAG.getNode(Opcode: SystemZISD::VACCC, DL: SDLoc(Op), VT: Op.getValueType(), |
5401 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3)); |
5402 | |
5403 | case Intrinsic::s390_vsq: |
5404 | return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(Op), VT: Op.getValueType(), |
5405 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)); |
5406 | case Intrinsic::s390_vscbib: |
5407 | case Intrinsic::s390_vscbih: |
5408 | case Intrinsic::s390_vscbif: |
5409 | case Intrinsic::s390_vscbig: |
5410 | case Intrinsic::s390_vscbiq: |
5411 | return DAG.getNode(Opcode: SystemZISD::VSCBI, DL: SDLoc(Op), VT: Op.getValueType(), |
5412 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)); |
5413 | case Intrinsic::s390_vsbiq: |
5414 | return DAG.getNode(Opcode: SystemZISD::VSBI, DL: SDLoc(Op), VT: Op.getValueType(), |
5415 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3)); |
5416 | case Intrinsic::s390_vsbcbiq: |
5417 | return DAG.getNode(Opcode: SystemZISD::VSBCBI, DL: SDLoc(Op), VT: Op.getValueType(), |
5418 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3)); |
5419 | |
5420 | case Intrinsic::s390_vmhb: |
5421 | case Intrinsic::s390_vmhh: |
5422 | case Intrinsic::s390_vmhf: |
5423 | case Intrinsic::s390_vmhg: |
5424 | case Intrinsic::s390_vmhq: |
5425 | return DAG.getNode(Opcode: ISD::MULHS, DL: SDLoc(Op), VT: Op.getValueType(), |
5426 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)); |
5427 | case Intrinsic::s390_vmlhb: |
5428 | case Intrinsic::s390_vmlhh: |
5429 | case Intrinsic::s390_vmlhf: |
5430 | case Intrinsic::s390_vmlhg: |
5431 | case Intrinsic::s390_vmlhq: |
5432 | return DAG.getNode(Opcode: ISD::MULHU, DL: SDLoc(Op), VT: Op.getValueType(), |
5433 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)); |
5434 | |
5435 | case Intrinsic::s390_vmahb: |
5436 | case Intrinsic::s390_vmahh: |
5437 | case Intrinsic::s390_vmahf: |
5438 | case Intrinsic::s390_vmahg: |
5439 | case Intrinsic::s390_vmahq: |
5440 | return DAG.getNode(Opcode: SystemZISD::VMAH, DL: SDLoc(Op), VT: Op.getValueType(), |
5441 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3)); |
5442 | case Intrinsic::s390_vmalhb: |
5443 | case Intrinsic::s390_vmalhh: |
5444 | case Intrinsic::s390_vmalhf: |
5445 | case Intrinsic::s390_vmalhg: |
5446 | case Intrinsic::s390_vmalhq: |
5447 | return DAG.getNode(Opcode: SystemZISD::VMALH, DL: SDLoc(Op), VT: Op.getValueType(), |
5448 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3)); |
5449 | |
5450 | case Intrinsic::s390_vmeb: |
5451 | case Intrinsic::s390_vmeh: |
5452 | case Intrinsic::s390_vmef: |
5453 | case Intrinsic::s390_vmeg: |
5454 | return DAG.getNode(Opcode: SystemZISD::VME, DL: SDLoc(Op), VT: Op.getValueType(), |
5455 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)); |
5456 | case Intrinsic::s390_vmleb: |
5457 | case Intrinsic::s390_vmleh: |
5458 | case Intrinsic::s390_vmlef: |
5459 | case Intrinsic::s390_vmleg: |
5460 | return DAG.getNode(Opcode: SystemZISD::VMLE, DL: SDLoc(Op), VT: Op.getValueType(), |
5461 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)); |
5462 | case Intrinsic::s390_vmob: |
5463 | case Intrinsic::s390_vmoh: |
5464 | case Intrinsic::s390_vmof: |
5465 | case Intrinsic::s390_vmog: |
5466 | return DAG.getNode(Opcode: SystemZISD::VMO, DL: SDLoc(Op), VT: Op.getValueType(), |
5467 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)); |
5468 | case Intrinsic::s390_vmlob: |
5469 | case Intrinsic::s390_vmloh: |
5470 | case Intrinsic::s390_vmlof: |
5471 | case Intrinsic::s390_vmlog: |
5472 | return DAG.getNode(Opcode: SystemZISD::VMLO, DL: SDLoc(Op), VT: Op.getValueType(), |
5473 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)); |
5474 | |
5475 | case Intrinsic::s390_vmaeb: |
5476 | case Intrinsic::s390_vmaeh: |
5477 | case Intrinsic::s390_vmaef: |
5478 | case Intrinsic::s390_vmaeg: |
5479 | return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: Op.getValueType(), |
5480 | N1: DAG.getNode(Opcode: SystemZISD::VME, DL: SDLoc(Op), VT: Op.getValueType(), |
5481 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)), |
5482 | N2: Op.getOperand(i: 3)); |
5483 | case Intrinsic::s390_vmaleb: |
5484 | case Intrinsic::s390_vmaleh: |
5485 | case Intrinsic::s390_vmalef: |
5486 | case Intrinsic::s390_vmaleg: |
5487 | return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: Op.getValueType(), |
5488 | N1: DAG.getNode(Opcode: SystemZISD::VMLE, DL: SDLoc(Op), VT: Op.getValueType(), |
5489 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)), |
5490 | N2: Op.getOperand(i: 3)); |
5491 | case Intrinsic::s390_vmaob: |
5492 | case Intrinsic::s390_vmaoh: |
5493 | case Intrinsic::s390_vmaof: |
5494 | case Intrinsic::s390_vmaog: |
5495 | return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: Op.getValueType(), |
5496 | N1: DAG.getNode(Opcode: SystemZISD::VMO, DL: SDLoc(Op), VT: Op.getValueType(), |
5497 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)), |
5498 | N2: Op.getOperand(i: 3)); |
5499 | case Intrinsic::s390_vmalob: |
5500 | case Intrinsic::s390_vmaloh: |
5501 | case Intrinsic::s390_vmalof: |
5502 | case Intrinsic::s390_vmalog: |
5503 | return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: Op.getValueType(), |
5504 | N1: DAG.getNode(Opcode: SystemZISD::VMLO, DL: SDLoc(Op), VT: Op.getValueType(), |
5505 | N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)), |
5506 | N2: Op.getOperand(i: 3)); |
5507 | } |
5508 | |
5509 | return SDValue(); |
5510 | } |
5511 | |
5512 | namespace { |
// Indicates that SystemZISD operation Opcode can be used to perform the
// equivalent of a VPERM with permute vector Bytes. If Opcode takes three
// operands, Operand is the constant third operand; otherwise it is the
// number of bytes in each element of the result.
5517 | struct Permute { |
5518 | unsigned Opcode; |
5519 | unsigned Operand; |
5520 | unsigned char Bytes[SystemZ::VectorBytes]; |
5521 | }; |
5522 | } |
5523 | |
5524 | static const Permute PermuteForms[] = { |
5525 | // VMRHG |
5526 | { .Opcode: SystemZISD::MERGE_HIGH, .Operand: 8, |
5527 | .Bytes: { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } }, |
5528 | // VMRHF |
5529 | { .Opcode: SystemZISD::MERGE_HIGH, .Operand: 4, |
5530 | .Bytes: { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, |
5531 | // VMRHH |
5532 | { .Opcode: SystemZISD::MERGE_HIGH, .Operand: 2, |
5533 | .Bytes: { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, |
5534 | // VMRHB |
5535 | { .Opcode: SystemZISD::MERGE_HIGH, .Operand: 1, |
5536 | .Bytes: { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, |
5537 | // VMRLG |
5538 | { .Opcode: SystemZISD::MERGE_LOW, .Operand: 8, |
5539 | .Bytes: { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } }, |
5540 | // VMRLF |
5541 | { .Opcode: SystemZISD::MERGE_LOW, .Operand: 4, |
5542 | .Bytes: { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, |
5543 | // VMRLH |
5544 | { .Opcode: SystemZISD::MERGE_LOW, .Operand: 2, |
5545 | .Bytes: { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, |
5546 | // VMRLB |
5547 | { .Opcode: SystemZISD::MERGE_LOW, .Operand: 1, |
5548 | .Bytes: { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, |
5549 | // VPKG |
5550 | { .Opcode: SystemZISD::PACK, .Operand: 4, |
5551 | .Bytes: { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } }, |
5552 | // VPKF |
5553 | { .Opcode: SystemZISD::PACK, .Operand: 2, |
5554 | .Bytes: { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, |
5555 | // VPKH |
5556 | { .Opcode: SystemZISD::PACK, .Operand: 1, |
5557 | .Bytes: { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, |
5558 | // VPDI V1, V2, 4 (low half of V1, high half of V2) |
5559 | { .Opcode: SystemZISD::PERMUTE_DWORDS, .Operand: 4, |
5560 | .Bytes: { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } }, |
5561 | // VPDI V1, V2, 1 (high half of V1, low half of V2) |
5562 | { .Opcode: SystemZISD::PERMUTE_DWORDS, .Operand: 1, |
5563 | .Bytes: { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } } |
5564 | }; |
5565 | |
5566 | // Called after matching a vector shuffle against a particular pattern. |
5567 | // Both the original shuffle and the pattern have two vector operands. |
5568 | // OpNos[0] is the operand of the original shuffle that should be used for |
5569 | // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything. |
5570 | // OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and |
5571 | // set OpNo0 and OpNo1 to the shuffle operands that should actually be used |
5572 | // for operands 0 and 1 of the pattern. |
5573 | static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) { |
5574 | if (OpNos[0] < 0) { |
5575 | if (OpNos[1] < 0) |
5576 | return false; |
5577 | OpNo0 = OpNo1 = OpNos[1]; |
5578 | } else if (OpNos[1] < 0) { |
5579 | OpNo0 = OpNo1 = OpNos[0]; |
5580 | } else { |
5581 | OpNo0 = OpNos[0]; |
5582 | OpNo1 = OpNos[1]; |
5583 | } |
5584 | return true; |
5585 | } |
5586 | |
5587 | // Bytes is a VPERM-like permute vector, except that -1 is used for |
5588 | // undefined bytes. Return true if the VPERM can be implemented using P. |
5589 | // When returning true set OpNo0 to the VPERM operand that should be |
5590 | // used for operand 0 of P and likewise OpNo1 for operand 1 of P. |
5591 | // |
5592 | // For example, if swapping the VPERM operands allows P to match, OpNo0 |
5593 | // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one |
5594 | // operand, but rewriting it to use two duplicated operands allows it to |
5595 | // match P, then OpNo0 and OpNo1 will be the same. |
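// For example (illustrative), the permute vector
//   { 16, 17, 18, 19, 0, 1, 2, 3, 20, 21, 22, 23, 4, 5, 6, 7 }
// matches the VMRHF pattern with the operands swapped, giving
// OpNo0 == 1 and OpNo1 == 0.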
5596 | static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P, |
5597 | unsigned &OpNo0, unsigned &OpNo1) { |
5598 | int OpNos[] = { -1, -1 }; |
5599 | for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { |
5600 | int Elt = Bytes[I]; |
5601 | if (Elt >= 0) { |
5602 | // Make sure that the two permute vectors use the same suboperand |
5603 | // byte number. Only the operand numbers (the high bits) are |
5604 | // allowed to differ. |
5605 | if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1)) |
5606 | return false; |
5607 | int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes; |
5608 | int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes; |
5609 | // Make sure that the operand mappings are consistent with previous |
5610 | // elements. |
5611 | if (OpNos[ModelOpNo] == 1 - RealOpNo) |
5612 | return false; |
5613 | OpNos[ModelOpNo] = RealOpNo; |
5614 | } |
5615 | } |
5616 | return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); |
5617 | } |
5618 | |
5619 | // As above, but search for a matching permute. |
5620 | static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes, |
5621 | unsigned &OpNo0, unsigned &OpNo1) { |
5622 | for (auto &P : PermuteForms) |
5623 | if (matchPermute(Bytes, P, OpNo0, OpNo1)) |
5624 | return &P; |
5625 | return nullptr; |
5626 | } |
5627 | |
5628 | // Bytes is a VPERM-like permute vector, except that -1 is used for |
5629 | // undefined bytes. This permute is an operand of an outer permute. |
5630 | // See whether redistributing the -1 bytes gives a shuffle that can be |
5631 | // implemented using P. If so, set Transform to a VPERM-like permute vector |
5632 | // that, when applied to the result of P, gives the original permute in Bytes. |
5633 | static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes, |
5634 | const Permute &P, |
5635 | SmallVectorImpl<int> &Transform) { |
5636 | unsigned To = 0; |
5637 | for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) { |
5638 | int Elt = Bytes[From]; |
5639 | if (Elt < 0) |
5640 | // Byte number From of the result is undefined. |
5641 | Transform[From] = -1; |
5642 | else { |
5643 | while (P.Bytes[To] != Elt) { |
5644 | To += 1; |
5645 | if (To == SystemZ::VectorBytes) |
5646 | return false; |
5647 | } |
5648 | Transform[From] = To; |
5649 | } |
5650 | } |
5651 | return true; |
5652 | } |
5653 | |
5654 | // As above, but search for a matching permute. |
5655 | static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes, |
5656 | SmallVectorImpl<int> &Transform) { |
5657 | for (auto &P : PermuteForms) |
5658 | if (matchDoublePermute(Bytes, P, Transform)) |
5659 | return &P; |
5660 | return nullptr; |
5661 | } |
5662 | |
5663 | // Convert the mask of the given shuffle op into a byte-level mask, |
5664 | // as if it had type vNi8. |
5665 | static bool getVPermMask(SDValue ShuffleOp, |
5666 | SmallVectorImpl<int> &Bytes) { |
5667 | EVT VT = ShuffleOp.getValueType(); |
5668 | unsigned NumElements = VT.getVectorNumElements(); |
5669 | unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); |
5670 | |
5671 | if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Val&: ShuffleOp)) { |
5672 | Bytes.resize(N: NumElements * BytesPerElement, NV: -1); |
5673 | for (unsigned I = 0; I < NumElements; ++I) { |
5674 | int Index = VSN->getMaskElt(Idx: I); |
5675 | if (Index >= 0) |
5676 | for (unsigned J = 0; J < BytesPerElement; ++J) |
5677 | Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; |
5678 | } |
5679 | return true; |
5680 | } |
5681 | if (SystemZISD::SPLAT == ShuffleOp.getOpcode() && |
5682 | isa<ConstantSDNode>(Val: ShuffleOp.getOperand(i: 1))) { |
5683 | unsigned Index = ShuffleOp.getConstantOperandVal(i: 1); |
5684 | Bytes.resize(N: NumElements * BytesPerElement, NV: -1); |
5685 | for (unsigned I = 0; I < NumElements; ++I) |
5686 | for (unsigned J = 0; J < BytesPerElement; ++J) |
5687 | Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; |
5688 | return true; |
5689 | } |
5690 | return false; |
5691 | } |
5692 | |
5693 | // Bytes is a VPERM-like permute vector, except that -1 is used for |
5694 | // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of |
5695 | // the result come from a contiguous sequence of bytes from one input. |
5696 | // Set Base to the selector for the first byte if so. |
5697 | static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start, |
5698 | unsigned BytesPerElement, int &Base) { |
5699 | Base = -1; |
5700 | for (unsigned I = 0; I < BytesPerElement; ++I) { |
5701 | if (Bytes[Start + I] >= 0) { |
5702 | unsigned Elem = Bytes[Start + I]; |
5703 | if (Base < 0) { |
5704 | Base = Elem - I; |
5705 | // Make sure the bytes would come from one input operand. |
5706 | if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size()) |
5707 | return false; |
5708 | } else if (unsigned(Base) != Elem - I) |
5709 | return false; |
5710 | } |
5711 | } |
5712 | return true; |
5713 | } |
5714 | |
5715 | // Bytes is a VPERM-like permute vector, except that -1 is used for |
5716 | // undefined bytes. Return true if it can be performed using VSLDB. |
5717 | // When returning true, set StartIndex to the shift amount and OpNo0 |
5718 | // and OpNo1 to the VPERM operands that should be used as the first |
5719 | // and second shift operand respectively. |
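// For example (illustrative), the permute vector
//   { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 }
// is a left double shift by 4 bytes: StartIndex == 4, OpNo0 == 0 and
// OpNo1 == 1.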
5720 | static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes, |
5721 | unsigned &StartIndex, unsigned &OpNo0, |
5722 | unsigned &OpNo1) { |
5723 | int OpNos[] = { -1, -1 }; |
5724 | int Shift = -1; |
5725 | for (unsigned I = 0; I < 16; ++I) { |
5726 | int Index = Bytes[I]; |
5727 | if (Index >= 0) { |
5728 | int ExpectedShift = (Index - I) % SystemZ::VectorBytes; |
5729 | int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes; |
5730 | int RealOpNo = unsigned(Index) / SystemZ::VectorBytes; |
5731 | if (Shift < 0) |
5732 | Shift = ExpectedShift; |
5733 | else if (Shift != ExpectedShift) |
5734 | return false; |
5735 | // Make sure that the operand mappings are consistent with previous |
5736 | // elements. |
5737 | if (OpNos[ModelOpNo] == 1 - RealOpNo) |
5738 | return false; |
5739 | OpNos[ModelOpNo] = RealOpNo; |
5740 | } |
5741 | } |
5742 | StartIndex = Shift; |
5743 | return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); |
5744 | } |
5745 | |
5746 | // Create a node that performs P on operands Op0 and Op1, casting the |
5747 | // operands to the appropriate type. The type of the result is determined by P. |
5748 | static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, |
5749 | const Permute &P, SDValue Op0, SDValue Op1) { |
5750 | // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input |
5751 | // elements of a PACK are twice as wide as the outputs. |
5752 | unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 : |
5753 | P.Opcode == SystemZISD::PACK ? P.Operand * 2 : |
5754 | P.Operand); |
5755 | // Cast both operands to the appropriate type. |
5756 | MVT InVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: InBytes * 8), |
5757 | NumElements: SystemZ::VectorBytes / InBytes); |
5758 | Op0 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: InVT, Operand: Op0); |
5759 | Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: InVT, Operand: Op1); |
5760 | SDValue Op; |
5761 | if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { |
5762 | SDValue Op2 = DAG.getTargetConstant(Val: P.Operand, DL, VT: MVT::i32); |
5763 | Op = DAG.getNode(Opcode: SystemZISD::PERMUTE_DWORDS, DL, VT: InVT, N1: Op0, N2: Op1, N3: Op2); |
5764 | } else if (P.Opcode == SystemZISD::PACK) { |
5765 | MVT OutVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: P.Operand * 8), |
5766 | NumElements: SystemZ::VectorBytes / P.Operand); |
5767 | Op = DAG.getNode(Opcode: SystemZISD::PACK, DL, VT: OutVT, N1: Op0, N2: Op1); |
5768 | } else { |
5769 | Op = DAG.getNode(Opcode: P.Opcode, DL, VT: InVT, N1: Op0, N2: Op1); |
5770 | } |
5771 | return Op; |
5772 | } |
5773 | |
5774 | static bool isZeroVector(SDValue N) { |
5775 | if (N->getOpcode() == ISD::BITCAST) |
5776 | N = N->getOperand(Num: 0); |
5777 | if (N->getOpcode() == ISD::SPLAT_VECTOR) |
5778 | if (auto *Op = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 0))) |
5779 | return Op->getZExtValue() == 0; |
5780 | return ISD::isBuildVectorAllZeros(N: N.getNode()); |
5781 | } |
5782 | |
5783 | // Return the index of the zero/undef vector, or UINT32_MAX if not found. |
5784 | static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) { |
for (unsigned I = 0; I < Num; I++)
5786 | if (isZeroVector(N: Ops[I])) |
5787 | return I; |
5788 | return UINT32_MAX; |
5789 | } |
5790 | |
5791 | // Bytes is a VPERM-like permute vector, except that -1 is used for |
5792 | // undefined bytes. Implement it on operands Ops[0] and Ops[1] using |
5793 | // VSLDB or VPERM. |
5794 | static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, |
5795 | SDValue *Ops, |
5796 | const SmallVectorImpl<int> &Bytes) { |
5797 | for (unsigned I = 0; I < 2; ++I) |
5798 | Ops[I] = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v16i8, Operand: Ops[I]); |
5799 | |
5800 | // First see whether VSLDB can be used. |
5801 | unsigned StartIndex, OpNo0, OpNo1; |
5802 | if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) |
5803 | return DAG.getNode(Opcode: SystemZISD::SHL_DOUBLE, DL, VT: MVT::v16i8, N1: Ops[OpNo0], |
5804 | N2: Ops[OpNo1], |
5805 | N3: DAG.getTargetConstant(Val: StartIndex, DL, VT: MVT::i32)); |
5806 | |
5807 | // Fall back on VPERM. Construct an SDNode for the permute vector. Try to |
5808 | // eliminate a zero vector by reusing any zero index in the permute vector. |
5809 | unsigned ZeroVecIdx = findZeroVectorIdx(Ops: &Ops[0], Num: 2); |
5810 | if (ZeroVecIdx != UINT32_MAX) { |
5811 | bool MaskFirst = true; |
5812 | int ZeroIdx = -1; |
5813 | for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { |
5814 | unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; |
5815 | unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; |
5816 | if (OpNo == ZeroVecIdx && I == 0) { |
5817 | // If the first byte is zero, use mask as first operand. |
5818 | ZeroIdx = 0; |
5819 | break; |
5820 | } |
5821 | if (OpNo != ZeroVecIdx && Byte == 0) { |
5822 | // If mask contains a zero, use it by placing that vector first. |
5823 | ZeroIdx = I + SystemZ::VectorBytes; |
5824 | MaskFirst = false; |
5825 | break; |
5826 | } |
5827 | } |
5828 | if (ZeroIdx != -1) { |
5829 | SDValue IndexNodes[SystemZ::VectorBytes]; |
5830 | for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { |
5831 | if (Bytes[I] >= 0) { |
5832 | unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; |
5833 | unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; |
5834 | if (OpNo == ZeroVecIdx) |
5835 | IndexNodes[I] = DAG.getConstant(Val: ZeroIdx, DL, VT: MVT::i32); |
5836 | else { |
5837 | unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte; |
5838 | IndexNodes[I] = DAG.getConstant(Val: BIdx, DL, VT: MVT::i32); |
5839 | } |
5840 | } else |
5841 | IndexNodes[I] = DAG.getUNDEF(VT: MVT::i32); |
5842 | } |
5843 | SDValue Mask = DAG.getBuildVector(VT: MVT::v16i8, DL, Ops: IndexNodes); |
5844 | SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0]; |
5845 | if (MaskFirst) |
5846 | return DAG.getNode(Opcode: SystemZISD::PERMUTE, DL, VT: MVT::v16i8, N1: Mask, N2: Src, |
5847 | N3: Mask); |
5848 | else |
5849 | return DAG.getNode(Opcode: SystemZISD::PERMUTE, DL, VT: MVT::v16i8, N1: Src, N2: Mask, |
5850 | N3: Mask); |
5851 | } |
5852 | } |
5853 | |
5854 | SDValue IndexNodes[SystemZ::VectorBytes]; |
5855 | for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) |
5856 | if (Bytes[I] >= 0) |
5857 | IndexNodes[I] = DAG.getConstant(Val: Bytes[I], DL, VT: MVT::i32); |
5858 | else |
5859 | IndexNodes[I] = DAG.getUNDEF(VT: MVT::i32); |
5860 | SDValue Op2 = DAG.getBuildVector(VT: MVT::v16i8, DL, Ops: IndexNodes); |
5861 | return DAG.getNode(Opcode: SystemZISD::PERMUTE, DL, VT: MVT::v16i8, N1: Ops[0], |
5862 | N2: (!Ops[1].isUndef() ? Ops[1] : Ops[0]), N3: Op2); |
5863 | } |
5864 | |
5865 | namespace { |
5866 | // Describes a general N-operand vector shuffle. |
5867 | struct GeneralShuffle { |
5868 | GeneralShuffle(EVT vt) |
5869 | : VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {} |
5870 | void addUndef(); |
5871 | bool add(SDValue, unsigned); |
5872 | SDValue getNode(SelectionDAG &, const SDLoc &); |
5873 | void tryPrepareForUnpack(); |
5874 | bool unpackWasPrepared() { return UnpackFromEltSize <= 4; } |
5875 | SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op); |
5876 | |
5877 | // The operands of the shuffle. |
5878 | SmallVector<SDValue, SystemZ::VectorBytes> Ops; |
5879 | |
5880 | // Index I is -1 if byte I of the result is undefined. Otherwise the |
5881 | // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand |
5882 | // Bytes[I] / SystemZ::VectorBytes. |
5883 | SmallVector<int, SystemZ::VectorBytes> Bytes; |
5884 | |
5885 | // The type of the shuffle result. |
5886 | EVT VT; |
5887 | |
5888 | // Holds a value of 1, 2 or 4 if a final unpack has been prepared for. |
5889 | unsigned UnpackFromEltSize; |
5890 | // True if the final unpack uses the low half. |
5891 | bool UnpackLow; |
5892 | }; |
5893 | } // namespace |
5894 | |
5895 | // Add an extra undefined element to the shuffle. |
5896 | void GeneralShuffle::addUndef() { |
5897 | unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); |
5898 | for (unsigned I = 0; I < BytesPerElement; ++I) |
5899 | Bytes.push_back(Elt: -1); |
5900 | } |
5901 | |
5902 | // Add an extra element to the shuffle, taking it from element Elem of Op. |
5903 | // A null Op indicates a vector input whose value will be calculated later; |
5904 | // there is at most one such input per shuffle and it always has the same |
5905 | // type as the result. Aborts and returns false if the source vector elements |
5906 | // of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per |
5907 | // LLVM they become implicitly extended, but this is rare and not optimized. |
5908 | bool GeneralShuffle::add(SDValue Op, unsigned Elem) { |
5909 | unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); |
5910 | |
5911 | // The source vector can have wider elements than the result, |
5912 | // either through an explicit TRUNCATE or because of type legalization. |
5913 | // We want the least significant part. |
5914 | EVT FromVT = Op.getNode() ? Op.getValueType() : VT; |
5915 | unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize(); |
5916 | |
5917 | // Return false if the source elements are smaller than their destination |
5918 | // elements. |
5919 | if (FromBytesPerElement < BytesPerElement) |
5920 | return false; |
5921 | |
5922 | unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes + |
5923 | (FromBytesPerElement - BytesPerElement)); |
5924 | |
5925 | // Look through things like shuffles and bitcasts. |
5926 | while (Op.getNode()) { |
5927 | if (Op.getOpcode() == ISD::BITCAST) |
5928 | Op = Op.getOperand(i: 0); |
5929 | else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) { |
5930 | // See whether the bytes we need come from a contiguous part of one |
5931 | // operand. |
5932 | SmallVector<int, SystemZ::VectorBytes> OpBytes; |
5933 | if (!getVPermMask(ShuffleOp: Op, Bytes&: OpBytes)) |
5934 | break; |
5935 | int NewByte; |
5936 | if (!getShuffleInput(Bytes: OpBytes, Start: Byte, BytesPerElement, Base&: NewByte)) |
5937 | break; |
5938 | if (NewByte < 0) { |
5939 | addUndef(); |
5940 | return true; |
5941 | } |
5942 | Op = Op.getOperand(i: unsigned(NewByte) / SystemZ::VectorBytes); |
5943 | Byte = unsigned(NewByte) % SystemZ::VectorBytes; |
5944 | } else if (Op.isUndef()) { |
5945 | addUndef(); |
5946 | return true; |
5947 | } else |
5948 | break; |
5949 | } |
5950 | |
5951 | // Make sure that the source of the extraction is in Ops. |
5952 | unsigned OpNo = 0; |
5953 | for (; OpNo < Ops.size(); ++OpNo) |
5954 | if (Ops[OpNo] == Op) |
5955 | break; |
5956 | if (OpNo == Ops.size()) |
5957 | Ops.push_back(Elt: Op); |
5958 | |
5959 | // Add the element to Bytes. |
5960 | unsigned Base = OpNo * SystemZ::VectorBytes + Byte; |
5961 | for (unsigned I = 0; I < BytesPerElement; ++I) |
5962 | Bytes.push_back(Elt: Base + I); |
5963 | |
5964 | return true; |
5965 | } |
5966 | |
5967 | // Return SDNodes for the completed shuffle. |
5968 | SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) { |
5969 | assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector" ); |
5970 | |
5971 | if (Ops.size() == 0) |
5972 | return DAG.getUNDEF(VT); |
5973 | |
5974 | // Use a single unpack if possible as the last operation. |
5975 | tryPrepareForUnpack(); |
5976 | |
5977 | // Make sure that there are at least two shuffle operands. |
5978 | if (Ops.size() == 1) |
5979 | Ops.push_back(Elt: DAG.getUNDEF(VT: MVT::v16i8)); |
5980 | |
5981 | // Create a tree of shuffles, deferring root node until after the loop. |
5982 | // Try to redistribute the undefined elements of non-root nodes so that |
5983 | // the non-root shuffles match something like a pack or merge, then adjust |
5984 | // the parent node's permute vector to compensate for the new order. |
5985 | // Among other things, this copes with vectors like <2 x i16> that were |
5986 | // padded with undefined elements during type legalization. |
5987 | // |
5988 | // In the best case this redistribution will lead to the whole tree |
5989 | // using packs and merges. It should rarely be a loss in other cases. |
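// For example (illustrative), with four operands the Stride == 1 pass
// combines Ops[0]/Ops[1] into Ops[0] and Ops[2]/Ops[3] into Ops[2]; the
// root shuffle then combines Ops[0] with Ops[2], which is moved into
// Ops[1] below.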
5990 | unsigned Stride = 1; |
5991 | for (; Stride * 2 < Ops.size(); Stride *= 2) { |
5992 | for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) { |
5993 | SDValue SubOps[] = { Ops[I], Ops[I + Stride] }; |
5994 | |
5995 | // Create a mask for just these two operands. |
5996 | SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes); |
5997 | for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { |
5998 | unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes; |
5999 | unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes; |
6000 | if (OpNo == I) |
6001 | NewBytes[J] = Byte; |
6002 | else if (OpNo == I + Stride) |
6003 | NewBytes[J] = SystemZ::VectorBytes + Byte; |
6004 | else |
6005 | NewBytes[J] = -1; |
6006 | } |
6007 | // See if it would be better to reorganize NewMask to avoid using VPERM. |
6008 | SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes); |
6009 | if (const Permute *P = matchDoublePermute(Bytes: NewBytes, Transform&: NewBytesMap)) { |
6010 | Ops[I] = getPermuteNode(DAG, DL, P: *P, Op0: SubOps[0], Op1: SubOps[1]); |
6011 | // Applying NewBytesMap to Ops[I] gets back to NewBytes. |
6012 | for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { |
6013 | if (NewBytes[J] >= 0) { |
6014 | assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes && |
6015 | "Invalid double permute" ); |
6016 | Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J]; |
6017 | } else |
6018 | assert(NewBytesMap[J] < 0 && "Invalid double permute" ); |
6019 | } |
6020 | } else { |
6021 | // Just use NewBytes on the operands. |
6022 | Ops[I] = getGeneralPermuteNode(DAG, DL, Ops: SubOps, Bytes: NewBytes); |
6023 | for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) |
6024 | if (NewBytes[J] >= 0) |
6025 | Bytes[J] = I * SystemZ::VectorBytes + J; |
6026 | } |
6027 | } |
6028 | } |
6029 | |
6030 | // Now we just have 2 inputs. Put the second operand in Ops[1]. |
6031 | if (Stride > 1) { |
6032 | Ops[1] = Ops[Stride]; |
6033 | for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) |
6034 | if (Bytes[I] >= int(SystemZ::VectorBytes)) |
6035 | Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes; |
6036 | } |
6037 | |
6038 | // Look for an instruction that can do the permute without resorting |
6039 | // to VPERM. |
6040 | unsigned OpNo0, OpNo1; |
6041 | SDValue Op; |
6042 | if (unpackWasPrepared() && Ops[1].isUndef()) |
6043 | Op = Ops[0]; |
6044 | else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1)) |
6045 | Op = getPermuteNode(DAG, DL, P: *P, Op0: Ops[OpNo0], Op1: Ops[OpNo1]); |
6046 | else |
6047 | Op = getGeneralPermuteNode(DAG, DL, Ops: &Ops[0], Bytes); |
6048 | |
6049 | Op = insertUnpackIfPrepared(DAG, DL, Op); |
6050 | |
6051 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op); |
6052 | } |
6053 | |
6054 | #ifndef NDEBUG |
6055 | static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) { |
6056 | dbgs() << Msg.c_str() << " { " ; |
6057 | for (unsigned i = 0; i < Bytes.size(); i++) |
6058 | dbgs() << Bytes[i] << " " ; |
6059 | dbgs() << "}\n" ; |
6060 | } |
6061 | #endif |
6062 | |
6063 | // If the Bytes vector matches an unpack operation, prepare to do the unpack |
6064 | // after all else by removing the zero vector and the effect of the unpack on |
6065 | // Bytes. |
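// For example (illustrative), writing Z for a byte of the zero vector,
//   { Z, Z, 0, 1, Z, Z, 2, 3, Z, Z, 4, 5, Z, Z, 6, 7 }
// zero-extends the four high halfwords of the other operand to words,
// which matches UNPACKL_HIGH with UnpackFromEltSize == 2.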
6066 | void GeneralShuffle::tryPrepareForUnpack() { |
6067 | uint32_t ZeroVecOpNo = findZeroVectorIdx(Ops: &Ops[0], Num: Ops.size()); |
6068 | if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1) |
6069 | return; |
6070 | |
6071 | // Only do this if removing the zero vector reduces the depth, otherwise |
6072 | // the critical path will increase with the final unpack. |
6073 | if (Ops.size() > 2 && |
6074 | Log2_32_Ceil(Value: Ops.size()) == Log2_32_Ceil(Value: Ops.size() - 1)) |
6075 | return; |
6076 | |
6077 | // Find an unpack that would allow removing the zero vector from Ops. |
6078 | UnpackFromEltSize = 1; |
6079 | for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) { |
6080 | bool MatchUnpack = true; |
6081 | SmallVector<int, SystemZ::VectorBytes> SrcBytes; |
6082 | for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) { |
6083 | unsigned ToEltSize = UnpackFromEltSize * 2; |
6084 | bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize; |
6085 | if (!IsZextByte) |
6086 | SrcBytes.push_back(Elt: Bytes[Elt]); |
6087 | if (Bytes[Elt] != -1) { |
6088 | unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes; |
6089 | if (IsZextByte != (OpNo == ZeroVecOpNo)) { |
6090 | MatchUnpack = false; |
6091 | break; |
6092 | } |
6093 | } |
6094 | } |
6095 | if (MatchUnpack) { |
6096 | if (Ops.size() == 2) { |
6097 | // Don't use unpack if a single source operand needs rearrangement. |
6098 | bool CanUseUnpackLow = true, CanUseUnpackHigh = true; |
6099 | for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) { |
6100 | if (SrcBytes[i] == -1) |
6101 | continue; |
6102 | if (SrcBytes[i] % 16 != int(i)) |
6103 | CanUseUnpackHigh = false; |
6104 | if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2)) |
6105 | CanUseUnpackLow = false; |
6106 | if (!CanUseUnpackLow && !CanUseUnpackHigh) { |
6107 | UnpackFromEltSize = UINT_MAX; |
6108 | return; |
6109 | } |
6110 | } |
6111 | if (!CanUseUnpackHigh) |
6112 | UnpackLow = true; |
6113 | } |
6114 | break; |
6115 | } |
6116 | } |
6117 | if (UnpackFromEltSize > 4) |
6118 | return; |
6119 | |
6120 | LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size " |
6121 | << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo |
6122 | << ".\n" ; |
6123 | dumpBytes(Bytes, "Original Bytes vector:" );); |
6124 | |
6125 | // Apply the unpack in reverse to the Bytes array. |
6126 | unsigned B = 0; |
6127 | if (UnpackLow) { |
6128 | while (B < SystemZ::VectorBytes / 2) |
6129 | Bytes[B++] = -1; |
6130 | } |
6131 | for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) { |
6132 | Elt += UnpackFromEltSize; |
6133 | for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++) |
6134 | Bytes[B] = Bytes[Elt]; |
6135 | } |
6136 | if (!UnpackLow) { |
6137 | while (B < SystemZ::VectorBytes) |
6138 | Bytes[B++] = -1; |
6139 | } |
6140 | |
6141 | // Remove the zero vector from Ops |
6142 | Ops.erase(CI: &Ops[ZeroVecOpNo]); |
6143 | for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) |
6144 | if (Bytes[I] >= 0) { |
6145 | unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; |
6146 | if (OpNo > ZeroVecOpNo) |
6147 | Bytes[I] -= SystemZ::VectorBytes; |
6148 | } |
6149 | |
6150 | LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:" ); |
6151 | dbgs() << "\n" ;); |
6152 | } |
6153 | |
6154 | SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG, |
6155 | const SDLoc &DL, |
6156 | SDValue Op) { |
6157 | if (!unpackWasPrepared()) |
6158 | return Op; |
6159 | unsigned InBits = UnpackFromEltSize * 8; |
6160 | EVT InVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: InBits), |
6161 | NumElements: SystemZ::VectorBits / InBits); |
6162 | SDValue PackedOp = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: InVT, Operand: Op); |
6163 | unsigned OutBits = InBits * 2; |
6164 | EVT OutVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: OutBits), |
6165 | NumElements: SystemZ::VectorBits / OutBits); |
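  // E.g. for UnpackFromEltSize == 1 this bitcasts to v16i8 and emits a
  // logical (zero-extending) unpack of the leftmost (or, for UnpackLow,
  // the rightmost) 8 bytes into the halfword elements of a v8i16.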
6166 | return DAG.getNode(Opcode: UnpackLow ? SystemZISD::UNPACKL_LOW |
6167 | : SystemZISD::UNPACKL_HIGH, |
6168 | DL, VT: OutVT, Operand: PackedOp); |
6169 | } |
6170 | |
6171 | // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. |
6172 | static bool isScalarToVector(SDValue Op) { |
6173 | for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) |
6174 | if (!Op.getOperand(i: I).isUndef()) |
6175 | return false; |
6176 | return true; |
6177 | } |
6178 | |
6179 | // Return a vector of type VT that contains Value in the first element. |
6180 | // The other elements don't matter. |
6181 | static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, |
6182 | SDValue Value) { |
6183 | // If we have a constant, replicate it to all elements and let the |
6184 | // BUILD_VECTOR lowering take care of it. |
6185 | if (Value.getOpcode() == ISD::Constant || |
6186 | Value.getOpcode() == ISD::ConstantFP) { |
6187 | SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value); |
6188 | return DAG.getBuildVector(VT, DL, Ops); |
6189 | } |
6190 | if (Value.isUndef()) |
6191 | return DAG.getUNDEF(VT); |
6192 | return DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT, Operand: Value); |
6193 | } |
6194 | |
6195 | // Return a vector of type VT in which Op0 is in element 0 and Op1 is in |
6196 | // element 1. Used for cases in which replication is cheap. |
6197 | static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, |
6198 | SDValue Op0, SDValue Op1) { |
6199 | if (Op0.isUndef()) { |
6200 | if (Op1.isUndef()) |
6201 | return DAG.getUNDEF(VT); |
6202 | return DAG.getNode(Opcode: SystemZISD::REPLICATE, DL, VT, Operand: Op1); |
6203 | } |
6204 | if (Op1.isUndef()) |
6205 | return DAG.getNode(Opcode: SystemZISD::REPLICATE, DL, VT, Operand: Op0); |
6206 | return DAG.getNode(Opcode: SystemZISD::MERGE_HIGH, DL, VT, |
6207 | N1: buildScalarToVector(DAG, DL, VT, Value: Op0), |
6208 | N2: buildScalarToVector(DAG, DL, VT, Value: Op1)); |
6209 | } |
6210 | |
6211 | // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 |
6212 | // vector for them. |
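// This corresponds to a single VLVGP, which writes both doublewords of a
// vector register directly from two GPRs.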
6213 | static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, |
6214 | SDValue Op1) { |
6215 | if (Op0.isUndef() && Op1.isUndef()) |
6216 | return DAG.getUNDEF(VT: MVT::v2i64); |
6217 | // If one of the two inputs is undefined then replicate the other one, |
6218 | // in order to avoid using another register unnecessarily. |
6219 | if (Op0.isUndef()) |
6220 | Op0 = Op1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op1); |
6221 | else if (Op1.isUndef()) |
6222 | Op0 = Op1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0); |
6223 | else { |
6224 | Op0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0); |
6225 | Op1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op1); |
6226 | } |
6227 | return DAG.getNode(Opcode: SystemZISD::JOIN_DWORDS, DL, VT: MVT::v2i64, N1: Op0, N2: Op1); |
6228 | } |
6229 | |
6230 | // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually |
6231 | // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for |
6232 | // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR |
6233 | // would benefit from this representation and return it if so. |
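//
// For example (illustrative):
//
//   (build_vector (extract_elt %A, 0), (extract_elt %B, 3), %x, undef)
//
// becomes a byte-level shuffle of %A, %B and one placeholder operand, with
// the placeholder later filled in by a (build_vector %x, undef, ...) that
// holds the residual elements.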
6234 | static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, |
6235 | BuildVectorSDNode *BVN) { |
6236 | EVT VT = BVN->getValueType(ResNo: 0); |
6237 | unsigned NumElements = VT.getVectorNumElements(); |
6238 | |
6239 | // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation |
6240 | // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still |
6241 | // need a BUILD_VECTOR, add an additional placeholder operand for that |
6242 | // BUILD_VECTOR and store its operands in ResidueOps. |
6243 | GeneralShuffle GS(VT); |
6244 | SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps; |
6245 | bool FoundOne = false; |
6246 | for (unsigned I = 0; I < NumElements; ++I) { |
6247 | SDValue Op = BVN->getOperand(Num: I); |
6248 | if (Op.getOpcode() == ISD::TRUNCATE) |
6249 | Op = Op.getOperand(i: 0); |
6250 | if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
6251 | Op.getOperand(i: 1).getOpcode() == ISD::Constant) { |
6252 | unsigned Elem = Op.getConstantOperandVal(i: 1); |
6253 | if (!GS.add(Op: Op.getOperand(i: 0), Elem)) |
6254 | return SDValue(); |
6255 | FoundOne = true; |
6256 | } else if (Op.isUndef()) { |
6257 | GS.addUndef(); |
6258 | } else { |
6259 | if (!GS.add(Op: SDValue(), Elem: ResidueOps.size())) |
6260 | return SDValue(); |
6261 | ResidueOps.push_back(Elt: BVN->getOperand(Num: I)); |
6262 | } |
6263 | } |
6264 | |
6265 | // Nothing to do if there are no EXTRACT_VECTOR_ELTs. |
6266 | if (!FoundOne) |
6267 | return SDValue(); |
6268 | |
6269 | // Create the BUILD_VECTOR for the remaining elements, if any. |
6270 | if (!ResidueOps.empty()) { |
6271 | while (ResidueOps.size() < NumElements) |
6272 | ResidueOps.push_back(Elt: DAG.getUNDEF(VT: ResidueOps[0].getValueType())); |
6273 | for (auto &Op : GS.Ops) { |
6274 | if (!Op.getNode()) { |
6275 | Op = DAG.getBuildVector(VT, DL: SDLoc(BVN), Ops: ResidueOps); |
6276 | break; |
6277 | } |
6278 | } |
6279 | } |
6280 | return GS.getNode(DAG, DL: SDLoc(BVN)); |
6281 | } |
6282 | |
6283 | bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const { |
6284 | if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Val&: Op)->isUnindexed()) |
6285 | return true; |
6286 | if (auto *AL = dyn_cast<AtomicSDNode>(Val&: Op)) |
6287 | if (AL->getOpcode() == ISD::ATOMIC_LOAD) |
6288 | return true; |
6289 | if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV) |
6290 | return true; |
6291 | return false; |
6292 | } |
6293 | |
6294 | // Combine GPR scalar values Elems into a vector of type VT. |
6295 | SDValue |
6296 | SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, |
6297 | SmallVectorImpl<SDValue> &Elems) const { |
6298 | // See whether there is a single replicated value. |
6299 | SDValue Single; |
6300 | unsigned int NumElements = Elems.size(); |
6301 | unsigned int Count = 0; |
6302 | for (auto Elem : Elems) { |
6303 | if (!Elem.isUndef()) { |
6304 | if (!Single.getNode()) |
6305 | Single = Elem; |
6306 | else if (Elem != Single) { |
6307 | Single = SDValue(); |
6308 | break; |
6309 | } |
6310 | Count += 1; |
6311 | } |
6312 | } |
6313 | // There are three cases here: |
6314 | // |
6315 | // - if the only defined element is a loaded one, the best sequence |
6316 | // is a replicating load. |
6317 | // |
6318 | // - otherwise, if the only defined element is an i64 value, we will |
6319 | // end up with the same VLVGP sequence regardless of whether we short-cut |
6320 | // for replication or fall through to the later code. |
6321 | // |
6322 | // - otherwise, if the only defined element is an i32 or smaller value, |
6323 | // we would need 2 instructions to replicate it: VLVGP followed by VREPx. |
6324 | // This is only a win if the single defined element is used more than once. |
6325 | // In other cases we're better off using a single VLVGx. |
6326 | if (Single.getNode() && (Count > 1 || isVectorElementLoad(Op: Single))) |
6327 | return DAG.getNode(Opcode: SystemZISD::REPLICATE, DL, VT, Operand: Single); |
6328 | |
6329 | // If all elements are loads, use VLREP/VLEs (below). |
6330 | bool AllLoads = true; |
6331 | for (auto Elem : Elems) |
6332 | if (!isVectorElementLoad(Op: Elem)) { |
6333 | AllLoads = false; |
6334 | break; |
6335 | } |
6336 | |
6337 | // The best way of building a v2i64 from two i64s is to use VLVGP. |
6338 | if (VT == MVT::v2i64 && !AllLoads) |
6339 | return joinDwords(DAG, DL, Op0: Elems[0], Op1: Elems[1]); |
6340 | |
6341 | // Use a 64-bit merge high to combine two doubles. |
6342 | if (VT == MVT::v2f64 && !AllLoads) |
6343 | return buildMergeScalars(DAG, DL, VT, Op0: Elems[0], Op1: Elems[1]); |
6344 | |
6345 | // Build v4f32 values directly from the FPRs: |
6346 | // |
  //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
  //         V             V        VMRHF
  //      <ABxx>        <CDxx>
  //             V                  VMRHG
  //          <ABCD>
6352 | if (VT == MVT::v4f32 && !AllLoads) { |
6353 | SDValue Op01 = buildMergeScalars(DAG, DL, VT, Op0: Elems[0], Op1: Elems[1]); |
6354 | SDValue Op23 = buildMergeScalars(DAG, DL, VT, Op0: Elems[2], Op1: Elems[3]); |
6355 | // Avoid unnecessary undefs by reusing the other operand. |
6356 | if (Op01.isUndef()) |
6357 | Op01 = Op23; |
6358 | else if (Op23.isUndef()) |
6359 | Op23 = Op01; |
6360 | // Merging identical replications is a no-op. |
6361 | if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23) |
6362 | return Op01; |
6363 | Op01 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v2i64, Operand: Op01); |
6364 | Op23 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v2i64, Operand: Op23); |
6365 | SDValue Op = DAG.getNode(Opcode: SystemZISD::MERGE_HIGH, |
6366 | DL, VT: MVT::v2i64, N1: Op01, N2: Op23); |
6367 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op); |
6368 | } |
6369 | |
6370 | // Collect the constant terms. |
6371 | SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue()); |
6372 | SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false); |
6373 | |
6374 | unsigned NumConstants = 0; |
6375 | for (unsigned I = 0; I < NumElements; ++I) { |
6376 | SDValue Elem = Elems[I]; |
6377 | if (Elem.getOpcode() == ISD::Constant || |
6378 | Elem.getOpcode() == ISD::ConstantFP) { |
6379 | NumConstants += 1; |
6380 | Constants[I] = Elem; |
6381 | Done[I] = true; |
6382 | } |
6383 | } |
6384 | // If there was at least one constant, fill in the other elements of |
6385 | // Constants with undefs to get a full vector constant and use that |
6386 | // as the starting point. |
6387 | SDValue Result; |
6388 | SDValue ReplicatedVal; |
6389 | if (NumConstants > 0) { |
6390 | for (unsigned I = 0; I < NumElements; ++I) |
6391 | if (!Constants[I].getNode()) |
6392 | Constants[I] = DAG.getUNDEF(VT: Elems[I].getValueType()); |
6393 | Result = DAG.getBuildVector(VT, DL, Ops: Constants); |
6394 | } else { |
6395 | // Otherwise try to use VLREP or VLVGP to start the sequence in order to |
6396 | // avoid a false dependency on any previous contents of the vector |
6397 | // register. |
6398 | |
6399 | // Use a VLREP if at least one element is a load. Make sure to replicate |
6400 | // the load with the most elements having its value. |
6401 | std::map<const SDNode*, unsigned> UseCounts; |
6402 | SDNode *LoadMaxUses = nullptr; |
6403 | for (unsigned I = 0; I < NumElements; ++I) |
6404 | if (isVectorElementLoad(Op: Elems[I])) { |
6405 | SDNode *Ld = Elems[I].getNode(); |
6406 | unsigned Count = ++UseCounts[Ld]; |
6407 | if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < Count) |
6408 | LoadMaxUses = Ld; |
6409 | } |
6410 | if (LoadMaxUses != nullptr) { |
6411 | ReplicatedVal = SDValue(LoadMaxUses, 0); |
6412 | Result = DAG.getNode(Opcode: SystemZISD::REPLICATE, DL, VT, Operand: ReplicatedVal); |
6413 | } else { |
6414 | // Try to use VLVGP. |
6415 | unsigned I1 = NumElements / 2 - 1; |
6416 | unsigned I2 = NumElements - 1; |
6417 | bool Def1 = !Elems[I1].isUndef(); |
6418 | bool Def2 = !Elems[I2].isUndef(); |
6419 | if (Def1 || Def2) { |
6420 | SDValue Elem1 = Elems[Def1 ? I1 : I2]; |
6421 | SDValue Elem2 = Elems[Def2 ? I2 : I1]; |
6422 | Result = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, |
6423 | Operand: joinDwords(DAG, DL, Op0: Elem1, Op1: Elem2)); |
6424 | Done[I1] = true; |
6425 | Done[I2] = true; |
6426 | } else |
6427 | Result = DAG.getUNDEF(VT); |
6428 | } |
6429 | } |
6430 | |
6431 | // Use VLVGx to insert the other elements. |
6432 | for (unsigned I = 0; I < NumElements; ++I) |
6433 | if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal) |
6434 | Result = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT, N1: Result, N2: Elems[I], |
6435 | N3: DAG.getConstant(Val: I, DL, VT: MVT::i32)); |
6436 | return Result; |
6437 | } |
6438 | |
6439 | SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, |
6440 | SelectionDAG &DAG) const { |
6441 | auto *BVN = cast<BuildVectorSDNode>(Val: Op.getNode()); |
6442 | SDLoc DL(Op); |
6443 | EVT VT = Op.getValueType(); |
6444 | |
6445 | if (BVN->isConstant()) { |
6446 | if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget)) |
6447 | return Op; |
6448 | |
6449 | // Fall back to loading it from memory. |
6450 | return SDValue(); |
6451 | } |
6452 | |
6453 | // See if we should use shuffles to construct the vector from other vectors. |
6454 | if (SDValue Res = tryBuildVectorShuffle(DAG, BVN)) |
6455 | return Res; |
6456 | |
6457 | // Detect SCALAR_TO_VECTOR conversions. |
6458 | if (isOperationLegal(Op: ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op)) |
6459 | return buildScalarToVector(DAG, DL, VT, Value: Op.getOperand(i: 0)); |
6460 | |
6461 | // Otherwise use buildVector to build the vector up from GPRs. |
6462 | unsigned NumElements = Op.getNumOperands(); |
6463 | SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements); |
6464 | for (unsigned I = 0; I < NumElements; ++I) |
6465 | Ops[I] = Op.getOperand(i: I); |
6466 | return buildVector(DAG, DL, VT, Elems&: Ops); |
6467 | } |
6468 | |
6469 | SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, |
6470 | SelectionDAG &DAG) const { |
6471 | auto *VSN = cast<ShuffleVectorSDNode>(Val: Op.getNode()); |
6472 | SDLoc DL(Op); |
6473 | EVT VT = Op.getValueType(); |
6474 | unsigned NumElements = VT.getVectorNumElements(); |
6475 | |
6476 | if (VSN->isSplat()) { |
6477 | SDValue Op0 = Op.getOperand(i: 0); |
6478 | unsigned Index = VSN->getSplatIndex(); |
6479 | assert(Index < VT.getVectorNumElements() && |
6480 | "Splat index should be defined and in first operand" ); |
6481 | // See whether the value we're splatting is directly available as a scalar. |
6482 | if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) || |
6483 | Op0.getOpcode() == ISD::BUILD_VECTOR) |
6484 | return DAG.getNode(Opcode: SystemZISD::REPLICATE, DL, VT, Operand: Op0.getOperand(i: Index)); |
6485 | // Otherwise keep it as a vector-to-vector operation. |
6486 | return DAG.getNode(Opcode: SystemZISD::SPLAT, DL, VT, N1: Op.getOperand(i: 0), |
6487 | N2: DAG.getTargetConstant(Val: Index, DL, VT: MVT::i32)); |
6488 | } |
6489 | |
6490 | GeneralShuffle GS(VT); |
6491 | for (unsigned I = 0; I < NumElements; ++I) { |
6492 | int Elt = VSN->getMaskElt(Idx: I); |
6493 | if (Elt < 0) |
6494 | GS.addUndef(); |
6495 | else if (!GS.add(Op: Op.getOperand(i: unsigned(Elt) / NumElements), |
6496 | Elem: unsigned(Elt) % NumElements)) |
6497 | return SDValue(); |
6498 | } |
6499 | return GS.getNode(DAG, DL: SDLoc(VSN)); |
6500 | } |
6501 | |
6502 | SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op, |
6503 | SelectionDAG &DAG) const { |
6504 | SDLoc DL(Op); |
6505 | // Just insert the scalar into element 0 of an undefined vector. |
6506 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, |
6507 | VT: Op.getValueType(), N1: DAG.getUNDEF(VT: Op.getValueType()), |
6508 | N2: Op.getOperand(i: 0), N3: DAG.getConstant(Val: 0, DL, VT: MVT::i32)); |
6509 | } |
6510 | |
6511 | SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, |
6512 | SelectionDAG &DAG) const { |
6513 | // Handle insertions of floating-point values. |
6514 | SDLoc DL(Op); |
6515 | SDValue Op0 = Op.getOperand(i: 0); |
6516 | SDValue Op1 = Op.getOperand(i: 1); |
6517 | SDValue Op2 = Op.getOperand(i: 2); |
6518 | EVT VT = Op.getValueType(); |
6519 | |
6520 | // Insertions into constant indices of a v2f64 can be done using VPDI. |
6521 | // However, if the inserted value is a bitcast or a constant then it's |
6522 | // better to use GPRs, as below. |
6523 | if (VT == MVT::v2f64 && |
6524 | Op1.getOpcode() != ISD::BITCAST && |
6525 | Op1.getOpcode() != ISD::ConstantFP && |
6526 | Op2.getOpcode() == ISD::Constant) { |
6527 | uint64_t Index = Op2->getAsZExtVal(); |
6528 | unsigned Mask = VT.getVectorNumElements() - 1; |
6529 | if (Index <= Mask) |
6530 | return Op; |
6531 | } |
6532 | |
6533 | // Otherwise bitcast to the equivalent integer form and insert via a GPR. |
6534 | MVT IntVT = MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits()); |
6535 | MVT IntVecVT = MVT::getVectorVT(VT: IntVT, NumElements: VT.getVectorNumElements()); |
6536 | SDValue Res = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: IntVecVT, |
6537 | N1: DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVecVT, Operand: Op0), |
6538 | N2: DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Op1), N3: Op2); |
6539 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Res); |
6540 | } |
6541 | |
6542 | SDValue |
SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                               SelectionDAG &DAG) const {
6545 | // Handle extractions of floating-point values. |
6546 | SDLoc DL(Op); |
6547 | SDValue Op0 = Op.getOperand(i: 0); |
6548 | SDValue Op1 = Op.getOperand(i: 1); |
6549 | EVT VT = Op.getValueType(); |
6550 | EVT VecVT = Op0.getValueType(); |
6551 | |
6552 | // Extractions of constant indices can be done directly. |
6553 | if (auto *CIndexN = dyn_cast<ConstantSDNode>(Val&: Op1)) { |
6554 | uint64_t Index = CIndexN->getZExtValue(); |
6555 | unsigned Mask = VecVT.getVectorNumElements() - 1; |
6556 | if (Index <= Mask) |
6557 | return Op; |
6558 | } |
6559 | |
6560 | // Otherwise bitcast to the equivalent integer form and extract via a GPR. |
6561 | MVT IntVT = MVT::getIntegerVT(BitWidth: VT.getSizeInBits()); |
6562 | MVT IntVecVT = MVT::getVectorVT(VT: IntVT, NumElements: VecVT.getVectorNumElements()); |
6563 | SDValue Res = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: IntVT, |
6564 | N1: DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVecVT, Operand: Op0), N2: Op1); |
6565 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Res); |
6566 | } |
6567 | |
6568 | SDValue SystemZTargetLowering:: |
6569 | lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { |
6570 | SDValue PackedOp = Op.getOperand(i: 0); |
6571 | EVT OutVT = Op.getValueType(); |
6572 | EVT InVT = PackedOp.getValueType(); |
6573 | unsigned ToBits = OutVT.getScalarSizeInBits(); |
6574 | unsigned FromBits = InVT.getScalarSizeInBits(); |
6575 | unsigned StartOffset = 0; |
6576 | |
6577 | // If the input is a VECTOR_SHUFFLE, there are a number of important |
6578 | // cases where we can directly implement the sign-extension of the |
6579 | // original input lanes of the shuffle. |
6580 | if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) { |
6581 | ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: PackedOp.getNode()); |
6582 | ArrayRef<int> ShuffleMask = SVN->getMask(); |
6583 | int OutNumElts = OutVT.getVectorNumElements(); |
6584 | |
6585 | // Recognize the special case where the sign-extension can be done |
6586 | // by the VSEG instruction. Handled via the default expander. |
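    // E.g. for a v2i64 result from v4i32 input, the mask {1, 3} selects
    // the low word of each doubleword, which is exactly what VSEG
    // (sign-extend the rightmost sub-element of each doubleword) computes.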
6587 | if (ToBits == 64 && OutNumElts == 2) { |
6588 | int NumElem = ToBits / FromBits; |
6589 | if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1) |
6590 | return SDValue(); |
6591 | } |
6592 | |
6593 | // Recognize the special case where we can fold the shuffle by |
6594 | // replacing some of the UNPACK_HIGH with UNPACK_LOW. |
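    // E.g. a mask selecting a contiguous, aligned run of input lanes at a
    // fixed offset can be honored by doing some of the widening steps with
    // UNPACK_LOW; StartOffset records how many input lanes to skip.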
6595 | int StartOffsetCandidate = -1; |
6596 | for (int Elt = 0; Elt < OutNumElts; Elt++) { |
6597 | if (ShuffleMask[Elt] == -1) |
6598 | continue; |
6599 | if (ShuffleMask[Elt] % OutNumElts == Elt) { |
6600 | if (StartOffsetCandidate == -1) |
6601 | StartOffsetCandidate = ShuffleMask[Elt] - Elt; |
6602 | if (StartOffsetCandidate == ShuffleMask[Elt] - Elt) |
6603 | continue; |
6604 | } |
6605 | StartOffsetCandidate = -1; |
6606 | break; |
6607 | } |
6608 | if (StartOffsetCandidate != -1) { |
6609 | StartOffset = StartOffsetCandidate; |
6610 | PackedOp = PackedOp.getOperand(i: 0); |
6611 | } |
6612 | } |
6613 | |
6614 | do { |
6615 | FromBits *= 2; |
6616 | unsigned OutNumElts = SystemZ::VectorBits / FromBits; |
6617 | EVT OutVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: FromBits), NumElements: OutNumElts); |
6618 | unsigned Opcode = SystemZISD::UNPACK_HIGH; |
6619 | if (StartOffset >= OutNumElts) { |
6620 | Opcode = SystemZISD::UNPACK_LOW; |
6621 | StartOffset -= OutNumElts; |
6622 | } |
6623 | PackedOp = DAG.getNode(Opcode, DL: SDLoc(PackedOp), VT: OutVT, Operand: PackedOp); |
6624 | } while (FromBits != ToBits); |
6625 | return PackedOp; |
6626 | } |
6627 | |
6628 | // Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector. |
6629 | SDValue SystemZTargetLowering:: |
6630 | lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { |
6631 | SDValue PackedOp = Op.getOperand(i: 0); |
6632 | SDLoc DL(Op); |
6633 | EVT OutVT = Op.getValueType(); |
6634 | EVT InVT = PackedOp.getValueType(); |
6635 | unsigned InNumElts = InVT.getVectorNumElements(); |
6636 | unsigned OutNumElts = OutVT.getVectorNumElements(); |
6637 | unsigned NumInPerOut = InNumElts / OutNumElts; |
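
  // E.g. an in-register zero extension from v16i8 to v4i32 has
  // NumInPerOut == 4: each output element is three bytes taken from the
  // zero vector followed by the data byte (big-endian), giving the mask
  // { 16,17,18,0, 19,20,21,1, 22,23,24,2, 25,26,27,3 }.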
6638 | |
6639 | SDValue ZeroVec = |
6640 | DAG.getSplatVector(VT: InVT, DL, Op: DAG.getConstant(Val: 0, DL, VT: InVT.getScalarType())); |
6641 | |
6642 | SmallVector<int, 16> Mask(InNumElts); |
6643 | unsigned ZeroVecElt = InNumElts; |
6644 | for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) { |
6645 | unsigned MaskElt = PackedElt * NumInPerOut; |
6646 | unsigned End = MaskElt + NumInPerOut - 1; |
6647 | for (; MaskElt < End; MaskElt++) |
6648 | Mask[MaskElt] = ZeroVecElt++; |
6649 | Mask[MaskElt] = PackedElt; |
6650 | } |
6651 | SDValue Shuf = DAG.getVectorShuffle(VT: InVT, dl: DL, N1: PackedOp, N2: ZeroVec, Mask); |
6652 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: OutVT, Operand: Shuf); |
6653 | } |
6654 | |
6655 | SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, |
6656 | unsigned ByScalar) const { |
6657 | // Look for cases where a vector shift can use the *_BY_SCALAR form. |
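  // E.g. (shl <4 x i32> %x, (splat 5)) can be done as a single element
  // shift by the scalar amount 5 rather than a general vector-vector shift.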
6658 | SDValue Op0 = Op.getOperand(i: 0); |
6659 | SDValue Op1 = Op.getOperand(i: 1); |
6660 | SDLoc DL(Op); |
6661 | EVT VT = Op.getValueType(); |
6662 | unsigned ElemBitSize = VT.getScalarSizeInBits(); |
6663 | |
6664 | // See whether the shift vector is a splat represented as BUILD_VECTOR. |
6665 | if (auto *BVN = dyn_cast<BuildVectorSDNode>(Val&: Op1)) { |
6666 | APInt SplatBits, SplatUndef; |
6667 | unsigned SplatBitSize; |
6668 | bool HasAnyUndefs; |
6669 | // Check for constant splats. Use ElemBitSize as the minimum element |
6670 | // width and reject splats that need wider elements. |
6671 | if (BVN->isConstantSplat(SplatValue&: SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, |
6672 | MinSplatBits: ElemBitSize, isBigEndian: true) && |
6673 | SplatBitSize == ElemBitSize) { |
6674 | SDValue Shift = DAG.getConstant(Val: SplatBits.getZExtValue() & 0xfff, |
6675 | DL, VT: MVT::i32); |
6676 | return DAG.getNode(Opcode: ByScalar, DL, VT, N1: Op0, N2: Shift); |
6677 | } |
6678 | // Check for variable splats. |
6679 | BitVector UndefElements; |
6680 | SDValue Splat = BVN->getSplatValue(UndefElements: &UndefElements); |
6681 | if (Splat) { |
6682 | // Since i32 is the smallest legal type, we either need a no-op |
6683 | // or a truncation. |
6684 | SDValue Shift = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Splat); |
6685 | return DAG.getNode(Opcode: ByScalar, DL, VT, N1: Op0, N2: Shift); |
6686 | } |
6687 | } |
6688 | |
6689 | // See whether the shift vector is a splat represented as SHUFFLE_VECTOR, |
6690 | // and the shift amount is directly available in a GPR. |
6691 | if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Val&: Op1)) { |
6692 | if (VSN->isSplat()) { |
6693 | SDValue VSNOp0 = VSN->getOperand(Num: 0); |
6694 | unsigned Index = VSN->getSplatIndex(); |
6695 | assert(Index < VT.getVectorNumElements() && |
6696 | "Splat index should be defined and in first operand" ); |
6697 | if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) || |
6698 | VSNOp0.getOpcode() == ISD::BUILD_VECTOR) { |
6699 | // Since i32 is the smallest legal type, we either need a no-op |
6700 | // or a truncation. |
6701 | SDValue Shift = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, |
6702 | Operand: VSNOp0.getOperand(i: Index)); |
6703 | return DAG.getNode(Opcode: ByScalar, DL, VT, N1: Op0, N2: Shift); |
6704 | } |
6705 | } |
6706 | } |
6707 | |
6708 | // Otherwise just treat the current form as legal. |
6709 | return Op; |
6710 | } |
6711 | |
6712 | SDValue SystemZTargetLowering::lowerFSHL(SDValue Op, SelectionDAG &DAG) const { |
6713 | SDLoc DL(Op); |
6714 | |
6715 | // i128 FSHL with a constant amount that is a multiple of 8 can be |
6716 | // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2 |
6717 | // facility, FSHL with a constant amount less than 8 can be implemented |
6718 | // via SHL_DOUBLE_BIT, and FSHL with other constant amounts by a |
6719 | // combination of the two. |
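  // E.g. (illustrative) for fshl i128 %a, %b, 20: the byte part
  // (20 >> 3 == 2) is a shuffle selecting bytes 2..17 of %a:%b, and
  // SHL_DOUBLE_BIT supplies the remaining 4 bits from the following
  // 16 bytes of the concatenation.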
6720 | if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2))) { |
6721 | uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127; |
6722 | if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) { |
6723 | SDValue Op0 = DAG.getBitcast(VT: MVT::v16i8, V: Op.getOperand(i: 0)); |
6724 | SDValue Op1 = DAG.getBitcast(VT: MVT::v16i8, V: Op.getOperand(i: 1)); |
6725 | SmallVector<int, 16> Mask(16); |
6726 | for (unsigned Elt = 0; Elt < 16; Elt++) |
6727 | Mask[Elt] = (ShiftAmt >> 3) + Elt; |
6728 | SDValue Shuf1 = DAG.getVectorShuffle(VT: MVT::v16i8, dl: DL, N1: Op0, N2: Op1, Mask); |
6729 | if ((ShiftAmt & 7) == 0) |
6730 | return DAG.getBitcast(VT: MVT::i128, V: Shuf1); |
6731 | SDValue Shuf2 = DAG.getVectorShuffle(VT: MVT::v16i8, dl: DL, N1: Op1, N2: Op1, Mask); |
6732 | SDValue Val = |
6733 | DAG.getNode(Opcode: SystemZISD::SHL_DOUBLE_BIT, DL, VT: MVT::v16i8, N1: Shuf1, N2: Shuf2, |
6734 | N3: DAG.getTargetConstant(Val: ShiftAmt & 7, DL, VT: MVT::i32)); |
6735 | return DAG.getBitcast(VT: MVT::i128, V: Val); |
6736 | } |
6737 | } |
6738 | |
6739 | return SDValue(); |
6740 | } |
6741 | |
6742 | SDValue SystemZTargetLowering::lowerFSHR(SDValue Op, SelectionDAG &DAG) const { |
6743 | SDLoc DL(Op); |
6744 | |
6745 | // i128 FSHR with a constant amount that is a multiple of 8 can be |
6746 | // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2 |
6747 | // facility, FSHR with a constant amount less than 8 can be implemented |
6748 | // via SHL_DOUBLE_BIT, and FSHR with other constant amounts by a |
6749 | // combination of the two. |
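  // E.g. (illustrative) for fshr i128 %a, %b, 20: the byte part is a
  // shuffle selecting bytes 14..29 of %a:%b (16 - (20 >> 3) == 14), and
  // SHR_DOUBLE_BIT handles the remaining 4 bits.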
6750 | if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2))) { |
6751 | uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127; |
6752 | if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) { |
6753 | SDValue Op0 = DAG.getBitcast(VT: MVT::v16i8, V: Op.getOperand(i: 0)); |
6754 | SDValue Op1 = DAG.getBitcast(VT: MVT::v16i8, V: Op.getOperand(i: 1)); |
6755 | SmallVector<int, 16> Mask(16); |
6756 | for (unsigned Elt = 0; Elt < 16; Elt++) |
6757 | Mask[Elt] = 16 - (ShiftAmt >> 3) + Elt; |
6758 | SDValue Shuf1 = DAG.getVectorShuffle(VT: MVT::v16i8, dl: DL, N1: Op0, N2: Op1, Mask); |
6759 | if ((ShiftAmt & 7) == 0) |
6760 | return DAG.getBitcast(VT: MVT::i128, V: Shuf1); |
6761 | SDValue Shuf2 = DAG.getVectorShuffle(VT: MVT::v16i8, dl: DL, N1: Op0, N2: Op0, Mask); |
6762 | SDValue Val = |
6763 | DAG.getNode(Opcode: SystemZISD::SHR_DOUBLE_BIT, DL, VT: MVT::v16i8, N1: Shuf2, N2: Shuf1, |
6764 | N3: DAG.getTargetConstant(Val: ShiftAmt & 7, DL, VT: MVT::i32)); |
6765 | return DAG.getBitcast(VT: MVT::i128, V: Val); |
6766 | } |
6767 | } |
6768 | |
6769 | return SDValue(); |
6770 | } |
6771 | |
6772 | static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG) { |
6773 | SDLoc dl(Op); |
6774 | SDValue Src = Op.getOperand(i: 0); |
6775 | MVT DstVT = Op.getSimpleValueType(); |
6776 | |
6777 | AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Val: Op.getNode()); |
6778 | unsigned SrcAS = N->getSrcAddressSpace(); |
6779 | |
6780 | assert(SrcAS != N->getDestAddressSpace() && |
6781 | "addrspacecast must be between different address spaces" ); |
6782 | |
  // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
  // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
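  // A ptr32 value is a 31-bit address, so the high bit of the 32-bit
  // representation is cleared (masked with 0x7fffffff) in both directions.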
6785 | if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) { |
6786 | Op = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: Src, |
6787 | N2: DAG.getConstant(Val: 0x7fffffff, DL: dl, VT: MVT::i32)); |
6788 | Op = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: DstVT, Operand: Op); |
6789 | } else if (DstVT == MVT::i32) { |
6790 | Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: DstVT, Operand: Src); |
6791 | Op = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: Op, |
6792 | N2: DAG.getConstant(Val: 0x7fffffff, DL: dl, VT: MVT::i32)); |
6793 | Op = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: DstVT, Operand: Op); |
6794 | } else { |
6795 | report_fatal_error(reason: "Bad address space in addrspacecast" ); |
6796 | } |
6797 | return Op; |
6798 | } |
6799 | |
6800 | SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op, |
6801 | SelectionDAG &DAG) const { |
6802 | SDValue In = Op.getOperand(i: Op->isStrictFPOpcode() ? 1 : 0); |
6803 | if (In.getSimpleValueType() != MVT::f16) |
6804 | return Op; // Legal |
6805 | return SDValue(); // Let legalizer emit the libcall. |
6806 | } |
6807 | |
6808 | SDValue SystemZTargetLowering::useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, |
6809 | MVT VT, SDValue Arg, SDLoc DL, |
6810 | SDValue Chain, bool IsStrict) const { |
6811 | assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!" ); |
6812 | MakeLibCallOptions CallOptions; |
6813 | SDValue Result; |
6814 | std::tie(args&: Result, args&: Chain) = |
6815 | makeLibCall(DAG, LC, RetVT: VT, Ops: Arg, CallOptions, dl: DL, Chain); |
6816 | return IsStrict ? DAG.getMergeValues(Ops: {Result, Chain}, dl: DL) : Result; |
6817 | } |
6818 | |
6819 | SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op, |
6820 | SelectionDAG &DAG) const { |
6821 | bool IsSigned = (Op->getOpcode() == ISD::FP_TO_SINT || |
6822 | Op->getOpcode() == ISD::STRICT_FP_TO_SINT); |
6823 | bool IsStrict = Op->isStrictFPOpcode(); |
6824 | SDLoc DL(Op); |
6825 | MVT VT = Op.getSimpleValueType(); |
6826 | SDValue InOp = Op.getOperand(i: IsStrict ? 1 : 0); |
6827 | SDValue Chain = IsStrict ? Op.getOperand(i: 0) : DAG.getEntryNode(); |
6828 | EVT InVT = InOp.getValueType(); |
6829 | |
6830 | // FP to unsigned is not directly supported on z10. Promoting an i32 |
6831 | // result to (signed) i64 doesn't generate an inexact condition (fp |
6832 | // exception) for values that are outside the i32 range but in the i64 |
6833 | // range, so use the default expansion. |
6834 | if (!Subtarget.hasFPExtension() && !IsSigned) |
6835 | // Expand i32/i64. F16 values will be recognized to fit and extended. |
6836 | return SDValue(); |
6837 | |
6838 | // Conversion from f16 is done via f32. |
6839 | if (InOp.getSimpleValueType() == MVT::f16) { |
6840 | SmallVector<SDValue, 2> Results; |
6841 | LowerOperationWrapper(N: Op.getNode(), Results, DAG); |
6842 | return DAG.getMergeValues(Ops: Results, dl: DL); |
6843 | } |
6844 | |
6845 | if (VT == MVT::i128) { |
6846 | RTLIB::Libcall LC = |
6847 | IsSigned ? RTLIB::getFPTOSINT(OpVT: InVT, RetVT: VT) : RTLIB::getFPTOUINT(OpVT: InVT, RetVT: VT); |
6848 | return useLibCall(DAG, LC, VT, Arg: InOp, DL, Chain, IsStrict); |
6849 | } |
6850 | |
6851 | return Op; // Legal |
6852 | } |
6853 | |
6854 | SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op, |
6855 | SelectionDAG &DAG) const { |
6856 | bool IsSigned = (Op->getOpcode() == ISD::SINT_TO_FP || |
6857 | Op->getOpcode() == ISD::STRICT_SINT_TO_FP); |
6858 | bool IsStrict = Op->isStrictFPOpcode(); |
6859 | SDLoc DL(Op); |
6860 | MVT VT = Op.getSimpleValueType(); |
6861 | SDValue InOp = Op.getOperand(i: IsStrict ? 1 : 0); |
6862 | SDValue Chain = IsStrict ? Op.getOperand(i: 0) : DAG.getEntryNode(); |
6863 | EVT InVT = InOp.getValueType(); |
6864 | |
6865 | // Conversion to f16 is done via f32. |
6866 | if (VT == MVT::f16) { |
6867 | SmallVector<SDValue, 2> Results; |
6868 | LowerOperationWrapper(N: Op.getNode(), Results, DAG); |
6869 | return DAG.getMergeValues(Ops: Results, dl: DL); |
6870 | } |
6871 | |
6872 | // Unsigned to fp is not directly supported on z10. |
6873 | if (!Subtarget.hasFPExtension() && !IsSigned) |
6874 | return SDValue(); // Expand i64. |
6875 | |
6876 | if (InVT == MVT::i128) { |
6877 | RTLIB::Libcall LC = |
6878 | IsSigned ? RTLIB::getSINTTOFP(OpVT: InVT, RetVT: VT) : RTLIB::getUINTTOFP(OpVT: InVT, RetVT: VT); |
6879 | return useLibCall(DAG, LC, VT, Arg: InOp, DL, Chain, IsStrict); |
6880 | } |
6881 | |
6882 | return Op; // Legal |
6883 | } |
6884 | |
6885 | // Shift the lower 2 bytes of Op to the left in order to insert into the |
6886 | // upper 2 bytes of the FP register. |
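// E.g. an i64 whose low 16 bits hold the half-float payload is shifted
// left by 48 so that the payload lands in the leftmost halfword of the
// register, which is where subreg_h16 of an FPR lives.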
6887 | static SDValue convertToF16(SDValue Op, SelectionDAG &DAG) { |
6888 | assert(Op.getSimpleValueType() == MVT::i64 && |
6889 | "Expexted to convert i64 to f16." ); |
6890 | SDLoc DL(Op); |
6891 | SDValue Shft = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, N1: Op, |
6892 | N2: DAG.getConstant(Val: 48, DL, VT: MVT::i64)); |
6893 | SDValue BCast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f64, Operand: Shft); |
6894 | SDValue F16Val = |
6895 | DAG.getTargetExtractSubreg(SRIdx: SystemZ::subreg_h16, DL, VT: MVT::f16, Operand: BCast); |
6896 | return F16Val; |
6897 | } |
6898 | |
// Extract Op into a GPR and shift the 2 f16 bytes to the right.
6900 | static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG) { |
6901 | assert(Op.getSimpleValueType() == MVT::f16 && |
6902 | "Expected to convert f16 to i64." ); |
6903 | SDNode *U32 = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::f64); |
6904 | SDValue In64 = DAG.getTargetInsertSubreg(SRIdx: SystemZ::subreg_h16, DL, VT: MVT::f64, |
6905 | Operand: SDValue(U32, 0), Subreg: Op); |
6906 | SDValue BCast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i64, Operand: In64); |
6907 | SDValue Shft = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: BCast, |
6908 | N2: DAG.getConstant(Val: 48, DL, VT: MVT::i32)); |
6909 | return Shft; |
6910 | } |
6911 | |
6912 | // Lower an f16 LOAD in case of no vector support. |
6913 | SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op, |
6914 | SelectionDAG &DAG) const { |
6915 | EVT RegVT = Op.getValueType(); |
6916 | assert(RegVT == MVT::f16 && "Expected to lower an f16 load." ); |
6917 | (void)RegVT; |
6918 | |
6919 | // Load as integer. |
6920 | SDLoc DL(Op); |
6921 | SDValue NewLd; |
6922 | if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Val: Op.getNode())) { |
6923 | assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load" ); |
6924 | NewLd = DAG.getAtomicLoad(ExtType: ISD::EXTLOAD, dl: DL, MemVT: MVT::i16, VT: MVT::i64, |
6925 | Chain: AtomicLd->getChain(), Ptr: AtomicLd->getBasePtr(), |
6926 | MMO: AtomicLd->getMemOperand()); |
6927 | } else { |
6928 | LoadSDNode *Ld = cast<LoadSDNode>(Val: Op.getNode()); |
6929 | assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load" ); |
6930 | NewLd = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: DL, VT: MVT::i64, Chain: Ld->getChain(), |
6931 | Ptr: Ld->getBasePtr(), PtrInfo: Ld->getPointerInfo(), MemVT: MVT::i16, |
6932 | Alignment: Ld->getBaseAlign(), MMOFlags: Ld->getMemOperand()->getFlags()); |
6933 | } |
6934 | SDValue F16Val = convertToF16(Op: NewLd, DAG); |
6935 | return DAG.getMergeValues(Ops: {F16Val, NewLd.getValue(R: 1)}, dl: DL); |
6936 | } |
6937 | |
6938 | // Lower an f16 STORE in case of no vector support. |
6939 | SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op, |
6940 | SelectionDAG &DAG) const { |
6941 | SDLoc DL(Op); |
6942 | SDValue Shft = convertFromF16(Op: Op->getOperand(Num: 1), DL, DAG); |
6943 | |
6944 | if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Val: Op.getNode())) |
6945 | return DAG.getAtomic(Opcode: ISD::ATOMIC_STORE, dl: DL, MemVT: MVT::i16, Chain: AtomicSt->getChain(), |
6946 | Ptr: Shft, Val: AtomicSt->getBasePtr(), |
6947 | MMO: AtomicSt->getMemOperand()); |
6948 | |
6949 | StoreSDNode *St = cast<StoreSDNode>(Val: Op.getNode()); |
6950 | return DAG.getTruncStore(Chain: St->getChain(), dl: DL, Val: Shft, Ptr: St->getBasePtr(), SVT: MVT::i16, |
6951 | MMO: St->getMemOperand()); |
6952 | } |
6953 | |
6954 | SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op, |
6955 | SelectionDAG &DAG) const { |
6956 | SDLoc DL(Op); |
6957 | MVT ResultVT = Op.getSimpleValueType(); |
6958 | SDValue Arg = Op.getOperand(i: 0); |
6959 | unsigned Check = Op.getConstantOperandVal(i: 1); |
6960 | |
6961 | unsigned TDCMask = 0; |
6962 | if (Check & fcSNan) |
6963 | TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS; |
6964 | if (Check & fcQNan) |
6965 | TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS; |
6966 | if (Check & fcPosInf) |
6967 | TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS; |
6968 | if (Check & fcNegInf) |
6969 | TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS; |
6970 | if (Check & fcPosNormal) |
6971 | TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS; |
6972 | if (Check & fcNegNormal) |
6973 | TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS; |
6974 | if (Check & fcPosSubnormal) |
6975 | TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS; |
6976 | if (Check & fcNegSubnormal) |
6977 | TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS; |
6978 | if (Check & fcPosZero) |
6979 | TDCMask |= SystemZ::TDCMASK_ZERO_PLUS; |
6980 | if (Check & fcNegZero) |
6981 | TDCMask |= SystemZ::TDCMASK_ZERO_MINUS; |
6982 | SDValue TDCMaskV = DAG.getConstant(Val: TDCMask, DL, VT: MVT::i64); |
6983 | |
6984 | if (Arg.getSimpleValueType() == MVT::f16) |
6985 | Arg = DAG.getFPExtendOrRound(Op: Arg, DL: SDLoc(Arg), VT: MVT::f32); |
6986 | SDValue Intr = DAG.getNode(Opcode: SystemZISD::TDC, DL, VT: ResultVT, N1: Arg, N2: TDCMaskV); |
6987 | return getCCResult(DAG, CCReg: Intr); |
6988 | } |
6989 | |
6990 | SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op, |
6991 | SelectionDAG &DAG) const { |
6992 | SDLoc DL(Op); |
6993 | SDValue Chain = Op.getOperand(i: 0); |
6994 | |
6995 | // STCKF only supports a memory operand, so we have to use a temporary. |
6996 | SDValue StackPtr = DAG.CreateStackTemporary(VT: MVT::i64); |
6997 | int SPFI = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex(); |
6998 | MachinePointerInfo MPI = |
6999 | MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: SPFI); |
7000 | |
  // Use STCKF to store the TOD clock into the temporary.
7002 | SDValue StoreOps[] = {Chain, StackPtr}; |
7003 | Chain = DAG.getMemIntrinsicNode( |
7004 | Opcode: SystemZISD::STCKF, dl: DL, VTList: DAG.getVTList(VT: MVT::Other), Ops: StoreOps, MemVT: MVT::i64, |
7005 | PtrInfo: MPI, Alignment: MaybeAlign(), Flags: MachineMemOperand::MOStore); |
7006 | |
7007 | // And read it back from there. |
7008 | return DAG.getLoad(VT: MVT::i64, dl: DL, Chain, Ptr: StackPtr, PtrInfo: MPI); |
7009 | } |
7010 | |
7011 | SDValue SystemZTargetLowering::LowerOperation(SDValue Op, |
7012 | SelectionDAG &DAG) const { |
7013 | switch (Op.getOpcode()) { |
7014 | case ISD::FRAMEADDR: |
7015 | return lowerFRAMEADDR(Op, DAG); |
7016 | case ISD::RETURNADDR: |
7017 | return lowerRETURNADDR(Op, DAG); |
7018 | case ISD::BR_CC: |
7019 | return lowerBR_CC(Op, DAG); |
7020 | case ISD::SELECT_CC: |
7021 | return lowerSELECT_CC(Op, DAG); |
7022 | case ISD::SETCC: |
7023 | return lowerSETCC(Op, DAG); |
7024 | case ISD::STRICT_FSETCC: |
7025 | return lowerSTRICT_FSETCC(Op, DAG, IsSignaling: false); |
7026 | case ISD::STRICT_FSETCCS: |
7027 | return lowerSTRICT_FSETCC(Op, DAG, IsSignaling: true); |
7028 | case ISD::GlobalAddress: |
7029 | return lowerGlobalAddress(Node: cast<GlobalAddressSDNode>(Val&: Op), DAG); |
7030 | case ISD::GlobalTLSAddress: |
7031 | return lowerGlobalTLSAddress(Node: cast<GlobalAddressSDNode>(Val&: Op), DAG); |
7032 | case ISD::BlockAddress: |
7033 | return lowerBlockAddress(Node: cast<BlockAddressSDNode>(Val&: Op), DAG); |
7034 | case ISD::JumpTable: |
7035 | return lowerJumpTable(JT: cast<JumpTableSDNode>(Val&: Op), DAG); |
7036 | case ISD::ConstantPool: |
7037 | return lowerConstantPool(CP: cast<ConstantPoolSDNode>(Val&: Op), DAG); |
7038 | case ISD::BITCAST: |
7039 | return lowerBITCAST(Op, DAG); |
7040 | case ISD::VASTART: |
7041 | return lowerVASTART(Op, DAG); |
7042 | case ISD::VACOPY: |
7043 | return lowerVACOPY(Op, DAG); |
7044 | case ISD::DYNAMIC_STACKALLOC: |
7045 | return lowerDYNAMIC_STACKALLOC(Op, DAG); |
7046 | case ISD::GET_DYNAMIC_AREA_OFFSET: |
7047 | return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); |
7048 | case ISD::MULHS: |
7049 | return lowerMULH(Op, DAG, Opcode: SystemZISD::SMUL_LOHI); |
7050 | case ISD::MULHU: |
7051 | return lowerMULH(Op, DAG, Opcode: SystemZISD::UMUL_LOHI); |
7052 | case ISD::SMUL_LOHI: |
7053 | return lowerSMUL_LOHI(Op, DAG); |
7054 | case ISD::UMUL_LOHI: |
7055 | return lowerUMUL_LOHI(Op, DAG); |
7056 | case ISD::SDIVREM: |
7057 | return lowerSDIVREM(Op, DAG); |
7058 | case ISD::UDIVREM: |
7059 | return lowerUDIVREM(Op, DAG); |
7060 | case ISD::SADDO: |
7061 | case ISD::SSUBO: |
7062 | case ISD::UADDO: |
7063 | case ISD::USUBO: |
7064 | return lowerXALUO(Op, DAG); |
7065 | case ISD::UADDO_CARRY: |
7066 | case ISD::USUBO_CARRY: |
7067 | return lowerUADDSUBO_CARRY(Op, DAG); |
7068 | case ISD::OR: |
7069 | return lowerOR(Op, DAG); |
7070 | case ISD::CTPOP: |
7071 | return lowerCTPOP(Op, DAG); |
7072 | case ISD::VECREDUCE_ADD: |
7073 | return lowerVECREDUCE_ADD(Op, DAG); |
7074 | case ISD::ATOMIC_FENCE: |
7075 | return lowerATOMIC_FENCE(Op, DAG); |
7076 | case ISD::ATOMIC_SWAP: |
7077 | return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_SWAPW); |
7078 | case ISD::ATOMIC_STORE: |
7079 | return lowerATOMIC_STORE(Op, DAG); |
7080 | case ISD::ATOMIC_LOAD: |
7081 | return lowerATOMIC_LOAD(Op, DAG); |
7082 | case ISD::ATOMIC_LOAD_ADD: |
7083 | return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_ADD); |
7084 | case ISD::ATOMIC_LOAD_SUB: |
7085 | return lowerATOMIC_LOAD_SUB(Op, DAG); |
7086 | case ISD::ATOMIC_LOAD_AND: |
7087 | return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_AND); |
7088 | case ISD::ATOMIC_LOAD_OR: |
7089 | return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_OR); |
7090 | case ISD::ATOMIC_LOAD_XOR: |
7091 | return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_XOR); |
7092 | case ISD::ATOMIC_LOAD_NAND: |
7093 | return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_NAND); |
7094 | case ISD::ATOMIC_LOAD_MIN: |
7095 | return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_MIN); |
7096 | case ISD::ATOMIC_LOAD_MAX: |
7097 | return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_MAX); |
7098 | case ISD::ATOMIC_LOAD_UMIN: |
7099 | return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_UMIN); |
7100 | case ISD::ATOMIC_LOAD_UMAX: |
7101 | return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_UMAX); |
7102 | case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: |
7103 | return lowerATOMIC_CMP_SWAP(Op, DAG); |
7104 | case ISD::STACKSAVE: |
7105 | return lowerSTACKSAVE(Op, DAG); |
7106 | case ISD::STACKRESTORE: |
7107 | return lowerSTACKRESTORE(Op, DAG); |
7108 | case ISD::PREFETCH: |
7109 | return lowerPREFETCH(Op, DAG); |
7110 | case ISD::INTRINSIC_W_CHAIN: |
7111 | return lowerINTRINSIC_W_CHAIN(Op, DAG); |
7112 | case ISD::INTRINSIC_WO_CHAIN: |
7113 | return lowerINTRINSIC_WO_CHAIN(Op, DAG); |
7114 | case ISD::BUILD_VECTOR: |
7115 | return lowerBUILD_VECTOR(Op, DAG); |
7116 | case ISD::VECTOR_SHUFFLE: |
7117 | return lowerVECTOR_SHUFFLE(Op, DAG); |
7118 | case ISD::SCALAR_TO_VECTOR: |
7119 | return lowerSCALAR_TO_VECTOR(Op, DAG); |
7120 | case ISD::INSERT_VECTOR_ELT: |
7121 | return lowerINSERT_VECTOR_ELT(Op, DAG); |
7122 | case ISD::EXTRACT_VECTOR_ELT: |
7123 | return lowerEXTRACT_VECTOR_ELT(Op, DAG); |
7124 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
7125 | return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG); |
7126 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
7127 | return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG); |
7128 | case ISD::SHL: |
7129 | return lowerShift(Op, DAG, ByScalar: SystemZISD::VSHL_BY_SCALAR); |
7130 | case ISD::SRL: |
7131 | return lowerShift(Op, DAG, ByScalar: SystemZISD::VSRL_BY_SCALAR); |
7132 | case ISD::SRA: |
7133 | return lowerShift(Op, DAG, ByScalar: SystemZISD::VSRA_BY_SCALAR); |
7134 | case ISD::ADDRSPACECAST: |
7135 | return lowerAddrSpaceCast(Op, DAG); |
7136 | case ISD::ROTL: |
7137 | return lowerShift(Op, DAG, ByScalar: SystemZISD::VROTL_BY_SCALAR); |
7138 | case ISD::FSHL: |
7139 | return lowerFSHL(Op, DAG); |
7140 | case ISD::FSHR: |
7141 | return lowerFSHR(Op, DAG); |
7142 | case ISD::FP_EXTEND: |
7143 | case ISD::STRICT_FP_EXTEND: |
7144 | return lowerFP_EXTEND(Op, DAG); |
7145 | case ISD::FP_TO_UINT: |
7146 | case ISD::FP_TO_SINT: |
7147 | case ISD::STRICT_FP_TO_UINT: |
7148 | case ISD::STRICT_FP_TO_SINT: |
7149 | return lower_FP_TO_INT(Op, DAG); |
7150 | case ISD::UINT_TO_FP: |
7151 | case ISD::SINT_TO_FP: |
7152 | case ISD::STRICT_UINT_TO_FP: |
7153 | case ISD::STRICT_SINT_TO_FP: |
7154 | return lower_INT_TO_FP(Op, DAG); |
7155 | case ISD::LOAD: |
7156 | return lowerLoadF16(Op, DAG); |
7157 | case ISD::STORE: |
7158 | return lowerStoreF16(Op, DAG); |
7159 | case ISD::IS_FPCLASS: |
7160 | return lowerIS_FPCLASS(Op, DAG); |
7161 | case ISD::GET_ROUNDING: |
7162 | return lowerGET_ROUNDING(Op, DAG); |
7163 | case ISD::READCYCLECOUNTER: |
7164 | return lowerREADCYCLECOUNTER(Op, DAG); |
7165 | case ISD::EH_SJLJ_SETJMP: |
7166 | case ISD::EH_SJLJ_LONGJMP: |
7167 | // These operations are legal on our platform, but we cannot actually |
7168 | // set the operation action to Legal as common code would treat this |
  // as equivalent to Expand. Instead, we keep the operation action as
  // Custom and just leave them unchanged here.
7171 | return Op; |
7172 | |
7173 | default: |
7174 | llvm_unreachable("Unexpected node to lower" ); |
7175 | } |
7176 | } |
7177 | |
7178 | static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, |
7179 | const SDLoc &SL) { |
7180 | // If i128 is legal, just use a normal bitcast. |
7181 | if (DAG.getTargetLoweringInfo().isTypeLegal(VT: MVT::i128)) |
7182 | return DAG.getBitcast(VT: MVT::f128, V: Src); |
7183 | |
7184 | // Otherwise, f128 must live in FP128, so do a partwise move. |
7185 | assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) == |
7186 | &SystemZ::FP128BitRegClass); |
7187 | |
7188 | SDValue Hi, Lo; |
7189 | std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Src, DL: SL, LoVT: MVT::i64, HiVT: MVT::i64); |
7190 | |
7191 | Hi = DAG.getBitcast(VT: MVT::f64, V: Hi); |
7192 | Lo = DAG.getBitcast(VT: MVT::f64, V: Lo); |
7193 | |
7194 | SDNode *Pair = DAG.getMachineNode( |
7195 | Opcode: SystemZ::REG_SEQUENCE, dl: SL, VT: MVT::f128, |
7196 | Ops: {DAG.getTargetConstant(Val: SystemZ::FP128BitRegClassID, DL: SL, VT: MVT::i32), Lo, |
7197 | DAG.getTargetConstant(Val: SystemZ::subreg_l64, DL: SL, VT: MVT::i32), Hi, |
7198 | DAG.getTargetConstant(Val: SystemZ::subreg_h64, DL: SL, VT: MVT::i32)}); |
7199 | return SDValue(Pair, 0); |
7200 | } |
7201 | |
7202 | static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, |
7203 | const SDLoc &SL) { |
7204 | // If i128 is legal, just use a normal bitcast. |
7205 | if (DAG.getTargetLoweringInfo().isTypeLegal(VT: MVT::i128)) |
7206 | return DAG.getBitcast(VT: MVT::i128, V: Src); |
7207 | |
7208 | // Otherwise, f128 must live in FP128, so do a partwise move. |
7209 | assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) == |
7210 | &SystemZ::FP128BitRegClass); |
7211 | |
7212 | SDValue LoFP = |
7213 | DAG.getTargetExtractSubreg(SRIdx: SystemZ::subreg_l64, DL: SL, VT: MVT::f64, Operand: Src); |
7214 | SDValue HiFP = |
7215 | DAG.getTargetExtractSubreg(SRIdx: SystemZ::subreg_h64, DL: SL, VT: MVT::f64, Operand: Src); |
7216 | SDValue Lo = DAG.getNode(Opcode: ISD::BITCAST, DL: SL, VT: MVT::i64, Operand: LoFP); |
7217 | SDValue Hi = DAG.getNode(Opcode: ISD::BITCAST, DL: SL, VT: MVT::i64, Operand: HiFP); |
7218 | |
7219 | return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: SL, VT: MVT::i128, N1: Lo, N2: Hi); |
7220 | } |
7221 | |
7222 | // Lower operations with invalid operand or result types. |
7223 | void |
7224 | SystemZTargetLowering::LowerOperationWrapper(SDNode *N, |
7225 | SmallVectorImpl<SDValue> &Results, |
7226 | SelectionDAG &DAG) const { |
7227 | switch (N->getOpcode()) { |
7228 | case ISD::ATOMIC_LOAD: { |
7229 | SDLoc DL(N); |
7230 | SDVTList Tys = DAG.getVTList(VT1: MVT::Untyped, VT2: MVT::Other); |
7231 | SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1) }; |
7232 | MachineMemOperand *MMO = cast<AtomicSDNode>(Val: N)->getMemOperand(); |
7233 | SDValue Res = DAG.getMemIntrinsicNode(Opcode: SystemZISD::ATOMIC_LOAD_128, |
7234 | dl: DL, VTList: Tys, Ops, MemVT: MVT::i128, MMO); |
7235 | |
7236 | SDValue Lowered = lowerGR128ToI128(DAG, In: Res); |
7237 | if (N->getValueType(ResNo: 0) == MVT::f128) |
7238 | Lowered = expandBitCastI128ToF128(DAG, Src: Lowered, SL: DL); |
7239 | Results.push_back(Elt: Lowered); |
7240 | Results.push_back(Elt: Res.getValue(R: 1)); |
7241 | break; |
7242 | } |
7243 | case ISD::ATOMIC_STORE: { |
7244 | SDLoc DL(N); |
7245 | SDVTList Tys = DAG.getVTList(VT: MVT::Other); |
7246 | SDValue Val = N->getOperand(Num: 1); |
7247 | if (Val.getValueType() == MVT::f128) |
7248 | Val = expandBitCastF128ToI128(DAG, Src: Val, SL: DL); |
7249 | Val = lowerI128ToGR128(DAG, In: Val); |
7250 | |
7251 | SDValue Ops[] = {N->getOperand(Num: 0), Val, N->getOperand(Num: 2)}; |
7252 | MachineMemOperand *MMO = cast<AtomicSDNode>(Val: N)->getMemOperand(); |
7253 | SDValue Res = DAG.getMemIntrinsicNode(Opcode: SystemZISD::ATOMIC_STORE_128, |
7254 | dl: DL, VTList: Tys, Ops, MemVT: MVT::i128, MMO); |
7255 | // We have to enforce sequential consistency by performing a |
7256 | // serialization operation after the store. |
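    // (The Serialize pseudo expands to a serializing BCR instruction.)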
7257 | if (cast<AtomicSDNode>(Val: N)->getSuccessOrdering() == |
7258 | AtomicOrdering::SequentiallyConsistent) |
7259 | Res = SDValue(DAG.getMachineNode(Opcode: SystemZ::Serialize, dl: DL, |
7260 | VT: MVT::Other, Op1: Res), 0); |
7261 | Results.push_back(Elt: Res); |
7262 | break; |
7263 | } |
7264 | case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { |
7265 | SDLoc DL(N); |
7266 | SDVTList Tys = DAG.getVTList(VT1: MVT::Untyped, VT2: MVT::i32, VT3: MVT::Other); |
7267 | SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1), |
7268 | lowerI128ToGR128(DAG, In: N->getOperand(Num: 2)), |
7269 | lowerI128ToGR128(DAG, In: N->getOperand(Num: 3)) }; |
7270 | MachineMemOperand *MMO = cast<AtomicSDNode>(Val: N)->getMemOperand(); |
7271 | SDValue Res = DAG.getMemIntrinsicNode(Opcode: SystemZISD::ATOMIC_CMP_SWAP_128, |
7272 | dl: DL, VTList: Tys, Ops, MemVT: MVT::i128, MMO); |
7273 | SDValue Success = emitSETCC(DAG, DL, CCReg: Res.getValue(R: 1), |
7274 | CCValid: SystemZ::CCMASK_CS, CCMask: SystemZ::CCMASK_CS_EQ); |
7275 | Success = DAG.getZExtOrTrunc(Op: Success, DL, VT: N->getValueType(ResNo: 1)); |
7276 | Results.push_back(Elt: lowerGR128ToI128(DAG, In: Res)); |
7277 | Results.push_back(Elt: Success); |
7278 | Results.push_back(Elt: Res.getValue(R: 2)); |
7279 | break; |
7280 | } |
7281 | case ISD::BITCAST: { |
7282 | if (useSoftFloat()) |
7283 | return; |
7284 | SDLoc DL(N); |
7285 | SDValue Src = N->getOperand(Num: 0); |
7286 | EVT SrcVT = Src.getValueType(); |
7287 | EVT ResVT = N->getValueType(ResNo: 0); |
7288 | if (ResVT == MVT::i128 && SrcVT == MVT::f128) |
7289 | Results.push_back(Elt: expandBitCastF128ToI128(DAG, Src, SL: DL)); |
7290 | else if (SrcVT == MVT::i16 && ResVT == MVT::f16) { |
7291 | if (Subtarget.hasVector()) { |
7292 | SDValue In32 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i32, Operand: Src); |
7293 | Results.push_back(Elt: SDValue( |
7294 | DAG.getMachineNode(Opcode: SystemZ::LEFR_16, dl: DL, VT: MVT::f16, Op1: In32), 0)); |
7295 | } else { |
7296 | SDValue In64 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Src); |
7297 | Results.push_back(Elt: convertToF16(Op: In64, DAG)); |
7298 | } |
7299 | } else if (SrcVT == MVT::f16 && ResVT == MVT::i16) { |
      SDValue ExtractedVal =
7301 | Subtarget.hasVector() |
7302 | ? SDValue(DAG.getMachineNode(Opcode: SystemZ::LFER_16, dl: DL, VT: MVT::i32, Op1: Src), |
7303 | 0) |
7304 | : convertFromF16(Op: Src, DL, DAG); |
7305 | Results.push_back(Elt: DAG.getZExtOrTrunc(Op: ExtractedVal, DL, VT: ResVT)); |
7306 | } |
7307 | break; |
7308 | } |
7309 | case ISD::UINT_TO_FP: |
7310 | case ISD::SINT_TO_FP: |
7311 | case ISD::STRICT_UINT_TO_FP: |
7312 | case ISD::STRICT_SINT_TO_FP: { |
7313 | if (useSoftFloat()) |
7314 | return; |
7315 | bool IsStrict = N->isStrictFPOpcode(); |
7316 | SDLoc DL(N); |
7317 | SDValue InOp = N->getOperand(Num: IsStrict ? 1 : 0); |
7318 | EVT ResVT = N->getValueType(ResNo: 0); |
7319 | SDValue Chain = IsStrict ? N->getOperand(Num: 0) : DAG.getEntryNode(); |
7320 | if (ResVT == MVT::f16) { |
7321 | if (!IsStrict) { |
7322 | SDValue OpF32 = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::f32, Operand: InOp); |
7323 | Results.push_back(Elt: DAG.getFPExtendOrRound(Op: OpF32, DL, VT: MVT::f16)); |
7324 | } else { |
7325 | SDValue OpF32 = |
7326 | DAG.getNode(Opcode: N->getOpcode(), DL, VTList: DAG.getVTList(VT1: MVT::f32, VT2: MVT::Other), |
7327 | Ops: {Chain, InOp}); |
7328 | SDValue F16Res; |
7329 | std::tie(args&: F16Res, args&: Chain) = DAG.getStrictFPExtendOrRound( |
7330 | Op: OpF32, Chain: OpF32.getValue(R: 1), DL, VT: MVT::f16); |
7331 | Results.push_back(Elt: F16Res); |
7332 | Results.push_back(Elt: Chain); |
7333 | } |
7334 | } |
7335 | break; |
7336 | } |
7337 | case ISD::FP_TO_UINT: |
7338 | case ISD::FP_TO_SINT: |
7339 | case ISD::STRICT_FP_TO_UINT: |
7340 | case ISD::STRICT_FP_TO_SINT: { |
7341 | if (useSoftFloat()) |
7342 | return; |
7343 | bool IsStrict = N->isStrictFPOpcode(); |
7344 | SDLoc DL(N); |
7345 | EVT ResVT = N->getValueType(ResNo: 0); |
7346 | SDValue InOp = N->getOperand(Num: IsStrict ? 1 : 0); |
7347 | EVT InVT = InOp->getValueType(ResNo: 0); |
7348 | SDValue Chain = IsStrict ? N->getOperand(Num: 0) : DAG.getEntryNode(); |
7349 | if (InVT == MVT::f16) { |
7350 | if (!IsStrict) { |
7351 | SDValue InF32 = DAG.getFPExtendOrRound(Op: InOp, DL, VT: MVT::f32); |
7352 | Results.push_back(Elt: DAG.getNode(Opcode: N->getOpcode(), DL, VT: ResVT, Operand: InF32)); |
7353 | } else { |
7354 | SDValue InF32; |
7355 | std::tie(args&: InF32, args&: Chain) = |
7356 | DAG.getStrictFPExtendOrRound(Op: InOp, Chain, DL, VT: MVT::f32); |
7357 | SDValue OpF32 = |
7358 | DAG.getNode(Opcode: N->getOpcode(), DL, VTList: DAG.getVTList(VT1: ResVT, VT2: MVT::Other), |
7359 | Ops: {Chain, InF32}); |
7360 | Results.push_back(Elt: OpF32); |
7361 | Results.push_back(Elt: OpF32.getValue(R: 1)); |
7362 | } |
7363 | } |
7364 | break; |
7365 | } |
7366 | default: |
7367 | llvm_unreachable("Unexpected node to lower" ); |
7368 | } |
7369 | } |
7370 | |
7371 | void |
7372 | SystemZTargetLowering::ReplaceNodeResults(SDNode *N, |
7373 | SmallVectorImpl<SDValue> &Results, |
7374 | SelectionDAG &DAG) const { |
7375 | return LowerOperationWrapper(N, Results, DAG); |
7376 | } |
7377 | |
7378 | const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { |
7379 | #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME |
7380 | switch ((SystemZISD::NodeType)Opcode) { |
7381 | case SystemZISD::FIRST_NUMBER: break; |
7382 | OPCODE(RET_GLUE); |
7383 | OPCODE(CALL); |
7384 | OPCODE(SIBCALL); |
7385 | OPCODE(TLS_GDCALL); |
7386 | OPCODE(TLS_LDCALL); |
7387 | OPCODE(PCREL_WRAPPER); |
7388 | OPCODE(PCREL_OFFSET); |
7389 | OPCODE(ICMP); |
7390 | OPCODE(FCMP); |
7391 | OPCODE(STRICT_FCMP); |
7392 | OPCODE(STRICT_FCMPS); |
7393 | OPCODE(TM); |
7394 | OPCODE(BR_CCMASK); |
7395 | OPCODE(SELECT_CCMASK); |
7396 | OPCODE(ADJDYNALLOC); |
7397 | OPCODE(PROBED_ALLOCA); |
7398 | OPCODE(POPCNT); |
7399 | OPCODE(SMUL_LOHI); |
7400 | OPCODE(UMUL_LOHI); |
7401 | OPCODE(SDIVREM); |
7402 | OPCODE(UDIVREM); |
7403 | OPCODE(SADDO); |
7404 | OPCODE(SSUBO); |
7405 | OPCODE(UADDO); |
7406 | OPCODE(USUBO); |
7407 | OPCODE(ADDCARRY); |
7408 | OPCODE(SUBCARRY); |
7409 | OPCODE(GET_CCMASK); |
7410 | OPCODE(MVC); |
7411 | OPCODE(NC); |
7412 | OPCODE(OC); |
7413 | OPCODE(XC); |
7414 | OPCODE(CLC); |
7415 | OPCODE(MEMSET_MVC); |
7416 | OPCODE(STPCPY); |
7417 | OPCODE(STRCMP); |
7418 | OPCODE(SEARCH_STRING); |
7419 | OPCODE(IPM); |
7420 | OPCODE(TBEGIN); |
7421 | OPCODE(TBEGIN_NOFLOAT); |
7422 | OPCODE(TEND); |
7423 | OPCODE(BYTE_MASK); |
7424 | OPCODE(ROTATE_MASK); |
7425 | OPCODE(REPLICATE); |
7426 | OPCODE(JOIN_DWORDS); |
7427 | OPCODE(SPLAT); |
7428 | OPCODE(MERGE_HIGH); |
7429 | OPCODE(MERGE_LOW); |
7430 | OPCODE(SHL_DOUBLE); |
7431 | OPCODE(PERMUTE_DWORDS); |
7432 | OPCODE(PERMUTE); |
7433 | OPCODE(PACK); |
7434 | OPCODE(PACKS_CC); |
7435 | OPCODE(PACKLS_CC); |
7436 | OPCODE(UNPACK_HIGH); |
7437 | OPCODE(UNPACKL_HIGH); |
7438 | OPCODE(UNPACK_LOW); |
7439 | OPCODE(UNPACKL_LOW); |
7440 | OPCODE(VSHL_BY_SCALAR); |
7441 | OPCODE(VSRL_BY_SCALAR); |
7442 | OPCODE(VSRA_BY_SCALAR); |
7443 | OPCODE(VROTL_BY_SCALAR); |
7444 | OPCODE(SHL_DOUBLE_BIT); |
7445 | OPCODE(SHR_DOUBLE_BIT); |
7446 | OPCODE(VSUM); |
7447 | OPCODE(VACC); |
7448 | OPCODE(VSCBI); |
7449 | OPCODE(VAC); |
7450 | OPCODE(VSBI); |
7451 | OPCODE(VACCC); |
7452 | OPCODE(VSBCBI); |
7453 | OPCODE(VMAH); |
7454 | OPCODE(VMALH); |
7455 | OPCODE(VME); |
7456 | OPCODE(VMLE); |
7457 | OPCODE(VMO); |
7458 | OPCODE(VMLO); |
7459 | OPCODE(VICMPE); |
7460 | OPCODE(VICMPH); |
7461 | OPCODE(VICMPHL); |
7462 | OPCODE(VICMPES); |
7463 | OPCODE(VICMPHS); |
7464 | OPCODE(VICMPHLS); |
7465 | OPCODE(VFCMPE); |
7466 | OPCODE(STRICT_VFCMPE); |
7467 | OPCODE(STRICT_VFCMPES); |
7468 | OPCODE(VFCMPH); |
7469 | OPCODE(STRICT_VFCMPH); |
7470 | OPCODE(STRICT_VFCMPHS); |
7471 | OPCODE(VFCMPHE); |
7472 | OPCODE(STRICT_VFCMPHE); |
7473 | OPCODE(STRICT_VFCMPHES); |
7474 | OPCODE(VFCMPES); |
7475 | OPCODE(VFCMPHS); |
7476 | OPCODE(VFCMPHES); |
7477 | OPCODE(VFTCI); |
7478 | OPCODE(VEXTEND); |
7479 | OPCODE(STRICT_VEXTEND); |
7480 | OPCODE(VROUND); |
7481 | OPCODE(STRICT_VROUND); |
7482 | OPCODE(VTM); |
7483 | OPCODE(SCMP128HI); |
7484 | OPCODE(UCMP128HI); |
7485 | OPCODE(VFAE_CC); |
7486 | OPCODE(VFAEZ_CC); |
7487 | OPCODE(VFEE_CC); |
7488 | OPCODE(VFEEZ_CC); |
7489 | OPCODE(VFENE_CC); |
7490 | OPCODE(VFENEZ_CC); |
7491 | OPCODE(VISTR_CC); |
7492 | OPCODE(VSTRC_CC); |
7493 | OPCODE(VSTRCZ_CC); |
7494 | OPCODE(VSTRS_CC); |
7495 | OPCODE(VSTRSZ_CC); |
7496 | OPCODE(TDC); |
7497 | OPCODE(ATOMIC_SWAPW); |
7498 | OPCODE(ATOMIC_LOADW_ADD); |
7499 | OPCODE(ATOMIC_LOADW_SUB); |
7500 | OPCODE(ATOMIC_LOADW_AND); |
7501 | OPCODE(ATOMIC_LOADW_OR); |
7502 | OPCODE(ATOMIC_LOADW_XOR); |
7503 | OPCODE(ATOMIC_LOADW_NAND); |
7504 | OPCODE(ATOMIC_LOADW_MIN); |
7505 | OPCODE(ATOMIC_LOADW_MAX); |
7506 | OPCODE(ATOMIC_LOADW_UMIN); |
7507 | OPCODE(ATOMIC_LOADW_UMAX); |
7508 | OPCODE(ATOMIC_CMP_SWAPW); |
7509 | OPCODE(ATOMIC_CMP_SWAP); |
7510 | OPCODE(ATOMIC_LOAD_128); |
7511 | OPCODE(ATOMIC_STORE_128); |
7512 | OPCODE(ATOMIC_CMP_SWAP_128); |
7513 | OPCODE(LRV); |
7514 | OPCODE(STRV); |
7515 | OPCODE(VLER); |
7516 | OPCODE(VSTER); |
7517 | OPCODE(STCKF); |
7518 | OPCODE(PREFETCH); |
7519 | OPCODE(ADA_ENTRY); |
7520 | } |
7521 | return nullptr; |
7522 | #undef OPCODE |
7523 | } |
7524 | |
7525 | // Return true if VT is a vector whose elements are a whole number of bytes |
7526 | // in width. Also check for presence of vector support. |
7527 | bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const { |
7528 | if (!Subtarget.hasVector()) |
7529 | return false; |
7530 | |
7531 | return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple(); |
7532 | } |
7533 | |
7534 | // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT |
7535 | // producing a result of type ResVT. Op is a possibly bitcast version |
7536 | // of the input vector and Index is the index (based on type VecVT) that |
7537 | // should be extracted. Return the new extraction if a simplification |
7538 | // was possible or if Force is true. |
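// For example, extracting i32 element 1 of a v4i32 BITCAST of a byte
// shuffle reduces to a direct extraction from one shuffle input when
// bytes 4..7 of the shuffle come contiguously, suitably aligned, from
// that input.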
SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
7540 | EVT VecVT, SDValue Op, |
7541 | unsigned Index, |
7542 | DAGCombinerInfo &DCI, |
7543 | bool Force) const { |
7544 | SelectionDAG &DAG = DCI.DAG; |
7545 | |
7546 | // The number of bytes being extracted. |
7547 | unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); |
7548 | |
7549 | for (;;) { |
7550 | unsigned Opcode = Op.getOpcode(); |
7551 | if (Opcode == ISD::BITCAST) |
7552 | // Look through bitcasts. |
7553 | Op = Op.getOperand(i: 0); |
7554 | else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) && |
7555 | canTreatAsByteVector(VT: Op.getValueType())) { |
7556 | // Get a VPERM-like permute mask and see whether the bytes covered |
7557 | // by the extracted element are a contiguous sequence from one |
7558 | // source operand. |
7559 | SmallVector<int, SystemZ::VectorBytes> Bytes; |
7560 | if (!getVPermMask(ShuffleOp: Op, Bytes)) |
7561 | break; |
7562 | int First; |
7563 | if (!getShuffleInput(Bytes, Start: Index * BytesPerElement, |
7564 | BytesPerElement, Base&: First)) |
7565 | break; |
7566 | if (First < 0) |
7567 | return DAG.getUNDEF(VT: ResVT); |
7568 | // Make sure the contiguous sequence starts at a multiple of the |
7569 | // original element size. |
7570 | unsigned Byte = unsigned(First) % Bytes.size(); |
7571 | if (Byte % BytesPerElement != 0) |
7572 | break; |
7573 | // We can get the extracted value directly from an input. |
7574 | Index = Byte / BytesPerElement; |
7575 | Op = Op.getOperand(i: unsigned(First) / Bytes.size()); |
7576 | Force = true; |
7577 | } else if (Opcode == ISD::BUILD_VECTOR && |
7578 | canTreatAsByteVector(VT: Op.getValueType())) { |
7579 | // We can only optimize this case if the BUILD_VECTOR elements are |
7580 | // at least as wide as the extracted value. |
7581 | EVT OpVT = Op.getValueType(); |
7582 | unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); |
7583 | if (OpBytesPerElement < BytesPerElement) |
7584 | break; |
7585 | // Make sure that the least-significant bit of the extracted value |
7586 | // is the least significant bit of an input. |
7587 | unsigned End = (Index + 1) * BytesPerElement; |
7588 | if (End % OpBytesPerElement != 0) |
7589 | break; |
7590 | // We're extracting the low part of one operand of the BUILD_VECTOR. |
7591 | Op = Op.getOperand(i: End / OpBytesPerElement - 1); |
7592 | if (!Op.getValueType().isInteger()) { |
7593 | EVT VT = MVT::getIntegerVT(BitWidth: Op.getValueSizeInBits()); |
7594 | Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op); |
7595 | DCI.AddToWorklist(N: Op.getNode()); |
7596 | } |
7597 | EVT VT = MVT::getIntegerVT(BitWidth: ResVT.getSizeInBits()); |
7598 | Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Op); |
7599 | if (VT != ResVT) { |
7600 | DCI.AddToWorklist(N: Op.getNode()); |
7601 | Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ResVT, Operand: Op); |
7602 | } |
7603 | return Op; |
7604 | } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || |
7605 | Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || |
7606 | Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && |
7607 | canTreatAsByteVector(VT: Op.getValueType()) && |
7608 | canTreatAsByteVector(VT: Op.getOperand(i: 0).getValueType())) { |
7609 | // Make sure that only the unextended bits are significant. |
7610 | EVT ExtVT = Op.getValueType(); |
7611 | EVT OpVT = Op.getOperand(i: 0).getValueType(); |
7612 | unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize(); |
7613 | unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); |
7614 | unsigned Byte = Index * BytesPerElement; |
7615 | unsigned SubByte = Byte % ExtBytesPerElement; |
7616 | unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement; |
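// E.g. extracting bytes from (v4i32 zero_extend_vector_inreg v8i16):
// MinSubByte == 4 - 2 == 2, so only extractions that lie entirely
// within the low (unextended) two bytes of an i32 element can be
// redirected to the v8i16 input.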
7617 | if (SubByte < MinSubByte || |
7618 | SubByte + BytesPerElement > ExtBytesPerElement) |
7619 | break; |
7620 | // Get the byte offset of the unextended element |
7621 | Byte = Byte / ExtBytesPerElement * OpBytesPerElement; |
7622 | // ...then add the byte offset relative to that element. |
7623 | Byte += SubByte - MinSubByte; |
7624 | if (Byte % BytesPerElement != 0) |
7625 | break; |
7626 | Op = Op.getOperand(i: 0); |
7627 | Index = Byte / BytesPerElement; |
7628 | Force = true; |
7629 | } else |
7630 | break; |
7631 | } |
7632 | if (Force) { |
7633 | if (Op.getValueType() != VecVT) { |
7634 | Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VecVT, Operand: Op); |
7635 | DCI.AddToWorklist(N: Op.getNode()); |
7636 | } |
7637 | return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ResVT, N1: Op, |
7638 | N2: DAG.getConstant(Val: Index, DL, VT: MVT::i32)); |
7639 | } |
7640 | return SDValue(); |
7641 | } |
7642 | |
7643 | // Optimize vector operations in scalar value Op on the basis that Op |
7644 | // is truncated to TruncVT. |
SDValue SystemZTargetLowering::combineTruncateExtract(
7646 | const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const { |
7647 | // If we have (trunc (extract_vector_elt X, Y)), try to turn it into |
7648 | // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements |
7649 | // of type TruncVT. |
7650 | if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
7651 | TruncVT.getSizeInBits() % 8 == 0) { |
7652 | SDValue Vec = Op.getOperand(i: 0); |
7653 | EVT VecVT = Vec.getValueType(); |
7654 | if (canTreatAsByteVector(VT: VecVT)) { |
7655 | if (auto *IndexN = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))) { |
7656 | unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); |
7657 | unsigned TruncBytes = TruncVT.getStoreSize(); |
7658 | if (BytesPerElement % TruncBytes == 0) { |
7659 | // Calculate the value of Y' in the above description. We are |
7660 | // splitting the original elements into Scale equal-sized pieces |
7661 | // and for truncation purposes want the last (least-significant) |
7662 | // of these pieces for IndexN. This is easiest to do by calculating |
7663 | // the start index of the following element and then subtracting 1. |
7664 | unsigned Scale = BytesPerElement / TruncBytes; |
7665 | unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1; |
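// E.g. truncating an extract of i64 element 1 of a v2i64 to i16 gives
// Scale == 4 and NewIndex == (1 + 1) * 4 - 1 == 7, i.e. the last
// (least-significant) i16 piece of that element in the bitcast v8i16.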
7666 | |
7667 | // Defer the creation of the bitcast from X to combineExtract, |
7668 | // which might be able to optimize the extraction. |
7669 | VecVT = EVT::getVectorVT(Context&: *DCI.DAG.getContext(), |
7670 | VT: MVT::getIntegerVT(BitWidth: TruncBytes * 8), |
7671 | NumElements: VecVT.getStoreSize() / TruncBytes); |
7672 | EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT); |
7673 | return combineExtract(DL, ResVT, VecVT, Op: Vec, Index: NewIndex, DCI, Force: true); |
7674 | } |
7675 | } |
7676 | } |
7677 | } |
7678 | return SDValue(); |
7679 | } |
7680 | |
7681 | SDValue SystemZTargetLowering::combineZERO_EXTEND( |
7682 | SDNode *N, DAGCombinerInfo &DCI) const { |
7683 | // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2') |
7684 | SelectionDAG &DAG = DCI.DAG; |
7685 | SDValue N0 = N->getOperand(Num: 0); |
7686 | EVT VT = N->getValueType(ResNo: 0); |
7687 | if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) { |
7688 | auto *TrueOp = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0)); |
7689 | auto *FalseOp = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1)); |
7690 | if (TrueOp && FalseOp) { |
7691 | SDLoc DL(N0); |
7692 | SDValue Ops[] = { DAG.getConstant(Val: TrueOp->getZExtValue(), DL, VT), |
7693 | DAG.getConstant(Val: FalseOp->getZExtValue(), DL, VT), |
7694 | N0.getOperand(i: 2), N0.getOperand(i: 3), N0.getOperand(i: 4) }; |
7695 | SDValue NewSelect = DAG.getNode(Opcode: SystemZISD::SELECT_CCMASK, DL, VT, Ops); |
7696 | // If N0 has multiple uses, change other uses as well. |
7697 | if (!N0.hasOneUse()) { |
7698 | SDValue TruncSelect = |
7699 | DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N0.getValueType(), Operand: NewSelect); |
7700 | DCI.CombineTo(N: N0.getNode(), Res: TruncSelect); |
7701 | } |
7702 | return NewSelect; |
7703 | } |
7704 | } |
7705 | // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size |
7706 | // of the result is smaller than the size of X and all the truncated bits |
7707 | // of X are already zero. |
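// E.g. with X : i64, (i32 (zext (xor (i8 (trunc X)), C))) becomes
// (xor (i32 (trunc X)), C') when bits 8..31 of X are known zero.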
7708 | if (N0.getOpcode() == ISD::XOR && |
7709 | N0.hasOneUse() && N0.getOperand(i: 0).hasOneUse() && |
7710 | N0.getOperand(i: 0).getOpcode() == ISD::TRUNCATE && |
7711 | N0.getOperand(i: 1).getOpcode() == ISD::Constant) { |
7712 | SDValue X = N0.getOperand(i: 0).getOperand(i: 0); |
7713 | if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) { |
7714 | KnownBits Known = DAG.computeKnownBits(Op: X); |
7715 | APInt TruncatedBits = APInt::getBitsSet(numBits: X.getValueSizeInBits(), |
7716 | loBit: N0.getValueSizeInBits(), |
7717 | hiBit: VT.getSizeInBits()); |
7718 | if (TruncatedBits.isSubsetOf(RHS: Known.Zero)) { |
7719 | X = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(X), VT, Operand: X); |
7720 | APInt Mask = N0.getConstantOperandAPInt(i: 1).zext(width: VT.getSizeInBits()); |
7721 | return DAG.getNode(Opcode: ISD::XOR, DL: SDLoc(N0), VT, |
7722 | N1: X, N2: DAG.getConstant(Val: Mask, DL: SDLoc(N0), VT)); |
7723 | } |
7724 | } |
7725 | } |
7726 | // Recognize patterns for VECTOR SUBTRACT COMPUTE BORROW INDICATION |
7727 | // and VECTOR ADD COMPUTE CARRY for i128: |
7728 | // (zext (setcc_uge X Y)) --> (VSCBI X Y) |
7729 | // (zext (setcc_ule Y X)) --> (VSCBI X Y) |
// (zext (setcc_ult (add X Y) X/Y)) --> (VACC X Y)
// (zext (setcc_ugt X/Y (add X Y))) --> (VACC X Y)
7732 | // For vector types, these patterns are recognized in the .td file. |
7733 | if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT == MVT::i128 && |
7734 | N0.getOperand(i: 0).getValueType() == VT) { |
7735 | SDValue Op0 = N0.getOperand(i: 0); |
7736 | SDValue Op1 = N0.getOperand(i: 1); |
7737 | const ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get(); |
7738 | switch (CC) { |
7739 | case ISD::SETULE: |
7740 | std::swap(a&: Op0, b&: Op1); |
7741 | [[fallthrough]]; |
7742 | case ISD::SETUGE: |
7743 | return DAG.getNode(Opcode: SystemZISD::VSCBI, DL: SDLoc(N0), VT, N1: Op0, N2: Op1); |
7744 | case ISD::SETUGT: |
7745 | std::swap(a&: Op0, b&: Op1); |
7746 | [[fallthrough]]; |
7747 | case ISD::SETULT: |
7748 | if (Op0->hasOneUse() && Op0->getOpcode() == ISD::ADD && |
7749 | (Op0->getOperand(Num: 0) == Op1 || Op0->getOperand(Num: 1) == Op1)) |
7750 | return DAG.getNode(Opcode: SystemZISD::VACC, DL: SDLoc(N0), VT, N1: Op0->getOperand(Num: 0), |
7751 | N2: Op0->getOperand(Num: 1)); |
7752 | break; |
7753 | default: |
7754 | break; |
7755 | } |
7756 | } |
7757 | |
7758 | return SDValue(); |
7759 | } |
7760 | |
7761 | SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG( |
7762 | SDNode *N, DAGCombinerInfo &DCI) const { |
7763 | // Convert (sext_in_reg (setcc LHS, RHS, COND), i1) |
7764 | // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1) |
7765 | // into (select_cc LHS, RHS, -1, 0, COND) |
7766 | SelectionDAG &DAG = DCI.DAG; |
7767 | SDValue N0 = N->getOperand(Num: 0); |
7768 | EVT VT = N->getValueType(ResNo: 0); |
7769 | EVT EVT = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT(); |
7770 | if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND) |
7771 | N0 = N0.getOperand(i: 0); |
7772 | if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) { |
7773 | SDLoc DL(N0); |
7774 | SDValue Ops[] = { N0.getOperand(i: 0), N0.getOperand(i: 1), |
7775 | DAG.getAllOnesConstant(DL, VT), |
7776 | DAG.getConstant(Val: 0, DL, VT), N0.getOperand(i: 2) }; |
7777 | return DAG.getNode(Opcode: ISD::SELECT_CC, DL, VT, Ops); |
7778 | } |
7779 | return SDValue(); |
7780 | } |
7781 | |
7782 | SDValue SystemZTargetLowering::combineSIGN_EXTEND( |
7783 | SDNode *N, DAGCombinerInfo &DCI) const { |
7784 | // Convert (sext (ashr (shl X, C1), C2)) to |
// (ashr (shl (anyext X), C1'), C2'), since wider shifts are as
7786 | // cheap as narrower ones. |
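// E.g. (i64 (sext (i32 (ashr (shl X, 24), 24)))) becomes
// (ashr (shl (anyext X), 56), 56): Extra == 64 - 32 == 32 is added to
// both shift amounts.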
7787 | SelectionDAG &DAG = DCI.DAG; |
7788 | SDValue N0 = N->getOperand(Num: 0); |
7789 | EVT VT = N->getValueType(ResNo: 0); |
7790 | if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { |
7791 | auto *SraAmt = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1)); |
7792 | SDValue Inner = N0.getOperand(i: 0); |
7793 | if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { |
7794 | if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Val: Inner.getOperand(i: 1))) { |
unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7796 | unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; |
7797 | unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; |
7798 | EVT ShiftVT = N0.getOperand(i: 1).getValueType(); |
7799 | SDValue Ext = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: SDLoc(Inner), VT, |
7800 | Operand: Inner.getOperand(i: 0)); |
7801 | SDValue Shl = DAG.getNode(Opcode: ISD::SHL, DL: SDLoc(Inner), VT, N1: Ext, |
7802 | N2: DAG.getConstant(Val: NewShlAmt, DL: SDLoc(Inner), |
7803 | VT: ShiftVT)); |
7804 | return DAG.getNode(Opcode: ISD::SRA, DL: SDLoc(N0), VT, N1: Shl, |
7805 | N2: DAG.getConstant(Val: NewSraAmt, DL: SDLoc(N0), VT: ShiftVT)); |
7806 | } |
7807 | } |
7808 | } |
7809 | |
7810 | return SDValue(); |
7811 | } |
7812 | |
7813 | SDValue SystemZTargetLowering::combineMERGE( |
7814 | SDNode *N, DAGCombinerInfo &DCI) const { |
7815 | SelectionDAG &DAG = DCI.DAG; |
7816 | unsigned Opcode = N->getOpcode(); |
7817 | SDValue Op0 = N->getOperand(Num: 0); |
7818 | SDValue Op1 = N->getOperand(Num: 1); |
7819 | if (Op0.getOpcode() == ISD::BITCAST) |
7820 | Op0 = Op0.getOperand(i: 0); |
7821 | if (ISD::isBuildVectorAllZeros(N: Op0.getNode())) { |
7822 | // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF |
7823 | // for v4f32. |
7824 | if (Op1 == N->getOperand(Num: 0)) |
7825 | return Op1; |
7826 | // (z_merge_? 0, X) -> (z_unpackl_? 0, X). |
7827 | EVT VT = Op1.getValueType(); |
7828 | unsigned ElemBytes = VT.getVectorElementType().getStoreSize(); |
7829 | if (ElemBytes <= 4) { |
7830 | Opcode = (Opcode == SystemZISD::MERGE_HIGH ? |
7831 | SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW); |
7832 | EVT InVT = VT.changeVectorElementTypeToInteger(); |
7833 | EVT OutVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ElemBytes * 16), |
7834 | NumElements: SystemZ::VectorBytes / ElemBytes / 2); |
7835 | if (VT != InVT) { |
7836 | Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: InVT, Operand: Op1); |
7837 | DCI.AddToWorklist(N: Op1.getNode()); |
7838 | } |
7839 | SDValue Op = DAG.getNode(Opcode, DL: SDLoc(N), VT: OutVT, Operand: Op1); |
7840 | DCI.AddToWorklist(N: Op.getNode()); |
7841 | return DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT, Operand: Op); |
7842 | } |
7843 | } |
7844 | return SDValue(); |
7845 | } |
7846 | |
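// Return true if every non-chain user of the i128 load LD is a TRUNCATE
// to i64 of either the low half or (via an SRL by 64) the high half,
// returning the matching users in LoPart and HiPart.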
7847 | static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, |
7848 | SDNode *&HiPart) { |
7849 | LoPart = HiPart = nullptr; |
7850 | |
7851 | // Scan through all users. |
7852 | for (SDUse &Use : LD->uses()) { |
7853 | // Skip the uses of the chain. |
7854 | if (Use.getResNo() != 0) |
7855 | continue; |
7856 | |
7857 | // Verify every user is a TRUNCATE to i64 of the low or high half. |
7858 | SDNode *User = Use.getUser(); |
7859 | bool IsLoPart = true; |
7860 | if (User->getOpcode() == ISD::SRL && |
7861 | User->getOperand(Num: 1).getOpcode() == ISD::Constant && |
7862 | User->getConstantOperandVal(Num: 1) == 64 && User->hasOneUse()) { |
7863 | User = *User->user_begin(); |
7864 | IsLoPart = false; |
7865 | } |
7866 | if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(ResNo: 0) != MVT::i64) |
7867 | return false; |
7868 | |
7869 | if (IsLoPart) { |
7870 | if (LoPart) |
7871 | return false; |
7872 | LoPart = User; |
7873 | } else { |
7874 | if (HiPart) |
7875 | return false; |
7876 | HiPart = User; |
7877 | } |
7878 | } |
7879 | return true; |
7880 | } |
7881 | |
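// Likewise for an f128 load: return true if every non-chain user is an
// EXTRACT_SUBREG taking the subreg_l64 or subreg_h64 half.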
7882 | static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, |
7883 | SDNode *&HiPart) { |
7884 | LoPart = HiPart = nullptr; |
7885 | |
7886 | // Scan through all users. |
7887 | for (SDUse &Use : LD->uses()) { |
7888 | // Skip the uses of the chain. |
7889 | if (Use.getResNo() != 0) |
7890 | continue; |
7891 | |
7892 | // Verify every user is an EXTRACT_SUBREG of the low or high half. |
7893 | SDNode *User = Use.getUser(); |
7894 | if (!User->hasOneUse() || !User->isMachineOpcode() || |
7895 | User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG) |
7896 | return false; |
7897 | |
7898 | switch (User->getConstantOperandVal(Num: 1)) { |
7899 | case SystemZ::subreg_l64: |
7900 | if (LoPart) |
7901 | return false; |
7902 | LoPart = User; |
7903 | break; |
7904 | case SystemZ::subreg_h64: |
7905 | if (HiPart) |
7906 | return false; |
7907 | HiPart = User; |
7908 | break; |
7909 | default: |
7910 | return false; |
7911 | } |
7912 | } |
7913 | return true; |
7914 | } |
7915 | |
7916 | SDValue SystemZTargetLowering::combineLOAD( |
7917 | SDNode *N, DAGCombinerInfo &DCI) const { |
7918 | SelectionDAG &DAG = DCI.DAG; |
7919 | EVT LdVT = N->getValueType(ResNo: 0); |
7920 | if (auto *LN = dyn_cast<LoadSDNode>(Val: N)) { |
7921 | if (LN->getAddressSpace() == SYSTEMZAS::PTR32) { |
7922 | MVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
7923 | MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType(); |
7924 | if (PtrVT != LoadNodeVT) { |
7925 | SDLoc DL(LN); |
7926 | SDValue AddrSpaceCast = DAG.getAddrSpaceCast( |
7927 | dl: DL, VT: PtrVT, Ptr: LN->getBasePtr(), SrcAS: SYSTEMZAS::PTR32, DestAS: 0); |
7928 | return DAG.getExtLoad(ExtType: LN->getExtensionType(), dl: DL, VT: LN->getValueType(ResNo: 0), |
7929 | Chain: LN->getChain(), Ptr: AddrSpaceCast, MemVT: LN->getMemoryVT(), |
7930 | MMO: LN->getMemOperand()); |
7931 | } |
7932 | } |
7933 | } |
7934 | SDLoc DL(N); |
7935 | |
7936 | // Replace a 128-bit load that is used solely to move its value into GPRs |
7937 | // by separate loads of both halves. |
7938 | LoadSDNode *LD = cast<LoadSDNode>(Val: N); |
7939 | if (LD->isSimple() && ISD::isNormalLoad(N: LD)) { |
7940 | SDNode *LoPart, *HiPart; |
7941 | if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) || |
7942 | (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) { |
7943 | // Rewrite each extraction as an independent load. |
7944 | SmallVector<SDValue, 2> ArgChains; |
7945 | if (HiPart) { |
7946 | SDValue EltLoad = DAG.getLoad( |
7947 | VT: HiPart->getValueType(ResNo: 0), dl: DL, Chain: LD->getChain(), Ptr: LD->getBasePtr(), |
7948 | PtrInfo: LD->getPointerInfo(), Alignment: LD->getBaseAlign(), |
7949 | MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo()); |
7950 | |
7951 | DCI.CombineTo(N: HiPart, Res: EltLoad, AddTo: true); |
7952 | ArgChains.push_back(Elt: EltLoad.getValue(R: 1)); |
7953 | } |
7954 | if (LoPart) { |
7955 | SDValue EltLoad = DAG.getLoad( |
7956 | VT: LoPart->getValueType(ResNo: 0), dl: DL, Chain: LD->getChain(), |
7957 | Ptr: DAG.getObjectPtrOffset(SL: DL, Ptr: LD->getBasePtr(), Offset: TypeSize::getFixed(ExactSize: 8)), |
7958 | PtrInfo: LD->getPointerInfo().getWithOffset(O: 8), Alignment: LD->getBaseAlign(), |
7959 | MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo()); |
7960 | |
7961 | DCI.CombineTo(N: LoPart, Res: EltLoad, AddTo: true); |
7962 | ArgChains.push_back(Elt: EltLoad.getValue(R: 1)); |
7963 | } |
7964 | |
7965 | // Collect all chains via TokenFactor. |
7966 | SDValue Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: ArgChains); |
7967 | DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: Chain); |
7968 | DCI.AddToWorklist(N: Chain.getNode()); |
7969 | return SDValue(N, 0); |
7970 | } |
7971 | } |
7972 | |
7973 | if (LdVT.isVector() || LdVT.isInteger()) |
7974 | return SDValue(); |
// Transform a scalar load that is REPLICATEd and also has other use(s)
// so that those other use(s) read the first element of the REPLICATE
// instead of the load; otherwise instruction selection will not produce
// a VLREP. To avoid extracting to a GPR, do this only for floating-point
// loads.
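// For example, if an f64 load feeds both a REPLICATE and an fadd, the
// fadd is rewritten to use (extract_vector_elt Replicate, 0) so the
// load can be selected as VLREP.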
7980 | |
7981 | SDValue Replicate; |
7982 | SmallVector<SDNode*, 8> OtherUses; |
7983 | for (SDUse &Use : N->uses()) { |
7984 | if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) { |
7985 | if (Replicate) |
7986 | return SDValue(); // Should never happen |
7987 | Replicate = SDValue(Use.getUser(), 0); |
7988 | } else if (Use.getResNo() == 0) |
7989 | OtherUses.push_back(Elt: Use.getUser()); |
7990 | } |
7991 | if (!Replicate || OtherUses.empty()) |
7992 | return SDValue(); |
7993 | |
SDValue Extract0 = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: LdVT,
7995 | N1: Replicate, N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32)); |
7996 | // Update uses of the loaded Value while preserving old chains. |
7997 | for (SDNode *U : OtherUses) { |
7998 | SmallVector<SDValue, 8> Ops; |
7999 | for (SDValue Op : U->ops()) |
8000 | Ops.push_back(Elt: (Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op); |
8001 | DAG.UpdateNodeOperands(N: U, Ops); |
8002 | } |
8003 | return SDValue(N, 0); |
8004 | } |
8005 | |
8006 | bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const { |
8007 | if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) |
8008 | return true; |
8009 | if (Subtarget.hasVectorEnhancements2()) |
8010 | if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128) |
8011 | return true; |
8012 | return false; |
8013 | } |
8014 | |
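// Return true if the shuffle mask M performs a full element reversal of
// a 128-bit vector type, e.g. {3, 2, 1, 0} for v4i32 (UNDEF elements
// are allowed to match anything).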
8015 | static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) { |
8016 | if (!VT.isVector() || !VT.isSimple() || |
8017 | VT.getSizeInBits() != 128 || |
8018 | VT.getScalarSizeInBits() % 8 != 0) |
8019 | return false; |
8020 | |
8021 | unsigned NumElts = VT.getVectorNumElements(); |
8022 | for (unsigned i = 0; i < NumElts; ++i) { |
8023 | if (M[i] < 0) continue; // ignore UNDEF indices |
8024 | if ((unsigned) M[i] != NumElts - 1 - i) |
8025 | return false; |
8026 | } |
8027 | |
8028 | return true; |
8029 | } |
8030 | |
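// Return true if StoredVal is used only by stores of a round scalar type
// of at most 16 bytes, possibly via an intervening splat BUILD_VECTOR.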
8031 | static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) { |
8032 | for (auto *U : StoredVal->users()) { |
8033 | if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: U)) { |
8034 | EVT CurrMemVT = ST->getMemoryVT().getScalarType(); |
8035 | if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16) |
8036 | continue; |
8037 | } else if (isa<BuildVectorSDNode>(Val: U)) { |
8038 | SDValue BuildVector = SDValue(U, 0); |
if (DAG.isSplatValue(V: BuildVector, AllowUndefs: true) &&
8040 | isOnlyUsedByStores(StoredVal: BuildVector, DAG)) |
8041 | continue; |
8042 | } |
8043 | return false; |
8044 | } |
8045 | return true; |
8046 | } |
8047 | |
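// Match an i128 value built as (or (zext Lo), (shl (anyext Hi), 64)),
// returning the two i64 halves in LoPart and HiPart.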
8048 | static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, |
8049 | SDValue &HiPart) { |
8050 | if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse()) |
8051 | return false; |
8052 | |
8053 | SDValue Op0 = Val.getOperand(i: 0); |
8054 | SDValue Op1 = Val.getOperand(i: 1); |
8055 | |
8056 | if (Op0.getOpcode() == ISD::SHL) |
8057 | std::swap(a&: Op0, b&: Op1); |
8058 | if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() || |
8059 | Op1.getOperand(i: 1).getOpcode() != ISD::Constant || |
8060 | Op1.getConstantOperandVal(i: 1) != 64) |
8061 | return false; |
8062 | Op1 = Op1.getOperand(i: 0); |
8063 | |
8064 | if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() || |
8065 | Op0.getOperand(i: 0).getValueType() != MVT::i64) |
8066 | return false; |
8067 | if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() || |
8068 | Op1.getOperand(i: 0).getValueType() != MVT::i64) |
8069 | return false; |
8070 | |
8071 | LoPart = Op0.getOperand(i: 0); |
8072 | HiPart = Op1.getOperand(i: 0); |
8073 | return true; |
8074 | } |
8075 | |
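// Match an f128 value built by a REG_SEQUENCE from two i64 values,
// returning the subreg_l64 and subreg_h64 inputs in LoPart and HiPart.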
8076 | static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, |
8077 | SDValue &HiPart) { |
8078 | if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() || |
8079 | Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE) |
8080 | return false; |
8081 | |
8082 | if (Val->getNumOperands() != 5 || |
8083 | Val->getOperand(Num: 0)->getAsZExtVal() != SystemZ::FP128BitRegClassID || |
8084 | Val->getOperand(Num: 2)->getAsZExtVal() != SystemZ::subreg_l64 || |
8085 | Val->getOperand(Num: 4)->getAsZExtVal() != SystemZ::subreg_h64) |
8086 | return false; |
8087 | |
8088 | LoPart = Val->getOperand(Num: 1); |
8089 | HiPart = Val->getOperand(Num: 3); |
8090 | return true; |
8091 | } |
8092 | |
8093 | SDValue SystemZTargetLowering::combineSTORE( |
8094 | SDNode *N, DAGCombinerInfo &DCI) const { |
8095 | SelectionDAG &DAG = DCI.DAG; |
8096 | auto *SN = cast<StoreSDNode>(Val: N); |
8097 | auto &Op1 = N->getOperand(Num: 1); |
8098 | EVT MemVT = SN->getMemoryVT(); |
8099 | |
8100 | if (SN->getAddressSpace() == SYSTEMZAS::PTR32) { |
8101 | MVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
8102 | MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType(); |
8103 | if (PtrVT != StoreNodeVT) { |
8104 | SDLoc DL(SN); |
8105 | SDValue AddrSpaceCast = DAG.getAddrSpaceCast(dl: DL, VT: PtrVT, Ptr: SN->getBasePtr(), |
8106 | SrcAS: SYSTEMZAS::PTR32, DestAS: 0); |
8107 | return DAG.getStore(Chain: SN->getChain(), dl: DL, Val: SN->getValue(), Ptr: AddrSpaceCast, |
8108 | PtrInfo: SN->getPointerInfo(), Alignment: SN->getBaseAlign(), |
8109 | MMOFlags: SN->getMemOperand()->getFlags(), AAInfo: SN->getAAInfo()); |
8110 | } |
8111 | } |
8112 | |
8113 | // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better |
8114 | // for the extraction to be done on a vMiN value, so that we can use VSTE. |
8115 | // If X has wider elements then convert it to: |
8116 | // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z). |
8117 | if (MemVT.isInteger() && SN->isTruncatingStore()) { |
8118 | if (SDValue Value = |
8119 | combineTruncateExtract(DL: SDLoc(N), TruncVT: MemVT, Op: SN->getValue(), DCI)) { |
8120 | DCI.AddToWorklist(N: Value.getNode()); |
8121 | |
8122 | // Rewrite the store with the new form of stored value. |
8123 | return DAG.getTruncStore(Chain: SN->getChain(), dl: SDLoc(SN), Val: Value, |
8124 | Ptr: SN->getBasePtr(), SVT: SN->getMemoryVT(), |
8125 | MMO: SN->getMemOperand()); |
8126 | } |
8127 | } |
8128 | // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR |
8129 | if (!SN->isTruncatingStore() && |
8130 | Op1.getOpcode() == ISD::BSWAP && |
8131 | Op1.getNode()->hasOneUse() && |
8132 | canLoadStoreByteSwapped(VT: Op1.getValueType())) { |
8133 | |
8134 | SDValue BSwapOp = Op1.getOperand(i: 0); |
8135 | |
8136 | if (BSwapOp.getValueType() == MVT::i16) |
8137 | BSwapOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: SDLoc(N), VT: MVT::i32, Operand: BSwapOp); |
8138 | |
8139 | SDValue Ops[] = { |
8140 | N->getOperand(Num: 0), BSwapOp, N->getOperand(Num: 2) |
8141 | }; |
8142 | |
8143 | return |
8144 | DAG.getMemIntrinsicNode(Opcode: SystemZISD::STRV, dl: SDLoc(N), VTList: DAG.getVTList(VT: MVT::Other), |
8145 | Ops, MemVT, MMO: SN->getMemOperand()); |
8146 | } |
8147 | // Combine STORE (element-swap) into VSTER |
8148 | if (!SN->isTruncatingStore() && |
8149 | Op1.getOpcode() == ISD::VECTOR_SHUFFLE && |
8150 | Op1.getNode()->hasOneUse() && |
8151 | Subtarget.hasVectorEnhancements2()) { |
8152 | ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: Op1.getNode()); |
8153 | ArrayRef<int> ShuffleMask = SVN->getMask(); |
8154 | if (isVectorElementSwap(M: ShuffleMask, VT: Op1.getValueType())) { |
8155 | SDValue Ops[] = { |
8156 | N->getOperand(Num: 0), Op1.getOperand(i: 0), N->getOperand(Num: 2) |
8157 | }; |
8158 | |
8159 | return DAG.getMemIntrinsicNode(Opcode: SystemZISD::VSTER, dl: SDLoc(N), |
8160 | VTList: DAG.getVTList(VT: MVT::Other), |
8161 | Ops, MemVT, MMO: SN->getMemOperand()); |
8162 | } |
8163 | } |
8164 | |
8165 | // Combine STORE (READCYCLECOUNTER) into STCKF. |
8166 | if (!SN->isTruncatingStore() && |
8167 | Op1.getOpcode() == ISD::READCYCLECOUNTER && |
8168 | Op1.hasOneUse() && |
8169 | N->getOperand(Num: 0).reachesChainWithoutSideEffects(Dest: SDValue(Op1.getNode(), 1))) { |
8170 | SDValue Ops[] = { Op1.getOperand(i: 0), N->getOperand(Num: 2) }; |
8171 | return DAG.getMemIntrinsicNode(Opcode: SystemZISD::STCKF, dl: SDLoc(N), |
8172 | VTList: DAG.getVTList(VT: MVT::Other), |
8173 | Ops, MemVT, MMO: SN->getMemOperand()); |
8174 | } |
8175 | |
8176 | // Transform a store of a 128-bit value moved from parts into two stores. |
8177 | if (SN->isSimple() && ISD::isNormalStore(N: SN)) { |
8178 | SDValue LoPart, HiPart; |
8179 | if ((MemVT == MVT::i128 && isI128MovedFromParts(Val: Op1, LoPart, HiPart)) || |
8180 | (MemVT == MVT::f128 && isF128MovedFromParts(Val: Op1, LoPart, HiPart))) { |
8181 | SDLoc DL(SN); |
8182 | SDValue Chain0 = DAG.getStore( |
8183 | Chain: SN->getChain(), dl: DL, Val: HiPart, Ptr: SN->getBasePtr(), PtrInfo: SN->getPointerInfo(), |
8184 | Alignment: SN->getBaseAlign(), MMOFlags: SN->getMemOperand()->getFlags(), AAInfo: SN->getAAInfo()); |
8185 | SDValue Chain1 = DAG.getStore( |
8186 | Chain: SN->getChain(), dl: DL, Val: LoPart, |
8187 | Ptr: DAG.getObjectPtrOffset(SL: DL, Ptr: SN->getBasePtr(), Offset: TypeSize::getFixed(ExactSize: 8)), |
8188 | PtrInfo: SN->getPointerInfo().getWithOffset(O: 8), Alignment: SN->getBaseAlign(), |
8189 | MMOFlags: SN->getMemOperand()->getFlags(), AAInfo: SN->getAAInfo()); |
8190 | |
8191 | return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: Chain0, N2: Chain1); |
8192 | } |
8193 | } |
8194 | |
8195 | // Replicate a reg or immediate with VREP instead of scalar multiply or |
8196 | // immediate load. It seems best to do this during the first DAGCombine as |
// it is straightforward to handle the zero-extend node in the initial
// DAG, and also not worry about keeping the new MemVT legal (e.g. when
8199 | // extracting an i16 element from a v16i8 vector). |
8200 | if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes && |
8201 | isOnlyUsedByStores(StoredVal: Op1, DAG)) { |
8202 | SDValue Word = SDValue(); |
8203 | EVT WordVT; |
8204 | |
8205 | // Find a replicated immediate and return it if found in Word and its |
8206 | // type in WordVT. |
8207 | auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) { |
8208 | // Some constants are better handled with a scalar store. |
8209 | if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() || |
8210 | isInt<16>(x: C->getSExtValue()) || MemVT.getStoreSize() <= 2) |
8211 | return; |
8212 | |
8213 | APInt Val = C->getAPIntValue(); |
8214 | // Truncate Val in case of a truncating store. |
8215 | if (!llvm::isUIntN(N: TotBytes * 8, x: Val.getZExtValue())) { |
8216 | assert(SN->isTruncatingStore() && |
8217 | "Non-truncating store and immediate value does not fit?" ); |
8218 | Val = Val.trunc(width: TotBytes * 8); |
8219 | } |
8220 | |
8221 | SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue())); |
8222 | if (VCI.isVectorConstantLegal(Subtarget) && |
8223 | VCI.Opcode == SystemZISD::REPLICATE) { |
8224 | Word = DAG.getConstant(Val: VCI.OpVals[0], DL: SDLoc(SN), VT: MVT::i32); |
8225 | WordVT = VCI.VecVT.getScalarType(); |
8226 | } |
8227 | }; |
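// E.g. a store of the i64 immediate 0x0001000100010001 gives Word == 1
// and WordVT == i16; the code below then emits it as a v4i16 splat
// store (a VREP-style replicate instead of an immediate load).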
8228 | |
8229 | // Find a replicated register and return it if found in Word and its type |
8230 | // in WordVT. |
8231 | auto FindReplicatedReg = [&](SDValue MulOp) { |
8232 | EVT MulVT = MulOp.getValueType(); |
8233 | if (MulOp->getOpcode() == ISD::MUL && |
8234 | (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) { |
8235 | // Find a zero extended value and its type. |
8236 | SDValue LHS = MulOp->getOperand(Num: 0); |
8237 | if (LHS->getOpcode() == ISD::ZERO_EXTEND) |
8238 | WordVT = LHS->getOperand(Num: 0).getValueType(); |
8239 | else if (LHS->getOpcode() == ISD::AssertZext) |
8240 | WordVT = cast<VTSDNode>(Val: LHS->getOperand(Num: 1))->getVT(); |
8241 | else |
8242 | return; |
8243 | // Find a replicating constant, e.g. 0x00010001. |
8244 | if (auto *C = dyn_cast<ConstantSDNode>(Val: MulOp->getOperand(Num: 1))) { |
8245 | SystemZVectorConstantInfo VCI( |
8246 | APInt(MulVT.getSizeInBits(), C->getZExtValue())); |
8247 | if (VCI.isVectorConstantLegal(Subtarget) && |
8248 | VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 && |
8249 | WordVT == VCI.VecVT.getScalarType()) |
8250 | Word = DAG.getZExtOrTrunc(Op: LHS->getOperand(Num: 0), DL: SDLoc(SN), VT: WordVT); |
8251 | } |
8252 | } |
8253 | }; |
8254 | |
8255 | if (isa<BuildVectorSDNode>(Val: Op1) && |
DAG.isSplatValue(V: Op1, AllowUndefs: true)) {
8257 | SDValue SplatVal = Op1->getOperand(Num: 0); |
8258 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: SplatVal)) |
8259 | FindReplicatedImm(C, SplatVal.getValueType().getStoreSize()); |
8260 | else |
8261 | FindReplicatedReg(SplatVal); |
8262 | } else { |
8263 | if (auto *C = dyn_cast<ConstantSDNode>(Val: Op1)) |
8264 | FindReplicatedImm(C, MemVT.getStoreSize()); |
8265 | else |
8266 | FindReplicatedReg(Op1); |
8267 | } |
8268 | |
8269 | if (Word != SDValue()) { |
8270 | assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 && |
8271 | "Bad type handling" ); |
8272 | unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits(); |
8273 | EVT SplatVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WordVT, NumElements: NumElts); |
8274 | SDValue SplatVal = DAG.getSplatVector(VT: SplatVT, DL: SDLoc(SN), Op: Word); |
8275 | return DAG.getStore(Chain: SN->getChain(), dl: SDLoc(SN), Val: SplatVal, |
8276 | Ptr: SN->getBasePtr(), MMO: SN->getMemOperand()); |
8277 | } |
8278 | } |
8279 | |
8280 | return SDValue(); |
8281 | } |
8282 | |
8283 | SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE( |
8284 | SDNode *N, DAGCombinerInfo &DCI) const { |
8285 | SelectionDAG &DAG = DCI.DAG; |
8286 | // Combine element-swap (LOAD) into VLER |
8287 | if (ISD::isNON_EXTLoad(N: N->getOperand(Num: 0).getNode()) && |
8288 | N->getOperand(Num: 0).hasOneUse() && |
8289 | Subtarget.hasVectorEnhancements2()) { |
8290 | ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: N); |
8291 | ArrayRef<int> ShuffleMask = SVN->getMask(); |
8292 | if (isVectorElementSwap(M: ShuffleMask, VT: N->getValueType(ResNo: 0))) { |
8293 | SDValue Load = N->getOperand(Num: 0); |
8294 | LoadSDNode *LD = cast<LoadSDNode>(Val&: Load); |
8295 | |
8296 | // Create the element-swapping load. |
8297 | SDValue Ops[] = { |
8298 | LD->getChain(), // Chain |
8299 | LD->getBasePtr() // Ptr |
8300 | }; |
8301 | SDValue ESLoad = |
8302 | DAG.getMemIntrinsicNode(Opcode: SystemZISD::VLER, dl: SDLoc(N), |
8303 | VTList: DAG.getVTList(VT1: LD->getValueType(ResNo: 0), VT2: MVT::Other), |
8304 | Ops, MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand()); |
8305 | |
8306 | // First, combine the VECTOR_SHUFFLE away. This makes the value produced |
8307 | // by the load dead. |
8308 | DCI.CombineTo(N, Res: ESLoad); |
8309 | |
// Next, combine the load away; we give it a bogus result value but a real
// chain result. The result value is dead because the shuffle is dead.
8312 | DCI.CombineTo(N: Load.getNode(), Res0: ESLoad, Res1: ESLoad.getValue(R: 1)); |
8313 | |
8314 | // Return N so it doesn't get rechecked! |
8315 | return SDValue(N, 0); |
8316 | } |
8317 | } |
8318 | |
8319 | return SDValue(); |
8320 | } |
8321 | |
SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
8323 | SDNode *N, DAGCombinerInfo &DCI) const { |
8324 | SelectionDAG &DAG = DCI.DAG; |
8325 | |
8326 | if (!Subtarget.hasVector()) |
8327 | return SDValue(); |
8328 | |
8329 | // Look through bitcasts that retain the number of vector elements. |
8330 | SDValue Op = N->getOperand(Num: 0); |
8331 | if (Op.getOpcode() == ISD::BITCAST && |
8332 | Op.getValueType().isVector() && |
8333 | Op.getOperand(i: 0).getValueType().isVector() && |
8334 | Op.getValueType().getVectorNumElements() == |
8335 | Op.getOperand(i: 0).getValueType().getVectorNumElements()) |
8336 | Op = Op.getOperand(i: 0); |
8337 | |
8338 | // Pull BSWAP out of a vector extraction. |
8339 | if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) { |
8340 | EVT VecVT = Op.getValueType(); |
8341 | EVT EltVT = VecVT.getVectorElementType(); |
8342 | Op = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(N), VT: EltVT, |
8343 | N1: Op.getOperand(i: 0), N2: N->getOperand(Num: 1)); |
8344 | DCI.AddToWorklist(N: Op.getNode()); |
8345 | Op = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT: EltVT, Operand: Op); |
8346 | if (EltVT != N->getValueType(ResNo: 0)) { |
8347 | DCI.AddToWorklist(N: Op.getNode()); |
8348 | Op = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), Operand: Op); |
8349 | } |
8350 | return Op; |
8351 | } |
8352 | |
8353 | // Try to simplify a vector extraction. |
8354 | if (auto *IndexN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))) { |
8355 | SDValue Op0 = N->getOperand(Num: 0); |
8356 | EVT VecVT = Op0.getValueType(); |
8357 | if (canTreatAsByteVector(VT: VecVT)) |
8358 | return combineExtract(DL: SDLoc(N), ResVT: N->getValueType(ResNo: 0), VecVT, Op: Op0, |
8359 | Index: IndexN->getZExtValue(), DCI, Force: false); |
8360 | } |
8361 | return SDValue(); |
8362 | } |
8363 | |
8364 | SDValue SystemZTargetLowering::combineJOIN_DWORDS( |
8365 | SDNode *N, DAGCombinerInfo &DCI) const { |
8366 | SelectionDAG &DAG = DCI.DAG; |
8367 | // (join_dwords X, X) == (replicate X) |
8368 | if (N->getOperand(Num: 0) == N->getOperand(Num: 1)) |
8369 | return DAG.getNode(Opcode: SystemZISD::REPLICATE, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
8370 | Operand: N->getOperand(Num: 0)); |
8371 | return SDValue(); |
8372 | } |
8373 | |
8374 | static SDValue MergeInputChains(SDNode *N1, SDNode *N2) { |
8375 | SDValue Chain1 = N1->getOperand(Num: 0); |
8376 | SDValue Chain2 = N2->getOperand(Num: 0); |
8377 | |
8378 | // Trivial case: both nodes take the same chain. |
8379 | if (Chain1 == Chain2) |
8380 | return Chain1; |
8381 | |
8382 | // FIXME - we could handle more complex cases via TokenFactor, |
8383 | // assuming we can verify that this would not create a cycle. |
8384 | return SDValue(); |
8385 | } |
8386 | |
8387 | SDValue SystemZTargetLowering::combineFP_ROUND( |
8388 | SDNode *N, DAGCombinerInfo &DCI) const { |
8389 | |
8390 | if (!Subtarget.hasVector()) |
8391 | return SDValue(); |
8392 | |
8393 | // (fpround (extract_vector_elt X 0)) |
8394 | // (fpround (extract_vector_elt X 1)) -> |
8395 | // (extract_vector_elt (VROUND X) 0) |
8396 | // (extract_vector_elt (VROUND X) 2) |
8397 | // |
8398 | // This is a special case since the target doesn't really support v2f32s. |
8399 | unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; |
8400 | SelectionDAG &DAG = DCI.DAG; |
8401 | SDValue Op0 = N->getOperand(Num: OpNo); |
8402 | if (N->getValueType(ResNo: 0) == MVT::f32 && Op0.hasOneUse() && |
8403 | Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
8404 | Op0.getOperand(i: 0).getValueType() == MVT::v2f64 && |
8405 | Op0.getOperand(i: 1).getOpcode() == ISD::Constant && |
8406 | Op0.getConstantOperandVal(i: 1) == 0) { |
8407 | SDValue Vec = Op0.getOperand(i: 0); |
8408 | for (auto *U : Vec->users()) { |
8409 | if (U != Op0.getNode() && U->hasOneUse() && |
8410 | U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
8411 | U->getOperand(Num: 0) == Vec && |
8412 | U->getOperand(Num: 1).getOpcode() == ISD::Constant && |
8413 | U->getConstantOperandVal(Num: 1) == 1) { |
8414 | SDValue OtherRound = SDValue(*U->user_begin(), 0); |
8415 | if (OtherRound.getOpcode() == N->getOpcode() && |
8416 | OtherRound.getOperand(i: OpNo) == SDValue(U, 0) && |
8417 | OtherRound.getValueType() == MVT::f32) { |
8418 | SDValue VRound, Chain; |
8419 | if (N->isStrictFPOpcode()) { |
8420 | Chain = MergeInputChains(N1: N, N2: OtherRound.getNode()); |
8421 | if (!Chain) |
8422 | continue; |
8423 | VRound = DAG.getNode(Opcode: SystemZISD::STRICT_VROUND, DL: SDLoc(N), |
8424 | ResultTys: {MVT::v4f32, MVT::Other}, Ops: {Chain, Vec}); |
8425 | Chain = VRound.getValue(R: 1); |
8426 | } else |
8427 | VRound = DAG.getNode(Opcode: SystemZISD::VROUND, DL: SDLoc(N), |
8428 | VT: MVT::v4f32, Operand: Vec); |
8429 | DCI.AddToWorklist(N: VRound.getNode()); |
SDValue Extract1 =
8431 | DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(U), VT: MVT::f32, |
8432 | N1: VRound, N2: DAG.getConstant(Val: 2, DL: SDLoc(U), VT: MVT::i32)); |
8433 | DCI.AddToWorklist(N: Extract1.getNode()); |
8434 | DAG.ReplaceAllUsesOfValueWith(From: OtherRound, To: Extract1); |
8435 | if (Chain) |
8436 | DAG.ReplaceAllUsesOfValueWith(From: OtherRound.getValue(R: 1), To: Chain); |
SDValue Extract0 =
8438 | DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(Op0), VT: MVT::f32, |
8439 | N1: VRound, N2: DAG.getConstant(Val: 0, DL: SDLoc(Op0), VT: MVT::i32)); |
8440 | if (Chain) |
8441 | return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: SDLoc(Op0), |
8442 | VTList: N->getVTList(), N1: Extract0, N2: Chain); |
8443 | return Extract0; |
8444 | } |
8445 | } |
8446 | } |
8447 | } |
8448 | return SDValue(); |
8449 | } |
8450 | |
8451 | SDValue SystemZTargetLowering::combineFP_EXTEND( |
8452 | SDNode *N, DAGCombinerInfo &DCI) const { |
8453 | |
8454 | if (!Subtarget.hasVector()) |
8455 | return SDValue(); |
8456 | |
8457 | // (fpextend (extract_vector_elt X 0)) |
8458 | // (fpextend (extract_vector_elt X 2)) -> |
8459 | // (extract_vector_elt (VEXTEND X) 0) |
8460 | // (extract_vector_elt (VEXTEND X) 1) |
8461 | // |
8462 | // This is a special case since the target doesn't really support v2f32s. |
8463 | unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; |
8464 | SelectionDAG &DAG = DCI.DAG; |
8465 | SDValue Op0 = N->getOperand(Num: OpNo); |
8466 | if (N->getValueType(ResNo: 0) == MVT::f64 && Op0.hasOneUse() && |
8467 | Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
8468 | Op0.getOperand(i: 0).getValueType() == MVT::v4f32 && |
8469 | Op0.getOperand(i: 1).getOpcode() == ISD::Constant && |
8470 | Op0.getConstantOperandVal(i: 1) == 0) { |
8471 | SDValue Vec = Op0.getOperand(i: 0); |
8472 | for (auto *U : Vec->users()) { |
8473 | if (U != Op0.getNode() && U->hasOneUse() && |
8474 | U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
8475 | U->getOperand(Num: 0) == Vec && |
8476 | U->getOperand(Num: 1).getOpcode() == ISD::Constant && |
8477 | U->getConstantOperandVal(Num: 1) == 2) { |
8478 | SDValue OtherExtend = SDValue(*U->user_begin(), 0); |
8479 | if (OtherExtend.getOpcode() == N->getOpcode() && |
8480 | OtherExtend.getOperand(i: OpNo) == SDValue(U, 0) && |
8481 | OtherExtend.getValueType() == MVT::f64) { |
8482 | SDValue VExtend, Chain; |
8483 | if (N->isStrictFPOpcode()) { |
8484 | Chain = MergeInputChains(N1: N, N2: OtherExtend.getNode()); |
8485 | if (!Chain) |
8486 | continue; |
8487 | VExtend = DAG.getNode(Opcode: SystemZISD::STRICT_VEXTEND, DL: SDLoc(N), |
8488 | ResultTys: {MVT::v2f64, MVT::Other}, Ops: {Chain, Vec}); |
8489 | Chain = VExtend.getValue(R: 1); |
8490 | } else |
8491 | VExtend = DAG.getNode(Opcode: SystemZISD::VEXTEND, DL: SDLoc(N), |
8492 | VT: MVT::v2f64, Operand: Vec); |
8493 | DCI.AddToWorklist(N: VExtend.getNode()); |
SDValue Extract1 =
8495 | DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(U), VT: MVT::f64, |
8496 | N1: VExtend, N2: DAG.getConstant(Val: 1, DL: SDLoc(U), VT: MVT::i32)); |
8497 | DCI.AddToWorklist(N: Extract1.getNode()); |
8498 | DAG.ReplaceAllUsesOfValueWith(From: OtherExtend, To: Extract1); |
8499 | if (Chain) |
8500 | DAG.ReplaceAllUsesOfValueWith(From: OtherExtend.getValue(R: 1), To: Chain); |
SDValue Extract0 =
8502 | DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(Op0), VT: MVT::f64, |
8503 | N1: VExtend, N2: DAG.getConstant(Val: 0, DL: SDLoc(Op0), VT: MVT::i32)); |
8504 | if (Chain) |
8505 | return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: SDLoc(Op0), |
8506 | VTList: N->getVTList(), N1: Extract0, N2: Chain); |
8507 | return Extract0; |
8508 | } |
8509 | } |
8510 | } |
8511 | } |
8512 | return SDValue(); |
8513 | } |
8514 | |
8515 | SDValue SystemZTargetLowering::combineINT_TO_FP( |
8516 | SDNode *N, DAGCombinerInfo &DCI) const { |
8517 | if (DCI.Level != BeforeLegalizeTypes) |
8518 | return SDValue(); |
8519 | SelectionDAG &DAG = DCI.DAG; |
8520 | LLVMContext &Ctx = *DAG.getContext(); |
8521 | unsigned Opcode = N->getOpcode(); |
8522 | EVT OutVT = N->getValueType(ResNo: 0); |
8523 | Type *OutLLVMTy = OutVT.getTypeForEVT(Context&: Ctx); |
8524 | SDValue Op = N->getOperand(Num: 0); |
8525 | unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits(); |
8526 | unsigned InScalarBits = Op->getValueType(ResNo: 0).getScalarSizeInBits(); |
8527 | |
8528 | // Insert an extension before type-legalization to avoid scalarization, e.g.: |
8529 | // v2f64 = uint_to_fp v2i16 |
8530 | // => |
8531 | // v2f64 = uint_to_fp (v2i64 zero_extend v2i16) |
8532 | if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits && |
8533 | OutScalarBits <= 64) { |
8534 | unsigned NumElts = cast<FixedVectorType>(Val: OutLLVMTy)->getNumElements(); |
8535 | EVT ExtVT = EVT::getVectorVT( |
8536 | Context&: Ctx, VT: EVT::getIntegerVT(Context&: Ctx, BitWidth: OutLLVMTy->getScalarSizeInBits()), NumElements: NumElts); |
8537 | unsigned ExtOpcode = |
8538 | (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND); |
8539 | SDValue ExtOp = DAG.getNode(Opcode: ExtOpcode, DL: SDLoc(N), VT: ExtVT, Operand: Op); |
8540 | return DAG.getNode(Opcode, DL: SDLoc(N), VT: OutVT, Operand: ExtOp); |
8541 | } |
8542 | return SDValue(); |
8543 | } |
8544 | |
8545 | SDValue SystemZTargetLowering::combineFCOPYSIGN( |
8546 | SDNode *N, DAGCombinerInfo &DCI) const { |
8547 | SelectionDAG &DAG = DCI.DAG; |
8548 | EVT VT = N->getValueType(ResNo: 0); |
8549 | SDValue ValOp = N->getOperand(Num: 0); |
8550 | SDValue SignOp = N->getOperand(Num: 1); |
8551 | |
// Remove the rounding; it is not needed since only the sign is used.
8553 | if (SignOp.getOpcode() == ISD::FP_ROUND) { |
8554 | SDValue WideOp = SignOp.getOperand(i: 0); |
8555 | return DAG.getNode(Opcode: ISD::FCOPYSIGN, DL: SDLoc(N), VT, N1: ValOp, N2: WideOp); |
8556 | } |
8557 | |
8558 | return SDValue(); |
8559 | } |
8560 | |
8561 | SDValue SystemZTargetLowering::combineBSWAP( |
8562 | SDNode *N, DAGCombinerInfo &DCI) const { |
8563 | SelectionDAG &DAG = DCI.DAG; |
8564 | // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR |
8565 | if (ISD::isNON_EXTLoad(N: N->getOperand(Num: 0).getNode()) && |
8566 | N->getOperand(Num: 0).hasOneUse() && |
8567 | canLoadStoreByteSwapped(VT: N->getValueType(ResNo: 0))) { |
8568 | SDValue Load = N->getOperand(Num: 0); |
8569 | LoadSDNode *LD = cast<LoadSDNode>(Val&: Load); |
8570 | |
8571 | // Create the byte-swapping load. |
8572 | SDValue Ops[] = { |
8573 | LD->getChain(), // Chain |
8574 | LD->getBasePtr() // Ptr |
8575 | }; |
8576 | EVT LoadVT = N->getValueType(ResNo: 0); |
8577 | if (LoadVT == MVT::i16) |
8578 | LoadVT = MVT::i32; |
8579 | SDValue BSLoad = |
8580 | DAG.getMemIntrinsicNode(Opcode: SystemZISD::LRV, dl: SDLoc(N), |
8581 | VTList: DAG.getVTList(VT1: LoadVT, VT2: MVT::Other), |
8582 | Ops, MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand()); |
8583 | |
8584 | // If this is an i16 load, insert the truncate. |
8585 | SDValue ResVal = BSLoad; |
8586 | if (N->getValueType(ResNo: 0) == MVT::i16) |
8587 | ResVal = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(N), VT: MVT::i16, Operand: BSLoad); |
8588 | |
8589 | // First, combine the bswap away. This makes the value produced by the |
8590 | // load dead. |
8591 | DCI.CombineTo(N, Res: ResVal); |
8592 | |
// Next, combine the load away; we give it a bogus result value but a real
// chain result. The result value is dead because the bswap is dead.
8595 | DCI.CombineTo(N: Load.getNode(), Res0: ResVal, Res1: BSLoad.getValue(R: 1)); |
8596 | |
8597 | // Return N so it doesn't get rechecked! |
8598 | return SDValue(N, 0); |
8599 | } |
8600 | |
8601 | // Look through bitcasts that retain the number of vector elements. |
8602 | SDValue Op = N->getOperand(Num: 0); |
8603 | if (Op.getOpcode() == ISD::BITCAST && |
8604 | Op.getValueType().isVector() && |
8605 | Op.getOperand(i: 0).getValueType().isVector() && |
8606 | Op.getValueType().getVectorNumElements() == |
8607 | Op.getOperand(i: 0).getValueType().getVectorNumElements()) |
8608 | Op = Op.getOperand(i: 0); |
8609 | |
8610 | // Push BSWAP into a vector insertion if at least one side then simplifies. |
8611 | if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) { |
8612 | SDValue Vec = Op.getOperand(i: 0); |
8613 | SDValue Elt = Op.getOperand(i: 1); |
8614 | SDValue Idx = Op.getOperand(i: 2); |
8615 | |
8616 | if (DAG.isConstantIntBuildVectorOrConstantInt(N: Vec) || |
8617 | Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() || |
8618 | DAG.isConstantIntBuildVectorOrConstantInt(N: Elt) || |
8619 | Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() || |
8620 | (canLoadStoreByteSwapped(VT: N->getValueType(ResNo: 0)) && |
8621 | ISD::isNON_EXTLoad(N: Elt.getNode()) && Elt.hasOneUse())) { |
8622 | EVT VecVT = N->getValueType(ResNo: 0); |
8623 | EVT EltVT = N->getValueType(ResNo: 0).getVectorElementType(); |
8624 | if (VecVT != Vec.getValueType()) { |
8625 | Vec = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: VecVT, Operand: Vec); |
8626 | DCI.AddToWorklist(N: Vec.getNode()); |
8627 | } |
8628 | if (EltVT != Elt.getValueType()) { |
8629 | Elt = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: EltVT, Operand: Elt); |
8630 | DCI.AddToWorklist(N: Elt.getNode()); |
8631 | } |
8632 | Vec = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT: VecVT, Operand: Vec); |
8633 | DCI.AddToWorklist(N: Vec.getNode()); |
8634 | Elt = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT: EltVT, Operand: Elt); |
8635 | DCI.AddToWorklist(N: Elt.getNode()); |
8636 | return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: VecVT, |
8637 | N1: Vec, N2: Elt, N3: Idx); |
8638 | } |
8639 | } |
8640 | |
8641 | // Push BSWAP into a vector shuffle if at least one side then simplifies. |
8642 | ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Val&: Op); |
8643 | if (SV && Op.hasOneUse()) { |
8644 | SDValue Op0 = Op.getOperand(i: 0); |
8645 | SDValue Op1 = Op.getOperand(i: 1); |
8646 | |
8647 | if (DAG.isConstantIntBuildVectorOrConstantInt(N: Op0) || |
8648 | Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() || |
8649 | DAG.isConstantIntBuildVectorOrConstantInt(N: Op1) || |
8650 | Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) { |
8651 | EVT VecVT = N->getValueType(ResNo: 0); |
8652 | if (VecVT != Op0.getValueType()) { |
8653 | Op0 = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: VecVT, Operand: Op0); |
8654 | DCI.AddToWorklist(N: Op0.getNode()); |
8655 | } |
8656 | if (VecVT != Op1.getValueType()) { |
8657 | Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: VecVT, Operand: Op1); |
8658 | DCI.AddToWorklist(N: Op1.getNode()); |
8659 | } |
8660 | Op0 = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT: VecVT, Operand: Op0); |
8661 | DCI.AddToWorklist(N: Op0.getNode()); |
8662 | Op1 = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT: VecVT, Operand: Op1); |
8663 | DCI.AddToWorklist(N: Op1.getNode()); |
8664 | return DAG.getVectorShuffle(VT: VecVT, dl: SDLoc(N), N1: Op0, N2: Op1, Mask: SV->getMask()); |
8665 | } |
8666 | } |
8667 | |
8668 | return SDValue(); |
8669 | } |
8670 | |
8671 | SDValue SystemZTargetLowering::combineSETCC( |
8672 | SDNode *N, DAGCombinerInfo &DCI) const { |
8673 | SelectionDAG &DAG = DCI.DAG; |
8674 | const ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get(); |
8675 | const SDValue LHS = N->getOperand(Num: 0); |
8676 | const SDValue RHS = N->getOperand(Num: 1); |
8677 | bool CmpNull = isNullConstant(V: RHS); |
8678 | bool CmpAllOnes = isAllOnesConstant(V: RHS); |
8679 | EVT VT = N->getValueType(ResNo: 0); |
8680 | SDLoc DL(N); |
8681 | |
8682 | // Match icmp_eq/ne(bitcast(icmp(X,Y)),0/-1) reduction patterns, and |
// change the outer compare to an i128 compare. This will normally
8684 | // allow the reduction to be recognized in adjustICmp128, and even if |
8685 | // not, the i128 compare will still generate better code. |
8686 | if ((CC == ISD::SETNE || CC == ISD::SETEQ) && (CmpNull || CmpAllOnes)) { |
8687 | SDValue Src = peekThroughBitcasts(V: LHS); |
8688 | if (Src.getOpcode() == ISD::SETCC && |
8689 | Src.getValueType().isFixedLengthVector() && |
8690 | Src.getValueType().getScalarType() == MVT::i1) { |
8691 | EVT CmpVT = Src.getOperand(i: 0).getValueType(); |
8692 | if (CmpVT.getSizeInBits() == 128) { |
8693 | EVT IntVT = CmpVT.changeVectorElementTypeToInteger(); |
8694 | SDValue LHS = |
8695 | DAG.getBitcast(VT: MVT::i128, V: DAG.getSExtOrTrunc(Op: Src, DL, VT: IntVT)); |
8696 | SDValue RHS = CmpNull ? DAG.getConstant(Val: 0, DL, VT: MVT::i128) |
8697 | : DAG.getAllOnesConstant(DL, VT: MVT::i128); |
8698 | return DAG.getNode(Opcode: ISD::SETCC, DL, VT, N1: LHS, N2: RHS, N3: N->getOperand(Num: 2), |
8699 | Flags: N->getFlags()); |
8700 | } |
8701 | } |
8702 | } |
8703 | |
8704 | return SDValue(); |
8705 | } |
8706 | |
8707 | static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { |
// We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
// set by the CCReg instruction using the CCValid / CCMask masks.
// If the CCReg instruction is itself an ICMP testing the condition
// code set by some other instruction, see whether we can directly
// use that condition code.
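// For example (an illustrative sketch): given
//   %sel = SELECT_CCMASK 1, 0, ValidA, MaskA, %ccA
//   %cc  = ICMP %sel, 1
// a BR_CCMASK or SELECT_CCMASK testing %cc for CCMASK_CMP_EQ can instead
// test %ccA directly with ValidA / MaskA.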
8713 | |
8714 | // Verify that we have an ICMP against some constant. |
8715 | if (CCValid != SystemZ::CCMASK_ICMP) |
8716 | return false; |
8717 | auto *ICmp = CCReg.getNode(); |
8718 | if (ICmp->getOpcode() != SystemZISD::ICMP) |
8719 | return false; |
8720 | auto *CompareLHS = ICmp->getOperand(Num: 0).getNode(); |
8721 | auto *CompareRHS = dyn_cast<ConstantSDNode>(Val: ICmp->getOperand(Num: 1)); |
8722 | if (!CompareRHS) |
8723 | return false; |
8724 | |
8725 | // Optimize the case where CompareLHS is a SELECT_CCMASK. |
8726 | if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) { |
// Verify that we have an appropriate mask for an EQ or NE comparison.
8728 | bool Invert = false; |
8729 | if (CCMask == SystemZ::CCMASK_CMP_NE) |
8730 | Invert = !Invert; |
8731 | else if (CCMask != SystemZ::CCMASK_CMP_EQ) |
8732 | return false; |
8733 | |
// Verify that the ICMP compares against one of the select values.
8735 | auto *TrueVal = dyn_cast<ConstantSDNode>(Val: CompareLHS->getOperand(Num: 0)); |
8736 | if (!TrueVal) |
8737 | return false; |
8738 | auto *FalseVal = dyn_cast<ConstantSDNode>(Val: CompareLHS->getOperand(Num: 1)); |
8739 | if (!FalseVal) |
8740 | return false; |
8741 | if (CompareRHS->getAPIntValue() == FalseVal->getAPIntValue()) |
8742 | Invert = !Invert; |
8743 | else if (CompareRHS->getAPIntValue() != TrueVal->getAPIntValue()) |
8744 | return false; |
8745 | |
8746 | // Compute the effective CC mask for the new branch or select. |
8747 | auto *NewCCValid = dyn_cast<ConstantSDNode>(Val: CompareLHS->getOperand(Num: 2)); |
8748 | auto *NewCCMask = dyn_cast<ConstantSDNode>(Val: CompareLHS->getOperand(Num: 3)); |
8749 | if (!NewCCValid || !NewCCMask) |
8750 | return false; |
8751 | CCValid = NewCCValid->getZExtValue(); |
8752 | CCMask = NewCCMask->getZExtValue(); |
8753 | if (Invert) |
8754 | CCMask ^= CCValid; |
8755 | |
8756 | // Return the updated CCReg link. |
8757 | CCReg = CompareLHS->getOperand(Num: 4); |
8758 | return true; |
8759 | } |
8760 | |
// Optimize the case where CompareLHS is (SRA (SHL (IPM))).
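// For example (illustrative, with SystemZ::IPM_CC == 28): in
//   (icmp (sra (shl (ipm %cc), 2), 30), 0)
// the shifts isolate CC from bits 28-29 of the IPM result as a
// sign-extended two-bit value, so the test can use %cc directly with a
// reversed CC mask.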
8762 | if (CompareLHS->getOpcode() == ISD::SRA) { |
8763 | auto *SRACount = dyn_cast<ConstantSDNode>(Val: CompareLHS->getOperand(Num: 1)); |
8764 | if (!SRACount || SRACount->getZExtValue() != 30) |
8765 | return false; |
8766 | auto *SHL = CompareLHS->getOperand(Num: 0).getNode(); |
8767 | if (SHL->getOpcode() != ISD::SHL) |
8768 | return false; |
8769 | auto *SHLCount = dyn_cast<ConstantSDNode>(Val: SHL->getOperand(Num: 1)); |
8770 | if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC) |
8771 | return false; |
8772 | auto *IPM = SHL->getOperand(Num: 0).getNode(); |
8773 | if (IPM->getOpcode() != SystemZISD::IPM) |
8774 | return false; |
8775 | |
8776 | // Avoid introducing CC spills (because SRA would clobber CC). |
8777 | if (!CompareLHS->hasOneUse()) |
8778 | return false; |
8779 | // Verify that the ICMP compares against zero. |
8780 | if (CompareRHS->getZExtValue() != 0) |
8781 | return false; |
8782 | |
8783 | // Compute the effective CC mask for the new branch or select. |
8784 | CCMask = SystemZ::reverseCCMask(CCMask); |
8785 | |
8786 | // Return the updated CCReg link. |
8787 | CCReg = IPM->getOperand(Num: 0); |
8788 | return true; |
8789 | } |
8790 | |
8791 | return false; |
8792 | } |
8793 | |
8794 | SDValue SystemZTargetLowering::combineBR_CCMASK( |
8795 | SDNode *N, DAGCombinerInfo &DCI) const { |
8796 | SelectionDAG &DAG = DCI.DAG; |
8797 | |
8798 | // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK. |
8799 | auto *CCValid = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)); |
8800 | auto *CCMask = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2)); |
8801 | if (!CCValid || !CCMask) |
8802 | return SDValue(); |
8803 | |
8804 | int CCValidVal = CCValid->getZExtValue(); |
8805 | int CCMaskVal = CCMask->getZExtValue(); |
8806 | SDValue Chain = N->getOperand(Num: 0); |
8807 | SDValue CCReg = N->getOperand(Num: 4); |
8808 | |
8809 | if (combineCCMask(CCReg, CCValid&: CCValidVal, CCMask&: CCMaskVal)) |
8810 | return DAG.getNode(Opcode: SystemZISD::BR_CCMASK, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
8811 | N1: Chain, |
8812 | N2: DAG.getTargetConstant(Val: CCValidVal, DL: SDLoc(N), VT: MVT::i32), |
8813 | N3: DAG.getTargetConstant(Val: CCMaskVal, DL: SDLoc(N), VT: MVT::i32), |
8814 | N4: N->getOperand(Num: 3), N5: CCReg); |
8815 | return SDValue(); |
8816 | } |
8817 | |
8818 | SDValue SystemZTargetLowering::combineSELECT_CCMASK( |
8819 | SDNode *N, DAGCombinerInfo &DCI) const { |
8820 | SelectionDAG &DAG = DCI.DAG; |
8821 | |
8822 | // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK. |
8823 | auto *CCValid = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2)); |
8824 | auto *CCMask = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 3)); |
8825 | if (!CCValid || !CCMask) |
8826 | return SDValue(); |
8827 | |
8828 | int CCValidVal = CCValid->getZExtValue(); |
8829 | int CCMaskVal = CCMask->getZExtValue(); |
8830 | SDValue CCReg = N->getOperand(Num: 4); |
8831 | |
8832 | if (combineCCMask(CCReg, CCValid&: CCValidVal, CCMask&: CCMaskVal)) |
8833 | return DAG.getNode(Opcode: SystemZISD::SELECT_CCMASK, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
8834 | N1: N->getOperand(Num: 0), N2: N->getOperand(Num: 1), |
8835 | N3: DAG.getTargetConstant(Val: CCValidVal, DL: SDLoc(N), VT: MVT::i32), |
8836 | N4: DAG.getTargetConstant(Val: CCMaskVal, DL: SDLoc(N), VT: MVT::i32), |
8837 | N5: CCReg); |
8838 | return SDValue(); |
8839 | } |
8840 | |
8842 | SDValue SystemZTargetLowering::combineGET_CCMASK( |
8843 | SDNode *N, DAGCombinerInfo &DCI) const { |
8844 | |
8845 | // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible |
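// For example (illustrative): a GET_CCMASK of a SELECT_CCMASK producing
// 1/0 can forward the select's CC operand whenever the select's mask is
// implied by the GET_CCMASK masks; a 0/1 select is handled by first
// inverting the mask within the valid bits.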
8846 | auto *CCValid = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)); |
8847 | auto *CCMask = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2)); |
8848 | if (!CCValid || !CCMask) |
8849 | return SDValue(); |
8850 | int CCValidVal = CCValid->getZExtValue(); |
8851 | int CCMaskVal = CCMask->getZExtValue(); |
8852 | |
8853 | SDValue Select = N->getOperand(Num: 0); |
8854 | if (Select->getOpcode() == ISD::TRUNCATE) |
8855 | Select = Select->getOperand(Num: 0); |
8856 | if (Select->getOpcode() != SystemZISD::SELECT_CCMASK) |
8857 | return SDValue(); |
8858 | |
8859 | auto *SelectCCValid = dyn_cast<ConstantSDNode>(Val: Select->getOperand(Num: 2)); |
8860 | auto *SelectCCMask = dyn_cast<ConstantSDNode>(Val: Select->getOperand(Num: 3)); |
8861 | if (!SelectCCValid || !SelectCCMask) |
8862 | return SDValue(); |
8863 | int SelectCCValidVal = SelectCCValid->getZExtValue(); |
8864 | int SelectCCMaskVal = SelectCCMask->getZExtValue(); |
8865 | |
8866 | auto *TrueVal = dyn_cast<ConstantSDNode>(Val: Select->getOperand(Num: 0)); |
8867 | auto *FalseVal = dyn_cast<ConstantSDNode>(Val: Select->getOperand(Num: 1)); |
8868 | if (!TrueVal || !FalseVal) |
8869 | return SDValue(); |
8870 | if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0) |
8871 | ; |
8872 | else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1) |
8873 | SelectCCMaskVal ^= SelectCCValidVal; |
8874 | else |
8875 | return SDValue(); |
8876 | |
8877 | if (SelectCCValidVal & ~CCValidVal) |
8878 | return SDValue(); |
8879 | if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal)) |
8880 | return SDValue(); |
8881 | |
8882 | return Select->getOperand(Num: 4); |
8883 | } |
8884 | |
8885 | SDValue SystemZTargetLowering::combineIntDIVREM( |
8886 | SDNode *N, DAGCombinerInfo &DCI) const { |
8887 | SelectionDAG &DAG = DCI.DAG; |
8888 | EVT VT = N->getValueType(ResNo: 0); |
// In the case where the divisor is a vector of constants, a cheaper
// sequence of instructions can replace the divide. BuildSDIV is called to
// do this during DAG combining, but it only succeeds when it can build a
// multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
// since it is not Legal but Custom it can only happen before
// legalization. Therefore we must scalarize this early, before Combine 1
// (the first DAG combine run). For widened vectors, this is already the
// result of type legalization.
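// E.g. (illustrative) (sdiv v2i64 %x, <3, 3>) is unrolled here into two
// scalar divides, which BuildSDIV can then turn into multiply sequences.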
8896 | if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) && |
8897 | DAG.isConstantIntBuildVectorOrConstantInt(N: N->getOperand(Num: 1))) |
8898 | return DAG.UnrollVectorOp(N); |
8899 | return SDValue(); |
8900 | } |
8901 | |
8903 | // Transform a right shift of a multiply-and-add into a multiply-and-add-high. |
8904 | // This is closely modeled after the common-code combineShiftToMULH. |
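// For example (an illustrative sketch):
//   (srl (add (mul (zext v4i32 %x to v4i64),
//                  (zext v4i32 %y to v4i64)),
//             (zext v4i32 %a to v4i64)),
//        32)
// becomes (zext (VMALH %x, %y, %a)); the SRA/sign_extend form maps to VMAH.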
8905 | SDValue SystemZTargetLowering::combineShiftToMulAddHigh( |
8906 | SDNode *N, DAGCombinerInfo &DCI) const { |
8907 | SelectionDAG &DAG = DCI.DAG; |
8908 | SDLoc DL(N); |
8909 | |
8910 | assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && |
8911 | "SRL or SRA node is required here!" ); |
8912 | |
8913 | if (!Subtarget.hasVector()) |
8914 | return SDValue(); |
8915 | |
8916 | // Check the shift amount. Proceed with the transformation if the shift |
8917 | // amount is constant. |
8918 | ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N: N->getOperand(Num: 1)); |
8919 | if (!ShiftAmtSrc) |
8920 | return SDValue(); |
8921 | |
8922 | // The operation feeding into the shift must be an add. |
8923 | SDValue ShiftOperand = N->getOperand(Num: 0); |
8924 | if (ShiftOperand.getOpcode() != ISD::ADD) |
8925 | return SDValue(); |
8926 | |
8927 | // One operand of the add must be a multiply. |
8928 | SDValue MulOp = ShiftOperand.getOperand(i: 0); |
8929 | SDValue AddOp = ShiftOperand.getOperand(i: 1); |
8930 | if (MulOp.getOpcode() != ISD::MUL) { |
8931 | if (AddOp.getOpcode() != ISD::MUL) |
8932 | return SDValue(); |
8933 | std::swap(a&: MulOp, b&: AddOp); |
8934 | } |
8935 | |
8936 | // All operands must be equivalent extend nodes. |
8937 | SDValue LeftOp = MulOp.getOperand(i: 0); |
8938 | SDValue RightOp = MulOp.getOperand(i: 1); |
8939 | |
8940 | bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND; |
8941 | bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND; |
8942 | |
8943 | if (!IsSignExt && !IsZeroExt) |
8944 | return SDValue(); |
8945 | |
8946 | EVT NarrowVT = LeftOp.getOperand(i: 0).getValueType(); |
8947 | unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); |
8948 | |
8949 | SDValue MulhRightOp; |
8950 | if (ConstantSDNode *Constant = isConstOrConstSplat(N: RightOp)) { |
8951 | unsigned ActiveBits = IsSignExt |
8952 | ? Constant->getAPIntValue().getSignificantBits() |
8953 | : Constant->getAPIntValue().getActiveBits(); |
8954 | if (ActiveBits > NarrowVTSize) |
8955 | return SDValue(); |
8956 | MulhRightOp = DAG.getConstant( |
8957 | Val: Constant->getAPIntValue().trunc(width: NarrowVT.getScalarSizeInBits()), DL, |
8958 | VT: NarrowVT); |
8959 | } else { |
8960 | if (LeftOp.getOpcode() != RightOp.getOpcode()) |
8961 | return SDValue(); |
8962 | // Check that the two extend nodes are the same type. |
8963 | if (NarrowVT != RightOp.getOperand(i: 0).getValueType()) |
8964 | return SDValue(); |
8965 | MulhRightOp = RightOp.getOperand(i: 0); |
8966 | } |
8967 | |
8968 | SDValue MulhAddOp; |
8969 | if (ConstantSDNode *Constant = isConstOrConstSplat(N: AddOp)) { |
8970 | unsigned ActiveBits = IsSignExt |
8971 | ? Constant->getAPIntValue().getSignificantBits() |
8972 | : Constant->getAPIntValue().getActiveBits(); |
8973 | if (ActiveBits > NarrowVTSize) |
8974 | return SDValue(); |
8975 | MulhAddOp = DAG.getConstant( |
8976 | Val: Constant->getAPIntValue().trunc(width: NarrowVT.getScalarSizeInBits()), DL, |
8977 | VT: NarrowVT); |
8978 | } else { |
8979 | if (LeftOp.getOpcode() != AddOp.getOpcode()) |
8980 | return SDValue(); |
8981 | // Check that the two extend nodes are the same type. |
8982 | if (NarrowVT != AddOp.getOperand(i: 0).getValueType()) |
8983 | return SDValue(); |
8984 | MulhAddOp = AddOp.getOperand(i: 0); |
8985 | } |
8986 | |
8987 | EVT WideVT = LeftOp.getValueType(); |
8988 | // Proceed with the transformation if the wide types match. |
8989 | assert((WideVT == RightOp.getValueType()) && |
8990 | "Cannot have a multiply node with two different operand types." ); |
8991 | assert((WideVT == AddOp.getValueType()) && |
8992 | "Cannot have an add node with two different operand types." ); |
8993 | |
8994 | // Proceed with the transformation if the wide type is twice as large |
8995 | // as the narrow type. |
8996 | if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize) |
8997 | return SDValue(); |
8998 | |
8999 | // Check the shift amount with the narrow type size. |
9000 | // Proceed with the transformation if the shift amount is the width |
9001 | // of the narrow type. |
9002 | unsigned ShiftAmt = ShiftAmtSrc->getZExtValue(); |
9003 | if (ShiftAmt != NarrowVTSize) |
9004 | return SDValue(); |
9005 | |
9006 | // Proceed if we support the multiply-and-add-high operation. |
9007 | if (!(NarrowVT == MVT::v16i8 || NarrowVT == MVT::v8i16 || |
9008 | NarrowVT == MVT::v4i32 || |
9009 | (Subtarget.hasVectorEnhancements3() && |
9010 | (NarrowVT == MVT::v2i64 || NarrowVT == MVT::i128)))) |
9011 | return SDValue(); |
9012 | |
9013 | // Emit the VMAH (signed) or VMALH (unsigned) operation. |
9014 | SDValue Result = DAG.getNode(Opcode: IsSignExt ? SystemZISD::VMAH : SystemZISD::VMALH, |
9015 | DL, VT: NarrowVT, N1: LeftOp.getOperand(i: 0), |
9016 | N2: MulhRightOp, N3: MulhAddOp); |
9017 | bool IsSigned = N->getOpcode() == ISD::SRA; |
9018 | return DAG.getExtOrTrunc(IsSigned, Op: Result, DL, VT: WideVT); |
9019 | } |
9020 | |
9021 | // Op is an operand of a multiplication. Check whether this can be folded |
9022 | // into an even/odd widening operation; if so, return the opcode to be used |
9023 | // and update Op to the appropriate sub-operand. Note that the caller must |
9024 | // verify that *both* operands of the multiplication support the operation. |
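// For example (illustrative), for a v2i64 multiply operand:
//   (sign_extend_vector_inreg (vector_shuffle v4i32 %x, undef, <0, 2>))
// selects the even v4i32 elements of %x and corresponds to VME.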
9025 | static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG, |
9026 | const SystemZSubtarget &Subtarget, |
9027 | SDValue &Op) { |
9028 | EVT VT = Op.getValueType(); |
9029 | |
9030 | // Check for (sign/zero_extend_vector_inreg (vector_shuffle)) corresponding |
9031 | // to selecting the even or odd vector elements. |
9032 | if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) && |
9033 | (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG || |
9034 | Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)) { |
9035 | bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG; |
9036 | unsigned NumElts = VT.getVectorNumElements(); |
9037 | Op = Op.getOperand(i: 0); |
9038 | if (Op.getValueType().getVectorNumElements() == 2 * NumElts && |
9039 | Op.getOpcode() == ISD::VECTOR_SHUFFLE) { |
9040 | ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: Op.getNode()); |
9041 | ArrayRef<int> ShuffleMask = SVN->getMask(); |
9042 | bool CanUseEven = true, CanUseOdd = true; |
9043 | for (unsigned Elt = 0; Elt < NumElts; Elt++) { |
9044 | if (ShuffleMask[Elt] == -1) |
9045 | continue; |
9046 | if (unsigned(ShuffleMask[Elt]) != 2 * Elt) |
9047 | CanUseEven = false; |
9048 | if (unsigned(ShuffleMask[Elt]) != 2 * Elt + 1) |
CanUseOdd = false;
9050 | } |
9051 | Op = Op.getOperand(i: 0); |
9052 | if (CanUseEven) |
9053 | return IsSigned ? SystemZISD::VME : SystemZISD::VMLE; |
9054 | if (CanUseOdd) |
9055 | return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO; |
9056 | } |
9057 | } |
9058 | |
9059 | // For z17, we can also support the v2i64->i128 case, which looks like |
9060 | // (sign/zero_extend (extract_vector_elt X 0/1)) |
9061 | if (VT == MVT::i128 && Subtarget.hasVectorEnhancements3() && |
9062 | (Op.getOpcode() == ISD::SIGN_EXTEND || |
9063 | Op.getOpcode() == ISD::ZERO_EXTEND)) { |
9064 | bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND; |
9065 | Op = Op.getOperand(i: 0); |
9066 | if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
9067 | Op.getOperand(i: 0).getValueType() == MVT::v2i64 && |
9068 | Op.getOperand(i: 1).getOpcode() == ISD::Constant) { |
9069 | unsigned Elem = Op.getConstantOperandVal(i: 1); |
9070 | Op = Op.getOperand(i: 0); |
9071 | if (Elem == 0) |
9072 | return IsSigned ? SystemZISD::VME : SystemZISD::VMLE; |
9073 | if (Elem == 1) |
9074 | return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO; |
9075 | } |
9076 | } |
9077 | |
9078 | return 0; |
9079 | } |
9080 | |
9081 | SDValue SystemZTargetLowering::combineMUL( |
9082 | SDNode *N, DAGCombinerInfo &DCI) const { |
9083 | SelectionDAG &DAG = DCI.DAG; |
9084 | |
9085 | // Detect even/odd widening multiplication. |
9086 | SDValue Op0 = N->getOperand(Num: 0); |
9087 | SDValue Op1 = N->getOperand(Num: 1); |
9088 | unsigned OpcodeCand0 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op&: Op0); |
9089 | unsigned OpcodeCand1 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op&: Op1); |
9090 | if (OpcodeCand0 && OpcodeCand0 == OpcodeCand1) |
9091 | return DAG.getNode(Opcode: OpcodeCand0, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: Op0, N2: Op1); |
9092 | |
9093 | return SDValue(); |
9094 | } |
9095 | |
9096 | SDValue SystemZTargetLowering::combineINTRINSIC( |
9097 | SDNode *N, DAGCombinerInfo &DCI) const { |
9098 | SelectionDAG &DAG = DCI.DAG; |
9099 | |
9100 | unsigned Id = N->getConstantOperandVal(Num: 1); |
9101 | switch (Id) { |
9102 | // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15 |
9103 | // or larger is simply a vector load. |
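// E.g. (illustrative) a vll with a constant length of 15 already covers
// all 16 bytes (the operand is the highest byte index to load), so it is
// equivalent to a plain 16-byte vector load.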
9104 | case Intrinsic::s390_vll: |
9105 | case Intrinsic::s390_vlrl: |
9106 | if (auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2))) |
9107 | if (C->getZExtValue() >= 15) |
9108 | return DAG.getLoad(VT: N->getValueType(ResNo: 0), dl: SDLoc(N), Chain: N->getOperand(Num: 0), |
9109 | Ptr: N->getOperand(Num: 3), PtrInfo: MachinePointerInfo()); |
9110 | break; |
9111 | // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH. |
9112 | case Intrinsic::s390_vstl: |
9113 | case Intrinsic::s390_vstrl: |
9114 | if (auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 3))) |
9115 | if (C->getZExtValue() >= 15) |
9116 | return DAG.getStore(Chain: N->getOperand(Num: 0), dl: SDLoc(N), Val: N->getOperand(Num: 2), |
9117 | Ptr: N->getOperand(Num: 4), PtrInfo: MachinePointerInfo()); |
9118 | break; |
9119 | } |
9120 | |
9121 | return SDValue(); |
9122 | } |
9123 | |
9124 | SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const { |
9125 | if (N->getOpcode() == SystemZISD::PCREL_WRAPPER) |
9126 | return N->getOperand(Num: 0); |
9127 | return N; |
9128 | } |
9129 | |
9130 | SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, |
9131 | DAGCombinerInfo &DCI) const { |
9132 | switch(N->getOpcode()) { |
9133 | default: break; |
9134 | case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI); |
9135 | case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI); |
9136 | case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI); |
9137 | case SystemZISD::MERGE_HIGH: |
9138 | case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI); |
9139 | case ISD::LOAD: return combineLOAD(N, DCI); |
9140 | case ISD::STORE: return combineSTORE(N, DCI); |
9141 | case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI); |
9142 | case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); |
9143 | case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); |
9144 | case ISD::STRICT_FP_ROUND: |
9145 | case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); |
9146 | case ISD::STRICT_FP_EXTEND: |
9147 | case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI); |
9148 | case ISD::SINT_TO_FP: |
9149 | case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI); |
9150 | case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI); |
9151 | case ISD::BSWAP: return combineBSWAP(N, DCI); |
9152 | case ISD::SETCC: return combineSETCC(N, DCI); |
9153 | case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); |
9154 | case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); |
9155 | case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI); |
9156 | case ISD::SRL: |
9157 | case ISD::SRA: return combineShiftToMulAddHigh(N, DCI); |
9158 | case ISD::MUL: return combineMUL(N, DCI); |
9159 | case ISD::SDIV: |
9160 | case ISD::UDIV: |
9161 | case ISD::SREM: |
9162 | case ISD::UREM: return combineIntDIVREM(N, DCI); |
9163 | case ISD::INTRINSIC_W_CHAIN: |
9164 | case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI); |
9165 | } |
9166 | |
9167 | return SDValue(); |
9168 | } |
9169 | |
9170 | // Return the demanded elements for the OpNo source operand of Op. DemandedElts |
9171 | // are for Op. |
9172 | static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, |
9173 | unsigned OpNo) { |
9174 | EVT VT = Op.getValueType(); |
9175 | unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1); |
9176 | APInt SrcDemE; |
9177 | unsigned Opcode = Op.getOpcode(); |
9178 | if (Opcode == ISD::INTRINSIC_WO_CHAIN) { |
9179 | unsigned Id = Op.getConstantOperandVal(i: 0); |
9180 | switch (Id) { |
9181 | case Intrinsic::s390_vpksh: // PACKS |
9182 | case Intrinsic::s390_vpksf: |
9183 | case Intrinsic::s390_vpksg: |
9184 | case Intrinsic::s390_vpkshs: // PACKS_CC |
9185 | case Intrinsic::s390_vpksfs: |
9186 | case Intrinsic::s390_vpksgs: |
9187 | case Intrinsic::s390_vpklsh: // PACKLS |
9188 | case Intrinsic::s390_vpklsf: |
9189 | case Intrinsic::s390_vpklsg: |
9190 | case Intrinsic::s390_vpklshs: // PACKLS_CC |
9191 | case Intrinsic::s390_vpklsfs: |
9192 | case Intrinsic::s390_vpklsgs: |
9193 | // VECTOR PACK truncates the elements of two source vectors into one. |
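// E.g. (illustrative): for a v16i8 PACK result where only element 9 is
// demanded, operand 1 sees no demanded elements, while operand 2 sees its
// element 1 demanded (bit 9 shifted right by NumElts / 2 == 8).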
9194 | SrcDemE = DemandedElts; |
9195 | if (OpNo == 2) |
9196 | SrcDemE.lshrInPlace(ShiftAmt: NumElts / 2); |
9197 | SrcDemE = SrcDemE.trunc(width: NumElts / 2); |
9198 | break; |
9199 | // VECTOR UNPACK extends half the elements of the source vector. |
9200 | case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH |
9201 | case Intrinsic::s390_vuphh: |
9202 | case Intrinsic::s390_vuphf: |
9203 | case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH |
9204 | case Intrinsic::s390_vuplhh: |
9205 | case Intrinsic::s390_vuplhf: |
9206 | SrcDemE = APInt(NumElts * 2, 0); |
9207 | SrcDemE.insertBits(SubBits: DemandedElts, bitPosition: 0); |
9208 | break; |
9209 | case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW |
9210 | case Intrinsic::s390_vuplhw: |
9211 | case Intrinsic::s390_vuplf: |
9212 | case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW |
9213 | case Intrinsic::s390_vupllh: |
9214 | case Intrinsic::s390_vupllf: |
9215 | SrcDemE = APInt(NumElts * 2, 0); |
9216 | SrcDemE.insertBits(SubBits: DemandedElts, bitPosition: NumElts); |
9217 | break; |
9218 | case Intrinsic::s390_vpdi: { |
9219 | // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source. |
9220 | SrcDemE = APInt(NumElts, 0); |
9221 | if (!DemandedElts[OpNo - 1]) |
9222 | break; |
9223 | unsigned Mask = Op.getConstantOperandVal(i: 3); |
9224 | unsigned MaskBit = ((OpNo - 1) ? 1 : 4); |
9225 | // Demand input element 0 or 1, given by the mask bit value. |
SrcDemE.setBit((Mask & MaskBit) ? 1 : 0);
9227 | break; |
9228 | } |
9229 | case Intrinsic::s390_vsldb: { |
9230 | // VECTOR SHIFT LEFT DOUBLE BY BYTE |
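// E.g. (illustrative) with FirstIdx == 12: result bytes 0-3 come from
// bytes 12-15 of operand 1, and result bytes 4-15 come from bytes 0-11
// of operand 2.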
9231 | assert(VT == MVT::v16i8 && "Unexpected type." ); |
9232 | unsigned FirstIdx = Op.getConstantOperandVal(i: 3); |
9233 | assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand." ); |
9234 | unsigned NumSrc0Els = 16 - FirstIdx; |
9235 | SrcDemE = APInt(NumElts, 0); |
9236 | if (OpNo == 1) { |
9237 | APInt DemEls = DemandedElts.trunc(width: NumSrc0Els); |
9238 | SrcDemE.insertBits(SubBits: DemEls, bitPosition: FirstIdx); |
9239 | } else { |
9240 | APInt DemEls = DemandedElts.lshr(shiftAmt: NumSrc0Els); |
9241 | SrcDemE.insertBits(SubBits: DemEls, bitPosition: 0); |
9242 | } |
9243 | break; |
9244 | } |
9245 | case Intrinsic::s390_vperm: |
9246 | SrcDemE = APInt::getAllOnes(numBits: NumElts); |
9247 | break; |
9248 | default: |
9249 | llvm_unreachable("Unhandled intrinsic." ); |
9250 | break; |
9251 | } |
9252 | } else { |
9253 | switch (Opcode) { |
9254 | case SystemZISD::JOIN_DWORDS: |
9255 | // Scalar operand. |
9256 | SrcDemE = APInt(1, 1); |
9257 | break; |
9258 | case SystemZISD::SELECT_CCMASK: |
9259 | SrcDemE = DemandedElts; |
9260 | break; |
9261 | default: |
9262 | llvm_unreachable("Unhandled opcode." ); |
9263 | break; |
9264 | } |
9265 | } |
9266 | return SrcDemE; |
9267 | } |
9268 | |
9269 | static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, |
9270 | const APInt &DemandedElts, |
9271 | const SelectionDAG &DAG, unsigned Depth, |
9272 | unsigned OpNo) { |
9273 | APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); |
9274 | APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo: OpNo + 1); |
9275 | KnownBits LHSKnown = |
9276 | DAG.computeKnownBits(Op: Op.getOperand(i: OpNo), DemandedElts: Src0DemE, Depth: Depth + 1); |
9277 | KnownBits RHSKnown = |
9278 | DAG.computeKnownBits(Op: Op.getOperand(i: OpNo + 1), DemandedElts: Src1DemE, Depth: Depth + 1); |
9279 | Known = LHSKnown.intersectWith(RHS: RHSKnown); |
9280 | } |
9281 | |
9282 | void |
9283 | SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, |
9284 | KnownBits &Known, |
9285 | const APInt &DemandedElts, |
9286 | const SelectionDAG &DAG, |
9287 | unsigned Depth) const { |
9288 | Known.resetAll(); |
9289 | |
9290 | // Intrinsic CC result is returned in the two low bits. |
9291 | unsigned tmp0, tmp1; // not used |
9292 | if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, Opcode&: tmp0, CCValid&: tmp1)) { |
9293 | Known.Zero.setBitsFrom(2); |
9294 | return; |
9295 | } |
9296 | EVT VT = Op.getValueType(); |
9297 | if (Op.getResNo() != 0 || VT == MVT::Untyped) |
9298 | return; |
9299 | assert (Known.getBitWidth() == VT.getScalarSizeInBits() && |
9300 | "KnownBits does not match VT in bitwidth" ); |
9301 | assert ((!VT.isVector() || |
9302 | (DemandedElts.getBitWidth() == VT.getVectorNumElements())) && |
9303 | "DemandedElts does not match VT number of elements" ); |
9304 | unsigned BitWidth = Known.getBitWidth(); |
9305 | unsigned Opcode = Op.getOpcode(); |
9306 | if (Opcode == ISD::INTRINSIC_WO_CHAIN) { |
9307 | bool IsLogical = false; |
9308 | unsigned Id = Op.getConstantOperandVal(i: 0); |
9309 | switch (Id) { |
9310 | case Intrinsic::s390_vpksh: // PACKS |
9311 | case Intrinsic::s390_vpksf: |
9312 | case Intrinsic::s390_vpksg: |
9313 | case Intrinsic::s390_vpkshs: // PACKS_CC |
9314 | case Intrinsic::s390_vpksfs: |
9315 | case Intrinsic::s390_vpksgs: |
9316 | case Intrinsic::s390_vpklsh: // PACKLS |
9317 | case Intrinsic::s390_vpklsf: |
9318 | case Intrinsic::s390_vpklsg: |
9319 | case Intrinsic::s390_vpklshs: // PACKLS_CC |
9320 | case Intrinsic::s390_vpklsfs: |
9321 | case Intrinsic::s390_vpklsgs: |
9322 | case Intrinsic::s390_vpdi: |
9323 | case Intrinsic::s390_vsldb: |
9324 | case Intrinsic::s390_vperm: |
9325 | computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, OpNo: 1); |
9326 | break; |
9327 | case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH |
9328 | case Intrinsic::s390_vuplhh: |
9329 | case Intrinsic::s390_vuplhf: |
9330 | case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW |
9331 | case Intrinsic::s390_vupllh: |
9332 | case Intrinsic::s390_vupllf: |
9333 | IsLogical = true; |
9334 | [[fallthrough]]; |
9335 | case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH |
9336 | case Intrinsic::s390_vuphh: |
9337 | case Intrinsic::s390_vuphf: |
9338 | case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW |
9339 | case Intrinsic::s390_vuplhw: |
9340 | case Intrinsic::s390_vuplf: { |
9341 | SDValue SrcOp = Op.getOperand(i: 1); |
9342 | APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, OpNo: 0); |
9343 | Known = DAG.computeKnownBits(Op: SrcOp, DemandedElts: SrcDemE, Depth: Depth + 1); |
9344 | if (IsLogical) { |
9345 | Known = Known.zext(BitWidth); |
9346 | } else |
9347 | Known = Known.sext(BitWidth); |
9348 | break; |
9349 | } |
9350 | default: |
9351 | break; |
9352 | } |
9353 | } else { |
9354 | switch (Opcode) { |
9355 | case SystemZISD::JOIN_DWORDS: |
9356 | case SystemZISD::SELECT_CCMASK: |
9357 | computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, OpNo: 0); |
9358 | break; |
9359 | case SystemZISD::REPLICATE: { |
9360 | SDValue SrcOp = Op.getOperand(i: 0); |
9361 | Known = DAG.computeKnownBits(Op: SrcOp, Depth: Depth + 1); |
9362 | if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(Val: SrcOp)) |
Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
9364 | break; |
9365 | } |
9366 | default: |
9367 | break; |
9368 | } |
9369 | } |
9370 | |
9371 | // Known has the width of the source operand(s). Adjust if needed to match |
9372 | // the passed bitwidth. |
9373 | if (Known.getBitWidth() != BitWidth) |
9374 | Known = Known.anyextOrTrunc(BitWidth); |
9375 | } |
9376 | |
9377 | static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, |
9378 | const SelectionDAG &DAG, unsigned Depth, |
9379 | unsigned OpNo) { |
9380 | APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); |
9381 | unsigned LHS = DAG.ComputeNumSignBits(Op: Op.getOperand(i: OpNo), DemandedElts: Src0DemE, Depth: Depth + 1); |
9382 | if (LHS == 1) return 1; // Early out. |
9383 | APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo: OpNo + 1); |
9384 | unsigned RHS = DAG.ComputeNumSignBits(Op: Op.getOperand(i: OpNo + 1), DemandedElts: Src1DemE, Depth: Depth + 1); |
9385 | if (RHS == 1) return 1; // Early out. |
9386 | unsigned Common = std::min(a: LHS, b: RHS); |
9387 | unsigned SrcBitWidth = Op.getOperand(i: OpNo).getScalarValueSizeInBits(); |
9388 | EVT VT = Op.getValueType(); |
9389 | unsigned VTBits = VT.getScalarSizeInBits(); |
9390 | if (SrcBitWidth > VTBits) { // PACK |
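// E.g. (illustrative): packing i32 elements into i16 with Common == 20
// known sign bits leaves 20 - 16 = 4 sign bits per result element.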
unsigned SrcExtraBits = SrcBitWidth - VTBits;
9392 | if (Common > SrcExtraBits) |
9393 | return (Common - SrcExtraBits); |
9394 | return 1; |
9395 | } |
9396 | assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth." ); |
9397 | return Common; |
9398 | } |
9399 | |
9400 | unsigned |
9401 | SystemZTargetLowering::ComputeNumSignBitsForTargetNode( |
9402 | SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, |
9403 | unsigned Depth) const { |
9404 | if (Op.getResNo() != 0) |
9405 | return 1; |
9406 | unsigned Opcode = Op.getOpcode(); |
9407 | if (Opcode == ISD::INTRINSIC_WO_CHAIN) { |
9408 | unsigned Id = Op.getConstantOperandVal(i: 0); |
9409 | switch (Id) { |
9410 | case Intrinsic::s390_vpksh: // PACKS |
9411 | case Intrinsic::s390_vpksf: |
9412 | case Intrinsic::s390_vpksg: |
9413 | case Intrinsic::s390_vpkshs: // PACKS_CC |
9414 | case Intrinsic::s390_vpksfs: |
9415 | case Intrinsic::s390_vpksgs: |
9416 | case Intrinsic::s390_vpklsh: // PACKLS |
9417 | case Intrinsic::s390_vpklsf: |
9418 | case Intrinsic::s390_vpklsg: |
9419 | case Intrinsic::s390_vpklshs: // PACKLS_CC |
9420 | case Intrinsic::s390_vpklsfs: |
9421 | case Intrinsic::s390_vpklsgs: |
9422 | case Intrinsic::s390_vpdi: |
9423 | case Intrinsic::s390_vsldb: |
9424 | case Intrinsic::s390_vperm: |
9425 | return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, OpNo: 1); |
9426 | case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH |
9427 | case Intrinsic::s390_vuphh: |
9428 | case Intrinsic::s390_vuphf: |
9429 | case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW |
9430 | case Intrinsic::s390_vuplhw: |
9431 | case Intrinsic::s390_vuplf: { |
9432 | SDValue PackedOp = Op.getOperand(i: 1); |
9433 | APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, OpNo: 1); |
9434 | unsigned Tmp = DAG.ComputeNumSignBits(Op: PackedOp, DemandedElts: SrcDemE, Depth: Depth + 1); |
9435 | EVT VT = Op.getValueType(); |
9436 | unsigned VTBits = VT.getScalarSizeInBits(); |
9437 | Tmp += VTBits - PackedOp.getScalarValueSizeInBits(); |
9438 | return Tmp; |
9439 | } |
9440 | default: |
9441 | break; |
9442 | } |
9443 | } else { |
9444 | switch (Opcode) { |
9445 | case SystemZISD::SELECT_CCMASK: |
9446 | return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, OpNo: 0); |
9447 | default: |
9448 | break; |
9449 | } |
9450 | } |
9451 | |
9452 | return 1; |
9453 | } |
9454 | |
9455 | bool SystemZTargetLowering:: |
9456 | isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, |
9457 | const APInt &DemandedElts, const SelectionDAG &DAG, |
9458 | bool PoisonOnly, unsigned Depth) const { |
9459 | switch (Op->getOpcode()) { |
9460 | case SystemZISD::PCREL_WRAPPER: |
9461 | case SystemZISD::PCREL_OFFSET: |
9462 | return true; |
9463 | } |
9464 | return false; |
9465 | } |
9466 | |
9467 | unsigned |
9468 | SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const { |
9469 | const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); |
9470 | unsigned StackAlign = TFI->getStackAlignment(); |
assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
9472 | "Unexpected stack alignment" ); |
9473 | // The default stack probe size is 4096 if the function has no |
9474 | // stack-probe-size attribute. |
9475 | unsigned StackProbeSize = |
9476 | MF.getFunction().getFnAttributeAsParsedInteger(Kind: "stack-probe-size" , Default: 4096); |
9477 | // Round down to the stack alignment. |
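// E.g. (illustrative) with 8-byte stack alignment, a requested
// "stack-probe-size" of 5001 rounds down to 5000, while a request smaller
// than the alignment rounds to zero and falls back to StackAlign below.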
9478 | StackProbeSize &= ~(StackAlign - 1); |
9479 | return StackProbeSize ? StackProbeSize : StackAlign; |
9480 | } |
9481 | |
9482 | //===----------------------------------------------------------------------===// |
9483 | // Custom insertion |
9484 | //===----------------------------------------------------------------------===// |
9485 | |
9486 | // Force base value Base into a register before MI. Return the register. |
9487 | static Register forceReg(MachineInstr &MI, MachineOperand &Base, |
9488 | const SystemZInstrInfo *TII) { |
9489 | MachineBasicBlock *MBB = MI.getParent(); |
9490 | MachineFunction &MF = *MBB->getParent(); |
9491 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
9492 | |
9493 | if (Base.isReg()) { |
9494 | // Copy Base into a new virtual register to help register coalescing in |
9495 | // cases with multiple uses. |
9496 | Register Reg = MRI.createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass); |
9497 | BuildMI(BB&: *MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: SystemZ::COPY), DestReg: Reg) |
9498 | .add(MO: Base); |
9499 | return Reg; |
9500 | } |
9501 | |
9502 | Register Reg = MRI.createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass); |
9503 | BuildMI(BB&: *MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: SystemZ::LA), DestReg: Reg) |
9504 | .add(MO: Base) |
9505 | .addImm(Val: 0) |
9506 | .addReg(RegNo: 0); |
9507 | return Reg; |
9508 | } |
9509 | |
9510 | // The CC operand of MI might be missing a kill marker because there |
9511 | // were multiple uses of CC, and ISel didn't know which to mark. |
9512 | // Figure out whether MI should have had a kill marker. |
9513 | static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) { |
9514 | // Scan forward through BB for a use/def of CC. |
9515 | MachineBasicBlock::iterator miI(std::next(x: MachineBasicBlock::iterator(MI))); |
9516 | for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) { |
9517 | const MachineInstr& mi = *miI; |
9518 | if (mi.readsRegister(Reg: SystemZ::CC, /*TRI=*/nullptr)) |
9519 | return false; |
9520 | if (mi.definesRegister(Reg: SystemZ::CC, /*TRI=*/nullptr)) |
9521 | break; // Should have kill-flag - update below. |
9522 | } |
9523 | |
9524 | // If we hit the end of the block, check whether CC is live into a |
9525 | // successor. |
9526 | if (miI == MBB->end()) { |
9527 | for (const MachineBasicBlock *Succ : MBB->successors()) |
9528 | if (Succ->isLiveIn(Reg: SystemZ::CC)) |
9529 | return false; |
9530 | } |
9531 | |
9532 | return true; |
9533 | } |
9534 | |
9535 | // Return true if it is OK for this Select pseudo-opcode to be cascaded |
9536 | // together with other Select pseudo-opcodes into a single basic-block with |
9537 | // a conditional jump around it. |
9538 | static bool isSelectPseudo(MachineInstr &MI) { |
9539 | switch (MI.getOpcode()) { |
9540 | case SystemZ::Select32: |
9541 | case SystemZ::Select64: |
9542 | case SystemZ::Select128: |
9543 | case SystemZ::SelectF32: |
9544 | case SystemZ::SelectF64: |
9545 | case SystemZ::SelectF128: |
9546 | case SystemZ::SelectVR32: |
9547 | case SystemZ::SelectVR64: |
9548 | case SystemZ::SelectVR128: |
9549 | return true; |
9550 | |
9551 | default: |
9552 | return false; |
9553 | } |
9554 | } |
9555 | |
9556 | // Helper function, which inserts PHI functions into SinkMBB: |
9557 | // %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ], |
9558 | // where %FalseValue(i) and %TrueValue(i) are taken from Selects. |
9559 | static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects, |
9560 | MachineBasicBlock *TrueMBB, |
9561 | MachineBasicBlock *FalseMBB, |
9562 | MachineBasicBlock *SinkMBB) { |
9563 | MachineFunction *MF = TrueMBB->getParent(); |
9564 | const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); |
9565 | |
9566 | MachineInstr *FirstMI = Selects.front(); |
9567 | unsigned CCValid = FirstMI->getOperand(i: 3).getImm(); |
9568 | unsigned CCMask = FirstMI->getOperand(i: 4).getImm(); |
9569 | |
9570 | MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); |
9571 | |
9572 | // As we are creating the PHIs, we have to be careful if there is more than |
9573 | // one. Later Selects may reference the results of earlier Selects, but later |
9574 | // PHIs have to reference the individual true/false inputs from earlier PHIs. |
// That also means that PHI construction must work forward from earlier to
// later, and that the code must maintain a mapping from each earlier PHI's
// destination register to the registers that went into that PHI.
9578 | DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable; |
9579 | |
9580 | for (auto *MI : Selects) { |
9581 | Register DestReg = MI->getOperand(i: 0).getReg(); |
9582 | Register TrueReg = MI->getOperand(i: 1).getReg(); |
9583 | Register FalseReg = MI->getOperand(i: 2).getReg(); |
9584 | |
9585 | // If this Select we are generating is the opposite condition from |
9586 | // the jump we generated, then we have to swap the operands for the |
9587 | // PHI that is going to be generated. |
9588 | if (MI->getOperand(i: 4).getImm() == (CCValid ^ CCMask)) |
9589 | std::swap(a&: TrueReg, b&: FalseReg); |
9590 | |
9591 | if (auto It = RegRewriteTable.find(Val: TrueReg); It != RegRewriteTable.end()) |
9592 | TrueReg = It->second.first; |
9593 | |
9594 | if (auto It = RegRewriteTable.find(Val: FalseReg); It != RegRewriteTable.end()) |
9595 | FalseReg = It->second.second; |
9596 | |
9597 | DebugLoc DL = MI->getDebugLoc(); |
9598 | BuildMI(BB&: *SinkMBB, I: SinkInsertionPoint, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg) |
9599 | .addReg(RegNo: TrueReg).addMBB(MBB: TrueMBB) |
9600 | .addReg(RegNo: FalseReg).addMBB(MBB: FalseMBB); |
9601 | |
9602 | // Add this PHI to the rewrite table. |
9603 | RegRewriteTable[DestReg] = std::make_pair(x&: TrueReg, y&: FalseReg); |
9604 | } |
9605 | |
9606 | MF->getProperties().resetNoPHIs(); |
9607 | } |
9608 | |
9609 | MachineBasicBlock * |
9610 | SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI, |
9611 | MachineBasicBlock *BB) const { |
9612 | MachineFunction &MF = *BB->getParent(); |
9613 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
9614 | auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); |
9615 | assert(TFL->hasReservedCallFrame(MF) && |
9616 | "ADJSTACKDOWN and ADJSTACKUP should be no-ops" ); |
9617 | (void)TFL; |
9618 | // Get the MaxCallFrameSize value and erase MI since it serves no further |
9619 | // purpose as the call frame is statically reserved in the prolog. Set |
9620 | // AdjustsStack as MI is *not* mapped as a frame instruction. |
9621 | uint32_t NumBytes = MI.getOperand(i: 0).getImm(); |
9622 | if (NumBytes > MFI.getMaxCallFrameSize()) |
9623 | MFI.setMaxCallFrameSize(NumBytes); |
9624 | MFI.setAdjustsStack(true); |
9625 | |
9626 | MI.eraseFromParent(); |
9627 | return BB; |
9628 | } |
9629 | |
9630 | // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. |
9631 | MachineBasicBlock * |
9632 | SystemZTargetLowering::emitSelect(MachineInstr &MI, |
9633 | MachineBasicBlock *MBB) const { |
9634 | assert(isSelectPseudo(MI) && "Bad call to emitSelect()" ); |
9635 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
9636 | |
9637 | unsigned CCValid = MI.getOperand(i: 3).getImm(); |
9638 | unsigned CCMask = MI.getOperand(i: 4).getImm(); |
9639 | |
9640 | // If we have a sequence of Select* pseudo instructions using the |
9641 | // same condition code value, we want to expand all of them into |
9642 | // a single pair of basic blocks using the same condition. |
9643 | SmallVector<MachineInstr*, 8> Selects; |
9644 | SmallVector<MachineInstr*, 8> DbgValues; |
9645 | Selects.push_back(Elt: &MI); |
9646 | unsigned Count = 0; |
9647 | for (MachineInstr &NextMI : llvm::make_range( |
9648 | x: std::next(x: MachineBasicBlock::iterator(MI)), y: MBB->end())) { |
9649 | if (isSelectPseudo(MI&: NextMI)) { |
9650 | assert(NextMI.getOperand(3).getImm() == CCValid && |
9651 | "Bad CCValid operands since CC was not redefined." ); |
9652 | if (NextMI.getOperand(i: 4).getImm() == CCMask || |
9653 | NextMI.getOperand(i: 4).getImm() == (CCValid ^ CCMask)) { |
9654 | Selects.push_back(Elt: &NextMI); |
9655 | continue; |
9656 | } |
9657 | break; |
9658 | } |
9659 | if (NextMI.definesRegister(Reg: SystemZ::CC, /*TRI=*/nullptr) || |
9660 | NextMI.usesCustomInsertionHook()) |
9661 | break; |
9662 | bool User = false; |
9663 | for (auto *SelMI : Selects) |
9664 | if (NextMI.readsVirtualRegister(Reg: SelMI->getOperand(i: 0).getReg())) { |
9665 | User = true; |
9666 | break; |
9667 | } |
9668 | if (NextMI.isDebugInstr()) { |
9669 | if (User) { |
9670 | assert(NextMI.isDebugValue() && "Unhandled debug opcode." ); |
9671 | DbgValues.push_back(Elt: &NextMI); |
9672 | } |
9673 | } else if (User || ++Count > 20) |
9674 | break; |
9675 | } |
9676 | |
9677 | MachineInstr *LastMI = Selects.back(); |
9678 | bool CCKilled = (LastMI->killsRegister(Reg: SystemZ::CC, /*TRI=*/nullptr) || |
9679 | checkCCKill(MI&: *LastMI, MBB)); |
9680 | MachineBasicBlock *StartMBB = MBB; |
9681 | MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI: LastMI, MBB); |
9682 | MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(MBB: StartMBB); |
9683 | |
9684 | // Unless CC was killed in the last Select instruction, mark it as |
9685 | // live-in to both FalseMBB and JoinMBB. |
9686 | if (!CCKilled) { |
9687 | FalseMBB->addLiveIn(PhysReg: SystemZ::CC); |
9688 | JoinMBB->addLiveIn(PhysReg: SystemZ::CC); |
9689 | } |
9690 | |
9691 | // StartMBB: |
9692 | // BRC CCMask, JoinMBB |
9693 | // # fallthrough to FalseMBB |
9694 | MBB = StartMBB; |
9695 | BuildMI(BB: MBB, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: SystemZ::BRC)) |
9696 | .addImm(Val: CCValid).addImm(Val: CCMask).addMBB(MBB: JoinMBB); |
9697 | MBB->addSuccessor(Succ: JoinMBB); |
9698 | MBB->addSuccessor(Succ: FalseMBB); |
9699 | |
9700 | // FalseMBB: |
9701 | // # fallthrough to JoinMBB |
9702 | MBB = FalseMBB; |
9703 | MBB->addSuccessor(Succ: JoinMBB); |
9704 | |
9705 | // JoinMBB: |
9706 | // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] |
9707 | // ... |
9708 | MBB = JoinMBB; |
9709 | createPHIsForSelects(Selects, TrueMBB: StartMBB, FalseMBB, SinkMBB: MBB); |
9710 | for (auto *SelMI : Selects) |
9711 | SelMI->eraseFromParent(); |
9712 | |
9713 | MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI(); |
9714 | for (auto *DbgMI : DbgValues) |
9715 | MBB->splice(Where: InsertPos, Other: StartMBB, From: DbgMI); |
9716 | |
9717 | return JoinMBB; |
9718 | } |
9719 | |
9720 | // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. |
9721 | // StoreOpcode is the store to use and Invert says whether the store should |
9722 | // happen when the condition is false rather than true. If a STORE ON |
9723 | // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0. |
9724 | MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, |
9725 | MachineBasicBlock *MBB, |
9726 | unsigned StoreOpcode, |
9727 | unsigned STOCOpcode, |
9728 | bool Invert) const { |
9729 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
9730 | |
9731 | Register SrcReg = MI.getOperand(i: 0).getReg(); |
9732 | MachineOperand Base = MI.getOperand(i: 1); |
9733 | int64_t Disp = MI.getOperand(i: 2).getImm(); |
9734 | Register IndexReg = MI.getOperand(i: 3).getReg(); |
9735 | unsigned CCValid = MI.getOperand(i: 4).getImm(); |
9736 | unsigned CCMask = MI.getOperand(i: 5).getImm(); |
9737 | DebugLoc DL = MI.getDebugLoc(); |
9738 | |
9739 | StoreOpcode = TII->getOpcodeForOffset(Opcode: StoreOpcode, Offset: Disp); |
9740 | |
9741 | // ISel pattern matching also adds a load memory operand of the same |
9742 | // address, so take special care to find the storing memory operand. |
9743 | MachineMemOperand *MMO = nullptr; |
9744 | for (auto *I : MI.memoperands()) |
9745 | if (I->isStore()) { |
9746 | MMO = I; |
9747 | break; |
9748 | } |
9749 | |
9750 | // Use STOCOpcode if possible. We could use different store patterns in |
9751 | // order to avoid matching the index register, but the performance trade-offs |
9752 | // might be more complicated in that case. |
9753 | if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) { |
9754 | if (Invert) |
9755 | CCMask ^= CCValid; |
9756 | |
9757 | BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: STOCOpcode)) |
9758 | .addReg(RegNo: SrcReg) |
9759 | .add(MO: Base) |
9760 | .addImm(Val: Disp) |
9761 | .addImm(Val: CCValid) |
9762 | .addImm(Val: CCMask) |
9763 | .addMemOperand(MMO); |
9764 | |
9765 | MI.eraseFromParent(); |
9766 | return MBB; |
9767 | } |
9768 | |
9769 | // Get the condition needed to branch around the store. |
9770 | if (!Invert) |
9771 | CCMask ^= CCValid; |
9772 | |
9773 | MachineBasicBlock *StartMBB = MBB; |
9774 | MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB); |
9775 | MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(MBB: StartMBB); |
9776 | |
9777 | // Unless CC was killed in the CondStore instruction, mark it as |
9778 | // live-in to both FalseMBB and JoinMBB. |
9779 | if (!MI.killsRegister(Reg: SystemZ::CC, /*TRI=*/nullptr) && |
9780 | !checkCCKill(MI, MBB: JoinMBB)) { |
9781 | FalseMBB->addLiveIn(PhysReg: SystemZ::CC); |
9782 | JoinMBB->addLiveIn(PhysReg: SystemZ::CC); |
9783 | } |
9784 | |
9785 | // StartMBB: |
9786 | // BRC CCMask, JoinMBB |
9787 | // # fallthrough to FalseMBB |
9788 | MBB = StartMBB; |
9789 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC)) |
9790 | .addImm(Val: CCValid).addImm(Val: CCMask).addMBB(MBB: JoinMBB); |
9791 | MBB->addSuccessor(Succ: JoinMBB); |
9792 | MBB->addSuccessor(Succ: FalseMBB); |
9793 | |
9794 | // FalseMBB: |
9795 | // store %SrcReg, %Disp(%Index,%Base) |
9796 | // # fallthrough to JoinMBB |
9797 | MBB = FalseMBB; |
9798 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: StoreOpcode)) |
9799 | .addReg(RegNo: SrcReg) |
9800 | .add(MO: Base) |
9801 | .addImm(Val: Disp) |
9802 | .addReg(RegNo: IndexReg) |
9803 | .addMemOperand(MMO); |
9804 | MBB->addSuccessor(Succ: JoinMBB); |
9805 | |
9806 | MI.eraseFromParent(); |
9807 | return JoinMBB; |
9808 | } |
9809 | |
9810 | // Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI. |
9811 | MachineBasicBlock * |
9812 | SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI, |
9813 | MachineBasicBlock *MBB, |
9814 | bool Unsigned) const { |
9815 | MachineFunction &MF = *MBB->getParent(); |
9816 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
9817 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
9818 | |
9819 | // Synthetic instruction to compare 128-bit values. |
9820 | // Sets CC 1 if Op0 > Op1, sets a different CC otherwise. |
9821 | Register Op0 = MI.getOperand(i: 0).getReg(); |
9822 | Register Op1 = MI.getOperand(i: 1).getReg(); |
9823 | |
9824 | MachineBasicBlock *StartMBB = MBB; |
9825 | MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB); |
9826 | MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(MBB: StartMBB); |
9827 | |
9828 | // StartMBB: |
9829 | // |
9830 | // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts. |
9831 | // Swap the inputs to get: |
9832 | // CC 1 if high(Op0) > high(Op1) |
9833 | // CC 2 if high(Op0) < high(Op1) |
9834 | // CC 0 if high(Op0) == high(Op1) |
9835 | // |
// If CC != 0, we're done, so jump over the next instruction.
9837 | // |
9838 | // VEC[L]G Op1, Op0 |
9839 | // JNE JoinMBB |
9840 | // # fallthrough to HiEqMBB |
9841 | MBB = StartMBB; |
int HiOpcode = Unsigned ? SystemZ::VECLG : SystemZ::VECG;
9843 | BuildMI(BB: MBB, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: HiOpcode)) |
9844 | .addReg(RegNo: Op1).addReg(RegNo: Op0); |
9845 | BuildMI(BB: MBB, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: SystemZ::BRC)) |
9846 | .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_NE).addMBB(MBB: JoinMBB); |
9847 | MBB->addSuccessor(Succ: JoinMBB); |
9848 | MBB->addSuccessor(Succ: HiEqMBB); |
9849 | |
9850 | // HiEqMBB: |
9851 | // |
9852 | // Otherwise, use VECTOR COMPARE HIGH LOGICAL. |
9853 | // Since we already know the high parts are equal, the CC |
9854 | // result will only depend on the low parts: |
9855 | // CC 1 if low(Op0) > low(Op1) |
9856 | // CC 3 if low(Op0) <= low(Op1) |
9857 | // |
9858 | // VCHLGS Tmp, Op0, Op1 |
9859 | // # fallthrough to JoinMBB |
9860 | MBB = HiEqMBB; |
9861 | Register Temp = MRI.createVirtualRegister(RegClass: &SystemZ::VR128BitRegClass); |
9862 | BuildMI(BB: MBB, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: SystemZ::VCHLGS), DestReg: Temp) |
9863 | .addReg(RegNo: Op0).addReg(RegNo: Op1); |
9864 | MBB->addSuccessor(Succ: JoinMBB); |
9865 | |
9866 | // Mark CC as live-in to JoinMBB. |
9867 | JoinMBB->addLiveIn(PhysReg: SystemZ::CC); |
9868 | |
9869 | MI.eraseFromParent(); |
9870 | return JoinMBB; |
9871 | } |
9872 | |
9873 | // Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or |
9874 | // ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs |
9875 | // the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says |
9876 | // whether the field should be inverted after performing BinOpcode (e.g. for |
9877 | // NAND). |
9878 | MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( |
9879 | MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode, |
9880 | bool Invert) const { |
9881 | MachineFunction &MF = *MBB->getParent(); |
9882 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
9883 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
9884 | |
9885 | // Extract the operands. Base can be a register or a frame index. |
9886 | // Src2 can be a register or immediate. |
9887 | Register Dest = MI.getOperand(i: 0).getReg(); |
9888 | MachineOperand Base = earlyUseOperand(Op: MI.getOperand(i: 1)); |
9889 | int64_t Disp = MI.getOperand(i: 2).getImm(); |
9890 | MachineOperand Src2 = earlyUseOperand(Op: MI.getOperand(i: 3)); |
9891 | Register BitShift = MI.getOperand(i: 4).getReg(); |
9892 | Register NegBitShift = MI.getOperand(i: 5).getReg(); |
9893 | unsigned BitSize = MI.getOperand(i: 6).getImm(); |
9894 | DebugLoc DL = MI.getDebugLoc(); |
9895 | |
9896 | // Get the right opcodes for the displacement. |
9897 | unsigned LOpcode = TII->getOpcodeForOffset(Opcode: SystemZ::L, Offset: Disp); |
9898 | unsigned CSOpcode = TII->getOpcodeForOffset(Opcode: SystemZ::CS, Offset: Disp); |
9899 | assert(LOpcode && CSOpcode && "Displacement out of range" ); |
9900 | |
9901 | // Create virtual registers for temporary results. |
9902 | Register OrigVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass); |
9903 | Register OldVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass); |
9904 | Register NewVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass); |
9905 | Register RotatedOldVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass); |
9906 | Register RotatedNewVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass); |
9907 | |
9908 | // Insert a basic block for the main loop. |
9909 | MachineBasicBlock *StartMBB = MBB; |
9910 | MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); |
9911 | MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB); |
9912 | |
9913 | // StartMBB: |
9914 | // ... |
9915 | // %OrigVal = L Disp(%Base) |
9916 | // # fall through to LoopMBB |
9917 | MBB = StartMBB; |
9918 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: LOpcode), DestReg: OrigVal).add(MO: Base).addImm(Val: Disp).addReg(RegNo: 0); |
9919 | MBB->addSuccessor(Succ: LoopMBB); |
9920 | |
9921 | // LoopMBB: |
9922 | // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ] |
9923 | // %RotatedOldVal = RLL %OldVal, 0(%BitShift) |
9924 | // %RotatedNewVal = OP %RotatedOldVal, %Src2 |
9925 | // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) |
9926 | // %Dest = CS %OldVal, %NewVal, Disp(%Base) |
9927 | // JNE LoopMBB |
9928 | // # fall through to DoneMBB |
9929 | MBB = LoopMBB; |
9930 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: OldVal) |
9931 | .addReg(RegNo: OrigVal).addMBB(MBB: StartMBB) |
9932 | .addReg(RegNo: Dest).addMBB(MBB: LoopMBB); |
9933 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RLL), DestReg: RotatedOldVal) |
9934 | .addReg(RegNo: OldVal).addReg(RegNo: BitShift).addImm(Val: 0); |
9935 | if (Invert) { |
9936 | // Perform the operation normally and then invert every bit of the field. |
9937 | Register Tmp = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass); |
9938 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: BinOpcode), DestReg: Tmp).addReg(RegNo: RotatedOldVal).add(MO: Src2); |
9939 | // XILF with the upper BitSize bits set. |
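// (E.g., illustratively, BitSize == 8 gives the mask 0xff000000, flipping
// only the 8-bit field that the RLL above rotated into the top bits.)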
9940 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::XILF), DestReg: RotatedNewVal) |
9941 | .addReg(RegNo: Tmp).addImm(Val: -1U << (32 - BitSize)); |
9942 | } else if (BinOpcode) |
// A simple binary operation.
9944 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: BinOpcode), DestReg: RotatedNewVal) |
9945 | .addReg(RegNo: RotatedOldVal) |
9946 | .add(MO: Src2); |
9947 | else |
9948 | // Use RISBG to rotate Src2 into position and use it to replace the |
9949 | // field in RotatedOldVal. |
9950 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RISBG32), DestReg: RotatedNewVal) |
9951 | .addReg(RegNo: RotatedOldVal).addReg(RegNo: Src2.getReg()) |
9952 | .addImm(Val: 32).addImm(Val: 31 + BitSize).addImm(Val: 32 - BitSize); |
9953 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RLL), DestReg: NewVal) |
9954 | .addReg(RegNo: RotatedNewVal).addReg(RegNo: NegBitShift).addImm(Val: 0); |
9955 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: CSOpcode), DestReg: Dest) |
9956 | .addReg(RegNo: OldVal) |
9957 | .addReg(RegNo: NewVal) |
9958 | .add(MO: Base) |
9959 | .addImm(Val: Disp); |
9960 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC)) |
9961 | .addImm(Val: SystemZ::CCMASK_CS).addImm(Val: SystemZ::CCMASK_CS_NE).addMBB(MBB: LoopMBB); |
9962 | MBB->addSuccessor(Succ: LoopMBB); |
9963 | MBB->addSuccessor(Succ: DoneMBB); |
9964 | |
9965 | MI.eraseFromParent(); |
9966 | return DoneMBB; |
9967 | } |
9968 | |
9969 | // Implement EmitInstrWithCustomInserter for subword pseudo |
9970 | // ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the |
9971 | // instruction that should be used to compare the current field with the |
9972 | // minimum or maximum value. KeepOldMask is the BRC condition-code mask |
9973 | // for when the current field should be kept. |
9974 | MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( |
9975 | MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode, |
9976 | unsigned KeepOldMask) const { |
9977 | MachineFunction &MF = *MBB->getParent(); |
9978 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
9979 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
9980 | |
9981 | // Extract the operands. Base can be a register or a frame index. |
9982 | Register Dest = MI.getOperand(i: 0).getReg(); |
9983 | MachineOperand Base = earlyUseOperand(Op: MI.getOperand(i: 1)); |
9984 | int64_t Disp = MI.getOperand(i: 2).getImm(); |
9985 | Register Src2 = MI.getOperand(i: 3).getReg(); |
9986 | Register BitShift = MI.getOperand(i: 4).getReg(); |
9987 | Register NegBitShift = MI.getOperand(i: 5).getReg(); |
9988 | unsigned BitSize = MI.getOperand(i: 6).getImm(); |
9989 | DebugLoc DL = MI.getDebugLoc(); |
9990 | |
9991 | // Get the right opcodes for the displacement. |
9992 | unsigned LOpcode = TII->getOpcodeForOffset(Opcode: SystemZ::L, Offset: Disp); |
9993 | unsigned CSOpcode = TII->getOpcodeForOffset(Opcode: SystemZ::CS, Offset: Disp); |
9994 | assert(LOpcode && CSOpcode && "Displacement out of range");
9995 | |
9996 | // Create virtual registers for temporary results. |
9997 | Register OrigVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass); |
9998 | Register OldVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass); |
9999 | Register NewVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass); |
10000 | Register RotatedOldVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass); |
10001 | Register RotatedAltVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass); |
10002 | Register RotatedNewVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass); |
10003 | |
10004 | // Insert 3 basic blocks for the loop. |
10005 | MachineBasicBlock *StartMBB = MBB; |
10006 | MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); |
10007 | MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB); |
10008 | MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(MBB: LoopMBB); |
10009 | MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(MBB: UseAltMBB); |
10010 | |
10011 | // StartMBB: |
10012 | // ... |
10013 | // %OrigVal = L Disp(%Base) |
10014 | // # fall through to LoopMBB |
10015 | MBB = StartMBB; |
10016 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: LOpcode), DestReg: OrigVal).add(MO: Base).addImm(Val: Disp).addReg(RegNo: 0); |
10017 | MBB->addSuccessor(Succ: LoopMBB); |
10018 | |
10019 | // LoopMBB: |
10020 | // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ] |
10021 | // %RotatedOldVal = RLL %OldVal, 0(%BitShift) |
10022 | // CompareOpcode %RotatedOldVal, %Src2 |
10023 | // BRC KeepOldMask, UpdateMBB |
10024 | MBB = LoopMBB; |
10025 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: OldVal) |
10026 | .addReg(RegNo: OrigVal).addMBB(MBB: StartMBB) |
10027 | .addReg(RegNo: Dest).addMBB(MBB: UpdateMBB); |
10028 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RLL), DestReg: RotatedOldVal) |
10029 | .addReg(RegNo: OldVal).addReg(RegNo: BitShift).addImm(Val: 0); |
10030 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: CompareOpcode)) |
10031 | .addReg(RegNo: RotatedOldVal).addReg(RegNo: Src2); |
10032 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC)) |
10033 | .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: KeepOldMask).addMBB(MBB: UpdateMBB); |
10034 | MBB->addSuccessor(Succ: UpdateMBB); |
10035 | MBB->addSuccessor(Succ: UseAltMBB); |
10036 | |
10037 | // UseAltMBB: |
10038 | // %RotatedAltVal = RISBG32 %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
10039 | // # fall through to UpdateMBB |
10040 | MBB = UseAltMBB; |
10041 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RISBG32), DestReg: RotatedAltVal) |
10042 | .addReg(RegNo: RotatedOldVal).addReg(RegNo: Src2) |
10043 | .addImm(Val: 32).addImm(Val: 31 + BitSize).addImm(Val: 0); |
10044 | MBB->addSuccessor(Succ: UpdateMBB); |
10045 | |
10046 | // UpdateMBB: |
10047 | // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ], |
10048 | // [ %RotatedAltVal, UseAltMBB ] |
10049 | // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) |
10050 | // %Dest = CS %OldVal, %NewVal, Disp(%Base) |
10051 | // JNE LoopMBB |
10052 | // # fall through to DoneMBB |
10053 | MBB = UpdateMBB; |
10054 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: RotatedNewVal) |
10055 | .addReg(RegNo: RotatedOldVal).addMBB(MBB: LoopMBB) |
10056 | .addReg(RegNo: RotatedAltVal).addMBB(MBB: UseAltMBB); |
10057 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RLL), DestReg: NewVal) |
10058 | .addReg(RegNo: RotatedNewVal).addReg(RegNo: NegBitShift).addImm(Val: 0); |
10059 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: CSOpcode), DestReg: Dest) |
10060 | .addReg(RegNo: OldVal) |
10061 | .addReg(RegNo: NewVal) |
10062 | .add(MO: Base) |
10063 | .addImm(Val: Disp); |
10064 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC)) |
10065 | .addImm(Val: SystemZ::CCMASK_CS).addImm(Val: SystemZ::CCMASK_CS_NE).addMBB(MBB: LoopMBB); |
10066 | MBB->addSuccessor(Succ: LoopMBB); |
10067 | MBB->addSuccessor(Succ: DoneMBB); |
10068 | |
10069 | MI.eraseFromParent(); |
10070 | return DoneMBB; |
10071 | } |
10072 | |
10073 | // Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW |
10074 | // instruction MI. |
10075 | MachineBasicBlock * |
10076 | SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, |
10077 | MachineBasicBlock *MBB) const { |
10078 | MachineFunction &MF = *MBB->getParent(); |
10079 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
10080 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
10081 | |
10082 | // Extract the operands. Base can be a register or a frame index. |
10083 | Register Dest = MI.getOperand(i: 0).getReg(); |
10084 | MachineOperand Base = earlyUseOperand(Op: MI.getOperand(i: 1)); |
10085 | int64_t Disp = MI.getOperand(i: 2).getImm(); |
10086 | Register CmpVal = MI.getOperand(i: 3).getReg(); |
10087 | Register OrigSwapVal = MI.getOperand(i: 4).getReg(); |
10088 | Register BitShift = MI.getOperand(i: 5).getReg(); |
10089 | Register NegBitShift = MI.getOperand(i: 6).getReg(); |
10090 | int64_t BitSize = MI.getOperand(i: 7).getImm(); |
10091 | DebugLoc DL = MI.getDebugLoc(); |
10092 | |
10093 | const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass; |
10094 | |
10095 | // Get the right opcodes for the displacement and zero-extension. |
10096 | unsigned LOpcode = TII->getOpcodeForOffset(Opcode: SystemZ::L, Offset: Disp); |
10097 | unsigned CSOpcode = TII->getOpcodeForOffset(Opcode: SystemZ::CS, Offset: Disp); |
10098 | unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR; |
10099 | assert(LOpcode && CSOpcode && "Displacement out of range");
10100 | |
10101 | // Create virtual registers for temporary results. |
10102 | Register OrigOldVal = MRI.createVirtualRegister(RegClass: RC); |
10103 | Register OldVal = MRI.createVirtualRegister(RegClass: RC); |
10104 | Register SwapVal = MRI.createVirtualRegister(RegClass: RC); |
10105 | Register StoreVal = MRI.createVirtualRegister(RegClass: RC); |
10106 | Register OldValRot = MRI.createVirtualRegister(RegClass: RC); |
10107 | Register RetryOldVal = MRI.createVirtualRegister(RegClass: RC); |
10108 | Register RetrySwapVal = MRI.createVirtualRegister(RegClass: RC); |
10109 | |
10110 | // Insert 2 basic blocks for the loop. |
10111 | MachineBasicBlock *StartMBB = MBB; |
10112 | MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); |
10113 | MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB); |
10114 | MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(MBB: LoopMBB); |
10115 | |
10116 | // StartMBB: |
10117 | // ... |
10118 | // %OrigOldVal = L Disp(%Base) |
10119 | // # fall through to LoopMBB |
10120 | MBB = StartMBB; |
10121 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: LOpcode), DestReg: OrigOldVal) |
10122 | .add(MO: Base) |
10123 | .addImm(Val: Disp) |
10124 | .addReg(RegNo: 0); |
10125 | MBB->addSuccessor(Succ: LoopMBB); |
10126 | |
10127 | // LoopMBB: |
10128 | // %OldVal = phi [ %OrigOldVal, StartMBB ], [ %RetryOldVal, SetMBB ]
10129 | // %SwapVal = phi [ %OrigSwapVal, StartMBB ], [ %RetrySwapVal, SetMBB ]
10130 | // %OldValRot = RLL %OldVal, BitSize(%BitShift) |
10131 | // ^^ The low BitSize bits contain the field |
10132 | // of interest. |
10133 | // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0 |
10134 | // ^^ Replace the upper 32-BitSize bits of the |
10135 | // swap value with those that we loaded and rotated. |
10136 | // %Dest = LL[CH]R %OldValRot
10137 | // CR %Dest, %CmpVal |
10138 | // JNE DoneMBB |
10139 | // # Fall through to SetMBB |
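// For example, with BitSize == 8 the RISBG32 copies bits 32..55 from
// %OldValRot into %RetrySwapVal, while bits 56..63 (the byte to be stored)
// are kept from %SwapVal.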
10140 | MBB = LoopMBB; |
10141 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: OldVal) |
10142 | .addReg(RegNo: OrigOldVal).addMBB(MBB: StartMBB) |
10143 | .addReg(RegNo: RetryOldVal).addMBB(MBB: SetMBB); |
10144 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: SwapVal) |
10145 | .addReg(RegNo: OrigSwapVal).addMBB(MBB: StartMBB) |
10146 | .addReg(RegNo: RetrySwapVal).addMBB(MBB: SetMBB); |
10147 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RLL), DestReg: OldValRot) |
10148 | .addReg(RegNo: OldVal).addReg(RegNo: BitShift).addImm(Val: BitSize); |
10149 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RISBG32), DestReg: RetrySwapVal) |
10150 | .addReg(RegNo: SwapVal).addReg(RegNo: OldValRot).addImm(Val: 32).addImm(Val: 63 - BitSize).addImm(Val: 0); |
10151 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: ZExtOpcode), DestReg: Dest) |
10152 | .addReg(RegNo: OldValRot); |
10153 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CR)) |
10154 | .addReg(RegNo: Dest).addReg(RegNo: CmpVal); |
10155 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC)) |
10156 | .addImm(Val: SystemZ::CCMASK_ICMP) |
10157 | .addImm(Val: SystemZ::CCMASK_CMP_NE).addMBB(MBB: DoneMBB); |
10158 | MBB->addSuccessor(Succ: DoneMBB); |
10159 | MBB->addSuccessor(Succ: SetMBB); |
10160 | |
10161 | // SetMBB: |
10162 | // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift) |
10163 | // ^^ Rotate the new field to its proper position. |
10164 | // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base) |
10165 | // JNE LoopMBB |
10166 | // # fall through to DoneMBB
10167 | MBB = SetMBB; |
10168 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RLL), DestReg: StoreVal) |
10169 | .addReg(RegNo: RetrySwapVal).addReg(RegNo: NegBitShift).addImm(Val: -BitSize); |
10170 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: CSOpcode), DestReg: RetryOldVal) |
10171 | .addReg(RegNo: OldVal) |
10172 | .addReg(RegNo: StoreVal) |
10173 | .add(MO: Base) |
10174 | .addImm(Val: Disp); |
10175 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC)) |
10176 | .addImm(Val: SystemZ::CCMASK_CS).addImm(Val: SystemZ::CCMASK_CS_NE).addMBB(MBB: LoopMBB); |
10177 | MBB->addSuccessor(Succ: LoopMBB); |
10178 | MBB->addSuccessor(Succ: DoneMBB); |
10179 | |
10180 | // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in |
10181 | // to the block after the loop. At this point, CC may have been defined |
10182 | // either by the CR in LoopMBB or by the CS in SetMBB. |
10183 | if (!MI.registerDefIsDead(Reg: SystemZ::CC, /*TRI=*/nullptr)) |
10184 | DoneMBB->addLiveIn(PhysReg: SystemZ::CC); |
10185 | |
10186 | MI.eraseFromParent(); |
10187 | return DoneMBB; |
10188 | } |
10189 | |
10190 | // Emit a move from two GR64s to a GR128. |
10191 | MachineBasicBlock * |
10192 | SystemZTargetLowering::emitPair128(MachineInstr &MI, |
10193 | MachineBasicBlock *MBB) const { |
10194 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
10195 | const DebugLoc &DL = MI.getDebugLoc(); |
10196 | |
10197 | Register Dest = MI.getOperand(i: 0).getReg(); |
10198 | BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::REG_SEQUENCE), DestReg: Dest) |
10199 | .add(MO: MI.getOperand(i: 1)) |
10200 | .addImm(Val: SystemZ::subreg_h64) |
10201 | .add(MO: MI.getOperand(i: 2)) |
10202 | .addImm(Val: SystemZ::subreg_l64); |
10203 | MI.eraseFromParent(); |
10204 | return MBB; |
10205 | } |
10206 | |
10207 | // Emit an extension from a GR64 to a GR128. ClearEven is true
10208 | // if the high register of the GR128 value must be cleared, or false if
10209 | // it's "don't care".
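// (ZEXT128 expands with ClearEven == true, AEXT128 with ClearEven == false;
// see EmitInstrWithCustomInserter below.)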
10210 | MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, |
10211 | MachineBasicBlock *MBB, |
10212 | bool ClearEven) const { |
10213 | MachineFunction &MF = *MBB->getParent(); |
10214 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
10215 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
10216 | DebugLoc DL = MI.getDebugLoc(); |
10217 | |
10218 | Register Dest = MI.getOperand(i: 0).getReg(); |
10219 | Register Src = MI.getOperand(i: 1).getReg(); |
10220 | Register In128 = MRI.createVirtualRegister(RegClass: &SystemZ::GR128BitRegClass); |
10221 | |
10222 | BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: In128); |
10223 | if (ClearEven) { |
10224 | Register NewIn128 = MRI.createVirtualRegister(RegClass: &SystemZ::GR128BitRegClass); |
10225 | Register Zero64 = MRI.createVirtualRegister(RegClass: &SystemZ::GR64BitRegClass); |
10226 | |
10227 | BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LLILL), DestReg: Zero64) |
10228 | .addImm(Val: 0); |
10229 | BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::INSERT_SUBREG), DestReg: NewIn128) |
10230 | .addReg(RegNo: In128).addReg(RegNo: Zero64).addImm(Val: SystemZ::subreg_h64); |
10231 | In128 = NewIn128; |
10232 | } |
10233 | BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::INSERT_SUBREG), DestReg: Dest) |
10234 | .addReg(RegNo: In128).addReg(RegNo: Src).addImm(Val: SystemZ::subreg_l64); |
10235 | |
10236 | MI.eraseFromParent(); |
10237 | return MBB; |
10238 | } |
10239 | |
10240 | MachineBasicBlock * |
10241 | SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI, |
10242 | MachineBasicBlock *MBB, |
10243 | unsigned Opcode, bool IsMemset) const { |
10244 | MachineFunction &MF = *MBB->getParent(); |
10245 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
10246 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
10247 | DebugLoc DL = MI.getDebugLoc(); |
10248 | |
10249 | MachineOperand DestBase = earlyUseOperand(Op: MI.getOperand(i: 0)); |
10250 | uint64_t DestDisp = MI.getOperand(i: 1).getImm(); |
10251 | MachineOperand SrcBase = MachineOperand::CreateReg(Reg: 0U, isDef: false); |
10252 | uint64_t SrcDisp; |
10253 | |
10254 | // Fold the displacement Disp if it is out of range. |
10255 | auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void { |
10256 | if (!isUInt<12>(x: Disp)) { |
10257 | Register Reg = MRI.createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass); |
10258 | unsigned Opcode = TII->getOpcodeForOffset(Opcode: SystemZ::LA, Offset: Disp); |
10259 | BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode), DestReg: Reg) |
10260 | .add(MO: Base).addImm(Val: Disp).addReg(RegNo: 0); |
10261 | Base = MachineOperand::CreateReg(Reg, isDef: false); |
10262 | Disp = 0; |
10263 | } |
10264 | }; |
10265 | |
10266 | if (!IsMemset) { |
10267 | SrcBase = earlyUseOperand(Op: MI.getOperand(i: 2)); |
10268 | SrcDisp = MI.getOperand(i: 3).getImm(); |
10269 | } else { |
10270 | SrcBase = DestBase; |
10271 | SrcDisp = DestDisp++; |
10272 | foldDisplIfNeeded(DestBase, DestDisp); |
10273 | } |
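// For memset the MVC source trails the destination by one byte: MVC copies
// left to right, so the byte stored at the original DestDisp is propagated
// into DestDisp+1, DestDisp+2 and so on, replicating the fill byte across
// the whole range.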
10274 | |
10275 | MachineOperand &LengthMO = MI.getOperand(i: IsMemset ? 2 : 4); |
10276 | bool IsImmForm = LengthMO.isImm(); |
10277 | bool IsRegForm = !IsImmForm; |
10278 | |
10279 | // Build and insert one Opcode of Length, with special treatment for memset. |
10280 | auto insertMemMemOp = [&](MachineBasicBlock *InsMBB, |
10281 | MachineBasicBlock::iterator InsPos, |
10282 | MachineOperand DBase, uint64_t DDisp, |
10283 | MachineOperand SBase, uint64_t SDisp, |
10284 | unsigned Length) -> void { |
10285 | assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
10286 | if (IsMemset) { |
10287 | MachineOperand ByteMO = earlyUseOperand(Op: MI.getOperand(i: 3)); |
10288 | if (ByteMO.isImm()) |
10289 | BuildMI(BB&: *InsMBB, I: InsPos, MIMD: DL, MCID: TII->get(Opcode: SystemZ::MVI)) |
10290 | .add(MO: SBase).addImm(Val: SDisp).add(MO: ByteMO); |
10291 | else |
10292 | BuildMI(BB&: *InsMBB, I: InsPos, MIMD: DL, MCID: TII->get(Opcode: SystemZ::STC)) |
10293 | .add(MO: ByteMO).add(MO: SBase).addImm(Val: SDisp).addReg(RegNo: 0); |
10294 | if (--Length == 0) |
10295 | return; |
10296 | } |
10297 | BuildMI(BB&: *MBB, I: InsPos, MIMD: DL, MCID: TII->get(Opcode)) |
10298 | .add(MO: DBase).addImm(Val: DDisp).addImm(Val: Length) |
10299 | .add(MO: SBase).addImm(Val: SDisp) |
10300 | .setMemRefs(MI.memoperands()); |
10301 | }; |
10302 | |
10303 | bool NeedsLoop = false; |
10304 | uint64_t ImmLength = 0; |
10305 | Register LenAdjReg = SystemZ::NoRegister; |
10306 | if (IsImmForm) { |
10307 | ImmLength = LengthMO.getImm(); |
10308 | ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment. |
10309 | if (ImmLength == 0) { |
10310 | MI.eraseFromParent(); |
10311 | return MBB; |
10312 | } |
10313 | if (Opcode == SystemZ::CLC) { |
10314 | if (ImmLength > 3 * 256) |
10315 | // A two-CLC sequence is a clear win over a loop, not least because |
10316 | // it needs only one branch. A three-CLC sequence needs the same |
10317 | // number of branches as a loop (i.e. 2), but is shorter. That |
10318 | // brings us to lengths greater than 768 bytes. It seems relatively |
10319 | // likely that a difference will be found within the first 768 bytes, |
10320 | // so we just optimize for the smallest number of branch |
10321 | // instructions, in order to avoid polluting the prediction buffer |
10322 | // too much. |
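// (E.g. 512 bytes: CLC; JLH EndMBB; CLC uses a single conditional branch,
// where a loop would need two.)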
10323 | NeedsLoop = true; |
10324 | } else if (ImmLength > 6 * 256) |
10325 | // The heuristic we use is to prefer loops for anything that would |
10326 | // require 7 or more MVCs. With these kinds of sizes there isn't much |
10327 | // to choose between straight-line code and looping code, since the |
10328 | // time will be dominated by the MVCs themselves. |
10329 | NeedsLoop = true; |
10330 | } else { |
10331 | NeedsLoop = true; |
10332 | LenAdjReg = LengthMO.getReg(); |
10333 | } |
10334 | |
10335 | // When generating more than one CLC, all but the last will need to |
10336 | // branch to the end when a difference is found. |
10337 | MachineBasicBlock *EndMBB = |
10338 | (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop) |
10339 | ? SystemZ::splitBlockAfter(MI, MBB) |
10340 | : nullptr); |
10341 | |
10342 | if (NeedsLoop) { |
10343 | Register StartCountReg = |
10344 | MRI.createVirtualRegister(RegClass: &SystemZ::GR64BitRegClass); |
10345 | if (IsImmForm) { |
10346 | TII->loadImmediate(MBB&: *MBB, MBBI: MI, Reg: StartCountReg, Value: ImmLength / 256); |
10347 | ImmLength &= 255; |
10348 | } else { |
10349 | BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::SRLG), DestReg: StartCountReg) |
10350 | .addReg(RegNo: LenAdjReg) |
10351 | .addReg(RegNo: 0) |
10352 | .addImm(Val: 8); |
10353 | } |
10354 | |
10355 | bool HaveSingleBase = DestBase.isIdenticalTo(Other: SrcBase); |
10356 | auto loadZeroAddress = [&]() -> MachineOperand { |
10357 | Register Reg = MRI.createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass); |
10358 | BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LGHI), DestReg: Reg).addImm(Val: 0); |
10359 | return MachineOperand::CreateReg(Reg, isDef: false); |
10360 | }; |
10361 | if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister) |
10362 | DestBase = loadZeroAddress(); |
10363 | if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister) |
10364 | SrcBase = HaveSingleBase ? DestBase : loadZeroAddress(); |
10365 | |
10366 | MachineBasicBlock *StartMBB = nullptr; |
10367 | MachineBasicBlock *LoopMBB = nullptr; |
10368 | MachineBasicBlock *NextMBB = nullptr; |
10369 | MachineBasicBlock *DoneMBB = nullptr; |
10370 | MachineBasicBlock *AllDoneMBB = nullptr; |
10371 | |
10372 | Register StartSrcReg = forceReg(MI, Base&: SrcBase, TII); |
10373 | Register StartDestReg = |
10374 | (HaveSingleBase ? StartSrcReg : forceReg(MI, Base&: DestBase, TII)); |
10375 | |
10376 | const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; |
10377 | Register ThisSrcReg = MRI.createVirtualRegister(RegClass: RC); |
10378 | Register ThisDestReg = |
10379 | (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RegClass: RC)); |
10380 | Register NextSrcReg = MRI.createVirtualRegister(RegClass: RC); |
10381 | Register NextDestReg = |
10382 | (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RegClass: RC)); |
10383 | RC = &SystemZ::GR64BitRegClass; |
10384 | Register ThisCountReg = MRI.createVirtualRegister(RegClass: RC); |
10385 | Register NextCountReg = MRI.createVirtualRegister(RegClass: RC); |
10386 | |
10387 | if (IsRegForm) { |
10388 | AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB); |
10389 | StartMBB = SystemZ::emitBlockAfter(MBB); |
10390 | LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB); |
10391 | NextMBB = (EndMBB ? SystemZ::emitBlockAfter(MBB: LoopMBB) : LoopMBB); |
10392 | DoneMBB = SystemZ::emitBlockAfter(MBB: NextMBB); |
10393 | |
10394 | // MBB: |
10395 | // # Jump to AllDoneMBB if LenAdjReg denotes a length of 0, or fall thru to StartMBB.
10396 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CGHI)) |
10397 | .addReg(RegNo: LenAdjReg).addImm(Val: IsMemset ? -2 : -1); |
10398 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC)) |
10399 | .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_EQ) |
10400 | .addMBB(MBB: AllDoneMBB); |
10401 | MBB->addSuccessor(Succ: AllDoneMBB); |
10402 | if (!IsMemset) |
10403 | MBB->addSuccessor(Succ: StartMBB); |
10404 | else { |
10405 | // MemsetOneCheckMBB: |
10406 | // # Jump to MemsetOneMBB for a memset of length 1, or |
10407 | // # fall thru to StartMBB. |
10408 | MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB); |
10409 | MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(MBB: &*MF.rbegin()); |
10410 | MBB->addSuccessor(Succ: MemsetOneCheckMBB); |
10411 | MBB = MemsetOneCheckMBB; |
10412 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CGHI)) |
10413 | .addReg(RegNo: LenAdjReg).addImm(Val: -1); |
10414 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC)) |
10415 | .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_EQ) |
10416 | .addMBB(MBB: MemsetOneMBB); |
10417 | MBB->addSuccessor(Succ: MemsetOneMBB, Prob: {10, 100}); |
10418 | MBB->addSuccessor(Succ: StartMBB, Prob: {90, 100}); |
10419 | |
10420 | // MemsetOneMBB: |
10421 | // # Jump back to AllDoneMBB after a single MVI or STC. |
10422 | MBB = MemsetOneMBB; |
10423 | insertMemMemOp(MBB, MBB->end(), |
10424 | MachineOperand::CreateReg(Reg: StartDestReg, isDef: false), DestDisp, |
10425 | MachineOperand::CreateReg(Reg: StartSrcReg, isDef: false), SrcDisp, |
10426 | 1); |
10427 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::J)).addMBB(MBB: AllDoneMBB); |
10428 | MBB->addSuccessor(Succ: AllDoneMBB); |
10429 | } |
10430 | |
10431 | // StartMBB: |
10432 | // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB. |
10433 | MBB = StartMBB; |
10434 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CGHI)) |
10435 | .addReg(RegNo: StartCountReg).addImm(Val: 0); |
10436 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC)) |
10437 | .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_EQ) |
10438 | .addMBB(MBB: DoneMBB); |
10439 | MBB->addSuccessor(Succ: DoneMBB); |
10440 | MBB->addSuccessor(Succ: LoopMBB); |
10441 | } else {
10443 | StartMBB = MBB; |
10444 | DoneMBB = SystemZ::splitBlockBefore(MI, MBB); |
10445 | LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB); |
10446 | NextMBB = (EndMBB ? SystemZ::emitBlockAfter(MBB: LoopMBB) : LoopMBB); |
10447 | |
10448 | // StartMBB: |
10449 | // # fall through to LoopMBB |
10450 | MBB->addSuccessor(Succ: LoopMBB); |
10451 | |
10452 | DestBase = MachineOperand::CreateReg(Reg: NextDestReg, isDef: false); |
10453 | SrcBase = MachineOperand::CreateReg(Reg: NextSrcReg, isDef: false); |
10454 | if (EndMBB && !ImmLength) |
10455 | // If the loop handled the whole CLC range, DoneMBB will be empty with |
10456 | // CC live-through into EndMBB, so add it as live-in. |
10457 | DoneMBB->addLiveIn(PhysReg: SystemZ::CC); |
10458 | } |
10459 | |
10460 | // LoopMBB: |
10461 | // %ThisDestReg = phi [ %StartDestReg, StartMBB ], |
10462 | // [ %NextDestReg, NextMBB ] |
10463 | // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ], |
10464 | // [ %NextSrcReg, NextMBB ] |
10465 | // %ThisCountReg = phi [ %StartCountReg, StartMBB ], |
10466 | // [ %NextCountReg, NextMBB ] |
10467 | // ( PFD 2, 768+DestDisp(%ThisDestReg) ) |
10468 | // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg) |
10469 | // ( JLH EndMBB ) |
10470 | // |
10471 | // The prefetch is used only for MVC. The JLH is used only for CLC. |
10472 | MBB = LoopMBB; |
10473 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: ThisDestReg) |
10474 | .addReg(RegNo: StartDestReg).addMBB(MBB: StartMBB) |
10475 | .addReg(RegNo: NextDestReg).addMBB(MBB: NextMBB); |
10476 | if (!HaveSingleBase) |
10477 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: ThisSrcReg) |
10478 | .addReg(RegNo: StartSrcReg).addMBB(MBB: StartMBB) |
10479 | .addReg(RegNo: NextSrcReg).addMBB(MBB: NextMBB); |
10480 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: ThisCountReg) |
10481 | .addReg(RegNo: StartCountReg).addMBB(MBB: StartMBB) |
10482 | .addReg(RegNo: NextCountReg).addMBB(MBB: NextMBB); |
10483 | if (Opcode == SystemZ::MVC) |
10484 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PFD)) |
10485 | .addImm(Val: SystemZ::PFD_WRITE) |
10486 | .addReg(RegNo: ThisDestReg).addImm(Val: DestDisp - IsMemset + 768).addReg(RegNo: 0); |
10487 | insertMemMemOp(MBB, MBB->end(), |
10488 | MachineOperand::CreateReg(Reg: ThisDestReg, isDef: false), DestDisp, |
10489 | MachineOperand::CreateReg(Reg: ThisSrcReg, isDef: false), SrcDisp, 256); |
10490 | if (EndMBB) { |
10491 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC)) |
10492 | .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_NE) |
10493 | .addMBB(MBB: EndMBB); |
10494 | MBB->addSuccessor(Succ: EndMBB); |
10495 | MBB->addSuccessor(Succ: NextMBB); |
10496 | } |
10497 | |
10498 | // NextMBB: |
10499 | // %NextDestReg = LA 256(%ThisDestReg) |
10500 | // %NextSrcReg = LA 256(%ThisSrcReg) |
10501 | // %NextCountReg = AGHI %ThisCountReg, -1 |
10502 | // CGHI %NextCountReg, 0 |
10503 | // JLH LoopMBB |
10504 | // # fall through to DoneMBB |
10505 | // |
10506 | // The AGHI, CGHI and JLH should be converted to BRCTG by later passes. |
10507 | MBB = NextMBB; |
10508 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LA), DestReg: NextDestReg) |
10509 | .addReg(RegNo: ThisDestReg).addImm(Val: 256).addReg(RegNo: 0); |
10510 | if (!HaveSingleBase) |
10511 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LA), DestReg: NextSrcReg) |
10512 | .addReg(RegNo: ThisSrcReg).addImm(Val: 256).addReg(RegNo: 0); |
10513 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::AGHI), DestReg: NextCountReg) |
10514 | .addReg(RegNo: ThisCountReg).addImm(Val: -1); |
10515 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CGHI)) |
10516 | .addReg(RegNo: NextCountReg).addImm(Val: 0); |
10517 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC)) |
10518 | .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_NE) |
10519 | .addMBB(MBB: LoopMBB); |
10520 | MBB->addSuccessor(Succ: LoopMBB); |
10521 | MBB->addSuccessor(Succ: DoneMBB); |
10522 | |
10523 | MBB = DoneMBB; |
10524 | if (IsRegForm) { |
10525 | // DoneMBB: |
10526 | // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run. |
10527 | // # Use EXecute Relative Long for the remainder of the bytes. The target |
10528 | // instruction of the EXRL will have a length field of 1 since 0 is an |
10529 | // illegal value. The number of bytes processed becomes (%LenAdjReg & |
10530 | // 0xff) + 1. |
10531 | // # Fall through to AllDoneMBB. |
10532 | Register RemSrcReg = MRI.createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass); |
10533 | Register RemDestReg = HaveSingleBase ? RemSrcReg |
10534 | : MRI.createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass); |
10535 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: RemDestReg) |
10536 | .addReg(RegNo: StartDestReg).addMBB(MBB: StartMBB) |
10537 | .addReg(RegNo: NextDestReg).addMBB(MBB: NextMBB); |
10538 | if (!HaveSingleBase) |
10539 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: RemSrcReg) |
10540 | .addReg(RegNo: StartSrcReg).addMBB(MBB: StartMBB) |
10541 | .addReg(RegNo: NextSrcReg).addMBB(MBB: NextMBB); |
10542 | if (IsMemset) |
10543 | insertMemMemOp(MBB, MBB->end(), |
10544 | MachineOperand::CreateReg(Reg: RemDestReg, isDef: false), DestDisp, |
10545 | MachineOperand::CreateReg(Reg: RemSrcReg, isDef: false), SrcDisp, 1); |
10546 | MachineInstrBuilder EXRL_MIB = |
10547 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::EXRL_Pseudo)) |
10548 | .addImm(Val: Opcode) |
10549 | .addReg(RegNo: LenAdjReg) |
10550 | .addReg(RegNo: RemDestReg).addImm(Val: DestDisp) |
10551 | .addReg(RegNo: RemSrcReg).addImm(Val: SrcDisp); |
10552 | MBB->addSuccessor(Succ: AllDoneMBB); |
10553 | MBB = AllDoneMBB; |
10554 | if (Opcode != SystemZ::MVC) { |
10555 | EXRL_MIB.addReg(RegNo: SystemZ::CC, flags: RegState::ImplicitDefine); |
10556 | if (EndMBB) |
10557 | MBB->addLiveIn(PhysReg: SystemZ::CC); |
10558 | } |
10559 | } |
10560 | MF.getProperties().resetNoPHIs(); |
10561 | } |
10562 | |
10563 | // Handle any remaining bytes with straight-line code. |
10564 | while (ImmLength > 0) { |
10565 | uint64_t ThisLength = std::min(a: ImmLength, b: uint64_t(256)); |
10566 | // The previous iteration might have created out-of-range displacements. |
10567 | // Apply them using LA/LAY if so. |
10568 | foldDisplIfNeeded(DestBase, DestDisp); |
10569 | foldDisplIfNeeded(SrcBase, SrcDisp); |
10570 | insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength); |
10571 | DestDisp += ThisLength; |
10572 | SrcDisp += ThisLength; |
10573 | ImmLength -= ThisLength; |
10574 | // If there's another CLC to go, branch to the end if a difference |
10575 | // was found. |
10576 | if (EndMBB && ImmLength > 0) { |
10577 | MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB); |
10578 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC)) |
10579 | .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_NE) |
10580 | .addMBB(MBB: EndMBB); |
10581 | MBB->addSuccessor(Succ: EndMBB); |
10582 | MBB->addSuccessor(Succ: NextMBB); |
10583 | MBB = NextMBB; |
10584 | } |
10585 | } |
10586 | if (EndMBB) { |
10587 | MBB->addSuccessor(Succ: EndMBB); |
10588 | MBB = EndMBB; |
10589 | MBB->addLiveIn(PhysReg: SystemZ::CC); |
10590 | } |
10591 | |
10592 | MI.eraseFromParent(); |
10593 | return MBB; |
10594 | } |
10595 | |
10596 | // Decompose string pseudo-instruction MI into a loop that continually performs |
10597 | // Opcode until CC != 3. |
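// CC 3 means the instruction stopped after processing a CPU-determined
// number of bytes without completing the operation, so the loop simply
// reissues it.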
10598 | MachineBasicBlock *SystemZTargetLowering::emitStringWrapper( |
10599 | MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { |
10600 | MachineFunction &MF = *MBB->getParent(); |
10601 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
10602 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
10603 | DebugLoc DL = MI.getDebugLoc(); |
10604 | |
10605 | Register End1Reg = MI.getOperand(i: 0).getReg();
10606 | Register Start1Reg = MI.getOperand(i: 1).getReg();
10607 | Register Start2Reg = MI.getOperand(i: 2).getReg();
10608 | Register CharReg = MI.getOperand(i: 3).getReg();
10609 | |
10610 | const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass; |
10611 | Register This1Reg = MRI.createVirtualRegister(RegClass: RC);
10612 | Register This2Reg = MRI.createVirtualRegister(RegClass: RC);
10613 | Register End2Reg = MRI.createVirtualRegister(RegClass: RC);
10614 | |
10615 | MachineBasicBlock *StartMBB = MBB; |
10616 | MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); |
10617 | MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB); |
10618 | |
10619 | // StartMBB: |
10620 | // # fall through to LoopMBB |
10621 | MBB->addSuccessor(Succ: LoopMBB); |
10622 | |
10623 | // LoopMBB: |
10624 | // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ] |
10625 | // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ] |
10626 | // R0L = %CharReg |
10627 | // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L |
10628 | // JO LoopMBB |
10629 | // # fall through to DoneMBB |
10630 | // |
10631 | // The load of R0L can be hoisted by post-RA LICM. |
10632 | MBB = LoopMBB; |
10633 | |
10634 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: This1Reg) |
10635 | .addReg(RegNo: Start1Reg).addMBB(MBB: StartMBB) |
10636 | .addReg(RegNo: End1Reg).addMBB(MBB: LoopMBB); |
10637 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: This2Reg) |
10638 | .addReg(RegNo: Start2Reg).addMBB(MBB: StartMBB) |
10639 | .addReg(RegNo: End2Reg).addMBB(MBB: LoopMBB); |
10640 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: SystemZ::R0L).addReg(RegNo: CharReg); |
10641 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode)) |
10642 | .addReg(RegNo: End1Reg, flags: RegState::Define).addReg(RegNo: End2Reg, flags: RegState::Define) |
10643 | .addReg(RegNo: This1Reg).addReg(RegNo: This2Reg); |
10644 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC)) |
10645 | .addImm(Val: SystemZ::CCMASK_ANY).addImm(Val: SystemZ::CCMASK_3).addMBB(MBB: LoopMBB); |
10646 | MBB->addSuccessor(Succ: LoopMBB); |
10647 | MBB->addSuccessor(Succ: DoneMBB); |
10648 | |
10649 | DoneMBB->addLiveIn(PhysReg: SystemZ::CC); |
10650 | |
10651 | MI.eraseFromParent(); |
10652 | return DoneMBB; |
10653 | } |
10654 | |
10655 | // Update TBEGIN instruction with final opcode and register clobbers. |
10656 | MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin( |
10657 | MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode, |
10658 | bool NoFloat) const { |
10659 | MachineFunction &MF = *MBB->getParent(); |
10660 | const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); |
10661 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
10662 | |
10663 | // Update opcode. |
10664 | MI.setDesc(TII->get(Opcode)); |
10665 | |
10666 | // We cannot handle a TBEGIN that clobbers the stack or frame pointer. |
10667 | // Make sure to add the corresponding GRSM bits if they are missing. |
10668 | uint64_t Control = MI.getOperand(i: 2).getImm(); |
10669 | static const unsigned GPRControlBit[16] = { |
10670 | 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, |
10671 | 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 |
10672 | }; |
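// Each GRSM bit controls an even/odd register pair, which is why consecutive
// table entries repeat.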
10673 | Control |= GPRControlBit[15]; |
10674 | if (TFI->hasFP(MF)) |
10675 | Control |= GPRControlBit[11]; |
10676 | MI.getOperand(i: 2).setImm(Control); |
10677 | |
10678 | // Add GPR clobbers. |
10679 | for (int I = 0; I < 16; I++) { |
10680 | if ((Control & GPRControlBit[I]) == 0) { |
10681 | unsigned Reg = SystemZMC::GR64Regs[I]; |
10682 | MI.addOperand(Op: MachineOperand::CreateReg(Reg, isDef: true, isImp: true)); |
10683 | } |
10684 | } |
10685 | |
10686 | // Add FPR/VR clobbers. |
10687 | if (!NoFloat && (Control & 4) != 0) { |
10688 | if (Subtarget.hasVector()) { |
10689 | for (unsigned Reg : SystemZMC::VR128Regs) { |
10690 | MI.addOperand(Op: MachineOperand::CreateReg(Reg, isDef: true, isImp: true)); |
10691 | } |
10692 | } else { |
10693 | for (unsigned Reg : SystemZMC::FP64Regs) { |
10694 | MI.addOperand(Op: MachineOperand::CreateReg(Reg, isDef: true, isImp: true)); |
10695 | } |
10696 | } |
10697 | } |
10698 | |
10699 | return MBB; |
10700 | } |
10701 | |
10702 | MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0( |
10703 | MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { |
10704 | MachineFunction &MF = *MBB->getParent(); |
10705 | MachineRegisterInfo *MRI = &MF.getRegInfo(); |
10706 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
10707 | DebugLoc DL = MI.getDebugLoc(); |
10708 | |
10709 | Register SrcReg = MI.getOperand(i: 0).getReg(); |
10710 | |
10711 | // Create new virtual register of the same class as source. |
10712 | const TargetRegisterClass *RC = MRI->getRegClass(Reg: SrcReg); |
10713 | Register DstReg = MRI->createVirtualRegister(RegClass: RC); |
10714 | |
10715 | // Replace pseudo with a normal load-and-test that models the def as |
10716 | // well. |
10717 | BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode), DestReg: DstReg) |
10718 | .addReg(RegNo: SrcReg) |
10719 | .setMIFlags(MI.getFlags()); |
10720 | MI.eraseFromParent(); |
10721 | |
10722 | return MBB; |
10723 | } |
10724 | |
10725 | MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca( |
10726 | MachineInstr &MI, MachineBasicBlock *MBB) const { |
10727 | MachineFunction &MF = *MBB->getParent(); |
10728 | MachineRegisterInfo *MRI = &MF.getRegInfo(); |
10729 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
10730 | DebugLoc DL = MI.getDebugLoc(); |
10731 | const unsigned ProbeSize = getStackProbeSize(MF); |
10732 | Register DstReg = MI.getOperand(i: 0).getReg(); |
10733 | Register SizeReg = MI.getOperand(i: 2).getReg(); |
10734 | |
10735 | MachineBasicBlock *StartMBB = MBB; |
10736 | MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB); |
10737 | MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(MBB: StartMBB); |
10738 | MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(MBB: LoopTestMBB); |
10739 | MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(MBB: LoopBodyMBB); |
10740 | MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(MBB: TailTestMBB); |
10741 | |
10742 | MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(), |
10743 | F: MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, Size: 8, BaseAlignment: Align(1)); |
10744 | |
10745 | Register PHIReg = MRI->createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass); |
10746 | Register IncReg = MRI->createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass); |
10747 | |
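// The allocation is done in ProbeSize steps: each iteration lowers %r15 by
// ProbeSize and touches the newly allocated memory with a volatile load, so
// that a guard page is hit before the memory can be used.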
10748 | // LoopTestMBB |
10749 | // BRC TailTestMBB |
10750 | // # fallthrough to LoopBodyMBB |
10751 | StartMBB->addSuccessor(Succ: LoopTestMBB); |
10752 | MBB = LoopTestMBB; |
10753 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: PHIReg) |
10754 | .addReg(RegNo: SizeReg) |
10755 | .addMBB(MBB: StartMBB) |
10756 | .addReg(RegNo: IncReg) |
10757 | .addMBB(MBB: LoopBodyMBB); |
10758 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CLGFI)) |
10759 | .addReg(RegNo: PHIReg) |
10760 | .addImm(Val: ProbeSize); |
10761 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC)) |
10762 | .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_LT) |
10763 | .addMBB(MBB: TailTestMBB); |
10764 | MBB->addSuccessor(Succ: LoopBodyMBB); |
10765 | MBB->addSuccessor(Succ: TailTestMBB); |
10766 | |
10767 | // LoopBodyMBB: Allocate and probe by means of a volatile compare. |
10768 | // J LoopTestMBB |
10769 | MBB = LoopBodyMBB; |
10770 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::SLGFI), DestReg: IncReg) |
10771 | .addReg(RegNo: PHIReg) |
10772 | .addImm(Val: ProbeSize); |
10773 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::SLGFI), DestReg: SystemZ::R15D) |
10774 | .addReg(RegNo: SystemZ::R15D) |
10775 | .addImm(Val: ProbeSize); |
10776 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CG)).addReg(RegNo: SystemZ::R15D) |
10777 | .addReg(RegNo: SystemZ::R15D).addImm(Val: ProbeSize - 8).addReg(RegNo: 0) |
10778 | .setMemRefs(VolLdMMO); |
10779 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::J)).addMBB(MBB: LoopTestMBB); |
10780 | MBB->addSuccessor(Succ: LoopTestMBB); |
10781 | |
10782 | // TailTestMBB |
10783 | // BRC DoneMBB |
10784 | // # fallthrough to TailMBB |
10785 | MBB = TailTestMBB; |
10786 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CGHI)) |
10787 | .addReg(RegNo: PHIReg) |
10788 | .addImm(Val: 0); |
10789 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC)) |
10790 | .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_EQ) |
10791 | .addMBB(MBB: DoneMBB); |
10792 | MBB->addSuccessor(Succ: TailMBB); |
10793 | MBB->addSuccessor(Succ: DoneMBB); |
10794 | |
10795 | // TailMBB |
10796 | // # fallthrough to DoneMBB |
10797 | MBB = TailMBB; |
10798 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::SLGR), DestReg: SystemZ::R15D) |
10799 | .addReg(RegNo: SystemZ::R15D) |
10800 | .addReg(RegNo: PHIReg); |
10801 | BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CG)).addReg(RegNo: SystemZ::R15D) |
10802 | .addReg(RegNo: SystemZ::R15D).addImm(Val: -8).addReg(RegNo: PHIReg) |
10803 | .setMemRefs(VolLdMMO); |
10804 | MBB->addSuccessor(Succ: DoneMBB); |
10805 | |
10806 | // DoneMBB |
10807 | MBB = DoneMBB; |
10808 | BuildMI(BB&: *MBB, I: MBB->begin(), MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg) |
10809 | .addReg(RegNo: SystemZ::R15D); |
10810 | |
10811 | MI.eraseFromParent(); |
10812 | return DoneMBB; |
10813 | } |
10814 | |
10815 | SDValue SystemZTargetLowering:: |
10816 | getBackchainAddress(SDValue SP, SelectionDAG &DAG) const { |
10817 | MachineFunction &MF = DAG.getMachineFunction(); |
10818 | auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>(); |
10819 | SDLoc DL(SP); |
10820 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: SP, |
10821 | N2: DAG.getIntPtrConstant(Val: TFL->getBackchainOffset(MF), DL)); |
10822 | } |
10823 | |
10824 | MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( |
10825 | MachineInstr &MI, MachineBasicBlock *MBB) const { |
10826 | switch (MI.getOpcode()) { |
10827 | case SystemZ::ADJCALLSTACKDOWN: |
10828 | case SystemZ::ADJCALLSTACKUP: |
10829 | return emitAdjCallStack(MI, BB: MBB); |
10830 | |
10831 | case SystemZ::Select32: |
10832 | case SystemZ::Select64: |
10833 | case SystemZ::Select128: |
10834 | case SystemZ::SelectF32: |
10835 | case SystemZ::SelectF64: |
10836 | case SystemZ::SelectF128: |
10837 | case SystemZ::SelectVR32: |
10838 | case SystemZ::SelectVR64: |
10839 | case SystemZ::SelectVR128: |
10840 | return emitSelect(MI, MBB); |
10841 | |
10842 | case SystemZ::CondStore8Mux: |
10843 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STCMux, STOCOpcode: 0, Invert: false); |
10844 | case SystemZ::CondStore8MuxInv: |
10845 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STCMux, STOCOpcode: 0, Invert: true); |
10846 | case SystemZ::CondStore16Mux: |
10847 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STHMux, STOCOpcode: 0, Invert: false); |
10848 | case SystemZ::CondStore16MuxInv: |
10849 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STHMux, STOCOpcode: 0, Invert: true); |
10850 | case SystemZ::CondStore32Mux: |
10851 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STMux, STOCOpcode: SystemZ::STOCMux, Invert: false); |
10852 | case SystemZ::CondStore32MuxInv: |
10853 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STMux, STOCOpcode: SystemZ::STOCMux, Invert: true); |
10854 | case SystemZ::CondStore8: |
10855 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STC, STOCOpcode: 0, Invert: false); |
10856 | case SystemZ::CondStore8Inv: |
10857 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STC, STOCOpcode: 0, Invert: true); |
10858 | case SystemZ::CondStore16: |
10859 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STH, STOCOpcode: 0, Invert: false); |
10860 | case SystemZ::CondStore16Inv: |
10861 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STH, STOCOpcode: 0, Invert: true); |
10862 | case SystemZ::CondStore32: |
10863 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::ST, STOCOpcode: SystemZ::STOC, Invert: false); |
10864 | case SystemZ::CondStore32Inv: |
10865 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::ST, STOCOpcode: SystemZ::STOC, Invert: true); |
10866 | case SystemZ::CondStore64: |
10867 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STG, STOCOpcode: SystemZ::STOCG, Invert: false); |
10868 | case SystemZ::CondStore64Inv: |
10869 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STG, STOCOpcode: SystemZ::STOCG, Invert: true); |
10870 | case SystemZ::CondStoreF32: |
10871 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STE, STOCOpcode: 0, Invert: false); |
10872 | case SystemZ::CondStoreF32Inv: |
10873 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STE, STOCOpcode: 0, Invert: true); |
10874 | case SystemZ::CondStoreF64: |
10875 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STD, STOCOpcode: 0, Invert: false); |
10876 | case SystemZ::CondStoreF64Inv: |
10877 | return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STD, STOCOpcode: 0, Invert: true); |
10878 | |
10879 | case SystemZ::SCmp128Hi: |
10880 | return emitICmp128Hi(MI, MBB, Unsigned: false); |
10881 | case SystemZ::UCmp128Hi: |
10882 | return emitICmp128Hi(MI, MBB, Unsigned: true); |
10883 | |
10884 | case SystemZ::PAIR128: |
10885 | return emitPair128(MI, MBB); |
10886 | case SystemZ::AEXT128: |
10887 | return emitExt128(MI, MBB, ClearEven: false); |
10888 | case SystemZ::ZEXT128: |
10889 | return emitExt128(MI, MBB, ClearEven: true); |
10890 | |
10891 | case SystemZ::ATOMIC_SWAPW: |
10892 | return emitAtomicLoadBinary(MI, MBB, BinOpcode: 0); |
10893 | |
10894 | case SystemZ::ATOMIC_LOADW_AR: |
10895 | return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::AR); |
10896 | case SystemZ::ATOMIC_LOADW_AFI: |
10897 | return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::AFI); |
10898 | |
10899 | case SystemZ::ATOMIC_LOADW_SR: |
10900 | return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::SR); |
10901 | |
10902 | case SystemZ::ATOMIC_LOADW_NR: |
10903 | return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::NR); |
10904 | case SystemZ::ATOMIC_LOADW_NILH: |
10905 | return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::NILH); |
10906 | |
10907 | case SystemZ::ATOMIC_LOADW_OR: |
10908 | return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::OR); |
10909 | case SystemZ::ATOMIC_LOADW_OILH: |
10910 | return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::OILH); |
10911 | |
10912 | case SystemZ::ATOMIC_LOADW_XR: |
10913 | return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::XR); |
10914 | case SystemZ::ATOMIC_LOADW_XILF: |
10915 | return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::XILF); |
10916 | |
10917 | case SystemZ::ATOMIC_LOADW_NRi: |
10918 | return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::NR, Invert: true); |
10919 | case SystemZ::ATOMIC_LOADW_NILHi: |
10920 | return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::NILH, Invert: true); |
10921 | |
10922 | case SystemZ::ATOMIC_LOADW_MIN: |
10923 | return emitAtomicLoadMinMax(MI, MBB, CompareOpcode: SystemZ::CR, KeepOldMask: SystemZ::CCMASK_CMP_LE); |
10924 | case SystemZ::ATOMIC_LOADW_MAX: |
10925 | return emitAtomicLoadMinMax(MI, MBB, CompareOpcode: SystemZ::CR, KeepOldMask: SystemZ::CCMASK_CMP_GE); |
10926 | case SystemZ::ATOMIC_LOADW_UMIN: |
10927 | return emitAtomicLoadMinMax(MI, MBB, CompareOpcode: SystemZ::CLR, KeepOldMask: SystemZ::CCMASK_CMP_LE); |
10928 | case SystemZ::ATOMIC_LOADW_UMAX: |
10929 | return emitAtomicLoadMinMax(MI, MBB, CompareOpcode: SystemZ::CLR, KeepOldMask: SystemZ::CCMASK_CMP_GE); |
10930 | |
10931 | case SystemZ::ATOMIC_CMP_SWAPW: |
10932 | return emitAtomicCmpSwapW(MI, MBB); |
10933 | case SystemZ::MVCImm: |
10934 | case SystemZ::MVCReg: |
10935 | return emitMemMemWrapper(MI, MBB, Opcode: SystemZ::MVC); |
10936 | case SystemZ::NCImm: |
10937 | return emitMemMemWrapper(MI, MBB, Opcode: SystemZ::NC); |
10938 | case SystemZ::OCImm: |
10939 | return emitMemMemWrapper(MI, MBB, Opcode: SystemZ::OC); |
10940 | case SystemZ::XCImm: |
10941 | case SystemZ::XCReg: |
10942 | return emitMemMemWrapper(MI, MBB, Opcode: SystemZ::XC); |
10943 | case SystemZ::CLCImm: |
10944 | case SystemZ::CLCReg: |
10945 | return emitMemMemWrapper(MI, MBB, Opcode: SystemZ::CLC); |
10946 | case SystemZ::MemsetImmImm: |
10947 | case SystemZ::MemsetImmReg: |
10948 | case SystemZ::MemsetRegImm: |
10949 | case SystemZ::MemsetRegReg: |
10950 | return emitMemMemWrapper(MI, MBB, Opcode: SystemZ::MVC, IsMemset: true);
10951 | case SystemZ::CLSTLoop: |
10952 | return emitStringWrapper(MI, MBB, Opcode: SystemZ::CLST); |
10953 | case SystemZ::MVSTLoop: |
10954 | return emitStringWrapper(MI, MBB, Opcode: SystemZ::MVST); |
10955 | case SystemZ::SRSTLoop: |
10956 | return emitStringWrapper(MI, MBB, Opcode: SystemZ::SRST); |
10957 | case SystemZ::TBEGIN: |
10958 | return emitTransactionBegin(MI, MBB, Opcode: SystemZ::TBEGIN, NoFloat: false); |
10959 | case SystemZ::TBEGIN_nofloat: |
10960 | return emitTransactionBegin(MI, MBB, Opcode: SystemZ::TBEGIN, NoFloat: true); |
10961 | case SystemZ::TBEGINC: |
10962 | return emitTransactionBegin(MI, MBB, Opcode: SystemZ::TBEGINC, NoFloat: true); |
10963 | case SystemZ::LTEBRCompare_Pseudo: |
10964 | return emitLoadAndTestCmp0(MI, MBB, Opcode: SystemZ::LTEBR); |
10965 | case SystemZ::LTDBRCompare_Pseudo: |
10966 | return emitLoadAndTestCmp0(MI, MBB, Opcode: SystemZ::LTDBR); |
10967 | case SystemZ::LTXBRCompare_Pseudo: |
10968 | return emitLoadAndTestCmp0(MI, MBB, Opcode: SystemZ::LTXBR); |
10969 | |
10970 | case SystemZ::PROBED_ALLOCA: |
10971 | return emitProbedAlloca(MI, MBB); |
10972 | case SystemZ::EH_SjLj_SetJmp: |
10973 | return emitEHSjLjSetJmp(MI, MBB); |
10974 | case SystemZ::EH_SjLj_LongJmp: |
10975 | return emitEHSjLjLongJmp(MI, MBB); |
10976 | |
10977 | case TargetOpcode::STACKMAP: |
10978 | case TargetOpcode::PATCHPOINT: |
10979 | return emitPatchPoint(MI, MBB); |
10980 | |
10981 | default: |
10982 | llvm_unreachable("Unexpected instr type to insert");
10983 | } |
10984 | } |
10985 | |
10986 | // This is used only by the isel schedulers, and is needed only to prevent
10987 | // the compiler from crashing when list-ilp is used.
10988 | const TargetRegisterClass * |
10989 | SystemZTargetLowering::getRepRegClassFor(MVT VT) const { |
10990 | if (VT == MVT::Untyped) |
10991 | return &SystemZ::ADDR128BitRegClass; |
10992 | return TargetLowering::getRepRegClassFor(VT); |
10993 | } |
10994 | |
10995 | SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op, |
10996 | SelectionDAG &DAG) const { |
10997 | SDLoc dl(Op); |
10998 | /* |
10999 | The rounding method is in FPC Byte 3 bits 6-7, and has the following |
11000 | settings: |
11001 | 00 Round to nearest |
11002 | 01 Round to 0 |
11003 | 10 Round to +inf |
11004 | 11 Round to -inf |
11005 | |
11006 | FLT_ROUNDS, on the other hand, expects the following: |
11007 | -1 Undefined |
11008 | 0 Round to 0 |
11009 | 1 Round to nearest |
11010 | 2 Round to +inf |
11011 | 3 Round to -inf |
11012 | */ |
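// The XOR transform below maps each FPC encoding onto the expected
// FLT_ROUNDS value:
//   FPC 0 (nearest): (0 ^ 0) ^ 1 = 1
//   FPC 1 (to 0):    (1 ^ 0) ^ 1 = 0
//   FPC 2 (+inf):    (2 ^ 1) ^ 1 = 2
//   FPC 3 (-inf):    (3 ^ 1) ^ 1 = 3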
11013 | |
11014 | // Save FPC to register. |
11015 | SDValue Chain = Op.getOperand(i: 0); |
11016 | SDValue EFPC( |
11017 | DAG.getMachineNode(Opcode: SystemZ::EFPC, dl, ResultTys: {MVT::i32, MVT::Other}, Ops: Chain), 0); |
11018 | Chain = EFPC.getValue(R: 1); |
11019 | |
11020 | // Transform as necessary.
11021 | SDValue CWD1 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: EFPC, |
11022 | N2: DAG.getConstant(Val: 3, DL: dl, VT: MVT::i32)); |
11023 | // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1 |
11024 | SDValue CWD2 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32, N1: CWD1, |
11025 | N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32, N1: CWD1, |
11026 | N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32))); |
11027 | |
11028 | SDValue RetVal = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32, N1: CWD2, |
11029 | N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32)); |
11030 | RetVal = DAG.getZExtOrTrunc(Op: RetVal, DL: dl, VT: Op.getValueType()); |
11031 | |
11032 | return DAG.getMergeValues(Ops: {RetVal, Chain}, dl); |
11033 | } |
11034 | |
11035 | SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op, |
11036 | SelectionDAG &DAG) const { |
11037 | EVT VT = Op.getValueType(); |
11038 | Op = Op.getOperand(i: 0); |
11039 | EVT OpVT = Op.getValueType(); |
11040 | |
11041 | assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
11042 | |
11043 | SDLoc DL(Op); |
11044 | |
11045 | // Load a zero vector for the third operand of VSUM.
11046 | SDValue Zero = DAG.getSplatBuildVector(VT: OpVT, DL, Op: DAG.getConstant(Val: 0, DL, VT)); |
11047 | |
11048 | // Execute the VSUM.
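// An i8/i16 reduction is done in two steps: the elements are first summed
// into words, and the word sums are then added into a single 128-bit value
// (e.g. VSUMB followed by VSUMQF for a v16i8 source).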
11049 | switch (OpVT.getScalarSizeInBits()) { |
11050 | case 8: |
11051 | case 16: |
11052 | Op = DAG.getNode(Opcode: SystemZISD::VSUM, DL, VT: MVT::v4i32, N1: Op, N2: Zero); |
11053 | [[fallthrough]]; |
11054 | case 32: |
11055 | case 64: |
11056 | Op = DAG.getNode(Opcode: SystemZISD::VSUM, DL, VT: MVT::i128, N1: Op, |
11057 | N2: DAG.getBitcast(VT: Op.getValueType(), V: Zero)); |
11058 | break; |
11059 | case 128: |
11060 | break; // VSUM over v1i128 should not happen and would be a noop |
11061 | default: |
11062 | llvm_unreachable("Unexpected scalar size.");
11063 | } |
11064 | // Cast to original vector type, retrieve last element. |
11065 | return DAG.getNode( |
11066 | Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT, N1: DAG.getBitcast(VT: OpVT, V: Op), |
11067 | N2: DAG.getConstant(Val: OpVT.getVectorNumElements() - 1, DL, VT: MVT::i32)); |
11068 | } |
11069 | |
11070 | static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS) { |
11071 | FunctionType *FT = F->getFunctionType(); |
11072 | const AttributeList &Attrs = F->getAttributes(); |
11073 | if (Attrs.hasRetAttrs()) |
11074 | OS << Attrs.getAsString(Index: AttributeList::ReturnIndex) << " ";
11075 | OS << *F->getReturnType() << " @" << F->getName() << "(";
11076 | for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) { |
11077 | if (I) |
11078 | OS << ", ";
11079 | OS << *FT->getParamType(i: I); |
11080 | AttributeSet ArgAttrs = Attrs.getParamAttrs(ArgNo: I); |
11081 | for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt}) |
11082 | if (ArgAttrs.hasAttribute(Kind: A)) |
11083 | OS << " " << Attribute::getNameFromAttrKind(AttrKind: A); |
11084 | } |
11085 | OS << ")\n";
11086 | } |
11087 | |
11088 | bool SystemZTargetLowering::isInternal(const Function *Fn) const { |
11089 | std::map<const Function *, bool>::iterator Itr = IsInternalCache.find(x: Fn); |
11090 | if (Itr == IsInternalCache.end()) |
11091 | Itr = IsInternalCache |
11092 | .insert(x: std::pair<const Function *, bool>( |
11093 | Fn, (Fn->hasLocalLinkage() && !Fn->hasAddressTaken()))) |
11094 | .first; |
11095 | return Itr->second; |
11096 | } |
11097 | |
11098 | void SystemZTargetLowering:: |
11099 | verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs, |
11100 | const Function *F, SDValue Callee) const { |
11101 | // Temporarily only do the check when explicitly requested, until it can be |
11102 | // enabled by default. |
11103 | if (!EnableIntArgExtCheck) |
11104 | return; |
11105 | |
11106 | bool IsInternal = false; |
11107 | const Function *CalleeFn = nullptr; |
11108 | if (auto *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) |
11109 | if ((CalleeFn = dyn_cast<Function>(Val: G->getGlobal()))) |
11110 | IsInternal = isInternal(Fn: CalleeFn); |
11111 | if (!IsInternal && !verifyNarrowIntegerArgs(Outs)) { |
11112 | errs() << "ERROR: Missing extension attribute of passed " |
11113 | << "value in call to function:\n" << "Callee: " ; |
11114 | if (CalleeFn != nullptr) |
11115 | printFunctionArgExts(F: CalleeFn, OS&: errs()); |
11116 | else |
11117 | errs() << "-\n" ; |
11118 | errs() << "Caller: " ; |
11119 | printFunctionArgExts(F, OS&: errs()); |
11120 | llvm_unreachable("" ); |
11121 | } |
11122 | } |
11123 | |
11124 | void SystemZTargetLowering:: |
11125 | verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs, |
11126 | const Function *F) const { |
11127 | // Temporarily only do the check when explicitly requested, until it can be |
11128 | // enabled by default. |
11129 | if (!EnableIntArgExtCheck) |
11130 | return; |
11131 | |
11132 | if (!isInternal(Fn: F) && !verifyNarrowIntegerArgs(Outs)) { |
11133 | errs() << "ERROR: Missing extension attribute of returned " |
11134 | << "value from function:\n" ; |
11135 | printFunctionArgExts(F, OS&: errs()); |
11136 | llvm_unreachable("" ); |
11137 | } |
11138 | } |
11139 | |
11140 | // Verify that narrow integer arguments are extended as required by the ABI. |
11141 | // Return false if an error is found. |
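// For example, an ELF function passing or returning a plain i32 without a
// signext, zeroext or noext attribute is reported.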
11142 | bool SystemZTargetLowering::verifyNarrowIntegerArgs( |
11143 | const SmallVectorImpl<ISD::OutputArg> &Outs) const { |
11144 | if (!Subtarget.isTargetELF()) |
11145 | return true; |
11146 | |
11147 | if (EnableIntArgExtCheck.getNumOccurrences()) { |
11148 | if (!EnableIntArgExtCheck) |
11149 | return true; |
11150 | } else if (!getTargetMachine().Options.VerifyArgABICompliance) |
11151 | return true; |
11152 | |
11153 | for (unsigned i = 0; i < Outs.size(); ++i) { |
11154 | MVT VT = Outs[i].VT; |
11155 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
11156 | if (VT.isInteger()) { |
11157 | assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) && |
11158 | "Unexpected integer argument VT." ); |
11159 | if (VT == MVT::i32 && |
11160 | !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt()) |
11161 | return false; |
11162 | } |
11163 | } |
11164 | |
11165 | return true; |
11166 | } |
11167 | |