1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
15#include "SystemZConstantPoolValue.h"
16#include "SystemZMachineFunctionInfo.h"
17#include "SystemZTargetMachine.h"
18#include "llvm/ADT/SmallSet.h"
19#include "llvm/CodeGen/CallingConvLower.h"
20#include "llvm/CodeGen/ISDOpcodes.h"
21#include "llvm/CodeGen/MachineInstrBuilder.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
24#include "llvm/IR/GlobalAlias.h"
25#include "llvm/IR/IntrinsicInst.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/IntrinsicsS390.h"
28#include "llvm/IR/PatternMatch.h"
29#include "llvm/Support/CommandLine.h"
30#include "llvm/Support/ErrorHandling.h"
31#include "llvm/Support/KnownBits.h"
32#include <cctype>
33#include <optional>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "systemz-lower"
38
// Temporarily keep this disabled by default until all known problems
// related to argument extensions are fixed.
41static cl::opt<bool> EnableIntArgExtCheck(
42 "argext-abi-check", cl::init(Val: false),
43 cl::desc("Verify that narrow int args are properly extended per the "
44 "SystemZ ABI."));
45
46namespace {
47// Represents information about a comparison.
48struct Comparison {
49 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
50 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
51 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
52
53 // The operands to the comparison.
54 SDValue Op0, Op1;
55
56 // Chain if this is a strict floating-point comparison.
57 SDValue Chain;
58
59 // The opcode that should be used to compare Op0 and Op1.
60 unsigned Opcode;
61
62 // A SystemZICMP value. Only used for integer comparisons.
63 unsigned ICmpType;
64
65 // The mask of CC values that Opcode can produce.
66 unsigned CCValid;
67
68 // The mask of CC values for which the original condition is true.
69 unsigned CCMask;
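  // For example, an integer equality comparison would typically have
  // CCValid == SystemZ::CCMASK_ICMP and CCMask == SystemZ::CCMASK_CMP_EQ,
  // i.e. the condition holds exactly when CC is 0. (Informal illustration;
  // the exact masks depend on the comparison that is emitted.)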
70};
71} // end anonymous namespace
72
73// Classify VT as either 32 or 64 bit.
74static bool is32Bit(EVT VT) {
75 switch (VT.getSimpleVT().SimpleTy) {
76 case MVT::i32:
77 return true;
78 case MVT::i64:
79 return false;
80 default:
81 llvm_unreachable("Unsupported type");
82 }
83}
84
// Return a copy of the given MachineOperand with any kill flag cleared,
// so that it can be safely used before the operand's final use.
87static MachineOperand earlyUseOperand(MachineOperand Op) {
88 if (Op.isReg())
89 Op.setIsKill(false);
90 return Op;
91}
92
93SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
94 const SystemZSubtarget &STI)
95 : TargetLowering(TM, STI), Subtarget(STI) {
96 MVT PtrVT = MVT::getIntegerVT(BitWidth: TM.getPointerSizeInBits(AS: 0));
97
98 auto *Regs = STI.getSpecialRegisters();
99
100 // Set up the register classes.
101 if (Subtarget.hasHighWord())
102 addRegisterClass(VT: MVT::i32, RC: &SystemZ::GRX32BitRegClass);
103 else
104 addRegisterClass(VT: MVT::i32, RC: &SystemZ::GR32BitRegClass);
105 addRegisterClass(VT: MVT::i64, RC: &SystemZ::GR64BitRegClass);
106 if (!useSoftFloat()) {
107 if (Subtarget.hasVector()) {
108 addRegisterClass(VT: MVT::f16, RC: &SystemZ::VR16BitRegClass);
109 addRegisterClass(VT: MVT::f32, RC: &SystemZ::VR32BitRegClass);
110 addRegisterClass(VT: MVT::f64, RC: &SystemZ::VR64BitRegClass);
111 } else {
112 addRegisterClass(VT: MVT::f16, RC: &SystemZ::FP16BitRegClass);
113 addRegisterClass(VT: MVT::f32, RC: &SystemZ::FP32BitRegClass);
114 addRegisterClass(VT: MVT::f64, RC: &SystemZ::FP64BitRegClass);
115 }
116 if (Subtarget.hasVectorEnhancements1())
117 addRegisterClass(VT: MVT::f128, RC: &SystemZ::VR128BitRegClass);
118 else
119 addRegisterClass(VT: MVT::f128, RC: &SystemZ::FP128BitRegClass);
120
121 if (Subtarget.hasVector()) {
122 addRegisterClass(VT: MVT::v16i8, RC: &SystemZ::VR128BitRegClass);
123 addRegisterClass(VT: MVT::v8i16, RC: &SystemZ::VR128BitRegClass);
124 addRegisterClass(VT: MVT::v4i32, RC: &SystemZ::VR128BitRegClass);
125 addRegisterClass(VT: MVT::v2i64, RC: &SystemZ::VR128BitRegClass);
126 addRegisterClass(VT: MVT::v8f16, RC: &SystemZ::VR128BitRegClass);
127 addRegisterClass(VT: MVT::v4f32, RC: &SystemZ::VR128BitRegClass);
128 addRegisterClass(VT: MVT::v2f64, RC: &SystemZ::VR128BitRegClass);
129 }
130
131 if (Subtarget.hasVector())
132 addRegisterClass(VT: MVT::i128, RC: &SystemZ::VR128BitRegClass);
133 }
134
135 // Compute derived properties from the register classes
136 computeRegisterProperties(TRI: Subtarget.getRegisterInfo());
137
138 // Set up special registers.
139 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
140
  // TODO: It may be better to default to latency-oriented scheduling;
  // however, LLVM's current latency-oriented scheduler can't handle physreg
  // definitions such as SystemZ's CC register, so use the register-pressure
  // scheduler instead, which can.
145 setSchedulingPreference(Sched::RegPressure);
146
147 setBooleanContents(ZeroOrOneBooleanContent);
148 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
149
150 setMaxAtomicSizeInBitsSupported(128);
151
152 // Instructions are strings of 2-byte aligned 2-byte values.
153 setMinFunctionAlignment(Align(2));
154 // For performance reasons we prefer 16-byte alignment.
155 setPrefFunctionAlignment(Align(16));
156
157 // Handle operations that are handled in a similar way for all types.
158 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
159 I <= MVT::LAST_FP_VALUETYPE;
160 ++I) {
161 MVT VT = MVT::SimpleValueType(I);
162 if (isTypeLegal(VT)) {
163 // Lower SET_CC into an IPM-based sequence.
164 setOperationAction(Op: ISD::SETCC, VT, Action: Custom);
165 setOperationAction(Op: ISD::STRICT_FSETCC, VT, Action: Custom);
166 setOperationAction(Op: ISD::STRICT_FSETCCS, VT, Action: Custom);
167
168 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
169 setOperationAction(Op: ISD::SELECT, VT, Action: Expand);
170
171 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
172 setOperationAction(Op: ISD::SELECT_CC, VT, Action: Custom);
173 setOperationAction(Op: ISD::BR_CC, VT, Action: Custom);
174 }
175 }
176
177 // Expand jump table branches as address arithmetic followed by an
178 // indirect jump.
179 setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand);
180
181 // Expand BRCOND into a BR_CC (see above).
182 setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Expand);
183
184 // Handle integer types except i128.
185 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
186 I <= MVT::LAST_INTEGER_VALUETYPE;
187 ++I) {
188 MVT VT = MVT::SimpleValueType(I);
189 if (isTypeLegal(VT) && VT != MVT::i128) {
190 setOperationAction(Op: ISD::ABS, VT, Action: Legal);
191
192 // Expand individual DIV and REMs into DIVREMs.
193 setOperationAction(Op: ISD::SDIV, VT, Action: Expand);
194 setOperationAction(Op: ISD::UDIV, VT, Action: Expand);
195 setOperationAction(Op: ISD::SREM, VT, Action: Expand);
196 setOperationAction(Op: ISD::UREM, VT, Action: Expand);
197 setOperationAction(Op: ISD::SDIVREM, VT, Action: Custom);
198 setOperationAction(Op: ISD::UDIVREM, VT, Action: Custom);
199
200 // Support addition/subtraction with overflow.
201 setOperationAction(Op: ISD::SADDO, VT, Action: Custom);
202 setOperationAction(Op: ISD::SSUBO, VT, Action: Custom);
203
204 // Support addition/subtraction with carry.
205 setOperationAction(Op: ISD::UADDO, VT, Action: Custom);
206 setOperationAction(Op: ISD::USUBO, VT, Action: Custom);
207
208 // Support carry in as value rather than glue.
209 setOperationAction(Op: ISD::UADDO_CARRY, VT, Action: Custom);
210 setOperationAction(Op: ISD::USUBO_CARRY, VT, Action: Custom);
211
212 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
213 // available, or if the operand is constant.
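      // (For example, an ATOMIC_LOAD_SUB with a constant operand C can be
      // rewritten as an ATOMIC_LOAD_ADD of -C.)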
214 setOperationAction(Op: ISD::ATOMIC_LOAD_SUB, VT, Action: Custom);
215
216 // Use POPCNT on z196 and above.
217 if (Subtarget.hasPopulationCount())
218 setOperationAction(Op: ISD::CTPOP, VT, Action: Custom);
219 else
220 setOperationAction(Op: ISD::CTPOP, VT, Action: Expand);
221
222 // No special instructions for these.
223 setOperationAction(Op: ISD::CTTZ, VT, Action: Expand);
224 setOperationAction(Op: ISD::ROTR, VT, Action: Expand);
225
226 // Use *MUL_LOHI where possible instead of MULH*.
227 setOperationAction(Op: ISD::MULHS, VT, Action: Expand);
228 setOperationAction(Op: ISD::MULHU, VT, Action: Expand);
229 setOperationAction(Op: ISD::SMUL_LOHI, VT, Action: Custom);
230 setOperationAction(Op: ISD::UMUL_LOHI, VT, Action: Custom);
231
232 // The fp<=>i32/i64 conversions are all Legal except for f16 and for
233 // unsigned on z10 (only z196 and above have native support for
234 // unsigned conversions).
235 for (auto Op : {ISD::FP_TO_SINT, ISD::STRICT_FP_TO_SINT,
236 ISD::SINT_TO_FP, ISD::STRICT_SINT_TO_FP})
237 setOperationAction(Op, VT, Action: Custom);
238 for (auto Op : {ISD::FP_TO_UINT, ISD::STRICT_FP_TO_UINT})
239 setOperationAction(Op, VT, Action: Custom);
240 for (auto Op : {ISD::UINT_TO_FP, ISD::STRICT_UINT_TO_FP}) {
241 // Handle unsigned 32-bit input types as signed 64-bit types on z10.
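        // (Illustration: on z10 an unsigned 32-bit integer is in effect
        // converted by zero-extending it to i64 and using the signed i64
        // conversion, which gives the same result because the zero-extended
        // value is non-negative.)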
242 auto OpAction =
243 (!Subtarget.hasFPExtension() && VT == MVT::i32) ? Promote : Custom;
244 setOperationAction(Op, VT, Action: OpAction);
245 }
246 }
247 }
248
249 // Handle i128 if legal.
250 if (isTypeLegal(VT: MVT::i128)) {
251 // No special instructions for these.
252 setOperationAction(Op: ISD::SDIVREM, VT: MVT::i128, Action: Expand);
253 setOperationAction(Op: ISD::UDIVREM, VT: MVT::i128, Action: Expand);
254 setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i128, Action: Expand);
255 setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i128, Action: Expand);
256 setOperationAction(Op: ISD::ROTR, VT: MVT::i128, Action: Expand);
257 setOperationAction(Op: ISD::ROTL, VT: MVT::i128, Action: Expand);
258
259 // We may be able to use VSLDB/VSLD/VSRD for these.
260 setOperationAction(Op: ISD::FSHL, VT: MVT::i128, Action: Custom);
261 setOperationAction(Op: ISD::FSHR, VT: MVT::i128, Action: Custom);
262
263 // No special instructions for these before z17.
264 if (!Subtarget.hasVectorEnhancements3()) {
265 setOperationAction(Op: ISD::MUL, VT: MVT::i128, Action: Expand);
266 setOperationAction(Op: ISD::MULHS, VT: MVT::i128, Action: Expand);
267 setOperationAction(Op: ISD::MULHU, VT: MVT::i128, Action: Expand);
268 setOperationAction(Op: ISD::SDIV, VT: MVT::i128, Action: Expand);
269 setOperationAction(Op: ISD::UDIV, VT: MVT::i128, Action: Expand);
270 setOperationAction(Op: ISD::SREM, VT: MVT::i128, Action: Expand);
271 setOperationAction(Op: ISD::UREM, VT: MVT::i128, Action: Expand);
272 setOperationAction(Op: ISD::CTLZ, VT: MVT::i128, Action: Expand);
273 setOperationAction(Op: ISD::CTTZ, VT: MVT::i128, Action: Expand);
274 } else {
275 // Even if we do have a legal 128-bit multiply, we do not
276 // want 64-bit multiply-high operations to use it.
277 setOperationAction(Op: ISD::MULHS, VT: MVT::i64, Action: Custom);
278 setOperationAction(Op: ISD::MULHU, VT: MVT::i64, Action: Custom);
279 }
280
281 // Support addition/subtraction with carry.
282 setOperationAction(Op: ISD::UADDO, VT: MVT::i128, Action: Custom);
283 setOperationAction(Op: ISD::USUBO, VT: MVT::i128, Action: Custom);
284 setOperationAction(Op: ISD::UADDO_CARRY, VT: MVT::i128, Action: Custom);
285 setOperationAction(Op: ISD::USUBO_CARRY, VT: MVT::i128, Action: Custom);
286
287 // Use VPOPCT and add up partial results.
288 setOperationAction(Op: ISD::CTPOP, VT: MVT::i128, Action: Custom);
289
290 // Additional instructions available with z17.
291 if (Subtarget.hasVectorEnhancements3()) {
292 setOperationAction(Op: ISD::ABS, VT: MVT::i128, Action: Legal);
293
294 setOperationAction(Ops: {ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX},
295 VT: MVT::i128, Action: Legal);
296 }
297 }
298
  // These need custom lowering in order to handle the f16 conversions.
300 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i128, Action: Custom);
301 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::i128, Action: Custom);
302 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i128, Action: Custom);
303 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::i128, Action: Custom);
304 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::i128, Action: Custom);
305 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::i128, Action: Custom);
306 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::i128, Action: Custom);
307 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::i128, Action: Custom);
308
  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (while still keeping the original memory VT).
  // Lower them into full i32 operations.
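  // (On SystemZ the subword forms are ultimately implemented with a
  // compare-and-swap loop on the containing aligned 32-bit word.)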
312 setOperationAction(Op: ISD::ATOMIC_SWAP, VT: MVT::i32, Action: Custom);
313 setOperationAction(Op: ISD::ATOMIC_LOAD_ADD, VT: MVT::i32, Action: Custom);
314 setOperationAction(Op: ISD::ATOMIC_LOAD_SUB, VT: MVT::i32, Action: Custom);
315 setOperationAction(Op: ISD::ATOMIC_LOAD_AND, VT: MVT::i32, Action: Custom);
316 setOperationAction(Op: ISD::ATOMIC_LOAD_OR, VT: MVT::i32, Action: Custom);
317 setOperationAction(Op: ISD::ATOMIC_LOAD_XOR, VT: MVT::i32, Action: Custom);
318 setOperationAction(Op: ISD::ATOMIC_LOAD_NAND, VT: MVT::i32, Action: Custom);
319 setOperationAction(Op: ISD::ATOMIC_LOAD_MIN, VT: MVT::i32, Action: Custom);
320 setOperationAction(Op: ISD::ATOMIC_LOAD_MAX, VT: MVT::i32, Action: Custom);
321 setOperationAction(Op: ISD::ATOMIC_LOAD_UMIN, VT: MVT::i32, Action: Custom);
322 setOperationAction(Op: ISD::ATOMIC_LOAD_UMAX, VT: MVT::i32, Action: Custom);
323
  // Whether or not i128 is a legal type, we need to custom lower
  // the atomic operations in order to exploit SystemZ instructions.
326 setOperationAction(Op: ISD::ATOMIC_LOAD, VT: MVT::i128, Action: Custom);
327 setOperationAction(Op: ISD::ATOMIC_STORE, VT: MVT::i128, Action: Custom);
328 setOperationAction(Op: ISD::ATOMIC_LOAD, VT: MVT::f128, Action: Custom);
329 setOperationAction(Op: ISD::ATOMIC_STORE, VT: MVT::f128, Action: Custom);
330
331 // Mark sign/zero extending atomic loads as legal, which will make
332 // DAGCombiner fold extensions into atomic loads if possible.
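  // (For example, a sign_extend of an i32 atomic load to i64 can then be
  // folded into a single sign-extending 64-bit atomic load.)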
333 setAtomicLoadExtAction(ExtTypes: {ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT: MVT::i64,
334 MemVTs: {MVT::i8, MVT::i16, MVT::i32}, Action: Legal);
335 setAtomicLoadExtAction(ExtTypes: {ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT: MVT::i32,
336 MemVTs: {MVT::i8, MVT::i16}, Action: Legal);
337 setAtomicLoadExtAction(ExtTypes: {ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT: MVT::i16,
338 MemVT: MVT::i8, Action: Legal);
339
340 // We can use the CC result of compare-and-swap to implement
341 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
342 setOperationAction(Op: ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT: MVT::i32, Action: Custom);
343 setOperationAction(Op: ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT: MVT::i64, Action: Custom);
344 setOperationAction(Op: ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT: MVT::i128, Action: Custom);
345
346 setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom);
347
348 // Traps are legal, as we will convert them to "j .+2".
349 setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal);
350
351 // We have native support for a 64-bit CTLZ, via FLOGR.
352 setOperationAction(Op: ISD::CTLZ, VT: MVT::i32, Action: Promote);
353 setOperationAction(Op: ISD::CTLZ_ZERO_UNDEF, VT: MVT::i32, Action: Promote);
354 setOperationAction(Op: ISD::CTLZ, VT: MVT::i64, Action: Legal);
355
356 // On z17 we have native support for a 64-bit CTTZ.
357 if (Subtarget.hasMiscellaneousExtensions4()) {
358 setOperationAction(Op: ISD::CTTZ, VT: MVT::i32, Action: Promote);
359 setOperationAction(Op: ISD::CTTZ_ZERO_UNDEF, VT: MVT::i32, Action: Promote);
360 setOperationAction(Op: ISD::CTTZ, VT: MVT::i64, Action: Legal);
361 }
362
363 // On z15 we have native support for a 64-bit CTPOP.
364 if (Subtarget.hasMiscellaneousExtensions3()) {
365 setOperationAction(Op: ISD::CTPOP, VT: MVT::i32, Action: Promote);
366 setOperationAction(Op: ISD::CTPOP, VT: MVT::i64, Action: Legal);
367 }
368
369 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
370 setOperationAction(Op: ISD::OR, VT: MVT::i64, Action: Custom);
371
372 // Expand 128 bit shifts without using a libcall.
373 setOperationAction(Op: ISD::SRL_PARTS, VT: MVT::i64, Action: Expand);
374 setOperationAction(Op: ISD::SHL_PARTS, VT: MVT::i64, Action: Expand);
375 setOperationAction(Op: ISD::SRA_PARTS, VT: MVT::i64, Action: Expand);
376
377 // Also expand 256 bit shifts if i128 is a legal type.
378 if (isTypeLegal(VT: MVT::i128)) {
379 setOperationAction(Op: ISD::SRL_PARTS, VT: MVT::i128, Action: Expand);
380 setOperationAction(Op: ISD::SHL_PARTS, VT: MVT::i128, Action: Expand);
381 setOperationAction(Op: ISD::SRA_PARTS, VT: MVT::i128, Action: Expand);
382 }
383
384 // Handle bitcast from fp128 to i128.
385 if (!isTypeLegal(VT: MVT::i128))
386 setOperationAction(Op: ISD::BITCAST, VT: MVT::i128, Action: Custom);
387
388 // We have native instructions for i8, i16 and i32 extensions, but not i1.
389 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
390 for (MVT VT : MVT::integer_valuetypes()) {
391 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
392 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
393 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
394 }
395
396 // Handle the various types of symbolic address.
397 setOperationAction(Op: ISD::ConstantPool, VT: PtrVT, Action: Custom);
398 setOperationAction(Op: ISD::GlobalAddress, VT: PtrVT, Action: Custom);
399 setOperationAction(Op: ISD::GlobalTLSAddress, VT: PtrVT, Action: Custom);
400 setOperationAction(Op: ISD::BlockAddress, VT: PtrVT, Action: Custom);
401 setOperationAction(Op: ISD::JumpTable, VT: PtrVT, Action: Custom);
402
403 // We need to handle dynamic allocations specially because of the
404 // 160-byte area at the bottom of the stack.
405 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: PtrVT, Action: Custom);
406 setOperationAction(Op: ISD::GET_DYNAMIC_AREA_OFFSET, VT: PtrVT, Action: Custom);
407
408 setOperationAction(Op: ISD::STACKSAVE, VT: MVT::Other, Action: Custom);
409 setOperationAction(Op: ISD::STACKRESTORE, VT: MVT::Other, Action: Custom);
410
411 // Handle prefetches with PFD or PFDRL.
412 setOperationAction(Op: ISD::PREFETCH, VT: MVT::Other, Action: Custom);
413
414 // Handle readcyclecounter with STCKF.
415 setOperationAction(Op: ISD::READCYCLECOUNTER, VT: MVT::i64, Action: Custom);
416
417 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
418 // Assume by default that all vector operations need to be expanded.
419 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
420 if (getOperationAction(Op: Opcode, VT) == Legal)
421 setOperationAction(Op: Opcode, VT, Action: Expand);
422
423 // Likewise all truncating stores and extending loads.
424 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
425 setTruncStoreAction(ValVT: VT, MemVT: InnerVT, Action: Expand);
426 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
427 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
428 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: InnerVT, Action: Expand);
429 }
430
431 if (isTypeLegal(VT)) {
432 // These operations are legal for anything that can be stored in a
433 // vector register, even if there is no native support for the format
434 // as such. In particular, we can do these for v4f32 even though there
435 // are no specific instructions for that format.
436 setOperationAction(Op: ISD::LOAD, VT, Action: Legal);
437 setOperationAction(Op: ISD::STORE, VT, Action: Legal);
438 setOperationAction(Op: ISD::VSELECT, VT, Action: Legal);
439 setOperationAction(Op: ISD::BITCAST, VT, Action: Legal);
440 setOperationAction(Op: ISD::UNDEF, VT, Action: Legal);
441
442 // Likewise, except that we need to replace the nodes with something
443 // more specific.
444 setOperationAction(Op: ISD::BUILD_VECTOR, VT, Action: Custom);
445 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT, Action: Custom);
446 }
447 }
448
449 // Handle integer vector types.
450 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
451 if (isTypeLegal(VT)) {
452 // These operations have direct equivalents.
453 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT, Action: Legal);
454 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT, Action: Legal);
455 setOperationAction(Op: ISD::ADD, VT, Action: Legal);
456 setOperationAction(Op: ISD::SUB, VT, Action: Legal);
457 if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3()) {
458 setOperationAction(Op: ISD::MUL, VT, Action: Legal);
459 setOperationAction(Op: ISD::MULHS, VT, Action: Legal);
460 setOperationAction(Op: ISD::MULHU, VT, Action: Legal);
461 }
462 if (Subtarget.hasVectorEnhancements3() &&
463 VT != MVT::v16i8 && VT != MVT::v8i16) {
464 setOperationAction(Op: ISD::SDIV, VT, Action: Legal);
465 setOperationAction(Op: ISD::UDIV, VT, Action: Legal);
466 setOperationAction(Op: ISD::SREM, VT, Action: Legal);
467 setOperationAction(Op: ISD::UREM, VT, Action: Legal);
468 }
469 setOperationAction(Op: ISD::ABS, VT, Action: Legal);
470 setOperationAction(Op: ISD::AND, VT, Action: Legal);
471 setOperationAction(Op: ISD::OR, VT, Action: Legal);
472 setOperationAction(Op: ISD::XOR, VT, Action: Legal);
473 if (Subtarget.hasVectorEnhancements1())
474 setOperationAction(Op: ISD::CTPOP, VT, Action: Legal);
475 else
476 setOperationAction(Op: ISD::CTPOP, VT, Action: Custom);
477 setOperationAction(Op: ISD::CTTZ, VT, Action: Legal);
478 setOperationAction(Op: ISD::CTLZ, VT, Action: Legal);
479
480 // Convert a GPR scalar to a vector by inserting it into element 0.
481 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT, Action: Custom);
482
483 // Use a series of unpacks for extensions.
484 setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT, Action: Custom);
485 setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT, Action: Custom);
486
487 // Detect shifts/rotates by a scalar amount and convert them into
488 // V*_BY_SCALAR.
489 setOperationAction(Op: ISD::SHL, VT, Action: Custom);
490 setOperationAction(Op: ISD::SRA, VT, Action: Custom);
491 setOperationAction(Op: ISD::SRL, VT, Action: Custom);
492 setOperationAction(Op: ISD::ROTL, VT, Action: Custom);
493
494 // Add ISD::VECREDUCE_ADD as custom in order to implement
495 // it with VZERO+VSUM
496 setOperationAction(Op: ISD::VECREDUCE_ADD, VT, Action: Custom);
497
498 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
499 // and inverting the result as necessary.
500 setOperationAction(Op: ISD::SETCC, VT, Action: Custom);
501
502 setOperationAction(Ops: {ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, VT,
503 Action: Legal);
504 }
505 }
506
507 if (Subtarget.hasVector()) {
508 // There should be no need to check for float types other than v2f64
509 // since <2 x f32> isn't a legal type.
510 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::v2i64, Action: Legal);
511 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::v2f64, Action: Legal);
512 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::v2i64, Action: Legal);
513 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::v2f64, Action: Legal);
514 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i64, Action: Legal);
515 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2f64, Action: Legal);
516 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i64, Action: Legal);
517 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2f64, Action: Legal);
518
519 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::v2i64, Action: Legal);
520 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::v2f64, Action: Legal);
521 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::v2i64, Action: Legal);
522 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::v2f64, Action: Legal);
523 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i64, Action: Legal);
524 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2f64, Action: Legal);
525 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i64, Action: Legal);
526 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2f64, Action: Legal);
527 }
528
529 if (Subtarget.hasVectorEnhancements2()) {
530 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::v4i32, Action: Legal);
531 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::v4f32, Action: Legal);
532 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::v4i32, Action: Legal);
533 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::v4f32, Action: Legal);
534 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v4i32, Action: Legal);
535 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v4f32, Action: Legal);
536 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v4i32, Action: Legal);
537 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v4f32, Action: Legal);
538
539 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::v4i32, Action: Legal);
540 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::v4f32, Action: Legal);
541 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::v4i32, Action: Legal);
542 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::v4f32, Action: Legal);
543 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v4i32, Action: Legal);
544 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v4f32, Action: Legal);
545 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v4i32, Action: Legal);
546 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v4f32, Action: Legal);
547 }
548
549 // Handle floating-point types.
550 if (!useSoftFloat()) {
551 // Promote all f16 operations to float, with some exceptions below.
552 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
553 setOperationAction(Op: Opc, VT: MVT::f16, Action: Promote);
554 setOperationAction(Op: ISD::ConstantFP, VT: MVT::f16, Action: Expand);
555 for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
556 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: MVT::f16, Action: Expand);
557 setTruncStoreAction(ValVT: VT, MemVT: MVT::f16, Action: Expand);
558 }
559 for (auto Op : {ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE})
560 setOperationAction(Op, VT: MVT::f16, Action: Subtarget.hasVector() ? Legal : Custom);
561 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::f16, Action: LibCall);
562 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT: MVT::f16, Action: LibCall);
563 setOperationAction(Op: ISD::BITCAST, VT: MVT::i16, Action: Custom);
564 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f16, Action: Custom);
565 for (auto Op : {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN})
566 setOperationAction(Op, VT: MVT::f16, Action: Legal);
567 }
568
569 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
570 I <= MVT::LAST_FP_VALUETYPE;
571 ++I) {
572 MVT VT = MVT::SimpleValueType(I);
573 if (isTypeLegal(VT) && VT != MVT::f16) {
574 // We can use FI for FRINT.
575 setOperationAction(Op: ISD::FRINT, VT, Action: Legal);
576
577 // We can use the extended form of FI for other rounding operations.
578 if (Subtarget.hasFPExtension()) {
579 setOperationAction(Op: ISD::FNEARBYINT, VT, Action: Legal);
580 setOperationAction(Op: ISD::FFLOOR, VT, Action: Legal);
581 setOperationAction(Op: ISD::FCEIL, VT, Action: Legal);
582 setOperationAction(Op: ISD::FTRUNC, VT, Action: Legal);
583 setOperationAction(Op: ISD::FROUND, VT, Action: Legal);
584 setOperationAction(Op: ISD::FROUNDEVEN, VT, Action: Legal);
585 }
586
587 // No special instructions for these.
588 setOperationAction(Op: ISD::FSIN, VT, Action: Expand);
589 setOperationAction(Op: ISD::FCOS, VT, Action: Expand);
590 setOperationAction(Op: ISD::FSINCOS, VT, Action: Expand);
591 setOperationAction(Op: ISD::FREM, VT, Action: LibCall);
592 setOperationAction(Op: ISD::FPOW, VT, Action: Expand);
593
594 // Special treatment.
595 setOperationAction(Op: ISD::IS_FPCLASS, VT, Action: Custom);
596
597 // Handle constrained floating-point operations.
598 setOperationAction(Op: ISD::STRICT_FADD, VT, Action: Legal);
599 setOperationAction(Op: ISD::STRICT_FSUB, VT, Action: Legal);
600 setOperationAction(Op: ISD::STRICT_FMUL, VT, Action: Legal);
601 setOperationAction(Op: ISD::STRICT_FDIV, VT, Action: Legal);
602 setOperationAction(Op: ISD::STRICT_FMA, VT, Action: Legal);
603 setOperationAction(Op: ISD::STRICT_FSQRT, VT, Action: Legal);
604 setOperationAction(Op: ISD::STRICT_FRINT, VT, Action: Legal);
605 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT, Action: Legal);
606 if (Subtarget.hasFPExtension()) {
607 setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT, Action: Legal);
608 setOperationAction(Op: ISD::STRICT_FFLOOR, VT, Action: Legal);
609 setOperationAction(Op: ISD::STRICT_FCEIL, VT, Action: Legal);
610 setOperationAction(Op: ISD::STRICT_FTRUNC, VT, Action: Legal);
611 setOperationAction(Op: ISD::STRICT_FROUND, VT, Action: Legal);
612 setOperationAction(Op: ISD::STRICT_FROUNDEVEN, VT, Action: Legal);
613 }
614
      // Extension from f16 needs a libcall.
616 setOperationAction(Op: ISD::FP_EXTEND, VT, Action: Custom);
617 setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT, Action: Custom);
618 }
619 }
620
621 // Handle floating-point vector types.
622 if (Subtarget.hasVector()) {
623 // Scalar-to-vector conversion is just a subreg.
624 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v8f16, Action: Legal);
625 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4f32, Action: Legal);
626 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2f64, Action: Legal);
627
628 // Some insertions and extractions can be done directly but others
629 // need to go via integers.
630 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v8f16, Action: Custom);
631 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4f32, Action: Custom);
632 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v2f64, Action: Custom);
633 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v8f16, Action: Custom);
634 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v4f32, Action: Custom);
635 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2f64, Action: Custom);
636
637 // These operations have direct equivalents.
638 setOperationAction(Op: ISD::FADD, VT: MVT::v2f64, Action: Legal);
639 setOperationAction(Op: ISD::FNEG, VT: MVT::v2f64, Action: Legal);
640 setOperationAction(Op: ISD::FSUB, VT: MVT::v2f64, Action: Legal);
641 setOperationAction(Op: ISD::FMUL, VT: MVT::v2f64, Action: Legal);
642 setOperationAction(Op: ISD::FMA, VT: MVT::v2f64, Action: Legal);
643 setOperationAction(Op: ISD::FDIV, VT: MVT::v2f64, Action: Legal);
644 setOperationAction(Op: ISD::FABS, VT: MVT::v2f64, Action: Legal);
645 setOperationAction(Op: ISD::FSQRT, VT: MVT::v2f64, Action: Legal);
646 setOperationAction(Op: ISD::FRINT, VT: MVT::v2f64, Action: Legal);
647 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::v2f64, Action: Legal);
648 setOperationAction(Op: ISD::FFLOOR, VT: MVT::v2f64, Action: Legal);
649 setOperationAction(Op: ISD::FCEIL, VT: MVT::v2f64, Action: Legal);
650 setOperationAction(Op: ISD::FTRUNC, VT: MVT::v2f64, Action: Legal);
651 setOperationAction(Op: ISD::FROUND, VT: MVT::v2f64, Action: Legal);
652 setOperationAction(Op: ISD::FROUNDEVEN, VT: MVT::v2f64, Action: Legal);
653
654 // Handle constrained floating-point operations.
655 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::v2f64, Action: Legal);
656 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::v2f64, Action: Legal);
657 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::v2f64, Action: Legal);
658 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::v2f64, Action: Legal);
659 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::v2f64, Action: Legal);
660 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::v2f64, Action: Legal);
661 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::v2f64, Action: Legal);
662 setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::v2f64, Action: Legal);
663 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::v2f64, Action: Legal);
664 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::v2f64, Action: Legal);
665 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::v2f64, Action: Legal);
666 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::v2f64, Action: Legal);
667 setOperationAction(Op: ISD::STRICT_FROUNDEVEN, VT: MVT::v2f64, Action: Legal);
668
669 setOperationAction(Op: ISD::SETCC, VT: MVT::v2f64, Action: Custom);
670 setOperationAction(Op: ISD::SETCC, VT: MVT::v4f32, Action: Custom);
671 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::v2f64, Action: Custom);
672 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::v4f32, Action: Custom);
673 if (Subtarget.hasVectorEnhancements1()) {
674 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::v2f64, Action: Custom);
675 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::v4f32, Action: Custom);
676 }
677 }
678
679 // The vector enhancements facility 1 has instructions for these.
680 if (Subtarget.hasVectorEnhancements1()) {
681 setOperationAction(Op: ISD::FADD, VT: MVT::v4f32, Action: Legal);
682 setOperationAction(Op: ISD::FNEG, VT: MVT::v4f32, Action: Legal);
683 setOperationAction(Op: ISD::FSUB, VT: MVT::v4f32, Action: Legal);
684 setOperationAction(Op: ISD::FMUL, VT: MVT::v4f32, Action: Legal);
685 setOperationAction(Op: ISD::FMA, VT: MVT::v4f32, Action: Legal);
686 setOperationAction(Op: ISD::FDIV, VT: MVT::v4f32, Action: Legal);
687 setOperationAction(Op: ISD::FABS, VT: MVT::v4f32, Action: Legal);
688 setOperationAction(Op: ISD::FSQRT, VT: MVT::v4f32, Action: Legal);
689 setOperationAction(Op: ISD::FRINT, VT: MVT::v4f32, Action: Legal);
690 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::v4f32, Action: Legal);
691 setOperationAction(Op: ISD::FFLOOR, VT: MVT::v4f32, Action: Legal);
692 setOperationAction(Op: ISD::FCEIL, VT: MVT::v4f32, Action: Legal);
693 setOperationAction(Op: ISD::FTRUNC, VT: MVT::v4f32, Action: Legal);
694 setOperationAction(Op: ISD::FROUND, VT: MVT::v4f32, Action: Legal);
695 setOperationAction(Op: ISD::FROUNDEVEN, VT: MVT::v4f32, Action: Legal);
696
697 setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f64, Action: Legal);
698 setOperationAction(Op: ISD::FMAXIMUM, VT: MVT::f64, Action: Legal);
699 setOperationAction(Op: ISD::FMINNUM, VT: MVT::f64, Action: Legal);
700 setOperationAction(Op: ISD::FMINIMUM, VT: MVT::f64, Action: Legal);
701
702 setOperationAction(Op: ISD::FMAXNUM, VT: MVT::v2f64, Action: Legal);
703 setOperationAction(Op: ISD::FMAXIMUM, VT: MVT::v2f64, Action: Legal);
704 setOperationAction(Op: ISD::FMINNUM, VT: MVT::v2f64, Action: Legal);
705 setOperationAction(Op: ISD::FMINIMUM, VT: MVT::v2f64, Action: Legal);
706
707 setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f32, Action: Legal);
708 setOperationAction(Op: ISD::FMAXIMUM, VT: MVT::f32, Action: Legal);
709 setOperationAction(Op: ISD::FMINNUM, VT: MVT::f32, Action: Legal);
710 setOperationAction(Op: ISD::FMINIMUM, VT: MVT::f32, Action: Legal);
711
712 setOperationAction(Op: ISD::FMAXNUM, VT: MVT::v4f32, Action: Legal);
713 setOperationAction(Op: ISD::FMAXIMUM, VT: MVT::v4f32, Action: Legal);
714 setOperationAction(Op: ISD::FMINNUM, VT: MVT::v4f32, Action: Legal);
715 setOperationAction(Op: ISD::FMINIMUM, VT: MVT::v4f32, Action: Legal);
716
717 setOperationAction(Op: ISD::FMAXNUM, VT: MVT::f128, Action: Legal);
718 setOperationAction(Op: ISD::FMAXIMUM, VT: MVT::f128, Action: Legal);
719 setOperationAction(Op: ISD::FMINNUM, VT: MVT::f128, Action: Legal);
720 setOperationAction(Op: ISD::FMINIMUM, VT: MVT::f128, Action: Legal);
721
722 // Handle constrained floating-point operations.
723 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::v4f32, Action: Legal);
724 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::v4f32, Action: Legal);
725 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::v4f32, Action: Legal);
726 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::v4f32, Action: Legal);
727 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::v4f32, Action: Legal);
728 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::v4f32, Action: Legal);
729 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::v4f32, Action: Legal);
730 setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::v4f32, Action: Legal);
731 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::v4f32, Action: Legal);
732 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::v4f32, Action: Legal);
733 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::v4f32, Action: Legal);
734 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::v4f32, Action: Legal);
735 setOperationAction(Op: ISD::STRICT_FROUNDEVEN, VT: MVT::v4f32, Action: Legal);
736 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
737 MVT::v4f32, MVT::v2f64 }) {
738 setOperationAction(Op: ISD::STRICT_FMAXNUM, VT, Action: Legal);
739 setOperationAction(Op: ISD::STRICT_FMINNUM, VT, Action: Legal);
740 setOperationAction(Op: ISD::STRICT_FMAXIMUM, VT, Action: Legal);
741 setOperationAction(Op: ISD::STRICT_FMINIMUM, VT, Action: Legal);
742 }
743 }
744
745 // We only have fused f128 multiply-addition on vector registers.
746 if (!Subtarget.hasVectorEnhancements1()) {
747 setOperationAction(Op: ISD::FMA, VT: MVT::f128, Action: Expand);
748 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::f128, Action: Expand);
749 }
750
751 // We don't have a copysign instruction on vector registers.
752 if (Subtarget.hasVectorEnhancements1())
753 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f128, Action: Expand);
754
  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of an f80 constant (in cases where the constant
  // would fit in an f80).
758 for (MVT VT : MVT::fp_valuetypes())
759 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: MVT::f80, Action: Expand);
760
  // We don't have an extending load instruction on vector registers.
762 if (Subtarget.hasVectorEnhancements1()) {
763 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f128, MemVT: MVT::f32, Action: Expand);
764 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f128, MemVT: MVT::f64, Action: Expand);
765 }
766
767 // Floating-point truncation and stores need to be done separately.
768 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
769 setTruncStoreAction(ValVT: MVT::f128, MemVT: MVT::f32, Action: Expand);
770 setTruncStoreAction(ValVT: MVT::f128, MemVT: MVT::f64, Action: Expand);
771
772 // We have 64-bit FPR<->GPR moves, but need special handling for
773 // 32-bit forms.
774 if (!Subtarget.hasVector()) {
775 setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom);
776 setOperationAction(Op: ISD::BITCAST, VT: MVT::f32, Action: Custom);
777 }
778
779 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
780 // structure, but VAEND is a no-op.
781 setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom);
782 setOperationAction(Op: ISD::VACOPY, VT: MVT::Other, Action: Custom);
783 setOperationAction(Op: ISD::VAEND, VT: MVT::Other, Action: Expand);
784
785 if (Subtarget.isTargetzOS()) {
786 // Handle address space casts between mixed sized pointers.
787 setOperationAction(Op: ISD::ADDRSPACECAST, VT: MVT::i32, Action: Custom);
788 setOperationAction(Op: ISD::ADDRSPACECAST, VT: MVT::i64, Action: Custom);
789 }
790
791 setOperationAction(Op: ISD::GET_ROUNDING, VT: MVT::i32, Action: Custom);
792
793 // Codes for which we want to perform some z-specific combinations.
794 setTargetDAGCombine({ISD::ZERO_EXTEND,
795 ISD::SIGN_EXTEND,
796 ISD::SIGN_EXTEND_INREG,
797 ISD::LOAD,
798 ISD::STORE,
799 ISD::VECTOR_SHUFFLE,
800 ISD::EXTRACT_VECTOR_ELT,
801 ISD::FP_ROUND,
802 ISD::STRICT_FP_ROUND,
803 ISD::FP_EXTEND,
804 ISD::SINT_TO_FP,
805 ISD::UINT_TO_FP,
806 ISD::STRICT_FP_EXTEND,
807 ISD::FCOPYSIGN,
808 ISD::BSWAP,
809 ISD::SETCC,
810 ISD::SRL,
811 ISD::SRA,
812 ISD::MUL,
813 ISD::SDIV,
814 ISD::UDIV,
815 ISD::SREM,
816 ISD::UREM,
817 ISD::INTRINSIC_VOID,
818 ISD::INTRINSIC_W_CHAIN});
819
820 // Handle intrinsics.
821 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom);
822 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
823
  // We're not using SJLJ for exception handling, but these nodes are
  // implemented solely to support use of __builtin_setjmp / __builtin_longjmp.
826 setOperationAction(Op: ISD::EH_SJLJ_SETJMP, VT: MVT::i32, Action: Custom);
827 setOperationAction(Op: ISD::EH_SJLJ_LONGJMP, VT: MVT::Other, Action: Custom);
828
829 // We want to use MVC in preference to even a single load/store pair.
830 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
831 MaxStoresPerMemcpyOptSize = 0;
832
833 // The main memset sequence is a byte store followed by an MVC.
834 // Two STC or MV..I stores win over that, but the kind of fused stores
835 // generated by target-independent code don't when the byte value is
836 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
837 // than "STC;MVC". Handle the choice in target-specific code instead.
838 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
839 MaxStoresPerMemsetOptSize = 0;
840
841 // Default to having -disable-strictnode-mutation on
842 IsStrictFPEnabled = true;
843}
844
845bool SystemZTargetLowering::useSoftFloat() const {
846 return Subtarget.hasSoftFloat();
847}
848
849unsigned SystemZTargetLowering::getVectorTypeBreakdownForCallingConv(
850 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
851 unsigned &NumIntermediates, MVT &RegisterVT) const {
852 // Pass fp16 vectors in VR(s).
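  // (For example, a v16f16 argument is split into two v8f16 intermediate
  // values, each passed in its own vector register.)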
853 if (Subtarget.hasVector() && VT.isVector() && VT.getScalarType() == MVT::f16) {
854 IntermediateVT = RegisterVT = MVT::v8f16;
855 return NumIntermediates =
856 divideCeil(Numerator: VT.getVectorNumElements(), Denominator: SystemZ::VectorBytes / 2);
857 }
858 return TargetLowering::getVectorTypeBreakdownForCallingConv(
859 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
860}
861
862MVT SystemZTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
863 CallingConv::ID CC,
864 EVT VT) const {
865 // 128-bit single-element vector types are passed like other vectors,
866 // not like their element type.
867 if (VT.isVector() && VT.getSizeInBits() == 128 &&
868 VT.getVectorNumElements() == 1)
869 return MVT::v16i8;
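  // (E.g. v1i128 is passed as v16i8 rather than as a scalar i128.)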
870 // Pass fp16 vectors in VR(s).
871 if (Subtarget.hasVector() && VT.isVector() && VT.getScalarType() == MVT::f16)
872 return MVT::v8f16;
873 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
874}
875
876unsigned SystemZTargetLowering::getNumRegistersForCallingConv(
877 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
878 // Pass fp16 vectors in VR(s).
879 if (Subtarget.hasVector() && VT.isVector() && VT.getScalarType() == MVT::f16)
880 return divideCeil(Numerator: VT.getVectorNumElements(), Denominator: SystemZ::VectorBytes / 2);
881 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
882}
883
884EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
885 LLVMContext &, EVT VT) const {
886 if (!VT.isVector())
887 return MVT::i32;
888 return VT.changeVectorElementTypeToInteger();
889}
890
891bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
892 const MachineFunction &MF, EVT VT) const {
893 if (useSoftFloat())
894 return false;
895
896 VT = VT.getScalarType();
897
898 if (!VT.isSimple())
899 return false;
900
901 switch (VT.getSimpleVT().SimpleTy) {
902 case MVT::f32:
903 case MVT::f64:
904 return true;
905 case MVT::f128:
906 return Subtarget.hasVectorEnhancements1();
907 default:
908 break;
909 }
910
911 return false;
912}
913
914// Return true if the constant can be generated with a vector instruction,
915// such as VGM, VGMB or VREPI.
916bool SystemZVectorConstantInfo::isVectorConstantLegal(
917 const SystemZSubtarget &Subtarget) {
918 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
919 if (!Subtarget.hasVector() ||
920 (isFP128 && !Subtarget.hasVectorEnhancements1()))
921 return false;
922
923 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
924 // preferred way of creating all-zero and all-one vectors so give it
925 // priority over other methods below.
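  // (For example, an all-ones 128-bit constant has every byte equal to 0xff
  // and so produces Mask == 0xffff, while an all-zero constant produces
  // Mask == 0.)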
926 unsigned Mask = 0;
927 unsigned I = 0;
928 for (; I < SystemZ::VectorBytes; ++I) {
929 uint64_t Byte = IntBits.lshr(shiftAmt: I * 8).trunc(width: 8).getZExtValue();
930 if (Byte == 0xff)
931 Mask |= 1ULL << I;
932 else if (Byte != 0)
933 break;
934 }
935 if (I == SystemZ::VectorBytes) {
936 Opcode = SystemZISD::BYTE_MASK;
937 OpVals.push_back(Elt: Mask);
938 VecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: 8), NumElements: 16);
939 return true;
940 }
941
942 if (SplatBitSize > 64)
943 return false;
944
945 auto TryValue = [&](uint64_t Value) -> bool {
946 // Try VECTOR REPLICATE IMMEDIATE
947 int64_t SignedValue = SignExtend64(X: Value, B: SplatBitSize);
948 if (isInt<16>(x: SignedValue)) {
949 OpVals.push_back(Elt: ((unsigned) SignedValue));
950 Opcode = SystemZISD::REPLICATE;
951 VecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SplatBitSize),
952 NumElements: SystemZ::VectorBits / SplatBitSize);
953 return true;
954 }
955 // Try VECTOR GENERATE MASK
956 unsigned Start, End;
957 if (TII->isRxSBGMask(Mask: Value, BitSize: SplatBitSize, Start, End)) {
      // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
      // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
      // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1).
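      // (E.g. for SplatBitSize == 32, a full-width bit number of 32 becomes
      // bit number 0, i.e. the most significant bit of the 32-bit element.)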
961 OpVals.push_back(Elt: Start - (64 - SplatBitSize));
962 OpVals.push_back(Elt: End - (64 - SplatBitSize));
963 Opcode = SystemZISD::ROTATE_MASK;
964 VecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SplatBitSize),
965 NumElements: SystemZ::VectorBits / SplatBitSize);
966 return true;
967 }
968 return false;
969 };
970
971 // First try assuming that any undefined bits above the highest set bit
972 // and below the lowest set bit are 1s. This increases the likelihood of
973 // being able to use a sign-extended element value in VECTOR REPLICATE
974 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
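  // (For instance, a 32-bit splat value of 0x0000ff00 whose upper 16 bits
  // are undefined can be treated as 0xffffff00, i.e. -256, which fits in a
  // signed 16-bit immediate and so is usable with VECTOR REPLICATE
  // IMMEDIATE.)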
975 uint64_t SplatBitsZ = SplatBits.getZExtValue();
976 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
977 unsigned LowerBits = llvm::countr_zero(Val: SplatBitsZ);
978 unsigned UpperBits = llvm::countl_zero(Val: SplatBitsZ);
979 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(N: LowerBits);
980 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(N: UpperBits);
981 if (TryValue(SplatBitsZ | Upper | Lower))
982 return true;
983
984 // Now try assuming that any undefined bits between the first and
985 // last defined set bits are set. This increases the chances of
986 // using a non-wraparound mask.
987 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
988 return TryValue(SplatBitsZ | Middle);
989}
990
991SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) {
992 if (IntImm.isSingleWord()) {
993 IntBits = APInt(128, IntImm.getZExtValue());
994 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
995 } else
996 IntBits = IntImm;
997 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
998
999 // Find the smallest splat.
1000 SplatBits = IntImm;
1001 unsigned Width = SplatBits.getBitWidth();
1002 while (Width > 8) {
1003 unsigned HalfSize = Width / 2;
1004 APInt HighValue = SplatBits.lshr(shiftAmt: HalfSize).trunc(width: HalfSize);
1005 APInt LowValue = SplatBits.trunc(width: HalfSize);
1006
1007 // If the two halves do not match, stop here.
1008 if (HighValue != LowValue || 8 > HalfSize)
1009 break;
1010
1011 SplatBits = HighValue;
1012 Width = HalfSize;
1013 }
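  // (For example, a 128-bit immediate consisting of the 16-bit pattern
  // 0x0001 repeated eight times reduces to the 16-bit splat 0x0001, so
  // Width ends up as 16.)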
1014 SplatUndef = 0;
1015 SplatBitSize = Width;
1016}
1017
1018SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
1019 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
1020 bool HasAnyUndefs;
1021
1022 // Get IntBits by finding the 128 bit splat.
1023 BVN->isConstantSplat(SplatValue&: IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, MinSplatBits: 128,
1024 isBigEndian: true);
1025
1026 // Get SplatBits by finding the 8 bit or greater splat.
1027 BVN->isConstantSplat(SplatValue&: SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, MinSplatBits: 8,
1028 isBigEndian: true);
1029}
1030
1031bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
1032 bool ForCodeSize) const {
1033 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
1034 if (Imm.isZero() || Imm.isNegZero())
1035 return true;
1036
1037 return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
1038}
1039
1040MachineBasicBlock *
1041SystemZTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
1042 MachineBasicBlock *MBB) const {
1043 DebugLoc DL = MI.getDebugLoc();
1044 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1045 const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo();
1046
1047 MachineFunction *MF = MBB->getParent();
1048 MachineRegisterInfo &MRI = MF->getRegInfo();
1049
1050 const BasicBlock *BB = MBB->getBasicBlock();
1051 MachineFunction::iterator I = ++MBB->getIterator();
1052
1053 Register DstReg = MI.getOperand(i: 0).getReg();
1054 const TargetRegisterClass *RC = MRI.getRegClass(Reg: DstReg);
1055 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
1056 (void)TRI;
1057 Register MainDstReg = MRI.createVirtualRegister(RegClass: RC);
1058 Register RestoreDstReg = MRI.createVirtualRegister(RegClass: RC);
1059
1060 MVT PVT = getPointerTy(DL: MF->getDataLayout());
1061 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate the following.
1063 // Algorithm:
1064 //
1065 // ---------
1066 // | thisMBB |
1067 // ---------
1068 // |
1069 // ------------------------
1070 // | |
1071 // ---------- ---------------
1072 // | mainMBB | | restoreMBB |
1073 // | v = 0 | | v = 1 |
1074 // ---------- ---------------
1075 // | |
1076 // -------------------------
1077 // |
1078 // -----------------------------
1079 // | sinkMBB |
1080 // | phi(v_mainMBB,v_restoreMBB) |
1081 // -----------------------------
1082 // thisMBB:
1083 // buf[FPOffset] = Frame Pointer if hasFP.
1084 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB.
1085 // buf[BCOffset] = Backchain value if building with -mbackchain.
1086 // buf[SPOffset] = Stack Pointer.
  // buf[LPOffset] = We never write this slot; gcc's setjmp always stores R13 here.
1088 // SjLjSetup restoreMBB
1089 // mainMBB:
1090 // v_main = 0
1091 // sinkMBB:
1092 // v = phi(v_main, v_restore)
1093 // restoreMBB:
1094 // v_restore = 1
1095
1096 MachineBasicBlock *ThisMBB = MBB;
1097 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
1098 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
1099 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
1100
1101 MF->insert(MBBI: I, MBB: MainMBB);
1102 MF->insert(MBBI: I, MBB: SinkMBB);
1103 MF->push_back(MBB: RestoreMBB);
1104 RestoreMBB->setMachineBlockAddressTaken();
1105
1106 MachineInstrBuilder MIB;
1107
1108 // Transfer the remainder of BB and its successor edges to sinkMBB.
1109 SinkMBB->splice(Where: SinkMBB->begin(), Other: MBB,
1110 From: std::next(x: MachineBasicBlock::iterator(MI)), To: MBB->end());
1111 SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
1112
1113 // thisMBB:
1114 const int64_t FPOffset = 0; // Slot 1.
1115 const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2.
1116 const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3.
1117 const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4.
1118
1119 // Buf address.
1120 Register BufReg = MI.getOperand(i: 1).getReg();
1121
1122 const TargetRegisterClass *PtrRC = getRegClassFor(VT: PVT);
1123 Register LabelReg = MRI.createVirtualRegister(RegClass: PtrRC);
1124
1125 // Prepare IP for longjmp.
1126 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LARL), DestReg: LabelReg)
1127 .addMBB(MBB: RestoreMBB);
1128 // Store IP for return from jmp, slot 2, offset = 1.
1129 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::STG))
1130 .addReg(RegNo: LabelReg)
1131 .addReg(RegNo: BufReg)
1132 .addImm(Val: LabelOffset)
1133 .addReg(RegNo: 0);
1134
1135 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1136 bool HasFP = Subtarget.getFrameLowering()->hasFP(MF: *MF);
1137 if (HasFP) {
1138 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::STG))
1139 .addReg(RegNo: SpecialRegs->getFramePointerRegister())
1140 .addReg(RegNo: BufReg)
1141 .addImm(Val: FPOffset)
1142 .addReg(RegNo: 0);
1143 }
1144
1145 // Store SP.
1146 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::STG))
1147 .addReg(RegNo: SpecialRegs->getStackPointerRegister())
1148 .addReg(RegNo: BufReg)
1149 .addImm(Val: SPOffset)
1150 .addReg(RegNo: 0);
1151
  // Slot 3 (Offset = 2): Backchain value (if building with -mbackchain).
1153 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1154 if (BackChain) {
1155 Register BCReg = MRI.createVirtualRegister(RegClass: PtrRC);
1156 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1157 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LG), DestReg: BCReg)
1158 .addReg(RegNo: SpecialRegs->getStackPointerRegister())
1159 .addImm(Val: TFL->getBackchainOffset(MF&: *MF))
1160 .addReg(RegNo: 0);
1161
1162 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::STG))
1163 .addReg(RegNo: BCReg)
1164 .addReg(RegNo: BufReg)
1165 .addImm(Val: BCOffset)
1166 .addReg(RegNo: 0);
1167 }
1168
1169 // Setup.
1170 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::EH_SjLj_Setup))
1171 .addMBB(MBB: RestoreMBB);
1172
1173 const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1174 MIB.addRegMask(Mask: RegInfo->getNoPreservedMask());
1175
1176 ThisMBB->addSuccessor(Succ: MainMBB);
1177 ThisMBB->addSuccessor(Succ: RestoreMBB);
1178
1179 // mainMBB:
1180 BuildMI(BB: MainMBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LHI), DestReg: MainDstReg).addImm(Val: 0);
1181 MainMBB->addSuccessor(Succ: SinkMBB);
1182
1183 // sinkMBB:
1184 BuildMI(BB&: *SinkMBB, I: SinkMBB->begin(), MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: DstReg)
1185 .addReg(RegNo: MainDstReg)
1186 .addMBB(MBB: MainMBB)
1187 .addReg(RegNo: RestoreDstReg)
1188 .addMBB(MBB: RestoreMBB);
1189
1190 // restoreMBB.
1191 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LHI), DestReg: RestoreDstReg).addImm(Val: 1);
1192 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::J)).addMBB(MBB: SinkMBB);
1193 RestoreMBB->addSuccessor(Succ: SinkMBB);
1194
1195 MI.eraseFromParent();
1196
1197 return SinkMBB;
1198}
1199
1200MachineBasicBlock *
1201SystemZTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
1202 MachineBasicBlock *MBB) const {
1203
1204 DebugLoc DL = MI.getDebugLoc();
1205 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1206
1207 MachineFunction *MF = MBB->getParent();
1208 MachineRegisterInfo &MRI = MF->getRegInfo();
1209
1210 MVT PVT = getPointerTy(DL: MF->getDataLayout());
1211 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1212 Register BufReg = MI.getOperand(i: 0).getReg();
1213 const TargetRegisterClass *RC = MRI.getRegClass(Reg: BufReg);
1214 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1215
1216 Register Tmp = MRI.createVirtualRegister(RegClass: RC);
1217 Register BCReg = MRI.createVirtualRegister(RegClass: RC);
1218
1219 MachineInstrBuilder MIB;
1220
1221 const int64_t FPOffset = 0;
1222 const int64_t LabelOffset = 1 * PVT.getStoreSize();
1223 const int64_t BCOffset = 2 * PVT.getStoreSize();
1224 const int64_t SPOffset = 3 * PVT.getStoreSize();
1225 const int64_t LPOffset = 4 * PVT.getStoreSize();
1226
1227 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LG), DestReg: Tmp)
1228 .addReg(RegNo: BufReg)
1229 .addImm(Val: LabelOffset)
1230 .addReg(RegNo: 0);
1231
1232 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LG),
1233 DestReg: SpecialRegs->getFramePointerRegister())
1234 .addReg(RegNo: BufReg)
1235 .addImm(Val: FPOffset)
1236 .addReg(RegNo: 0);
1237
  // We restore R13 even though we never stored it in setjmp from llvm,
  // because gcc always stores R13 in builtin_setjmp; we may be dealing with
  // mixed code (gcc setjmp and llvm longjmp).
1241 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LG), DestReg: SystemZ::R13D)
1242 .addReg(RegNo: BufReg)
1243 .addImm(Val: LPOffset)
1244 .addReg(RegNo: 0);
1245
1246 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1247 if (BackChain) {
1248 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LG), DestReg: BCReg)
1249 .addReg(RegNo: BufReg)
1250 .addImm(Val: BCOffset)
1251 .addReg(RegNo: 0);
1252 }
1253
1254 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LG),
1255 DestReg: SpecialRegs->getStackPointerRegister())
1256 .addReg(RegNo: BufReg)
1257 .addImm(Val: SPOffset)
1258 .addReg(RegNo: 0);
1259
1260 if (BackChain) {
1261 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1262 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::STG))
1263 .addReg(RegNo: BCReg)
1264 .addReg(RegNo: SpecialRegs->getStackPointerRegister())
1265 .addImm(Val: TFL->getBackchainOffset(MF&: *MF))
1266 .addReg(RegNo: 0);
1267 }
1268
1269 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BR)).addReg(RegNo: Tmp);
1270
1271 MI.eraseFromParent();
1272 return MBB;
1273}
1274
1275/// Returns true if stack probing through inline assembly is requested.
1276bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
1277 // If the function specifically requests inline stack probes, emit them.
1278 if (MF.getFunction().hasFnAttribute(Kind: "probe-stack"))
1279 return MF.getFunction().getFnAttribute(Kind: "probe-stack").getValueAsString() ==
1280 "inline-asm";
1281 return false;
1282}
1283
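// Atomic loads and stores (below) need no IR-level cast to integer type;
// they are handled directly during lowering.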
1284TargetLowering::AtomicExpansionKind
1285SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
1286 return AtomicExpansionKind::None;
1287}
1288
1289TargetLowering::AtomicExpansionKind
1290SystemZTargetLowering::shouldCastAtomicStoreInIR(StoreInst *SI) const {
1291 return AtomicExpansionKind::None;
1292}
1293
1294TargetLowering::AtomicExpansionKind
1295SystemZTargetLowering::shouldExpandAtomicRMWInIR(
1296 const AtomicRMWInst *RMW) const {
1297 // Don't expand subword operations as they require special treatment.
1298 if (RMW->getType()->isIntegerTy(Bitwidth: 8) || RMW->getType()->isIntegerTy(Bitwidth: 16))
1299 return AtomicExpansionKind::None;
1300
1301 // Don't expand if there is a target instruction available.
1302 if (Subtarget.hasInterlockedAccess1() &&
1303 (RMW->getType()->isIntegerTy(Bitwidth: 32) || RMW->getType()->isIntegerTy(Bitwidth: 64)) &&
1304 (RMW->getOperation() == AtomicRMWInst::BinOp::Add ||
1305 RMW->getOperation() == AtomicRMWInst::BinOp::Sub ||
1306 RMW->getOperation() == AtomicRMWInst::BinOp::And ||
1307 RMW->getOperation() == AtomicRMWInst::BinOp::Or ||
1308 RMW->getOperation() == AtomicRMWInst::BinOp::Xor))
1309 return AtomicExpansionKind::None;
1310
1311 return AtomicExpansionKind::CmpXChg;
1312}
1313
1314bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1315 // We can use CGFI or CLGFI.
1316 return isInt<32>(x: Imm) || isUInt<32>(x: Imm);
1317}
1318
1319bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1320 // We can use ALGFI or SLGFI.
1321 return isUInt<32>(x: Imm) || isUInt<32>(x: -Imm);
1322}
1323
1324bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
1325 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
1326 // Unaligned accesses should never be slower than the expanded version.
1327 // We check specifically for aligned accesses in the few cases where
1328 // they are required.
1329 if (Fast)
1330 *Fast = 1;
1331 return true;
1332}
1333
1334bool SystemZTargetLowering::hasAndNot(SDValue Y) const {
1335 EVT VT = Y.getValueType();
1336
1337 // We can use NC(G)RK for types in GPRs ...
1338 if (VT == MVT::i32 || VT == MVT::i64)
1339 return Subtarget.hasMiscellaneousExtensions3();
1340
1341 // ... or VNC for types in VRs.
1342 if (VT.isVector() || VT == MVT::i128)
1343 return Subtarget.hasVector();
1344
1345 return false;
1346}
1347
1348// Information about the addressing mode for a memory access.
1349struct AddressingMode {
1350 // True if a long displacement is supported.
1351 bool LongDisplacement;
1352
1353 // True if use of index register is supported.
1354 bool IndexReg;
1355
1356 AddressingMode(bool LongDispl, bool IdxReg) :
1357 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1358};
1359
1360// Return the desired addressing mode for a Load whose only use (in the
1361// same block) is a Store.
1362static AddressingMode getLoadStoreAddrMode(bool HasVector,
1363 Type *Ty) {
1364 // With vector support a Load->Store combination may be combined into either
1365 // an MVC or vector operations, and it seems to work best to allow the
1366 // vector addressing mode.
1367 if (HasVector)
1368 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1369
1370 // Otherwise only the MVC case is special.
1371 bool MVC = Ty->isIntegerTy(Bitwidth: 8);
1372 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1373}
1374
1375// Return the addressing mode which seems most desirable given an LLVM
1376// Instruction pointer.
1377static AddressingMode
1378supportedAddressingMode(Instruction *I, bool HasVector) {
1379 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
1380 switch (II->getIntrinsicID()) {
1381 default: break;
1382 case Intrinsic::memset:
1383 case Intrinsic::memmove:
1384 case Intrinsic::memcpy:
1385 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1386 }
1387 }
1388
1389 if (isa<LoadInst>(Val: I) && I->hasOneUse()) {
1390 auto *SingleUser = cast<Instruction>(Val: *I->user_begin());
1391 if (SingleUser->getParent() == I->getParent()) {
1392 if (isa<ICmpInst>(Val: SingleUser)) {
1393 if (auto *C = dyn_cast<ConstantInt>(Val: SingleUser->getOperand(i: 1)))
1394 if (C->getBitWidth() <= 64 &&
1395 (isInt<16>(x: C->getSExtValue()) || isUInt<16>(x: C->getZExtValue())))
1396 // Comparison of memory with 16 bit signed / unsigned immediate
1397 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1398 } else if (isa<StoreInst>(Val: SingleUser))
1399 // Load->Store
1400 return getLoadStoreAddrMode(HasVector, Ty: I->getType());
1401 }
1402 } else if (auto *StoreI = dyn_cast<StoreInst>(Val: I)) {
1403 if (auto *LoadI = dyn_cast<LoadInst>(Val: StoreI->getValueOperand()))
1404 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1405 // Load->Store
1406 return getLoadStoreAddrMode(HasVector, Ty: LoadI->getType());
1407 }
1408
1409 if (HasVector && (isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I))) {
1410
1411 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1412 // dependencies (LDE only supports small offsets).
1413 // * Utilize the vector registers to hold floating point
1414 // values (vector load / store instructions only support small
1415 // offsets).
1416
1417 Type *MemAccessTy = (isa<LoadInst>(Val: I) ? I->getType() :
1418 I->getOperand(i: 0)->getType());
1419 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1420 bool IsVectorAccess = MemAccessTy->isVectorTy();
1421
1422 // A store of an extracted vector element will be combined into a VSTE type
1423 // instruction.
1424 if (!IsVectorAccess && isa<StoreInst>(Val: I)) {
1425 Value *DataOp = I->getOperand(i: 0);
1426 if (isa<ExtractElementInst>(Val: DataOp))
1427 IsVectorAccess = true;
1428 }
1429
1430 // A load which gets inserted into a vector element will be combined into a
1431 // VLE type instruction.
1432 if (!IsVectorAccess && isa<LoadInst>(Val: I) && I->hasOneUse()) {
1433 User *LoadUser = *I->user_begin();
1434 if (isa<InsertElementInst>(Val: LoadUser))
1435 IsVectorAccess = true;
1436 }
1437
1438 if (IsFPAccess || IsVectorAccess)
1439 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1440 }
1441
1442 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1443}
1444
1445bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1446 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1447 // Punt on globals for now, although they can be used in limited
1448 // RELATIVE LONG cases.
1449 if (AM.BaseGV)
1450 return false;
1451
1452 // Require a 20-bit signed offset.
1453 if (!isInt<20>(x: AM.BaseOffs))
1454 return false;
1455
1456 bool RequireD12 =
1457 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(Bitwidth: 128));
1458 AddressingMode SupportedAM(!RequireD12, true);
1459 if (I != nullptr)
1460 SupportedAM = supportedAddressingMode(I, HasVector: Subtarget.hasVector());
1461
1462 if (!SupportedAM.LongDisplacement && !isUInt<12>(x: AM.BaseOffs))
1463 return false;
1464
1465 if (!SupportedAM.IndexReg)
1466 // No indexing allowed.
1467 return AM.Scale == 0;
1468 else
1469 // Indexing is OK but no scale factor can be applied.
1470 return AM.Scale == 0 || AM.Scale == 1;
1471}
1472
1473bool SystemZTargetLowering::findOptimalMemOpLowering(
1474 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
1475 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
1476 const AttributeList &FuncAttributes, EVT *LargestVT) const {
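  // Byte-length threshold up to which a small memcpy/memset is better served
  // by a single MVC-based sequence than by expansion into loads and stores.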
1477 const int MVCFastLen = 16;
1478
1479 if (Limit != ~unsigned(0)) {
1480 // Don't expand Op into scalar loads/stores in these cases:
1481 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1482 return false; // Small memcpy: Use MVC
1483 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1484 return false; // Small memset (first byte with STC/MVI): Use MVC
1485 if (Op.isZeroMemset())
1486 return false; // Memset zero: Use XC
1487 }
1488
1489 return TargetLowering::findOptimalMemOpLowering(
1490 Context, MemOps, Limit, Op, DstAS, SrcAS, FuncAttributes, LargestVT);
1491}
1492
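// When the vector facility is available, prefer 128-bit (v2i64) chunks for
// expanded memory operations; MVT::Other leaves the choice to generic code.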
1493EVT SystemZTargetLowering::getOptimalMemOpType(
1494 LLVMContext &Context, const MemOp &Op,
1495 const AttributeList &FuncAttributes) const {
1496 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1497}
1498
1499bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1500 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1501 return false;
1502 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1503 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1504 return FromBits > ToBits;
1505}
1506
1507bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
1508 if (!FromVT.isInteger() || !ToVT.isInteger())
1509 return false;
1510 unsigned FromBits = FromVT.getFixedSizeInBits();
1511 unsigned ToBits = ToVT.getFixedSizeInBits();
1512 return FromBits > ToBits;
1513}
1514
1515//===----------------------------------------------------------------------===//
1516// Inline asm support
1517//===----------------------------------------------------------------------===//
1518
1519TargetLowering::ConstraintType
1520SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
1521 if (Constraint.size() == 1) {
1522 switch (Constraint[0]) {
1523 case 'a': // Address register
1524 case 'd': // Data register (equivalent to 'r')
1525 case 'f': // Floating-point register
1526 case 'h': // High-part register
1527 case 'r': // General-purpose register
1528 case 'v': // Vector register
1529 return C_RegisterClass;
1530
1531 case 'Q': // Memory with base and unsigned 12-bit displacement
1532 case 'R': // Likewise, plus an index
1533 case 'S': // Memory with base and signed 20-bit displacement
1534 case 'T': // Likewise, plus an index
1535 case 'm': // Equivalent to 'T'.
1536 return C_Memory;
1537
1538 case 'I': // Unsigned 8-bit constant
1539 case 'J': // Unsigned 12-bit constant
1540 case 'K': // Signed 16-bit constant
1541 case 'L': // Signed 20-bit displacement (on all targets we support)
1542 case 'M': // 0x7fffffff
1543 return C_Immediate;
1544
1545 default:
1546 break;
1547 }
1548 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1549 switch (Constraint[1]) {
1550 case 'Q': // Address with base and unsigned 12-bit displacement
1551 case 'R': // Likewise, plus an index
1552 case 'S': // Address with base and signed 20-bit displacement
1553 case 'T': // Likewise, plus an index
1554 return C_Address;
1555
1556 default:
1557 break;
1558 }
1559 } else if (Constraint.size() == 5 && Constraint.starts_with(Prefix: "{")) {
1560 if (StringRef("{@cc}").compare(RHS: Constraint) == 0)
1561 return C_Other;
1562 }
1563 return TargetLowering::getConstraintType(Constraint);
1564}
1565
1566TargetLowering::ConstraintWeight
1567SystemZTargetLowering::getSingleConstraintMatchWeight(
1568 AsmOperandInfo &Info, const char *Constraint) const {
1569 ConstraintWeight Weight = CW_Invalid;
1570 Value *CallOperandVal = Info.CallOperandVal;
1571 // If we don't have a value, we can't do a match,
1572 // but allow it at the lowest weight.
1573 if (!CallOperandVal)
1574 return CW_Default;
1575 Type *type = CallOperandVal->getType();
1576 // Look at the constraint type.
1577 switch (*Constraint) {
1578 default:
1579 Weight = TargetLowering::getSingleConstraintMatchWeight(info&: Info, constraint: Constraint);
1580 break;
1581
1582 case 'a': // Address register
1583 case 'd': // Data register (equivalent to 'r')
1584 case 'h': // High-part register
1585 case 'r': // General-purpose register
1586 Weight =
1587 CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1588 break;
1589
1590 case 'f': // Floating-point register
1591 if (!useSoftFloat())
1592 Weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1593 break;
1594
1595 case 'v': // Vector register
1596 if (Subtarget.hasVector())
1597 Weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1598 : CW_Default;
1599 break;
1600
1601 case 'I': // Unsigned 8-bit constant
1602 if (auto *C = dyn_cast<ConstantInt>(Val: CallOperandVal))
1603 if (isUInt<8>(x: C->getZExtValue()))
1604 Weight = CW_Constant;
1605 break;
1606
1607 case 'J': // Unsigned 12-bit constant
1608 if (auto *C = dyn_cast<ConstantInt>(Val: CallOperandVal))
1609 if (isUInt<12>(x: C->getZExtValue()))
1610 Weight = CW_Constant;
1611 break;
1612
1613 case 'K': // Signed 16-bit constant
1614 if (auto *C = dyn_cast<ConstantInt>(Val: CallOperandVal))
1615 if (isInt<16>(x: C->getSExtValue()))
1616 Weight = CW_Constant;
1617 break;
1618
1619 case 'L': // Signed 20-bit displacement (on all targets we support)
1620 if (auto *C = dyn_cast<ConstantInt>(Val: CallOperandVal))
1621 if (isInt<20>(x: C->getSExtValue()))
1622 Weight = CW_Constant;
1623 break;
1624
1625 case 'M': // 0x7fffffff
1626 if (auto *C = dyn_cast<ConstantInt>(Val: CallOperandVal))
1627 if (C->getZExtValue() == 0x7fffffff)
1628 Weight = CW_Constant;
1629 break;
1630 }
1631 return Weight;
1632}
1633
1634// Parse a "{tNNN}" register constraint for which the register type "t"
1635// has already been verified. RC is the class associated with "t" and
1636// Map maps 0-based register numbers to LLVM register numbers.
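// For example, given the GR64 register map, a constraint of "{r5}" would
// resolve to the 64-bit register R5D.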
1637static std::pair<unsigned, const TargetRegisterClass *>
1638parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
1639 const unsigned *Map, unsigned Size) {
1640 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1641 if (isdigit(Constraint[2])) {
1642 unsigned Index;
1643 bool Failed =
1644 Constraint.slice(Start: 2, End: Constraint.size() - 1).getAsInteger(Radix: 10, Result&: Index);
1645 if (!Failed && Index < Size && Map[Index])
1646 return std::make_pair(x: Map[Index], y&: RC);
1647 }
1648 return std::make_pair(x: 0U, y: nullptr);
1649}
1650
1651std::pair<unsigned, const TargetRegisterClass *>
1652SystemZTargetLowering::getRegForInlineAsmConstraint(
1653 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1654 if (Constraint.size() == 1) {
1655 // GCC Constraint Letters
1656 switch (Constraint[0]) {
1657 default: break;
1658 case 'd': // Data register (equivalent to 'r')
1659 case 'r': // General-purpose register
1660 if (VT.getSizeInBits() == 64)
1661 return std::make_pair(x: 0U, y: &SystemZ::GR64BitRegClass);
1662 else if (VT.getSizeInBits() == 128)
1663 return std::make_pair(x: 0U, y: &SystemZ::GR128BitRegClass);
1664 return std::make_pair(x: 0U, y: &SystemZ::GR32BitRegClass);
1665
1666 case 'a': // Address register
1667 if (VT == MVT::i64)
1668 return std::make_pair(x: 0U, y: &SystemZ::ADDR64BitRegClass);
1669 else if (VT == MVT::i128)
1670 return std::make_pair(x: 0U, y: &SystemZ::ADDR128BitRegClass);
1671 return std::make_pair(x: 0U, y: &SystemZ::ADDR32BitRegClass);
1672
1673 case 'h': // High-part register (an LLVM extension)
1674 return std::make_pair(x: 0U, y: &SystemZ::GRH32BitRegClass);
1675
1676 case 'f': // Floating-point register
1677 if (!useSoftFloat()) {
1678 if (VT.getSizeInBits() == 16)
1679 return std::make_pair(x: 0U, y: &SystemZ::FP16BitRegClass);
1680 else if (VT.getSizeInBits() == 64)
1681 return std::make_pair(x: 0U, y: &SystemZ::FP64BitRegClass);
1682 else if (VT.getSizeInBits() == 128)
1683 return std::make_pair(x: 0U, y: &SystemZ::FP128BitRegClass);
1684 return std::make_pair(x: 0U, y: &SystemZ::FP32BitRegClass);
1685 }
1686 break;
1687
1688 case 'v': // Vector register
1689 if (Subtarget.hasVector()) {
1690 if (VT.getSizeInBits() == 16)
1691 return std::make_pair(x: 0U, y: &SystemZ::VR16BitRegClass);
1692 if (VT.getSizeInBits() == 32)
1693 return std::make_pair(x: 0U, y: &SystemZ::VR32BitRegClass);
1694 if (VT.getSizeInBits() == 64)
1695 return std::make_pair(x: 0U, y: &SystemZ::VR64BitRegClass);
1696 return std::make_pair(x: 0U, y: &SystemZ::VR128BitRegClass);
1697 }
1698 break;
1699 }
1700 }
1701 if (Constraint.starts_with(Prefix: "{")) {
1702
1703 // A clobber constraint (e.g. ~{f0}) will have MVT::Other, for which it is
1704 // not legal to query the size.
1705 auto getVTSizeInBits = [&VT]() {
1706 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1707 };
1708
1709 // We need to override the default register parsing for GPRs and FPRs
1710 // because the interpretation depends on VT. The internal names of
1711 // the registers are also different from the external names
1712 // (F0D and F0S instead of F0, etc.).
1713 if (Constraint[1] == 'r') {
1714 if (getVTSizeInBits() == 32)
1715 return parseRegisterNumber(Constraint, RC: &SystemZ::GR32BitRegClass,
1716 Map: SystemZMC::GR32Regs, Size: 16);
1717 if (getVTSizeInBits() == 128)
1718 return parseRegisterNumber(Constraint, RC: &SystemZ::GR128BitRegClass,
1719 Map: SystemZMC::GR128Regs, Size: 16);
1720 return parseRegisterNumber(Constraint, RC: &SystemZ::GR64BitRegClass,
1721 Map: SystemZMC::GR64Regs, Size: 16);
1722 }
1723 if (Constraint[1] == 'f') {
1724 if (useSoftFloat())
1725 return std::make_pair(
1726 x: 0u, y: static_cast<const TargetRegisterClass *>(nullptr));
1727 if (getVTSizeInBits() == 16)
1728 return parseRegisterNumber(Constraint, RC: &SystemZ::FP16BitRegClass,
1729 Map: SystemZMC::FP16Regs, Size: 16);
1730 if (getVTSizeInBits() == 32)
1731 return parseRegisterNumber(Constraint, RC: &SystemZ::FP32BitRegClass,
1732 Map: SystemZMC::FP32Regs, Size: 16);
1733 if (getVTSizeInBits() == 128)
1734 return parseRegisterNumber(Constraint, RC: &SystemZ::FP128BitRegClass,
1735 Map: SystemZMC::FP128Regs, Size: 16);
1736 return parseRegisterNumber(Constraint, RC: &SystemZ::FP64BitRegClass,
1737 Map: SystemZMC::FP64Regs, Size: 16);
1738 }
1739 if (Constraint[1] == 'v') {
1740 if (!Subtarget.hasVector())
1741 return std::make_pair(
1742 x: 0u, y: static_cast<const TargetRegisterClass *>(nullptr));
1743 if (getVTSizeInBits() == 16)
1744 return parseRegisterNumber(Constraint, RC: &SystemZ::VR16BitRegClass,
1745 Map: SystemZMC::VR16Regs, Size: 32);
1746 if (getVTSizeInBits() == 32)
1747 return parseRegisterNumber(Constraint, RC: &SystemZ::VR32BitRegClass,
1748 Map: SystemZMC::VR32Regs, Size: 32);
1749 if (getVTSizeInBits() == 64)
1750 return parseRegisterNumber(Constraint, RC: &SystemZ::VR64BitRegClass,
1751 Map: SystemZMC::VR64Regs, Size: 32);
1752 return parseRegisterNumber(Constraint, RC: &SystemZ::VR128BitRegClass,
1753 Map: SystemZMC::VR128Regs, Size: 32);
1754 }
1755 if (Constraint[1] == '@') {
1756 if (StringRef("{@cc}").compare(RHS: Constraint) == 0)
1757 return std::make_pair(x: SystemZ::CC, y: &SystemZ::CCRRegClass);
1758 }
1759 }
1760 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1761}
1762
1763// FIXME? Maybe this could be a TableGen attribute on some registers and
1764// this table could be generated automatically from RegInfo.
1765Register
1766SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
1767 const MachineFunction &MF) const {
1768 Register Reg =
1769 StringSwitch<Register>(RegName)
1770 .Case(S: "r4", Value: Subtarget.isTargetXPLINK64() ? SystemZ::R4D
1771 : SystemZ::NoRegister)
1772 .Case(S: "r15",
1773 Value: Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
1774 .Default(Value: Register());
1775
1776 return Reg;
1777}
1778
1779Register SystemZTargetLowering::getExceptionPointerRegister(
1780 const Constant *PersonalityFn) const {
1781 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1782}
1783
1784Register SystemZTargetLowering::getExceptionSelectorRegister(
1785 const Constant *PersonalityFn) const {
1786 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1787}
1788
1789// Convert condition code in CCReg to an i32 value.
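// IPM places the 2-bit condition code in bits 28-29 of its result (counting
// from the least significant bit), so the logical shift right by
// SystemZ::IPM_CC leaves the raw CC value (0-3) in the low bits.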
1790static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
1791 SDLoc DL(CCReg);
1792 SDValue IPM = DAG.getNode(Opcode: SystemZISD::IPM, DL, VT: MVT::i32, Operand: CCReg);
1793 return DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i32, N1: IPM,
1794 N2: DAG.getConstant(Val: SystemZ::IPM_CC, DL, VT: MVT::i32));
1795}
1796
1797// Lower @cc targets via setcc.
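// The "{@cc}" output constraint exposes the raw condition code (0-3) to
// inline-asm users as an integer; it is materialized here by copying CC out
// of the physical register and converting it with getCCResult.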
1798SDValue SystemZTargetLowering::LowerAsmOutputForConstraint(
1799 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
1800 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
1801 if (StringRef("{@cc}").compare(RHS: OpInfo.ConstraintCode) != 0)
1802 return SDValue();
1803
1804 // Check that return type is valid.
1805 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
1806 OpInfo.ConstraintVT.getSizeInBits() < 8)
1807 report_fatal_error(reason: "Glue output operand is of invalid type");
1808
1809 if (Glue.getNode()) {
1810 Glue = DAG.getCopyFromReg(Chain, dl: DL, Reg: SystemZ::CC, VT: MVT::i32, Glue);
1811 Chain = Glue.getValue(R: 1);
1812 } else
1813 Glue = DAG.getCopyFromReg(Chain, dl: DL, Reg: SystemZ::CC, VT: MVT::i32);
1814 return getCCResult(DAG, CCReg: Glue);
1815}
1816
1817void SystemZTargetLowering::LowerAsmOperandForConstraint(
1818 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1819 SelectionDAG &DAG) const {
1820 // Only support length 1 constraints for now.
1821 if (Constraint.size() == 1) {
1822 switch (Constraint[0]) {
1823 case 'I': // Unsigned 8-bit constant
1824 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op))
1825 if (isUInt<8>(x: C->getZExtValue()))
1826 Ops.push_back(x: DAG.getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(Op),
1827 VT: Op.getValueType()));
1828 return;
1829
1830 case 'J': // Unsigned 12-bit constant
1831 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op))
1832 if (isUInt<12>(x: C->getZExtValue()))
1833 Ops.push_back(x: DAG.getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(Op),
1834 VT: Op.getValueType()));
1835 return;
1836
1837 case 'K': // Signed 16-bit constant
1838 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op))
1839 if (isInt<16>(x: C->getSExtValue()))
1840 Ops.push_back(x: DAG.getSignedTargetConstant(
1841 Val: C->getSExtValue(), DL: SDLoc(Op), VT: Op.getValueType()));
1842 return;
1843
1844 case 'L': // Signed 20-bit displacement (on all targets we support)
1845 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op))
1846 if (isInt<20>(x: C->getSExtValue()))
1847 Ops.push_back(x: DAG.getSignedTargetConstant(
1848 Val: C->getSExtValue(), DL: SDLoc(Op), VT: Op.getValueType()));
1849 return;
1850
1851 case 'M': // 0x7fffffff
1852 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op))
1853 if (C->getZExtValue() == 0x7fffffff)
1854 Ops.push_back(x: DAG.getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(Op),
1855 VT: Op.getValueType()));
1856 return;
1857 }
1858 }
1859 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1860}
1861
1862//===----------------------------------------------------------------------===//
1863// Calling conventions
1864//===----------------------------------------------------------------------===//
1865
1866#include "SystemZGenCallingConv.inc"
1867
1868const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
1869 CallingConv::ID) const {
1870 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1871 SystemZ::R14D, 0 };
1872 return ScratchRegs;
1873}
1874
1875bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
1876 Type *ToType) const {
1877 return isTruncateFree(FromType, ToType);
1878}
1879
1880bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1881 return CI->isTailCall();
1882}
1883
1884// Value is a value that has been passed to us in the location described by VA
1885// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1886// any loads onto Chain.
1887static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
1888 CCValAssign &VA, SDValue Chain,
1889 SDValue Value) {
1890 // If the argument has been promoted from a smaller type, insert an
1891 // assertion to capture this.
1892 if (VA.getLocInfo() == CCValAssign::SExt)
1893 Value = DAG.getNode(Opcode: ISD::AssertSext, DL, VT: VA.getLocVT(), N1: Value,
1894 N2: DAG.getValueType(VA.getValVT()));
1895 else if (VA.getLocInfo() == CCValAssign::ZExt)
1896 Value = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: VA.getLocVT(), N1: Value,
1897 N2: DAG.getValueType(VA.getValVT()));
1898
1899 if (VA.isExtInLoc())
1900 Value = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VA.getValVT(), Operand: Value);
1901 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1902 // If this is a short vector argument loaded from the stack,
1903 // extend from i64 to full vector size and then bitcast.
1904 assert(VA.getLocVT() == MVT::i64);
1905 assert(VA.getValVT().isVector());
1906 Value = DAG.getBuildVector(VT: MVT::v2i64, DL, Ops: {Value, DAG.getUNDEF(VT: MVT::i64)});
1907 Value = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Value);
1908 } else
1909 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1910 return Value;
1911}
1912
1913// Value is a value of type VA.getValVT() that we need to copy into
1914// the location described by VA. Return a copy of Value converted to
1915// VA.getLocVT(). The caller is responsible for handling indirect values.
1916static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1917 CCValAssign &VA, SDValue Value) {
1918 switch (VA.getLocInfo()) {
1919 case CCValAssign::SExt:
1920 return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: VA.getLocVT(), Operand: Value);
1921 case CCValAssign::ZExt:
1922 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: VA.getLocVT(), Operand: Value);
1923 case CCValAssign::AExt:
1924 return DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: VA.getLocVT(), Operand: Value);
1925 case CCValAssign::BCvt: {
1926 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1927 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1928 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1929 // For an f32 vararg we need to first promote it to an f64 and then
1930 // bitcast it to an i64.
1931 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1932 Value = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: MVT::f64, Operand: Value);
1933 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1934 ? MVT::v2i64
1935 : VA.getLocVT();
1936 Value = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: BitCastToType, Operand: Value);
1937 // For ELF, this is a short vector argument to be stored to the stack,
1938 // bitcast to v2i64 and then extract first element.
1939 if (BitCastToType == MVT::v2i64)
1940 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: VA.getLocVT(), N1: Value,
1941 N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
1942 return Value;
1943 }
1944 case CCValAssign::Full:
1945 return Value;
1946 default:
1947 llvm_unreachable("Unhandled getLocInfo()");
1948 }
1949}
1950
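// Split an i128 value into its high and low 64-bit halves and pair them into
// an untyped GR128 value (used e.g. for inline-asm operands and 128-bit call
// arguments passed in a register pair).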
1951static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
1952 SDLoc DL(In);
1953 SDValue Lo, Hi;
1954 if (DAG.getTargetLoweringInfo().isTypeLegal(VT: MVT::i128)) {
1955 Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i64, Operand: In);
1956 Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i64,
1957 Operand: DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i128, N1: In,
1958 N2: DAG.getConstant(Val: 64, DL, VT: MVT::i32)));
1959 } else {
1960 std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: In, DL, LoVT: MVT::i64, HiVT: MVT::i64);
1961 }
1962
1963 // FIXME: If v2i64 were a legal type, we could use it instead of
1964 // Untyped here. This might enable improved folding.
1965 SDNode *Pair = DAG.getMachineNode(Opcode: SystemZ::PAIR128, dl: DL,
1966 VT: MVT::Untyped, Op1: Hi, Op2: Lo);
1967 return SDValue(Pair, 0);
1968}
1969
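// The inverse of lowerI128ToGR128: extract the high and low 64-bit
// subregisters of a GR128 value and reassemble them into an i128.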
1970static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
1971 SDLoc DL(In);
1972 SDValue Hi = DAG.getTargetExtractSubreg(SRIdx: SystemZ::subreg_h64,
1973 DL, VT: MVT::i64, Operand: In);
1974 SDValue Lo = DAG.getTargetExtractSubreg(SRIdx: SystemZ::subreg_l64,
1975 DL, VT: MVT::i64, Operand: In);
1976
1977 if (DAG.getTargetLoweringInfo().isTypeLegal(VT: MVT::i128)) {
1978 Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i128, Operand: Lo);
1979 Hi = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i128, Operand: Hi);
1980 Hi = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i128, N1: Hi,
1981 N2: DAG.getConstant(Val: 64, DL, VT: MVT::i32));
1982 return DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i128, N1: Lo, N2: Hi);
1983 } else {
1984 return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VT: MVT::i128, N1: Lo, N2: Hi);
1985 }
1986}
1987
1988bool SystemZTargetLowering::splitValueIntoRegisterParts(
1989 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1990 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1991 EVT ValueVT = Val.getValueType();
1992 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1993 // Inline assembly operand.
1994 Parts[0] = lowerI128ToGR128(DAG, In: DAG.getBitcast(VT: MVT::i128, V: Val));
1995 return true;
1996 }
1997
1998 return false;
1999}
2000
2001SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
2002 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
2003 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
2004 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
2005 // Inline assembly operand.
2006 SDValue Res = lowerGR128ToI128(DAG, In: Parts[0]);
2007 return DAG.getBitcast(VT: ValueVT, V: Res);
2008 }
2009
2010 return SDValue();
2011}
2012
2013// The first part of a split stack argument is at index I in Args (and
2014// ArgLocs). Return the type of a part and the number of them by reference.
2015template <class ArgTy>
2016static bool analyzeArgSplit(const SmallVectorImpl<ArgTy> &Args,
2017 SmallVector<CCValAssign, 16> &ArgLocs, unsigned I,
2018 MVT &PartVT, unsigned &NumParts) {
2019 if (!Args[I].Flags.isSplit())
2020 return false;
2021 assert(I < ArgLocs.size() && ArgLocs.size() == Args.size() &&
2022 "ArgLocs havoc.");
2023 PartVT = ArgLocs[I].getValVT();
2024 NumParts = 1;
2025 for (unsigned PartIdx = I + 1;; ++PartIdx) {
2026 assert(PartIdx != ArgLocs.size() && "SplitEnd not found.");
2027 assert(ArgLocs[PartIdx].getValVT() == PartVT && "Unsupported split.");
2028 ++NumParts;
2029 if (Args[PartIdx].Flags.isSplitEnd())
2030 break;
2031 }
2032 return true;
2033}
2034
2035SDValue SystemZTargetLowering::LowerFormalArguments(
2036 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
2037 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2038 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2039 MachineFunction &MF = DAG.getMachineFunction();
2040 MachineFrameInfo &MFI = MF.getFrameInfo();
2041 MachineRegisterInfo &MRI = MF.getRegInfo();
2042 SystemZMachineFunctionInfo *FuncInfo =
2043 MF.getInfo<SystemZMachineFunctionInfo>();
2044 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
2045 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
2046
2047 // Assign locations to all of the incoming arguments.
2048 SmallVector<CCValAssign, 16> ArgLocs;
2049 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2050 CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_SystemZ);
2051 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
2052
2053 unsigned NumFixedGPRs = 0;
2054 unsigned NumFixedFPRs = 0;
2055 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2056 SDValue ArgValue;
2057 CCValAssign &VA = ArgLocs[I];
2058 EVT LocVT = VA.getLocVT();
2059 if (VA.isRegLoc()) {
2060 // Arguments passed in registers
2061 const TargetRegisterClass *RC;
2062 switch (LocVT.getSimpleVT().SimpleTy) {
2063 default:
2064 // Integers smaller than i64 should be promoted to i64.
2065 llvm_unreachable("Unexpected argument type");
2066 case MVT::i32:
2067 NumFixedGPRs += 1;
2068 RC = &SystemZ::GR32BitRegClass;
2069 break;
2070 case MVT::i64:
2071 NumFixedGPRs += 1;
2072 RC = &SystemZ::GR64BitRegClass;
2073 break;
2074 case MVT::f16:
2075 NumFixedFPRs += 1;
2076 RC = &SystemZ::FP16BitRegClass;
2077 break;
2078 case MVT::f32:
2079 NumFixedFPRs += 1;
2080 RC = &SystemZ::FP32BitRegClass;
2081 break;
2082 case MVT::f64:
2083 NumFixedFPRs += 1;
2084 RC = &SystemZ::FP64BitRegClass;
2085 break;
2086 case MVT::f128:
2087 NumFixedFPRs += 2;
2088 RC = &SystemZ::FP128BitRegClass;
2089 break;
2090 case MVT::v16i8:
2091 case MVT::v8i16:
2092 case MVT::v4i32:
2093 case MVT::v2i64:
2094 case MVT::v8f16:
2095 case MVT::v4f32:
2096 case MVT::v2f64:
2097 RC = &SystemZ::VR128BitRegClass;
2098 break;
2099 }
2100
2101 Register VReg = MRI.createVirtualRegister(RegClass: RC);
2102 MRI.addLiveIn(Reg: VA.getLocReg(), vreg: VReg);
2103 ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT);
2104 } else {
2105 assert(VA.isMemLoc() && "Argument not register or memory");
2106
2107 // Create the frame index object for this incoming parameter.
2108 // FIXME: Pre-include call frame size in the offset, should not
2109 // need to manually add it here.
2110 int64_t ArgSPOffset = VA.getLocMemOffset();
2111 if (Subtarget.isTargetXPLINK64()) {
2112 auto &XPRegs =
2113 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
2114 ArgSPOffset += XPRegs.getCallFrameSize();
2115 }
2116 int FI =
2117 MFI.CreateFixedObject(Size: LocVT.getSizeInBits() / 8, SPOffset: ArgSPOffset, IsImmutable: true);
2118
2119 // Create the SelectionDAG nodes corresponding to a load
2120 // from this parameter. Unpromoted ints and floats are
2121 // passed as right-justified 8-byte values.
2122 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
2123 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32 ||
2124 VA.getLocVT() == MVT::f16) {
2125 unsigned SlotOffs = VA.getLocVT() == MVT::f16 ? 6 : 4;
2126 FIN = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: FIN,
2127 N2: DAG.getIntPtrConstant(Val: SlotOffs, DL));
2128 }
2129 ArgValue = DAG.getLoad(VT: LocVT, dl: DL, Chain, Ptr: FIN,
2130 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
2131 }
2132
2133 // Convert the value of the argument register into the value that's
2134 // being passed.
2135 if (VA.getLocInfo() == CCValAssign::Indirect) {
2136 InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue,
2137 PtrInfo: MachinePointerInfo()));
2138 // If the original argument was split (e.g. i128), we need
2139 // to load all parts of it here (using the same address).
2140 MVT PartVT;
2141 unsigned NumParts;
2142 if (analyzeArgSplit(Args: Ins, ArgLocs, I, PartVT, NumParts)) {
2143 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
2144 ++I;
2145 CCValAssign &PartVA = ArgLocs[I];
2146 unsigned PartOffset = Ins[I].PartOffset;
2147 SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue,
2148 N2: DAG.getIntPtrConstant(Val: PartOffset, DL));
2149 InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address,
2150 PtrInfo: MachinePointerInfo()));
2151 assert(PartOffset && "Offset should be non-zero.");
2152 }
2153 }
2154 } else
2155 InVals.push_back(Elt: convertLocVTToValVT(DAG, DL, VA, Chain, Value: ArgValue));
2156 }
2157
2158 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
2159 // Save the number of non-varargs registers for later use by va_start, etc.
2160 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2161 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2162
2163 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2164 Subtarget.getSpecialRegisters());
2165
2166 // Likewise the address (in the form of a frame index) of where the
2167 // first stack vararg would be. The 1-byte size here is arbitrary.
2168 // FIXME: Pre-include call frame size in the offset, should not
2169 // need to manually add it here.
2170 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
2171 int FI = MFI.CreateFixedObject(Size: 1, SPOffset: VarArgOffset, IsImmutable: true);
2172 FuncInfo->setVarArgsFrameIndex(FI);
2173 }
2174
2175 if (IsVarArg && Subtarget.isTargetELF()) {
2176 // Save the number of non-varargs registers for later use by va_start, etc.
2177 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2178 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2179
2180 // Likewise the address (in the form of a frame index) of where the
2181 // first stack vararg would be. The 1-byte size here is arbitrary.
2182 int64_t VarArgsOffset = CCInfo.getStackSize();
2183 FuncInfo->setVarArgsFrameIndex(
2184 MFI.CreateFixedObject(Size: 1, SPOffset: VarArgsOffset, IsImmutable: true));
2185
2186 // ...and a similar frame index for the caller-allocated save area
2187 // that will be used to store the incoming registers.
2188 int64_t RegSaveOffset =
2189 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, Reg: SystemZ::R2D) - 16;
2190 unsigned RegSaveIndex = MFI.CreateFixedObject(Size: 1, SPOffset: RegSaveOffset, IsImmutable: true);
2191 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
2192
2193 // Store the FPR varargs in the reserved frame slots. (We store the
2194 // GPRs as part of the prologue.)
2195 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
2196 SDValue MemOps[SystemZ::ELFNumArgFPRs];
2197 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
2198 unsigned Offset = TFL->getRegSpillOffset(MF, Reg: SystemZ::ELFArgFPRs[I]);
2199 int FI =
2200 MFI.CreateFixedObject(Size: 8, SPOffset: -SystemZMC::ELFCallFrameSize + Offset, IsImmutable: true);
2201 SDValue FIN = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
2202 Register VReg = MF.addLiveIn(PReg: SystemZ::ELFArgFPRs[I],
2203 RC: &SystemZ::FP64BitRegClass);
2204 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: MVT::f64);
2205 MemOps[I] = DAG.getStore(Chain: ArgValue.getValue(R: 1), dl: DL, Val: ArgValue, Ptr: FIN,
2206 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
2207 }
2208 // Join the stores, which are independent of one another.
2209 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other,
2210 Ops: ArrayRef(&MemOps[NumFixedFPRs],
2211 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
2212 }
2213 }
2214
2215 if (Subtarget.isTargetXPLINK64()) {
2216 // Create a virtual register for handling the incoming "ADA" special register (R5).
2217 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
2218 Register ADAvReg = MRI.createVirtualRegister(RegClass: RC);
2219 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2220 Subtarget.getSpecialRegisters());
2221 MRI.addLiveIn(Reg: Regs->getADARegister(), vreg: ADAvReg);
2222 FuncInfo->setADAVirtualRegister(ADAvReg);
2223 }
2224 return Chain;
2225}
2226
2227static bool canUseSiblingCall(const CCState &ArgCCInfo,
2228 SmallVectorImpl<CCValAssign> &ArgLocs,
2229 SmallVectorImpl<ISD::OutputArg> &Outs) {
2230 // Punt if there are any indirect or stack arguments, or if the call
2231 // needs the callee-saved argument register R6, or if the call uses
2232 // the callee-saved register arguments SwiftSelf and SwiftError.
2233 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2234 CCValAssign &VA = ArgLocs[I];
2235 if (VA.getLocInfo() == CCValAssign::Indirect)
2236 return false;
2237 if (!VA.isRegLoc())
2238 return false;
2239 Register Reg = VA.getLocReg();
2240 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
2241 return false;
2242 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
2243 return false;
2244 }
2245 return true;
2246}
2247
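// Form an ADA_ENTRY node for Val at the given offset relative to the
// function's ADA register and, unless LoadAdr is set, load the value stored
// at that ADA slot.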
2248static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL,
2249 unsigned Offset, bool LoadAdr = false) {
2250 MachineFunction &MF = DAG.getMachineFunction();
2251 SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
2252 Register ADAvReg = MFI->getADAVirtualRegister();
2253 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout());
2254
2255 SDValue Reg = DAG.getRegister(Reg: ADAvReg, VT: PtrVT);
2256 SDValue Ofs = DAG.getTargetConstant(Val: Offset, DL, VT: PtrVT);
2257
2258 SDValue Result = DAG.getNode(Opcode: SystemZISD::ADA_ENTRY, DL, VT: PtrVT, N1: Val, N2: Reg, N3: Ofs);
2259 if (!LoadAdr)
2260 Result = DAG.getLoad(
2261 VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Result, PtrInfo: MachinePointerInfo(), Alignment: Align(8),
2262 MMOFlags: MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant);
2263
2264 return Result;
2265}
2266
2267// ADA access using a global value.
2268// Note: for functions, the address of the descriptor is returned.
2269static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL,
2270 EVT PtrVT) {
2271 unsigned ADAtype;
2272 bool LoadAddr = false;
2273 const GlobalAlias *GA = dyn_cast<GlobalAlias>(Val: GV);
2274 bool IsFunction =
2275 (isa<Function>(Val: GV)) || (GA && isa<Function>(Val: GA->getAliaseeObject()));
2276 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
2277
2278 if (IsFunction) {
2279 if (IsInternal) {
2280 ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC;
2281 LoadAddr = true;
2282 } else
2283 ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC;
2284 } else {
2285 ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR;
2286 }
2287 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0, TargetFlags: ADAtype);
2288
2289 return getADAEntry(DAG, Val, DL, Offset: 0, LoadAdr: LoadAddr);
2290}
2291
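// Compute the ADA and call target for an XPLINK64 call. Returns true if the
// callee has internal or private linkage and can be called directly (BRASL)
// using the caller's own ADA; otherwise the ADA and entry point are loaded
// from the callee's function descriptor and false is returned.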
2292static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
2293 SDLoc &DL, SDValue &Chain) {
2294 unsigned ADADelta = 0; // ADA offset in desc.
2295 unsigned EPADelta = 8; // EPA offset in desc.
2296 MachineFunction &MF = DAG.getMachineFunction();
2297 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout());
2298
2299 // XPLink calling convention.
2300 if (auto *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
2301 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
2302 G->getGlobal()->hasPrivateLinkage());
2303 if (IsInternal) {
2304 SystemZMachineFunctionInfo *MFI =
2305 MF.getInfo<SystemZMachineFunctionInfo>();
2306 Register ADAvReg = MFI->getADAVirtualRegister();
2307 ADA = DAG.getCopyFromReg(Chain, dl: DL, Reg: ADAvReg, VT: PtrVT);
2308 Callee = DAG.getTargetGlobalAddress(GV: G->getGlobal(), DL, VT: PtrVT);
2309 Callee = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Callee);
2310 return true;
2311 } else {
2312 SDValue GA = DAG.getTargetGlobalAddress(
2313 GV: G->getGlobal(), DL, VT: PtrVT, offset: 0, TargetFlags: SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2314 ADA = getADAEntry(DAG, Val: GA, DL, Offset: ADADelta);
2315 Callee = getADAEntry(DAG, Val: GA, DL, Offset: EPADelta);
2316 }
2317 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
2318 SDValue ES = DAG.getTargetExternalSymbol(
2319 Sym: E->getSymbol(), VT: PtrVT, TargetFlags: SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2320 ADA = getADAEntry(DAG, Val: ES, DL, Offset: ADADelta);
2321 Callee = getADAEntry(DAG, Val: ES, DL, Offset: EPADelta);
2322 } else {
2323 // Function pointer case
2324 ADA = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Callee,
2325 N2: DAG.getConstant(Val: ADADelta, DL, VT: PtrVT));
2326 ADA = DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: ADA,
2327 PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()));
2328 Callee = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Callee,
2329 N2: DAG.getConstant(Val: EPADelta, DL, VT: PtrVT));
2330 Callee = DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Callee,
2331 PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()));
2332 }
2333 return false;
2334}
2335
2336SDValue
2337SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
2338 SmallVectorImpl<SDValue> &InVals) const {
2339 SelectionDAG &DAG = CLI.DAG;
2340 SDLoc &DL = CLI.DL;
2341 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2342 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2343 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2344 SDValue Chain = CLI.Chain;
2345 SDValue Callee = CLI.Callee;
2346 bool &IsTailCall = CLI.IsTailCall;
2347 CallingConv::ID CallConv = CLI.CallConv;
2348 bool IsVarArg = CLI.IsVarArg;
2349 MachineFunction &MF = DAG.getMachineFunction();
2350 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
2351 LLVMContext &Ctx = *DAG.getContext();
2352 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
2353
2354 // FIXME: z/OS support to be added later.
2355 if (Subtarget.isTargetXPLINK64())
2356 IsTailCall = false;
2357
2358 // Integer args <=32 bits should have an extension attribute.
2359 verifyNarrowIntegerArgs_Call(Outs, F: &MF.getFunction(), Callee);
2360
2361 // Analyze the operands of the call, assigning locations to each operand.
2362 SmallVector<CCValAssign, 16> ArgLocs;
2363 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
2364 ArgCCInfo.AnalyzeCallOperands(Outs, Fn: CC_SystemZ);
2365
2366 // We don't support GuaranteedTailCallOpt, only automatically-detected
2367 // sibling calls.
2368 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
2369 IsTailCall = false;
2370
2371 // Get a count of how many bytes are to be pushed on the stack.
2372 unsigned NumBytes = ArgCCInfo.getStackSize();
2373
2374 // Mark the start of the call.
2375 if (!IsTailCall)
2376 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL);
2377
2378 // Copy argument values to their designated locations.
2379 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
2380 SmallVector<SDValue, 8> MemOpChains;
2381 SDValue StackPtr;
2382 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2383 CCValAssign &VA = ArgLocs[I];
2384 SDValue ArgValue = OutVals[I];
2385
2386 if (VA.getLocInfo() == CCValAssign::Indirect) {
2387 // Store the argument in a stack slot and pass its address.
2388 EVT SlotVT;
2389 MVT PartVT;
2390 unsigned NumParts = 1;
2391 if (analyzeArgSplit(Args: Outs, ArgLocs, I, PartVT, NumParts))
2392 SlotVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: PartVT.getSizeInBits() * NumParts);
2393 else
2394 SlotVT = Outs[I].VT;
2395 SDValue SpillSlot = DAG.CreateStackTemporary(VT: SlotVT);
2396 int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
2397
2398 MachinePointerInfo StackPtrInfo =
2399 MachinePointerInfo::getFixedStack(MF, FI);
2400 MemOpChains.push_back(
2401 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot, PtrInfo: StackPtrInfo));
2402 // If the original argument was split (e.g. i128), we need
2403 // to store all parts of it here (and pass just one address).
2404 assert(Outs[I].PartOffset == 0);
2405 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
2406 ++I;
2407 SDValue PartValue = OutVals[I];
2408 unsigned PartOffset = Outs[I].PartOffset;
2409 SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot,
2410 N2: DAG.getIntPtrConstant(Val: PartOffset, DL));
2411 MemOpChains.push_back(
2412 Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address,
2413 PtrInfo: StackPtrInfo.getWithOffset(O: PartOffset)));
2414 assert(PartOffset && "Offset should be non-zero.");
2415 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
2416 SlotVT.getStoreSize()) && "Not enough space for argument part!");
2417 }
2418 ArgValue = SpillSlot;
2419 } else
2420 ArgValue = convertValVTToLocVT(DAG, DL, VA, Value: ArgValue);
2421
2422 if (VA.isRegLoc()) {
2423 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcast to an
2424 // MVT::i128 type. We decompose the 128-bit value into a pair of its
2425 // high and low halves.
2426 if (VA.getLocVT() == MVT::i128)
2427 ArgValue = lowerI128ToGR128(DAG, In: ArgValue);
2428 // Queue up the argument copies and emit them at the end.
2429 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue));
2430 } else {
2431 assert(VA.isMemLoc() && "Argument not register or memory");
2432
2433 // Work out the address of the stack slot. Unpromoted ints and
2434 // floats are passed as right-justified 8-byte values.
2435 if (!StackPtr.getNode())
2436 StackPtr = DAG.getCopyFromReg(Chain, dl: DL,
2437 Reg: Regs->getStackPointerRegister(), VT: PtrVT);
2438 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2439 VA.getLocMemOffset();
2440 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2441 Offset += 4;
2442 else if (VA.getLocVT() == MVT::f16)
2443 Offset += 6;
2444 SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
2445 N2: DAG.getIntPtrConstant(Val: Offset, DL));
2446
2447 // Emit the store.
2448 MemOpChains.push_back(
2449 Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo()));
2450
2451 // Although long doubles or vectors are passed through the stack when
2452 // they are vararg (non-fixed arguments), if a long double or vector
2453 // occupies the third and fourth slots of the argument list, GPR3 should
2454 // still shadow the third slot of the argument list.
2455 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2456 SDValue ShadowArgValue =
2457 DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i64, N1: ArgValue,
2458 N2: DAG.getIntPtrConstant(Val: 1, DL));
2459 RegsToPass.push_back(Elt: std::make_pair(x: SystemZ::R3D, y&: ShadowArgValue));
2460 }
2461 }
2462 }
2463
2464 // Join the stores, which are independent of one another.
2465 if (!MemOpChains.empty())
2466 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains);
2467
2468 // Accept direct calls by converting symbolic call addresses to the
2469 // associated Target* opcodes. Force %r1 to be used for indirect
2470 // tail calls.
2471 SDValue Glue;
2472
2473 if (Subtarget.isTargetXPLINK64()) {
2474 SDValue ADA;
2475 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2476 if (!IsBRASL) {
2477 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2478 ->getAddressOfCalleeRegister();
2479 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: CalleeReg, N: Callee, Glue);
2480 Glue = Chain.getValue(R: 1);
2481 Callee = DAG.getRegister(Reg: CalleeReg, VT: Callee.getValueType());
2482 }
2483 RegsToPass.push_back(Elt: std::make_pair(
2484 x: static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), y&: ADA));
2485 } else {
2486 if (auto *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
2487 Callee = DAG.getTargetGlobalAddress(GV: G->getGlobal(), DL, VT: PtrVT);
2488 Callee = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Callee);
2489 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
2490 Callee = DAG.getTargetExternalSymbol(Sym: E->getSymbol(), VT: PtrVT);
2491 Callee = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Callee);
2492 } else if (IsTailCall) {
2493 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: SystemZ::R1D, N: Callee, Glue);
2494 Glue = Chain.getValue(R: 1);
2495 Callee = DAG.getRegister(Reg: SystemZ::R1D, VT: Callee.getValueType());
2496 }
2497 }
2498
2499 // Build a sequence of copy-to-reg nodes, chained and glued together.
2500 for (const auto &[Reg, N] : RegsToPass) {
2501 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg, N, Glue);
2502 Glue = Chain.getValue(R: 1);
2503 }
2504
2505 // The first call operand is the chain and the second is the target address.
2506 SmallVector<SDValue, 8> Ops;
2507 Ops.push_back(Elt: Chain);
2508 Ops.push_back(Elt: Callee);
2509
2510 // Add argument registers to the end of the list so that they are
2511 // known live into the call.
2512 for (const auto &[Reg, N] : RegsToPass)
2513 Ops.push_back(Elt: DAG.getRegister(Reg, VT: N.getValueType()));
2514
2515 // Add a register mask operand representing the call-preserved registers.
2516 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2517 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2518 assert(Mask && "Missing call preserved mask for calling convention");
2519 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
2520
2521 // Glue the call to the argument copies, if any.
2522 if (Glue.getNode())
2523 Ops.push_back(Elt: Glue);
2524
2525 // Emit the call.
2526 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
2527 if (IsTailCall) {
2528 SDValue Ret = DAG.getNode(Opcode: SystemZISD::SIBCALL, DL, VTList: NodeTys, Ops);
2529 DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
2530 return Ret;
2531 }
2532 Chain = DAG.getNode(Opcode: SystemZISD::CALL, DL, VTList: NodeTys, Ops);
2533 DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
2534 Glue = Chain.getValue(R: 1);
2535
2536 // Mark the end of the call, which is glued to the call itself.
2537 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL);
2538 Glue = Chain.getValue(R: 1);
2539
2540 // Assign locations to each value returned by this call.
2541 SmallVector<CCValAssign, 16> RetLocs;
2542 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2543 RetCCInfo.AnalyzeCallResult(Ins, Fn: RetCC_SystemZ);
2544
2545 // Copy all of the result registers out of their specified physreg.
2546 for (CCValAssign &VA : RetLocs) {
2547 // Copy the value out, gluing the copy to the end of the call sequence.
2548 SDValue RetValue = DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(),
2549 VT: VA.getLocVT(), Glue);
2550 Chain = RetValue.getValue(R: 1);
2551 Glue = RetValue.getValue(R: 2);
2552
2553 // Convert the value of the return register into the value that's
2554 // being returned.
2555 InVals.push_back(Elt: convertLocVTToValVT(DAG, DL, VA, Chain, Value: RetValue));
2556 }
2557
2558 return Chain;
2559}
2560
2561// Generate a call taking the given operands as arguments and returning a
2562// result of type RetVT.
2563std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
2564 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2565 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2566 bool DoesNotReturn, bool IsReturnValueUsed) const {
2567 TargetLowering::ArgListTy Args;
2568 Args.reserve(n: Ops.size());
2569
2570 for (SDValue Op : Ops) {
2571 TargetLowering::ArgListEntry Entry(
2572 Op, Op.getValueType().getTypeForEVT(Context&: *DAG.getContext()));
2573 Entry.IsSExt = shouldSignExtendTypeInLibCall(Ty: Entry.Ty, IsSigned);
2574 Entry.IsZExt = !Entry.IsSExt;
2575 Args.push_back(x: Entry);
2576 }
2577
2578 SDValue Callee =
2579 DAG.getExternalSymbol(Sym: CalleeName, VT: getPointerTy(DL: DAG.getDataLayout()));
2580
2581 Type *RetTy = RetVT.getTypeForEVT(Context&: *DAG.getContext());
2582 TargetLowering::CallLoweringInfo CLI(DAG);
2583 bool SignExtend = shouldSignExtendTypeInLibCall(Ty: RetTy, IsSigned);
2584 CLI.setDebugLoc(DL)
2585 .setChain(Chain)
2586 .setCallee(CC: CallConv, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
2587 .setNoReturn(DoesNotReturn)
2588 .setDiscardResult(!IsReturnValueUsed)
2589 .setSExtResult(SignExtend)
2590 .setZExtResult(!SignExtend);
2591 return LowerCallTo(CLI);
2592}
2593
2594bool SystemZTargetLowering::CanLowerReturn(
2595 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2596 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
2597 const Type *RetTy) const {
2598 // Special case that we cannot easily detect in RetCC_SystemZ since
2599 // i128 may not be a legal type.
2600 for (auto &Out : Outs)
2601 if (Out.ArgVT.isScalarInteger() && Out.ArgVT.getSizeInBits() > 64)
2602 return false;
2603
2604 SmallVector<CCValAssign, 16> RetLocs;
2605 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Context);
2606 return RetCCInfo.CheckReturn(Outs, Fn: RetCC_SystemZ);
2607}
2608
2609SDValue
2610SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2611 bool IsVarArg,
2612 const SmallVectorImpl<ISD::OutputArg> &Outs,
2613 const SmallVectorImpl<SDValue> &OutVals,
2614 const SDLoc &DL, SelectionDAG &DAG) const {
2615 MachineFunction &MF = DAG.getMachineFunction();
2616
2617 // Integer args <=32 bits should have an extension attribute.
2618 verifyNarrowIntegerArgs_Ret(Outs, F: &MF.getFunction());
2619
2620 // Assign locations to each returned value.
2621 SmallVector<CCValAssign, 16> RetLocs;
2622 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2623 RetCCInfo.AnalyzeReturn(Outs, Fn: RetCC_SystemZ);
2624
2625 // Quick exit for void returns
2626 if (RetLocs.empty())
2627 return DAG.getNode(Opcode: SystemZISD::RET_GLUE, DL, VT: MVT::Other, Operand: Chain);
2628
2629 if (CallConv == CallingConv::GHC)
2630 report_fatal_error(reason: "GHC functions return void only");
2631
2632 // Copy the result values into the output registers.
2633 SDValue Glue;
2634 SmallVector<SDValue, 4> RetOps;
2635 RetOps.push_back(Elt: Chain);
2636 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2637 CCValAssign &VA = RetLocs[I];
2638 SDValue RetValue = OutVals[I];
2639
2640 // Make the return register live on exit.
2641 assert(VA.isRegLoc() && "Can only return in registers!");
2642
2643 // Promote the value as required.
2644 RetValue = convertValVTToLocVT(DAG, DL, VA, Value: RetValue);
2645
2646 // Chain and glue the copies together.
2647 Register Reg = VA.getLocReg();
2648 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg, N: RetValue, Glue);
2649 Glue = Chain.getValue(R: 1);
2650 RetOps.push_back(Elt: DAG.getRegister(Reg, VT: VA.getLocVT()));
2651 }
2652
2653 // Update chain and glue.
2654 RetOps[0] = Chain;
2655 if (Glue.getNode())
2656 RetOps.push_back(Elt: Glue);
2657
2658 return DAG.getNode(Opcode: SystemZISD::RET_GLUE, DL, VT: MVT::Other, Ops: RetOps);
2659}
2660
2661// Return true if Op is an intrinsic node with chain that returns the CC value
2662// as its only (other) argument. Provide the associated SystemZISD opcode and
2663// the mask of valid CC values if so.
2664static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2665 unsigned &CCValid) {
2666 unsigned Id = Op.getConstantOperandVal(i: 1);
2667 switch (Id) {
2668 case Intrinsic::s390_tbegin:
2669 Opcode = SystemZISD::TBEGIN;
2670 CCValid = SystemZ::CCMASK_TBEGIN;
2671 return true;
2672
2673 case Intrinsic::s390_tbegin_nofloat:
2674 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2675 CCValid = SystemZ::CCMASK_TBEGIN;
2676 return true;
2677
2678 case Intrinsic::s390_tend:
2679 Opcode = SystemZISD::TEND;
2680 CCValid = SystemZ::CCMASK_TEND;
2681 return true;
2682
2683 default:
2684 return false;
2685 }
2686}
2687
2688// Return true if Op is an intrinsic node without chain that returns the
2689// CC value as its final argument. Provide the associated SystemZISD
2690// opcode and the mask of valid CC values if so.
2691static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2692 unsigned Id = Op.getConstantOperandVal(i: 0);
2693 switch (Id) {
2694 case Intrinsic::s390_vpkshs:
2695 case Intrinsic::s390_vpksfs:
2696 case Intrinsic::s390_vpksgs:
2697 Opcode = SystemZISD::PACKS_CC;
2698 CCValid = SystemZ::CCMASK_VCMP;
2699 return true;
2700
2701 case Intrinsic::s390_vpklshs:
2702 case Intrinsic::s390_vpklsfs:
2703 case Intrinsic::s390_vpklsgs:
2704 Opcode = SystemZISD::PACKLS_CC;
2705 CCValid = SystemZ::CCMASK_VCMP;
2706 return true;
2707
2708 case Intrinsic::s390_vceqbs:
2709 case Intrinsic::s390_vceqhs:
2710 case Intrinsic::s390_vceqfs:
2711 case Intrinsic::s390_vceqgs:
2712 case Intrinsic::s390_vceqqs:
2713 Opcode = SystemZISD::VICMPES;
2714 CCValid = SystemZ::CCMASK_VCMP;
2715 return true;
2716
2717 case Intrinsic::s390_vchbs:
2718 case Intrinsic::s390_vchhs:
2719 case Intrinsic::s390_vchfs:
2720 case Intrinsic::s390_vchgs:
2721 case Intrinsic::s390_vchqs:
2722 Opcode = SystemZISD::VICMPHS;
2723 CCValid = SystemZ::CCMASK_VCMP;
2724 return true;
2725
2726 case Intrinsic::s390_vchlbs:
2727 case Intrinsic::s390_vchlhs:
2728 case Intrinsic::s390_vchlfs:
2729 case Intrinsic::s390_vchlgs:
2730 case Intrinsic::s390_vchlqs:
2731 Opcode = SystemZISD::VICMPHLS;
2732 CCValid = SystemZ::CCMASK_VCMP;
2733 return true;
2734
2735 case Intrinsic::s390_vtm:
2736 Opcode = SystemZISD::VTM;
2737 CCValid = SystemZ::CCMASK_VCMP;
2738 return true;
2739
2740 case Intrinsic::s390_vfaebs:
2741 case Intrinsic::s390_vfaehs:
2742 case Intrinsic::s390_vfaefs:
2743 Opcode = SystemZISD::VFAE_CC;
2744 CCValid = SystemZ::CCMASK_ANY;
2745 return true;
2746
2747 case Intrinsic::s390_vfaezbs:
2748 case Intrinsic::s390_vfaezhs:
2749 case Intrinsic::s390_vfaezfs:
2750 Opcode = SystemZISD::VFAEZ_CC;
2751 CCValid = SystemZ::CCMASK_ANY;
2752 return true;
2753
2754 case Intrinsic::s390_vfeebs:
2755 case Intrinsic::s390_vfeehs:
2756 case Intrinsic::s390_vfeefs:
2757 Opcode = SystemZISD::VFEE_CC;
2758 CCValid = SystemZ::CCMASK_ANY;
2759 return true;
2760
2761 case Intrinsic::s390_vfeezbs:
2762 case Intrinsic::s390_vfeezhs:
2763 case Intrinsic::s390_vfeezfs:
2764 Opcode = SystemZISD::VFEEZ_CC;
2765 CCValid = SystemZ::CCMASK_ANY;
2766 return true;
2767
2768 case Intrinsic::s390_vfenebs:
2769 case Intrinsic::s390_vfenehs:
2770 case Intrinsic::s390_vfenefs:
2771 Opcode = SystemZISD::VFENE_CC;
2772 CCValid = SystemZ::CCMASK_ANY;
2773 return true;
2774
2775 case Intrinsic::s390_vfenezbs:
2776 case Intrinsic::s390_vfenezhs:
2777 case Intrinsic::s390_vfenezfs:
2778 Opcode = SystemZISD::VFENEZ_CC;
2779 CCValid = SystemZ::CCMASK_ANY;
2780 return true;
2781
2782 case Intrinsic::s390_vistrbs:
2783 case Intrinsic::s390_vistrhs:
2784 case Intrinsic::s390_vistrfs:
2785 Opcode = SystemZISD::VISTR_CC;
2786 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
2787 return true;
2788
2789 case Intrinsic::s390_vstrcbs:
2790 case Intrinsic::s390_vstrchs:
2791 case Intrinsic::s390_vstrcfs:
2792 Opcode = SystemZISD::VSTRC_CC;
2793 CCValid = SystemZ::CCMASK_ANY;
2794 return true;
2795
2796 case Intrinsic::s390_vstrczbs:
2797 case Intrinsic::s390_vstrczhs:
2798 case Intrinsic::s390_vstrczfs:
2799 Opcode = SystemZISD::VSTRCZ_CC;
2800 CCValid = SystemZ::CCMASK_ANY;
2801 return true;
2802
2803 case Intrinsic::s390_vstrsb:
2804 case Intrinsic::s390_vstrsh:
2805 case Intrinsic::s390_vstrsf:
2806 Opcode = SystemZISD::VSTRS_CC;
2807 CCValid = SystemZ::CCMASK_ANY;
2808 return true;
2809
2810 case Intrinsic::s390_vstrszb:
2811 case Intrinsic::s390_vstrszh:
2812 case Intrinsic::s390_vstrszf:
2813 Opcode = SystemZISD::VSTRSZ_CC;
2814 CCValid = SystemZ::CCMASK_ANY;
2815 return true;
2816
2817 case Intrinsic::s390_vfcedbs:
2818 case Intrinsic::s390_vfcesbs:
2819 Opcode = SystemZISD::VFCMPES;
2820 CCValid = SystemZ::CCMASK_VCMP;
2821 return true;
2822
2823 case Intrinsic::s390_vfchdbs:
2824 case Intrinsic::s390_vfchsbs:
2825 Opcode = SystemZISD::VFCMPHS;
2826 CCValid = SystemZ::CCMASK_VCMP;
2827 return true;
2828
2829 case Intrinsic::s390_vfchedbs:
2830 case Intrinsic::s390_vfchesbs:
2831 Opcode = SystemZISD::VFCMPHES;
2832 CCValid = SystemZ::CCMASK_VCMP;
2833 return true;
2834
2835 case Intrinsic::s390_vftcidb:
2836 case Intrinsic::s390_vftcisb:
2837 Opcode = SystemZISD::VFTCI;
2838 CCValid = SystemZ::CCMASK_VCMP;
2839 return true;
2840
2841 case Intrinsic::s390_tdc:
2842 Opcode = SystemZISD::TDC;
2843 CCValid = SystemZ::CCMASK_TDC;
2844 return true;
2845
2846 default:
2847 return false;
2848 }
2849}
2850
2851// Emit an intrinsic with chain and an explicit CC register result.
2852static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2853 unsigned Opcode) {
2854 // Copy all operands except the intrinsic ID.
2855 unsigned NumOps = Op.getNumOperands();
2856 SmallVector<SDValue, 6> Ops;
2857 Ops.reserve(N: NumOps - 1);
2858 Ops.push_back(Elt: Op.getOperand(i: 0));
2859 for (unsigned I = 2; I < NumOps; ++I)
2860 Ops.push_back(Elt: Op.getOperand(i: I));
2861
2862 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2863 SDVTList RawVTs = DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other);
2864 SDValue Intr = DAG.getNode(Opcode, DL: SDLoc(Op), VTList: RawVTs, Ops);
2865 SDValue OldChain = SDValue(Op.getNode(), 1);
2866 SDValue NewChain = SDValue(Intr.getNode(), 1);
2867 DAG.ReplaceAllUsesOfValueWith(From: OldChain, To: NewChain);
2868 return Intr.getNode();
2869}
2870
2871// Emit an intrinsic with an explicit CC register result.
2872static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2873 unsigned Opcode) {
2874 // Copy all operands except the intrinsic ID.
2875 SDLoc DL(Op);
2876 unsigned NumOps = Op.getNumOperands();
2877 SmallVector<SDValue, 6> Ops;
2878 Ops.reserve(N: NumOps - 1);
2879 for (unsigned I = 1; I < NumOps; ++I) {
2880 SDValue CurrOper = Op.getOperand(i: I);
2881 if (CurrOper.getValueType() == MVT::f16) {
2882 assert((Op.getConstantOperandVal(0) == Intrinsic::s390_tdc && I == 1) &&
2883 "Unhandled intrinsic with f16 operand.");
2884 CurrOper = DAG.getFPExtendOrRound(Op: CurrOper, DL, VT: MVT::f32);
2885 }
2886 Ops.push_back(Elt: CurrOper);
2887 }
2888
2889 SDValue Intr = DAG.getNode(Opcode, DL, VTList: Op->getVTList(), Ops);
2890 return Intr.getNode();
2891}
2892
2893// CC is a comparison that will be implemented using an integer or
2894// floating-point comparison. Return the condition code mask for
2895// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2896// unsigned comparisons and clear for signed ones. In the floating-point
2897// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2898static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2899#define CONV(X) \
2900 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2901 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2902 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
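// For example, CONV(GT) expands to cases for ISD::SETGT, ISD::SETOGT and
// ISD::SETUGT, with the unsigned form also including CCMASK_CMP_UO.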
2903
2904 switch (CC) {
2905 default:
2906 llvm_unreachable("Invalid integer condition!");
2907
2908 CONV(EQ);
2909 CONV(NE);
2910 CONV(GT);
2911 CONV(GE);
2912 CONV(LT);
2913 CONV(LE);
2914
2915 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2916 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2917 }
2918#undef CONV
2919}
2920
2921// If C can be converted to a comparison against zero, adjust the operands
2922// as necessary.
2923static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2924 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2925 return;
2926
2927 auto *ConstOp1 = dyn_cast<ConstantSDNode>(Val: C.Op1.getNode());
2928 if (!ConstOp1 || ConstOp1->getValueSizeInBits(ResNo: 0) > 64)
2929 return;
2930
2931 int64_t Value = ConstOp1->getSExtValue();
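// E.g. rewrite (x > -1) as (x >= 0) and (x < 1) as (x <= 0); XOR-ing the
// CC mask with CCMASK_CMP_EQ toggles GT<->GE and LT<->LE.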
2932 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2933 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2934 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2935 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2936 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2937 C.Op1 = DAG.getConstant(Val: 0, DL, VT: C.Op1.getValueType());
2938 }
2939}
2940
2941// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2942// adjust the operands as necessary.
2943static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2944 Comparison &C) {
2945 // For us to make any changes, it must be a comparison between a single-use
2946 // load and a constant.
2947 if (!C.Op0.hasOneUse() ||
2948 C.Op0.getOpcode() != ISD::LOAD ||
2949 C.Op1.getOpcode() != ISD::Constant)
2950 return;
2951
2952 // We must have an 8- or 16-bit load.
2953 auto *Load = cast<LoadSDNode>(Val&: C.Op0);
2954 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2955 if ((NumBits != 8 && NumBits != 16) ||
2956 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2957 return;
2958
2959 // The load must be an extending one and the constant must be within the
2960 // range of the unextended value.
2961 auto *ConstOp1 = cast<ConstantSDNode>(Val&: C.Op1);
2962 if (!ConstOp1 || ConstOp1->getValueSizeInBits(ResNo: 0) > 64)
2963 return;
2964 uint64_t Value = ConstOp1->getZExtValue();
2965 uint64_t Mask = (1 << NumBits) - 1;
2966 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2967 // Make sure that ConstOp1 is in range of C.Op0.
2968 int64_t SignedValue = ConstOp1->getSExtValue();
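// Reject constants outside the signed range of the unextended load,
// i.e. outside [-(1 << (NumBits - 1)), (1 << (NumBits - 1)) - 1].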
2969 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2970 return;
2971 if (C.ICmpType != SystemZICMP::SignedOnly) {
2972 // Unsigned comparison between two sign-extended values is equivalent
2973 // to unsigned comparison between two zero-extended values.
2974 Value &= Mask;
2975 } else if (NumBits == 8) {
2976 // Try to treat the comparison as unsigned, so that we can use CLI.
2977 // Adjust CCMask and Value as necessary.
2978 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2979 // Test whether the high bit of the byte is set.
2980 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2981 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2982 // Test whether the high bit of the byte is clear.
2983 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2984 else
2985 // No instruction exists for this combination.
2986 return;
2987 C.ICmpType = SystemZICMP::UnsignedOnly;
2988 }
2989 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2990 if (Value > Mask)
2991 return;
2992 // If the constant is in range, we can use any comparison.
2993 C.ICmpType = SystemZICMP::Any;
2994 } else
2995 return;
2996
2997 // Make sure that the first operand is an i32 of the right extension type.
2998 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2999 ISD::SEXTLOAD :
3000 ISD::ZEXTLOAD);
3001 if (C.Op0.getValueType() != MVT::i32 ||
3002 Load->getExtensionType() != ExtType) {
3003 C.Op0 = DAG.getExtLoad(ExtType, dl: SDLoc(Load), VT: MVT::i32, Chain: Load->getChain(),
3004 Ptr: Load->getBasePtr(), PtrInfo: Load->getPointerInfo(),
3005 MemVT: Load->getMemoryVT(), Alignment: Load->getAlign(),
3006 MMOFlags: Load->getMemOperand()->getFlags());
3007 // Update the chain uses.
3008 DAG.ReplaceAllUsesOfValueWith(From: SDValue(Load, 1), To: C.Op0.getValue(R: 1));
3009 }
3010
3011 // Make sure that the second operand is an i32 with the right value.
3012 if (C.Op1.getValueType() != MVT::i32 ||
3013 Value != ConstOp1->getZExtValue())
3014 C.Op1 = DAG.getConstant(Val: (uint32_t)Value, DL, VT: MVT::i32);
3015}
3016
3017// Return true if Op is either an unextended load, or a load suitable
3018// for integer register-memory comparisons of type ICmpType.
3019static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
3020 auto *Load = dyn_cast<LoadSDNode>(Val: Op.getNode());
3021 if (Load) {
3022 // There are no instructions to compare a register with a memory byte.
3023 if (Load->getMemoryVT() == MVT::i8)
3024 return false;
3025 // Otherwise decide on extension type.
3026 switch (Load->getExtensionType()) {
3027 case ISD::NON_EXTLOAD:
3028 return true;
3029 case ISD::SEXTLOAD:
3030 return ICmpType != SystemZICMP::UnsignedOnly;
3031 case ISD::ZEXTLOAD:
3032 return ICmpType != SystemZICMP::SignedOnly;
3033 default:
3034 break;
3035 }
3036 }
3037 return false;
3038}
3039
3040// Return true if it is better to swap the operands of C.
3041static bool shouldSwapCmpOperands(const Comparison &C) {
3042 // Leave i128 and f128 comparisons alone, since they have no memory forms.
3043 if (C.Op0.getValueType() == MVT::i128)
3044 return false;
3045 if (C.Op0.getValueType() == MVT::f128)
3046 return false;
3047
3048 // Always keep a floating-point constant second, since comparisons with
3049 // zero can use LOAD TEST and comparisons with other constants make a
3050 // natural memory operand.
3051 if (isa<ConstantFPSDNode>(Val: C.Op1))
3052 return false;
3053
3054 // Never swap comparisons with zero since there are many ways to optimize
3055 // those later.
3056 auto *ConstOp1 = dyn_cast<ConstantSDNode>(Val: C.Op1);
3057 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
3058 return false;
3059
3060 // Also keep natural memory operands second if the loaded value is
3061 // only used here. Several comparisons have memory forms.
3062 if (isNaturalMemoryOperand(Op: C.Op1, ICmpType: C.ICmpType) && C.Op1.hasOneUse())
3063 return false;
3064
3065 // Look for cases where C.Op0 is a single-use load and C.Op1 isn't.
3066 // In that case we generally prefer the memory to be second.
3067 if (isNaturalMemoryOperand(Op: C.Op0, ICmpType: C.ICmpType) && C.Op0.hasOneUse()) {
3068 // The only exceptions are when the second operand is a constant and
3069 // we can use things like CHHSI.
3070 if (!ConstOp1)
3071 return true;
3072 // The unsigned memory-immediate instructions can handle 16-bit
3073 // unsigned integers.
3074 if (C.ICmpType != SystemZICMP::SignedOnly &&
3075 isUInt<16>(x: ConstOp1->getZExtValue()))
3076 return false;
3077 // The signed memory-immediate instructions can handle 16-bit
3078 // signed integers.
3079 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
3080 isInt<16>(x: ConstOp1->getSExtValue()))
3081 return false;
3082 return true;
3083 }
3084
3085 // Try to promote the use of CGFR and CLGFR.
3086 unsigned Opcode0 = C.Op0.getOpcode();
3087 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
3088 return true;
3089 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
3090 return true;
3091 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
3092 C.Op0.getOperand(i: 1).getOpcode() == ISD::Constant &&
3093 C.Op0.getConstantOperandVal(i: 1) == 0xffffffff)
3094 return true;
3095
3096 return false;
3097}
3098
3099// Check whether C tests for equality between X and Y and whether X - Y
3100// or Y - X is also computed. In that case it's better to compare the
3101// result of the subtraction against zero.
3102static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
3103 Comparison &C) {
3104 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3105 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3106 for (SDNode *N : C.Op0->users()) {
3107 if (N->getOpcode() == ISD::SUB &&
3108 ((N->getOperand(Num: 0) == C.Op0 && N->getOperand(Num: 1) == C.Op1) ||
3109 (N->getOperand(Num: 0) == C.Op1 && N->getOperand(Num: 1) == C.Op0))) {
3110 // Disable the nsw and nuw flags: the backend needs to handle
3111 // overflow as well during comparison elimination.
3112 N->dropFlags(Mask: SDNodeFlags::NoWrap);
3113 C.Op0 = SDValue(N, 0);
3114 C.Op1 = DAG.getConstant(Val: 0, DL, VT: N->getValueType(ResNo: 0));
3115 return;
3116 }
3117 }
3118 }
3119}
3120
3121 // Check whether C compares a floating-point value with zero and whether
3122 // that floating-point value is also negated. In this case we can use the
3123 // negation to set CC, thereby avoiding separate LOAD AND TEST and
3124// LOAD (NEGATIVE/COMPLEMENT) instructions.
3125static void adjustForFNeg(Comparison &C) {
3126 // This optimization is invalid for strict comparisons, since FNEG
3127 // does not raise any exceptions.
3128 if (C.Chain)
3129 return;
3130 auto *C1 = dyn_cast<ConstantFPSDNode>(Val&: C.Op1);
3131 if (C1 && C1->isZero()) {
3132 for (SDNode *N : C.Op0->users()) {
3133 if (N->getOpcode() == ISD::FNEG) {
3134 C.Op0 = SDValue(N, 0);
3135 C.CCMask = SystemZ::reverseCCMask(CCMask: C.CCMask);
3136 return;
3137 }
3138 }
3139 }
3140}
3141
3142// Check whether C compares (shl X, 32) with 0 and whether X is
3143// also sign-extended. In that case it is better to test the result
3144// of the sign extension using LTGFR.
3145//
3146// This case is important because InstCombine transforms a comparison
3147// with (sext (trunc X)) into a comparison with (shl X, 32).
3148static void adjustForLTGFR(Comparison &C) {
3149 // Check for a comparison between (shl X, 32) and 0.
3150 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
3151 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
3152 auto *C1 = dyn_cast<ConstantSDNode>(Val: C.Op0.getOperand(i: 1));
3153 if (C1 && C1->getZExtValue() == 32) {
3154 SDValue ShlOp0 = C.Op0.getOperand(i: 0);
3155 // See whether X has any SIGN_EXTEND_INREG uses.
3156 for (SDNode *N : ShlOp0->users()) {
3157 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
3158 cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT() == MVT::i32) {
3159 C.Op0 = SDValue(N, 0);
3160 return;
3161 }
3162 }
3163 }
3164 }
3165}
3166
3167// If C compares the truncation of an extending load, try to compare
3168// the untruncated value instead. This exposes more opportunities to
3169// reuse CC.
3170static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
3171 Comparison &C) {
3172 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
3173 C.Op0.getOperand(i: 0).getOpcode() == ISD::LOAD &&
3174 C.Op1.getOpcode() == ISD::Constant &&
3175 cast<ConstantSDNode>(Val&: C.Op1)->getValueSizeInBits(ResNo: 0) <= 64 &&
3176 C.Op1->getAsZExtVal() == 0) {
3177 auto *L = cast<LoadSDNode>(Val: C.Op0.getOperand(i: 0));
3178 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
3179 C.Op0.getValueSizeInBits().getFixedValue()) {
3180 unsigned Type = L->getExtensionType();
3181 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
3182 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
3183 C.Op0 = C.Op0.getOperand(i: 0);
3184 C.Op1 = DAG.getConstant(Val: 0, DL, VT: C.Op0.getValueType());
3185 }
3186 }
3187 }
3188}
3189
3190// Return true if shift operation N has an in-range constant shift value.
3191// Store it in ShiftVal if so.
3192static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
3193 auto *Shift = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
3194 if (!Shift)
3195 return false;
3196
3197 uint64_t Amount = Shift->getZExtValue();
3198 if (Amount >= N.getValueSizeInBits())
3199 return false;
3200
3201 ShiftVal = Amount;
3202 return true;
3203}
3204
3205// Check whether an AND with Mask is suitable for a TEST UNDER MASK
3206// instruction and whether the CC value is descriptive enough to handle
3207 // a comparison of type ICmpType between the AND result and CmpVal.
3208// CCMask says which comparison result is being tested and BitSize is
3209// the number of bits in the operands. If TEST UNDER MASK can be used,
3210// return the corresponding CC mask, otherwise return 0.
3211static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
3212 uint64_t Mask, uint64_t CmpVal,
3213 unsigned ICmpType) {
3214 assert(Mask != 0 && "ANDs with zero should have been removed by now");
3215
3216 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
3217 if (!SystemZ::isImmLL(Val: Mask) && !SystemZ::isImmLH(Val: Mask) &&
3218 !SystemZ::isImmHL(Val: Mask) && !SystemZ::isImmHH(Val: Mask))
3219 return 0;
3220
3221 // Work out the masks for the lowest and highest bits.
3222 uint64_t High = llvm::bit_floor(Value: Mask);
3223 uint64_t Low = uint64_t(1) << llvm::countr_zero(Val: Mask);
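// E.g. for Mask == 0x0ff0, High == 0x0800 and Low == 0x0010.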
3224
3225 // Signed ordered comparisons are effectively unsigned if the sign
3226 // bit is dropped.
3227 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
3228
3229 // Check for equality comparisons with 0, or the equivalent.
3230 if (CmpVal == 0) {
3231 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3232 return SystemZ::CCMASK_TM_ALL_0;
3233 if (CCMask == SystemZ::CCMASK_CMP_NE)
3234 return SystemZ::CCMASK_TM_SOME_1;
3235 }
3236 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
3237 if (CCMask == SystemZ::CCMASK_CMP_LT)
3238 return SystemZ::CCMASK_TM_ALL_0;
3239 if (CCMask == SystemZ::CCMASK_CMP_GE)
3240 return SystemZ::CCMASK_TM_SOME_1;
3241 }
3242 if (EffectivelyUnsigned && CmpVal < Low) {
3243 if (CCMask == SystemZ::CCMASK_CMP_LE)
3244 return SystemZ::CCMASK_TM_ALL_0;
3245 if (CCMask == SystemZ::CCMASK_CMP_GT)
3246 return SystemZ::CCMASK_TM_SOME_1;
3247 }
3248
3249 // Check for equality comparisons with the mask, or the equivalent.
3250 if (CmpVal == Mask) {
3251 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3252 return SystemZ::CCMASK_TM_ALL_1;
3253 if (CCMask == SystemZ::CCMASK_CMP_NE)
3254 return SystemZ::CCMASK_TM_SOME_0;
3255 }
3256 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
3257 if (CCMask == SystemZ::CCMASK_CMP_GT)
3258 return SystemZ::CCMASK_TM_ALL_1;
3259 if (CCMask == SystemZ::CCMASK_CMP_LE)
3260 return SystemZ::CCMASK_TM_SOME_0;
3261 }
3262 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
3263 if (CCMask == SystemZ::CCMASK_CMP_GE)
3264 return SystemZ::CCMASK_TM_ALL_1;
3265 if (CCMask == SystemZ::CCMASK_CMP_LT)
3266 return SystemZ::CCMASK_TM_SOME_0;
3267 }
3268
3269 // Check for ordered comparisons with the top bit.
3270 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
3271 if (CCMask == SystemZ::CCMASK_CMP_LE)
3272 return SystemZ::CCMASK_TM_MSB_0;
3273 if (CCMask == SystemZ::CCMASK_CMP_GT)
3274 return SystemZ::CCMASK_TM_MSB_1;
3275 }
3276 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
3277 if (CCMask == SystemZ::CCMASK_CMP_LT)
3278 return SystemZ::CCMASK_TM_MSB_0;
3279 if (CCMask == SystemZ::CCMASK_CMP_GE)
3280 return SystemZ::CCMASK_TM_MSB_1;
3281 }
3282
3283 // If there are just two bits, we can do equality checks for Low and High
3284 // as well.
3285 if (Mask == Low + High) {
3286 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
3287 return SystemZ::CCMASK_TM_MIXED_MSB_0;
3288 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
3289 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
3290 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
3291 return SystemZ::CCMASK_TM_MIXED_MSB_1;
3292 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
3293 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
3294 }
3295
3296 // Looks like we've exhausted our options.
3297 return 0;
3298}
3299
3300// See whether C can be implemented as a TEST UNDER MASK instruction.
3301// Update the arguments with the TM version if so.
3302static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
3303 Comparison &C) {
3304 // Use VECTOR TEST UNDER MASK for i128 operations.
3305 if (C.Op0.getValueType() == MVT::i128) {
3306 // We can use VTM for EQ/NE comparisons of x & y against 0.
3307 if (C.Op0.getOpcode() == ISD::AND &&
3308 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3309 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
3310 auto *Mask = dyn_cast<ConstantSDNode>(Val&: C.Op1);
3311 if (Mask && Mask->getAPIntValue() == 0) {
3312 C.Opcode = SystemZISD::VTM;
3313 C.Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v16i8, Operand: C.Op0.getOperand(i: 1));
3314 C.Op0 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v16i8, Operand: C.Op0.getOperand(i: 0));
3315 C.CCValid = SystemZ::CCMASK_VCMP;
3316 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3317 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3318 else
3319 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3320 }
3321 }
3322 return;
3323 }
3324
3325 // Check that we have a comparison with a constant.
3326 auto *ConstOp1 = dyn_cast<ConstantSDNode>(Val&: C.Op1);
3327 if (!ConstOp1)
3328 return;
3329 uint64_t CmpVal = ConstOp1->getZExtValue();
3330
3331 // Check whether the nonconstant input is an AND with a constant mask.
3332 Comparison NewC(C);
3333 uint64_t MaskVal;
3334 ConstantSDNode *Mask = nullptr;
3335 if (C.Op0.getOpcode() == ISD::AND) {
3336 NewC.Op0 = C.Op0.getOperand(i: 0);
3337 NewC.Op1 = C.Op0.getOperand(i: 1);
3338 Mask = dyn_cast<ConstantSDNode>(Val&: NewC.Op1);
3339 if (!Mask)
3340 return;
3341 MaskVal = Mask->getZExtValue();
3342 } else {
3343 // There is no instruction to compare with a 64-bit immediate
3344 // so use TMHH instead if possible. We need an unsigned ordered
3345 // comparison with an i64 immediate.
3346 if (NewC.Op0.getValueType() != MVT::i64 ||
3347 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
3348 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
3349 NewC.ICmpType == SystemZICMP::SignedOnly)
3350 return;
3351 // Convert LE and GT comparisons into LT and GE.
3352 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
3353 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
3354 if (CmpVal == uint64_t(-1))
3355 return;
3356 CmpVal += 1;
3357 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
3358 }
3359 // If the low N bits of Op1 are zero then the low N bits of Op0 can
3360 // be masked off without changing the result.
3361 MaskVal = -(CmpVal & -CmpVal);
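// CmpVal & -CmpVal isolates the lowest set bit of CmpVal; negating that
// produces a mask covering that bit and every bit above it.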
3362 NewC.ICmpType = SystemZICMP::UnsignedOnly;
3363 }
3364 if (!MaskVal)
3365 return;
3366
3367 // Check whether the combination of mask, comparison value and comparison
3368 // type is suitable.
3369 unsigned BitSize = NewC.Op0.getValueSizeInBits();
3370 unsigned NewCCMask, ShiftVal;
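// If the tested value is itself a constant shift, try to perform the test
// on the unshifted operand by shifting the mask and comparison value in
// the opposite direction.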
3371 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3372 NewC.Op0.getOpcode() == ISD::SHL &&
3373 isSimpleShift(N: NewC.Op0, ShiftVal) &&
3374 (MaskVal >> ShiftVal != 0) &&
3375 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
3376 (NewCCMask = getTestUnderMaskCond(BitSize, CCMask: NewC.CCMask,
3377 Mask: MaskVal >> ShiftVal,
3378 CmpVal: CmpVal >> ShiftVal,
3379 ICmpType: SystemZICMP::Any))) {
3380 NewC.Op0 = NewC.Op0.getOperand(i: 0);
3381 MaskVal >>= ShiftVal;
3382 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3383 NewC.Op0.getOpcode() == ISD::SRL &&
3384 isSimpleShift(N: NewC.Op0, ShiftVal) &&
3385 (MaskVal << ShiftVal != 0) &&
3386 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
3387 (NewCCMask = getTestUnderMaskCond(BitSize, CCMask: NewC.CCMask,
3388 Mask: MaskVal << ShiftVal,
3389 CmpVal: CmpVal << ShiftVal,
3390 ICmpType: SystemZICMP::UnsignedOnly))) {
3391 NewC.Op0 = NewC.Op0.getOperand(i: 0);
3392 MaskVal <<= ShiftVal;
3393 } else {
3394 NewCCMask = getTestUnderMaskCond(BitSize, CCMask: NewC.CCMask, Mask: MaskVal, CmpVal,
3395 ICmpType: NewC.ICmpType);
3396 if (!NewCCMask)
3397 return;
3398 }
3399
3400 // Go ahead and make the change.
3401 C.Opcode = SystemZISD::TM;
3402 C.Op0 = NewC.Op0;
3403 if (Mask && Mask->getZExtValue() == MaskVal)
3404 C.Op1 = SDValue(Mask, 0);
3405 else
3406 C.Op1 = DAG.getConstant(Val: MaskVal, DL, VT: C.Op0.getValueType());
3407 C.CCValid = SystemZ::CCMASK_TM;
3408 C.CCMask = NewCCMask;
3409}
3410
3411// Implement i128 comparison in vector registers.
3412static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
3413 Comparison &C) {
3414 if (C.Opcode != SystemZISD::ICMP)
3415 return;
3416 if (C.Op0.getValueType() != MVT::i128)
3417 return;
3418
3419 // Recognize vector comparison reductions.
3420 if ((C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3421 C.CCMask == SystemZ::CCMASK_CMP_NE) &&
3422 (isNullConstant(V: C.Op1) || isAllOnesConstant(V: C.Op1))) {
3423 bool CmpEq = C.CCMask == SystemZ::CCMASK_CMP_EQ;
3424 bool CmpNull = isNullConstant(V: C.Op1);
3425 SDValue Src = peekThroughBitcasts(V: C.Op0);
3426 if (Src.hasOneUse() && isBitwiseNot(V: Src)) {
3427 Src = Src.getOperand(i: 0);
3428 CmpNull = !CmpNull;
3429 }
3430 unsigned Opcode = 0;
3431 if (Src.hasOneUse()) {
3432 switch (Src.getOpcode()) {
3433 case SystemZISD::VICMPE: Opcode = SystemZISD::VICMPES; break;
3434 case SystemZISD::VICMPH: Opcode = SystemZISD::VICMPHS; break;
3435 case SystemZISD::VICMPHL: Opcode = SystemZISD::VICMPHLS; break;
3436 case SystemZISD::VFCMPE: Opcode = SystemZISD::VFCMPES; break;
3437 case SystemZISD::VFCMPH: Opcode = SystemZISD::VFCMPHS; break;
3438 case SystemZISD::VFCMPHE: Opcode = SystemZISD::VFCMPHES; break;
3439 default: break;
3440 }
3441 }
3442 if (Opcode) {
3443 C.Opcode = Opcode;
3444 C.Op0 = Src->getOperand(Num: 0);
3445 C.Op1 = Src->getOperand(Num: 1);
3446 C.CCValid = SystemZ::CCMASK_VCMP;
3447 C.CCMask = CmpNull ? SystemZ::CCMASK_VCMP_NONE : SystemZ::CCMASK_VCMP_ALL;
3448 if (!CmpEq)
3449 C.CCMask ^= C.CCValid;
3450 return;
3451 }
3452 }
3453
3454 // Everything below here is not useful if we have native i128 compares.
3455 if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
3456 return;
3457
3458 // (In-)Equality comparisons can be implemented via VCEQGS.
3459 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3460 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3461 C.Opcode = SystemZISD::VICMPES;
3462 C.Op0 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v2i64, Operand: C.Op0);
3463 C.Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v2i64, Operand: C.Op1);
3464 C.CCValid = SystemZ::CCMASK_VCMP;
3465 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3466 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3467 else
3468 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3469 return;
3470 }
3471
3472 // Normalize other comparisons to GT.
3473 bool Swap = false, Invert = false;
3474 switch (C.CCMask) {
3475 case SystemZ::CCMASK_CMP_GT: break;
3476 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
3477 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
3478 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
3479 default: llvm_unreachable("Invalid integer condition!");
3480 }
3481 if (Swap)
3482 std::swap(a&: C.Op0, b&: C.Op1);
3483
3484 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3485 C.Opcode = SystemZISD::UCMP128HI;
3486 else
3487 C.Opcode = SystemZISD::SCMP128HI;
3488 C.CCValid = SystemZ::CCMASK_ANY;
3489 C.CCMask = SystemZ::CCMASK_1;
3490
3491 if (Invert)
3492 C.CCMask ^= C.CCValid;
3493}
3494
3495// See whether the comparison argument contains a redundant AND
3496// and remove it if so. This sometimes happens due to the generic
3497// BRCOND expansion.
3498static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3499 Comparison &C) {
3500 if (C.Op0.getOpcode() != ISD::AND)
3501 return;
3502 auto *Mask = dyn_cast<ConstantSDNode>(Val: C.Op0.getOperand(i: 1));
3503 if (!Mask || Mask->getValueSizeInBits(ResNo: 0) > 64)
3504 return;
3505 KnownBits Known = DAG.computeKnownBits(Op: C.Op0.getOperand(i: 0));
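// The AND is redundant only if every bit that might be nonzero in the
// first operand is already covered by the mask.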
3506 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3507 return;
3508
3509 C.Op0 = C.Op0.getOperand(i: 0);
3510}
3511
3512// Return a Comparison that tests the condition-code result of intrinsic
3513// node Call against constant integer CC using comparison code Cond.
3514// Opcode is the opcode of the SystemZISD operation for the intrinsic
3515// and CCValid is the set of possible condition-code results.
3516static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3517 SDValue Call, unsigned CCValid, uint64_t CC,
3518 ISD::CondCode Cond) {
3519 Comparison C(Call, SDValue(), SDValue());
3520 C.Opcode = Opcode;
3521 C.CCValid = CCValid;
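// SystemZ condition-code masks assign bit (3 - CC) to condition code CC,
// so CC == 0 is tested by mask value 8 and CC == 3 by mask value 1.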
3522 if (Cond == ISD::SETEQ)
3523 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3524 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3525 else if (Cond == ISD::SETNE)
3526 // ...and the inverse of that.
3527 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3528 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3529 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3530 // always true for CC>3.
3531 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3532 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3533 // ...and the inverse of that.
3534 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3535 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3536 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3537 // always true for CC>3.
3538 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3539 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3540 // ...and the inverse of that.
3541 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3542 else
3543 llvm_unreachable("Unexpected integer comparison type");
3544 C.CCMask &= CCValid;
3545 return C;
3546}
3547
3548 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3549static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3550 ISD::CondCode Cond, const SDLoc &DL,
3551 SDValue Chain = SDValue(),
3552 bool IsSignaling = false) {
3553 if (CmpOp1.getOpcode() == ISD::Constant) {
3554 assert(!Chain);
3555 unsigned Opcode, CCValid;
3556 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3557 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(NUses: 1, Value: 0) &&
3558 isIntrinsicWithCCAndChain(Op: CmpOp0, Opcode, CCValid))
3559 return getIntrinsicCmp(DAG, Opcode, Call: CmpOp0, CCValid,
3560 CC: CmpOp1->getAsZExtVal(), Cond);
3561 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3562 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3563 isIntrinsicWithCC(Op: CmpOp0, Opcode, CCValid))
3564 return getIntrinsicCmp(DAG, Opcode, Call: CmpOp0, CCValid,
3565 CC: CmpOp1->getAsZExtVal(), Cond);
3566 }
3567 Comparison C(CmpOp0, CmpOp1, Chain);
3568 C.CCMask = CCMaskForCondCode(CC: Cond);
3569 if (C.Op0.getValueType().isFloatingPoint()) {
3570 C.CCValid = SystemZ::CCMASK_FCMP;
3571 if (!C.Chain)
3572 C.Opcode = SystemZISD::FCMP;
3573 else if (!IsSignaling)
3574 C.Opcode = SystemZISD::STRICT_FCMP;
3575 else
3576 C.Opcode = SystemZISD::STRICT_FCMPS;
3577 adjustForFNeg(C);
3578 } else {
3579 assert(!C.Chain);
3580 C.CCValid = SystemZ::CCMASK_ICMP;
3581 C.Opcode = SystemZISD::ICMP;
3582 // Choose the type of comparison. Equality and inequality tests can
3583 // use either signed or unsigned comparisons. The choice also doesn't
3584 // matter if both sign bits are known to be clear. In those cases we
3585 // want to give the main isel code the freedom to choose whichever
3586 // form fits best.
3587 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3588 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3589 (DAG.SignBitIsZero(Op: C.Op0) && DAG.SignBitIsZero(Op: C.Op1)))
3590 C.ICmpType = SystemZICMP::Any;
3591 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3592 C.ICmpType = SystemZICMP::UnsignedOnly;
3593 else
3594 C.ICmpType = SystemZICMP::SignedOnly;
3595 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3596 adjustForRedundantAnd(DAG, DL, C);
3597 adjustZeroCmp(DAG, DL, C);
3598 adjustSubwordCmp(DAG, DL, C);
3599 adjustForSubtraction(DAG, DL, C);
3600 adjustForLTGFR(C);
3601 adjustICmpTruncate(DAG, DL, C);
3602 }
3603
3604 if (shouldSwapCmpOperands(C)) {
3605 std::swap(a&: C.Op0, b&: C.Op1);
3606 C.CCMask = SystemZ::reverseCCMask(CCMask: C.CCMask);
3607 }
3608
3609 adjustForTestUnderMask(DAG, DL, C);
3610 adjustICmp128(DAG, DL, C);
3611 return C;
3612}
3613
3614// Emit the comparison instruction described by C.
3615static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3616 if (!C.Op1.getNode()) {
3617 SDNode *Node;
3618 switch (C.Op0.getOpcode()) {
3619 case ISD::INTRINSIC_W_CHAIN:
3620 Node = emitIntrinsicWithCCAndChain(DAG, Op: C.Op0, Opcode: C.Opcode);
3621 return SDValue(Node, 0);
3622 case ISD::INTRINSIC_WO_CHAIN:
3623 Node = emitIntrinsicWithCC(DAG, Op: C.Op0, Opcode: C.Opcode);
3624 return SDValue(Node, Node->getNumValues() - 1);
3625 default:
3626 llvm_unreachable("Invalid comparison operands");
3627 }
3628 }
3629 if (C.Opcode == SystemZISD::ICMP)
3630 return DAG.getNode(Opcode: SystemZISD::ICMP, DL, VT: MVT::i32, N1: C.Op0, N2: C.Op1,
3631 N3: DAG.getTargetConstant(Val: C.ICmpType, DL, VT: MVT::i32));
3632 if (C.Opcode == SystemZISD::TM) {
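// The memory form of TEST UNDER MASK does not distinguish the two "mixed"
// cases (leftmost tested bit 0 vs. 1), so require the register forms
// whenever the CC mask relies on that distinction.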
3633 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3634 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
3635 return DAG.getNode(Opcode: SystemZISD::TM, DL, VT: MVT::i32, N1: C.Op0, N2: C.Op1,
3636 N3: DAG.getTargetConstant(Val: RegisterOnly, DL, VT: MVT::i32));
3637 }
3638 if (C.Opcode == SystemZISD::VICMPES ||
3639 C.Opcode == SystemZISD::VICMPHS ||
3640 C.Opcode == SystemZISD::VICMPHLS ||
3641 C.Opcode == SystemZISD::VFCMPES ||
3642 C.Opcode == SystemZISD::VFCMPHS ||
3643 C.Opcode == SystemZISD::VFCMPHES) {
3644 EVT IntVT = C.Op0.getValueType().changeVectorElementTypeToInteger();
3645 SDVTList VTs = DAG.getVTList(VT1: IntVT, VT2: MVT::i32);
3646 SDValue Val = DAG.getNode(Opcode: C.Opcode, DL, VTList: VTs, N1: C.Op0, N2: C.Op1);
3647 return SDValue(Val.getNode(), 1);
3648 }
3649 if (C.Chain) {
3650 SDVTList VTs = DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other);
3651 return DAG.getNode(Opcode: C.Opcode, DL, VTList: VTs, N1: C.Chain, N2: C.Op0, N3: C.Op1);
3652 }
3653 return DAG.getNode(Opcode: C.Opcode, DL, VT: MVT::i32, N1: C.Op0, N2: C.Op1);
3654}
3655
3656// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3657// 64 bits. Extend is the extension type to use. Store the high part
3658// in Hi and the low part in Lo.
3659static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3660 SDValue Op0, SDValue Op1, SDValue &Hi,
3661 SDValue &Lo) {
3662 Op0 = DAG.getNode(Opcode: Extend, DL, VT: MVT::i64, Operand: Op0);
3663 Op1 = DAG.getNode(Opcode: Extend, DL, VT: MVT::i64, Operand: Op1);
3664 SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::i64, N1: Op0, N2: Op1);
3665 Hi = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: Mul,
3666 N2: DAG.getConstant(Val: 32, DL, VT: MVT::i64));
3667 Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Hi);
3668 Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Mul);
3669}
3670
3671// Lower a binary operation that produces two VT results, one in each
3672// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3673// and Opcode performs the GR128 operation. Store the even register result
3674// in Even and the odd register result in Odd.
3675static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3676 unsigned Opcode, SDValue Op0, SDValue Op1,
3677 SDValue &Even, SDValue &Odd) {
3678 SDValue Result = DAG.getNode(Opcode, DL, VT: MVT::Untyped, N1: Op0, N2: Op1);
3679 bool Is32Bit = is32Bit(VT);
3680 Even = DAG.getTargetExtractSubreg(SRIdx: SystemZ::even128(Is32bit: Is32Bit), DL, VT, Operand: Result);
3681 Odd = DAG.getTargetExtractSubreg(SRIdx: SystemZ::odd128(Is32bit: Is32Bit), DL, VT, Operand: Result);
3682}
3683
3684// Return an i32 value that is 1 if the CC value produced by CCReg is
3685// in the mask CCMask and 0 otherwise. CC is known to have a value
3686// in CCValid, so other values can be ignored.
3687static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3688 unsigned CCValid, unsigned CCMask) {
3689 SDValue Ops[] = {DAG.getConstant(Val: 1, DL, VT: MVT::i32),
3690 DAG.getConstant(Val: 0, DL, VT: MVT::i32),
3691 DAG.getTargetConstant(Val: CCValid, DL, VT: MVT::i32),
3692 DAG.getTargetConstant(Val: CCMask, DL, VT: MVT::i32), CCReg};
3693 return DAG.getNode(Opcode: SystemZISD::SELECT_CCMASK, DL, VT: MVT::i32, Ops);
3694}
3695
3696 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3697// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3698// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3699// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3700// floating-point comparisons.
3701enum class CmpMode { Int, FP, StrictFP, SignalingFP };
3702static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3703 switch (CC) {
3704 case ISD::SETOEQ:
3705 case ISD::SETEQ:
3706 switch (Mode) {
3707 case CmpMode::Int: return SystemZISD::VICMPE;
3708 case CmpMode::FP: return SystemZISD::VFCMPE;
3709 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3710 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3711 }
3712 llvm_unreachable("Bad mode");
3713
3714 case ISD::SETOGE:
3715 case ISD::SETGE:
3716 switch (Mode) {
3717 case CmpMode::Int: return 0;
3718 case CmpMode::FP: return SystemZISD::VFCMPHE;
3719 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3720 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3721 }
3722 llvm_unreachable("Bad mode");
3723
3724 case ISD::SETOGT:
3725 case ISD::SETGT:
3726 switch (Mode) {
3727 case CmpMode::Int: return SystemZISD::VICMPH;
3728 case CmpMode::FP: return SystemZISD::VFCMPH;
3729 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3730 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3731 }
3732 llvm_unreachable("Bad mode");
3733
3734 case ISD::SETUGT:
3735 switch (Mode) {
3736 case CmpMode::Int: return SystemZISD::VICMPHL;
3737 case CmpMode::FP: return 0;
3738 case CmpMode::StrictFP: return 0;
3739 case CmpMode::SignalingFP: return 0;
3740 }
3741 llvm_unreachable("Bad mode");
3742
3743 default:
3744 return 0;
3745 }
3746}
3747
3748// Return the SystemZISD vector comparison operation for CC or its inverse,
3749// or 0 if neither can be done directly. Indicate in Invert whether the
3750// result is for the inverse of CC. Mode is as above.
3751static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3752 bool &Invert) {
3753 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3754 Invert = false;
3755 return Opcode;
3756 }
3757
3758 CC = ISD::getSetCCInverse(Operation: CC, Type: Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3759 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3760 Invert = true;
3761 return Opcode;
3762 }
3763
3764 return 0;
3765}
3766
3767// Return a v2f64 that contains the extended form of elements Start and Start+1
3768// of v4f32 value Op. If Chain is nonnull, return the strict form.
3769static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3770 SDValue Op, SDValue Chain) {
3771 int Mask[] = { Start, -1, Start + 1, -1 };
3772 Op = DAG.getVectorShuffle(VT: MVT::v4f32, dl: DL, N1: Op, N2: DAG.getUNDEF(VT: MVT::v4f32), Mask);
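// The shuffle places the two requested elements in lanes 0 and 2, which are
// the lanes that VEXTEND widens to f64.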
3773 if (Chain) {
3774 SDVTList VTs = DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other);
3775 return DAG.getNode(Opcode: SystemZISD::STRICT_VEXTEND, DL, VTList: VTs, N1: Chain, N2: Op);
3776 }
3777 return DAG.getNode(Opcode: SystemZISD::VEXTEND, DL, VT: MVT::v2f64, Operand: Op);
3778}
3779
3780// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3781// producing a result of type VT. If Chain is nonnull, return the strict form.
3782SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3783 const SDLoc &DL, EVT VT,
3784 SDValue CmpOp0,
3785 SDValue CmpOp1,
3786 SDValue Chain) const {
3787 // There is no hardware support for v4f32 (unless we have the vector
3788 // enhancements facility 1), so extend the vector into two v2f64s
3789 // and compare those.
3790 if (CmpOp0.getValueType() == MVT::v4f32 &&
3791 !Subtarget.hasVectorEnhancements1()) {
3792 SDValue H0 = expandV4F32ToV2F64(DAG, Start: 0, DL, Op: CmpOp0, Chain);
3793 SDValue L0 = expandV4F32ToV2F64(DAG, Start: 2, DL, Op: CmpOp0, Chain);
3794 SDValue H1 = expandV4F32ToV2F64(DAG, Start: 0, DL, Op: CmpOp1, Chain);
3795 SDValue L1 = expandV4F32ToV2F64(DAG, Start: 2, DL, Op: CmpOp1, Chain);
3796 if (Chain) {
3797 SDVTList VTs = DAG.getVTList(VT1: MVT::v2i64, VT2: MVT::Other);
3798 SDValue HRes = DAG.getNode(Opcode, DL, VTList: VTs, N1: Chain, N2: H0, N3: H1);
3799 SDValue LRes = DAG.getNode(Opcode, DL, VTList: VTs, N1: Chain, N2: L0, N3: L1);
3800 SDValue Res = DAG.getNode(Opcode: SystemZISD::PACK, DL, VT, N1: HRes, N2: LRes);
3801 SDValue Chains[6] = { H0.getValue(R: 1), L0.getValue(R: 1),
3802 H1.getValue(R: 1), L1.getValue(R: 1),
3803 HRes.getValue(R: 1), LRes.getValue(R: 1) };
3804 SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: Chains);
3805 SDValue Ops[2] = { Res, NewChain };
3806 return DAG.getMergeValues(Ops, dl: DL);
3807 }
3808 SDValue HRes = DAG.getNode(Opcode, DL, VT: MVT::v2i64, N1: H0, N2: H1);
3809 SDValue LRes = DAG.getNode(Opcode, DL, VT: MVT::v2i64, N1: L0, N2: L1);
3810 return DAG.getNode(Opcode: SystemZISD::PACK, DL, VT, N1: HRes, N2: LRes);
3811 }
3812 if (Chain) {
3813 SDVTList VTs = DAG.getVTList(VT1: VT, VT2: MVT::Other);
3814 return DAG.getNode(Opcode, DL, VTList: VTs, N1: Chain, N2: CmpOp0, N3: CmpOp1);
3815 }
3816 return DAG.getNode(Opcode, DL, VT, N1: CmpOp0, N2: CmpOp1);
3817}
3818
3819// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3820// an integer mask of type VT. If Chain is nonnull, we have a strict
3821// floating-point comparison. If in addition IsSignaling is true, we have
3822// a strict signaling floating-point comparison.
3823SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3824 const SDLoc &DL, EVT VT,
3825 ISD::CondCode CC,
3826 SDValue CmpOp0,
3827 SDValue CmpOp1,
3828 SDValue Chain,
3829 bool IsSignaling) const {
3830 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3831 assert(!Chain || IsFP);
3832 assert(!IsSignaling || Chain);
3833 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3834 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3835 bool Invert = false;
3836 SDValue Cmp;
3837 switch (CC) {
3838 // Handle tests for order using (or (ogt y x) (oge x y)).
3839 case ISD::SETUO:
3840 Invert = true;
3841 [[fallthrough]];
3842 case ISD::SETO: {
3843 assert(IsFP && "Unexpected integer comparison");
3844 SDValue LT = getVectorCmp(DAG, Opcode: getVectorComparison(CC: ISD::SETOGT, Mode),
3845 DL, VT, CmpOp0: CmpOp1, CmpOp1: CmpOp0, Chain);
3846 SDValue GE = getVectorCmp(DAG, Opcode: getVectorComparison(CC: ISD::SETOGE, Mode),
3847 DL, VT, CmpOp0, CmpOp1, Chain);
3848 Cmp = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: LT, N2: GE);
3849 if (Chain)
3850 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other,
3851 N1: LT.getValue(R: 1), N2: GE.getValue(R: 1));
3852 break;
3853 }
3854
3855 // Handle <> tests using (or (ogt y x) (ogt x y)).
3856 case ISD::SETUEQ:
3857 Invert = true;
3858 [[fallthrough]];
3859 case ISD::SETONE: {
3860 assert(IsFP && "Unexpected integer comparison");
3861 SDValue LT = getVectorCmp(DAG, Opcode: getVectorComparison(CC: ISD::SETOGT, Mode),
3862 DL, VT, CmpOp0: CmpOp1, CmpOp1: CmpOp0, Chain);
3863 SDValue GT = getVectorCmp(DAG, Opcode: getVectorComparison(CC: ISD::SETOGT, Mode),
3864 DL, VT, CmpOp0, CmpOp1, Chain);
3865 Cmp = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: LT, N2: GT);
3866 if (Chain)
3867 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other,
3868 N1: LT.getValue(R: 1), N2: GT.getValue(R: 1));
3869 break;
3870 }
3871
3872 // Otherwise a single comparison is enough. It doesn't really
3873 // matter whether we try the inversion or the swap first, since
3874 // there are no cases where both work.
3875 default:
3876 // Optimize sign-bit comparisons to signed compares.
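// E.g. (X & signbit) == 0 becomes X >= 0 and (X & signbit) != 0 becomes X < 0.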
3877 if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
3878 ISD::isConstantSplatVectorAllZeros(N: CmpOp1.getNode())) {
3879 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3880 APInt Mask;
3881 if (CmpOp0.getOpcode() == ISD::AND
3882 && ISD::isConstantSplatVector(N: CmpOp0.getOperand(i: 1).getNode(), SplatValue&: Mask)
3883 && Mask == APInt::getSignMask(BitWidth: EltSize)) {
3884 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
3885 CmpOp0 = CmpOp0.getOperand(i: 0);
3886 }
3887 }
3888 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3889 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3890 else {
3891 CC = ISD::getSetCCSwappedOperands(Operation: CC);
3892 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3893 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0: CmpOp1, CmpOp1: CmpOp0, Chain);
3894 else
3895 llvm_unreachable("Unhandled comparison");
3896 }
3897 if (Chain)
3898 Chain = Cmp.getValue(R: 1);
3899 break;
3900 }
3901 if (Invert) {
3902 SDValue Mask =
3903 DAG.getSplatBuildVector(VT, DL, Op: DAG.getAllOnesConstant(DL, VT: MVT::i64));
3904 Cmp = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Cmp, N2: Mask);
3905 }
3906 if (Chain && Chain.getNode() != Cmp.getNode()) {
3907 SDValue Ops[2] = { Cmp, Chain };
3908 Cmp = DAG.getMergeValues(Ops, dl: DL);
3909 }
3910 return Cmp;
3911}
3912
3913SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3914 SelectionDAG &DAG) const {
3915 SDValue CmpOp0 = Op.getOperand(i: 0);
3916 SDValue CmpOp1 = Op.getOperand(i: 1);
3917 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
3918 SDLoc DL(Op);
3919 EVT VT = Op.getValueType();
3920 if (VT.isVector())
3921 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3922
3923 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, Cond: CC, DL));
3924 SDValue CCReg = emitCmp(DAG, DL, C);
3925 return emitSETCC(DAG, DL, CCReg, CCValid: C.CCValid, CCMask: C.CCMask);
3926}
3927
3928SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3929 SelectionDAG &DAG,
3930 bool IsSignaling) const {
3931 SDValue Chain = Op.getOperand(i: 0);
3932 SDValue CmpOp0 = Op.getOperand(i: 1);
3933 SDValue CmpOp1 = Op.getOperand(i: 2);
3934 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 3))->get();
3935 SDLoc DL(Op);
3936 EVT VT = Op.getNode()->getValueType(ResNo: 0);
3937 if (VT.isVector()) {
3938 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3939 Chain, IsSignaling);
3940 return Res.getValue(R: Op.getResNo());
3941 }
3942
3943 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, Cond: CC, DL, Chain, IsSignaling));
3944 SDValue CCReg = emitCmp(DAG, DL, C);
3945 CCReg->setFlags(Op->getFlags());
3946 SDValue Result = emitSETCC(DAG, DL, CCReg, CCValid: C.CCValid, CCMask: C.CCMask);
3947 SDValue Ops[2] = { Result, CCReg.getValue(R: 1) };
3948 return DAG.getMergeValues(Ops, dl: DL);
3949}
3950
3951SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3952 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 1))->get();
3953 SDValue CmpOp0 = Op.getOperand(i: 2);
3954 SDValue CmpOp1 = Op.getOperand(i: 3);
3955 SDValue Dest = Op.getOperand(i: 4);
3956 SDLoc DL(Op);
3957
3958 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, Cond: CC, DL));
3959 SDValue CCReg = emitCmp(DAG, DL, C);
3960 return DAG.getNode(
3961 Opcode: SystemZISD::BR_CCMASK, DL, VT: Op.getValueType(), N1: Op.getOperand(i: 0),
3962 N2: DAG.getTargetConstant(Val: C.CCValid, DL, VT: MVT::i32),
3963 N3: DAG.getTargetConstant(Val: C.CCMask, DL, VT: MVT::i32), N4: Dest, N5: CCReg);
3964}
3965
3966// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3967// allowing Pos and Neg to be wider than CmpOp.
3968static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3969 return (Neg.getOpcode() == ISD::SUB &&
3970 Neg.getOperand(i: 0).getOpcode() == ISD::Constant &&
3971 Neg.getConstantOperandVal(i: 0) == 0 && Neg.getOperand(i: 1) == Pos &&
3972 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3973 Pos.getOperand(i: 0) == CmpOp)));
3974}
3975
3976// Return the absolute or negative absolute of Op; IsNegative decides which.
3977static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3978 bool IsNegative) {
3979 Op = DAG.getNode(Opcode: ISD::ABS, DL, VT: Op.getValueType(), Operand: Op);
3980 if (IsNegative)
3981 Op = DAG.getNode(Opcode: ISD::SUB, DL, VT: Op.getValueType(),
3982 N1: DAG.getConstant(Val: 0, DL, VT: Op.getValueType()), N2: Op);
3983 return Op;
3984}
3985
3986static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL,
3987 Comparison C, SDValue TrueOp, SDValue FalseOp) {
3988 EVT VT = MVT::i128;
3989 unsigned Op;
3990
3991 if (C.CCMask == SystemZ::CCMASK_CMP_NE ||
3992 C.CCMask == SystemZ::CCMASK_CMP_GE ||
3993 C.CCMask == SystemZ::CCMASK_CMP_LE) {
3994 std::swap(a&: TrueOp, b&: FalseOp);
3995 C.CCMask ^= C.CCValid;
3996 }
3997 if (C.CCMask == SystemZ::CCMASK_CMP_LT) {
3998 std::swap(a&: C.Op0, b&: C.Op1);
3999 C.CCMask = SystemZ::CCMASK_CMP_GT;
4000 }
4001 switch (C.CCMask) {
4002 case SystemZ::CCMASK_CMP_EQ:
4003 Op = SystemZISD::VICMPE;
4004 break;
4005 case SystemZ::CCMASK_CMP_GT:
4006 if (C.ICmpType == SystemZICMP::UnsignedOnly)
4007 Op = SystemZISD::VICMPHL;
4008 else
4009 Op = SystemZISD::VICMPH;
4010 break;
4011 default:
4012 llvm_unreachable("Unhandled comparison");
4013 break;
4014 }
4015
4016 SDValue Mask = DAG.getNode(Opcode: Op, DL, VT, N1: C.Op0, N2: C.Op1);
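// The vector compare yields an all-ones or all-zeros i128 mask, so the
// select can be formed as (TrueOp & Mask) | (FalseOp & ~Mask).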
4017 TrueOp = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: TrueOp, N2: Mask);
4018 FalseOp = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: FalseOp, N2: DAG.getNOT(DL, Val: Mask, VT));
4019 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: TrueOp, N2: FalseOp);
4020}
4021
4022SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
4023 SelectionDAG &DAG) const {
4024 SDValue CmpOp0 = Op.getOperand(i: 0);
4025 SDValue CmpOp1 = Op.getOperand(i: 1);
4026 SDValue TrueOp = Op.getOperand(i: 2);
4027 SDValue FalseOp = Op.getOperand(i: 3);
4028 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 4))->get();
4029 SDLoc DL(Op);
4030
4031 // SELECT_CC involving f16 will not have the cmp-ops promoted by the
4032 // legalizer, as it will be handled according to the type of the resulting
4033 // value. Extend them here if needed.
4034 if (CmpOp0.getSimpleValueType() == MVT::f16) {
4035 CmpOp0 = DAG.getFPExtendOrRound(Op: CmpOp0, DL: SDLoc(CmpOp0), VT: MVT::f32);
4036 CmpOp1 = DAG.getFPExtendOrRound(Op: CmpOp1, DL: SDLoc(CmpOp1), VT: MVT::f32);
4037 }
4038
4039 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, Cond: CC, DL));
4040
4041 // Check for absolute and negative-absolute selections, including those
4042 // where the comparison value is sign-extended (for LPGFR and LNGFR).
4043 // This check supplements the one in DAGCombiner.
4044 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
4045 C.CCMask != SystemZ::CCMASK_CMP_NE &&
4046 C.Op1.getOpcode() == ISD::Constant &&
4047 cast<ConstantSDNode>(Val&: C.Op1)->getValueSizeInBits(ResNo: 0) <= 64 &&
4048 C.Op1->getAsZExtVal() == 0) {
4049 if (isAbsolute(CmpOp: C.Op0, Pos: TrueOp, Neg: FalseOp))
4050 return getAbsolute(DAG, DL, Op: TrueOp, IsNegative: C.CCMask & SystemZ::CCMASK_CMP_LT);
4051 if (isAbsolute(CmpOp: C.Op0, Pos: FalseOp, Neg: TrueOp))
4052 return getAbsolute(DAG, DL, Op: FalseOp, IsNegative: C.CCMask & SystemZ::CCMASK_CMP_GT);
4053 }
4054
4055 if (Subtarget.hasVectorEnhancements3() &&
4056 C.Opcode == SystemZISD::ICMP &&
4057 C.Op0.getValueType() == MVT::i128 &&
4058 TrueOp.getValueType() == MVT::i128) {
4059 return getI128Select(DAG, DL, C, TrueOp, FalseOp);
4060 }
4061
4062 SDValue CCReg = emitCmp(DAG, DL, C);
4063 SDValue Ops[] = {TrueOp, FalseOp,
4064 DAG.getTargetConstant(Val: C.CCValid, DL, VT: MVT::i32),
4065 DAG.getTargetConstant(Val: C.CCMask, DL, VT: MVT::i32), CCReg};
4066
4067 return DAG.getNode(Opcode: SystemZISD::SELECT_CCMASK, DL, VT: Op.getValueType(), Ops);
4068}
4069
4070SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
4071 SelectionDAG &DAG) const {
4072 SDLoc DL(Node);
4073 const GlobalValue *GV = Node->getGlobal();
4074 int64_t Offset = Node->getOffset();
4075 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
4076 CodeModel::Model CM = DAG.getTarget().getCodeModel();
4077
4078 SDValue Result;
4079 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
4080 if (isInt<32>(x: Offset)) {
4081 // Assign anchors at 1<<12 byte boundaries.
4082 uint64_t Anchor = Offset & ~uint64_t(0xfff);
4083 Result = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: Anchor);
4084 Result = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result);
4085
4086 // The offset can be folded into the address if it is aligned to a
4087 // halfword.
4088 Offset -= Anchor;
4089 if (Offset != 0 && (Offset & 1) == 0) {
4090 SDValue Full =
4091 DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: Anchor + Offset);
4092 Result = DAG.getNode(Opcode: SystemZISD::PCREL_OFFSET, DL, VT: PtrVT, N1: Full, N2: Result);
4093 Offset = 0;
4094 }
4095 } else {
4096 // Conservatively load a constant offset greater than 32 bits into a
4097 // register below.
4098 Result = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT);
4099 Result = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result);
4100 }
4101 } else if (Subtarget.isTargetELF()) {
4102 Result = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0, TargetFlags: SystemZII::MO_GOT);
4103 Result = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result);
4104 Result = DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Result,
4105 PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()));
4106 } else if (Subtarget.isTargetzOS()) {
4107 Result = getADAEntry(DAG, GV, DL, PtrVT);
4108 } else
4109 llvm_unreachable("Unexpected Subtarget");
4110
4111 // If there was a non-zero offset that we didn't fold, create an explicit
4112 // addition for it.
4113 if (Offset != 0)
4114 Result = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Result,
4115 N2: DAG.getSignedConstant(Val: Offset, DL, VT: PtrVT));
4116
4117 return Result;
4118}
4119
4120SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
4121 SelectionDAG &DAG,
4122 unsigned Opcode,
4123 SDValue GOTOffset) const {
4124 SDLoc DL(Node);
4125 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
4126 SDValue Chain = DAG.getEntryNode();
4127 SDValue Glue;
4128
4129 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4130 CallingConv::GHC)
4131 report_fatal_error(reason: "In GHC calling convention TLS is not supported");
4132
4133 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
4134 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(VT: PtrVT);
4135 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: SystemZ::R12D, N: GOT, Glue);
4136 Glue = Chain.getValue(R: 1);
4137 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: SystemZ::R2D, N: GOTOffset, Glue);
4138 Glue = Chain.getValue(R: 1);
4139
4140 // The first call operand is the chain and the second is the TLS symbol.
4141 SmallVector<SDValue, 8> Ops;
4142 Ops.push_back(Elt: Chain);
4143 Ops.push_back(Elt: DAG.getTargetGlobalAddress(GV: Node->getGlobal(), DL,
4144 VT: Node->getValueType(ResNo: 0),
4145 offset: 0, TargetFlags: 0));
4146
4147 // Add argument registers to the end of the list so that they are
4148 // known live into the call.
4149 Ops.push_back(Elt: DAG.getRegister(Reg: SystemZ::R2D, VT: PtrVT));
4150 Ops.push_back(Elt: DAG.getRegister(Reg: SystemZ::R12D, VT: PtrVT));
4151
4152 // Add a register mask operand representing the call-preserved registers.
4153 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4154 const uint32_t *Mask =
4155 TRI->getCallPreservedMask(MF: DAG.getMachineFunction(), CallingConv::C);
4156 assert(Mask && "Missing call preserved mask for calling convention");
4157 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
4158
4159 // Glue the call to the argument copies.
4160 Ops.push_back(Elt: Glue);
4161
4162 // Emit the call.
4163 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
4164 Chain = DAG.getNode(Opcode, DL, VTList: NodeTys, Ops);
4165 Glue = Chain.getValue(R: 1);
4166
4167 // Copy the return value from %r2.
4168 return DAG.getCopyFromReg(Chain, dl: DL, Reg: SystemZ::R2D, VT: PtrVT, Glue);
4169}
4170
4171SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
4172 SelectionDAG &DAG) const {
4173 SDValue Chain = DAG.getEntryNode();
4174 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
4175
4176 // The high part of the thread pointer is in access register 0.
4177 SDValue TPHi = DAG.getCopyFromReg(Chain, dl: DL, Reg: SystemZ::A0, VT: MVT::i32);
4178 TPHi = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: PtrVT, Operand: TPHi);
4179
4180 // The low part of the thread pointer is in access register 1.
4181 SDValue TPLo = DAG.getCopyFromReg(Chain, dl: DL, Reg: SystemZ::A1, VT: MVT::i32);
4182 TPLo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: PtrVT, Operand: TPLo);
4183
4184 // Merge them into a single 64-bit address.
4185 SDValue TPHiShifted = DAG.getNode(Opcode: ISD::SHL, DL, VT: PtrVT, N1: TPHi,
4186 N2: DAG.getConstant(Val: 32, DL, VT: PtrVT));
4187 return DAG.getNode(Opcode: ISD::OR, DL, VT: PtrVT, N1: TPHiShifted, N2: TPLo);
4188}
4189
4190SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
4191 SelectionDAG &DAG) const {
4192 if (DAG.getTarget().useEmulatedTLS())
4193 return LowerToTLSEmulatedModel(GA: Node, DAG);
4194 SDLoc DL(Node);
4195 const GlobalValue *GV = Node->getGlobal();
4196 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
4197 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
4198
4199 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4200 CallingConv::GHC)
4201 report_fatal_error(reason: "In GHC calling convention TLS is not supported");
4202
4203 SDValue TP = lowerThreadPointer(DL, DAG);
4204
4205 // Get the offset of GA from the thread pointer, based on the TLS model.
4206 SDValue Offset;
4207 switch (model) {
4208 case TLSModel::GeneralDynamic: {
4209 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
4210 SystemZConstantPoolValue *CPV =
4211 SystemZConstantPoolValue::Create(GV, Modifier: SystemZCP::TLSGD);
4212
4213 Offset = DAG.getConstantPool(C: CPV, VT: PtrVT, Align: Align(8));
4214 Offset = DAG.getLoad(
4215 VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Offset,
4216 PtrInfo: MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction()));
4217
4218 // Call __tls_get_offset to retrieve the offset.
4219 Offset = lowerTLSGetOffset(Node, DAG, Opcode: SystemZISD::TLS_GDCALL, GOTOffset: Offset);
4220 break;
4221 }
4222
4223 case TLSModel::LocalDynamic: {
4224 // Load the GOT offset of the module ID.
4225 SystemZConstantPoolValue *CPV =
4226 SystemZConstantPoolValue::Create(GV, Modifier: SystemZCP::TLSLDM);
4227
4228 Offset = DAG.getConstantPool(C: CPV, VT: PtrVT, Align: Align(8));
4229 Offset = DAG.getLoad(
4230 VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Offset,
4231 PtrInfo: MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction()));
4232
4233 // Call __tls_get_offset to retrieve the module base offset.
4234 Offset = lowerTLSGetOffset(Node, DAG, Opcode: SystemZISD::TLS_LDCALL, GOTOffset: Offset);
4235
4236 // Note: The SystemZLDCleanupPass will remove redundant computations
4237 // of the module base offset. Count the total number of local-dynamic
4238 // accesses to trigger execution of that pass.
4239 SystemZMachineFunctionInfo* MFI =
4240 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
4241 MFI->incNumLocalDynamicTLSAccesses();
4242
4243 // Add the per-symbol offset.
4244 CPV = SystemZConstantPoolValue::Create(GV, Modifier: SystemZCP::DTPOFF);
4245
4246 SDValue DTPOffset = DAG.getConstantPool(C: CPV, VT: PtrVT, Align: Align(8));
4247 DTPOffset = DAG.getLoad(
4248 VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: DTPOffset,
4249 PtrInfo: MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction()));
4250
4251 Offset = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Offset, N2: DTPOffset);
4252 break;
4253 }
4254
4255 case TLSModel::InitialExec: {
4256 // Load the offset from the GOT.
4257 Offset = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0,
4258 TargetFlags: SystemZII::MO_INDNTPOFF);
4259 Offset = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Offset);
4260 Offset =
4261 DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Offset,
4262 PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()));
4263 break;
4264 }
4265
4266 case TLSModel::LocalExec: {
4267 // Force the offset into the constant pool and load it from there.
4268 SystemZConstantPoolValue *CPV =
4269 SystemZConstantPoolValue::Create(GV, Modifier: SystemZCP::NTPOFF);
4270
4271 Offset = DAG.getConstantPool(C: CPV, VT: PtrVT, Align: Align(8));
4272 Offset = DAG.getLoad(
4273 VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Offset,
4274 PtrInfo: MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction()));
4275 break;
4276 }
4277 }
4278
4279 // Add the base and offset together.
4280 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: TP, N2: Offset);
4281}
4282
4283SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
4284 SelectionDAG &DAG) const {
4285 SDLoc DL(Node);
4286 const BlockAddress *BA = Node->getBlockAddress();
4287 int64_t Offset = Node->getOffset();
4288 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
4289
4290 SDValue Result = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset);
4291 Result = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result);
4292 return Result;
4293}
4294
4295SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
4296 SelectionDAG &DAG) const {
4297 SDLoc DL(JT);
4298 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
4299 SDValue Result = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT);
4300
4301 // Use LARL to load the address of the table.
4302 return DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result);
4303}
4304
4305SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
4306 SelectionDAG &DAG) const {
4307 SDLoc DL(CP);
4308 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
4309
4310 SDValue Result;
4311 if (CP->isMachineConstantPoolEntry())
4312 Result =
4313 DAG.getTargetConstantPool(C: CP->getMachineCPVal(), VT: PtrVT, Align: CP->getAlign());
4314 else
4315 Result = DAG.getTargetConstantPool(C: CP->getConstVal(), VT: PtrVT, Align: CP->getAlign(),
4316 Offset: CP->getOffset());
4317
4318 // Use LARL to load the address of the constant pool entry.
4319 return DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result);
4320}
4321
4322SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
4323 SelectionDAG &DAG) const {
4324 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4325 MachineFunction &MF = DAG.getMachineFunction();
4326 MachineFrameInfo &MFI = MF.getFrameInfo();
4327 MFI.setFrameAddressIsTaken(true);
4328
4329 SDLoc DL(Op);
4330 unsigned Depth = Op.getConstantOperandVal(i: 0);
4331 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
4332
4333 // By definition, the frame address is the address of the back chain. (In
4334 // the case of a packed stack without backchain, return the address where
4335 // the backchain would have been stored. This will either be unused space
4336 // or contain a saved register.)
4337 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
4338 SDValue BackChain = DAG.getFrameIndex(FI: BackChainIdx, VT: PtrVT);
4339
4340 if (Depth > 0) {
4341 // FIXME The frontend should detect this case.
4342 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4343 report_fatal_error(reason: "Unsupported stack frame traversal count");
4344
4345 SDValue Offset = DAG.getConstant(Val: TFL->getBackchainOffset(MF), DL, VT: PtrVT);
4346 while (Depth--) {
4347 BackChain = DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: BackChain,
4348 PtrInfo: MachinePointerInfo());
4349 BackChain = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: BackChain, N2: Offset);
4350 }
4351 }
4352
4353 return BackChain;
4354}
4355
4356SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
4357 SelectionDAG &DAG) const {
4358 MachineFunction &MF = DAG.getMachineFunction();
4359 MachineFrameInfo &MFI = MF.getFrameInfo();
4360 MFI.setReturnAddressIsTaken(true);
4361
4362 SDLoc DL(Op);
4363 unsigned Depth = Op.getConstantOperandVal(i: 0);
4364 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
4365
4366 if (Depth > 0) {
4367 // FIXME The frontend should detect this case.
4368 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4369 report_fatal_error(reason: "Unsupported stack frame traversal count");
4370
4371 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
4372 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4373 int Offset = TFL->getReturnAddressOffset(MF);
4374 SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: FrameAddr,
4375 N2: DAG.getSignedConstant(Val: Offset, DL, VT: PtrVT));
4376 return DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr,
4377 PtrInfo: MachinePointerInfo());
4378 }
4379
4380 // Return R14D (ELF) / R7D (XPLINK), which holds the return address. Mark it
4381 // as an implicit live-in.
4382 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
4383 Register LinkReg = MF.addLiveIn(PReg: CCR->getReturnFunctionAddressRegister(),
4384 RC: &SystemZ::GR64BitRegClass);
4385 return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: LinkReg, VT: PtrVT);
4386}
4387
4388SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
4389 SelectionDAG &DAG) const {
4390 SDLoc DL(Op);
4391 SDValue In = Op.getOperand(i: 0);
4392 EVT InVT = In.getValueType();
4393 EVT ResVT = Op.getValueType();
4394
4395 // Convert loads directly. This is normally done by DAGCombiner,
4396 // but we need this case for bitcasts that are created during lowering
4397 // and which are then lowered themselves.
4398 if (auto *LoadN = dyn_cast<LoadSDNode>(Val&: In))
4399 if (ISD::isNormalLoad(N: LoadN)) {
4400 SDValue NewLoad = DAG.getLoad(VT: ResVT, dl: DL, Chain: LoadN->getChain(),
4401 Ptr: LoadN->getBasePtr(), MMO: LoadN->getMemOperand());
4402 // Update the chain uses.
4403 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LoadN, 1), To: NewLoad.getValue(R: 1));
4404 return NewLoad;
4405 }
4406
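// For i32 -> f32, place the integer in the high 32 bits of a 64-bit GPR
// (directly when the high-word facility is available, otherwise via an
// extend-and-shift), bitcast that to f64 and extract the high f32 subregister.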
4407 if (InVT == MVT::i32 && ResVT == MVT::f32) {
4408 SDValue In64;
4409 if (Subtarget.hasHighWord()) {
4410 SDNode *U64 = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL,
4411 VT: MVT::i64);
4412 In64 = DAG.getTargetInsertSubreg(SRIdx: SystemZ::subreg_h32, DL,
4413 VT: MVT::i64, Operand: SDValue(U64, 0), Subreg: In);
4414 } else {
4415 In64 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: In);
4416 In64 = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, N1: In64,
4417 N2: DAG.getConstant(Val: 32, DL, VT: MVT::i64));
4418 }
4419 SDValue Out64 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f64, Operand: In64);
4420 return DAG.getTargetExtractSubreg(SRIdx: SystemZ::subreg_h32,
4421 DL, VT: MVT::f32, Operand: Out64);
4422 }
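// For f32 -> i32, do the reverse: widen the f32 into the high half of an
// f64, bitcast to i64 and extract the high 32 bits.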
4423 if (InVT == MVT::f32 && ResVT == MVT::i32) {
4424 SDNode *U64 = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::f64);
4425 SDValue In64 = DAG.getTargetInsertSubreg(SRIdx: SystemZ::subreg_h32, DL,
4426 VT: MVT::f64, Operand: SDValue(U64, 0), Subreg: In);
4427 SDValue Out64 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i64, Operand: In64);
4428 if (Subtarget.hasHighWord())
4429 return DAG.getTargetExtractSubreg(SRIdx: SystemZ::subreg_h32, DL,
4430 VT: MVT::i32, Operand: Out64);
4431 SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: Out64,
4432 N2: DAG.getConstant(Val: 32, DL, VT: MVT::i64));
4433 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Shift);
4434 }
4435 llvm_unreachable("Unexpected bitcast combination");
4436}
4437
4438SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
4439 SelectionDAG &DAG) const {
4440
4441 if (Subtarget.isTargetXPLINK64())
4442 return lowerVASTART_XPLINK(Op, DAG);
4443 else
4444 return lowerVASTART_ELF(Op, DAG);
4445}
4446
4447SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
4448 SelectionDAG &DAG) const {
4449 MachineFunction &MF = DAG.getMachineFunction();
4450 SystemZMachineFunctionInfo *FuncInfo =
4451 MF.getInfo<SystemZMachineFunctionInfo>();
4452
4453 SDLoc DL(Op);
4454
4455 // vastart just stores the address of the VarArgsFrameIndex slot into the
4456 // memory location argument.
4457 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
4458 SDValue FR = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);
4459 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
4460 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FR, Ptr: Op.getOperand(i: 1),
4461 PtrInfo: MachinePointerInfo(SV));
4462}
4463
4464SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
4465 SelectionDAG &DAG) const {
4466 MachineFunction &MF = DAG.getMachineFunction();
4467 SystemZMachineFunctionInfo *FuncInfo =
4468 MF.getInfo<SystemZMachineFunctionInfo>();
4469 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
4470
4471 SDValue Chain = Op.getOperand(i: 0);
4472 SDValue Addr = Op.getOperand(i: 1);
4473 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
4474 SDLoc DL(Op);
4475
4476 // The initial values of the four va_list fields (GPR count, FPR count, overflow argument area, and register save area).
4477 const unsigned NumFields = 4;
4478 SDValue Fields[NumFields] = {
4479 DAG.getConstant(Val: FuncInfo->getVarArgsFirstGPR(), DL, VT: PtrVT),
4480 DAG.getConstant(Val: FuncInfo->getVarArgsFirstFPR(), DL, VT: PtrVT),
4481 DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT),
4482 DAG.getFrameIndex(FI: FuncInfo->getRegSaveFrameIndex(), VT: PtrVT)
4483 };
4484
4485 // Store each field into its respective slot.
4486 SDValue MemOps[NumFields];
4487 unsigned Offset = 0;
4488 for (unsigned I = 0; I < NumFields; ++I) {
4489 SDValue FieldAddr = Addr;
4490 if (Offset != 0)
4491 FieldAddr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: FieldAddr,
4492 N2: DAG.getIntPtrConstant(Val: Offset, DL));
4493 MemOps[I] = DAG.getStore(Chain, dl: DL, Val: Fields[I], Ptr: FieldAddr,
4494 PtrInfo: MachinePointerInfo(SV, Offset));
4495 Offset += 8;
4496 }
4497 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOps);
4498}
4499
4500SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
4501 SelectionDAG &DAG) const {
4502 SDValue Chain = Op.getOperand(i: 0);
4503 SDValue DstPtr = Op.getOperand(i: 1);
4504 SDValue SrcPtr = Op.getOperand(i: 2);
4505 const Value *DstSV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 3))->getValue();
4506 const Value *SrcSV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 4))->getValue();
4507 SDLoc DL(Op);
4508
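// The ELF va_list is four 8-byte fields (32 bytes), whereas the XPLINK
// va_list is a single pointer.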
4509 uint32_t Sz =
4510 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(AS: 0) : 32;
4511 return DAG.getMemcpy(Chain, dl: DL, Dst: DstPtr, Src: SrcPtr, Size: DAG.getIntPtrConstant(Val: Sz, DL),
4512 Alignment: Align(8), /*isVolatile*/ isVol: false, /*AlwaysInline*/ false,
4513 /*CI=*/nullptr, OverrideTailCall: std::nullopt, DstPtrInfo: MachinePointerInfo(DstSV),
4514 SrcPtrInfo: MachinePointerInfo(SrcSV));
4515}
4516
4517SDValue
4518SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
4519 SelectionDAG &DAG) const {
4520 if (Subtarget.isTargetXPLINK64())
4521 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
4522 else
4523 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
4524}
4525
4526SDValue
4527SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
4528 SelectionDAG &DAG) const {
4529 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4530 MachineFunction &MF = DAG.getMachineFunction();
4531 bool RealignOpt = !MF.getFunction().hasFnAttribute(Kind: "no-realign-stack");
4532 SDValue Chain = Op.getOperand(i: 0);
4533 SDValue Size = Op.getOperand(i: 1);
4534 SDValue Align = Op.getOperand(i: 2);
4535 SDLoc DL(Op);
4536
4537 // If the user has set the "no-realign-stack" function attribute, ignore
4538 // alloca alignments.
4539 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4540
4541 uint64_t StackAlign = TFI->getStackAlignment();
4542 uint64_t RequiredAlign = std::max(a: AlignVal, b: StackAlign);
4543 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4544
4545 SDValue NeededSpace = Size;
4546
4547 // Add extra space for alignment if needed.
4548 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
4549 if (ExtraAlignSpace)
4550 NeededSpace = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: NeededSpace,
4551 N2: DAG.getConstant(Val: ExtraAlignSpace, DL, VT: PtrVT));
4552
4553 bool IsSigned = false;
4554 bool DoesNotReturn = false;
4555 bool IsReturnValueUsed = false;
4556 EVT VT = Op.getValueType();
4557 SDValue AllocaCall =
4558 makeExternalCall(Chain, DAG, CalleeName: "@@ALCAXP", RetVT: VT, Ops: ArrayRef(NeededSpace),
4559 CallConv: CallingConv::C, IsSigned, DL, DoesNotReturn,
4560 IsReturnValueUsed)
4561 .first;
4562
4563 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4564 // to end of call in order to ensure it isn't broken up from the call
4565 // sequence.
4566 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4567 Register SPReg = Regs.getStackPointerRegister();
4568 Chain = AllocaCall.getValue(R: 1);
4569 SDValue Glue = AllocaCall.getValue(R: 2);
4570 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, dl: DL, Reg: SPReg, VT: PtrVT, Glue);
4571 Chain = NewSPRegNode.getValue(R: 1);
4572
4573 MVT PtrMVT = getPointerMemTy(DL: MF.getDataLayout());
4574 SDValue ArgAdjust = DAG.getNode(Opcode: SystemZISD::ADJDYNALLOC, DL, VT: PtrMVT);
4575 SDValue Result = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrMVT, N1: NewSPRegNode, N2: ArgAdjust);
4576
4577 // Dynamically realign if needed.
4578 if (ExtraAlignSpace) {
4579 Result = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Result,
4580 N2: DAG.getConstant(Val: ExtraAlignSpace, DL, VT: PtrVT));
4581 Result = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, N1: Result,
4582 N2: DAG.getConstant(Val: ~(RequiredAlign - 1), DL, VT: PtrVT));
4583 }
4584
4585 SDValue Ops[2] = {Result, Chain};
4586 return DAG.getMergeValues(Ops, dl: DL);
4587}
4588
4589SDValue
4590SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4591 SelectionDAG &DAG) const {
4592 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4593 MachineFunction &MF = DAG.getMachineFunction();
4594 bool RealignOpt = !MF.getFunction().hasFnAttribute(Kind: "no-realign-stack");
4595 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4596
4597 SDValue Chain = Op.getOperand(i: 0);
4598 SDValue Size = Op.getOperand(i: 1);
4599 SDValue Align = Op.getOperand(i: 2);
4600 SDLoc DL(Op);
4601
4602 // If the user has set the "no-realign-stack" function attribute, ignore
4603 // alloca alignments.
4604 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4605
4606 uint64_t StackAlign = TFI->getStackAlignment();
4607 uint64_t RequiredAlign = std::max(a: AlignVal, b: StackAlign);
4608 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4609
4610 Register SPReg = getStackPointerRegisterToSaveRestore();
4611 SDValue NeededSpace = Size;
4612
4613 // Get a reference to the stack pointer.
4614 SDValue OldSP = DAG.getCopyFromReg(Chain, dl: DL, Reg: SPReg, VT: MVT::i64);
4615
4616 // If we need a backchain, save it now.
4617 SDValue Backchain;
4618 if (StoreBackchain)
4619 Backchain = DAG.getLoad(VT: MVT::i64, dl: DL, Chain, Ptr: getBackchainAddress(SP: OldSP, DAG),
4620 PtrInfo: MachinePointerInfo());
4621
4622 // Add extra space for alignment if needed.
4623 if (ExtraAlignSpace)
4624 NeededSpace = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: NeededSpace,
4625 N2: DAG.getConstant(Val: ExtraAlignSpace, DL, VT: MVT::i64));
4626
4627 // Get the new stack pointer value.
4628 SDValue NewSP;
4629 if (hasInlineStackProbe(MF)) {
4630 NewSP = DAG.getNode(Opcode: SystemZISD::PROBED_ALLOCA, DL,
4631 VTList: DAG.getVTList(VT1: MVT::i64, VT2: MVT::Other), N1: Chain, N2: OldSP, N3: NeededSpace);
4632 Chain = NewSP.getValue(R: 1);
4633 }
4634 else {
4635 NewSP = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: OldSP, N2: NeededSpace);
4636 // Copy the new stack pointer back.
4637 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: SPReg, N: NewSP);
4638 }
4639
4640 // The allocated data lives above the 160 bytes allocated for the standard
4641 // frame, plus any outgoing stack arguments. We don't know how much that
4642 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4643 SDValue ArgAdjust = DAG.getNode(Opcode: SystemZISD::ADJDYNALLOC, DL, VT: MVT::i64);
4644 SDValue Result = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: NewSP, N2: ArgAdjust);
4645
4646 // Dynamically realign if needed.
4647 if (RequiredAlign > StackAlign) {
4648 Result =
4649 DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: Result,
4650 N2: DAG.getConstant(Val: ExtraAlignSpace, DL, VT: MVT::i64));
4651 Result =
4652 DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, N1: Result,
4653 N2: DAG.getConstant(Val: ~(RequiredAlign - 1), DL, VT: MVT::i64));
4654 }
4655
4656 if (StoreBackchain)
4657 Chain = DAG.getStore(Chain, dl: DL, Val: Backchain, Ptr: getBackchainAddress(SP: NewSP, DAG),
4658 PtrInfo: MachinePointerInfo());
4659
4660 SDValue Ops[2] = { Result, Chain };
4661 return DAG.getMergeValues(Ops, dl: DL);
4662}
4663
4664SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4665 SDValue Op, SelectionDAG &DAG) const {
4666 SDLoc DL(Op);
4667
4668 return DAG.getNode(Opcode: SystemZISD::ADJDYNALLOC, DL, VT: MVT::i64);
4669}
4670
4671SDValue SystemZTargetLowering::lowerMULH(SDValue Op,
4672 SelectionDAG &DAG,
4673 unsigned Opcode) const {
4674 EVT VT = Op.getValueType();
4675 SDLoc DL(Op);
4676 SDValue Even, Odd;
4677
4678 // This custom expander is only used on z17 and later for 64-bit types.
4679 assert(!is32Bit(VT));
4680 assert(Subtarget.hasMiscellaneousExtensions2());
4681
4682 // SystemZISD::xMUL_LOHI returns the low result in the odd register and
4683 // the high result in the even register. Return the latter.
4684 lowerGR128Binary(DAG, DL, VT, Opcode,
4685 Op0: Op.getOperand(i: 0), Op1: Op.getOperand(i: 1), Even, Odd);
4686 return Even;
4687}
4688
4689SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4690 SelectionDAG &DAG) const {
4691 EVT VT = Op.getValueType();
4692 SDLoc DL(Op);
4693 SDValue Ops[2];
4694 if (is32Bit(VT))
4695 // Just do a normal 64-bit multiplication and extract the results.
4696 // We define this so that it can be used for constant division.
4697 lowerMUL_LOHI32(DAG, DL, Extend: ISD::SIGN_EXTEND, Op0: Op.getOperand(i: 0),
4698 Op1: Op.getOperand(i: 1), Hi&: Ops[1], Lo&: Ops[0]);
4699 else if (Subtarget.hasMiscellaneousExtensions2())
4700 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4701 // the high result in the even register. ISD::SMUL_LOHI is defined to
4702 // return the low half first, so the results are in reverse order.
4703 lowerGR128Binary(DAG, DL, VT, Opcode: SystemZISD::SMUL_LOHI,
4704 Op0: Op.getOperand(i: 0), Op1: Op.getOperand(i: 1), Even&: Ops[1], Odd&: Ops[0]);
4705 else {
4706 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4707 //
4708 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4709 //
4710 // but using the fact that the upper halves are either all zeros
4711 // or all ones:
4712 //
4713 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4714 //
4715 // and grouping the right terms together since they are quicker than the
4716 // multiplication:
4717 //
4718 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
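// (Since lh and rh are each either all zeros or all ones, lh * rl is
// either 0 or -rl, i.e. -(lh & rl), and likewise for ll * rh, which is
// why the multiplications can be replaced by ANDs.)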
4719 SDValue C63 = DAG.getConstant(Val: 63, DL, VT: MVT::i64);
4720 SDValue LL = Op.getOperand(i: 0);
4721 SDValue RL = Op.getOperand(i: 1);
4722 SDValue LH = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: LL, N2: C63);
4723 SDValue RH = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: RL, N2: C63);
4724 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4725 // the high result in the even register. ISD::SMUL_LOHI is defined to
4726 // return the low half first, so the results are in reverse order.
4727 lowerGR128Binary(DAG, DL, VT, Opcode: SystemZISD::UMUL_LOHI,
4728 Op0: LL, Op1: RL, Even&: Ops[1], Odd&: Ops[0]);
4729 SDValue NegLLTimesRH = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: LL, N2: RH);
4730 SDValue NegLHTimesRL = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: LH, N2: RL);
4731 SDValue NegSum = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: NegLLTimesRH, N2: NegLHTimesRL);
4732 Ops[1] = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Ops[1], N2: NegSum);
4733 }
4734 return DAG.getMergeValues(Ops, dl: DL);
4735}
4736
4737SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4738 SelectionDAG &DAG) const {
4739 EVT VT = Op.getValueType();
4740 SDLoc DL(Op);
4741 SDValue Ops[2];
4742 if (is32Bit(VT))
4743 // Just do a normal 64-bit multiplication and extract the results.
4744 // We define this so that it can be used for constant division.
4745 lowerMUL_LOHI32(DAG, DL, Extend: ISD::ZERO_EXTEND, Op0: Op.getOperand(i: 0),
4746 Op1: Op.getOperand(i: 1), Hi&: Ops[1], Lo&: Ops[0]);
4747 else
4748 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4749 // the high result in the even register. ISD::UMUL_LOHI is defined to
4750 // return the low half first, so the results are in reverse order.
4751 lowerGR128Binary(DAG, DL, VT, Opcode: SystemZISD::UMUL_LOHI,
4752 Op0: Op.getOperand(i: 0), Op1: Op.getOperand(i: 1), Even&: Ops[1], Odd&: Ops[0]);
4753 return DAG.getMergeValues(Ops, dl: DL);
4754}
4755
4756SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4757 SelectionDAG &DAG) const {
4758 SDValue Op0 = Op.getOperand(i: 0);
4759 SDValue Op1 = Op.getOperand(i: 1);
4760 EVT VT = Op.getValueType();
4761 SDLoc DL(Op);
4762
4763 // We use DSGF for 32-bit division. This means the first operand must
4764 // always be 64-bit, and the second operand should be 32-bit whenever
4765 // that is possible, to improve performance.
4766 if (is32Bit(VT))
4767 Op0 = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i64, Operand: Op0);
4768 else if (DAG.ComputeNumSignBits(Op: Op1) > 32)
4769 Op1 = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Op1);
4770
4771 // DSG(F) returns the remainder in the even register and the
4772 // quotient in the odd register.
4773 SDValue Ops[2];
4774 lowerGR128Binary(DAG, DL, VT, Opcode: SystemZISD::SDIVREM, Op0, Op1, Even&: Ops[1], Odd&: Ops[0]);
4775 return DAG.getMergeValues(Ops, dl: DL);
4776}
4777
4778SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4779 SelectionDAG &DAG) const {
4780 EVT VT = Op.getValueType();
4781 SDLoc DL(Op);
4782
4783 // DL(G) returns the remainder in the even register and the
4784 // quotient in the odd register.
4785 SDValue Ops[2];
4786 lowerGR128Binary(DAG, DL, VT, Opcode: SystemZISD::UDIVREM,
4787 Op0: Op.getOperand(i: 0), Op1: Op.getOperand(i: 1), Even&: Ops[1], Odd&: Ops[0]);
4788 return DAG.getMergeValues(Ops, dl: DL);
4789}
4790
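// Lower an i64 OR. If one operand supplies only the high 32 bits and the
// other only the low 32 bits, combine them with a 32-bit subregister insert;
// otherwise return Op unchanged.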
4791SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4792 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4793
4794 // Get the known-zero masks for each operand.
4795 SDValue Ops[] = {Op.getOperand(i: 0), Op.getOperand(i: 1)};
4796 KnownBits Known[2] = {DAG.computeKnownBits(Op: Ops[0]),
4797 DAG.computeKnownBits(Op: Ops[1])};
4798
4799 // See if the upper 32 bits of one operand and the lower 32 bits of the
4800 // other are known zero. They are the low and high operands respectively.
4801 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4802 Known[1].Zero.getZExtValue() };
4803 unsigned High, Low;
4804 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4805 High = 1, Low = 0;
4806 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4807 High = 0, Low = 1;
4808 else
4809 return Op;
4810
4811 SDValue LowOp = Ops[Low];
4812 SDValue HighOp = Ops[High];
4813
4814 // If the high part is a constant, we're better off using IILH.
4815 if (HighOp.getOpcode() == ISD::Constant)
4816 return Op;
4817
4818 // If the low part is a constant that is outside the range of LHI,
4819 // then we're better off using IILF.
4820 if (LowOp.getOpcode() == ISD::Constant) {
4821 int64_t Value = int32_t(LowOp->getAsZExtVal());
4822 if (!isInt<16>(x: Value))
4823 return Op;
4824 }
4825
4826 // Check whether the high part is an AND that doesn't change the
4827 // high 32 bits and just masks out low bits. We can skip it if so.
4828 if (HighOp.getOpcode() == ISD::AND &&
4829 HighOp.getOperand(i: 1).getOpcode() == ISD::Constant) {
4830 SDValue HighOp0 = HighOp.getOperand(i: 0);
4831 uint64_t Mask = HighOp.getConstantOperandVal(i: 1);
4832 if (DAG.MaskedValueIsZero(Op: HighOp0, Mask: APInt(64, ~(Mask | 0xffffffff))))
4833 HighOp = HighOp0;
4834 }
4835
4836 // Take advantage of the fact that all GR32 operations only change the
4837 // low 32 bits by truncating Low to an i32 and inserting it directly
4838 // using a subreg. The interesting cases are those where the truncation
4839 // can be folded.
4840 SDLoc DL(Op);
4841 SDValue Low32 = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: LowOp);
4842 return DAG.getTargetInsertSubreg(SRIdx: SystemZ::subreg_l32, DL,
4843 VT: MVT::i64, Operand: HighOp, Subreg: Low32);
4844}
4845
4846// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4847SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4848 SelectionDAG &DAG) const {
4849 SDNode *N = Op.getNode();
4850 SDValue LHS = N->getOperand(Num: 0);
4851 SDValue RHS = N->getOperand(Num: 1);
4852 SDLoc DL(N);
4853
4854 if (N->getValueType(ResNo: 0) == MVT::i128) {
4855 unsigned BaseOp = 0;
4856 unsigned FlagOp = 0;
4857 bool IsBorrow = false;
4858 switch (Op.getOpcode()) {
4859 default: llvm_unreachable("Unknown instruction!");
4860 case ISD::UADDO:
4861 BaseOp = ISD::ADD;
4862 FlagOp = SystemZISD::VACC;
4863 break;
4864 case ISD::USUBO:
4865 BaseOp = ISD::SUB;
4866 FlagOp = SystemZISD::VSCBI;
4867 IsBorrow = true;
4868 break;
4869 }
4870 SDValue Result = DAG.getNode(Opcode: BaseOp, DL, VT: MVT::i128, N1: LHS, N2: RHS);
4871 SDValue Flag = DAG.getNode(Opcode: FlagOp, DL, VT: MVT::i128, N1: LHS, N2: RHS);
4872 Flag = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: MVT::i128, N1: Flag,
4873 N2: DAG.getValueType(MVT::i1));
4874 Flag = DAG.getZExtOrTrunc(Op: Flag, DL, VT: N->getValueType(ResNo: 1));
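// The borrow indication produced by VSCBI is the complement of the borrow
// flag USUBO expects, so invert it here.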
4875 if (IsBorrow)
4876 Flag = DAG.getNode(Opcode: ISD::XOR, DL, VT: Flag.getValueType(),
4877 N1: Flag, N2: DAG.getConstant(Val: 1, DL, VT: Flag.getValueType()));
4878 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Result, N2: Flag);
4879 }
4880
4881 unsigned BaseOp = 0;
4882 unsigned CCValid = 0;
4883 unsigned CCMask = 0;
4884
4885 switch (Op.getOpcode()) {
4886 default: llvm_unreachable("Unknown instruction!");
4887 case ISD::SADDO:
4888 BaseOp = SystemZISD::SADDO;
4889 CCValid = SystemZ::CCMASK_ARITH;
4890 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4891 break;
4892 case ISD::SSUBO:
4893 BaseOp = SystemZISD::SSUBO;
4894 CCValid = SystemZ::CCMASK_ARITH;
4895 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4896 break;
4897 case ISD::UADDO:
4898 BaseOp = SystemZISD::UADDO;
4899 CCValid = SystemZ::CCMASK_LOGICAL;
4900 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4901 break;
4902 case ISD::USUBO:
4903 BaseOp = SystemZISD::USUBO;
4904 CCValid = SystemZ::CCMASK_LOGICAL;
4905 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4906 break;
4907 }
4908
4909 SDVTList VTs = DAG.getVTList(VT1: N->getValueType(ResNo: 0), VT2: MVT::i32);
4910 SDValue Result = DAG.getNode(Opcode: BaseOp, DL, VTList: VTs, N1: LHS, N2: RHS);
4911
4912 SDValue SetCC = emitSETCC(DAG, DL, CCReg: Result.getValue(R: 1), CCValid, CCMask);
4913 if (N->getValueType(ResNo: 1) == MVT::i1)
4914 SetCC = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: SetCC);
4915
4916 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Result, N2: SetCC);
4917}
4918
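// Return true if Carry ultimately originates from a (non-i128) UADDO,
// possibly through a chain of intervening (non-i128) UADDO_CARRY nodes.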
4919static bool isAddCarryChain(SDValue Carry) {
4920 while (Carry.getOpcode() == ISD::UADDO_CARRY &&
4921 Carry->getValueType(ResNo: 0) != MVT::i128)
4922 Carry = Carry.getOperand(i: 2);
4923 return Carry.getOpcode() == ISD::UADDO &&
4924 Carry->getValueType(ResNo: 0) != MVT::i128;
4925}
4926
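// Return true if Carry ultimately originates from a (non-i128) USUBO,
// possibly through a chain of intervening (non-i128) USUBO_CARRY nodes.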
4927static bool isSubBorrowChain(SDValue Carry) {
4928 while (Carry.getOpcode() == ISD::USUBO_CARRY &&
4929 Carry->getValueType(ResNo: 0) != MVT::i128)
4930 Carry = Carry.getOperand(i: 2);
4931 return Carry.getOpcode() == ISD::USUBO &&
4932 Carry->getValueType(ResNo: 0) != MVT::i128;
4933}
4934
4935// Lower UADDO_CARRY/USUBO_CARRY nodes.
4936SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4937 SelectionDAG &DAG) const {
4938
4939 SDNode *N = Op.getNode();
4940 MVT VT = N->getSimpleValueType(ResNo: 0);
4941
4942 // Let legalize expand this if it isn't a legal type yet.
4943 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4944 return SDValue();
4945
4946 SDValue LHS = N->getOperand(Num: 0);
4947 SDValue RHS = N->getOperand(Num: 1);
4948 SDValue Carry = Op.getOperand(i: 2);
4949 SDLoc DL(N);
4950
4951 if (VT == MVT::i128) {
4952 unsigned BaseOp = 0;
4953 unsigned FlagOp = 0;
4954 bool IsBorrow = false;
4955 switch (Op.getOpcode()) {
4956 default: llvm_unreachable("Unknown instruction!");
4957 case ISD::UADDO_CARRY:
4958 BaseOp = SystemZISD::VAC;
4959 FlagOp = SystemZISD::VACCC;
4960 break;
4961 case ISD::USUBO_CARRY:
4962 BaseOp = SystemZISD::VSBI;
4963 FlagOp = SystemZISD::VSBCBI;
4964 IsBorrow = true;
4965 break;
4966 }
4967 if (IsBorrow)
4968 Carry = DAG.getNode(Opcode: ISD::XOR, DL, VT: Carry.getValueType(),
4969 N1: Carry, N2: DAG.getConstant(Val: 1, DL, VT: Carry.getValueType()));
4970 Carry = DAG.getZExtOrTrunc(Op: Carry, DL, VT: MVT::i128);
4971 SDValue Result = DAG.getNode(Opcode: BaseOp, DL, VT: MVT::i128, N1: LHS, N2: RHS, N3: Carry);
4972 SDValue Flag = DAG.getNode(Opcode: FlagOp, DL, VT: MVT::i128, N1: LHS, N2: RHS, N3: Carry);
4973 Flag = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: MVT::i128, N1: Flag,
4974 N2: DAG.getValueType(MVT::i1));
4975 Flag = DAG.getZExtOrTrunc(Op: Flag, DL, VT: N->getValueType(ResNo: 1));
4976 if (IsBorrow)
4977 Flag = DAG.getNode(Opcode: ISD::XOR, DL, VT: Flag.getValueType(),
4978 N1: Flag, N2: DAG.getConstant(Val: 1, DL, VT: Flag.getValueType()));
4979 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Result, N2: Flag);
4980 }
4981
4982 unsigned BaseOp = 0;
4983 unsigned CCValid = 0;
4984 unsigned CCMask = 0;
4985
4986 switch (Op.getOpcode()) {
4987 default: llvm_unreachable("Unknown instruction!");
4988 case ISD::UADDO_CARRY:
4989 if (!isAddCarryChain(Carry))
4990 return SDValue();
4991
4992 BaseOp = SystemZISD::ADDCARRY;
4993 CCValid = SystemZ::CCMASK_LOGICAL;
4994 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4995 break;
4996 case ISD::USUBO_CARRY:
4997 if (!isSubBorrowChain(Carry))
4998 return SDValue();
4999
5000 BaseOp = SystemZISD::SUBCARRY;
5001 CCValid = SystemZ::CCMASK_LOGICAL;
5002 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
5003 break;
5004 }
5005
5006 // Set the condition code from the carry flag.
5007 Carry = DAG.getNode(Opcode: SystemZISD::GET_CCMASK, DL, VT: MVT::i32, N1: Carry,
5008 N2: DAG.getConstant(Val: CCValid, DL, VT: MVT::i32),
5009 N3: DAG.getConstant(Val: CCMask, DL, VT: MVT::i32));
5010
5011 SDVTList VTs = DAG.getVTList(VT1: VT, VT2: MVT::i32);
5012 SDValue Result = DAG.getNode(Opcode: BaseOp, DL, VTList: VTs, N1: LHS, N2: RHS, N3: Carry);
5013
5014 SDValue SetCC = emitSETCC(DAG, DL, CCReg: Result.getValue(R: 1), CCValid, CCMask);
5015 if (N->getValueType(ResNo: 1) == MVT::i1)
5016 SetCC = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: SetCC);
5017
5018 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Result, N2: SetCC);
5019}
5020
5021SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
5022 SelectionDAG &DAG) const {
5023 EVT VT = Op.getValueType();
5024 SDLoc DL(Op);
5025 Op = Op.getOperand(i: 0);
5026
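// For i128, count the bits of each doubleword with a v2i64 CTPOP and add
// the two counts together by summing against a zero vector.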
5027 if (VT.getScalarSizeInBits() == 128) {
5028 Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v2i64, Operand: Op);
5029 Op = DAG.getNode(Opcode: ISD::CTPOP, DL, VT: MVT::v2i64, Operand: Op);
5030 SDValue Tmp = DAG.getSplatBuildVector(VT: MVT::v2i64, DL,
5031 Op: DAG.getConstant(Val: 0, DL, VT: MVT::i64));
5032 Op = DAG.getNode(Opcode: SystemZISD::VSUM, DL, VT, N1: Op, N2: Tmp);
5033 return Op;
5034 }
5035
5036 // Handle vector types via VPOPCT.
5037 if (VT.isVector()) {
5038 Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v16i8, Operand: Op);
5039 Op = DAG.getNode(Opcode: SystemZISD::POPCNT, DL, VT: MVT::v16i8, Operand: Op);
5040 switch (VT.getScalarSizeInBits()) {
5041 case 8:
5042 break;
5043 case 16: {
5044 Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
5045 SDValue Shift = DAG.getConstant(Val: 8, DL, VT: MVT::i32);
5046 SDValue Tmp = DAG.getNode(Opcode: SystemZISD::VSHL_BY_SCALAR, DL, VT, N1: Op, N2: Shift);
5047 Op = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op, N2: Tmp);
5048 Op = DAG.getNode(Opcode: SystemZISD::VSRL_BY_SCALAR, DL, VT, N1: Op, N2: Shift);
5049 break;
5050 }
5051 case 32: {
5052 SDValue Tmp = DAG.getSplatBuildVector(VT: MVT::v16i8, DL,
5053 Op: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
5054 Op = DAG.getNode(Opcode: SystemZISD::VSUM, DL, VT, N1: Op, N2: Tmp);
5055 break;
5056 }
5057 case 64: {
5058 SDValue Tmp = DAG.getSplatBuildVector(VT: MVT::v16i8, DL,
5059 Op: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
5060 Op = DAG.getNode(Opcode: SystemZISD::VSUM, DL, VT: MVT::v4i32, N1: Op, N2: Tmp);
5061 Op = DAG.getNode(Opcode: SystemZISD::VSUM, DL, VT, N1: Op, N2: Tmp);
5062 break;
5063 }
5064 default:
5065 llvm_unreachable("Unexpected type");
5066 }
5067 return Op;
5068 }
5069
5070 // Get the known-zero mask for the operand.
5071 KnownBits Known = DAG.computeKnownBits(Op);
5072 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
5073 if (NumSignificantBits == 0)
5074 return DAG.getConstant(Val: 0, DL, VT);
5075
5076 // Skip known-zero high parts of the operand.
5077 int64_t OrigBitSize = VT.getSizeInBits();
5078 int64_t BitSize = llvm::bit_ceil(Value: NumSignificantBits);
5079 BitSize = std::min(a: BitSize, b: OrigBitSize);
5080
5081 // The POPCNT instruction counts the number of bits in each byte.
5082 Op = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op);
5083 Op = DAG.getNode(Opcode: SystemZISD::POPCNT, DL, VT: MVT::i64, Operand: Op);
5084 Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Op);
5085
5086 // Add up per-byte counts in a binary tree. All bits of Op at
5087 // position larger than BitSize remain zero throughout.
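// For example, with BitSize == 32 this performs Op += Op << 16 followed by
// Op += Op << 8, leaving the sum of all four byte counts in the top byte,
// which the shift below then extracts.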
5088 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
5089 SDValue Tmp = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Op, N2: DAG.getConstant(Val: I, DL, VT));
5090 if (BitSize != OrigBitSize)
5091 Tmp = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Tmp,
5092 N2: DAG.getConstant(Val: ((uint64_t)1 << BitSize) - 1, DL, VT));
5093 Op = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op, N2: Tmp);
5094 }
5095
5096 // Extract overall result from high byte.
5097 if (BitSize > 8)
5098 Op = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Op,
5099 N2: DAG.getConstant(Val: BitSize - 8, DL, VT));
5100
5101 return Op;
5102}
5103
5104SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
5105 SelectionDAG &DAG) const {
5106 SDLoc DL(Op);
5107 AtomicOrdering FenceOrdering =
5108 static_cast<AtomicOrdering>(Op.getConstantOperandVal(i: 1));
5109 SyncScope::ID FenceSSID =
5110 static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2));
5111
5112 // The only fence that needs an instruction is a sequentially-consistent
5113 // cross-thread fence.
5114 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5115 FenceSSID == SyncScope::System) {
5116 return SDValue(DAG.getMachineNode(Opcode: SystemZ::Serialize, dl: DL, VT: MVT::Other,
5117 Op1: Op.getOperand(i: 0)),
5118 0);
5119 }
5120
5121 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5122 return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0));
5123}
5124
5125SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
5126 SelectionDAG &DAG) const {
5127 EVT RegVT = Op.getValueType();
5128 if (RegVT.getSizeInBits() == 128)
5129 return lowerATOMIC_LDST_I128(Op, DAG);
5130 return lowerLoadF16(Op, DAG);
5131}
5132
5133SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
5134 SelectionDAG &DAG) const {
5135 auto *Node = cast<AtomicSDNode>(Val: Op.getNode());
5136 if (Node->getMemoryVT().getSizeInBits() == 128)
5137 return lowerATOMIC_LDST_I128(Op, DAG);
5138 return lowerStoreF16(Op, DAG);
5139}
5140
5141SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
5142 SelectionDAG &DAG) const {
5143 auto *Node = cast<AtomicSDNode>(Val: Op.getNode());
5144 assert(
5145 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
5146 "Only custom lowering i128 or f128.");
5147 // Use same code to handle both legal and non-legal i128 types.
5148 SmallVector<SDValue, 2> Results;
5149 LowerOperationWrapper(N: Node, Results, DAG);
5150 return DAG.getMergeValues(Ops: Results, dl: SDLoc(Op));
5151}
5152
5153// Prepare for a Compare And Swap for a subword operation, which has to be
5154// performed on a naturally aligned 4-byte word in memory.
5155static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
5156 SDValue &AlignedAddr, SDValue &BitShift,
5157 SDValue &NegBitShift) {
5158 EVT PtrVT = Addr.getValueType();
5159 EVT WideVT = MVT::i32;
5160
5161 // Get the address of the containing word.
5162 AlignedAddr = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, N1: Addr,
5163 N2: DAG.getSignedConstant(Val: -4, DL, VT: PtrVT));
5164
5165 // Get the number of bits that the word must be rotated left in order
5166 // to bring the field to the top bits of a GR32.
5167 BitShift = DAG.getNode(Opcode: ISD::SHL, DL, VT: PtrVT, N1: Addr,
5168 N2: DAG.getConstant(Val: 3, DL, VT: PtrVT));
5169 BitShift = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: WideVT, Operand: BitShift);
5170
5171 // Get the complementing shift amount, for rotating a field in the top
5172 // bits back to its proper position.
5173 NegBitShift = DAG.getNode(Opcode: ISD::SUB, DL, VT: WideVT,
5174 N1: DAG.getConstant(Val: 0, DL, VT: WideVT), N2: BitShift);
5175
5176}
5177
5178// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
5179// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
5180SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
5181 SelectionDAG &DAG,
5182 unsigned Opcode) const {
5183 auto *Node = cast<AtomicSDNode>(Val: Op.getNode());
5184
5185 // 32-bit operations need no special handling.
5186 EVT NarrowVT = Node->getMemoryVT();
5187 EVT WideVT = MVT::i32;
5188 if (NarrowVT == WideVT)
5189 return Op;
5190
5191 int64_t BitSize = NarrowVT.getSizeInBits();
5192 SDValue ChainIn = Node->getChain();
5193 SDValue Addr = Node->getBasePtr();
5194 SDValue Src2 = Node->getVal();
5195 MachineMemOperand *MMO = Node->getMemOperand();
5196 SDLoc DL(Node);
5197
5198 // Convert atomic subtracts of constants into additions.
5199 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
5200 if (auto *Const = dyn_cast<ConstantSDNode>(Val&: Src2)) {
5201 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
5202 Src2 = DAG.getSignedConstant(Val: -Const->getSExtValue(), DL,
5203 VT: Src2.getValueType());
5204 }
5205
5206 SDValue AlignedAddr, BitShift, NegBitShift;
5207 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5208
5209 // Extend the source operand to 32 bits and prepare it for the inner loop.
5210 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
5211 // operations require the source to be shifted in advance. (This shift
5212 // can be folded if the source is constant.) For AND and NAND, the lower
5213 // bits must be set, while for other opcodes they should be left clear.
5214 if (Opcode != SystemZISD::ATOMIC_SWAPW)
5215 Src2 = DAG.getNode(Opcode: ISD::SHL, DL, VT: WideVT, N1: Src2,
5216 N2: DAG.getConstant(Val: 32 - BitSize, DL, VT: WideVT));
5217 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
5218 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
5219 Src2 = DAG.getNode(Opcode: ISD::OR, DL, VT: WideVT, N1: Src2,
5220 N2: DAG.getConstant(Val: uint32_t(-1) >> BitSize, DL, VT: WideVT));
5221
5222 // Construct the ATOMIC_LOADW_* node.
5223 SDVTList VTList = DAG.getVTList(VT1: WideVT, VT2: MVT::Other);
5224 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
5225 DAG.getConstant(Val: BitSize, DL, VT: WideVT) };
5226 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, dl: DL, VTList, Ops,
5227 MemVT: NarrowVT, MMO);
5228
5229 // Rotate the result of the final CS so that the field is in the lower
5230 // bits of a GR32, then truncate it.
5231 SDValue ResultShift = DAG.getNode(Opcode: ISD::ADD, DL, VT: WideVT, N1: BitShift,
5232 N2: DAG.getConstant(Val: BitSize, DL, VT: WideVT));
5233 SDValue Result = DAG.getNode(Opcode: ISD::ROTL, DL, VT: WideVT, N1: AtomicOp, N2: ResultShift);
5234
5235 SDValue RetOps[2] = { Result, AtomicOp.getValue(R: 1) };
5236 return DAG.getMergeValues(Ops: RetOps, dl: DL);
5237}
5238
5239// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
5240// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
5241SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
5242 SelectionDAG &DAG) const {
5243 auto *Node = cast<AtomicSDNode>(Val: Op.getNode());
5244 EVT MemVT = Node->getMemoryVT();
5245 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
5246 // A full-width operation: negate and use LAA(G).
5247 assert(Op.getValueType() == MemVT && "Mismatched VTs");
5248 assert(Subtarget.hasInterlockedAccess1() &&
5249 "Should have been expanded by AtomicExpand pass.");
5250 SDValue Src2 = Node->getVal();
5251 SDLoc DL(Src2);
5252 SDValue NegSrc2 =
5253 DAG.getNode(Opcode: ISD::SUB, DL, VT: MemVT, N1: DAG.getConstant(Val: 0, DL, VT: MemVT), N2: Src2);
5254 return DAG.getAtomic(Opcode: ISD::ATOMIC_LOAD_ADD, dl: DL, MemVT,
5255 Chain: Node->getChain(), Ptr: Node->getBasePtr(), Val: NegSrc2,
5256 MMO: Node->getMemOperand());
5257 }
5258
5259 return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_SUB);
5260}
5261
5262// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
5263SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
5264 SelectionDAG &DAG) const {
5265 auto *Node = cast<AtomicSDNode>(Val: Op.getNode());
5266 SDValue ChainIn = Node->getOperand(Num: 0);
5267 SDValue Addr = Node->getOperand(Num: 1);
5268 SDValue CmpVal = Node->getOperand(Num: 2);
5269 SDValue SwapVal = Node->getOperand(Num: 3);
5270 MachineMemOperand *MMO = Node->getMemOperand();
5271 SDLoc DL(Node);
5272
5273 if (Node->getMemoryVT() == MVT::i128) {
5274 // Use same code to handle both legal and non-legal i128 types.
5275 SmallVector<SDValue, 3> Results;
5276 LowerOperationWrapper(N: Node, Results, DAG);
5277 return DAG.getMergeValues(Ops: Results, dl: DL);
5278 }
5279
5280 // We have native support for 32-bit and 64-bit compare and swap, but we
5281 // still need to expand extracting the "success" result from the CC.
5282 EVT NarrowVT = Node->getMemoryVT();
5283 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
5284 if (NarrowVT == WideVT) {
5285 SDVTList Tys = DAG.getVTList(VT1: WideVT, VT2: MVT::i32, VT3: MVT::Other);
5286 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
5287 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode: SystemZISD::ATOMIC_CMP_SWAP,
5288 dl: DL, VTList: Tys, Ops, MemVT: NarrowVT, MMO);
5289 SDValue Success = emitSETCC(DAG, DL, CCReg: AtomicOp.getValue(R: 1),
5290 CCValid: SystemZ::CCMASK_CS, CCMask: SystemZ::CCMASK_CS_EQ);
5291
5292 DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 0), To: AtomicOp.getValue(R: 0));
5293 DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 1), To: Success);
5294 DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 2), To: AtomicOp.getValue(R: 2));
5295 return SDValue();
5296 }
5297
5298 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
5299 // via a fullword ATOMIC_CMP_SWAPW operation.
5300 int64_t BitSize = NarrowVT.getSizeInBits();
5301
5302 SDValue AlignedAddr, BitShift, NegBitShift;
5303 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5304
5305 // Construct the ATOMIC_CMP_SWAPW node.
5306 SDVTList VTList = DAG.getVTList(VT1: WideVT, VT2: MVT::i32, VT3: MVT::Other);
5307 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
5308 NegBitShift, DAG.getConstant(Val: BitSize, DL, VT: WideVT) };
5309 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode: SystemZISD::ATOMIC_CMP_SWAPW, dl: DL,
5310 VTList, Ops, MemVT: NarrowVT, MMO);
5311 SDValue Success = emitSETCC(DAG, DL, CCReg: AtomicOp.getValue(R: 1),
5312 CCValid: SystemZ::CCMASK_ICMP, CCMask: SystemZ::CCMASK_CMP_EQ);
5313
5314 // emitAtomicCmpSwapW() will zero extend the result (original value).
5315 SDValue OrigVal = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: WideVT, N1: AtomicOp.getValue(R: 0),
5316 N2: DAG.getValueType(NarrowVT));
5317 DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 0), To: OrigVal);
5318 DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 1), To: Success);
5319 DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 2), To: AtomicOp.getValue(R: 2));
5320 return SDValue();
5321}
5322
5323MachineMemOperand::Flags
5324SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
5325 // Because of how we convert atomic_load and atomic_store to normal loads and
5326 // stores in the DAG, we need to ensure that the MMOs are marked volatile,
5327 // since DAGCombine hasn't been updated to account for atomic, but
5328 // non-volatile, loads. (See D57601.)
5329 if (auto *SI = dyn_cast<StoreInst>(Val: &I))
5330 if (SI->isAtomic())
5331 return MachineMemOperand::MOVolatile;
5332 if (auto *LI = dyn_cast<LoadInst>(Val: &I))
5333 if (LI->isAtomic())
5334 return MachineMemOperand::MOVolatile;
5335 if (auto *AI = dyn_cast<AtomicRMWInst>(Val: &I))
5336 if (AI->isAtomic())
5337 return MachineMemOperand::MOVolatile;
5338 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(Val: &I))
5339 if (AI->isAtomic())
5340 return MachineMemOperand::MOVolatile;
5341 return MachineMemOperand::MONone;
5342}
5343
5344SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
5345 SelectionDAG &DAG) const {
5346 MachineFunction &MF = DAG.getMachineFunction();
5347 auto *Regs = Subtarget.getSpecialRegisters();
5348 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5349 report_fatal_error(reason: "Variable-sized stack allocations are not supported "
5350 "in GHC calling convention");
5351 return DAG.getCopyFromReg(Chain: Op.getOperand(i: 0), dl: SDLoc(Op),
5352 Reg: Regs->getStackPointerRegister(), VT: Op.getValueType());
5353}
5354
5355SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
5356 SelectionDAG &DAG) const {
5357 MachineFunction &MF = DAG.getMachineFunction();
5358 auto *Regs = Subtarget.getSpecialRegisters();
5359 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
5360
5361 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5362 report_fatal_error(reason: "Variable-sized stack allocations are not supported "
5363 "in GHC calling convention");
5364
5365 SDValue Chain = Op.getOperand(i: 0);
5366 SDValue NewSP = Op.getOperand(i: 1);
5367 SDValue Backchain;
5368 SDLoc DL(Op);
5369
5370 if (StoreBackchain) {
5371 SDValue OldSP = DAG.getCopyFromReg(
5372 Chain, dl: DL, Reg: Regs->getStackPointerRegister(), VT: MVT::i64);
5373 Backchain = DAG.getLoad(VT: MVT::i64, dl: DL, Chain, Ptr: getBackchainAddress(SP: OldSP, DAG),
5374 PtrInfo: MachinePointerInfo());
5375 }
5376
5377 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Regs->getStackPointerRegister(), N: NewSP);
5378
5379 if (StoreBackchain)
5380 Chain = DAG.getStore(Chain, dl: DL, Val: Backchain, Ptr: getBackchainAddress(SP: NewSP, DAG),
5381 PtrInfo: MachinePointerInfo());
5382
5383 return Chain;
5384}
5385
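// Lower a PREFETCH node to SystemZISD::PREFETCH (PFD).  Prefetches that do not
// target the data cache (operand 4 is zero) are simply dropped, preserving
// only the chain.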
5386SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
5387 SelectionDAG &DAG) const {
5388 bool IsData = Op.getConstantOperandVal(i: 4);
5389 if (!IsData)
5390 // Just preserve the chain.
5391 return Op.getOperand(i: 0);
5392
5393 SDLoc DL(Op);
5394 bool IsWrite = Op.getConstantOperandVal(i: 2);
5395 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
5396 auto *Node = cast<MemIntrinsicSDNode>(Val: Op.getNode());
5397 SDValue Ops[] = {Op.getOperand(i: 0), DAG.getTargetConstant(Val: Code, DL, VT: MVT::i32),
5398 Op.getOperand(i: 1)};
5399 return DAG.getMemIntrinsicNode(Opcode: SystemZISD::PREFETCH, dl: DL,
5400 VTList: Node->getVTList(), Ops,
5401 MemVT: Node->getMemoryVT(), MMO: Node->getMemOperand());
5402}
5403
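// Lower an INTRINSIC_W_CHAIN node whose intrinsic produces a CC value: emit
// the corresponding target node and convert the raw CC result into a
// user-visible value via getCCResult().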
5404SDValue
5405SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
5406 SelectionDAG &DAG) const {
5407 unsigned Opcode, CCValid;
5408 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
5409 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
5410 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
5411 SDValue CC = getCCResult(DAG, CCReg: SDValue(Node, 0));
5412 DAG.ReplaceAllUsesOfValueWith(From: SDValue(Op.getNode(), 0), To: CC);
5413 return SDValue();
5414 }
5415
5416 return SDValue();
5417}
5418
5419SDValue
5420SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
5421 SelectionDAG &DAG) const {
5422 unsigned Opcode, CCValid;
5423 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
5424 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
5425 if (Op->getNumValues() == 1)
5426 return getCCResult(DAG, CCReg: SDValue(Node, 0));
5427 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
5428 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: SDLoc(Op), VTList: Op->getVTList(),
5429 N1: SDValue(Node, 0), N2: getCCResult(DAG, CCReg: SDValue(Node, 1)));
5430 }
5431
5432 unsigned Id = Op.getConstantOperandVal(i: 0);
5433 switch (Id) {
5434 case Intrinsic::thread_pointer:
5435 return lowerThreadPointer(DL: SDLoc(Op), DAG);
5436
5437 case Intrinsic::s390_vpdi:
5438 return DAG.getNode(Opcode: SystemZISD::PERMUTE_DWORDS, DL: SDLoc(Op), VT: Op.getValueType(),
5439 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
5440
5441 case Intrinsic::s390_vperm:
5442 return DAG.getNode(Opcode: SystemZISD::PERMUTE, DL: SDLoc(Op), VT: Op.getValueType(),
5443 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
5444
5445 case Intrinsic::s390_vuphb:
5446 case Intrinsic::s390_vuphh:
5447 case Intrinsic::s390_vuphf:
5448 case Intrinsic::s390_vuphg:
5449 return DAG.getNode(Opcode: SystemZISD::UNPACK_HIGH, DL: SDLoc(Op), VT: Op.getValueType(),
5450 Operand: Op.getOperand(i: 1));
5451
5452 case Intrinsic::s390_vuplhb:
5453 case Intrinsic::s390_vuplhh:
5454 case Intrinsic::s390_vuplhf:
5455 case Intrinsic::s390_vuplhg:
5456 return DAG.getNode(Opcode: SystemZISD::UNPACKL_HIGH, DL: SDLoc(Op), VT: Op.getValueType(),
5457 Operand: Op.getOperand(i: 1));
5458
5459 case Intrinsic::s390_vuplb:
5460 case Intrinsic::s390_vuplhw:
5461 case Intrinsic::s390_vuplf:
5462 case Intrinsic::s390_vuplg:
5463 return DAG.getNode(Opcode: SystemZISD::UNPACK_LOW, DL: SDLoc(Op), VT: Op.getValueType(),
5464 Operand: Op.getOperand(i: 1));
5465
5466 case Intrinsic::s390_vupllb:
5467 case Intrinsic::s390_vupllh:
5468 case Intrinsic::s390_vupllf:
5469 case Intrinsic::s390_vupllg:
5470 return DAG.getNode(Opcode: SystemZISD::UNPACKL_LOW, DL: SDLoc(Op), VT: Op.getValueType(),
5471 Operand: Op.getOperand(i: 1));
5472
5473 case Intrinsic::s390_vsumb:
5474 case Intrinsic::s390_vsumh:
5475 case Intrinsic::s390_vsumgh:
5476 case Intrinsic::s390_vsumgf:
5477 case Intrinsic::s390_vsumqf:
5478 case Intrinsic::s390_vsumqg:
5479 return DAG.getNode(Opcode: SystemZISD::VSUM, DL: SDLoc(Op), VT: Op.getValueType(),
5480 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
5481
5482 case Intrinsic::s390_vaq:
5483 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: Op.getValueType(),
5484 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
5485 case Intrinsic::s390_vaccb:
5486 case Intrinsic::s390_vacch:
5487 case Intrinsic::s390_vaccf:
5488 case Intrinsic::s390_vaccg:
5489 case Intrinsic::s390_vaccq:
5490 return DAG.getNode(Opcode: SystemZISD::VACC, DL: SDLoc(Op), VT: Op.getValueType(),
5491 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
5492 case Intrinsic::s390_vacq:
5493 return DAG.getNode(Opcode: SystemZISD::VAC, DL: SDLoc(Op), VT: Op.getValueType(),
5494 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
5495 case Intrinsic::s390_vacccq:
5496 return DAG.getNode(Opcode: SystemZISD::VACCC, DL: SDLoc(Op), VT: Op.getValueType(),
5497 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
5498
5499 case Intrinsic::s390_vsq:
5500 return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(Op), VT: Op.getValueType(),
5501 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
5502 case Intrinsic::s390_vscbib:
5503 case Intrinsic::s390_vscbih:
5504 case Intrinsic::s390_vscbif:
5505 case Intrinsic::s390_vscbig:
5506 case Intrinsic::s390_vscbiq:
5507 return DAG.getNode(Opcode: SystemZISD::VSCBI, DL: SDLoc(Op), VT: Op.getValueType(),
5508 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
5509 case Intrinsic::s390_vsbiq:
5510 return DAG.getNode(Opcode: SystemZISD::VSBI, DL: SDLoc(Op), VT: Op.getValueType(),
5511 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
5512 case Intrinsic::s390_vsbcbiq:
5513 return DAG.getNode(Opcode: SystemZISD::VSBCBI, DL: SDLoc(Op), VT: Op.getValueType(),
5514 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
5515
5516 case Intrinsic::s390_vmhb:
5517 case Intrinsic::s390_vmhh:
5518 case Intrinsic::s390_vmhf:
5519 case Intrinsic::s390_vmhg:
5520 case Intrinsic::s390_vmhq:
5521 return DAG.getNode(Opcode: ISD::MULHS, DL: SDLoc(Op), VT: Op.getValueType(),
5522 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
5523 case Intrinsic::s390_vmlhb:
5524 case Intrinsic::s390_vmlhh:
5525 case Intrinsic::s390_vmlhf:
5526 case Intrinsic::s390_vmlhg:
5527 case Intrinsic::s390_vmlhq:
5528 return DAG.getNode(Opcode: ISD::MULHU, DL: SDLoc(Op), VT: Op.getValueType(),
5529 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
5530
5531 case Intrinsic::s390_vmahb:
5532 case Intrinsic::s390_vmahh:
5533 case Intrinsic::s390_vmahf:
5534 case Intrinsic::s390_vmahg:
5535 case Intrinsic::s390_vmahq:
5536 return DAG.getNode(Opcode: SystemZISD::VMAH, DL: SDLoc(Op), VT: Op.getValueType(),
5537 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
5538 case Intrinsic::s390_vmalhb:
5539 case Intrinsic::s390_vmalhh:
5540 case Intrinsic::s390_vmalhf:
5541 case Intrinsic::s390_vmalhg:
5542 case Intrinsic::s390_vmalhq:
5543 return DAG.getNode(Opcode: SystemZISD::VMALH, DL: SDLoc(Op), VT: Op.getValueType(),
5544 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
5545
5546 case Intrinsic::s390_vmeb:
5547 case Intrinsic::s390_vmeh:
5548 case Intrinsic::s390_vmef:
5549 case Intrinsic::s390_vmeg:
5550 return DAG.getNode(Opcode: SystemZISD::VME, DL: SDLoc(Op), VT: Op.getValueType(),
5551 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
5552 case Intrinsic::s390_vmleb:
5553 case Intrinsic::s390_vmleh:
5554 case Intrinsic::s390_vmlef:
5555 case Intrinsic::s390_vmleg:
5556 return DAG.getNode(Opcode: SystemZISD::VMLE, DL: SDLoc(Op), VT: Op.getValueType(),
5557 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
5558 case Intrinsic::s390_vmob:
5559 case Intrinsic::s390_vmoh:
5560 case Intrinsic::s390_vmof:
5561 case Intrinsic::s390_vmog:
5562 return DAG.getNode(Opcode: SystemZISD::VMO, DL: SDLoc(Op), VT: Op.getValueType(),
5563 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
5564 case Intrinsic::s390_vmlob:
5565 case Intrinsic::s390_vmloh:
5566 case Intrinsic::s390_vmlof:
5567 case Intrinsic::s390_vmlog:
5568 return DAG.getNode(Opcode: SystemZISD::VMLO, DL: SDLoc(Op), VT: Op.getValueType(),
5569 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
5570
5571 case Intrinsic::s390_vmaeb:
5572 case Intrinsic::s390_vmaeh:
5573 case Intrinsic::s390_vmaef:
5574 case Intrinsic::s390_vmaeg:
5575 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: Op.getValueType(),
5576 N1: DAG.getNode(Opcode: SystemZISD::VME, DL: SDLoc(Op), VT: Op.getValueType(),
5577 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)),
5578 N2: Op.getOperand(i: 3));
5579 case Intrinsic::s390_vmaleb:
5580 case Intrinsic::s390_vmaleh:
5581 case Intrinsic::s390_vmalef:
5582 case Intrinsic::s390_vmaleg:
5583 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: Op.getValueType(),
5584 N1: DAG.getNode(Opcode: SystemZISD::VMLE, DL: SDLoc(Op), VT: Op.getValueType(),
5585 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)),
5586 N2: Op.getOperand(i: 3));
5587 case Intrinsic::s390_vmaob:
5588 case Intrinsic::s390_vmaoh:
5589 case Intrinsic::s390_vmaof:
5590 case Intrinsic::s390_vmaog:
5591 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: Op.getValueType(),
5592 N1: DAG.getNode(Opcode: SystemZISD::VMO, DL: SDLoc(Op), VT: Op.getValueType(),
5593 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)),
5594 N2: Op.getOperand(i: 3));
5595 case Intrinsic::s390_vmalob:
5596 case Intrinsic::s390_vmaloh:
5597 case Intrinsic::s390_vmalof:
5598 case Intrinsic::s390_vmalog:
5599 return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: Op.getValueType(),
5600 N1: DAG.getNode(Opcode: SystemZISD::VMLO, DL: SDLoc(Op), VT: Op.getValueType(),
5601 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2)),
5602 N2: Op.getOperand(i: 3));
5603 }
5604
5605 return SDValue();
5606}
5607
5608namespace {
5609// Describes how the SystemZISD operation Opcode can be used to perform the
5610// equivalent of a VPERM with permute vector Bytes.  If Opcode takes three
5611// operands, Operand is the constant third operand, otherwise it is the
5612// number of bytes in each element of the result.
5613struct Permute {
5614 unsigned Opcode;
5615 unsigned Operand;
5616 unsigned char Bytes[SystemZ::VectorBytes];
5617};
5618} // end anonymous namespace
5619
5620static const Permute PermuteForms[] = {
5621 // VMRHG
5622 { .Opcode: SystemZISD::MERGE_HIGH, .Operand: 8,
5623 .Bytes: { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
5624 // VMRHF
5625 { .Opcode: SystemZISD::MERGE_HIGH, .Operand: 4,
5626 .Bytes: { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
5627 // VMRHH
5628 { .Opcode: SystemZISD::MERGE_HIGH, .Operand: 2,
5629 .Bytes: { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
5630 // VMRHB
5631 { .Opcode: SystemZISD::MERGE_HIGH, .Operand: 1,
5632 .Bytes: { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
5633 // VMRLG
5634 { .Opcode: SystemZISD::MERGE_LOW, .Operand: 8,
5635 .Bytes: { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
5636 // VMRLF
5637 { .Opcode: SystemZISD::MERGE_LOW, .Operand: 4,
5638 .Bytes: { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
5639 // VMRLH
5640 { .Opcode: SystemZISD::MERGE_LOW, .Operand: 2,
5641 .Bytes: { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
5642 // VMRLB
5643 { .Opcode: SystemZISD::MERGE_LOW, .Operand: 1,
5644 .Bytes: { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
5645 // VPKG
5646 { .Opcode: SystemZISD::PACK, .Operand: 4,
5647 .Bytes: { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
5648 // VPKF
5649 { .Opcode: SystemZISD::PACK, .Operand: 2,
5650 .Bytes: { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
5651 // VPKH
5652 { .Opcode: SystemZISD::PACK, .Operand: 1,
5653 .Bytes: { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
5654 // VPDI V1, V2, 4 (low half of V1, high half of V2)
5655 { .Opcode: SystemZISD::PERMUTE_DWORDS, .Operand: 4,
5656 .Bytes: { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
5657 // VPDI V1, V2, 1 (high half of V1, low half of V2)
5658 { .Opcode: SystemZISD::PERMUTE_DWORDS, .Operand: 1,
5659 .Bytes: { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
5660};
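// For example, the VMRHG entry above selects bytes 0..7 of operand 0 followed
// by bytes 0..7 of operand 1 (selectors 16..23), i.e. it merges the high
// doublewords of the two inputs.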
5661
5662// Called after matching a vector shuffle against a particular pattern.
5663// Both the original shuffle and the pattern have two vector operands.
5664// OpNos[0] is the operand of the original shuffle that should be used for
5665// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
5666// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
5667// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
5668// for operands 0 and 1 of the pattern.
5669static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
5670 if (OpNos[0] < 0) {
5671 if (OpNos[1] < 0)
5672 return false;
5673 OpNo0 = OpNo1 = OpNos[1];
5674 } else if (OpNos[1] < 0) {
5675 OpNo0 = OpNo1 = OpNos[0];
5676 } else {
5677 OpNo0 = OpNos[0];
5678 OpNo1 = OpNos[1];
5679 }
5680 return true;
5681}
5682
5683// Bytes is a VPERM-like permute vector, except that -1 is used for
5684// undefined bytes. Return true if the VPERM can be implemented using P.
5685// When returning true set OpNo0 to the VPERM operand that should be
5686// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5687//
5688// For example, if swapping the VPERM operands allows P to match, OpNo0
5689// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5690// operand, but rewriting it to use two duplicated operands allows it to
5691// match P, then OpNo0 and OpNo1 will be the same.
5692static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5693 unsigned &OpNo0, unsigned &OpNo1) {
5694 int OpNos[] = { -1, -1 };
5695 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5696 int Elt = Bytes[I];
5697 if (Elt >= 0) {
5698 // Make sure that the two permute vectors use the same suboperand
5699 // byte number. Only the operand numbers (the high bits) are
5700 // allowed to differ.
5701 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5702 return false;
5703 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5704 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5705 // Make sure that the operand mappings are consistent with previous
5706 // elements.
5707 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5708 return false;
5709 OpNos[ModelOpNo] = RealOpNo;
5710 }
5711 }
5712 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5713}
5714
5715// As above, but search for a matching permute.
5716static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5717 unsigned &OpNo0, unsigned &OpNo1) {
5718 for (auto &P : PermuteForms)
5719 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5720 return &P;
5721 return nullptr;
5722}
5723
5724// Bytes is a VPERM-like permute vector, except that -1 is used for
5725// undefined bytes. This permute is an operand of an outer permute.
5726// See whether redistributing the -1 bytes gives a shuffle that can be
5727// implemented using P. If so, set Transform to a VPERM-like permute vector
5728// that, when applied to the result of P, gives the original permute in Bytes.
5729static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5730 const Permute &P,
5731 SmallVectorImpl<int> &Transform) {
5732 unsigned To = 0;
5733 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5734 int Elt = Bytes[From];
5735 if (Elt < 0)
5736 // Byte number From of the result is undefined.
5737 Transform[From] = -1;
5738 else {
5739 while (P.Bytes[To] != Elt) {
5740 To += 1;
5741 if (To == SystemZ::VectorBytes)
5742 return false;
5743 }
5744 Transform[From] = To;
5745 }
5746 }
5747 return true;
5748}
5749
5750// As above, but search for a matching permute.
5751static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5752 SmallVectorImpl<int> &Transform) {
5753 for (auto &P : PermuteForms)
5754 if (matchDoublePermute(Bytes, P, Transform))
5755 return &P;
5756 return nullptr;
5757}
5758
5759// Convert the mask of the given shuffle op into a byte-level mask,
5760// as if it had type vNi8.
5761static bool getVPermMask(SDValue ShuffleOp,
5762 SmallVectorImpl<int> &Bytes) {
5763 EVT VT = ShuffleOp.getValueType();
5764 unsigned NumElements = VT.getVectorNumElements();
5765 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5766
5767 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Val&: ShuffleOp)) {
5768 Bytes.resize(N: NumElements * BytesPerElement, NV: -1);
5769 for (unsigned I = 0; I < NumElements; ++I) {
5770 int Index = VSN->getMaskElt(Idx: I);
5771 if (Index >= 0)
5772 for (unsigned J = 0; J < BytesPerElement; ++J)
5773 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5774 }
5775 return true;
5776 }
5777 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5778 isa<ConstantSDNode>(Val: ShuffleOp.getOperand(i: 1))) {
5779 unsigned Index = ShuffleOp.getConstantOperandVal(i: 1);
5780 Bytes.resize(N: NumElements * BytesPerElement, NV: -1);
5781 for (unsigned I = 0; I < NumElements; ++I)
5782 for (unsigned J = 0; J < BytesPerElement; ++J)
5783 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5784 return true;
5785 }
5786 return false;
5787}
5788
5789// Bytes is a VPERM-like permute vector, except that -1 is used for
5790// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5791// the result come from a contiguous sequence of bytes from one input.
5792// Set Base to the selector for the first byte if so.
5793static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5794 unsigned BytesPerElement, int &Base) {
5795 Base = -1;
5796 for (unsigned I = 0; I < BytesPerElement; ++I) {
5797 if (Bytes[Start + I] >= 0) {
5798 unsigned Elem = Bytes[Start + I];
5799 if (Base < 0) {
5800 Base = Elem - I;
5801 // Make sure the bytes would come from one input operand.
5802 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5803 return false;
5804 } else if (unsigned(Base) != Elem - I)
5805 return false;
5806 }
5807 }
5808 return true;
5809}
5810
5811// Bytes is a VPERM-like permute vector, except that -1 is used for
5812// undefined bytes. Return true if it can be performed using VSLDB.
5813// When returning true, set StartIndex to the shift amount and OpNo0
5814// and OpNo1 to the VPERM operands that should be used as the first
5815// and second shift operand respectively.
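// For example, Bytes == { 1, 2, ..., 15, 16 } selects a contiguous run that
// starts one byte into the concatenation of operand 0 and operand 1, so
// StartIndex is set to 1, OpNo0 to 0 and OpNo1 to 1.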
5816static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5817 unsigned &StartIndex, unsigned &OpNo0,
5818 unsigned &OpNo1) {
5819 int OpNos[] = { -1, -1 };
5820 int Shift = -1;
5821 for (unsigned I = 0; I < 16; ++I) {
5822 int Index = Bytes[I];
5823 if (Index >= 0) {
5824 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5825 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5826 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5827 if (Shift < 0)
5828 Shift = ExpectedShift;
5829 else if (Shift != ExpectedShift)
5830 return false;
5831 // Make sure that the operand mappings are consistent with previous
5832 // elements.
5833 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5834 return false;
5835 OpNos[ModelOpNo] = RealOpNo;
5836 }
5837 }
5838 StartIndex = Shift;
5839 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5840}
5841
5842// Create a node that performs P on operands Op0 and Op1, casting the
5843// operands to the appropriate type. The type of the result is determined by P.
5844static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5845 const Permute &P, SDValue Op0, SDValue Op1) {
5846 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5847 // elements of a PACK are twice as wide as the outputs.
5848 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5849 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5850 P.Operand);
5851 // Cast both operands to the appropriate type.
5852 MVT InVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: InBytes * 8),
5853 NumElements: SystemZ::VectorBytes / InBytes);
5854 Op0 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: InVT, Operand: Op0);
5855 Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: InVT, Operand: Op1);
5856 SDValue Op;
5857 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5858 SDValue Op2 = DAG.getTargetConstant(Val: P.Operand, DL, VT: MVT::i32);
5859 Op = DAG.getNode(Opcode: SystemZISD::PERMUTE_DWORDS, DL, VT: InVT, N1: Op0, N2: Op1, N3: Op2);
5860 } else if (P.Opcode == SystemZISD::PACK) {
5861 MVT OutVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: P.Operand * 8),
5862 NumElements: SystemZ::VectorBytes / P.Operand);
5863 Op = DAG.getNode(Opcode: SystemZISD::PACK, DL, VT: OutVT, N1: Op0, N2: Op1);
5864 } else {
5865 Op = DAG.getNode(Opcode: P.Opcode, DL, VT: InVT, N1: Op0, N2: Op1);
5866 }
5867 return Op;
5868}
5869
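// Return true if N is a constant zero vector, looking through bitcasts and
// handling both SPLAT_VECTOR and BUILD_VECTOR forms.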
5870static bool isZeroVector(SDValue N) {
5871 if (N->getOpcode() == ISD::BITCAST)
5872 N = N->getOperand(Num: 0);
5873 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5874 if (auto *Op = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 0)))
5875 return Op->getZExtValue() == 0;
5876 return ISD::isBuildVectorAllZeros(N: N.getNode());
5877}
5878
5879// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5880static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5881 for (unsigned I = 0; I < Num ; I++)
5882 if (isZeroVector(N: Ops[I]))
5883 return I;
5884 return UINT32_MAX;
5885}
5886
5887// Bytes is a VPERM-like permute vector, except that -1 is used for
5888// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5889// VSLDB or VPERM.
5890static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5891 SDValue *Ops,
5892 const SmallVectorImpl<int> &Bytes) {
5893 for (unsigned I = 0; I < 2; ++I)
5894 Ops[I] = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v16i8, Operand: Ops[I]);
5895
5896 // First see whether VSLDB can be used.
5897 unsigned StartIndex, OpNo0, OpNo1;
5898 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5899 return DAG.getNode(Opcode: SystemZISD::SHL_DOUBLE, DL, VT: MVT::v16i8, N1: Ops[OpNo0],
5900 N2: Ops[OpNo1],
5901 N3: DAG.getTargetConstant(Val: StartIndex, DL, VT: MVT::i32));
5902
5903 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5904 // eliminate a zero vector by reusing any zero index in the permute vector.
5905 unsigned ZeroVecIdx = findZeroVectorIdx(Ops: &Ops[0], Num: 2);
5906 if (ZeroVecIdx != UINT32_MAX) {
5907 bool MaskFirst = true;
5908 int ZeroIdx = -1;
5909 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5910 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5911 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5912 if (OpNo == ZeroVecIdx && I == 0) {
5913 // If the first byte is zero, use mask as first operand.
5914 ZeroIdx = 0;
5915 break;
5916 }
5917 if (OpNo != ZeroVecIdx && Byte == 0) {
5918 // If mask contains a zero, use it by placing that vector first.
5919 ZeroIdx = I + SystemZ::VectorBytes;
5920 MaskFirst = false;
5921 break;
5922 }
5923 }
5924 if (ZeroIdx != -1) {
5925 SDValue IndexNodes[SystemZ::VectorBytes];
5926 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5927 if (Bytes[I] >= 0) {
5928 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5929 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5930 if (OpNo == ZeroVecIdx)
5931 IndexNodes[I] = DAG.getConstant(Val: ZeroIdx, DL, VT: MVT::i32);
5932 else {
5933 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5934 IndexNodes[I] = DAG.getConstant(Val: BIdx, DL, VT: MVT::i32);
5935 }
5936 } else
5937 IndexNodes[I] = DAG.getUNDEF(VT: MVT::i32);
5938 }
5939 SDValue Mask = DAG.getBuildVector(VT: MVT::v16i8, DL, Ops: IndexNodes);
5940 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5941 if (MaskFirst)
5942 return DAG.getNode(Opcode: SystemZISD::PERMUTE, DL, VT: MVT::v16i8, N1: Mask, N2: Src,
5943 N3: Mask);
5944 else
5945 return DAG.getNode(Opcode: SystemZISD::PERMUTE, DL, VT: MVT::v16i8, N1: Src, N2: Mask,
5946 N3: Mask);
5947 }
5948 }
5949
5950 SDValue IndexNodes[SystemZ::VectorBytes];
5951 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5952 if (Bytes[I] >= 0)
5953 IndexNodes[I] = DAG.getConstant(Val: Bytes[I], DL, VT: MVT::i32);
5954 else
5955 IndexNodes[I] = DAG.getUNDEF(VT: MVT::i32);
5956 SDValue Op2 = DAG.getBuildVector(VT: MVT::v16i8, DL, Ops: IndexNodes);
5957 return DAG.getNode(Opcode: SystemZISD::PERMUTE, DL, VT: MVT::v16i8, N1: Ops[0],
5958 N2: (!Ops[1].isUndef() ? Ops[1] : Ops[0]), N3: Op2);
5959}
5960
5961namespace {
5962// Describes a general N-operand vector shuffle.
5963struct GeneralShuffle {
5964 GeneralShuffle(EVT vt)
5965 : VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {}
5966 void addUndef();
5967 bool add(SDValue, unsigned);
5968 SDValue getNode(SelectionDAG &, const SDLoc &);
5969 void tryPrepareForUnpack();
5970 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5971 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5972
5973 // The operands of the shuffle.
5974 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5975
5976 // Index I is -1 if byte I of the result is undefined. Otherwise the
5977 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5978 // Bytes[I] / SystemZ::VectorBytes.
5979 SmallVector<int, SystemZ::VectorBytes> Bytes;
5980
5981 // The type of the shuffle result.
5982 EVT VT;
5983
5984  // Element size (1, 2 or 4) of a prepared final unpack, or UINT_MAX if none.
5985 unsigned UnpackFromEltSize;
5986 // True if the final unpack uses the low half.
5987 bool UnpackLow;
5988};
5989} // namespace
5990
5991// Add an extra undefined element to the shuffle.
5992void GeneralShuffle::addUndef() {
5993 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5994 for (unsigned I = 0; I < BytesPerElement; ++I)
5995 Bytes.push_back(Elt: -1);
5996}
5997
5998// Add an extra element to the shuffle, taking it from element Elem of Op.
5999// A null Op indicates a vector input whose value will be calculated later;
6000// there is at most one such input per shuffle and it always has the same
6001// type as the result. Aborts and returns false if the source vector elements
6002// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
6003// LLVM they become implicitly extended, but this is rare and not optimized.
6004bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
6005 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
6006
6007 // The source vector can have wider elements than the result,
6008 // either through an explicit TRUNCATE or because of type legalization.
6009 // We want the least significant part.
6010 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
6011 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
6012
6013 // Return false if the source elements are smaller than their destination
6014 // elements.
6015 if (FromBytesPerElement < BytesPerElement)
6016 return false;
6017
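  // Compute the offset of the least-significant BytesPerElement bytes of
  // source element Elem within its 16-byte vector (with the big-endian byte
  // order, those are the trailing bytes of the element).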
6018 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
6019 (FromBytesPerElement - BytesPerElement));
6020
6021 // Look through things like shuffles and bitcasts.
6022 while (Op.getNode()) {
6023 if (Op.getOpcode() == ISD::BITCAST)
6024 Op = Op.getOperand(i: 0);
6025 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
6026 // See whether the bytes we need come from a contiguous part of one
6027 // operand.
6028 SmallVector<int, SystemZ::VectorBytes> OpBytes;
6029 if (!getVPermMask(ShuffleOp: Op, Bytes&: OpBytes))
6030 break;
6031 int NewByte;
6032 if (!getShuffleInput(Bytes: OpBytes, Start: Byte, BytesPerElement, Base&: NewByte))
6033 break;
6034 if (NewByte < 0) {
6035 addUndef();
6036 return true;
6037 }
6038 Op = Op.getOperand(i: unsigned(NewByte) / SystemZ::VectorBytes);
6039 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
6040 } else if (Op.isUndef()) {
6041 addUndef();
6042 return true;
6043 } else
6044 break;
6045 }
6046
6047 // Make sure that the source of the extraction is in Ops.
6048 unsigned OpNo = 0;
6049 for (; OpNo < Ops.size(); ++OpNo)
6050 if (Ops[OpNo] == Op)
6051 break;
6052 if (OpNo == Ops.size())
6053 Ops.push_back(Elt: Op);
6054
6055 // Add the element to Bytes.
6056 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
6057 for (unsigned I = 0; I < BytesPerElement; ++I)
6058 Bytes.push_back(Elt: Base + I);
6059
6060 return true;
6061}
6062
6063// Return SDNodes for the completed shuffle.
6064SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
6065 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
6066
6067 if (Ops.size() == 0)
6068 return DAG.getUNDEF(VT);
6069
6070 // Use a single unpack if possible as the last operation.
6071 tryPrepareForUnpack();
6072
6073 // Make sure that there are at least two shuffle operands.
6074 if (Ops.size() == 1)
6075 Ops.push_back(Elt: DAG.getUNDEF(VT: MVT::v16i8));
6076
6077 // Create a tree of shuffles, deferring root node until after the loop.
6078 // Try to redistribute the undefined elements of non-root nodes so that
6079 // the non-root shuffles match something like a pack or merge, then adjust
6080 // the parent node's permute vector to compensate for the new order.
6081 // Among other things, this copes with vectors like <2 x i16> that were
6082 // padded with undefined elements during type legalization.
6083 //
6084 // In the best case this redistribution will lead to the whole tree
6085 // using packs and merges. It should rarely be a loss in other cases.
6086 unsigned Stride = 1;
6087 for (; Stride * 2 < Ops.size(); Stride *= 2) {
6088 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
6089 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
6090
6091 // Create a mask for just these two operands.
6092 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
6093 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6094 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
6095 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
6096 if (OpNo == I)
6097 NewBytes[J] = Byte;
6098 else if (OpNo == I + Stride)
6099 NewBytes[J] = SystemZ::VectorBytes + Byte;
6100 else
6101 NewBytes[J] = -1;
6102 }
6103      // See if it would be better to reorganize NewBytes to avoid using VPERM.
6104 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
6105 if (const Permute *P = matchDoublePermute(Bytes: NewBytes, Transform&: NewBytesMap)) {
6106 Ops[I] = getPermuteNode(DAG, DL, P: *P, Op0: SubOps[0], Op1: SubOps[1]);
6107 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
6108 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6109 if (NewBytes[J] >= 0) {
6110 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
6111 "Invalid double permute");
6112 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
6113 } else
6114 assert(NewBytesMap[J] < 0 && "Invalid double permute");
6115 }
6116 } else {
6117 // Just use NewBytes on the operands.
6118 Ops[I] = getGeneralPermuteNode(DAG, DL, Ops: SubOps, Bytes: NewBytes);
6119 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
6120 if (NewBytes[J] >= 0)
6121 Bytes[J] = I * SystemZ::VectorBytes + J;
6122 }
6123 }
6124 }
6125
6126 // Now we just have 2 inputs. Put the second operand in Ops[1].
6127 if (Stride > 1) {
6128 Ops[1] = Ops[Stride];
6129 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6130 if (Bytes[I] >= int(SystemZ::VectorBytes))
6131 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
6132 }
6133
6134 // Look for an instruction that can do the permute without resorting
6135 // to VPERM.
6136 unsigned OpNo0, OpNo1;
6137 SDValue Op;
6138 if (unpackWasPrepared() && Ops[1].isUndef())
6139 Op = Ops[0];
6140 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
6141 Op = getPermuteNode(DAG, DL, P: *P, Op0: Ops[OpNo0], Op1: Ops[OpNo1]);
6142 else
6143 Op = getGeneralPermuteNode(DAG, DL, Ops: &Ops[0], Bytes);
6144
6145 Op = insertUnpackIfPrepared(DAG, DL, Op);
6146
6147 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
6148}
6149
6150#ifndef NDEBUG
6151static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
6152 dbgs() << Msg.c_str() << " { ";
6153 for (unsigned I = 0; I < Bytes.size(); I++)
6154 dbgs() << Bytes[I] << " ";
6155 dbgs() << "}\n";
6156}
6157#endif
6158
6159// If the Bytes vector matches an unpack operation, prepare to do the unpack
6160// after all else by removing the zero vector and the effect of the unpack on
6161// Bytes.
6162void GeneralShuffle::tryPrepareForUnpack() {
6163 uint32_t ZeroVecOpNo = findZeroVectorIdx(Ops: &Ops[0], Num: Ops.size());
6164 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
6165 return;
6166
6167 // Only do this if removing the zero vector reduces the depth, otherwise
6168 // the critical path will increase with the final unpack.
6169 if (Ops.size() > 2 &&
6170 Log2_32_Ceil(Value: Ops.size()) == Log2_32_Ceil(Value: Ops.size() - 1))
6171 return;
6172
6173 // Find an unpack that would allow removing the zero vector from Ops.
6174 UnpackFromEltSize = 1;
6175 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
6176 bool MatchUnpack = true;
6177 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
6178 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
6179 unsigned ToEltSize = UnpackFromEltSize * 2;
6180 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
6181 if (!IsZextByte)
6182 SrcBytes.push_back(Elt: Bytes[Elt]);
6183 if (Bytes[Elt] != -1) {
6184 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
6185 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
6186 MatchUnpack = false;
6187 break;
6188 }
6189 }
6190 }
6191 if (MatchUnpack) {
6192 if (Ops.size() == 2) {
6193 // Don't use unpack if a single source operand needs rearrangement.
6194 bool CanUseUnpackLow = true, CanUseUnpackHigh = true;
6195 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) {
6196 if (SrcBytes[i] == -1)
6197 continue;
6198 if (SrcBytes[i] % 16 != int(i))
6199 CanUseUnpackHigh = false;
6200 if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2))
6201 CanUseUnpackLow = false;
6202 if (!CanUseUnpackLow && !CanUseUnpackHigh) {
6203 UnpackFromEltSize = UINT_MAX;
6204 return;
6205 }
6206 }
6207 if (!CanUseUnpackHigh)
6208 UnpackLow = true;
6209 }
6210 break;
6211 }
6212 }
6213 if (UnpackFromEltSize > 4)
6214 return;
6215
6216 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
6217 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
6218 << ".\n";
6219 dumpBytes(Bytes, "Original Bytes vector:"););
6220
6221 // Apply the unpack in reverse to the Bytes array.
6222 unsigned B = 0;
6223 if (UnpackLow) {
6224 while (B < SystemZ::VectorBytes / 2)
6225 Bytes[B++] = -1;
6226 }
6227 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
6228 Elt += UnpackFromEltSize;
6229 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
6230 Bytes[B] = Bytes[Elt];
6231 }
6232 if (!UnpackLow) {
6233 while (B < SystemZ::VectorBytes)
6234 Bytes[B++] = -1;
6235 }
6236
6237  // Remove the zero vector from Ops and adjust the remaining byte selectors.
6238 Ops.erase(CI: &Ops[ZeroVecOpNo]);
6239 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6240 if (Bytes[I] >= 0) {
6241 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
6242 if (OpNo > ZeroVecOpNo)
6243 Bytes[I] -= SystemZ::VectorBytes;
6244 }
6245
6246 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
6247 dbgs() << "\n";);
6248}
6249
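// If tryPrepareForUnpack() prepared a final unpack, bitcast Op to the narrow
// element type and emit the zero-extending unpack of its high or low half.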
6250SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
6251 const SDLoc &DL,
6252 SDValue Op) {
6253 if (!unpackWasPrepared())
6254 return Op;
6255 unsigned InBits = UnpackFromEltSize * 8;
6256 EVT InVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: InBits),
6257 NumElements: SystemZ::VectorBits / InBits);
6258 SDValue PackedOp = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: InVT, Operand: Op);
6259 unsigned OutBits = InBits * 2;
6260 EVT OutVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: OutBits),
6261 NumElements: SystemZ::VectorBits / OutBits);
6262 return DAG.getNode(Opcode: UnpackLow ? SystemZISD::UNPACKL_LOW
6263 : SystemZISD::UNPACKL_HIGH,
6264 DL, VT: OutVT, Operand: PackedOp);
6265}
6266
6267// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
6268static bool isScalarToVector(SDValue Op) {
6269 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
6270 if (!Op.getOperand(i: I).isUndef())
6271 return false;
6272 return true;
6273}
6274
6275// Return a vector of type VT that contains Value in the first element.
6276// The other elements don't matter.
6277static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6278 SDValue Value) {
6279 // If we have a constant, replicate it to all elements and let the
6280 // BUILD_VECTOR lowering take care of it.
6281 if (Value.getOpcode() == ISD::Constant ||
6282 Value.getOpcode() == ISD::ConstantFP) {
6283 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
6284 return DAG.getBuildVector(VT, DL, Ops);
6285 }
6286 if (Value.isUndef())
6287 return DAG.getUNDEF(VT);
6288 return DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT, Operand: Value);
6289}
6290
6291// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
6292// element 1. Used for cases in which replication is cheap.
6293static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6294 SDValue Op0, SDValue Op1) {
6295 if (Op0.isUndef()) {
6296 if (Op1.isUndef())
6297 return DAG.getUNDEF(VT);
6298 return DAG.getNode(Opcode: SystemZISD::REPLICATE, DL, VT, Operand: Op1);
6299 }
6300 if (Op1.isUndef())
6301 return DAG.getNode(Opcode: SystemZISD::REPLICATE, DL, VT, Operand: Op0);
6302 return DAG.getNode(Opcode: SystemZISD::MERGE_HIGH, DL, VT,
6303 N1: buildScalarToVector(DAG, DL, VT, Value: Op0),
6304 N2: buildScalarToVector(DAG, DL, VT, Value: Op1));
6305}
6306
6307// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
6308// vector for them.
6309static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
6310 SDValue Op1) {
6311 if (Op0.isUndef() && Op1.isUndef())
6312 return DAG.getUNDEF(VT: MVT::v2i64);
6313 // If one of the two inputs is undefined then replicate the other one,
6314 // in order to avoid using another register unnecessarily.
6315 if (Op0.isUndef())
6316 Op0 = Op1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op1);
6317 else if (Op1.isUndef())
6318 Op0 = Op1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0);
6319 else {
6320 Op0 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op0);
6321 Op1 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Op1);
6322 }
6323 return DAG.getNode(Opcode: SystemZISD::JOIN_DWORDS, DL, VT: MVT::v2i64, N1: Op0, N2: Op1);
6324}
6325
6326// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
6327// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
6328// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
6329// would benefit from this representation and return it if so.
6330static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
6331 BuildVectorSDNode *BVN) {
6332 EVT VT = BVN->getValueType(ResNo: 0);
6333 unsigned NumElements = VT.getVectorNumElements();
6334
6335 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
6336 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
6337 // need a BUILD_VECTOR, add an additional placeholder operand for that
6338 // BUILD_VECTOR and store its operands in ResidueOps.
6339 GeneralShuffle GS(VT);
6340 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
6341 bool FoundOne = false;
6342 for (unsigned I = 0; I < NumElements; ++I) {
6343 SDValue Op = BVN->getOperand(Num: I);
6344 if (Op.getOpcode() == ISD::TRUNCATE)
6345 Op = Op.getOperand(i: 0);
6346 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6347 Op.getOperand(i: 1).getOpcode() == ISD::Constant) {
6348 unsigned Elem = Op.getConstantOperandVal(i: 1);
6349 if (!GS.add(Op: Op.getOperand(i: 0), Elem))
6350 return SDValue();
6351 FoundOne = true;
6352 } else if (Op.isUndef()) {
6353 GS.addUndef();
6354 } else {
6355 if (!GS.add(Op: SDValue(), Elem: ResidueOps.size()))
6356 return SDValue();
6357 ResidueOps.push_back(Elt: BVN->getOperand(Num: I));
6358 }
6359 }
6360
6361 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
6362 if (!FoundOne)
6363 return SDValue();
6364
6365 // Create the BUILD_VECTOR for the remaining elements, if any.
6366 if (!ResidueOps.empty()) {
6367 while (ResidueOps.size() < NumElements)
6368 ResidueOps.push_back(Elt: DAG.getUNDEF(VT: ResidueOps[0].getValueType()));
6369 for (auto &Op : GS.Ops) {
6370 if (!Op.getNode()) {
6371 Op = DAG.getBuildVector(VT, DL: SDLoc(BVN), Ops: ResidueOps);
6372 break;
6373 }
6374 }
6375 }
6376 return GS.getNode(DAG, DL: SDLoc(BVN));
6377}
6378
6379bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
6380 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Val&: Op)->isUnindexed())
6381 return true;
6382 if (auto *AL = dyn_cast<AtomicSDNode>(Val&: Op))
6383 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
6384 return true;
6385 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
6386 return true;
6387 return false;
6388}
6389
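// Bitcast Op0 and Op1 to a 128-bit vector type with MergedBits-wide elements,
// merge their high parts and bitcast the result back to VT.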
6390static SDValue mergeHighParts(SelectionDAG &DAG, const SDLoc &DL,
6391 unsigned MergedBits, EVT VT, SDValue Op0,
6392 SDValue Op1) {
6393 MVT IntVecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: MergedBits),
6394 NumElements: SystemZ::VectorBits / MergedBits);
6395 assert(VT.getSizeInBits() == 128 && IntVecVT.getSizeInBits() == 128 &&
6396 "Handling full vectors only.");
6397 Op0 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVecVT, Operand: Op0);
6398 Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVecVT, Operand: Op1);
6399 SDValue Op = DAG.getNode(Opcode: SystemZISD::MERGE_HIGH, DL, VT: IntVecVT, N1: Op0, N2: Op1);
6400 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
6401}
6402
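// Build a vector of type VT from the four scalars Elems[Pos] .. Elems[Pos + 3]
// by merging adjacent pairs and then merging the two partial results (see the
// VMRHF/VMRHG diagram in buildVector()).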
6403static SDValue buildFPVecFromScalars4(SelectionDAG &DAG, const SDLoc &DL,
6404 EVT VT, SmallVectorImpl<SDValue> &Elems,
6405 unsigned Pos) {
6406 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Op0: Elems[Pos + 0], Op1: Elems[Pos + 1]);
6407 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Op0: Elems[Pos + 2], Op1: Elems[Pos + 3]);
6408 // Avoid unnecessary undefs by reusing the other operand.
6409 if (Op01.isUndef()) {
6410 if (Op23.isUndef())
6411 return Op01;
6412 Op01 = Op23;
6413 } else if (Op23.isUndef())
6414 Op23 = Op01;
6415 // Merging identical replications is a no-op.
6416 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
6417 return Op01;
6418 unsigned MergedBits = VT.getSimpleVT().getScalarSizeInBits() * 2;
6419 return mergeHighParts(DAG, DL, MergedBits, VT, Op0: Op01, Op1: Op23);
6420}
6421
6422// Combine GPR scalar values Elems into a vector of type VT.
6423SDValue
6424SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6425 SmallVectorImpl<SDValue> &Elems) const {
6426 // See whether there is a single replicated value.
6427 SDValue Single;
6428 unsigned int NumElements = Elems.size();
6429 unsigned int Count = 0;
6430 for (auto Elem : Elems) {
6431 if (!Elem.isUndef()) {
6432 if (!Single.getNode())
6433 Single = Elem;
6434 else if (Elem != Single) {
6435 Single = SDValue();
6436 break;
6437 }
6438 Count += 1;
6439 }
6440 }
6441 // There are three cases here:
6442 //
6443 // - if the only defined element is a loaded one, the best sequence
6444 // is a replicating load.
6445 //
6446 // - otherwise, if the only defined element is an i64 value, we will
6447 // end up with the same VLVGP sequence regardless of whether we short-cut
6448 // for replication or fall through to the later code.
6449 //
6450 // - otherwise, if the only defined element is an i32 or smaller value,
6451 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
6452 // This is only a win if the single defined element is used more than once.
6453 // In other cases we're better off using a single VLVGx.
6454 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Op: Single)))
6455 return DAG.getNode(Opcode: SystemZISD::REPLICATE, DL, VT, Operand: Single);
6456
6457 // If all elements are loads, use VLREP/VLEs (below).
6458 bool AllLoads = true;
6459 for (auto Elem : Elems)
6460 if (!isVectorElementLoad(Op: Elem)) {
6461 AllLoads = false;
6462 break;
6463 }
6464
6465 // The best way of building a v2i64 from two i64s is to use VLVGP.
6466 if (VT == MVT::v2i64 && !AllLoads)
6467 return joinDwords(DAG, DL, Op0: Elems[0], Op1: Elems[1]);
6468
6469 // Use a 64-bit merge high to combine two doubles.
6470 if (VT == MVT::v2f64 && !AllLoads)
6471 return buildMergeScalars(DAG, DL, VT, Op0: Elems[0], Op1: Elems[1]);
6472
6473 // Build v4f32 values directly from the FPRs:
6474 //
6475  //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
6476  //         V              V        VMRHF
6477  //      <ABxx>         <CDxx>
6478  //             V                   VMRHG
6479  //          <ABCD>
6480 if (VT == MVT::v4f32 && !AllLoads)
6481 return buildFPVecFromScalars4(DAG, DL, VT, Elems, Pos: 0);
6482
6483 // Same for v8f16.
6484 if (VT == MVT::v8f16 && !AllLoads) {
6485 SDValue Op0123 = buildFPVecFromScalars4(DAG, DL, VT, Elems, Pos: 0);
6486 SDValue Op4567 = buildFPVecFromScalars4(DAG, DL, VT, Elems, Pos: 4);
6487 // Avoid unnecessary undefs by reusing the other operand.
6488 if (Op0123.isUndef())
6489 Op0123 = Op4567;
6490 else if (Op4567.isUndef())
6491 Op4567 = Op0123;
6492 // Merging identical replications is a no-op.
6493 if (Op0123.getOpcode() == SystemZISD::REPLICATE && Op0123 == Op4567)
6494 return Op0123;
6495 return mergeHighParts(DAG, DL, MergedBits: 64, VT, Op0: Op0123, Op1: Op4567);
6496 }
6497
6498 // Collect the constant terms.
6499 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
6500 SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
6501
6502 unsigned NumConstants = 0;
6503 for (unsigned I = 0; I < NumElements; ++I) {
6504 SDValue Elem = Elems[I];
6505 if (Elem.getOpcode() == ISD::Constant ||
6506 Elem.getOpcode() == ISD::ConstantFP) {
6507 NumConstants += 1;
6508 Constants[I] = Elem;
6509 Done[I] = true;
6510 }
6511 }
6512 // If there was at least one constant, fill in the other elements of
6513 // Constants with undefs to get a full vector constant and use that
6514 // as the starting point.
6515 SDValue Result;
6516 SDValue ReplicatedVal;
6517 if (NumConstants > 0) {
6518 for (unsigned I = 0; I < NumElements; ++I)
6519 if (!Constants[I].getNode())
6520 Constants[I] = DAG.getUNDEF(VT: Elems[I].getValueType());
6521 Result = DAG.getBuildVector(VT, DL, Ops: Constants);
6522 } else {
6523 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
6524 // avoid a false dependency on any previous contents of the vector
6525 // register.
6526
6527 // Use a VLREP if at least one element is a load. Make sure to replicate
6528 // the load with the most elements having its value.
6529 std::map<const SDNode*, unsigned> UseCounts;
6530 SDNode *LoadMaxUses = nullptr;
6531 for (unsigned I = 0; I < NumElements; ++I)
6532 if (isVectorElementLoad(Op: Elems[I])) {
6533 SDNode *Ld = Elems[I].getNode();
6534 unsigned Count = ++UseCounts[Ld];
6535 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < Count)
6536 LoadMaxUses = Ld;
6537 }
6538 if (LoadMaxUses != nullptr) {
6539 ReplicatedVal = SDValue(LoadMaxUses, 0);
6540 Result = DAG.getNode(Opcode: SystemZISD::REPLICATE, DL, VT, Operand: ReplicatedVal);
6541 } else {
6542 // Try to use VLVGP.
6543 unsigned I1 = NumElements / 2 - 1;
6544 unsigned I2 = NumElements - 1;
6545 bool Def1 = !Elems[I1].isUndef();
6546 bool Def2 = !Elems[I2].isUndef();
6547 if (Def1 || Def2) {
6548 SDValue Elem1 = Elems[Def1 ? I1 : I2];
6549 SDValue Elem2 = Elems[Def2 ? I2 : I1];
6550 Result = DAG.getNode(Opcode: ISD::BITCAST, DL, VT,
6551 Operand: joinDwords(DAG, DL, Op0: Elem1, Op1: Elem2));
6552 Done[I1] = true;
6553 Done[I2] = true;
6554 } else
6555 Result = DAG.getUNDEF(VT);
6556 }
6557 }
6558
6559 // Use VLVGx to insert the other elements.
6560 for (unsigned I = 0; I < NumElements; ++I)
6561 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
6562 Result = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT, N1: Result, N2: Elems[I],
6563 N3: DAG.getConstant(Val: I, DL, VT: MVT::i32));
6564 return Result;
6565}
6566
6567SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
6568 SelectionDAG &DAG) const {
6569 auto *BVN = cast<BuildVectorSDNode>(Val: Op.getNode());
6570 SDLoc DL(Op);
6571 EVT VT = Op.getValueType();
6572
6573 if (BVN->isConstant()) {
6574 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
6575 return Op;
6576
6577 // Fall back to loading it from memory.
6578 return SDValue();
6579 }
6580
6581 // See if we should use shuffles to construct the vector from other vectors.
6582 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
6583 return Res;
6584
6585 // Detect SCALAR_TO_VECTOR conversions.
6586 if (isOperationLegal(Op: ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
6587 return buildScalarToVector(DAG, DL, VT, Value: Op.getOperand(i: 0));
6588
6589 // Otherwise use buildVector to build the vector up from GPRs.
6590 unsigned NumElements = Op.getNumOperands();
6591 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
6592 for (unsigned I = 0; I < NumElements; ++I)
6593 Ops[I] = Op.getOperand(i: I);
6594 return buildVector(DAG, DL, VT, Elems&: Ops);
6595}
6596
6597SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
6598 SelectionDAG &DAG) const {
6599 auto *VSN = cast<ShuffleVectorSDNode>(Val: Op.getNode());
6600 SDLoc DL(Op);
6601 EVT VT = Op.getValueType();
6602 unsigned NumElements = VT.getVectorNumElements();
6603
6604 if (VSN->isSplat()) {
6605 SDValue Op0 = Op.getOperand(i: 0);
6606 unsigned Index = VSN->getSplatIndex();
6607 assert(Index < VT.getVectorNumElements() &&
6608 "Splat index should be defined and in first operand");
6609 // See whether the value we're splatting is directly available as a scalar.
6610 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6611 Op0.getOpcode() == ISD::BUILD_VECTOR)
6612 return DAG.getNode(Opcode: SystemZISD::REPLICATE, DL, VT, Operand: Op0.getOperand(i: Index));
6613 // Otherwise keep it as a vector-to-vector operation.
6614 return DAG.getNode(Opcode: SystemZISD::SPLAT, DL, VT, N1: Op.getOperand(i: 0),
6615 N2: DAG.getTargetConstant(Val: Index, DL, VT: MVT::i32));
6616 }
6617
6618 GeneralShuffle GS(VT);
6619 for (unsigned I = 0; I < NumElements; ++I) {
6620 int Elt = VSN->getMaskElt(Idx: I);
6621 if (Elt < 0)
6622 GS.addUndef();
6623 else if (!GS.add(Op: Op.getOperand(i: unsigned(Elt) / NumElements),
6624 Elem: unsigned(Elt) % NumElements))
6625 return SDValue();
6626 }
6627 return GS.getNode(DAG, DL: SDLoc(VSN));
6628}
6629
6630SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
6631 SelectionDAG &DAG) const {
6632 SDLoc DL(Op);
6633 // Just insert the scalar into element 0 of an undefined vector.
6634 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL,
6635 VT: Op.getValueType(), N1: DAG.getUNDEF(VT: Op.getValueType()),
6636 N2: Op.getOperand(i: 0), N3: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
6637}
6638
6639// Shift the lower 2 bytes of Op to the left in order to insert into the
6640// upper 2 bytes of the FP register.
6641static SDValue convertToF16(SDValue Op, SelectionDAG &DAG) {
6642 assert(Op.getSimpleValueType() == MVT::i64 &&
6643         "Expected to convert i64 to f16.");
6644 SDLoc DL(Op);
6645 SDValue Shft = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, N1: Op,
6646 N2: DAG.getConstant(Val: 48, DL, VT: MVT::i64));
6647 SDValue BCast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f64, Operand: Shft);
6648 SDValue F16Val =
6649 DAG.getTargetExtractSubreg(SRIdx: SystemZ::subreg_h16, DL, VT: MVT::f16, Operand: BCast);
6650 return F16Val;
6651}
6652
6653// Extract Op into GPR and shift the 2 f16 bytes to the right.
6654static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG) {
6655 assert(Op.getSimpleValueType() == MVT::f16 &&
6656 "Expected to convert f16 to i64.");
6657 SDNode *U32 = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::f64);
6658 SDValue In64 = DAG.getTargetInsertSubreg(SRIdx: SystemZ::subreg_h16, DL, VT: MVT::f64,
6659 Operand: SDValue(U32, 0), Subreg: Op);
6660 SDValue BCast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i64, Operand: In64);
6661 SDValue Shft = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: BCast,
6662 N2: DAG.getConstant(Val: 48, DL, VT: MVT::i32));
6663 return Shft;
6664}
6665
6666SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6667 SelectionDAG &DAG) const {
6668 // Handle insertions of floating-point values.
6669 SDLoc DL(Op);
6670 SDValue Op0 = Op.getOperand(i: 0);
6671 SDValue Op1 = Op.getOperand(i: 1);
6672 SDValue Op2 = Op.getOperand(i: 2);
6673 EVT VT = Op.getValueType();
6674
6675 // Insertions into constant indices of a v2f64 can be done using VPDI.
6676 // However, if the inserted value is a bitcast or a constant then it's
6677 // better to use GPRs, as below.
6678 if (VT == MVT::v2f64 &&
6679 Op1.getOpcode() != ISD::BITCAST &&
6680 Op1.getOpcode() != ISD::ConstantFP &&
6681 Op2.getOpcode() == ISD::Constant) {
6682 uint64_t Index = Op2->getAsZExtVal();
6683 unsigned Mask = VT.getVectorNumElements() - 1;
6684 if (Index <= Mask)
6685 return Op;
6686 }
6687
6688 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
6689 MVT IntVT = MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits());
6690 MVT IntVecVT = MVT::getVectorVT(VT: IntVT, NumElements: VT.getVectorNumElements());
6691 SDValue IntOp1 =
6692 VT == MVT::v8f16
6693 ? DAG.getZExtOrTrunc(Op: convertFromF16(Op: Op1, DL, DAG), DL, VT: MVT::i32)
6694 : DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Op1);
6695 SDValue Res =
6696 DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: IntVecVT,
6697 N1: DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVecVT, Operand: Op0), N2: IntOp1, N3: Op2);
6698 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Res);
6699}
6700
6701SDValue
6702SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6703 SelectionDAG &DAG) const {
6704 // Handle extractions of floating-point values.
6705 SDLoc DL(Op);
6706 SDValue Op0 = Op.getOperand(i: 0);
6707 SDValue Op1 = Op.getOperand(i: 1);
6708 EVT VT = Op.getValueType();
6709 EVT VecVT = Op0.getValueType();
6710
6711 // Extractions of constant indices can be done directly.
6712 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Val&: Op1)) {
6713 uint64_t Index = CIndexN->getZExtValue();
6714 unsigned Mask = VecVT.getVectorNumElements() - 1;
6715 if (Index <= Mask)
6716 return Op;
6717 }
6718
6719 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
6720 MVT IntVT = MVT::getIntegerVT(BitWidth: VT.getSizeInBits());
6721 MVT IntVecVT = MVT::getVectorVT(VT: IntVT, NumElements: VecVT.getVectorNumElements());
6722 MVT ExtrVT = IntVT == MVT::i16 ? MVT::i32 : IntVT;
6723 SDValue Extr = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ExtrVT,
6724 N1: DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVecVT, Operand: Op0), N2: Op1);
6725 if (VT == MVT::f16)
6726 return convertToF16(Op: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Extr), DAG);
6727 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Extr);
6728}
6729
6730SDValue SystemZTargetLowering::
6731lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6732 SDValue PackedOp = Op.getOperand(i: 0);
6733 EVT OutVT = Op.getValueType();
6734 EVT InVT = PackedOp.getValueType();
6735 unsigned ToBits = OutVT.getScalarSizeInBits();
6736 unsigned FromBits = InVT.getScalarSizeInBits();
6737 unsigned StartOffset = 0;
6738
6739 // If the input is a VECTOR_SHUFFLE, there are a number of important
6740 // cases where we can directly implement the sign-extension of the
6741 // original input lanes of the shuffle.
6742 if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
6743 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: PackedOp.getNode());
6744 ArrayRef<int> ShuffleMask = SVN->getMask();
6745 int OutNumElts = OutVT.getVectorNumElements();
6746
6747 // Recognize the special case where the sign-extension can be done
6748 // by the VSEG instruction. Handled via the default expander.
6749 if (ToBits == 64 && OutNumElts == 2) {
6750 int NumElem = ToBits / FromBits;
6751 if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1)
6752 return SDValue();
6753 }
6754
6755 // Recognize the special case where we can fold the shuffle by
6756 // replacing some of the UNPACK_HIGH with UNPACK_LOW.
6757 int StartOffsetCandidate = -1;
6758 for (int Elt = 0; Elt < OutNumElts; Elt++) {
6759 if (ShuffleMask[Elt] == -1)
6760 continue;
6761 if (ShuffleMask[Elt] % OutNumElts == Elt) {
6762 if (StartOffsetCandidate == -1)
6763 StartOffsetCandidate = ShuffleMask[Elt] - Elt;
6764 if (StartOffsetCandidate == ShuffleMask[Elt] - Elt)
6765 continue;
6766 }
6767 StartOffsetCandidate = -1;
6768 break;
6769 }
6770 if (StartOffsetCandidate != -1) {
6771 StartOffset = StartOffsetCandidate;
6772 PackedOp = PackedOp.getOperand(i: 0);
6773 }
6774 }
6775
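  // Widen the elements one power of two at a time, e.g. v16i8 -> v2i64 takes
  // three unpack steps (8 -> 16 -> 32 -> 64 bits). At each step StartOffset
  // says where the wanted input elements live: if they are no longer in the
  // high half, UNPACK_LOW is used instead of UNPACK_HIGH and the offset is
  // reduced accordingly.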
6776 do {
6777 FromBits *= 2;
6778 unsigned OutNumElts = SystemZ::VectorBits / FromBits;
6779 EVT OutVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: FromBits), NumElements: OutNumElts);
6780 unsigned Opcode = SystemZISD::UNPACK_HIGH;
6781 if (StartOffset >= OutNumElts) {
6782 Opcode = SystemZISD::UNPACK_LOW;
6783 StartOffset -= OutNumElts;
6784 }
6785 PackedOp = DAG.getNode(Opcode, DL: SDLoc(PackedOp), VT: OutVT, Operand: PackedOp);
6786 } while (FromBits != ToBits);
6787 return PackedOp;
6788}
6789
6790// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
6791SDValue SystemZTargetLowering::
6792lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6793 SDValue PackedOp = Op.getOperand(i: 0);
6794 SDLoc DL(Op);
6795 EVT OutVT = Op.getValueType();
6796 EVT InVT = PackedOp.getValueType();
6797 unsigned InNumElts = InVT.getVectorNumElements();
6798 unsigned OutNumElts = OutVT.getVectorNumElements();
6799 unsigned NumInPerOut = InNumElts / OutNumElts;
6800
6801 SDValue ZeroVec =
6802 DAG.getSplatVector(VT: InVT, DL, Op: DAG.getConstant(Val: 0, DL, VT: InVT.getScalarType()));
6803
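  // Build a (big-endian) interleaving mask that places zero elements in
  // front of each packed element. E.g. for v16i8 -> v4i32 (NumInPerOut = 4)
  // the mask is { 16,17,18,0, 19,20,21,1, 22,23,24,2, 25,26,27,3 }: three
  // bytes taken from the zero vector followed by one original byte.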
6804 SmallVector<int, 16> Mask(InNumElts);
6805 unsigned ZeroVecElt = InNumElts;
6806 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6807 unsigned MaskElt = PackedElt * NumInPerOut;
6808 unsigned End = MaskElt + NumInPerOut - 1;
6809 for (; MaskElt < End; MaskElt++)
6810 Mask[MaskElt] = ZeroVecElt++;
6811 Mask[MaskElt] = PackedElt;
6812 }
6813 SDValue Shuf = DAG.getVectorShuffle(VT: InVT, dl: DL, N1: PackedOp, N2: ZeroVec, Mask);
6814 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: OutVT, Operand: Shuf);
6815}
6816
6817SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6818 unsigned ByScalar) const {
6819 // Look for cases where a vector shift can use the *_BY_SCALAR form.
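  // E.g. (shl v4i32 X, (splat 5)) becomes (VSHL_BY_SCALAR X, 5); a splat of
  // a variable amount likewise becomes the *_BY_SCALAR form with the scalar
  // amount truncated to i32.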
6820 SDValue Op0 = Op.getOperand(i: 0);
6821 SDValue Op1 = Op.getOperand(i: 1);
6822 SDLoc DL(Op);
6823 EVT VT = Op.getValueType();
6824 unsigned ElemBitSize = VT.getScalarSizeInBits();
6825
6826 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6827 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Val&: Op1)) {
6828 APInt SplatBits, SplatUndef;
6829 unsigned SplatBitSize;
6830 bool HasAnyUndefs;
6831 // Check for constant splats. Use ElemBitSize as the minimum element
6832 // width and reject splats that need wider elements.
6833 if (BVN->isConstantSplat(SplatValue&: SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6834 MinSplatBits: ElemBitSize, isBigEndian: true) &&
6835 SplatBitSize == ElemBitSize) {
6836 SDValue Shift = DAG.getConstant(Val: SplatBits.getZExtValue() & 0xfff,
6837 DL, VT: MVT::i32);
6838 return DAG.getNode(Opcode: ByScalar, DL, VT, N1: Op0, N2: Shift);
6839 }
6840 // Check for variable splats.
6841 BitVector UndefElements;
6842 SDValue Splat = BVN->getSplatValue(UndefElements: &UndefElements);
6843 if (Splat) {
6844 // Since i32 is the smallest legal type, we either need a no-op
6845 // or a truncation.
6846 SDValue Shift = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: Splat);
6847 return DAG.getNode(Opcode: ByScalar, DL, VT, N1: Op0, N2: Shift);
6848 }
6849 }
6850
6851 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6852 // and the shift amount is directly available in a GPR.
6853 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Val&: Op1)) {
6854 if (VSN->isSplat()) {
6855 SDValue VSNOp0 = VSN->getOperand(Num: 0);
6856 unsigned Index = VSN->getSplatIndex();
6857 assert(Index < VT.getVectorNumElements() &&
6858 "Splat index should be defined and in first operand");
6859 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6860 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6861 // Since i32 is the smallest legal type, we either need a no-op
6862 // or a truncation.
6863 SDValue Shift = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32,
6864 Operand: VSNOp0.getOperand(i: Index));
6865 return DAG.getNode(Opcode: ByScalar, DL, VT, N1: Op0, N2: Shift);
6866 }
6867 }
6868 }
6869
6870 // Otherwise just treat the current form as legal.
6871 return Op;
6872}
6873
6874SDValue SystemZTargetLowering::lowerFSHL(SDValue Op, SelectionDAG &DAG) const {
6875 SDLoc DL(Op);
6876
6877 // i128 FSHL with a constant amount that is a multiple of 8 can be
6878 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6879 // facility, FSHL with a constant amount less than 8 can be implemented
6880 // via SHL_DOUBLE_BIT, and FSHL with other constant amounts by a
6881 // combination of the two.
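  // Worked example: for fshl with ShiftAmt == 12, the byte shuffle below
  // selects bytes 1..16 of the Op0:Op1 concatenation (a shift by 8 bits),
  // and SHL_DOUBLE_BIT then shifts by the remaining 4 bits, pulling the
  // missing low bits in from a second shuffle of Op1.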
6882 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2))) {
6883 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6884 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6885 SDValue Op0 = DAG.getBitcast(VT: MVT::v16i8, V: Op.getOperand(i: 0));
6886 SDValue Op1 = DAG.getBitcast(VT: MVT::v16i8, V: Op.getOperand(i: 1));
6887 if (ShiftAmt > 120) {
6888 // For ShiftAmt in 121..127, fshl by ShiftAmt == fshr by (128 - ShiftAmt),
6889 // and for shift amounts below 8, SHR_DOUBLE_BIT emits fewer instructions.
6890 SDValue Val =
6891 DAG.getNode(Opcode: SystemZISD::SHR_DOUBLE_BIT, DL, VT: MVT::v16i8, N1: Op0, N2: Op1,
6892 N3: DAG.getTargetConstant(Val: 128 - ShiftAmt, DL, VT: MVT::i32));
6893 return DAG.getBitcast(VT: MVT::i128, V: Val);
6894 }
6895 SmallVector<int, 16> Mask(16);
6896 for (unsigned Elt = 0; Elt < 16; Elt++)
6897 Mask[Elt] = (ShiftAmt >> 3) + Elt;
6898 SDValue Shuf1 = DAG.getVectorShuffle(VT: MVT::v16i8, dl: DL, N1: Op0, N2: Op1, Mask);
6899 if ((ShiftAmt & 7) == 0)
6900 return DAG.getBitcast(VT: MVT::i128, V: Shuf1);
6901 SDValue Shuf2 = DAG.getVectorShuffle(VT: MVT::v16i8, dl: DL, N1: Op1, N2: Op1, Mask);
6902 SDValue Val =
6903 DAG.getNode(Opcode: SystemZISD::SHL_DOUBLE_BIT, DL, VT: MVT::v16i8, N1: Shuf1, N2: Shuf2,
6904 N3: DAG.getTargetConstant(Val: ShiftAmt & 7, DL, VT: MVT::i32));
6905 return DAG.getBitcast(VT: MVT::i128, V: Val);
6906 }
6907 }
6908
6909 return SDValue();
6910}
6911
6912SDValue SystemZTargetLowering::lowerFSHR(SDValue Op, SelectionDAG &DAG) const {
6913 SDLoc DL(Op);
6914
6915 // i128 FSHR with a constant amount that is a multiple of 8 can be
6916 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6917 // facility, FSHR with a constant amount less than 8 can be implemented
6918 // via SHR_DOUBLE_BIT, and FSHR with other constant amounts by a
6919 // combination of the two.
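  // This mirrors lowerFSHL above: e.g. fshr with ShiftAmt == 12 first does a
  // byte shuffle by one byte (8 bits) across the Op0:Op1 concatenation and
  // then a SHR_DOUBLE_BIT by the remaining 4 bits, with the incoming high
  // bits taken from a shuffle of Op0.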
6920 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2))) {
6921 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6922 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6923 SDValue Op0 = DAG.getBitcast(VT: MVT::v16i8, V: Op.getOperand(i: 0));
6924 SDValue Op1 = DAG.getBitcast(VT: MVT::v16i8, V: Op.getOperand(i: 1));
6925 if (ShiftAmt > 120) {
6926 // For ShiftAmt in 121..127, fshr by ShiftAmt == fshl by (128 - ShiftAmt),
6927 // and for shift amounts below 8, SHL_DOUBLE_BIT emits fewer instructions.
6928 SDValue Val =
6929 DAG.getNode(Opcode: SystemZISD::SHL_DOUBLE_BIT, DL, VT: MVT::v16i8, N1: Op0, N2: Op1,
6930 N3: DAG.getTargetConstant(Val: 128 - ShiftAmt, DL, VT: MVT::i32));
6931 return DAG.getBitcast(VT: MVT::i128, V: Val);
6932 }
6933 SmallVector<int, 16> Mask(16);
6934 for (unsigned Elt = 0; Elt < 16; Elt++)
6935 Mask[Elt] = 16 - (ShiftAmt >> 3) + Elt;
6936 SDValue Shuf1 = DAG.getVectorShuffle(VT: MVT::v16i8, dl: DL, N1: Op0, N2: Op1, Mask);
6937 if ((ShiftAmt & 7) == 0)
6938 return DAG.getBitcast(VT: MVT::i128, V: Shuf1);
6939 SDValue Shuf2 = DAG.getVectorShuffle(VT: MVT::v16i8, dl: DL, N1: Op0, N2: Op0, Mask);
6940 SDValue Val =
6941 DAG.getNode(Opcode: SystemZISD::SHR_DOUBLE_BIT, DL, VT: MVT::v16i8, N1: Shuf2, N2: Shuf1,
6942 N3: DAG.getTargetConstant(Val: ShiftAmt & 7, DL, VT: MVT::i32));
6943 return DAG.getBitcast(VT: MVT::i128, V: Val);
6944 }
6945 }
6946
6947 return SDValue();
6948}
6949
6950static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG) {
6951 SDLoc DL(Op);
6952 SDValue Src = Op.getOperand(i: 0);
6953 MVT DstVT = Op.getSimpleValueType();
6954
6955 AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Val: Op.getNode());
6956 unsigned SrcAS = N->getSrcAddressSpace();
6957
6958 assert(SrcAS != N->getDestAddressSpace() &&
6959 "addrspacecast must be between different address spaces");
6960
6961 // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
6962 // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
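  // E.g. a ptr32 value 0x80001000 becomes the 64-bit pointer
  // 0x0000000000001000: bit 31 is ignored and the result is zero-extended.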
6963 if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
6964 Op = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i32, N1: Src,
6965 N2: DAG.getConstant(Val: 0x7fffffff, DL, VT: MVT::i32));
6966 Op = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: DstVT, Operand: Op);
6967 } else if (DstVT == MVT::i32) {
6968 Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: DstVT, Operand: Src);
6969 Op = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i32, N1: Op,
6970 N2: DAG.getConstant(Val: 0x7fffffff, DL, VT: MVT::i32));
6971 Op = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: DstVT, Operand: Op);
6972 } else {
6973 report_fatal_error(reason: "Bad address space in addrspacecast");
6974 }
6975 return Op;
6976}
6977
6978SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op,
6979 SelectionDAG &DAG) const {
6980 SDValue In = Op.getOperand(i: Op->isStrictFPOpcode() ? 1 : 0);
6981 if (In.getSimpleValueType() != MVT::f16)
6982 return Op; // Legal
6983 return SDValue(); // Let legalizer emit the libcall.
6984}
6985
6986SDValue SystemZTargetLowering::useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
6987 MVT VT, SDValue Arg, SDLoc DL,
6988 SDValue Chain, bool IsStrict) const {
6989 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
6990 MakeLibCallOptions CallOptions;
6991 SDValue Result;
6992 std::tie(args&: Result, args&: Chain) =
6993 makeLibCall(DAG, LC, RetVT: VT, Ops: Arg, CallOptions, dl: DL, Chain);
6994 return IsStrict ? DAG.getMergeValues(Ops: {Result, Chain}, dl: DL) : Result;
6995}
6996
6997SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op,
6998 SelectionDAG &DAG) const {
6999 bool IsSigned = (Op->getOpcode() == ISD::FP_TO_SINT ||
7000 Op->getOpcode() == ISD::STRICT_FP_TO_SINT);
7001 bool IsStrict = Op->isStrictFPOpcode();
7002 SDLoc DL(Op);
7003 MVT VT = Op.getSimpleValueType();
7004 SDValue InOp = Op.getOperand(i: IsStrict ? 1 : 0);
7005 SDValue Chain = IsStrict ? Op.getOperand(i: 0) : DAG.getEntryNode();
7006 EVT InVT = InOp.getValueType();
7007
7008 // FP to unsigned is not directly supported on z10. Promoting an i32
7009 // result to (signed) i64 doesn't generate an inexact condition (fp
7010 // exception) for values that are outside the i32 range but in the i64
7011 // range, so use the default expansion.
7012 if (!Subtarget.hasFPExtension() && !IsSigned)
7013 // Expand i32/i64. F16 values will be recognized to fit and extended.
7014 return SDValue();
7015
7016 // Conversion from f16 is done via f32.
7017 if (InOp.getSimpleValueType() == MVT::f16) {
7018 SmallVector<SDValue, 2> Results;
7019 LowerOperationWrapper(N: Op.getNode(), Results, DAG);
7020 return DAG.getMergeValues(Ops: Results, dl: DL);
7021 }
7022
7023 if (VT == MVT::i128) {
7024 RTLIB::Libcall LC =
7025 IsSigned ? RTLIB::getFPTOSINT(OpVT: InVT, RetVT: VT) : RTLIB::getFPTOUINT(OpVT: InVT, RetVT: VT);
7026 return useLibCall(DAG, LC, VT, Arg: InOp, DL, Chain, IsStrict);
7027 }
7028
7029 return Op; // Legal
7030}
7031
7032SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op,
7033 SelectionDAG &DAG) const {
7034 bool IsSigned = (Op->getOpcode() == ISD::SINT_TO_FP ||
7035 Op->getOpcode() == ISD::STRICT_SINT_TO_FP);
7036 bool IsStrict = Op->isStrictFPOpcode();
7037 SDLoc DL(Op);
7038 MVT VT = Op.getSimpleValueType();
7039 SDValue InOp = Op.getOperand(i: IsStrict ? 1 : 0);
7040 SDValue Chain = IsStrict ? Op.getOperand(i: 0) : DAG.getEntryNode();
7041 EVT InVT = InOp.getValueType();
7042
7043 // Conversion to f16 is done via f32.
7044 if (VT == MVT::f16) {
7045 SmallVector<SDValue, 2> Results;
7046 LowerOperationWrapper(N: Op.getNode(), Results, DAG);
7047 return DAG.getMergeValues(Ops: Results, dl: DL);
7048 }
7049
7050 // Unsigned to fp is not directly supported on z10.
7051 if (!Subtarget.hasFPExtension() && !IsSigned)
7052 return SDValue(); // Expand i64.
7053
7054 if (InVT == MVT::i128) {
7055 RTLIB::Libcall LC =
7056 IsSigned ? RTLIB::getSINTTOFP(OpVT: InVT, RetVT: VT) : RTLIB::getUINTTOFP(OpVT: InVT, RetVT: VT);
7057 return useLibCall(DAG, LC, VT, Arg: InOp, DL, Chain, IsStrict);
7058 }
7059
7060 return Op; // Legal
7061}
7062
7063 // Lower an f16 LOAD when there is no vector support.
7064SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
7065 SelectionDAG &DAG) const {
7066 EVT RegVT = Op.getValueType();
7067 assert(RegVT == MVT::f16 && "Expected to lower an f16 load.");
7068 (void)RegVT;
7069
7070 // Load as integer.
7071 SDLoc DL(Op);
7072 SDValue NewLd;
7073 if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Val: Op.getNode())) {
7074 assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load");
7075 NewLd = DAG.getAtomicLoad(ExtType: ISD::EXTLOAD, dl: DL, MemVT: MVT::i16, VT: MVT::i64,
7076 Chain: AtomicLd->getChain(), Ptr: AtomicLd->getBasePtr(),
7077 MMO: AtomicLd->getMemOperand());
7078 } else {
7079 LoadSDNode *Ld = cast<LoadSDNode>(Val: Op.getNode());
7080 assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load");
7081 NewLd = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: DL, VT: MVT::i64, Chain: Ld->getChain(),
7082 Ptr: Ld->getBasePtr(), PtrInfo: Ld->getPointerInfo(), MemVT: MVT::i16,
7083 Alignment: Ld->getBaseAlign(), MMOFlags: Ld->getMemOperand()->getFlags());
7084 }
7085 SDValue F16Val = convertToF16(Op: NewLd, DAG);
7086 return DAG.getMergeValues(Ops: {F16Val, NewLd.getValue(R: 1)}, dl: DL);
7087}
7088
7089 // Lower an f16 STORE when there is no vector support.
7090SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op,
7091 SelectionDAG &DAG) const {
7092 SDLoc DL(Op);
7093 SDValue Shft = convertFromF16(Op: Op->getOperand(Num: 1), DL, DAG);
7094
7095 if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Val: Op.getNode()))
7096 return DAG.getAtomic(Opcode: ISD::ATOMIC_STORE, dl: DL, MemVT: MVT::i16, Chain: AtomicSt->getChain(),
7097 Ptr: Shft, Val: AtomicSt->getBasePtr(),
7098 MMO: AtomicSt->getMemOperand());
7099
7100 StoreSDNode *St = cast<StoreSDNode>(Val: Op.getNode());
7101 return DAG.getTruncStore(Chain: St->getChain(), dl: DL, Val: Shft, Ptr: St->getBasePtr(), SVT: MVT::i16,
7102 MMO: St->getMemOperand());
7103}
7104
7105SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
7106 SelectionDAG &DAG) const {
7107 SDLoc DL(Op);
7108 MVT ResultVT = Op.getSimpleValueType();
7109 SDValue Arg = Op.getOperand(i: 0);
7110 unsigned Check = Op.getConstantOperandVal(i: 1);
7111
7112 unsigned TDCMask = 0;
7113 if (Check & fcSNan)
7114 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
7115 if (Check & fcQNan)
7116 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
7117 if (Check & fcPosInf)
7118 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
7119 if (Check & fcNegInf)
7120 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
7121 if (Check & fcPosNormal)
7122 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
7123 if (Check & fcNegNormal)
7124 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
7125 if (Check & fcPosSubnormal)
7126 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
7127 if (Check & fcNegSubnormal)
7128 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
7129 if (Check & fcPosZero)
7130 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
7131 if (Check & fcNegZero)
7132 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
7133 SDValue TDCMaskV = DAG.getConstant(Val: TDCMask, DL, VT: MVT::i64);
7134
7135 if (Arg.getSimpleValueType() == MVT::f16)
7136 Arg = DAG.getFPExtendOrRound(Op: Arg, DL: SDLoc(Arg), VT: MVT::f32);
7137 SDValue Intr = DAG.getNode(Opcode: SystemZISD::TDC, DL, VT: ResultVT, N1: Arg, N2: TDCMaskV);
7138 return getCCResult(DAG, CCReg: Intr);
7139}
7140
7141SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
7142 SelectionDAG &DAG) const {
7143 SDLoc DL(Op);
7144 SDValue Chain = Op.getOperand(i: 0);
7145
7146 // STCKF only supports a memory operand, so we have to use a temporary.
7147 SDValue StackPtr = DAG.CreateStackTemporary(VT: MVT::i64);
7148 int SPFI = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
7149 MachinePointerInfo MPI =
7150 MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: SPFI);
7151
7152 // Use STCKF to store the TOD clock into the temporary.
7153 SDValue StoreOps[] = {Chain, StackPtr};
7154 Chain = DAG.getMemIntrinsicNode(
7155 Opcode: SystemZISD::STCKF, dl: DL, VTList: DAG.getVTList(VT: MVT::Other), Ops: StoreOps, MemVT: MVT::i64,
7156 PtrInfo: MPI, Alignment: MaybeAlign(), Flags: MachineMemOperand::MOStore);
7157
7158 // And read it back from there.
7159 return DAG.getLoad(VT: MVT::i64, dl: DL, Chain, Ptr: StackPtr, PtrInfo: MPI);
7160}
7161
7162SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
7163 SelectionDAG &DAG) const {
7164 switch (Op.getOpcode()) {
7165 case ISD::FRAMEADDR:
7166 return lowerFRAMEADDR(Op, DAG);
7167 case ISD::RETURNADDR:
7168 return lowerRETURNADDR(Op, DAG);
7169 case ISD::BR_CC:
7170 return lowerBR_CC(Op, DAG);
7171 case ISD::SELECT_CC:
7172 return lowerSELECT_CC(Op, DAG);
7173 case ISD::SETCC:
7174 return lowerSETCC(Op, DAG);
7175 case ISD::STRICT_FSETCC:
7176 return lowerSTRICT_FSETCC(Op, DAG, IsSignaling: false);
7177 case ISD::STRICT_FSETCCS:
7178 return lowerSTRICT_FSETCC(Op, DAG, IsSignaling: true);
7179 case ISD::GlobalAddress:
7180 return lowerGlobalAddress(Node: cast<GlobalAddressSDNode>(Val&: Op), DAG);
7181 case ISD::GlobalTLSAddress:
7182 return lowerGlobalTLSAddress(Node: cast<GlobalAddressSDNode>(Val&: Op), DAG);
7183 case ISD::BlockAddress:
7184 return lowerBlockAddress(Node: cast<BlockAddressSDNode>(Val&: Op), DAG);
7185 case ISD::JumpTable:
7186 return lowerJumpTable(JT: cast<JumpTableSDNode>(Val&: Op), DAG);
7187 case ISD::ConstantPool:
7188 return lowerConstantPool(CP: cast<ConstantPoolSDNode>(Val&: Op), DAG);
7189 case ISD::BITCAST:
7190 return lowerBITCAST(Op, DAG);
7191 case ISD::VASTART:
7192 return lowerVASTART(Op, DAG);
7193 case ISD::VACOPY:
7194 return lowerVACOPY(Op, DAG);
7195 case ISD::DYNAMIC_STACKALLOC:
7196 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7197 case ISD::GET_DYNAMIC_AREA_OFFSET:
7198 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
7199 case ISD::MULHS:
7200 return lowerMULH(Op, DAG, Opcode: SystemZISD::SMUL_LOHI);
7201 case ISD::MULHU:
7202 return lowerMULH(Op, DAG, Opcode: SystemZISD::UMUL_LOHI);
7203 case ISD::SMUL_LOHI:
7204 return lowerSMUL_LOHI(Op, DAG);
7205 case ISD::UMUL_LOHI:
7206 return lowerUMUL_LOHI(Op, DAG);
7207 case ISD::SDIVREM:
7208 return lowerSDIVREM(Op, DAG);
7209 case ISD::UDIVREM:
7210 return lowerUDIVREM(Op, DAG);
7211 case ISD::SADDO:
7212 case ISD::SSUBO:
7213 case ISD::UADDO:
7214 case ISD::USUBO:
7215 return lowerXALUO(Op, DAG);
7216 case ISD::UADDO_CARRY:
7217 case ISD::USUBO_CARRY:
7218 return lowerUADDSUBO_CARRY(Op, DAG);
7219 case ISD::OR:
7220 return lowerOR(Op, DAG);
7221 case ISD::CTPOP:
7222 return lowerCTPOP(Op, DAG);
7223 case ISD::VECREDUCE_ADD:
7224 return lowerVECREDUCE_ADD(Op, DAG);
7225 case ISD::ATOMIC_FENCE:
7226 return lowerATOMIC_FENCE(Op, DAG);
7227 case ISD::ATOMIC_SWAP:
7228 return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_SWAPW);
7229 case ISD::ATOMIC_STORE:
7230 return lowerATOMIC_STORE(Op, DAG);
7231 case ISD::ATOMIC_LOAD:
7232 return lowerATOMIC_LOAD(Op, DAG);
7233 case ISD::ATOMIC_LOAD_ADD:
7234 return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_ADD);
7235 case ISD::ATOMIC_LOAD_SUB:
7236 return lowerATOMIC_LOAD_SUB(Op, DAG);
7237 case ISD::ATOMIC_LOAD_AND:
7238 return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_AND);
7239 case ISD::ATOMIC_LOAD_OR:
7240 return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_OR);
7241 case ISD::ATOMIC_LOAD_XOR:
7242 return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_XOR);
7243 case ISD::ATOMIC_LOAD_NAND:
7244 return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_NAND);
7245 case ISD::ATOMIC_LOAD_MIN:
7246 return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_MIN);
7247 case ISD::ATOMIC_LOAD_MAX:
7248 return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_MAX);
7249 case ISD::ATOMIC_LOAD_UMIN:
7250 return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_UMIN);
7251 case ISD::ATOMIC_LOAD_UMAX:
7252 return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_UMAX);
7253 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
7254 return lowerATOMIC_CMP_SWAP(Op, DAG);
7255 case ISD::STACKSAVE:
7256 return lowerSTACKSAVE(Op, DAG);
7257 case ISD::STACKRESTORE:
7258 return lowerSTACKRESTORE(Op, DAG);
7259 case ISD::PREFETCH:
7260 return lowerPREFETCH(Op, DAG);
7261 case ISD::INTRINSIC_W_CHAIN:
7262 return lowerINTRINSIC_W_CHAIN(Op, DAG);
7263 case ISD::INTRINSIC_WO_CHAIN:
7264 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
7265 case ISD::BUILD_VECTOR:
7266 return lowerBUILD_VECTOR(Op, DAG);
7267 case ISD::VECTOR_SHUFFLE:
7268 return lowerVECTOR_SHUFFLE(Op, DAG);
7269 case ISD::SCALAR_TO_VECTOR:
7270 return lowerSCALAR_TO_VECTOR(Op, DAG);
7271 case ISD::INSERT_VECTOR_ELT:
7272 return lowerINSERT_VECTOR_ELT(Op, DAG);
7273 case ISD::EXTRACT_VECTOR_ELT:
7274 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7275 case ISD::SIGN_EXTEND_VECTOR_INREG:
7276 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
7277 case ISD::ZERO_EXTEND_VECTOR_INREG:
7278 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
7279 case ISD::SHL:
7280 return lowerShift(Op, DAG, ByScalar: SystemZISD::VSHL_BY_SCALAR);
7281 case ISD::SRL:
7282 return lowerShift(Op, DAG, ByScalar: SystemZISD::VSRL_BY_SCALAR);
7283 case ISD::SRA:
7284 return lowerShift(Op, DAG, ByScalar: SystemZISD::VSRA_BY_SCALAR);
7285 case ISD::ADDRSPACECAST:
7286 return lowerAddrSpaceCast(Op, DAG);
7287 case ISD::ROTL:
7288 return lowerShift(Op, DAG, ByScalar: SystemZISD::VROTL_BY_SCALAR);
7289 case ISD::FSHL:
7290 return lowerFSHL(Op, DAG);
7291 case ISD::FSHR:
7292 return lowerFSHR(Op, DAG);
7293 case ISD::FP_EXTEND:
7294 case ISD::STRICT_FP_EXTEND:
7295 return lowerFP_EXTEND(Op, DAG);
7296 case ISD::FP_TO_UINT:
7297 case ISD::FP_TO_SINT:
7298 case ISD::STRICT_FP_TO_UINT:
7299 case ISD::STRICT_FP_TO_SINT:
7300 return lower_FP_TO_INT(Op, DAG);
7301 case ISD::UINT_TO_FP:
7302 case ISD::SINT_TO_FP:
7303 case ISD::STRICT_UINT_TO_FP:
7304 case ISD::STRICT_SINT_TO_FP:
7305 return lower_INT_TO_FP(Op, DAG);
7306 case ISD::LOAD:
7307 return lowerLoadF16(Op, DAG);
7308 case ISD::STORE:
7309 return lowerStoreF16(Op, DAG);
7310 case ISD::IS_FPCLASS:
7311 return lowerIS_FPCLASS(Op, DAG);
7312 case ISD::GET_ROUNDING:
7313 return lowerGET_ROUNDING(Op, DAG);
7314 case ISD::READCYCLECOUNTER:
7315 return lowerREADCYCLECOUNTER(Op, DAG);
7316 case ISD::EH_SJLJ_SETJMP:
7317 case ISD::EH_SJLJ_LONGJMP:
7318 // These operations are legal on our platform, but we cannot actually
7319 // set the operation action to Legal as common code would treat this
7320 // as equivalent to Expand. Instead, we keep the operation action set to
7321 // Custom and just leave them unchanged here.
7322 return Op;
7323
7324 default:
7325 llvm_unreachable("Unexpected node to lower");
7326 }
7327}
7328
7329static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
7330 const SDLoc &SL) {
7331 // If i128 is legal, just use a normal bitcast.
7332 if (DAG.getTargetLoweringInfo().isTypeLegal(VT: MVT::i128))
7333 return DAG.getBitcast(VT: MVT::f128, V: Src);
7334
7335 // Otherwise, f128 must live in FP128, so do a partwise move.
7336 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
7337 &SystemZ::FP128BitRegClass);
7338
7339 SDValue Hi, Lo;
7340 std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Src, DL: SL, LoVT: MVT::i64, HiVT: MVT::i64);
7341
7342 Hi = DAG.getBitcast(VT: MVT::f64, V: Hi);
7343 Lo = DAG.getBitcast(VT: MVT::f64, V: Lo);
7344
7345 SDNode *Pair = DAG.getMachineNode(
7346 Opcode: SystemZ::REG_SEQUENCE, dl: SL, VT: MVT::f128,
7347 Ops: {DAG.getTargetConstant(Val: SystemZ::FP128BitRegClassID, DL: SL, VT: MVT::i32), Lo,
7348 DAG.getTargetConstant(Val: SystemZ::subreg_l64, DL: SL, VT: MVT::i32), Hi,
7349 DAG.getTargetConstant(Val: SystemZ::subreg_h64, DL: SL, VT: MVT::i32)});
7350 return SDValue(Pair, 0);
7351}
7352
7353static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
7354 const SDLoc &SL) {
7355 // If i128 is legal, just use a normal bitcast.
7356 if (DAG.getTargetLoweringInfo().isTypeLegal(VT: MVT::i128))
7357 return DAG.getBitcast(VT: MVT::i128, V: Src);
7358
7359 // Otherwise, f128 must live in FP128, so do a partwise move.
7360 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
7361 &SystemZ::FP128BitRegClass);
7362
7363 SDValue LoFP =
7364 DAG.getTargetExtractSubreg(SRIdx: SystemZ::subreg_l64, DL: SL, VT: MVT::f64, Operand: Src);
7365 SDValue HiFP =
7366 DAG.getTargetExtractSubreg(SRIdx: SystemZ::subreg_h64, DL: SL, VT: MVT::f64, Operand: Src);
7367 SDValue Lo = DAG.getNode(Opcode: ISD::BITCAST, DL: SL, VT: MVT::i64, Operand: LoFP);
7368 SDValue Hi = DAG.getNode(Opcode: ISD::BITCAST, DL: SL, VT: MVT::i64, Operand: HiFP);
7369
7370 return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: SL, VT: MVT::i128, N1: Lo, N2: Hi);
7371}
7372
7373// Lower operations with invalid operand or result types.
7374void
7375SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
7376 SmallVectorImpl<SDValue> &Results,
7377 SelectionDAG &DAG) const {
7378 switch (N->getOpcode()) {
7379 case ISD::ATOMIC_LOAD: {
7380 SDLoc DL(N);
7381 SDVTList Tys = DAG.getVTList(VT1: MVT::Untyped, VT2: MVT::Other);
7382 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1) };
7383 MachineMemOperand *MMO = cast<AtomicSDNode>(Val: N)->getMemOperand();
7384 SDValue Res = DAG.getMemIntrinsicNode(Opcode: SystemZISD::ATOMIC_LOAD_128,
7385 dl: DL, VTList: Tys, Ops, MemVT: MVT::i128, MMO);
7386
7387 SDValue Lowered = lowerGR128ToI128(DAG, In: Res);
7388 if (N->getValueType(ResNo: 0) == MVT::f128)
7389 Lowered = expandBitCastI128ToF128(DAG, Src: Lowered, SL: DL);
7390 Results.push_back(Elt: Lowered);
7391 Results.push_back(Elt: Res.getValue(R: 1));
7392 break;
7393 }
7394 case ISD::ATOMIC_STORE: {
7395 SDLoc DL(N);
7396 SDVTList Tys = DAG.getVTList(VT: MVT::Other);
7397 SDValue Val = N->getOperand(Num: 1);
7398 if (Val.getValueType() == MVT::f128)
7399 Val = expandBitCastF128ToI128(DAG, Src: Val, SL: DL);
7400 Val = lowerI128ToGR128(DAG, In: Val);
7401
7402 SDValue Ops[] = {N->getOperand(Num: 0), Val, N->getOperand(Num: 2)};
7403 MachineMemOperand *MMO = cast<AtomicSDNode>(Val: N)->getMemOperand();
7404 SDValue Res = DAG.getMemIntrinsicNode(Opcode: SystemZISD::ATOMIC_STORE_128,
7405 dl: DL, VTList: Tys, Ops, MemVT: MVT::i128, MMO);
7406 // We have to enforce sequential consistency by performing a
7407 // serialization operation after the store.
7408 if (cast<AtomicSDNode>(Val: N)->getSuccessOrdering() ==
7409 AtomicOrdering::SequentiallyConsistent)
7410 Res = SDValue(DAG.getMachineNode(Opcode: SystemZ::Serialize, dl: DL,
7411 VT: MVT::Other, Op1: Res), 0);
7412 Results.push_back(Elt: Res);
7413 break;
7414 }
7415 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
7416 SDLoc DL(N);
7417 SDVTList Tys = DAG.getVTList(VT1: MVT::Untyped, VT2: MVT::i32, VT3: MVT::Other);
7418 SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1),
7419 lowerI128ToGR128(DAG, In: N->getOperand(Num: 2)),
7420 lowerI128ToGR128(DAG, In: N->getOperand(Num: 3)) };
7421 MachineMemOperand *MMO = cast<AtomicSDNode>(Val: N)->getMemOperand();
7422 SDValue Res = DAG.getMemIntrinsicNode(Opcode: SystemZISD::ATOMIC_CMP_SWAP_128,
7423 dl: DL, VTList: Tys, Ops, MemVT: MVT::i128, MMO);
7424 SDValue Success = emitSETCC(DAG, DL, CCReg: Res.getValue(R: 1),
7425 CCValid: SystemZ::CCMASK_CS, CCMask: SystemZ::CCMASK_CS_EQ);
7426 Success = DAG.getZExtOrTrunc(Op: Success, DL, VT: N->getValueType(ResNo: 1));
7427 Results.push_back(Elt: lowerGR128ToI128(DAG, In: Res));
7428 Results.push_back(Elt: Success);
7429 Results.push_back(Elt: Res.getValue(R: 2));
7430 break;
7431 }
7432 case ISD::BITCAST: {
7433 if (useSoftFloat())
7434 return;
7435 SDLoc DL(N);
7436 SDValue Src = N->getOperand(Num: 0);
7437 EVT SrcVT = Src.getValueType();
7438 EVT ResVT = N->getValueType(ResNo: 0);
7439 if (ResVT == MVT::i128 && SrcVT == MVT::f128)
7440 Results.push_back(Elt: expandBitCastF128ToI128(DAG, Src, SL: DL));
7441 else if (SrcVT == MVT::i16 && ResVT == MVT::f16) {
7442 if (Subtarget.hasVector()) {
7443 SDValue In32 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i32, Operand: Src);
7444 Results.push_back(Elt: SDValue(
7445 DAG.getMachineNode(Opcode: SystemZ::LEFR_16, dl: DL, VT: MVT::f16, Op1: In32), 0));
7446 } else {
7447 SDValue In64 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: MVT::i64, Operand: Src);
7448 Results.push_back(Elt: convertToF16(Op: In64, DAG));
7449 }
7450 } else if (SrcVT == MVT::f16 && ResVT == MVT::i16) {
7451 SDValue ExtractedVal =
7452 Subtarget.hasVector()
7453 ? SDValue(DAG.getMachineNode(Opcode: SystemZ::LFER_16, dl: DL, VT: MVT::i32, Op1: Src),
7454 0)
7455 : convertFromF16(Op: Src, DL, DAG);
7456 Results.push_back(Elt: DAG.getZExtOrTrunc(Op: ExtractedVal, DL, VT: ResVT));
7457 }
7458 break;
7459 }
7460 case ISD::UINT_TO_FP:
7461 case ISD::SINT_TO_FP:
7462 case ISD::STRICT_UINT_TO_FP:
7463 case ISD::STRICT_SINT_TO_FP: {
7464 if (useSoftFloat())
7465 return;
7466 bool IsStrict = N->isStrictFPOpcode();
7467 SDLoc DL(N);
7468 SDValue InOp = N->getOperand(Num: IsStrict ? 1 : 0);
7469 EVT ResVT = N->getValueType(ResNo: 0);
7470 SDValue Chain = IsStrict ? N->getOperand(Num: 0) : DAG.getEntryNode();
7471 if (ResVT == MVT::f16) {
7472 if (!IsStrict) {
7473 SDValue OpF32 = DAG.getNode(Opcode: N->getOpcode(), DL, VT: MVT::f32, Operand: InOp);
7474 Results.push_back(Elt: DAG.getFPExtendOrRound(Op: OpF32, DL, VT: MVT::f16));
7475 } else {
7476 SDValue OpF32 =
7477 DAG.getNode(Opcode: N->getOpcode(), DL, VTList: DAG.getVTList(VT1: MVT::f32, VT2: MVT::Other),
7478 Ops: {Chain, InOp});
7479 SDValue F16Res;
7480 std::tie(args&: F16Res, args&: Chain) = DAG.getStrictFPExtendOrRound(
7481 Op: OpF32, Chain: OpF32.getValue(R: 1), DL, VT: MVT::f16);
7482 Results.push_back(Elt: F16Res);
7483 Results.push_back(Elt: Chain);
7484 }
7485 }
7486 break;
7487 }
7488 case ISD::FP_TO_UINT:
7489 case ISD::FP_TO_SINT:
7490 case ISD::STRICT_FP_TO_UINT:
7491 case ISD::STRICT_FP_TO_SINT: {
7492 if (useSoftFloat())
7493 return;
7494 bool IsStrict = N->isStrictFPOpcode();
7495 SDLoc DL(N);
7496 EVT ResVT = N->getValueType(ResNo: 0);
7497 SDValue InOp = N->getOperand(Num: IsStrict ? 1 : 0);
7498 EVT InVT = InOp->getValueType(ResNo: 0);
7499 SDValue Chain = IsStrict ? N->getOperand(Num: 0) : DAG.getEntryNode();
7500 if (InVT == MVT::f16) {
7501 if (!IsStrict) {
7502 SDValue InF32 = DAG.getFPExtendOrRound(Op: InOp, DL, VT: MVT::f32);
7503 Results.push_back(Elt: DAG.getNode(Opcode: N->getOpcode(), DL, VT: ResVT, Operand: InF32));
7504 } else {
7505 SDValue InF32;
7506 std::tie(args&: InF32, args&: Chain) =
7507 DAG.getStrictFPExtendOrRound(Op: InOp, Chain, DL, VT: MVT::f32);
7508 SDValue OpF32 =
7509 DAG.getNode(Opcode: N->getOpcode(), DL, VTList: DAG.getVTList(VT1: ResVT, VT2: MVT::Other),
7510 Ops: {Chain, InF32});
7511 Results.push_back(Elt: OpF32);
7512 Results.push_back(Elt: OpF32.getValue(R: 1));
7513 }
7514 }
7515 break;
7516 }
7517 default:
7518 llvm_unreachable("Unexpected node to lower");
7519 }
7520}
7521
7522void
7523SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
7524 SmallVectorImpl<SDValue> &Results,
7525 SelectionDAG &DAG) const {
7526 return LowerOperationWrapper(N, Results, DAG);
7527}
7528
7529// Return true if VT is a vector whose elements are a whole number of bytes
7530// in width. Also check for presence of vector support.
7531bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
7532 if (!Subtarget.hasVector())
7533 return false;
7534
7535 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
7536}
7537
7538// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
7539// producing a result of type ResVT. Op is a possibly bitcast version
7540// of the input vector and Index is the index (based on type VecVT) that
7541// should be extracted. Return the new extraction if a simplification
7542// was possible or if Force is true.
7543SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
7544 EVT VecVT, SDValue Op,
7545 unsigned Index,
7546 DAGCombinerInfo &DCI,
7547 bool Force) const {
7548 SelectionDAG &DAG = DCI.DAG;
7549
7550 // The number of bytes being extracted.
7551 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7552
7553 for (;;) {
7554 unsigned Opcode = Op.getOpcode();
7555 if (Opcode == ISD::BITCAST)
7556 // Look through bitcasts.
7557 Op = Op.getOperand(i: 0);
7558 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
7559 canTreatAsByteVector(VT: Op.getValueType())) {
7560 // Get a VPERM-like permute mask and see whether the bytes covered
7561 // by the extracted element are a contiguous sequence from one
7562 // source operand.
7563 SmallVector<int, SystemZ::VectorBytes> Bytes;
7564 if (!getVPermMask(ShuffleOp: Op, Bytes))
7565 break;
7566 int First;
7567 if (!getShuffleInput(Bytes, Start: Index * BytesPerElement,
7568 BytesPerElement, Base&: First))
7569 break;
7570 if (First < 0)
7571 return DAG.getUNDEF(VT: ResVT);
7572 // Make sure the contiguous sequence starts at a multiple of the
7573 // original element size.
7574 unsigned Byte = unsigned(First) % Bytes.size();
7575 if (Byte % BytesPerElement != 0)
7576 break;
7577 // We can get the extracted value directly from an input.
7578 Index = Byte / BytesPerElement;
7579 Op = Op.getOperand(i: unsigned(First) / Bytes.size());
7580 Force = true;
7581 } else if (Opcode == ISD::BUILD_VECTOR &&
7582 canTreatAsByteVector(VT: Op.getValueType())) {
7583 // We can only optimize this case if the BUILD_VECTOR elements are
7584 // at least as wide as the extracted value.
7585 EVT OpVT = Op.getValueType();
7586 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7587 if (OpBytesPerElement < BytesPerElement)
7588 break;
7589 // Make sure that the least-significant bit of the extracted value
7590 // is the least significant bit of an input.
7591 unsigned End = (Index + 1) * BytesPerElement;
7592 if (End % OpBytesPerElement != 0)
7593 break;
7594 // We're extracting the low part of one operand of the BUILD_VECTOR.
7595 Op = Op.getOperand(i: End / OpBytesPerElement - 1);
7596 if (!Op.getValueType().isInteger()) {
7597 EVT VT = MVT::getIntegerVT(BitWidth: Op.getValueSizeInBits());
7598 Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
7599 DCI.AddToWorklist(N: Op.getNode());
7600 }
7601 EVT VT = MVT::getIntegerVT(BitWidth: ResVT.getSizeInBits());
7602 Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Op);
7603 if (VT != ResVT) {
7604 DCI.AddToWorklist(N: Op.getNode());
7605 Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ResVT, Operand: Op);
7606 }
7607 return Op;
7608 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7609 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
7610 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
7611 canTreatAsByteVector(VT: Op.getValueType()) &&
7612 canTreatAsByteVector(VT: Op.getOperand(i: 0).getValueType())) {
7613 // Make sure that only the unextended bits are significant.
7614 EVT ExtVT = Op.getValueType();
7615 EVT OpVT = Op.getOperand(i: 0).getValueType();
7616 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
7617 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7618 unsigned Byte = Index * BytesPerElement;
7619 unsigned SubByte = Byte % ExtBytesPerElement;
7620 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
7621 if (SubByte < MinSubByte ||
7622 SubByte + BytesPerElement > ExtBytesPerElement)
7623 break;
7624 // Get the byte offset of the unextended element
7625 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
7626 // ...then add the byte offset relative to that element.
7627 Byte += SubByte - MinSubByte;
7628 if (Byte % BytesPerElement != 0)
7629 break;
7630 Op = Op.getOperand(i: 0);
7631 Index = Byte / BytesPerElement;
7632 Force = true;
7633 } else
7634 break;
7635 }
7636 if (Force) {
7637 if (Op.getValueType() != VecVT) {
7638 Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VecVT, Operand: Op);
7639 DCI.AddToWorklist(N: Op.getNode());
7640 }
7641 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ResVT, N1: Op,
7642 N2: DAG.getConstant(Val: Index, DL, VT: MVT::i32));
7643 }
7644 return SDValue();
7645}
7646
7647// Optimize vector operations in scalar value Op on the basis that Op
7648// is truncated to TruncVT.
7649SDValue SystemZTargetLowering::combineTruncateExtract(
7650 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
7651 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
7652 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
7653 // of type TruncVT.
7654 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7655 TruncVT.getSizeInBits() % 8 == 0) {
7656 SDValue Vec = Op.getOperand(i: 0);
7657 EVT VecVT = Vec.getValueType();
7658 if (canTreatAsByteVector(VT: VecVT)) {
7659 if (auto *IndexN = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))) {
7660 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7661 unsigned TruncBytes = TruncVT.getStoreSize();
7662 if (BytesPerElement % TruncBytes == 0) {
7663 // Calculate the value of Y' in the above description. We are
7664 // splitting the original elements into Scale equal-sized pieces
7665 // and for truncation purposes want the last (least-significant)
7666 // of these pieces for IndexN. This is easiest to do by calculating
7667 // the start index of the following element and then subtracting 1.
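          // E.g. truncating element 1 of a v4i32 extraction to i8: Scale is 4,
          // so we want byte element (1 + 1) * 4 - 1 = 7 of the v16i8 bitcast.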
7668 unsigned Scale = BytesPerElement / TruncBytes;
7669 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
7670
7671 // Defer the creation of the bitcast from X to combineExtract,
7672 // which might be able to optimize the extraction.
7673 VecVT = EVT::getVectorVT(Context&: *DCI.DAG.getContext(),
7674 VT: MVT::getIntegerVT(BitWidth: TruncBytes * 8),
7675 NumElements: VecVT.getStoreSize() / TruncBytes);
7676 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
7677 return combineExtract(DL, ResVT, VecVT, Op: Vec, Index: NewIndex, DCI, Force: true);
7678 }
7679 }
7680 }
7681 }
7682 return SDValue();
7683}
7684
7685SDValue SystemZTargetLowering::combineZERO_EXTEND(
7686 SDNode *N, DAGCombinerInfo &DCI) const {
7687 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
7688 SelectionDAG &DAG = DCI.DAG;
7689 SDValue N0 = N->getOperand(Num: 0);
7690 EVT VT = N->getValueType(ResNo: 0);
7691 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
7692 auto *TrueOp = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0));
7693 auto *FalseOp = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
7694 if (TrueOp && FalseOp) {
7695 SDLoc DL(N0);
7696 SDValue Ops[] = { DAG.getConstant(Val: TrueOp->getZExtValue(), DL, VT),
7697 DAG.getConstant(Val: FalseOp->getZExtValue(), DL, VT),
7698 N0.getOperand(i: 2), N0.getOperand(i: 3), N0.getOperand(i: 4) };
7699 SDValue NewSelect = DAG.getNode(Opcode: SystemZISD::SELECT_CCMASK, DL, VT, Ops);
7700 // If N0 has multiple uses, change other uses as well.
7701 if (!N0.hasOneUse()) {
7702 SDValue TruncSelect =
7703 DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N0.getValueType(), Operand: NewSelect);
7704 DCI.CombineTo(N: N0.getNode(), Res: TruncSelect);
7705 }
7706 return NewSelect;
7707 }
7708 }
7709 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
7710 // of the result is smaller than the size of X and all the truncated bits
7711 // of X are already zero.
7712 if (N0.getOpcode() == ISD::XOR &&
7713 N0.hasOneUse() && N0.getOperand(i: 0).hasOneUse() &&
7714 N0.getOperand(i: 0).getOpcode() == ISD::TRUNCATE &&
7715 N0.getOperand(i: 1).getOpcode() == ISD::Constant) {
7716 SDValue X = N0.getOperand(i: 0).getOperand(i: 0);
7717 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
7718 KnownBits Known = DAG.computeKnownBits(Op: X);
7719 APInt TruncatedBits = APInt::getBitsSet(numBits: X.getValueSizeInBits(),
7720 loBit: N0.getValueSizeInBits(),
7721 hiBit: VT.getSizeInBits());
7722 if (TruncatedBits.isSubsetOf(RHS: Known.Zero)) {
7723 X = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(X), VT, Operand: X);
7724 APInt Mask = N0.getConstantOperandAPInt(i: 1).zext(width: VT.getSizeInBits());
7725 return DAG.getNode(Opcode: ISD::XOR, DL: SDLoc(N0), VT,
7726 N1: X, N2: DAG.getConstant(Val: Mask, DL: SDLoc(N0), VT));
7727 }
7728 }
7729 }
7730 // Recognize patterns for VECTOR SUBTRACT COMPUTE BORROW INDICATION
7731 // and VECTOR ADD COMPUTE CARRY for i128:
7732 // (zext (setcc_uge X Y)) --> (VSCBI X Y)
7733 // (zext (setcc_ule Y X)) --> (VSCBI X Y)
7734 // (zext (setcc_ult (add X Y) X/Y) -> (VACC X Y)
7735 // (zext (setcc_ugt X/Y (add X Y)) -> (VACC X Y)
7736 // For vector types, these patterns are recognized in the .td file.
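  // E.g. (zext i128 (setcc X, Y, setuge)) is lowered to (VSCBI X, Y), whose
  // borrow indication is 1 exactly when the subtraction X - Y does not
  // borrow, i.e. when X >= Y (unsigned).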
7737 if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT == MVT::i128 &&
7738 N0.getOperand(i: 0).getValueType() == VT) {
7739 SDValue Op0 = N0.getOperand(i: 0);
7740 SDValue Op1 = N0.getOperand(i: 1);
7741 const ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get();
7742 switch (CC) {
7743 case ISD::SETULE:
7744 std::swap(a&: Op0, b&: Op1);
7745 [[fallthrough]];
7746 case ISD::SETUGE:
7747 return DAG.getNode(Opcode: SystemZISD::VSCBI, DL: SDLoc(N0), VT, N1: Op0, N2: Op1);
7748 case ISD::SETUGT:
7749 std::swap(a&: Op0, b&: Op1);
7750 [[fallthrough]];
7751 case ISD::SETULT:
7752 if (Op0->hasOneUse() && Op0->getOpcode() == ISD::ADD &&
7753 (Op0->getOperand(Num: 0) == Op1 || Op0->getOperand(Num: 1) == Op1))
7754 return DAG.getNode(Opcode: SystemZISD::VACC, DL: SDLoc(N0), VT, N1: Op0->getOperand(Num: 0),
7755 N2: Op0->getOperand(Num: 1));
7756 break;
7757 default:
7758 break;
7759 }
7760 }
7761
7762 return SDValue();
7763}
7764
7765SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
7766 SDNode *N, DAGCombinerInfo &DCI) const {
7767 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
7768 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
7769 // into (select_cc LHS, RHS, -1, 0, COND)
7770 SelectionDAG &DAG = DCI.DAG;
7771 SDValue N0 = N->getOperand(Num: 0);
7772 EVT VT = N->getValueType(ResNo: 0);
7773 EVT EVT = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT();
7774 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
7775 N0 = N0.getOperand(i: 0);
7776 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
7777 SDLoc DL(N0);
7778 SDValue Ops[] = { N0.getOperand(i: 0), N0.getOperand(i: 1),
7779 DAG.getAllOnesConstant(DL, VT),
7780 DAG.getConstant(Val: 0, DL, VT), N0.getOperand(i: 2) };
7781 return DAG.getNode(Opcode: ISD::SELECT_CC, DL, VT, Ops);
7782 }
7783 return SDValue();
7784}
7785
7786SDValue SystemZTargetLowering::combineSIGN_EXTEND(
7787 SDNode *N, DAGCombinerInfo &DCI) const {
7788 // Convert (sext (ashr (shl X, C1), C2)) to
7789 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
7790 // cheap as narrower ones.
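  // E.g. with X : i32, (sext i64 (ashr (shl X, 24), 24)) becomes
  // (ashr (shl (anyext X to i64), 56), 56): both shift amounts grow by
  // Extra = 64 - 32 = 32.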
7791 SelectionDAG &DAG = DCI.DAG;
7792 SDValue N0 = N->getOperand(Num: 0);
7793 EVT VT = N->getValueType(ResNo: 0);
7794 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
7795 auto *SraAmt = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
7796 SDValue Inner = N0.getOperand(i: 0);
7797 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
7798 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Val: Inner.getOperand(i: 1))) {
7799 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7800 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
7801 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
7802 EVT ShiftVT = N0.getOperand(i: 1).getValueType();
7803 SDValue Ext = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: SDLoc(Inner), VT,
7804 Operand: Inner.getOperand(i: 0));
7805 SDValue Shl = DAG.getNode(Opcode: ISD::SHL, DL: SDLoc(Inner), VT, N1: Ext,
7806 N2: DAG.getConstant(Val: NewShlAmt, DL: SDLoc(Inner),
7807 VT: ShiftVT));
7808 return DAG.getNode(Opcode: ISD::SRA, DL: SDLoc(N0), VT, N1: Shl,
7809 N2: DAG.getConstant(Val: NewSraAmt, DL: SDLoc(N0), VT: ShiftVT));
7810 }
7811 }
7812 }
7813
7814 return SDValue();
7815}
7816
7817SDValue SystemZTargetLowering::combineMERGE(
7818 SDNode *N, DAGCombinerInfo &DCI) const {
7819 SelectionDAG &DAG = DCI.DAG;
7820 unsigned Opcode = N->getOpcode();
7821 SDValue Op0 = N->getOperand(Num: 0);
7822 SDValue Op1 = N->getOperand(Num: 1);
7823 if (Op0.getOpcode() == ISD::BITCAST)
7824 Op0 = Op0.getOperand(i: 0);
7825 if (ISD::isBuildVectorAllZeros(N: Op0.getNode())) {
7826 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
7827 // for v4f32.
7828 if (Op1 == N->getOperand(Num: 0))
7829 return Op1;
7830 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
7831 EVT VT = Op1.getValueType();
7832 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
7833 if (ElemBytes <= 4) {
7834 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
7835 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
7836 EVT InVT = VT.changeVectorElementTypeToInteger();
7837 EVT OutVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ElemBytes * 16),
7838 NumElements: SystemZ::VectorBytes / ElemBytes / 2);
7839 if (VT != InVT) {
7840 Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: InVT, Operand: Op1);
7841 DCI.AddToWorklist(N: Op1.getNode());
7842 }
7843 SDValue Op = DAG.getNode(Opcode, DL: SDLoc(N), VT: OutVT, Operand: Op1);
7844 DCI.AddToWorklist(N: Op.getNode());
7845 return DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT, Operand: Op);
7846 }
7847 }
7848 return SDValue();
7849}
7850
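// Return true if all value uses of the 128-bit load LD only extract the low
// and/or high 64-bit half, i.e. a TRUNCATE to i64, for the high half
// preceded by an SRL by 64. The extracting nodes are returned in LoPart and
// HiPart (either may stay null if that half is unused).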
7851static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7852 SDNode *&HiPart) {
7853 LoPart = HiPart = nullptr;
7854
7855 // Scan through all users.
7856 for (SDUse &Use : LD->uses()) {
7857 // Skip the uses of the chain.
7858 if (Use.getResNo() != 0)
7859 continue;
7860
7861 // Verify every user is a TRUNCATE to i64 of the low or high half.
7862 SDNode *User = Use.getUser();
7863 bool IsLoPart = true;
7864 if (User->getOpcode() == ISD::SRL &&
7865 User->getOperand(Num: 1).getOpcode() == ISD::Constant &&
7866 User->getConstantOperandVal(Num: 1) == 64 && User->hasOneUse()) {
7867 User = *User->user_begin();
7868 IsLoPart = false;
7869 }
7870 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(ResNo: 0) != MVT::i64)
7871 return false;
7872
7873 if (IsLoPart) {
7874 if (LoPart)
7875 return false;
7876 LoPart = User;
7877 } else {
7878 if (HiPart)
7879 return false;
7880 HiPart = User;
7881 }
7882 }
7883 return true;
7884}
7885
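// Likewise for an f128 load whose value is only split into its 64-bit
// halves via EXTRACT_SUBREG of subreg_l64 / subreg_h64.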
7886static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7887 SDNode *&HiPart) {
7888 LoPart = HiPart = nullptr;
7889
7890 // Scan through all users.
7891 for (SDUse &Use : LD->uses()) {
7892 // Skip the uses of the chain.
7893 if (Use.getResNo() != 0)
7894 continue;
7895
7896 // Verify every user is an EXTRACT_SUBREG of the low or high half.
7897 SDNode *User = Use.getUser();
7898 if (!User->hasOneUse() || !User->isMachineOpcode() ||
7899 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
7900 return false;
7901
7902 switch (User->getConstantOperandVal(Num: 1)) {
7903 case SystemZ::subreg_l64:
7904 if (LoPart)
7905 return false;
7906 LoPart = User;
7907 break;
7908 case SystemZ::subreg_h64:
7909 if (HiPart)
7910 return false;
7911 HiPart = User;
7912 break;
7913 default:
7914 return false;
7915 }
7916 }
7917 return true;
7918}
7919
7920SDValue SystemZTargetLowering::combineLOAD(
7921 SDNode *N, DAGCombinerInfo &DCI) const {
7922 SelectionDAG &DAG = DCI.DAG;
7923 EVT LdVT = N->getValueType(ResNo: 0);
7924 if (auto *LN = dyn_cast<LoadSDNode>(Val: N)) {
7925 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
7926 MVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7927 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
7928 if (PtrVT != LoadNodeVT) {
7929 SDLoc DL(LN);
7930 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
7931 dl: DL, VT: PtrVT, Ptr: LN->getBasePtr(), SrcAS: SYSTEMZAS::PTR32, DestAS: 0);
7932 return DAG.getExtLoad(ExtType: LN->getExtensionType(), dl: DL, VT: LN->getValueType(ResNo: 0),
7933 Chain: LN->getChain(), Ptr: AddrSpaceCast, MemVT: LN->getMemoryVT(),
7934 MMO: LN->getMemOperand());
7935 }
7936 }
7937 }
7938 SDLoc DL(N);
7939
7940 // Replace a 128-bit load that is used solely to move its value into GPRs
7941 // by separate loads of both halves.
7942 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
7943 if (LD->isSimple() && ISD::isNormalLoad(N: LD)) {
7944 SDNode *LoPart, *HiPart;
7945 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
7946 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
7947 // Rewrite each extraction as an independent load.
7948 SmallVector<SDValue, 2> ArgChains;
7949 if (HiPart) {
7950 SDValue EltLoad = DAG.getLoad(
7951 VT: HiPart->getValueType(ResNo: 0), dl: DL, Chain: LD->getChain(), Ptr: LD->getBasePtr(),
7952 PtrInfo: LD->getPointerInfo(), Alignment: LD->getBaseAlign(),
7953 MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
7954
7955 DCI.CombineTo(N: HiPart, Res: EltLoad, AddTo: true);
7956 ArgChains.push_back(Elt: EltLoad.getValue(R: 1));
7957 }
7958 if (LoPart) {
7959 SDValue EltLoad = DAG.getLoad(
7960 VT: LoPart->getValueType(ResNo: 0), dl: DL, Chain: LD->getChain(),
7961 Ptr: DAG.getObjectPtrOffset(SL: DL, Ptr: LD->getBasePtr(), Offset: TypeSize::getFixed(ExactSize: 8)),
7962 PtrInfo: LD->getPointerInfo().getWithOffset(O: 8), Alignment: LD->getBaseAlign(),
7963 MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
7964
7965 DCI.CombineTo(N: LoPart, Res: EltLoad, AddTo: true);
7966 ArgChains.push_back(Elt: EltLoad.getValue(R: 1));
7967 }
7968
7969 // Collect all chains via TokenFactor.
7970 SDValue Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: ArgChains);
7971 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: Chain);
7972 DCI.AddToWorklist(N: Chain.getNode());
7973 return SDValue(N, 0);
7974 }
7975 }
7976
7977 if (LdVT.isVector() || LdVT.isInteger())
7978 return SDValue();
7979 // Transform a scalar load that is REPLICATEd as well as having other
7980 // use(s) to the form where the other use(s) use the first element of the
7981 // REPLICATE instead of the load. Otherwise instruction selection will not
7982 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
7983 // point loads.
7984
7985 SDValue Replicate;
7986 SmallVector<SDNode*, 8> OtherUses;
7987 for (SDUse &Use : N->uses()) {
7988 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) {
7989 if (Replicate)
7990 return SDValue(); // Should never happen
7991 Replicate = SDValue(Use.getUser(), 0);
7992 } else if (Use.getResNo() == 0)
7993 OtherUses.push_back(Elt: Use.getUser());
7994 }
7995 if (!Replicate || OtherUses.empty())
7996 return SDValue();
7997
7998 SDValue Extract0 = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: LdVT,
7999 N1: Replicate, N2: DAG.getConstant(Val: 0, DL, VT: MVT::i32));
8000 // Update uses of the loaded Value while preserving old chains.
8001 for (SDNode *U : OtherUses) {
8002 SmallVector<SDValue, 8> Ops;
8003 for (SDValue Op : U->ops())
8004 Ops.push_back(Elt: (Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
8005 DAG.UpdateNodeOperands(N: U, Ops);
8006 }
8007 return SDValue(N, 0);
8008}
8009
8010bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
8011 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
8012 return true;
8013 if (Subtarget.hasVectorEnhancements2())
8014 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
8015 return true;
8016 return false;
8017}
8018
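// Return true if the shuffle mask M reverses the element order of the
// 128-bit vector type VT (undef indices are allowed). This is the pattern
// used by the element-swap load/store combines below.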
8019static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
8020 if (!VT.isVector() || !VT.isSimple() ||
8021 VT.getSizeInBits() != 128 ||
8022 VT.getScalarSizeInBits() % 8 != 0)
8023 return false;
8024
8025 unsigned NumElts = VT.getVectorNumElements();
8026 for (unsigned i = 0; i < NumElts; ++i) {
8027 if (M[i] < 0) continue; // ignore UNDEF indices
8028 if ((unsigned) M[i] != NumElts - 1 - i)
8029 return false;
8030 }
8031
8032 return true;
8033}
8034
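// Return true if every user of StoredVal is either a store whose scalar
// memory type has a power-of-two size of at most 16 bytes, or a splat
// BUILD_VECTOR that is itself only used by such stores.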
8035static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
8036 for (auto *U : StoredVal->users()) {
8037 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: U)) {
8038 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
8039 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
8040 continue;
8041 } else if (isa<BuildVectorSDNode>(Val: U)) {
8042 SDValue BuildVector = SDValue(U, 0);
8043 if (DAG.isSplatValue(V: BuildVector, AllowUndefs: true/*AllowUndefs*/) &&
8044 isOnlyUsedByStores(StoredVal: BuildVector, DAG))
8045 continue;
8046 }
8047 return false;
8048 }
8049 return true;
8050}
8051
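// Return true if Val is an i128 value assembled from two i64 halves, i.e.
// (or (zext Lo), (shl (anyext Hi), 64)), and return the halves in LoPart
// and HiPart.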
8052static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
8053 SDValue &HiPart) {
8054 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
8055 return false;
8056
8057 SDValue Op0 = Val.getOperand(i: 0);
8058 SDValue Op1 = Val.getOperand(i: 1);
8059
8060 if (Op0.getOpcode() == ISD::SHL)
8061 std::swap(a&: Op0, b&: Op1);
8062 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
8063 Op1.getOperand(i: 1).getOpcode() != ISD::Constant ||
8064 Op1.getConstantOperandVal(i: 1) != 64)
8065 return false;
8066 Op1 = Op1.getOperand(i: 0);
8067
8068 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
8069 Op0.getOperand(i: 0).getValueType() != MVT::i64)
8070 return false;
8071 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
8072 Op1.getOperand(i: 0).getValueType() != MVT::i64)
8073 return false;
8074
8075 LoPart = Op0.getOperand(i: 0);
8076 HiPart = Op1.getOperand(i: 0);
8077 return true;
8078}
8079
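// Likewise for an f128 value built with a REG_SEQUENCE from two f64
// subregisters (subreg_l64 / subreg_h64).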
8080static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
8081 SDValue &HiPart) {
8082 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
8083 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
8084 return false;
8085
8086 if (Val->getNumOperands() != 5 ||
8087 Val->getOperand(Num: 0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
8088 Val->getOperand(Num: 2)->getAsZExtVal() != SystemZ::subreg_l64 ||
8089 Val->getOperand(Num: 4)->getAsZExtVal() != SystemZ::subreg_h64)
8090 return false;
8091
8092 LoPart = Val->getOperand(Num: 1);
8093 HiPart = Val->getOperand(Num: 3);
8094 return true;
8095}
8096
8097SDValue SystemZTargetLowering::combineSTORE(
8098 SDNode *N, DAGCombinerInfo &DCI) const {
8099 SelectionDAG &DAG = DCI.DAG;
8100 auto *SN = cast<StoreSDNode>(Val: N);
8101 auto &Op1 = N->getOperand(Num: 1);
8102 EVT MemVT = SN->getMemoryVT();
8103
8104 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
8105 MVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
8106 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
8107 if (PtrVT != StoreNodeVT) {
8108 SDLoc DL(SN);
8109 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(dl: DL, VT: PtrVT, Ptr: SN->getBasePtr(),
8110 SrcAS: SYSTEMZAS::PTR32, DestAS: 0);
8111 return DAG.getStore(Chain: SN->getChain(), dl: DL, Val: SN->getValue(), Ptr: AddrSpaceCast,
8112 PtrInfo: SN->getPointerInfo(), Alignment: SN->getBaseAlign(),
8113 MMOFlags: SN->getMemOperand()->getFlags(), AAInfo: SN->getAAInfo());
8114 }
8115 }
8116
8117 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
8118 // for the extraction to be done on a vMiN value, so that we can use VSTE.
8119 // If X has wider elements then convert it to:
8120 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
8121 if (MemVT.isInteger() && SN->isTruncatingStore()) {
8122 if (SDValue Value =
8123 combineTruncateExtract(DL: SDLoc(N), TruncVT: MemVT, Op: SN->getValue(), DCI)) {
8124 DCI.AddToWorklist(N: Value.getNode());
8125
8126 // Rewrite the store with the new form of stored value.
8127 return DAG.getTruncStore(Chain: SN->getChain(), dl: SDLoc(SN), Val: Value,
8128 Ptr: SN->getBasePtr(), SVT: SN->getMemoryVT(),
8129 MMO: SN->getMemOperand());
8130 }
8131 }
8132 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
8133 if (!SN->isTruncatingStore() &&
8134 Op1.getOpcode() == ISD::BSWAP &&
8135 Op1.getNode()->hasOneUse() &&
8136 canLoadStoreByteSwapped(VT: Op1.getValueType())) {
8137
8138 SDValue BSwapOp = Op1.getOperand(i: 0);
8139
8140 if (BSwapOp.getValueType() == MVT::i16)
8141 BSwapOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: SDLoc(N), VT: MVT::i32, Operand: BSwapOp);
8142
8143 SDValue Ops[] = {
8144 N->getOperand(Num: 0), BSwapOp, N->getOperand(Num: 2)
8145 };
8146
8147 return
8148 DAG.getMemIntrinsicNode(Opcode: SystemZISD::STRV, dl: SDLoc(N), VTList: DAG.getVTList(VT: MVT::Other),
8149 Ops, MemVT, MMO: SN->getMemOperand());
8150 }
8151 // Combine STORE (element-swap) into VSTER
8152 if (!SN->isTruncatingStore() &&
8153 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
8154 Op1.getNode()->hasOneUse() &&
8155 Subtarget.hasVectorEnhancements2()) {
8156 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: Op1.getNode());
8157 ArrayRef<int> ShuffleMask = SVN->getMask();
8158 if (isVectorElementSwap(M: ShuffleMask, VT: Op1.getValueType())) {
8159 SDValue Ops[] = {
8160 N->getOperand(Num: 0), Op1.getOperand(i: 0), N->getOperand(Num: 2)
8161 };
8162
8163 return DAG.getMemIntrinsicNode(Opcode: SystemZISD::VSTER, dl: SDLoc(N),
8164 VTList: DAG.getVTList(VT: MVT::Other),
8165 Ops, MemVT, MMO: SN->getMemOperand());
8166 }
8167 }
8168
8169 // Combine STORE (READCYCLECOUNTER) into STCKF.
8170 if (!SN->isTruncatingStore() &&
8171 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
8172 Op1.hasOneUse() &&
8173 N->getOperand(Num: 0).reachesChainWithoutSideEffects(Dest: SDValue(Op1.getNode(), 1))) {
8174 SDValue Ops[] = { Op1.getOperand(i: 0), N->getOperand(Num: 2) };
8175 return DAG.getMemIntrinsicNode(Opcode: SystemZISD::STCKF, dl: SDLoc(N),
8176 VTList: DAG.getVTList(VT: MVT::Other),
8177 Ops, MemVT, MMO: SN->getMemOperand());
8178 }
8179
8180 // Transform a store of a 128-bit value moved from parts into two stores.
8181 if (SN->isSimple() && ISD::isNormalStore(N: SN)) {
8182 SDValue LoPart, HiPart;
8183 if ((MemVT == MVT::i128 && isI128MovedFromParts(Val: Op1, LoPart, HiPart)) ||
8184 (MemVT == MVT::f128 && isF128MovedFromParts(Val: Op1, LoPart, HiPart))) {
8185 SDLoc DL(SN);
8186 SDValue Chain0 = DAG.getStore(
8187 Chain: SN->getChain(), dl: DL, Val: HiPart, Ptr: SN->getBasePtr(), PtrInfo: SN->getPointerInfo(),
8188 Alignment: SN->getBaseAlign(), MMOFlags: SN->getMemOperand()->getFlags(), AAInfo: SN->getAAInfo());
8189 SDValue Chain1 = DAG.getStore(
8190 Chain: SN->getChain(), dl: DL, Val: LoPart,
8191 Ptr: DAG.getObjectPtrOffset(SL: DL, Ptr: SN->getBasePtr(), Offset: TypeSize::getFixed(ExactSize: 8)),
8192 PtrInfo: SN->getPointerInfo().getWithOffset(O: 8), Alignment: SN->getBaseAlign(),
8193 MMOFlags: SN->getMemOperand()->getFlags(), AAInfo: SN->getAAInfo());
8194
8195 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: Chain0, N2: Chain1);
8196 }
8197 }
8198
8199 // Replicate a reg or immediate with VREP instead of scalar multiply or
8200 // immediate load. It seems best to do this during the first DAGCombine as
8201 // it is straightforward to handle the zero-extend node in the initial
8202 // DAG, and also not worry about keeping the new MemVT legal (e.g. when
8203 // extracting an i16 element from a v16i8 vector).
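// For example, a store of the i64 immediate 0x0001000100010001 can be
// rewritten as a store of a v4i16 splat of 1, and a store of
// (mul (zext i16 %X to i64), 0x0001000100010001) as a store of a v4i16
// splat of %X, letting the splat be materialized with a vector replicate
// instead of a scalar multiply or immediate load.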
8204 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
8205 isOnlyUsedByStores(StoredVal: Op1, DAG)) {
8206 SDValue Word = SDValue();
8207 EVT WordVT;
8208
8209 // Find a replicated immediate and return it if found in Word and its
8210 // type in WordVT.
8211 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
8212 // Some constants are better handled with a scalar store.
8213 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
8214 isInt<16>(x: C->getSExtValue()) || MemVT.getStoreSize() <= 2)
8215 return;
8216
8217 APInt Val = C->getAPIntValue();
8218 // Truncate Val in case of a truncating store.
8219 if (!llvm::isUIntN(N: TotBytes * 8, x: Val.getZExtValue())) {
8220 assert(SN->isTruncatingStore() &&
8221 "Non-truncating store and immediate value does not fit?");
8222 Val = Val.trunc(width: TotBytes * 8);
8223 }
8224
8225 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
8226 if (VCI.isVectorConstantLegal(Subtarget) &&
8227 VCI.Opcode == SystemZISD::REPLICATE) {
8228 Word = DAG.getConstant(Val: VCI.OpVals[0], DL: SDLoc(SN), VT: MVT::i32);
8229 WordVT = VCI.VecVT.getScalarType();
8230 }
8231 };
8232
8233 // Find a replicated register and return it if found in Word and its type
8234 // in WordVT.
8235 auto FindReplicatedReg = [&](SDValue MulOp) {
8236 EVT MulVT = MulOp.getValueType();
8237 if (MulOp->getOpcode() == ISD::MUL &&
8238 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
8239 // Find a zero extended value and its type.
8240 SDValue LHS = MulOp->getOperand(Num: 0);
8241 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
8242 WordVT = LHS->getOperand(Num: 0).getValueType();
8243 else if (LHS->getOpcode() == ISD::AssertZext)
8244 WordVT = cast<VTSDNode>(Val: LHS->getOperand(Num: 1))->getVT();
8245 else
8246 return;
8247 // Find a replicating constant, e.g. 0x00010001.
8248 if (auto *C = dyn_cast<ConstantSDNode>(Val: MulOp->getOperand(Num: 1))) {
8249 SystemZVectorConstantInfo VCI(
8250 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
8251 if (VCI.isVectorConstantLegal(Subtarget) &&
8252 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
8253 WordVT == VCI.VecVT.getScalarType())
8254 Word = DAG.getZExtOrTrunc(Op: LHS->getOperand(Num: 0), DL: SDLoc(SN), VT: WordVT);
8255 }
8256 }
8257 };
8258
8259 if (isa<BuildVectorSDNode>(Val: Op1) &&
8260 DAG.isSplatValue(V: Op1, AllowUndefs: true/*AllowUndefs*/)) {
8261 SDValue SplatVal = Op1->getOperand(Num: 0);
8262 if (auto *C = dyn_cast<ConstantSDNode>(Val&: SplatVal))
8263 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
8264 else
8265 FindReplicatedReg(SplatVal);
8266 } else {
8267 if (auto *C = dyn_cast<ConstantSDNode>(Val: Op1))
8268 FindReplicatedImm(C, MemVT.getStoreSize());
8269 else
8270 FindReplicatedReg(Op1);
8271 }
8272
8273 if (Word != SDValue()) {
8274 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
8275 "Bad type handling");
8276 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
8277 EVT SplatVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WordVT, NumElements: NumElts);
8278 SDValue SplatVal = DAG.getSplatVector(VT: SplatVT, DL: SDLoc(SN), Op: Word);
8279 return DAG.getStore(Chain: SN->getChain(), dl: SDLoc(SN), Val: SplatVal,
8280 Ptr: SN->getBasePtr(), MMO: SN->getMemOperand());
8281 }
8282 }
8283
8284 return SDValue();
8285}
8286
8287SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
8288 SDNode *N, DAGCombinerInfo &DCI) const {
8289 SelectionDAG &DAG = DCI.DAG;
8290 // Combine element-swap (LOAD) into VLER
8291 if (ISD::isNON_EXTLoad(N: N->getOperand(Num: 0).getNode()) &&
8292 N->getOperand(Num: 0).hasOneUse() &&
8293 Subtarget.hasVectorEnhancements2()) {
8294 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: N);
8295 ArrayRef<int> ShuffleMask = SVN->getMask();
8296 if (isVectorElementSwap(M: ShuffleMask, VT: N->getValueType(ResNo: 0))) {
8297 SDValue Load = N->getOperand(Num: 0);
8298 LoadSDNode *LD = cast<LoadSDNode>(Val&: Load);
8299
8300 // Create the element-swapping load.
8301 SDValue Ops[] = {
8302 LD->getChain(), // Chain
8303 LD->getBasePtr() // Ptr
8304 };
8305 SDValue ESLoad =
8306 DAG.getMemIntrinsicNode(Opcode: SystemZISD::VLER, dl: SDLoc(N),
8307 VTList: DAG.getVTList(VT1: LD->getValueType(ResNo: 0), VT2: MVT::Other),
8308 Ops, MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
8309
8310 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
8311 // by the load dead.
8312 DCI.CombineTo(N, Res: ESLoad);
8313
8314 // Next, combine the load away; we give it a bogus result value but a real
8315 // chain result. The result value is dead because the shuffle is dead.
8316 DCI.CombineTo(N: Load.getNode(), Res0: ESLoad, Res1: ESLoad.getValue(R: 1));
8317
8318 // Return N so it doesn't get rechecked!
8319 return SDValue(N, 0);
8320 }
8321 }
8322
8323 return SDValue();
8324}
8325
8326SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
8327 SDNode *N, DAGCombinerInfo &DCI) const {
8328 SelectionDAG &DAG = DCI.DAG;
8329
8330 if (!Subtarget.hasVector())
8331 return SDValue();
8332
8333 // Look through bitcasts that retain the number of vector elements.
8334 SDValue Op = N->getOperand(Num: 0);
8335 if (Op.getOpcode() == ISD::BITCAST &&
8336 Op.getValueType().isVector() &&
8337 Op.getOperand(i: 0).getValueType().isVector() &&
8338 Op.getValueType().getVectorNumElements() ==
8339 Op.getOperand(i: 0).getValueType().getVectorNumElements())
8340 Op = Op.getOperand(i: 0);
8341
8342 // Pull BSWAP out of a vector extraction.
8343 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
8344 EVT VecVT = Op.getValueType();
8345 EVT EltVT = VecVT.getVectorElementType();
8346 Op = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(N), VT: EltVT,
8347 N1: Op.getOperand(i: 0), N2: N->getOperand(Num: 1));
8348 DCI.AddToWorklist(N: Op.getNode());
8349 Op = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT: EltVT, Operand: Op);
8350 if (EltVT != N->getValueType(ResNo: 0)) {
8351 DCI.AddToWorklist(N: Op.getNode());
8352 Op = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), Operand: Op);
8353 }
8354 return Op;
8355 }
8356
8357 // Try to simplify a vector extraction.
8358 if (auto *IndexN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))) {
8359 SDValue Op0 = N->getOperand(Num: 0);
8360 EVT VecVT = Op0.getValueType();
8361 if (canTreatAsByteVector(VT: VecVT))
8362 return combineExtract(DL: SDLoc(N), ResVT: N->getValueType(ResNo: 0), VecVT, Op: Op0,
8363 Index: IndexN->getZExtValue(), DCI, Force: false);
8364 }
8365 return SDValue();
8366}
8367
8368SDValue SystemZTargetLowering::combineJOIN_DWORDS(
8369 SDNode *N, DAGCombinerInfo &DCI) const {
8370 SelectionDAG &DAG = DCI.DAG;
8371 // (join_dwords X, X) == (replicate X)
8372 if (N->getOperand(Num: 0) == N->getOperand(Num: 1))
8373 return DAG.getNode(Opcode: SystemZISD::REPLICATE, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
8374 Operand: N->getOperand(Num: 0));
8375 return SDValue();
8376}
8377
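// Return the input chain shared by N1 and N2 if they use the same one,
// otherwise return a null SDValue.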
8378static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
8379 SDValue Chain1 = N1->getOperand(Num: 0);
8380 SDValue Chain2 = N2->getOperand(Num: 0);
8381
8382 // Trivial case: both nodes take the same chain.
8383 if (Chain1 == Chain2)
8384 return Chain1;
8385
8386 // FIXME - we could handle more complex cases via TokenFactor,
8387 // assuming we can verify that this would not create a cycle.
8388 return SDValue();
8389}
8390
8391SDValue SystemZTargetLowering::combineFP_ROUND(
8392 SDNode *N, DAGCombinerInfo &DCI) const {
8393
8394 if (!Subtarget.hasVector())
8395 return SDValue();
8396
8397 // (fpround (extract_vector_elt X 0))
8398 // (fpround (extract_vector_elt X 1)) ->
8399 // (extract_vector_elt (VROUND X) 0)
8400 // (extract_vector_elt (VROUND X) 2)
8401 //
8402 // This is a special case since the target doesn't really support v2f32s.
8403 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8404 SelectionDAG &DAG = DCI.DAG;
8405 SDValue Op0 = N->getOperand(Num: OpNo);
8406 if (N->getValueType(ResNo: 0) == MVT::f32 && Op0.hasOneUse() &&
8407 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8408 Op0.getOperand(i: 0).getValueType() == MVT::v2f64 &&
8409 Op0.getOperand(i: 1).getOpcode() == ISD::Constant &&
8410 Op0.getConstantOperandVal(i: 1) == 0) {
8411 SDValue Vec = Op0.getOperand(i: 0);
8412 for (auto *U : Vec->users()) {
8413 if (U != Op0.getNode() && U->hasOneUse() &&
8414 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8415 U->getOperand(Num: 0) == Vec &&
8416 U->getOperand(Num: 1).getOpcode() == ISD::Constant &&
8417 U->getConstantOperandVal(Num: 1) == 1) {
8418 SDValue OtherRound = SDValue(*U->user_begin(), 0);
8419 if (OtherRound.getOpcode() == N->getOpcode() &&
8420 OtherRound.getOperand(i: OpNo) == SDValue(U, 0) &&
8421 OtherRound.getValueType() == MVT::f32) {
8422 SDValue VRound, Chain;
8423 if (N->isStrictFPOpcode()) {
8424 Chain = MergeInputChains(N1: N, N2: OtherRound.getNode());
8425 if (!Chain)
8426 continue;
8427 VRound = DAG.getNode(Opcode: SystemZISD::STRICT_VROUND, DL: SDLoc(N),
8428 ResultTys: {MVT::v4f32, MVT::Other}, Ops: {Chain, Vec});
8429 Chain = VRound.getValue(R: 1);
8430 } else
8431 VRound = DAG.getNode(Opcode: SystemZISD::VROUND, DL: SDLoc(N),
8432 VT: MVT::v4f32, Operand: Vec);
8433 DCI.AddToWorklist(N: VRound.getNode());
8434 SDValue Extract1 =
8435 DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(U), VT: MVT::f32,
8436 N1: VRound, N2: DAG.getConstant(Val: 2, DL: SDLoc(U), VT: MVT::i32));
8437 DCI.AddToWorklist(N: Extract1.getNode());
8438 DAG.ReplaceAllUsesOfValueWith(From: OtherRound, To: Extract1);
8439 if (Chain)
8440 DAG.ReplaceAllUsesOfValueWith(From: OtherRound.getValue(R: 1), To: Chain);
8441 SDValue Extract0 =
8442 DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(Op0), VT: MVT::f32,
8443 N1: VRound, N2: DAG.getConstant(Val: 0, DL: SDLoc(Op0), VT: MVT::i32));
8444 if (Chain)
8445 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: SDLoc(Op0),
8446 VTList: N->getVTList(), N1: Extract0, N2: Chain);
8447 return Extract0;
8448 }
8449 }
8450 }
8451 }
8452 return SDValue();
8453}
8454
8455SDValue SystemZTargetLowering::combineFP_EXTEND(
8456 SDNode *N, DAGCombinerInfo &DCI) const {
8457
8458 if (!Subtarget.hasVector())
8459 return SDValue();
8460
8461 // (fpextend (extract_vector_elt X 0))
8462 // (fpextend (extract_vector_elt X 2)) ->
8463 // (extract_vector_elt (VEXTEND X) 0)
8464 // (extract_vector_elt (VEXTEND X) 1)
8465 //
8466 // This is a special case since the target doesn't really support v2f32s.
8467 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8468 SelectionDAG &DAG = DCI.DAG;
8469 SDValue Op0 = N->getOperand(Num: OpNo);
8470 if (N->getValueType(ResNo: 0) == MVT::f64 && Op0.hasOneUse() &&
8471 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8472 Op0.getOperand(i: 0).getValueType() == MVT::v4f32 &&
8473 Op0.getOperand(i: 1).getOpcode() == ISD::Constant &&
8474 Op0.getConstantOperandVal(i: 1) == 0) {
8475 SDValue Vec = Op0.getOperand(i: 0);
8476 for (auto *U : Vec->users()) {
8477 if (U != Op0.getNode() && U->hasOneUse() &&
8478 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8479 U->getOperand(Num: 0) == Vec &&
8480 U->getOperand(Num: 1).getOpcode() == ISD::Constant &&
8481 U->getConstantOperandVal(Num: 1) == 2) {
8482 SDValue OtherExtend = SDValue(*U->user_begin(), 0);
8483 if (OtherExtend.getOpcode() == N->getOpcode() &&
8484 OtherExtend.getOperand(i: OpNo) == SDValue(U, 0) &&
8485 OtherExtend.getValueType() == MVT::f64) {
8486 SDValue VExtend, Chain;
8487 if (N->isStrictFPOpcode()) {
8488 Chain = MergeInputChains(N1: N, N2: OtherExtend.getNode());
8489 if (!Chain)
8490 continue;
8491 VExtend = DAG.getNode(Opcode: SystemZISD::STRICT_VEXTEND, DL: SDLoc(N),
8492 ResultTys: {MVT::v2f64, MVT::Other}, Ops: {Chain, Vec});
8493 Chain = VExtend.getValue(R: 1);
8494 } else
8495 VExtend = DAG.getNode(Opcode: SystemZISD::VEXTEND, DL: SDLoc(N),
8496 VT: MVT::v2f64, Operand: Vec);
8497 DCI.AddToWorklist(N: VExtend.getNode());
8498 SDValue Extract1 =
8499 DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(U), VT: MVT::f64,
8500 N1: VExtend, N2: DAG.getConstant(Val: 1, DL: SDLoc(U), VT: MVT::i32));
8501 DCI.AddToWorklist(N: Extract1.getNode());
8502 DAG.ReplaceAllUsesOfValueWith(From: OtherExtend, To: Extract1);
8503 if (Chain)
8504 DAG.ReplaceAllUsesOfValueWith(From: OtherExtend.getValue(R: 1), To: Chain);
8505 SDValue Extract0 =
8506 DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(Op0), VT: MVT::f64,
8507 N1: VExtend, N2: DAG.getConstant(Val: 0, DL: SDLoc(Op0), VT: MVT::i32));
8508 if (Chain)
8509 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: SDLoc(Op0),
8510 VTList: N->getVTList(), N1: Extract0, N2: Chain);
8511 return Extract0;
8512 }
8513 }
8514 }
8515 }
8516 return SDValue();
8517}
8518
8519SDValue SystemZTargetLowering::combineINT_TO_FP(
8520 SDNode *N, DAGCombinerInfo &DCI) const {
8521 if (DCI.Level != BeforeLegalizeTypes)
8522 return SDValue();
8523 SelectionDAG &DAG = DCI.DAG;
8524 LLVMContext &Ctx = *DAG.getContext();
8525 unsigned Opcode = N->getOpcode();
8526 EVT OutVT = N->getValueType(ResNo: 0);
8527 Type *OutLLVMTy = OutVT.getTypeForEVT(Context&: Ctx);
8528 SDValue Op = N->getOperand(Num: 0);
8529 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
8530 unsigned InScalarBits = Op->getValueType(ResNo: 0).getScalarSizeInBits();
8531
8532 // Insert an extension before type-legalization to avoid scalarization, e.g.:
8533 // v2f64 = uint_to_fp v2i16
8534 // =>
8535 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
8536 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
8537 OutScalarBits <= 64) {
8538 unsigned NumElts = cast<FixedVectorType>(Val: OutLLVMTy)->getNumElements();
8539 EVT ExtVT = EVT::getVectorVT(
8540 Context&: Ctx, VT: EVT::getIntegerVT(Context&: Ctx, BitWidth: OutLLVMTy->getScalarSizeInBits()), NumElements: NumElts);
8541 unsigned ExtOpcode =
8542 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
8543 SDValue ExtOp = DAG.getNode(Opcode: ExtOpcode, DL: SDLoc(N), VT: ExtVT, Operand: Op);
8544 return DAG.getNode(Opcode, DL: SDLoc(N), VT: OutVT, Operand: ExtOp);
8545 }
8546 return SDValue();
8547}
8548
8549SDValue SystemZTargetLowering::combineFCOPYSIGN(
8550 SDNode *N, DAGCombinerInfo &DCI) const {
8551 SelectionDAG &DAG = DCI.DAG;
8552 EVT VT = N->getValueType(ResNo: 0);
8553 SDValue ValOp = N->getOperand(Num: 0);
8554 SDValue SignOp = N->getOperand(Num: 1);
8555
8556 // Remove the rounding which is not needed.
8557 if (SignOp.getOpcode() == ISD::FP_ROUND) {
8558 SDValue WideOp = SignOp.getOperand(i: 0);
8559 return DAG.getNode(Opcode: ISD::FCOPYSIGN, DL: SDLoc(N), VT, N1: ValOp, N2: WideOp);
8560 }
8561
8562 return SDValue();
8563}
8564
8565SDValue SystemZTargetLowering::combineBSWAP(
8566 SDNode *N, DAGCombinerInfo &DCI) const {
8567 SelectionDAG &DAG = DCI.DAG;
8568 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
8569 if (ISD::isNON_EXTLoad(N: N->getOperand(Num: 0).getNode()) &&
8570 N->getOperand(Num: 0).hasOneUse() &&
8571 canLoadStoreByteSwapped(VT: N->getValueType(ResNo: 0))) {
8572 SDValue Load = N->getOperand(Num: 0);
8573 LoadSDNode *LD = cast<LoadSDNode>(Val&: Load);
8574
8575 // Create the byte-swapping load.
8576 SDValue Ops[] = {
8577 LD->getChain(), // Chain
8578 LD->getBasePtr() // Ptr
8579 };
8580 EVT LoadVT = N->getValueType(ResNo: 0);
8581 if (LoadVT == MVT::i16)
8582 LoadVT = MVT::i32;
8583 SDValue BSLoad =
8584 DAG.getMemIntrinsicNode(Opcode: SystemZISD::LRV, dl: SDLoc(N),
8585 VTList: DAG.getVTList(VT1: LoadVT, VT2: MVT::Other),
8586 Ops, MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
8587
8588 // If this is an i16 load, insert the truncate.
8589 SDValue ResVal = BSLoad;
8590 if (N->getValueType(ResNo: 0) == MVT::i16)
8591 ResVal = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(N), VT: MVT::i16, Operand: BSLoad);
8592
8593 // First, combine the bswap away. This makes the value produced by the
8594 // load dead.
8595 DCI.CombineTo(N, Res: ResVal);
8596
8597 // Next, combine the load away; we give it a bogus result value but a real
8598 // chain result. The result value is dead because the bswap is dead.
8599 DCI.CombineTo(N: Load.getNode(), Res0: ResVal, Res1: BSLoad.getValue(R: 1));
8600
8601 // Return N so it doesn't get rechecked!
8602 return SDValue(N, 0);
8603 }
8604
8605 // Look through bitcasts that retain the number of vector elements.
8606 SDValue Op = N->getOperand(Num: 0);
8607 if (Op.getOpcode() == ISD::BITCAST &&
8608 Op.getValueType().isVector() &&
8609 Op.getOperand(i: 0).getValueType().isVector() &&
8610 Op.getValueType().getVectorNumElements() ==
8611 Op.getOperand(i: 0).getValueType().getVectorNumElements())
8612 Op = Op.getOperand(i: 0);
8613
8614 // Push BSWAP into a vector insertion if at least one side then simplifies.
8615 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
8616 SDValue Vec = Op.getOperand(i: 0);
8617 SDValue Elt = Op.getOperand(i: 1);
8618 SDValue Idx = Op.getOperand(i: 2);
8619
8620 if (DAG.isConstantIntBuildVectorOrConstantInt(N: Vec) ||
8621 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
8622 DAG.isConstantIntBuildVectorOrConstantInt(N: Elt) ||
8623 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
8624 (canLoadStoreByteSwapped(VT: N->getValueType(ResNo: 0)) &&
8625 ISD::isNON_EXTLoad(N: Elt.getNode()) && Elt.hasOneUse())) {
8626 EVT VecVT = N->getValueType(ResNo: 0);
8627 EVT EltVT = N->getValueType(ResNo: 0).getVectorElementType();
8628 if (VecVT != Vec.getValueType()) {
8629 Vec = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: VecVT, Operand: Vec);
8630 DCI.AddToWorklist(N: Vec.getNode());
8631 }
8632 if (EltVT != Elt.getValueType()) {
8633 Elt = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: EltVT, Operand: Elt);
8634 DCI.AddToWorklist(N: Elt.getNode());
8635 }
8636 Vec = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT: VecVT, Operand: Vec);
8637 DCI.AddToWorklist(N: Vec.getNode());
8638 Elt = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT: EltVT, Operand: Elt);
8639 DCI.AddToWorklist(N: Elt.getNode());
8640 return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: VecVT,
8641 N1: Vec, N2: Elt, N3: Idx);
8642 }
8643 }
8644
8645 // Push BSWAP into a vector shuffle if at least one side then simplifies.
8646 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Val&: Op);
8647 if (SV && Op.hasOneUse()) {
8648 SDValue Op0 = Op.getOperand(i: 0);
8649 SDValue Op1 = Op.getOperand(i: 1);
8650
8651 if (DAG.isConstantIntBuildVectorOrConstantInt(N: Op0) ||
8652 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
8653 DAG.isConstantIntBuildVectorOrConstantInt(N: Op1) ||
8654 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
8655 EVT VecVT = N->getValueType(ResNo: 0);
8656 if (VecVT != Op0.getValueType()) {
8657 Op0 = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: VecVT, Operand: Op0);
8658 DCI.AddToWorklist(N: Op0.getNode());
8659 }
8660 if (VecVT != Op1.getValueType()) {
8661 Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: VecVT, Operand: Op1);
8662 DCI.AddToWorklist(N: Op1.getNode());
8663 }
8664 Op0 = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT: VecVT, Operand: Op0);
8665 DCI.AddToWorklist(N: Op0.getNode());
8666 Op1 = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT: VecVT, Operand: Op1);
8667 DCI.AddToWorklist(N: Op1.getNode());
8668 return DAG.getVectorShuffle(VT: VecVT, dl: SDLoc(N), N1: Op0, N2: Op1, Mask: SV->getMask());
8669 }
8670 }
8671
8672 return SDValue();
8673}
8674
8675SDValue SystemZTargetLowering::combineSETCC(
8676 SDNode *N, DAGCombinerInfo &DCI) const {
8677 SelectionDAG &DAG = DCI.DAG;
8678 const ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
8679 const SDValue LHS = N->getOperand(Num: 0);
8680 const SDValue RHS = N->getOperand(Num: 1);
8681 bool CmpNull = isNullConstant(V: RHS);
8682 bool CmpAllOnes = isAllOnesConstant(V: RHS);
8683 EVT VT = N->getValueType(ResNo: 0);
8684 SDLoc DL(N);
8685
8686 // Match icmp_eq/ne(bitcast(icmp(X,Y)),0/-1) reduction patterns, and
8687 // change the outer compare to an i128 compare. This will normally
8688 // allow the reduction to be recognized in adjustICmp128, and even if
8689 // not, the i128 compare will still generate better code.
8690 if ((CC == ISD::SETNE || CC == ISD::SETEQ) && (CmpNull || CmpAllOnes)) {
8691 SDValue Src = peekThroughBitcasts(V: LHS);
8692 if (Src.getOpcode() == ISD::SETCC &&
8693 Src.getValueType().isFixedLengthVector() &&
8694 Src.getValueType().getScalarType() == MVT::i1) {
8695 EVT CmpVT = Src.getOperand(i: 0).getValueType();
8696 if (CmpVT.getSizeInBits() == 128) {
8697 EVT IntVT = CmpVT.changeVectorElementTypeToInteger();
8698 SDValue LHS =
8699 DAG.getBitcast(VT: MVT::i128, V: DAG.getSExtOrTrunc(Op: Src, DL, VT: IntVT));
8700 SDValue RHS = CmpNull ? DAG.getConstant(Val: 0, DL, VT: MVT::i128)
8701 : DAG.getAllOnesConstant(DL, VT: MVT::i128);
8702 return DAG.getNode(Opcode: ISD::SETCC, DL, VT, N1: LHS, N2: RHS, N3: N->getOperand(Num: 2),
8703 Flags: N->getFlags());
8704 }
8705 }
8706 }
8707
8708 return SDValue();
8709}
8710
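// Walk up from Val to find the instruction that produces the CC value it
// ultimately depends on, looking through IPM, SELECT_CCMASK and simple
// arithmetic/logical operations. Return that CC producer together with the
// mask of CC values it can produce, or (SDValue(), CCMASK_NONE) if none is
// found.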
8711static std::pair<SDValue, int> findCCUse(const SDValue &Val) {
8712 switch (Val.getOpcode()) {
8713 default:
8714 return std::make_pair(x: SDValue(), y: SystemZ::CCMASK_NONE);
8715 case SystemZISD::IPM:
8716 if (Val.getOperand(i: 0).getOpcode() == SystemZISD::CLC ||
8717 Val.getOperand(i: 0).getOpcode() == SystemZISD::STRCMP)
8718 return std::make_pair(x: Val.getOperand(i: 0), y: SystemZ::CCMASK_ICMP);
8719 return std::make_pair(x: Val.getOperand(i: 0), y: SystemZ::CCMASK_ANY);
8720 case SystemZISD::SELECT_CCMASK: {
8721 SDValue Op4CCReg = Val.getOperand(i: 4);
8722 if (Op4CCReg.getOpcode() == SystemZISD::ICMP ||
8723 Op4CCReg.getOpcode() == SystemZISD::TM) {
8724 auto [OpCC, OpCCValid] = findCCUse(Val: Op4CCReg.getOperand(i: 0));
8725 if (OpCC != SDValue())
8726 return std::make_pair(x&: OpCC, y&: OpCCValid);
8727 }
8728 auto *CCValid = dyn_cast<ConstantSDNode>(Val: Val.getOperand(i: 2));
8729 if (!CCValid)
8730 return std::make_pair(x: SDValue(), y: SystemZ::CCMASK_NONE);
8731 int CCValidVal = CCValid->getZExtValue();
8732 return std::make_pair(x&: Op4CCReg, y&: CCValidVal);
8733 }
8734 case ISD::ADD:
8735 case ISD::AND:
8736 case ISD::OR:
8737 case ISD::XOR:
8738 case ISD::SHL:
8739 case ISD::SRA:
8740 case ISD::SRL:
8741 auto [Op0CC, Op0CCValid] = findCCUse(Val: Val.getOperand(i: 0));
8742 if (Op0CC != SDValue())
8743 return std::make_pair(x&: Op0CC, y&: Op0CCValid);
8744 return findCCUse(Val: Val.getOperand(i: 1));
8745 }
8746}
8747
8748static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8749 SelectionDAG &DAG);
8750
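// Try to evaluate Val under the assumption that the condition code was set by
// the CC producer CC. On success, return four SDValues giving the value Val
// would take for each condition code 0..3; return an empty vector if Val
// cannot be evaluated this way.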
8751 static SmallVector<SDValue, 4> simplifyAssumingCCVal(SDValue &Val, SDValue &CC,
8752 SelectionDAG &DAG) {
8753 SDLoc DL(Val);
8754 auto Opcode = Val.getOpcode();
8755 switch (Opcode) {
8756 default:
8757 return {};
8758 case ISD::Constant:
8759 return {Val, Val, Val, Val};
8760 case SystemZISD::IPM: {
8761 SDValue IPMOp0 = Val.getOperand(i: 0);
8762 if (IPMOp0 != CC)
8763 return {};
8764 SmallVector<SDValue, 4> ShiftedCCVals;
8765 for (auto CC : {0, 1, 2, 3})
8766 ShiftedCCVals.emplace_back(
8767 Args: DAG.getConstant(Val: (CC << SystemZ::IPM_CC), DL, VT: MVT::i32));
8768 return ShiftedCCVals;
8769 }
8770 case SystemZISD::SELECT_CCMASK: {
8771 SDValue TrueVal = Val.getOperand(i: 0), FalseVal = Val.getOperand(i: 1);
8772 auto *CCValid = dyn_cast<ConstantSDNode>(Val: Val.getOperand(i: 2));
8773 auto *CCMask = dyn_cast<ConstantSDNode>(Val: Val.getOperand(i: 3));
8774 if (!CCValid || !CCMask)
8775 return {};
8776
8777 int CCValidVal = CCValid->getZExtValue();
8778 int CCMaskVal = CCMask->getZExtValue();
8779 // Prune the search tree early by moving the CC test and combineCCMask
8780 // ahead of the recursive call to simplifyAssumingCCVal.
8781 SDValue Op4CCReg = Val.getOperand(i: 4);
8782 if (Op4CCReg != CC)
8783 combineCCMask(CCReg&: Op4CCReg, CCValid&: CCValidVal, CCMask&: CCMaskVal, DAG);
8784 if (Op4CCReg != CC)
8785 return {};
8786 const auto &&TrueSDVals = simplifyAssumingCCVal(Val&: TrueVal, CC, DAG);
8787 const auto &&FalseSDVals = simplifyAssumingCCVal(Val&: FalseVal, CC, DAG);
8788 if (TrueSDVals.empty() || FalseSDVals.empty())
8789 return {};
8790 SmallVector<SDValue, 4> MergedSDVals;
8791 for (auto &CCVal : {0, 1, 2, 3})
8792 MergedSDVals.emplace_back(Args: ((CCMaskVal & (1 << (3 - CCVal))) != 0)
8793 ? TrueSDVals[CCVal]
8794 : FalseSDVals[CCVal]);
8795 return MergedSDVals;
8796 }
8797 case ISD::ADD:
8798 case ISD::AND:
8799 case ISD::OR:
8800 case ISD::XOR:
8801 case ISD::SRA:
8802 // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA
8803 // would clobber CC).
8804 if (!Val.hasOneUse())
8805 return {};
8806 [[fallthrough]];
8807 case ISD::SHL:
8808 case ISD::SRL:
8809 SDValue Op0 = Val.getOperand(i: 0), Op1 = Val.getOperand(i: 1);
8810 const auto &&Op0SDVals = simplifyAssumingCCVal(Val&: Op0, CC, DAG);
8811 const auto &&Op1SDVals = simplifyAssumingCCVal(Val&: Op1, CC, DAG);
8812 if (Op0SDVals.empty() || Op1SDVals.empty())
8813 return {};
8814 SmallVector<SDValue, 4> BinaryOpSDVals;
8815 for (auto CCVal : {0, 1, 2, 3})
8816 BinaryOpSDVals.emplace_back(Args: DAG.getNode(
8817 Opcode, DL, VT: Val.getValueType(), N1: Op0SDVals[CCVal], N2: Op1SDVals[CCVal]));
8818 return BinaryOpSDVals;
8819 }
8820}
8821
8822static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8823 SelectionDAG &DAG) {
8824 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
8825 // set by the CCReg instruction using the CCValid / CCMask masks.
8826 // If the CCReg instruction is itself an ICMP / TM testing the condition
8827 // code set by some other instruction, see whether we can directly
8828 // use that condition code.
8829 auto *CCNode = CCReg.getNode();
8830 if (!CCNode)
8831 return false;
8832
8833 if (CCNode->getOpcode() == SystemZISD::TM) {
8834 if (CCValid != SystemZ::CCMASK_TM)
8835 return false;
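// Emulate the CC value that TEST UNDER MASK would produce for two constant
// operands: 0 if all selected bits are zero, 3 if they are all one, and 1 or
// 2 for mixed results. Returns -1 if either operand is not a constant.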
8836 auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) {
8837 auto *Op0Node = dyn_cast<ConstantSDNode>(Val: Op0Val.getNode());
8838 auto *Op1Node = dyn_cast<ConstantSDNode>(Val: Op1Val.getNode());
8839 if (!Op0Node || !Op1Node)
8840 return -1;
8841 auto Op0APVal = Op0Node->getAPIntValue();
8842 auto Op1APVal = Op1Node->getAPIntValue();
8843 auto Result = Op0APVal & Op1APVal;
8844 bool AllOnes = Result == Op1APVal;
8845 bool AllZeros = Result == 0;
8846 bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits()] != 0;
8847 return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1;
8848 };
8849 SDValue Op0 = CCNode->getOperand(Num: 0);
8850 SDValue Op1 = CCNode->getOperand(Num: 1);
8851 auto [Op0CC, Op0CCValid] = findCCUse(Val: Op0);
8852 if (Op0CC == SDValue())
8853 return false;
8854 const auto &&Op0SDVals = simplifyAssumingCCVal(Val&: Op0, CC&: Op0CC, DAG);
8855 const auto &&Op1SDVals = simplifyAssumingCCVal(Val&: Op1, CC&: Op0CC, DAG);
8856 if (Op0SDVals.empty() || Op1SDVals.empty())
8857 return false;
8858 int NewCCMask = 0;
8859 for (auto CC : {0, 1, 2, 3}) {
8860 auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]);
8861 if (CCVal < 0)
8862 return false;
8863 NewCCMask <<= 1;
8864 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8865 }
8866 NewCCMask &= Op0CCValid;
8867 CCReg = Op0CC;
8868 CCMask = NewCCMask;
8869 CCValid = Op0CCValid;
8870 return true;
8871 }
8872 if (CCNode->getOpcode() != SystemZISD::ICMP ||
8873 CCValid != SystemZ::CCMASK_ICMP)
8874 return false;
8875
8876 SDValue CmpOp0 = CCNode->getOperand(Num: 0);
8877 SDValue CmpOp1 = CCNode->getOperand(Num: 1);
8878 SDValue CmpOp2 = CCNode->getOperand(Num: 2);
8879 auto [Op0CC, Op0CCValid] = findCCUse(Val: CmpOp0);
8880 if (Op0CC != SDValue()) {
8881 const auto &&Op0SDVals = simplifyAssumingCCVal(Val&: CmpOp0, CC&: Op0CC, DAG);
8882 const auto &&Op1SDVals = simplifyAssumingCCVal(Val&: CmpOp1, CC&: Op0CC, DAG);
8883 if (Op0SDVals.empty() || Op1SDVals.empty())
8884 return false;
8885
8886 auto *CmpType = dyn_cast<ConstantSDNode>(Val&: CmpOp2);
8887 auto CmpTypeVal = CmpType->getZExtValue();
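// Emulate the CC value of an integer compare of two constants: 0 for equal,
// 1 if Op0 is less than Op1, 2 if Op0 is greater than Op1 (signed or unsigned
// according to the compare type). Returns -1 if either operand is not a
// constant.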
8888 const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
8889 const SDValue &Op1Val) {
8890 auto *Op0Node = dyn_cast<ConstantSDNode>(Val: Op0Val.getNode());
8891 auto *Op1Node = dyn_cast<ConstantSDNode>(Val: Op1Val.getNode());
8892 if (!Op0Node || !Op1Node)
8893 return -1;
8894 auto Op0APVal = Op0Node->getAPIntValue();
8895 auto Op1APVal = Op1Node->getAPIntValue();
8896 if (CmpTypeVal == SystemZICMP::SignedOnly)
8897 return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(RHS: Op1APVal) ? 1 : 2;
8898 return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(RHS: Op1APVal) ? 1 : 2;
8899 };
8900 int NewCCMask = 0;
8901 for (auto CC : {0, 1, 2, 3}) {
8902 auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
8903 if (CCVal < 0)
8904 return false;
8905 NewCCMask <<= 1;
8906 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8907 }
8908 NewCCMask &= Op0CCValid;
8909 CCMask = NewCCMask;
8910 CCReg = Op0CC;
8911 CCValid = Op0CCValid;
8912 return true;
8913 }
8914
8915 return false;
8916}
8917
8918 // Cost parameters for merging conditions versus splitting them into multiple branches.
8919TargetLoweringBase::CondMergingParams
8920SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
8921 const Value *Lhs,
8922 const Value *Rhs) const {
8923 const auto isFlagOutOpCC = [](const Value *V) {
8924 using namespace llvm::PatternMatch;
8925 const Value *RHSVal;
8926 const APInt *RHSC;
8927 if (const auto *I = dyn_cast<Instruction>(Val: V)) {
8928 // PatternMatch.h provides concise tree-based pattern matching of LLVM IR.
8929 if (match(V: I->getOperand(i: 0), P: m_And(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC))) ||
8930 match(V: I, P: m_Cmp(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) {
8931 if (const auto *CB = dyn_cast<CallBase>(Val: RHSVal)) {
8932 if (CB->isInlineAsm()) {
8933 const InlineAsm *IA = cast<InlineAsm>(Val: CB->getCalledOperand());
8934 return IA && IA->getConstraintString().contains(Other: "{@cc}");
8935 }
8936 }
8937 }
8938 }
8939 return false;
8940 };
8941 // Pattern (ICmp %asm) or (ICmp (And %asm)).
8942 // The cost of the longest dependency chain (ICmp, And) is 2, so
8943 // CostThreshold or BaseCost can be set >= 2. Conditionals are merged if
8944 // the instruction cost is <= CostThreshold, and split otherwise.
8945 if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
8946 return {.BaseCost: 3, .LikelyBias: 0, .UnlikelyBias: -1};
8947 // Default.
8948 return {.BaseCost: -1, .LikelyBias: -1, .UnlikelyBias: -1};
8949}
8950
8951SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
8952 DAGCombinerInfo &DCI) const {
8953 SelectionDAG &DAG = DCI.DAG;
8954
8955 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
8956 auto *CCValid = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
8957 auto *CCMask = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2));
8958 if (!CCValid || !CCMask)
8959 return SDValue();
8960
8961 int CCValidVal = CCValid->getZExtValue();
8962 int CCMaskVal = CCMask->getZExtValue();
8963 SDValue Chain = N->getOperand(Num: 0);
8964 SDValue CCReg = N->getOperand(Num: 4);
8965 // If combineCCMask was able to merge or simplify CCValid or CCMask, re-emit
8966 // the modified BR_CCMASK with the new values.
8967 // In order to avoid conditional branches with full or empty CC masks, do not
8968 // do this if CCMask is 0 or equal to CCValid.
8969 if (combineCCMask(CCReg, CCValid&: CCValidVal, CCMask&: CCMaskVal, DAG) && CCMaskVal != 0 &&
8970 CCMaskVal != CCValidVal)
8971 return DAG.getNode(Opcode: SystemZISD::BR_CCMASK, DL: SDLoc(N), VT: N->getValueType(ResNo: 0),
8972 N1: Chain,
8973 N2: DAG.getTargetConstant(Val: CCValidVal, DL: SDLoc(N), VT: MVT::i32),
8974 N3: DAG.getTargetConstant(Val: CCMaskVal, DL: SDLoc(N), VT: MVT::i32),
8975 N4: N->getOperand(Num: 3), N5: CCReg);
8976 return SDValue();
8977}
8978
8979SDValue SystemZTargetLowering::combineSELECT_CCMASK(
8980 SDNode *N, DAGCombinerInfo &DCI) const {
8981 SelectionDAG &DAG = DCI.DAG;
8982
8983 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
8984 auto *CCValid = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2));
8985 auto *CCMask = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 3));
8986 if (!CCValid || !CCMask)
8987 return SDValue();
8988
8989 int CCValidVal = CCValid->getZExtValue();
8990 int CCMaskVal = CCMask->getZExtValue();
8991 SDValue CCReg = N->getOperand(Num: 4);
8992
8993 bool IsCombinedCCReg = combineCCMask(CCReg, CCValid&: CCValidVal, CCMask&: CCMaskVal, DAG);
8994
8995 // Populate an SDVals vector with one entry per condition code for the given
8996 // Val, which may itself be a nested SELECT_CCMASK on the same CCReg.
8997 const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
8998 if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
8999 SmallVector<SDValue, 4> Res;
9000 if (Val.getOperand(i: 4) != CCReg)
9001 return SmallVector<SDValue, 4>{};
9002 SDValue TrueVal = Val.getOperand(i: 0), FalseVal = Val.getOperand(i: 1);
9003 auto *CCMask = dyn_cast<ConstantSDNode>(Val: Val.getOperand(i: 3));
9004 if (!CCMask)
9005 return SmallVector<SDValue, 4>{};
9006
9007 int CCMaskVal = CCMask->getZExtValue();
9008 for (auto &CC : {0, 1, 2, 3})
9009 Res.emplace_back(Args&: ((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
9010 : FalseVal);
9011 return Res;
9012 }
9013 return SmallVector<SDValue, 4>{Val, Val, Val, Val};
9014 };
9015 // Attempt to optimize TrueVal/FalseVal of the outermost SELECT_CCMASK, using
9016 // either the CCReg found by combineCCMask or the original CCReg.
9017 SDValue TrueVal = N->getOperand(Num: 0);
9018 SDValue FalseVal = N->getOperand(Num: 1);
9019 auto &&TrueSDVals = simplifyAssumingCCVal(Val&: TrueVal, CC&: CCReg, DAG);
9020 auto &&FalseSDVals = simplifyAssumingCCVal(Val&: FalseVal, CC&: CCReg, DAG);
9021 // TrueSDVals/FalseSDVals might be empty in the case of a non-constant
9022 // TrueVal/FalseVal for the SELECT_CCMASK, which cannot be optimized further.
9023 if (TrueSDVals.empty())
9024 TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
9025 if (FalseSDVals.empty())
9026 FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
9027 if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
9028 SmallSet<SDValue, 4> MergedSDValsSet;
9029 // Ignore CC values outside CCValid.
9030 for (auto CC : {0, 1, 2, 3}) {
9031 if ((CCValidVal & ((1 << (3 - CC)))) != 0)
9032 MergedSDValsSet.insert(V: ((CCMaskVal & (1 << (3 - CC))) != 0)
9033 ? TrueSDVals[CC]
9034 : FalseSDVals[CC]);
9035 }
9036 if (MergedSDValsSet.size() == 1)
9037 return *MergedSDValsSet.begin();
9038 if (MergedSDValsSet.size() == 2) {
9039 auto BeginIt = MergedSDValsSet.begin();
9040 SDValue NewTrueVal = *BeginIt, NewFalseVal = *next(x: BeginIt);
9041 if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
9042 std::swap(a&: NewTrueVal, b&: NewFalseVal);
9043 int NewCCMask = 0;
9044 for (auto CC : {0, 1, 2, 3}) {
9045 NewCCMask <<= 1;
9046 NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
9047 ? (TrueSDVals[CC] == NewTrueVal)
9048 : (FalseSDVals[CC] == NewTrueVal);
9049 }
9050 CCMaskVal = NewCCMask;
9051 CCMaskVal &= CCValidVal;
9052 TrueVal = NewTrueVal;
9053 FalseVal = NewFalseVal;
9054 IsCombinedCCReg = true;
9055 }
9056 }
9057 // If the condition is trivially false or trivially true after
9058 // combineCCMask, just collapse this SELECT_CCMASK to the indicated value
9059 // (possibly modified by constructCCSDValsFromSELECT).
9060 if (CCMaskVal == 0)
9061 return FalseVal;
9062 if (CCMaskVal == CCValidVal)
9063 return TrueVal;
9064
9065 if (IsCombinedCCReg)
9066 return DAG.getNode(
9067 Opcode: SystemZISD::SELECT_CCMASK, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: TrueVal,
9068 N2: FalseVal, N3: DAG.getTargetConstant(Val: CCValidVal, DL: SDLoc(N), VT: MVT::i32),
9069 N4: DAG.getTargetConstant(Val: CCMaskVal, DL: SDLoc(N), VT: MVT::i32), N5: CCReg);
9070
9071 return SDValue();
9072}
9073
9074SDValue SystemZTargetLowering::combineGET_CCMASK(
9075 SDNode *N, DAGCombinerInfo &DCI) const {
9076
9077 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
9078 auto *CCValid = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
9079 auto *CCMask = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2));
9080 if (!CCValid || !CCMask)
9081 return SDValue();
9082 int CCValidVal = CCValid->getZExtValue();
9083 int CCMaskVal = CCMask->getZExtValue();
9084
9085 SDValue Select = N->getOperand(Num: 0);
9086 if (Select->getOpcode() == ISD::TRUNCATE)
9087 Select = Select->getOperand(Num: 0);
9088 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
9089 return SDValue();
9090
9091 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Val: Select->getOperand(Num: 2));
9092 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Val: Select->getOperand(Num: 3));
9093 if (!SelectCCValid || !SelectCCMask)
9094 return SDValue();
9095 int SelectCCValidVal = SelectCCValid->getZExtValue();
9096 int SelectCCMaskVal = SelectCCMask->getZExtValue();
9097
9098 auto *TrueVal = dyn_cast<ConstantSDNode>(Val: Select->getOperand(Num: 0));
9099 auto *FalseVal = dyn_cast<ConstantSDNode>(Val: Select->getOperand(Num: 1));
9100 if (!TrueVal || !FalseVal)
9101 return SDValue();
9102 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
9103 ;
9104 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
9105 SelectCCMaskVal ^= SelectCCValidVal;
9106 else
9107 return SDValue();
9108
9109 if (SelectCCValidVal & ~CCValidVal)
9110 return SDValue();
9111 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
9112 return SDValue();
9113
9114 return Select->getOperand(Num: 4);
9115}
9116
9117SDValue SystemZTargetLowering::combineIntDIVREM(
9118 SDNode *N, DAGCombinerInfo &DCI) const {
9119 SelectionDAG &DAG = DCI.DAG;
9120 EVT VT = N->getValueType(ResNo: 0);
9121 // In the case where the divisor is a vector of constants, a cheaper
9122 // sequence of instructions can replace the divide. BuildSDIV is called to
9123 // do this during DAG combining, but it only succeeds when it can build a
9124 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
9125 // since it is not Legal but Custom it can only happen before legalization.
9126 // Therefore we must scalarize this early, before the first DAG combine.
9127 // For widened vectors, this is already the result of type legalization.
9128 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
9129 DAG.isConstantIntBuildVectorOrConstantInt(N: N->getOperand(Num: 1)))
9130 return DAG.UnrollVectorOp(N);
9131 return SDValue();
9132}
9133
9134
9135// Transform a right shift of a multiply-and-add into a multiply-and-add-high.
9136// This is closely modeled after the common-code combineShiftToMULH.
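// For example:
//   (srl (add (mul (zext X), (zext Y)), (zext Z)), <narrow element width>)
// becomes (zext (VMALH X, Y, Z)); the signed form with sign extensions and
// SRA uses VMAH instead.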
9137SDValue SystemZTargetLowering::combineShiftToMulAddHigh(
9138 SDNode *N, DAGCombinerInfo &DCI) const {
9139 SelectionDAG &DAG = DCI.DAG;
9140 SDLoc DL(N);
9141
9142 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
9143 "SRL or SRA node is required here!");
9144
9145 if (!Subtarget.hasVector())
9146 return SDValue();
9147
9148 // Check the shift amount. Proceed with the transformation if the shift
9149 // amount is constant.
9150 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N: N->getOperand(Num: 1));
9151 if (!ShiftAmtSrc)
9152 return SDValue();
9153
9154 // The operation feeding into the shift must be an add.
9155 SDValue ShiftOperand = N->getOperand(Num: 0);
9156 if (ShiftOperand.getOpcode() != ISD::ADD)
9157 return SDValue();
9158
9159 // One operand of the add must be a multiply.
9160 SDValue MulOp = ShiftOperand.getOperand(i: 0);
9161 SDValue AddOp = ShiftOperand.getOperand(i: 1);
9162 if (MulOp.getOpcode() != ISD::MUL) {
9163 if (AddOp.getOpcode() != ISD::MUL)
9164 return SDValue();
9165 std::swap(a&: MulOp, b&: AddOp);
9166 }
9167
9168 // All operands must be equivalent extend nodes.
9169 SDValue LeftOp = MulOp.getOperand(i: 0);
9170 SDValue RightOp = MulOp.getOperand(i: 1);
9171
9172 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9173 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9174
9175 if (!IsSignExt && !IsZeroExt)
9176 return SDValue();
9177
9178 EVT NarrowVT = LeftOp.getOperand(i: 0).getValueType();
9179 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
9180
9181 SDValue MulhRightOp;
9182 if (ConstantSDNode *Constant = isConstOrConstSplat(N: RightOp)) {
9183 unsigned ActiveBits = IsSignExt
9184 ? Constant->getAPIntValue().getSignificantBits()
9185 : Constant->getAPIntValue().getActiveBits();
9186 if (ActiveBits > NarrowVTSize)
9187 return SDValue();
9188 MulhRightOp = DAG.getConstant(
9189 Val: Constant->getAPIntValue().trunc(width: NarrowVT.getScalarSizeInBits()), DL,
9190 VT: NarrowVT);
9191 } else {
9192 if (LeftOp.getOpcode() != RightOp.getOpcode())
9193 return SDValue();
9194 // Check that the two extend nodes are the same type.
9195 if (NarrowVT != RightOp.getOperand(i: 0).getValueType())
9196 return SDValue();
9197 MulhRightOp = RightOp.getOperand(i: 0);
9198 }
9199
9200 SDValue MulhAddOp;
9201 if (ConstantSDNode *Constant = isConstOrConstSplat(N: AddOp)) {
9202 unsigned ActiveBits = IsSignExt
9203 ? Constant->getAPIntValue().getSignificantBits()
9204 : Constant->getAPIntValue().getActiveBits();
9205 if (ActiveBits > NarrowVTSize)
9206 return SDValue();
9207 MulhAddOp = DAG.getConstant(
9208 Val: Constant->getAPIntValue().trunc(width: NarrowVT.getScalarSizeInBits()), DL,
9209 VT: NarrowVT);
9210 } else {
9211 if (LeftOp.getOpcode() != AddOp.getOpcode())
9212 return SDValue();
9213 // Check that the two extend nodes are the same type.
9214 if (NarrowVT != AddOp.getOperand(i: 0).getValueType())
9215 return SDValue();
9216 MulhAddOp = AddOp.getOperand(i: 0);
9217 }
9218
9219 EVT WideVT = LeftOp.getValueType();
9220 // Proceed with the transformation if the wide types match.
9221 assert((WideVT == RightOp.getValueType()) &&
9222 "Cannot have a multiply node with two different operand types.");
9223 assert((WideVT == AddOp.getValueType()) &&
9224 "Cannot have an add node with two different operand types.");
9225
9226 // Proceed with the transformation if the wide type is twice as large
9227 // as the narrow type.
9228 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9229 return SDValue();
9230
9231 // Check the shift amount with the narrow type size.
9232 // Proceed with the transformation if the shift amount is the width
9233 // of the narrow type.
9234 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9235 if (ShiftAmt != NarrowVTSize)
9236 return SDValue();
9237
9238 // Proceed if we support the multiply-and-add-high operation.
9239 if (!(NarrowVT == MVT::v16i8 || NarrowVT == MVT::v8i16 ||
9240 NarrowVT == MVT::v4i32 ||
9241 (Subtarget.hasVectorEnhancements3() &&
9242 (NarrowVT == MVT::v2i64 || NarrowVT == MVT::i128))))
9243 return SDValue();
9244
9245 // Emit the VMAH (signed) or VMALH (unsigned) operation.
9246 SDValue Result = DAG.getNode(Opcode: IsSignExt ? SystemZISD::VMAH : SystemZISD::VMALH,
9247 DL, VT: NarrowVT, N1: LeftOp.getOperand(i: 0),
9248 N2: MulhRightOp, N3: MulhAddOp);
9249 bool IsSigned = N->getOpcode() == ISD::SRA;
9250 return DAG.getExtOrTrunc(IsSigned, Op: Result, DL, VT: WideVT);
9251}
9252
9253// Op is an operand of a multiplication. Check whether this can be folded
9254// into an even/odd widening operation; if so, return the opcode to be used
9255// and update Op to the appropriate sub-operand. Note that the caller must
9256// verify that *both* operands of the multiplication support the operation.
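// For example, (sign_extend_vector_inreg (vector_shuffle <0,2,4,...> X))
// selects the even elements of X and maps to VME, while a <1,3,5,...> mask
// selects the odd elements and maps to VMO (VMLE / VMLO for the zero-extended
// forms).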
9257static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG,
9258 const SystemZSubtarget &Subtarget,
9259 SDValue &Op) {
9260 EVT VT = Op.getValueType();
9261
9262 // Check for (sign/zero_extend_vector_inreg (vector_shuffle)) corresponding
9263 // to selecting the even or odd vector elements.
9264 if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
9265 (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9266 Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)) {
9267 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
9268 unsigned NumElts = VT.getVectorNumElements();
9269 Op = Op.getOperand(i: 0);
9270 if (Op.getValueType().getVectorNumElements() == 2 * NumElts &&
9271 Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
9272 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: Op.getNode());
9273 ArrayRef<int> ShuffleMask = SVN->getMask();
9274 bool CanUseEven = true, CanUseOdd = true;
9275 for (unsigned Elt = 0; Elt < NumElts; Elt++) {
9276 if (ShuffleMask[Elt] == -1)
9277 continue;
9278 if (unsigned(ShuffleMask[Elt]) != 2 * Elt)
9279 CanUseEven = false;
9280 if (unsigned(ShuffleMask[Elt]) != 2 * Elt + 1)
9281 CanUseOdd = false;
9282 }
9283 Op = Op.getOperand(i: 0);
9284 if (CanUseEven)
9285 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9286 if (CanUseOdd)
9287 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9288 }
9289 }
9290
9291 // For z17, we can also support the v2i64->i128 case, which looks like
9292 // (sign/zero_extend (extract_vector_elt X 0/1))
9293 if (VT == MVT::i128 && Subtarget.hasVectorEnhancements3() &&
9294 (Op.getOpcode() == ISD::SIGN_EXTEND ||
9295 Op.getOpcode() == ISD::ZERO_EXTEND)) {
9296 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND;
9297 Op = Op.getOperand(i: 0);
9298 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9299 Op.getOperand(i: 0).getValueType() == MVT::v2i64 &&
9300 Op.getOperand(i: 1).getOpcode() == ISD::Constant) {
9301 unsigned Elem = Op.getConstantOperandVal(i: 1);
9302 Op = Op.getOperand(i: 0);
9303 if (Elem == 0)
9304 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9305 if (Elem == 1)
9306 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9307 }
9308 }
9309
9310 return 0;
9311}
9312
9313SDValue SystemZTargetLowering::combineMUL(
9314 SDNode *N, DAGCombinerInfo &DCI) const {
9315 SelectionDAG &DAG = DCI.DAG;
9316
9317 // Detect even/odd widening multiplication.
9318 SDValue Op0 = N->getOperand(Num: 0);
9319 SDValue Op1 = N->getOperand(Num: 1);
9320 unsigned OpcodeCand0 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op&: Op0);
9321 unsigned OpcodeCand1 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op&: Op1);
9322 if (OpcodeCand0 && OpcodeCand0 == OpcodeCand1)
9323 return DAG.getNode(Opcode: OpcodeCand0, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), N1: Op0, N2: Op1);
9324
9325 return SDValue();
9326}
9327
9328SDValue SystemZTargetLowering::combineINTRINSIC(
9329 SDNode *N, DAGCombinerInfo &DCI) const {
9330 SelectionDAG &DAG = DCI.DAG;
9331
9332 unsigned Id = N->getConstantOperandVal(Num: 1);
9333 switch (Id) {
9334 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
9335 // or larger is simply a vector load.
9336 case Intrinsic::s390_vll:
9337 case Intrinsic::s390_vlrl:
9338 if (auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2)))
9339 if (C->getZExtValue() >= 15)
9340 return DAG.getLoad(VT: N->getValueType(ResNo: 0), dl: SDLoc(N), Chain: N->getOperand(Num: 0),
9341 Ptr: N->getOperand(Num: 3), PtrInfo: MachinePointerInfo());
9342 break;
9343 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
9344 case Intrinsic::s390_vstl:
9345 case Intrinsic::s390_vstrl:
9346 if (auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 3)))
9347 if (C->getZExtValue() >= 15)
9348 return DAG.getStore(Chain: N->getOperand(Num: 0), dl: SDLoc(N), Val: N->getOperand(Num: 2),
9349 Ptr: N->getOperand(Num: 4), PtrInfo: MachinePointerInfo());
9350 break;
9351 }
9352
9353 return SDValue();
9354}
9355
9356SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
9357 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
9358 return N->getOperand(Num: 0);
9359 return N;
9360}
9361
9362SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
9363 DAGCombinerInfo &DCI) const {
9364 switch(N->getOpcode()) {
9365 default: break;
9366 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
9367 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
9368 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
9369 case SystemZISD::MERGE_HIGH:
9370 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
9371 case ISD::LOAD: return combineLOAD(N, DCI);
9372 case ISD::STORE: return combineSTORE(N, DCI);
9373 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
9374 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
9375 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
9376 case ISD::STRICT_FP_ROUND:
9377 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
9378 case ISD::STRICT_FP_EXTEND:
9379 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
9380 case ISD::SINT_TO_FP:
9381 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
9382 case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
9383 case ISD::BSWAP: return combineBSWAP(N, DCI);
9384 case ISD::SETCC: return combineSETCC(N, DCI);
9385 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
9386 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
9387 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
9388 case ISD::SRL:
9389 case ISD::SRA: return combineShiftToMulAddHigh(N, DCI);
9390 case ISD::MUL: return combineMUL(N, DCI);
9391 case ISD::SDIV:
9392 case ISD::UDIV:
9393 case ISD::SREM:
9394 case ISD::UREM: return combineIntDIVREM(N, DCI);
9395 case ISD::INTRINSIC_W_CHAIN:
9396 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
9397 }
9398
9399 return SDValue();
9400}
9401
9402// Return the demanded elements for the OpNo source operand of Op. DemandedElts
9403// are for Op.
9404static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
9405 unsigned OpNo) {
9406 EVT VT = Op.getValueType();
9407 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
9408 APInt SrcDemE;
9409 unsigned Opcode = Op.getOpcode();
9410 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9411 unsigned Id = Op.getConstantOperandVal(i: 0);
9412 switch (Id) {
9413 case Intrinsic::s390_vpksh: // PACKS
9414 case Intrinsic::s390_vpksf:
9415 case Intrinsic::s390_vpksg:
9416 case Intrinsic::s390_vpkshs: // PACKS_CC
9417 case Intrinsic::s390_vpksfs:
9418 case Intrinsic::s390_vpksgs:
9419 case Intrinsic::s390_vpklsh: // PACKLS
9420 case Intrinsic::s390_vpklsf:
9421 case Intrinsic::s390_vpklsg:
9422 case Intrinsic::s390_vpklshs: // PACKLS_CC
9423 case Intrinsic::s390_vpklsfs:
9424 case Intrinsic::s390_vpklsgs:
9425 // VECTOR PACK truncates the elements of two source vectors into one.
9426 SrcDemE = DemandedElts;
9427 if (OpNo == 2)
9428 SrcDemE.lshrInPlace(ShiftAmt: NumElts / 2);
9429 SrcDemE = SrcDemE.trunc(width: NumElts / 2);
9430 break;
9431 // VECTOR UNPACK extends half the elements of the source vector.
9432 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9433 case Intrinsic::s390_vuphh:
9434 case Intrinsic::s390_vuphf:
9435 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9436 case Intrinsic::s390_vuplhh:
9437 case Intrinsic::s390_vuplhf:
9438 SrcDemE = APInt(NumElts * 2, 0);
9439 SrcDemE.insertBits(SubBits: DemandedElts, bitPosition: 0);
9440 break;
9441 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9442 case Intrinsic::s390_vuplhw:
9443 case Intrinsic::s390_vuplf:
9444 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9445 case Intrinsic::s390_vupllh:
9446 case Intrinsic::s390_vupllf:
9447 SrcDemE = APInt(NumElts * 2, 0);
9448 SrcDemE.insertBits(SubBits: DemandedElts, bitPosition: NumElts);
9449 break;
9450 case Intrinsic::s390_vpdi: {
9451 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
9452 SrcDemE = APInt(NumElts, 0);
9453 if (!DemandedElts[OpNo - 1])
9454 break;
9455 unsigned Mask = Op.getConstantOperandVal(i: 3);
9456 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
9457 // Demand input element 0 or 1, given by the mask bit value.
9458 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
9459 break;
9460 }
9461 case Intrinsic::s390_vsldb: {
9462 // VECTOR SHIFT LEFT DOUBLE BY BYTE
9463 assert(VT == MVT::v16i8 && "Unexpected type.");
9464 unsigned FirstIdx = Op.getConstantOperandVal(i: 3);
9465 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
9466 unsigned NumSrc0Els = 16 - FirstIdx;
9467 SrcDemE = APInt(NumElts, 0);
9468 if (OpNo == 1) {
9469 APInt DemEls = DemandedElts.trunc(width: NumSrc0Els);
9470 SrcDemE.insertBits(SubBits: DemEls, bitPosition: FirstIdx);
9471 } else {
9472 APInt DemEls = DemandedElts.lshr(shiftAmt: NumSrc0Els);
9473 SrcDemE.insertBits(SubBits: DemEls, bitPosition: 0);
9474 }
9475 break;
9476 }
9477 case Intrinsic::s390_vperm:
9478 SrcDemE = APInt::getAllOnes(numBits: NumElts);
9479 break;
9480 default:
9481 llvm_unreachable("Unhandled intrinsic.");
9482 break;
9483 }
9484 } else {
9485 switch (Opcode) {
9486 case SystemZISD::JOIN_DWORDS:
9487 // Scalar operand.
9488 SrcDemE = APInt(1, 1);
9489 break;
9490 case SystemZISD::SELECT_CCMASK:
9491 SrcDemE = DemandedElts;
9492 break;
9493 default:
9494 llvm_unreachable("Unhandled opcode.");
9495 break;
9496 }
9497 }
9498 return SrcDemE;
9499}
9500
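// Compute the known bits of a node with two vector source operands (at OpNo
// and OpNo + 1) by computing the known bits of each demanded source and
// intersecting the results.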
9501static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
9502 const APInt &DemandedElts,
9503 const SelectionDAG &DAG, unsigned Depth,
9504 unsigned OpNo) {
9505 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9506 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo: OpNo + 1);
9507 KnownBits LHSKnown =
9508 DAG.computeKnownBits(Op: Op.getOperand(i: OpNo), DemandedElts: Src0DemE, Depth: Depth + 1);
9509 KnownBits RHSKnown =
9510 DAG.computeKnownBits(Op: Op.getOperand(i: OpNo + 1), DemandedElts: Src1DemE, Depth: Depth + 1);
9511 Known = LHSKnown.intersectWith(RHS: RHSKnown);
9512}
9513
9514void
9515SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
9516 KnownBits &Known,
9517 const APInt &DemandedElts,
9518 const SelectionDAG &DAG,
9519 unsigned Depth) const {
9520 Known.resetAll();
9521
9522 // Intrinsic CC result is returned in the two low bits.
9523 unsigned Tmp0, Tmp1; // not used
9524 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, Opcode&: Tmp0, CCValid&: Tmp1)) {
9525 Known.Zero.setBitsFrom(2);
9526 return;
9527 }
9528 EVT VT = Op.getValueType();
9529 if (Op.getResNo() != 0 || VT == MVT::Untyped)
9530 return;
9531 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
9532 "KnownBits does not match VT in bitwidth");
9533 assert ((!VT.isVector() ||
9534 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
9535 "DemandedElts does not match VT number of elements");
9536 unsigned BitWidth = Known.getBitWidth();
9537 unsigned Opcode = Op.getOpcode();
9538 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9539 bool IsLogical = false;
9540 unsigned Id = Op.getConstantOperandVal(i: 0);
9541 switch (Id) {
9542 case Intrinsic::s390_vpksh: // PACKS
9543 case Intrinsic::s390_vpksf:
9544 case Intrinsic::s390_vpksg:
9545 case Intrinsic::s390_vpkshs: // PACKS_CC
9546 case Intrinsic::s390_vpksfs:
9547 case Intrinsic::s390_vpksgs:
9548 case Intrinsic::s390_vpklsh: // PACKLS
9549 case Intrinsic::s390_vpklsf:
9550 case Intrinsic::s390_vpklsg:
9551 case Intrinsic::s390_vpklshs: // PACKLS_CC
9552 case Intrinsic::s390_vpklsfs:
9553 case Intrinsic::s390_vpklsgs:
9554 case Intrinsic::s390_vpdi:
9555 case Intrinsic::s390_vsldb:
9556 case Intrinsic::s390_vperm:
9557 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, OpNo: 1);
9558 break;
9559 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9560 case Intrinsic::s390_vuplhh:
9561 case Intrinsic::s390_vuplhf:
9562 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9563 case Intrinsic::s390_vupllh:
9564 case Intrinsic::s390_vupllf:
9565 IsLogical = true;
9566 [[fallthrough]];
9567 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9568 case Intrinsic::s390_vuphh:
9569 case Intrinsic::s390_vuphf:
9570 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9571 case Intrinsic::s390_vuplhw:
9572 case Intrinsic::s390_vuplf: {
9573 SDValue SrcOp = Op.getOperand(i: 1);
9574 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, OpNo: 0);
9575 Known = DAG.computeKnownBits(Op: SrcOp, DemandedElts: SrcDemE, Depth: Depth + 1);
9576 if (IsLogical) {
9577 Known = Known.zext(BitWidth);
9578 } else
9579 Known = Known.sext(BitWidth);
9580 break;
9581 }
9582 default:
9583 break;
9584 }
9585 } else {
9586 switch (Opcode) {
9587 case SystemZISD::JOIN_DWORDS:
9588 case SystemZISD::SELECT_CCMASK:
9589 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, OpNo: 0);
9590 break;
9591 case SystemZISD::REPLICATE: {
9592 SDValue SrcOp = Op.getOperand(i: 0);
9593 Known = DAG.computeKnownBits(Op: SrcOp, Depth: Depth + 1);
9594 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(Val: SrcOp))
9595       Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
9596 break;
9597 }
9598 default:
9599 break;
9600 }
9601 }
9602
9603 // Known has the width of the source operand(s). Adjust if needed to match
9604 // the passed bitwidth.
9605 if (Known.getBitWidth() != BitWidth)
9606 Known = Known.anyextOrTrunc(BitWidth);
9607}
9608
9609static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
9610 const SelectionDAG &DAG, unsigned Depth,
9611 unsigned OpNo) {
9612 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9613 unsigned LHS = DAG.ComputeNumSignBits(Op: Op.getOperand(i: OpNo), DemandedElts: Src0DemE, Depth: Depth + 1);
9614 if (LHS == 1) return 1; // Early out.
9615 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo: OpNo + 1);
9616 unsigned RHS = DAG.ComputeNumSignBits(Op: Op.getOperand(i: OpNo + 1), DemandedElts: Src1DemE, Depth: Depth + 1);
9617 if (RHS == 1) return 1; // Early out.
9618 unsigned Common = std::min(a: LHS, b: RHS);
9619 unsigned SrcBitWidth = Op.getOperand(i: OpNo).getScalarValueSizeInBits();
9620 EVT VT = Op.getValueType();
9621 unsigned VTBits = VT.getScalarSizeInBits();
9622 if (SrcBitWidth > VTBits) { // PACK
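    // E.g. when packing i32 elements into i16, a source element with 20
    // known sign bits keeps 20 - 16 = 4 sign bits in the truncated result.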
9623 unsigned SrcExtraBits = SrcBitWidth - VTBits;
9624 if (Common > SrcExtraBits)
9625 return (Common - SrcExtraBits);
9626 return 1;
9627 }
9628 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
9629 return Common;
9630}
9631
9632unsigned
9633SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
9634 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
9635 unsigned Depth) const {
9636 if (Op.getResNo() != 0)
9637 return 1;
9638 unsigned Opcode = Op.getOpcode();
9639 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9640 unsigned Id = Op.getConstantOperandVal(i: 0);
9641 switch (Id) {
9642 case Intrinsic::s390_vpksh: // PACKS
9643 case Intrinsic::s390_vpksf:
9644 case Intrinsic::s390_vpksg:
9645 case Intrinsic::s390_vpkshs: // PACKS_CC
9646 case Intrinsic::s390_vpksfs:
9647 case Intrinsic::s390_vpksgs:
9648 case Intrinsic::s390_vpklsh: // PACKLS
9649 case Intrinsic::s390_vpklsf:
9650 case Intrinsic::s390_vpklsg:
9651 case Intrinsic::s390_vpklshs: // PACKLS_CC
9652 case Intrinsic::s390_vpklsfs:
9653 case Intrinsic::s390_vpklsgs:
9654 case Intrinsic::s390_vpdi:
9655 case Intrinsic::s390_vsldb:
9656 case Intrinsic::s390_vperm:
9657 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, OpNo: 1);
9658 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9659 case Intrinsic::s390_vuphh:
9660 case Intrinsic::s390_vuphf:
9661 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9662 case Intrinsic::s390_vuplhw:
9663 case Intrinsic::s390_vuplf: {
9664 SDValue PackedOp = Op.getOperand(i: 1);
9665 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, OpNo: 1);
9666 unsigned Tmp = DAG.ComputeNumSignBits(Op: PackedOp, DemandedElts: SrcDemE, Depth: Depth + 1);
9667 EVT VT = Op.getValueType();
9668 unsigned VTBits = VT.getScalarSizeInBits();
9669 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
9670 return Tmp;
9671 }
9672 default:
9673 break;
9674 }
9675 } else {
9676 switch (Opcode) {
9677 case SystemZISD::SELECT_CCMASK:
9678 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, OpNo: 0);
9679 default:
9680 break;
9681 }
9682 }
9683
9684 return 1;
9685}
9686
9687bool SystemZTargetLowering::
9688isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op,
9689 const APInt &DemandedElts, const SelectionDAG &DAG,
9690 bool PoisonOnly, unsigned Depth) const {
9691 switch (Op->getOpcode()) {
9692 case SystemZISD::PCREL_WRAPPER:
9693 case SystemZISD::PCREL_OFFSET:
9694 return true;
9695 }
9696 return false;
9697}
9698
9699unsigned
9700SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
9701 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9702 unsigned StackAlign = TFI->getStackAlignment();
9703  assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
9704 "Unexpected stack alignment");
9705 // The default stack probe size is 4096 if the function has no
9706 // stack-probe-size attribute.
9707 unsigned StackProbeSize =
9708 MF.getFunction().getFnAttributeAsParsedInteger(Kind: "stack-probe-size", Default: 4096);
9709 // Round down to the stack alignment.
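  // E.g. a "stack-probe-size" of 4100 with an 8-byte stack alignment is
  // rounded down to 4096.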
9710 StackProbeSize &= ~(StackAlign - 1);
9711 return StackProbeSize ? StackProbeSize : StackAlign;
9712}
9713
9714//===----------------------------------------------------------------------===//
9715// Custom insertion
9716//===----------------------------------------------------------------------===//
9717
9718// Force base value Base into a register before MI. Return the register.
9719static Register forceReg(MachineInstr &MI, MachineOperand &Base,
9720 const SystemZInstrInfo *TII) {
9721 MachineBasicBlock *MBB = MI.getParent();
9722 MachineFunction &MF = *MBB->getParent();
9723 MachineRegisterInfo &MRI = MF.getRegInfo();
9724
9725 if (Base.isReg()) {
9726 // Copy Base into a new virtual register to help register coalescing in
9727 // cases with multiple uses.
9728 Register Reg = MRI.createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass);
9729 BuildMI(BB&: *MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: SystemZ::COPY), DestReg: Reg)
9730 .add(MO: Base);
9731 return Reg;
9732 }
9733
9734 Register Reg = MRI.createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass);
9735 BuildMI(BB&: *MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: SystemZ::LA), DestReg: Reg)
9736 .add(MO: Base)
9737 .addImm(Val: 0)
9738 .addReg(RegNo: 0);
9739 return Reg;
9740}
9741
9742// The CC operand of MI might be missing a kill marker because there
9743// were multiple uses of CC, and ISel didn't know which to mark.
9744// Figure out whether MI should have had a kill marker.
9745static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
9746 // Scan forward through BB for a use/def of CC.
9747 MachineBasicBlock::iterator miI(std::next(x: MachineBasicBlock::iterator(MI)));
9748 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
9749     const MachineInstr &CurMI = *miI;
9750     if (CurMI.readsRegister(Reg: SystemZ::CC, /*TRI=*/nullptr))
9751       return false;
9752     if (CurMI.definesRegister(Reg: SystemZ::CC, /*TRI=*/nullptr))
9753 break; // Should have kill-flag - update below.
9754 }
9755
9756 // If we hit the end of the block, check whether CC is live into a
9757 // successor.
9758 if (miI == MBB->end()) {
9759 for (const MachineBasicBlock *Succ : MBB->successors())
9760 if (Succ->isLiveIn(Reg: SystemZ::CC))
9761 return false;
9762 }
9763
9764 return true;
9765}
9766
9767// Return true if it is OK for this Select pseudo-opcode to be cascaded
9768// together with other Select pseudo-opcodes into a single basic-block with
9769// a conditional jump around it.
9770static bool isSelectPseudo(MachineInstr &MI) {
9771 switch (MI.getOpcode()) {
9772 case SystemZ::Select32:
9773 case SystemZ::Select64:
9774 case SystemZ::Select128:
9775 case SystemZ::SelectF32:
9776 case SystemZ::SelectF64:
9777 case SystemZ::SelectF128:
9778 case SystemZ::SelectVR32:
9779 case SystemZ::SelectVR64:
9780 case SystemZ::SelectVR128:
9781 return true;
9782
9783 default:
9784 return false;
9785 }
9786}
9787
9788// Helper function, which inserts PHI functions into SinkMBB:
9789// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
9790// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
9791static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
9792 MachineBasicBlock *TrueMBB,
9793 MachineBasicBlock *FalseMBB,
9794 MachineBasicBlock *SinkMBB) {
9795 MachineFunction *MF = TrueMBB->getParent();
9796 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
9797
9798 MachineInstr *FirstMI = Selects.front();
9799 unsigned CCValid = FirstMI->getOperand(i: 3).getImm();
9800 unsigned CCMask = FirstMI->getOperand(i: 4).getImm();
9801
9802 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
9803
9804 // As we are creating the PHIs, we have to be careful if there is more than
9805 // one. Later Selects may reference the results of earlier Selects, but later
9806 // PHIs have to reference the individual true/false inputs from earlier PHIs.
9807 // That also means that PHI construction must work forward from earlier to
9808 // later, and that the code must maintain a mapping from earlier PHI's
9809 // destination registers, and the registers that went into the PHI.
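  // E.g. if a later Select uses the result of an earlier Select as its true
  // operand, the later PHI takes that earlier Select's true input (its value
  // on the TrueMBB path) instead.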
9810 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
9811
9812 for (auto *MI : Selects) {
9813 Register DestReg = MI->getOperand(i: 0).getReg();
9814 Register TrueReg = MI->getOperand(i: 1).getReg();
9815 Register FalseReg = MI->getOperand(i: 2).getReg();
9816
9817 // If this Select we are generating is the opposite condition from
9818 // the jump we generated, then we have to swap the operands for the
9819 // PHI that is going to be generated.
9820 if (MI->getOperand(i: 4).getImm() == (CCValid ^ CCMask))
9821 std::swap(a&: TrueReg, b&: FalseReg);
9822
9823 if (auto It = RegRewriteTable.find(Val: TrueReg); It != RegRewriteTable.end())
9824 TrueReg = It->second.first;
9825
9826 if (auto It = RegRewriteTable.find(Val: FalseReg); It != RegRewriteTable.end())
9827 FalseReg = It->second.second;
9828
9829 DebugLoc DL = MI->getDebugLoc();
9830 BuildMI(BB&: *SinkMBB, I: SinkInsertionPoint, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg)
9831 .addReg(RegNo: TrueReg).addMBB(MBB: TrueMBB)
9832 .addReg(RegNo: FalseReg).addMBB(MBB: FalseMBB);
9833
9834 // Add this PHI to the rewrite table.
9835 RegRewriteTable[DestReg] = std::make_pair(x&: TrueReg, y&: FalseReg);
9836 }
9837
9838 MF->getProperties().resetNoPHIs();
9839}
9840
9841MachineBasicBlock *
9842SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
9843 MachineBasicBlock *BB) const {
9844 MachineFunction &MF = *BB->getParent();
9845 MachineFrameInfo &MFI = MF.getFrameInfo();
9846 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
9847 assert(TFL->hasReservedCallFrame(MF) &&
9848 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
9849 (void)TFL;
9850 // Get the MaxCallFrameSize value and erase MI since it serves no further
9851 // purpose as the call frame is statically reserved in the prolog. Set
9852 // AdjustsStack as MI is *not* mapped as a frame instruction.
9853 uint32_t NumBytes = MI.getOperand(i: 0).getImm();
9854 if (NumBytes > MFI.getMaxCallFrameSize())
9855 MFI.setMaxCallFrameSize(NumBytes);
9856 MFI.setAdjustsStack(true);
9857
9858 MI.eraseFromParent();
9859 return BB;
9860}
9861
9862// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
9863MachineBasicBlock *
9864SystemZTargetLowering::emitSelect(MachineInstr &MI,
9865 MachineBasicBlock *MBB) const {
9866 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
9867 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9868
9869 unsigned CCValid = MI.getOperand(i: 3).getImm();
9870 unsigned CCMask = MI.getOperand(i: 4).getImm();
9871
9872 // If we have a sequence of Select* pseudo instructions using the
9873 // same condition code value, we want to expand all of them into
9874 // a single pair of basic blocks using the same condition.
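  // E.g. two adjacent Select32 pseudos testing the same CC value become one
  // BRC plus two PHIs in the join block rather than two separate diamonds.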
9875 SmallVector<MachineInstr*, 8> Selects;
9876 SmallVector<MachineInstr*, 8> DbgValues;
9877 Selects.push_back(Elt: &MI);
9878 unsigned Count = 0;
9879 for (MachineInstr &NextMI : llvm::make_range(
9880 x: std::next(x: MachineBasicBlock::iterator(MI)), y: MBB->end())) {
9881 if (isSelectPseudo(MI&: NextMI)) {
9882 assert(NextMI.getOperand(3).getImm() == CCValid &&
9883 "Bad CCValid operands since CC was not redefined.");
9884 if (NextMI.getOperand(i: 4).getImm() == CCMask ||
9885 NextMI.getOperand(i: 4).getImm() == (CCValid ^ CCMask)) {
9886 Selects.push_back(Elt: &NextMI);
9887 continue;
9888 }
9889 break;
9890 }
9891 if (NextMI.definesRegister(Reg: SystemZ::CC, /*TRI=*/nullptr) ||
9892 NextMI.usesCustomInsertionHook())
9893 break;
9894 bool User = false;
9895 for (auto *SelMI : Selects)
9896 if (NextMI.readsVirtualRegister(Reg: SelMI->getOperand(i: 0).getReg())) {
9897 User = true;
9898 break;
9899 }
9900 if (NextMI.isDebugInstr()) {
9901 if (User) {
9902 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
9903 DbgValues.push_back(Elt: &NextMI);
9904 }
9905 } else if (User || ++Count > 20)
9906 break;
9907 }
9908
9909 MachineInstr *LastMI = Selects.back();
9910 bool CCKilled = (LastMI->killsRegister(Reg: SystemZ::CC, /*TRI=*/nullptr) ||
9911 checkCCKill(MI&: *LastMI, MBB));
9912 MachineBasicBlock *StartMBB = MBB;
9913 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI: LastMI, MBB);
9914 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(MBB: StartMBB);
9915
9916 // Unless CC was killed in the last Select instruction, mark it as
9917 // live-in to both FalseMBB and JoinMBB.
9918 if (!CCKilled) {
9919 FalseMBB->addLiveIn(PhysReg: SystemZ::CC);
9920 JoinMBB->addLiveIn(PhysReg: SystemZ::CC);
9921 }
9922
9923 // StartMBB:
9924 // BRC CCMask, JoinMBB
9925 // # fallthrough to FalseMBB
9926 MBB = StartMBB;
9927 BuildMI(BB: MBB, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: SystemZ::BRC))
9928 .addImm(Val: CCValid).addImm(Val: CCMask).addMBB(MBB: JoinMBB);
9929 MBB->addSuccessor(Succ: JoinMBB);
9930 MBB->addSuccessor(Succ: FalseMBB);
9931
9932 // FalseMBB:
9933 // # fallthrough to JoinMBB
9934 MBB = FalseMBB;
9935 MBB->addSuccessor(Succ: JoinMBB);
9936
9937 // JoinMBB:
9938 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
9939 // ...
9940 MBB = JoinMBB;
9941 createPHIsForSelects(Selects, TrueMBB: StartMBB, FalseMBB, SinkMBB: MBB);
9942 for (auto *SelMI : Selects)
9943 SelMI->eraseFromParent();
9944
9945 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
9946 for (auto *DbgMI : DbgValues)
9947 MBB->splice(Where: InsertPos, Other: StartMBB, From: DbgMI);
9948
9949 return JoinMBB;
9950}
9951
9952// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
9953// StoreOpcode is the store to use and Invert says whether the store should
9954// happen when the condition is false rather than true. If a STORE ON
9955// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
9956MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
9957 MachineBasicBlock *MBB,
9958 unsigned StoreOpcode,
9959 unsigned STOCOpcode,
9960 bool Invert) const {
9961 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9962
9963 Register SrcReg = MI.getOperand(i: 0).getReg();
9964 MachineOperand Base = MI.getOperand(i: 1);
9965 int64_t Disp = MI.getOperand(i: 2).getImm();
9966 Register IndexReg = MI.getOperand(i: 3).getReg();
9967 unsigned CCValid = MI.getOperand(i: 4).getImm();
9968 unsigned CCMask = MI.getOperand(i: 5).getImm();
9969 DebugLoc DL = MI.getDebugLoc();
9970
9971 StoreOpcode = TII->getOpcodeForOffset(Opcode: StoreOpcode, Offset: Disp);
9972
9973 // ISel pattern matching also adds a load memory operand of the same
9974 // address, so take special care to find the storing memory operand.
9975 MachineMemOperand *MMO = nullptr;
9976 for (auto *I : MI.memoperands())
9977 if (I->isStore()) {
9978 MMO = I;
9979 break;
9980 }
9981
9982 // Use STOCOpcode if possible. We could use different store patterns in
9983 // order to avoid matching the index register, but the performance trade-offs
9984 // might be more complicated in that case.
9985 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
9986 if (Invert)
9987 CCMask ^= CCValid;
9988
9989 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: STOCOpcode))
9990 .addReg(RegNo: SrcReg)
9991 .add(MO: Base)
9992 .addImm(Val: Disp)
9993 .addImm(Val: CCValid)
9994 .addImm(Val: CCMask)
9995 .addMemOperand(MMO);
9996
9997 MI.eraseFromParent();
9998 return MBB;
9999 }
10000
10001 // Get the condition needed to branch around the store.
10002 if (!Invert)
10003 CCMask ^= CCValid;
10004
10005 MachineBasicBlock *StartMBB = MBB;
10006 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
10007 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(MBB: StartMBB);
10008
10009 // Unless CC was killed in the CondStore instruction, mark it as
10010 // live-in to both FalseMBB and JoinMBB.
10011 if (!MI.killsRegister(Reg: SystemZ::CC, /*TRI=*/nullptr) &&
10012 !checkCCKill(MI, MBB: JoinMBB)) {
10013 FalseMBB->addLiveIn(PhysReg: SystemZ::CC);
10014 JoinMBB->addLiveIn(PhysReg: SystemZ::CC);
10015 }
10016
10017 // StartMBB:
10018 // BRC CCMask, JoinMBB
10019 // # fallthrough to FalseMBB
10020 MBB = StartMBB;
10021 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC))
10022 .addImm(Val: CCValid).addImm(Val: CCMask).addMBB(MBB: JoinMBB);
10023 MBB->addSuccessor(Succ: JoinMBB);
10024 MBB->addSuccessor(Succ: FalseMBB);
10025
10026 // FalseMBB:
10027 // store %SrcReg, %Disp(%Index,%Base)
10028 // # fallthrough to JoinMBB
10029 MBB = FalseMBB;
10030 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: StoreOpcode))
10031 .addReg(RegNo: SrcReg)
10032 .add(MO: Base)
10033 .addImm(Val: Disp)
10034 .addReg(RegNo: IndexReg)
10035 .addMemOperand(MMO);
10036 MBB->addSuccessor(Succ: JoinMBB);
10037
10038 MI.eraseFromParent();
10039 return JoinMBB;
10040}
10041
10042// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
10043MachineBasicBlock *
10044SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
10045 MachineBasicBlock *MBB,
10046 bool Unsigned) const {
10047 MachineFunction &MF = *MBB->getParent();
10048 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10049 MachineRegisterInfo &MRI = MF.getRegInfo();
10050
10051 // Synthetic instruction to compare 128-bit values.
10052 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
10053 Register Op0 = MI.getOperand(i: 0).getReg();
10054 Register Op1 = MI.getOperand(i: 1).getReg();
10055
10056 MachineBasicBlock *StartMBB = MBB;
10057 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
10058 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(MBB: StartMBB);
10059
10060 // StartMBB:
10061 //
10062 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
10063 // Swap the inputs to get:
10064 // CC 1 if high(Op0) > high(Op1)
10065 // CC 2 if high(Op0) < high(Op1)
10066 // CC 0 if high(Op0) == high(Op1)
10067 //
10068  // If CC != 0, we're done, so jump over the next instruction.
10069 //
10070 // VEC[L]G Op1, Op0
10071 // JNE JoinMBB
10072 // # fallthrough to HiEqMBB
10073 MBB = StartMBB;
10074 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
10075 BuildMI(BB: MBB, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: HiOpcode))
10076 .addReg(RegNo: Op1).addReg(RegNo: Op0);
10077 BuildMI(BB: MBB, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: SystemZ::BRC))
10078 .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_NE).addMBB(MBB: JoinMBB);
10079 MBB->addSuccessor(Succ: JoinMBB);
10080 MBB->addSuccessor(Succ: HiEqMBB);
10081
10082 // HiEqMBB:
10083 //
10084 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
10085 // Since we already know the high parts are equal, the CC
10086 // result will only depend on the low parts:
10087 // CC 1 if low(Op0) > low(Op1)
10088 // CC 3 if low(Op0) <= low(Op1)
10089 //
10090 // VCHLGS Tmp, Op0, Op1
10091 // # fallthrough to JoinMBB
10092 MBB = HiEqMBB;
10093 Register Temp = MRI.createVirtualRegister(RegClass: &SystemZ::VR128BitRegClass);
10094 BuildMI(BB: MBB, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: SystemZ::VCHLGS), DestReg: Temp)
10095 .addReg(RegNo: Op0).addReg(RegNo: Op1);
10096 MBB->addSuccessor(Succ: JoinMBB);
10097
10098 // Mark CC as live-in to JoinMBB.
10099 JoinMBB->addLiveIn(PhysReg: SystemZ::CC);
10100
10101 MI.eraseFromParent();
10102 return JoinMBB;
10103}
10104
10105// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
10106// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
10107// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
10108// whether the field should be inverted after performing BinOpcode (e.g. for
10109// NAND).
10110MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
10111 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
10112 bool Invert) const {
10113 MachineFunction &MF = *MBB->getParent();
10114 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10115 MachineRegisterInfo &MRI = MF.getRegInfo();
10116
10117 // Extract the operands. Base can be a register or a frame index.
10118 // Src2 can be a register or immediate.
10119 Register Dest = MI.getOperand(i: 0).getReg();
10120 MachineOperand Base = earlyUseOperand(Op: MI.getOperand(i: 1));
10121 int64_t Disp = MI.getOperand(i: 2).getImm();
10122 MachineOperand Src2 = earlyUseOperand(Op: MI.getOperand(i: 3));
10123 Register BitShift = MI.getOperand(i: 4).getReg();
10124 Register NegBitShift = MI.getOperand(i: 5).getReg();
10125 unsigned BitSize = MI.getOperand(i: 6).getImm();
10126 DebugLoc DL = MI.getDebugLoc();
10127
10128 // Get the right opcodes for the displacement.
10129 unsigned LOpcode = TII->getOpcodeForOffset(Opcode: SystemZ::L, Offset: Disp);
10130 unsigned CSOpcode = TII->getOpcodeForOffset(Opcode: SystemZ::CS, Offset: Disp);
10131 assert(LOpcode && CSOpcode && "Displacement out of range");
10132
10133 // Create virtual registers for temporary results.
10134 Register OrigVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass);
10135 Register OldVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass);
10136 Register NewVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass);
10137 Register RotatedOldVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass);
10138 Register RotatedNewVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass);
10139
10140 // Insert a basic block for the main loop.
10141 MachineBasicBlock *StartMBB = MBB;
10142 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10143 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB);
10144
10145 // StartMBB:
10146 // ...
10147 // %OrigVal = L Disp(%Base)
10148 // # fall through to LoopMBB
10149 MBB = StartMBB;
10150 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: LOpcode), DestReg: OrigVal).add(MO: Base).addImm(Val: Disp).addReg(RegNo: 0);
10151 MBB->addSuccessor(Succ: LoopMBB);
10152
10153 // LoopMBB:
10154 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
10155 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10156 // %RotatedNewVal = OP %RotatedOldVal, %Src2
10157 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10158 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10159 // JNE LoopMBB
10160 // # fall through to DoneMBB
10161 MBB = LoopMBB;
10162 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: OldVal)
10163 .addReg(RegNo: OrigVal).addMBB(MBB: StartMBB)
10164 .addReg(RegNo: Dest).addMBB(MBB: LoopMBB);
10165 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RLL), DestReg: RotatedOldVal)
10166 .addReg(RegNo: OldVal).addReg(RegNo: BitShift).addImm(Val: 0);
10167 if (Invert) {
10168 // Perform the operation normally and then invert every bit of the field.
10169 Register Tmp = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass);
10170 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: BinOpcode), DestReg: Tmp).addReg(RegNo: RotatedOldVal).add(MO: Src2);
10171 // XILF with the upper BitSize bits set.
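    // E.g. for BitSize == 8 the immediate is 0xff000000, inverting only the
    // field, which sits in the top BitSize bits of the rotated value.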
10172 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::XILF), DestReg: RotatedNewVal)
10173 .addReg(RegNo: Tmp).addImm(Val: -1U << (32 - BitSize));
10174 } else if (BinOpcode)
10175    // A simple binary operation.
10176 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: BinOpcode), DestReg: RotatedNewVal)
10177 .addReg(RegNo: RotatedOldVal)
10178 .add(MO: Src2);
10179 else
10180 // Use RISBG to rotate Src2 into position and use it to replace the
10181 // field in RotatedOldVal.
10182 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RISBG32), DestReg: RotatedNewVal)
10183 .addReg(RegNo: RotatedOldVal).addReg(RegNo: Src2.getReg())
10184 .addImm(Val: 32).addImm(Val: 31 + BitSize).addImm(Val: 32 - BitSize);
10185 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RLL), DestReg: NewVal)
10186 .addReg(RegNo: RotatedNewVal).addReg(RegNo: NegBitShift).addImm(Val: 0);
10187 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: CSOpcode), DestReg: Dest)
10188 .addReg(RegNo: OldVal)
10189 .addReg(RegNo: NewVal)
10190 .add(MO: Base)
10191 .addImm(Val: Disp);
10192 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC))
10193 .addImm(Val: SystemZ::CCMASK_CS).addImm(Val: SystemZ::CCMASK_CS_NE).addMBB(MBB: LoopMBB);
10194 MBB->addSuccessor(Succ: LoopMBB);
10195 MBB->addSuccessor(Succ: DoneMBB);
10196
10197 MI.eraseFromParent();
10198 return DoneMBB;
10199}
10200
10201// Implement EmitInstrWithCustomInserter for subword pseudo
10202// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
10203// instruction that should be used to compare the current field with the
10204// minimum or maximum value. KeepOldMask is the BRC condition-code mask
10205// for when the current field should be kept.
10206MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
10207 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
10208 unsigned KeepOldMask) const {
10209 MachineFunction &MF = *MBB->getParent();
10210 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10211 MachineRegisterInfo &MRI = MF.getRegInfo();
10212
10213 // Extract the operands. Base can be a register or a frame index.
10214 Register Dest = MI.getOperand(i: 0).getReg();
10215 MachineOperand Base = earlyUseOperand(Op: MI.getOperand(i: 1));
10216 int64_t Disp = MI.getOperand(i: 2).getImm();
10217 Register Src2 = MI.getOperand(i: 3).getReg();
10218 Register BitShift = MI.getOperand(i: 4).getReg();
10219 Register NegBitShift = MI.getOperand(i: 5).getReg();
10220 unsigned BitSize = MI.getOperand(i: 6).getImm();
10221 DebugLoc DL = MI.getDebugLoc();
10222
10223 // Get the right opcodes for the displacement.
10224 unsigned LOpcode = TII->getOpcodeForOffset(Opcode: SystemZ::L, Offset: Disp);
10225 unsigned CSOpcode = TII->getOpcodeForOffset(Opcode: SystemZ::CS, Offset: Disp);
10226 assert(LOpcode && CSOpcode && "Displacement out of range");
10227
10228 // Create virtual registers for temporary results.
10229 Register OrigVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass);
10230 Register OldVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass);
10231 Register NewVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass);
10232 Register RotatedOldVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass);
10233 Register RotatedAltVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass);
10234 Register RotatedNewVal = MRI.createVirtualRegister(RegClass: &SystemZ::GR32BitRegClass);
10235
10236 // Insert 3 basic blocks for the loop.
10237 MachineBasicBlock *StartMBB = MBB;
10238 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10239 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB);
10240 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(MBB: LoopMBB);
10241 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(MBB: UseAltMBB);
10242
10243 // StartMBB:
10244 // ...
10245 // %OrigVal = L Disp(%Base)
10246 // # fall through to LoopMBB
10247 MBB = StartMBB;
10248 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: LOpcode), DestReg: OrigVal).add(MO: Base).addImm(Val: Disp).addReg(RegNo: 0);
10249 MBB->addSuccessor(Succ: LoopMBB);
10250
10251 // LoopMBB:
10252 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
10253 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10254 // CompareOpcode %RotatedOldVal, %Src2
10255 // BRC KeepOldMask, UpdateMBB
10256 MBB = LoopMBB;
10257 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: OldVal)
10258 .addReg(RegNo: OrigVal).addMBB(MBB: StartMBB)
10259 .addReg(RegNo: Dest).addMBB(MBB: UpdateMBB);
10260 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RLL), DestReg: RotatedOldVal)
10261 .addReg(RegNo: OldVal).addReg(RegNo: BitShift).addImm(Val: 0);
10262 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: CompareOpcode))
10263 .addReg(RegNo: RotatedOldVal).addReg(RegNo: Src2);
10264 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC))
10265 .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: KeepOldMask).addMBB(MBB: UpdateMBB);
10266 MBB->addSuccessor(Succ: UpdateMBB);
10267 MBB->addSuccessor(Succ: UseAltMBB);
10268
10269 // UseAltMBB:
10270 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
10271 // # fall through to UpdateMBB
10272 MBB = UseAltMBB;
10273 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RISBG32), DestReg: RotatedAltVal)
10274 .addReg(RegNo: RotatedOldVal).addReg(RegNo: Src2)
10275 .addImm(Val: 32).addImm(Val: 31 + BitSize).addImm(Val: 0);
10276 MBB->addSuccessor(Succ: UpdateMBB);
10277
10278 // UpdateMBB:
10279 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
10280 // [ %RotatedAltVal, UseAltMBB ]
10281 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10282 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10283 // JNE LoopMBB
10284 // # fall through to DoneMBB
10285 MBB = UpdateMBB;
10286 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: RotatedNewVal)
10287 .addReg(RegNo: RotatedOldVal).addMBB(MBB: LoopMBB)
10288 .addReg(RegNo: RotatedAltVal).addMBB(MBB: UseAltMBB);
10289 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RLL), DestReg: NewVal)
10290 .addReg(RegNo: RotatedNewVal).addReg(RegNo: NegBitShift).addImm(Val: 0);
10291 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: CSOpcode), DestReg: Dest)
10292 .addReg(RegNo: OldVal)
10293 .addReg(RegNo: NewVal)
10294 .add(MO: Base)
10295 .addImm(Val: Disp);
10296 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC))
10297 .addImm(Val: SystemZ::CCMASK_CS).addImm(Val: SystemZ::CCMASK_CS_NE).addMBB(MBB: LoopMBB);
10298 MBB->addSuccessor(Succ: LoopMBB);
10299 MBB->addSuccessor(Succ: DoneMBB);
10300
10301 MI.eraseFromParent();
10302 return DoneMBB;
10303}
10304
10305// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
10306// instruction MI.
10307MachineBasicBlock *
10308SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
10309 MachineBasicBlock *MBB) const {
10310 MachineFunction &MF = *MBB->getParent();
10311 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10312 MachineRegisterInfo &MRI = MF.getRegInfo();
10313
10314 // Extract the operands. Base can be a register or a frame index.
10315 Register Dest = MI.getOperand(i: 0).getReg();
10316 MachineOperand Base = earlyUseOperand(Op: MI.getOperand(i: 1));
10317 int64_t Disp = MI.getOperand(i: 2).getImm();
10318 Register CmpVal = MI.getOperand(i: 3).getReg();
10319 Register OrigSwapVal = MI.getOperand(i: 4).getReg();
10320 Register BitShift = MI.getOperand(i: 5).getReg();
10321 Register NegBitShift = MI.getOperand(i: 6).getReg();
10322 int64_t BitSize = MI.getOperand(i: 7).getImm();
10323 DebugLoc DL = MI.getDebugLoc();
10324
10325 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
10326
10327 // Get the right opcodes for the displacement and zero-extension.
10328 unsigned LOpcode = TII->getOpcodeForOffset(Opcode: SystemZ::L, Offset: Disp);
10329 unsigned CSOpcode = TII->getOpcodeForOffset(Opcode: SystemZ::CS, Offset: Disp);
10330 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
10331 assert(LOpcode && CSOpcode && "Displacement out of range");
10332
10333 // Create virtual registers for temporary results.
10334 Register OrigOldVal = MRI.createVirtualRegister(RegClass: RC);
10335 Register OldVal = MRI.createVirtualRegister(RegClass: RC);
10336 Register SwapVal = MRI.createVirtualRegister(RegClass: RC);
10337 Register StoreVal = MRI.createVirtualRegister(RegClass: RC);
10338 Register OldValRot = MRI.createVirtualRegister(RegClass: RC);
10339 Register RetryOldVal = MRI.createVirtualRegister(RegClass: RC);
10340 Register RetrySwapVal = MRI.createVirtualRegister(RegClass: RC);
10341
10342 // Insert 2 basic blocks for the loop.
10343 MachineBasicBlock *StartMBB = MBB;
10344 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10345 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB);
10346 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(MBB: LoopMBB);
10347
10348 // StartMBB:
10349 // ...
10350 // %OrigOldVal = L Disp(%Base)
10351 // # fall through to LoopMBB
10352 MBB = StartMBB;
10353 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: LOpcode), DestReg: OrigOldVal)
10354 .add(MO: Base)
10355 .addImm(Val: Disp)
10356 .addReg(RegNo: 0);
10357 MBB->addSuccessor(Succ: LoopMBB);
10358
10359 // LoopMBB:
10360 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
10361 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
10362 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
10363 // ^^ The low BitSize bits contain the field
10364 // of interest.
10365 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
10366 // ^^ Replace the upper 32-BitSize bits of the
10367 // swap value with those that we loaded and rotated.
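  //        E.g. for BitSize == 8 the low 8 bits (the new field) stay from
  //        %SwapVal and the upper 24 bits come from %OldValRot.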
10368 // %Dest = LL[CH] %OldValRot
10369 // CR %Dest, %CmpVal
10370 // JNE DoneMBB
10371 // # Fall through to SetMBB
10372 MBB = LoopMBB;
10373 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: OldVal)
10374 .addReg(RegNo: OrigOldVal).addMBB(MBB: StartMBB)
10375 .addReg(RegNo: RetryOldVal).addMBB(MBB: SetMBB);
10376 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: SwapVal)
10377 .addReg(RegNo: OrigSwapVal).addMBB(MBB: StartMBB)
10378 .addReg(RegNo: RetrySwapVal).addMBB(MBB: SetMBB);
10379 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RLL), DestReg: OldValRot)
10380 .addReg(RegNo: OldVal).addReg(RegNo: BitShift).addImm(Val: BitSize);
10381 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RISBG32), DestReg: RetrySwapVal)
10382 .addReg(RegNo: SwapVal).addReg(RegNo: OldValRot).addImm(Val: 32).addImm(Val: 63 - BitSize).addImm(Val: 0);
10383 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: ZExtOpcode), DestReg: Dest)
10384 .addReg(RegNo: OldValRot);
10385 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CR))
10386 .addReg(RegNo: Dest).addReg(RegNo: CmpVal);
10387 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC))
10388 .addImm(Val: SystemZ::CCMASK_ICMP)
10389 .addImm(Val: SystemZ::CCMASK_CMP_NE).addMBB(MBB: DoneMBB);
10390 MBB->addSuccessor(Succ: DoneMBB);
10391 MBB->addSuccessor(Succ: SetMBB);
10392
10393 // SetMBB:
10394 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
10395 // ^^ Rotate the new field to its proper position.
10396 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
10397 // JNE LoopMBB
10398 // # fall through to ExitMBB
10399 MBB = SetMBB;
10400 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::RLL), DestReg: StoreVal)
10401 .addReg(RegNo: RetrySwapVal).addReg(RegNo: NegBitShift).addImm(Val: -BitSize);
10402 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: CSOpcode), DestReg: RetryOldVal)
10403 .addReg(RegNo: OldVal)
10404 .addReg(RegNo: StoreVal)
10405 .add(MO: Base)
10406 .addImm(Val: Disp);
10407 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC))
10408 .addImm(Val: SystemZ::CCMASK_CS).addImm(Val: SystemZ::CCMASK_CS_NE).addMBB(MBB: LoopMBB);
10409 MBB->addSuccessor(Succ: LoopMBB);
10410 MBB->addSuccessor(Succ: DoneMBB);
10411
10412 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
10413 // to the block after the loop. At this point, CC may have been defined
10414 // either by the CR in LoopMBB or by the CS in SetMBB.
10415 if (!MI.registerDefIsDead(Reg: SystemZ::CC, /*TRI=*/nullptr))
10416 DoneMBB->addLiveIn(PhysReg: SystemZ::CC);
10417
10418 MI.eraseFromParent();
10419 return DoneMBB;
10420}
10421
10422// Emit a move from two GR64s to a GR128.
10423MachineBasicBlock *
10424SystemZTargetLowering::emitPair128(MachineInstr &MI,
10425 MachineBasicBlock *MBB) const {
10426 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10427 const DebugLoc &DL = MI.getDebugLoc();
10428
10429 Register Dest = MI.getOperand(i: 0).getReg();
10430 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::REG_SEQUENCE), DestReg: Dest)
10431 .add(MO: MI.getOperand(i: 1))
10432 .addImm(Val: SystemZ::subreg_h64)
10433 .add(MO: MI.getOperand(i: 2))
10434 .addImm(Val: SystemZ::subreg_l64);
10435 MI.eraseFromParent();
10436 return MBB;
10437}
10438
10439 // Emit an extension from a GR64 to a GR128. ClearEven is true
10440 // if the high register of the GR128 value must be cleared, or false if
10441 // its contents are a don't-care.
10442MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
10443 MachineBasicBlock *MBB,
10444 bool ClearEven) const {
10445 MachineFunction &MF = *MBB->getParent();
10446 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10447 MachineRegisterInfo &MRI = MF.getRegInfo();
10448 DebugLoc DL = MI.getDebugLoc();
10449
10450 Register Dest = MI.getOperand(i: 0).getReg();
10451 Register Src = MI.getOperand(i: 1).getReg();
10452 Register In128 = MRI.createVirtualRegister(RegClass: &SystemZ::GR128BitRegClass);
10453
10454 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: In128);
10455 if (ClearEven) {
10456 Register NewIn128 = MRI.createVirtualRegister(RegClass: &SystemZ::GR128BitRegClass);
10457 Register Zero64 = MRI.createVirtualRegister(RegClass: &SystemZ::GR64BitRegClass);
10458
10459 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LLILL), DestReg: Zero64)
10460 .addImm(Val: 0);
10461 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::INSERT_SUBREG), DestReg: NewIn128)
10462 .addReg(RegNo: In128).addReg(RegNo: Zero64).addImm(Val: SystemZ::subreg_h64);
10463 In128 = NewIn128;
10464 }
10465 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::INSERT_SUBREG), DestReg: Dest)
10466 .addReg(RegNo: In128).addReg(RegNo: Src).addImm(Val: SystemZ::subreg_l64);
10467
10468 MI.eraseFromParent();
10469 return MBB;
10470}
10471
10472MachineBasicBlock *
10473SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
10474 MachineBasicBlock *MBB,
10475 unsigned Opcode, bool IsMemset) const {
10476 MachineFunction &MF = *MBB->getParent();
10477 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10478 MachineRegisterInfo &MRI = MF.getRegInfo();
10479 DebugLoc DL = MI.getDebugLoc();
10480
10481 MachineOperand DestBase = earlyUseOperand(Op: MI.getOperand(i: 0));
10482 uint64_t DestDisp = MI.getOperand(i: 1).getImm();
10483 MachineOperand SrcBase = MachineOperand::CreateReg(Reg: 0U, isDef: false);
10484 uint64_t SrcDisp;
10485
10486 // Fold the displacement Disp if it is out of range.
10487 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
10488 if (!isUInt<12>(x: Disp)) {
10489 Register Reg = MRI.createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass);
10490 unsigned Opcode = TII->getOpcodeForOffset(Opcode: SystemZ::LA, Offset: Disp);
10491 BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode), DestReg: Reg)
10492 .add(MO: Base).addImm(Val: Disp).addReg(RegNo: 0);
10493 Base = MachineOperand::CreateReg(Reg, isDef: false);
10494 Disp = 0;
10495 }
10496 };
10497
10498 if (!IsMemset) {
10499 SrcBase = earlyUseOperand(Op: MI.getOperand(i: 2));
10500 SrcDisp = MI.getOperand(i: 3).getImm();
10501 } else {
10502 SrcBase = DestBase;
10503 SrcDisp = DestDisp++;
10504 foldDisplIfNeeded(DestBase, DestDisp);
10505 }
10506
10507 MachineOperand &LengthMO = MI.getOperand(i: IsMemset ? 2 : 4);
10508 bool IsImmForm = LengthMO.isImm();
10509 bool IsRegForm = !IsImmForm;
10510
10511 // Build and insert one Opcode of Length, with special treatment for memset.
10512 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
10513 MachineBasicBlock::iterator InsPos,
10514 MachineOperand DBase, uint64_t DDisp,
10515 MachineOperand SBase, uint64_t SDisp,
10516 unsigned Length) -> void {
10517 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
10518 if (IsMemset) {
10519 MachineOperand ByteMO = earlyUseOperand(Op: MI.getOperand(i: 3));
10520 if (ByteMO.isImm())
10521 BuildMI(BB&: *InsMBB, I: InsPos, MIMD: DL, MCID: TII->get(Opcode: SystemZ::MVI))
10522 .add(MO: SBase).addImm(Val: SDisp).add(MO: ByteMO);
10523 else
10524 BuildMI(BB&: *InsMBB, I: InsPos, MIMD: DL, MCID: TII->get(Opcode: SystemZ::STC))
10525 .add(MO: ByteMO).add(MO: SBase).addImm(Val: SDisp).addReg(RegNo: 0);
10526 if (--Length == 0)
10527 return;
10528 }
10529 BuildMI(BB&: *MBB, I: InsPos, MIMD: DL, MCID: TII->get(Opcode))
10530 .add(MO: DBase).addImm(Val: DDisp).addImm(Val: Length)
10531 .add(MO: SBase).addImm(Val: SDisp)
10532 .setMemRefs(MI.memoperands());
10533 };
10534
10535 bool NeedsLoop = false;
10536 uint64_t ImmLength = 0;
10537 Register LenAdjReg = SystemZ::NoRegister;
10538 if (IsImmForm) {
10539 ImmLength = LengthMO.getImm();
10540 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
10541 if (ImmLength == 0) {
10542 MI.eraseFromParent();
10543 return MBB;
10544 }
10545 if (Opcode == SystemZ::CLC) {
10546 if (ImmLength > 3 * 256)
10547 // A two-CLC sequence is a clear win over a loop, not least because
10548 // it needs only one branch. A three-CLC sequence needs the same
10549 // number of branches as a loop (i.e. 2), but is shorter. That
10550 // brings us to lengths greater than 768 bytes. It seems relatively
10551 // likely that a difference will be found within the first 768 bytes,
10552 // so we just optimize for the smallest number of branch
10553 // instructions, in order to avoid polluting the prediction buffer
10554 // too much.
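        // E.g. an ImmLength of 1000 would need four CLCs and three branches,
        // so it is emitted as a loop instead.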
10555 NeedsLoop = true;
10556 } else if (ImmLength > 6 * 256)
10557 // The heuristic we use is to prefer loops for anything that would
10558 // require 7 or more MVCs. With these kinds of sizes there isn't much
10559 // to choose between straight-line code and looping code, since the
10560 // time will be dominated by the MVCs themselves.
10561 NeedsLoop = true;
10562 } else {
10563 NeedsLoop = true;
10564 LenAdjReg = LengthMO.getReg();
10565 }
10566
10567 // When generating more than one CLC, all but the last will need to
10568 // branch to the end when a difference is found.
10569 MachineBasicBlock *EndMBB =
10570 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
10571 ? SystemZ::splitBlockAfter(MI, MBB)
10572 : nullptr);
10573
10574 if (NeedsLoop) {
10575 Register StartCountReg =
10576 MRI.createVirtualRegister(RegClass: &SystemZ::GR64BitRegClass);
10577 if (IsImmForm) {
10578 TII->loadImmediate(MBB&: *MBB, MBBI: MI, Reg: StartCountReg, Value: ImmLength / 256);
10579 ImmLength &= 255;
10580 } else {
10581 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::SRLG), DestReg: StartCountReg)
10582 .addReg(RegNo: LenAdjReg)
10583 .addReg(RegNo: 0)
10584 .addImm(Val: 8);
10585 }
10586
10587 bool HaveSingleBase = DestBase.isIdenticalTo(Other: SrcBase);
10588 auto loadZeroAddress = [&]() -> MachineOperand {
10589 Register Reg = MRI.createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass);
10590 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LGHI), DestReg: Reg).addImm(Val: 0);
10591 return MachineOperand::CreateReg(Reg, isDef: false);
10592 };
10593 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
10594 DestBase = loadZeroAddress();
10595 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
10596 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
10597
10598 MachineBasicBlock *StartMBB = nullptr;
10599 MachineBasicBlock *LoopMBB = nullptr;
10600 MachineBasicBlock *NextMBB = nullptr;
10601 MachineBasicBlock *DoneMBB = nullptr;
10602 MachineBasicBlock *AllDoneMBB = nullptr;
10603
10604 Register StartSrcReg = forceReg(MI, Base&: SrcBase, TII);
10605 Register StartDestReg =
10606 (HaveSingleBase ? StartSrcReg : forceReg(MI, Base&: DestBase, TII));
10607
10608 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
10609 Register ThisSrcReg = MRI.createVirtualRegister(RegClass: RC);
10610 Register ThisDestReg =
10611 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RegClass: RC));
10612 Register NextSrcReg = MRI.createVirtualRegister(RegClass: RC);
10613 Register NextDestReg =
10614 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RegClass: RC));
10615 RC = &SystemZ::GR64BitRegClass;
10616 Register ThisCountReg = MRI.createVirtualRegister(RegClass: RC);
10617 Register NextCountReg = MRI.createVirtualRegister(RegClass: RC);
10618
10619 if (IsRegForm) {
10620 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10621 StartMBB = SystemZ::emitBlockAfter(MBB);
10622 LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB);
10623 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(MBB: LoopMBB) : LoopMBB);
10624 DoneMBB = SystemZ::emitBlockAfter(MBB: NextMBB);
10625
10626 // MBB:
10627 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
10628 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CGHI))
10629 .addReg(RegNo: LenAdjReg).addImm(Val: IsMemset ? -2 : -1);
10630 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC))
10631 .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_EQ)
10632 .addMBB(MBB: AllDoneMBB);
10633 MBB->addSuccessor(Succ: AllDoneMBB);
10634 if (!IsMemset)
10635 MBB->addSuccessor(Succ: StartMBB);
10636 else {
10637 // MemsetOneCheckMBB:
10638 // # Jump to MemsetOneMBB for a memset of length 1, or
10639 // # fall thru to StartMBB.
10640 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
10641 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(MBB: &*MF.rbegin());
10642 MBB->addSuccessor(Succ: MemsetOneCheckMBB);
10643 MBB = MemsetOneCheckMBB;
10644 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CGHI))
10645 .addReg(RegNo: LenAdjReg).addImm(Val: -1);
10646 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC))
10647 .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_EQ)
10648 .addMBB(MBB: MemsetOneMBB);
10649 MBB->addSuccessor(Succ: MemsetOneMBB, Prob: {10, 100});
10650 MBB->addSuccessor(Succ: StartMBB, Prob: {90, 100});
10651
10652 // MemsetOneMBB:
10653 // # Jump back to AllDoneMBB after a single MVI or STC.
10654 MBB = MemsetOneMBB;
10655 insertMemMemOp(MBB, MBB->end(),
10656 MachineOperand::CreateReg(Reg: StartDestReg, isDef: false), DestDisp,
10657 MachineOperand::CreateReg(Reg: StartSrcReg, isDef: false), SrcDisp,
10658 1);
10659 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::J)).addMBB(MBB: AllDoneMBB);
10660 MBB->addSuccessor(Succ: AllDoneMBB);
10661 }
10662
10663 // StartMBB:
10664 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
10665 MBB = StartMBB;
10666 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CGHI))
10667 .addReg(RegNo: StartCountReg).addImm(Val: 0);
10668 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC))
10669 .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_EQ)
10670 .addMBB(MBB: DoneMBB);
10671 MBB->addSuccessor(Succ: DoneMBB);
10672 MBB->addSuccessor(Succ: LoopMBB);
10673 }
10674 else {
10675 StartMBB = MBB;
10676 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10677 LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB);
10678 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(MBB: LoopMBB) : LoopMBB);
10679
10680 // StartMBB:
10681 // # fall through to LoopMBB
10682 MBB->addSuccessor(Succ: LoopMBB);
10683
10684 DestBase = MachineOperand::CreateReg(Reg: NextDestReg, isDef: false);
10685 SrcBase = MachineOperand::CreateReg(Reg: NextSrcReg, isDef: false);
10686 if (EndMBB && !ImmLength)
10687 // If the loop handled the whole CLC range, DoneMBB will be empty with
10688 // CC live-through into EndMBB, so add it as live-in.
10689 DoneMBB->addLiveIn(PhysReg: SystemZ::CC);
10690 }
10691
10692 // LoopMBB:
10693 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
10694 // [ %NextDestReg, NextMBB ]
10695 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
10696 // [ %NextSrcReg, NextMBB ]
10697 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
10698 // [ %NextCountReg, NextMBB ]
10699 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
10700 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
10701 // ( JLH EndMBB )
10702 //
10703 // The prefetch is used only for MVC. The JLH is used only for CLC.
10704 MBB = LoopMBB;
10705 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: ThisDestReg)
10706 .addReg(RegNo: StartDestReg).addMBB(MBB: StartMBB)
10707 .addReg(RegNo: NextDestReg).addMBB(MBB: NextMBB);
10708 if (!HaveSingleBase)
10709 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: ThisSrcReg)
10710 .addReg(RegNo: StartSrcReg).addMBB(MBB: StartMBB)
10711 .addReg(RegNo: NextSrcReg).addMBB(MBB: NextMBB);
10712 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: ThisCountReg)
10713 .addReg(RegNo: StartCountReg).addMBB(MBB: StartMBB)
10714 .addReg(RegNo: NextCountReg).addMBB(MBB: NextMBB);
10715 if (Opcode == SystemZ::MVC)
10716 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PFD))
10717 .addImm(Val: SystemZ::PFD_WRITE)
10718 .addReg(RegNo: ThisDestReg).addImm(Val: DestDisp - IsMemset + 768).addReg(RegNo: 0);
10719 insertMemMemOp(MBB, MBB->end(),
10720 MachineOperand::CreateReg(Reg: ThisDestReg, isDef: false), DestDisp,
10721 MachineOperand::CreateReg(Reg: ThisSrcReg, isDef: false), SrcDisp, 256);
10722 if (EndMBB) {
10723 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC))
10724 .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_NE)
10725 .addMBB(MBB: EndMBB);
10726 MBB->addSuccessor(Succ: EndMBB);
10727 MBB->addSuccessor(Succ: NextMBB);
10728 }
10729
10730 // NextMBB:
10731 // %NextDestReg = LA 256(%ThisDestReg)
10732 // %NextSrcReg = LA 256(%ThisSrcReg)
10733 // %NextCountReg = AGHI %ThisCountReg, -1
10734 // CGHI %NextCountReg, 0
10735 // JLH LoopMBB
10736 // # fall through to DoneMBB
10737 //
10738 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
10739 MBB = NextMBB;
10740 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LA), DestReg: NextDestReg)
10741 .addReg(RegNo: ThisDestReg).addImm(Val: 256).addReg(RegNo: 0);
10742 if (!HaveSingleBase)
10743 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::LA), DestReg: NextSrcReg)
10744 .addReg(RegNo: ThisSrcReg).addImm(Val: 256).addReg(RegNo: 0);
10745 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::AGHI), DestReg: NextCountReg)
10746 .addReg(RegNo: ThisCountReg).addImm(Val: -1);
10747 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CGHI))
10748 .addReg(RegNo: NextCountReg).addImm(Val: 0);
10749 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC))
10750 .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_NE)
10751 .addMBB(MBB: LoopMBB);
10752 MBB->addSuccessor(Succ: LoopMBB);
10753 MBB->addSuccessor(Succ: DoneMBB);
10754
10755 MBB = DoneMBB;
10756 if (IsRegForm) {
10757 // DoneMBB:
10758 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
10759 // # Use EXecute Relative Long for the remainder of the bytes. The target
10760 // instruction of the EXRL will have a length field of 1 since 0 is an
10761 // illegal value. The number of bytes processed becomes (%LenAdjReg &
10762 // 0xff) + 1.
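    // E.g. a (non-memset) operation of 260 bytes has LenAdjReg == 259: the
    // loop above handles 256 bytes and the EXRL the remaining
    // (259 & 0xff) + 1 == 4.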
10763 // # Fall through to AllDoneMBB.
10764 Register RemSrcReg = MRI.createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass);
10765 Register RemDestReg = HaveSingleBase ? RemSrcReg
10766 : MRI.createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass);
10767 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: RemDestReg)
10768 .addReg(RegNo: StartDestReg).addMBB(MBB: StartMBB)
10769 .addReg(RegNo: NextDestReg).addMBB(MBB: NextMBB);
10770 if (!HaveSingleBase)
10771 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: RemSrcReg)
10772 .addReg(RegNo: StartSrcReg).addMBB(MBB: StartMBB)
10773 .addReg(RegNo: NextSrcReg).addMBB(MBB: NextMBB);
10774 if (IsMemset)
10775 insertMemMemOp(MBB, MBB->end(),
10776 MachineOperand::CreateReg(Reg: RemDestReg, isDef: false), DestDisp,
10777 MachineOperand::CreateReg(Reg: RemSrcReg, isDef: false), SrcDisp, 1);
10778 MachineInstrBuilder EXRL_MIB =
10779 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::EXRL_Pseudo))
10780 .addImm(Val: Opcode)
10781 .addReg(RegNo: LenAdjReg)
10782 .addReg(RegNo: RemDestReg).addImm(Val: DestDisp)
10783 .addReg(RegNo: RemSrcReg).addImm(Val: SrcDisp);
10784 MBB->addSuccessor(Succ: AllDoneMBB);
10785 MBB = AllDoneMBB;
10786 if (Opcode != SystemZ::MVC) {
10787 EXRL_MIB.addReg(RegNo: SystemZ::CC, Flags: RegState::ImplicitDefine);
10788 if (EndMBB)
10789 MBB->addLiveIn(PhysReg: SystemZ::CC);
10790 }
10791 }
10792 MF.getProperties().resetNoPHIs();
10793 }
10794
10795 // Handle any remaining bytes with straight-line code.
10796 while (ImmLength > 0) {
10797 uint64_t ThisLength = std::min(a: ImmLength, b: uint64_t(256));
10798 // The previous iteration might have created out-of-range displacements.
10799 // Apply them using LA/LAY if so.
10800 foldDisplIfNeeded(DestBase, DestDisp);
10801 foldDisplIfNeeded(SrcBase, SrcDisp);
10802 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
10803 DestDisp += ThisLength;
10804 SrcDisp += ThisLength;
10805 ImmLength -= ThisLength;
10806 // If there's another CLC to go, branch to the end if a difference
10807 // was found.
10808 if (EndMBB && ImmLength > 0) {
10809 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
10810 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC))
10811 .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_NE)
10812 .addMBB(MBB: EndMBB);
10813 MBB->addSuccessor(Succ: EndMBB);
10814 MBB->addSuccessor(Succ: NextMBB);
10815 MBB = NextMBB;
10816 }
10817 }
10818 if (EndMBB) {
10819 MBB->addSuccessor(Succ: EndMBB);
10820 MBB = EndMBB;
10821 MBB->addLiveIn(PhysReg: SystemZ::CC);
10822 }
10823
10824 MI.eraseFromParent();
10825 return MBB;
10826}
10827
10828// Decompose string pseudo-instruction MI into a loop that continually performs
10829// Opcode until CC != 3.
10830MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
10831 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10832 MachineFunction &MF = *MBB->getParent();
10833 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10834 MachineRegisterInfo &MRI = MF.getRegInfo();
10835 DebugLoc DL = MI.getDebugLoc();
10836
10837 Register End1Reg = MI.getOperand(i: 0).getReg();
10838 Register Start1Reg = MI.getOperand(i: 1).getReg();
10839 Register Start2Reg = MI.getOperand(i: 2).getReg();
10840 Register CharReg = MI.getOperand(i: 3).getReg();
10841
10842 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
10843 Register This1Reg = MRI.createVirtualRegister(RegClass: RC);
10844 Register This2Reg = MRI.createVirtualRegister(RegClass: RC);
10845 Register End2Reg = MRI.createVirtualRegister(RegClass: RC);
10846
10847 MachineBasicBlock *StartMBB = MBB;
10848 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10849 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB);
10850
10851 // StartMBB:
10852 // # fall through to LoopMBB
10853 MBB->addSuccessor(Succ: LoopMBB);
10854
10855 // LoopMBB:
10856 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
10857 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
10858 // R0L = %CharReg
10859 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
10860 // JO LoopMBB
10861 // # fall through to DoneMBB
10862 //
10863 // The load of R0L can be hoisted by post-RA LICM.
10864 MBB = LoopMBB;
10865
10866 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: This1Reg)
10867 .addReg(RegNo: Start1Reg).addMBB(MBB: StartMBB)
10868 .addReg(RegNo: End1Reg).addMBB(MBB: LoopMBB);
10869 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: This2Reg)
10870 .addReg(RegNo: Start2Reg).addMBB(MBB: StartMBB)
10871 .addReg(RegNo: End2Reg).addMBB(MBB: LoopMBB);
10872 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: SystemZ::R0L).addReg(RegNo: CharReg);
10873 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode))
10874 .addReg(RegNo: End1Reg, Flags: RegState::Define).addReg(RegNo: End2Reg, Flags: RegState::Define)
10875 .addReg(RegNo: This1Reg).addReg(RegNo: This2Reg);
10876 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC))
10877 .addImm(Val: SystemZ::CCMASK_ANY).addImm(Val: SystemZ::CCMASK_3).addMBB(MBB: LoopMBB);
10878 MBB->addSuccessor(Succ: LoopMBB);
10879 MBB->addSuccessor(Succ: DoneMBB);
10880
10881 DoneMBB->addLiveIn(PhysReg: SystemZ::CC);
10882
10883 MI.eraseFromParent();
10884 return DoneMBB;
10885}
10886
10887// Update TBEGIN instruction with final opcode and register clobbers.
10888MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
10889 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
10890 bool NoFloat) const {
10891 MachineFunction &MF = *MBB->getParent();
10892 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
10893 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10894
10895 // Update opcode.
10896 MI.setDesc(TII->get(Opcode));
10897
10898 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
10899 // Make sure to add the corresponding GRSM bits if they are missing.
10900 uint64_t Control = MI.getOperand(i: 2).getImm();
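// Each even/odd pair of GPRs is controlled by a single GRSM bit, so the table
// below repeats each mask value for both registers of the pair.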
10901 static const unsigned GPRControlBit[16] = {
10902 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
10903 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
10904 };
10905 Control |= GPRControlBit[15];
10906 if (TFI->hasFP(MF))
10907 Control |= GPRControlBit[11];
10908 MI.getOperand(i: 2).setImm(Control);
10909
10910 // Add GPR clobbers.
10911 for (int I = 0; I < 16; I++) {
10912 if ((Control & GPRControlBit[I]) == 0) {
10913 unsigned Reg = SystemZMC::GR64Regs[I];
10914 MI.addOperand(Op: MachineOperand::CreateReg(Reg, isDef: true, isImp: true));
10915 }
10916 }
10917
10918 // Add FPR/VR clobbers.
10919 if (!NoFloat && (Control & 4) != 0) {
10920 if (Subtarget.hasVector()) {
10921 for (unsigned Reg : SystemZMC::VR128Regs) {
10922 MI.addOperand(Op: MachineOperand::CreateReg(Reg, isDef: true, isImp: true));
10923 }
10924 } else {
10925 for (unsigned Reg : SystemZMC::FP64Regs) {
10926 MI.addOperand(Op: MachineOperand::CreateReg(Reg, isDef: true, isImp: true));
10927 }
10928 }
10929 }
10930
10931 return MBB;
10932}
10933
10934MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
10935 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10936 MachineFunction &MF = *MBB->getParent();
10937 MachineRegisterInfo *MRI = &MF.getRegInfo();
10938 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10939 DebugLoc DL = MI.getDebugLoc();
10940
10941 Register SrcReg = MI.getOperand(i: 0).getReg();
10942
10943 // Create new virtual register of the same class as source.
10944 const TargetRegisterClass *RC = MRI->getRegClass(Reg: SrcReg);
10945 Register DstReg = MRI->createVirtualRegister(RegClass: RC);
10946
10947 // Replace pseudo with a normal load-and-test that models the def as
10948 // well.
10949 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode), DestReg: DstReg)
10950 .addReg(RegNo: SrcReg)
10951 .setMIFlags(MI.getFlags());
10952 MI.eraseFromParent();
10953
10954 return MBB;
10955}
10956
10957MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
10958 MachineInstr &MI, MachineBasicBlock *MBB) const {
10959 MachineFunction &MF = *MBB->getParent();
10960 MachineRegisterInfo *MRI = &MF.getRegInfo();
10961 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10962 DebugLoc DL = MI.getDebugLoc();
10963 const unsigned ProbeSize = getStackProbeSize(MF);
10964 Register DstReg = MI.getOperand(i: 0).getReg();
10965 Register SizeReg = MI.getOperand(i: 2).getReg();
10966
10967 MachineBasicBlock *StartMBB = MBB;
10968 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
10969 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(MBB: StartMBB);
10970 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(MBB: LoopTestMBB);
10971 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(MBB: LoopBodyMBB);
10972 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(MBB: TailTestMBB);
10973
10974 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(),
10975 F: MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, Size: 8, BaseAlignment: Align(1));
10976
10977 Register PHIReg = MRI->createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass);
10978 Register IncReg = MRI->createVirtualRegister(RegClass: &SystemZ::ADDR64BitRegClass);
10979
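// Overall control flow: StartMBB falls through to LoopTestMBB, which loops
// with LoopBodyMBB while at least ProbeSize bytes remain to be allocated.
// TailTestMBB then either branches straight to DoneMBB (nothing left) or
// falls through to TailMBB, which allocates and probes the final partial
// chunk before reaching DoneMBB.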
10980 // LoopTestMBB
10981 // BRC TailTestMBB
10982 // # fallthrough to LoopBodyMBB
10983 StartMBB->addSuccessor(Succ: LoopTestMBB);
10984 MBB = LoopTestMBB;
10985 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::PHI), DestReg: PHIReg)
10986 .addReg(RegNo: SizeReg)
10987 .addMBB(MBB: StartMBB)
10988 .addReg(RegNo: IncReg)
10989 .addMBB(MBB: LoopBodyMBB);
10990 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CLGFI))
10991 .addReg(RegNo: PHIReg)
10992 .addImm(Val: ProbeSize);
10993 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC))
10994 .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_LT)
10995 .addMBB(MBB: TailTestMBB);
10996 MBB->addSuccessor(Succ: LoopBodyMBB);
10997 MBB->addSuccessor(Succ: TailTestMBB);
10998
10999 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
11000 // J LoopTestMBB
11001 MBB = LoopBodyMBB;
11002 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::SLGFI), DestReg: IncReg)
11003 .addReg(RegNo: PHIReg)
11004 .addImm(Val: ProbeSize);
11005 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::SLGFI), DestReg: SystemZ::R15D)
11006 .addReg(RegNo: SystemZ::R15D)
11007 .addImm(Val: ProbeSize);
11008 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CG)).addReg(RegNo: SystemZ::R15D)
11009 .addReg(RegNo: SystemZ::R15D).addImm(Val: ProbeSize - 8).addReg(RegNo: 0)
11010 .setMemRefs(VolLdMMO);
11011 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::J)).addMBB(MBB: LoopTestMBB);
11012 MBB->addSuccessor(Succ: LoopTestMBB);
11013
11014 // TailTestMBB
11015 // BRC DoneMBB
11016 // # fallthrough to TailMBB
11017 MBB = TailTestMBB;
11018 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CGHI))
11019 .addReg(RegNo: PHIReg)
11020 .addImm(Val: 0);
11021 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::BRC))
11022 .addImm(Val: SystemZ::CCMASK_ICMP).addImm(Val: SystemZ::CCMASK_CMP_EQ)
11023 .addMBB(MBB: DoneMBB);
11024 MBB->addSuccessor(Succ: TailMBB);
11025 MBB->addSuccessor(Succ: DoneMBB);
11026
11027 // TailMBB
11028 // # fallthrough to DoneMBB
11029 MBB = TailMBB;
11030 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::SLGR), DestReg: SystemZ::R15D)
11031 .addReg(RegNo: SystemZ::R15D)
11032 .addReg(RegNo: PHIReg);
11033 BuildMI(BB: MBB, MIMD: DL, MCID: TII->get(Opcode: SystemZ::CG)).addReg(RegNo: SystemZ::R15D)
11034 .addReg(RegNo: SystemZ::R15D).addImm(Val: -8).addReg(RegNo: PHIReg)
11035 .setMemRefs(VolLdMMO);
11036 MBB->addSuccessor(Succ: DoneMBB);
11037
11038 // DoneMBB
11039 MBB = DoneMBB;
11040 BuildMI(BB&: *MBB, I: MBB->begin(), MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg)
11041 .addReg(RegNo: SystemZ::R15D);
11042
11043 MI.eraseFromParent();
11044 return DoneMBB;
11045}
11046
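// Return the address of the backchain slot, i.e. the given stack pointer
// value plus the target-specific backchain offset.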
11047SDValue SystemZTargetLowering::
11048getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
11049 MachineFunction &MF = DAG.getMachineFunction();
11050 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
11051 SDLoc DL(SP);
11052 return DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: SP,
11053 N2: DAG.getIntPtrConstant(Val: TFL->getBackchainOffset(MF), DL));
11054}
11055
11056MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
11057 MachineInstr &MI, MachineBasicBlock *MBB) const {
11058 switch (MI.getOpcode()) {
11059 case SystemZ::ADJCALLSTACKDOWN:
11060 case SystemZ::ADJCALLSTACKUP:
11061 return emitAdjCallStack(MI, BB: MBB);
11062
11063 case SystemZ::Select32:
11064 case SystemZ::Select64:
11065 case SystemZ::Select128:
11066 case SystemZ::SelectF32:
11067 case SystemZ::SelectF64:
11068 case SystemZ::SelectF128:
11069 case SystemZ::SelectVR32:
11070 case SystemZ::SelectVR64:
11071 case SystemZ::SelectVR128:
11072 return emitSelect(MI, MBB);
11073
11074 case SystemZ::CondStore8Mux:
11075 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STCMux, STOCOpcode: 0, Invert: false);
11076 case SystemZ::CondStore8MuxInv:
11077 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STCMux, STOCOpcode: 0, Invert: true);
11078 case SystemZ::CondStore16Mux:
11079 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STHMux, STOCOpcode: 0, Invert: false);
11080 case SystemZ::CondStore16MuxInv:
11081 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STHMux, STOCOpcode: 0, Invert: true);
11082 case SystemZ::CondStore32Mux:
11083 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STMux, STOCOpcode: SystemZ::STOCMux, Invert: false);
11084 case SystemZ::CondStore32MuxInv:
11085 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STMux, STOCOpcode: SystemZ::STOCMux, Invert: true);
11086 case SystemZ::CondStore8:
11087 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STC, STOCOpcode: 0, Invert: false);
11088 case SystemZ::CondStore8Inv:
11089 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STC, STOCOpcode: 0, Invert: true);
11090 case SystemZ::CondStore16:
11091 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STH, STOCOpcode: 0, Invert: false);
11092 case SystemZ::CondStore16Inv:
11093 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STH, STOCOpcode: 0, Invert: true);
11094 case SystemZ::CondStore32:
11095 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::ST, STOCOpcode: SystemZ::STOC, Invert: false);
11096 case SystemZ::CondStore32Inv:
11097 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::ST, STOCOpcode: SystemZ::STOC, Invert: true);
11098 case SystemZ::CondStore64:
11099 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STG, STOCOpcode: SystemZ::STOCG, Invert: false);
11100 case SystemZ::CondStore64Inv:
11101 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STG, STOCOpcode: SystemZ::STOCG, Invert: true);
11102 case SystemZ::CondStoreF32:
11103 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STE, STOCOpcode: 0, Invert: false);
11104 case SystemZ::CondStoreF32Inv:
11105 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STE, STOCOpcode: 0, Invert: true);
11106 case SystemZ::CondStoreF64:
11107 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STD, STOCOpcode: 0, Invert: false);
11108 case SystemZ::CondStoreF64Inv:
11109 return emitCondStore(MI, MBB, StoreOpcode: SystemZ::STD, STOCOpcode: 0, Invert: true);
11110
11111 case SystemZ::SCmp128Hi:
11112 return emitICmp128Hi(MI, MBB, Unsigned: false);
11113 case SystemZ::UCmp128Hi:
11114 return emitICmp128Hi(MI, MBB, Unsigned: true);
11115
11116 case SystemZ::PAIR128:
11117 return emitPair128(MI, MBB);
11118 case SystemZ::AEXT128:
11119 return emitExt128(MI, MBB, ClearEven: false);
11120 case SystemZ::ZEXT128:
11121 return emitExt128(MI, MBB, ClearEven: true);
11122
11123 case SystemZ::ATOMIC_SWAPW:
11124 return emitAtomicLoadBinary(MI, MBB, BinOpcode: 0);
11125
11126 case SystemZ::ATOMIC_LOADW_AR:
11127 return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::AR);
11128 case SystemZ::ATOMIC_LOADW_AFI:
11129 return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::AFI);
11130
11131 case SystemZ::ATOMIC_LOADW_SR:
11132 return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::SR);
11133
11134 case SystemZ::ATOMIC_LOADW_NR:
11135 return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::NR);
11136 case SystemZ::ATOMIC_LOADW_NILH:
11137 return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::NILH);
11138
11139 case SystemZ::ATOMIC_LOADW_OR:
11140 return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::OR);
11141 case SystemZ::ATOMIC_LOADW_OILH:
11142 return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::OILH);
11143
11144 case SystemZ::ATOMIC_LOADW_XR:
11145 return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::XR);
11146 case SystemZ::ATOMIC_LOADW_XILF:
11147 return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::XILF);
11148
11149 case SystemZ::ATOMIC_LOADW_NRi:
11150 return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::NR, Invert: true);
11151 case SystemZ::ATOMIC_LOADW_NILHi:
11152 return emitAtomicLoadBinary(MI, MBB, BinOpcode: SystemZ::NILH, Invert: true);
11153
11154 case SystemZ::ATOMIC_LOADW_MIN:
11155 return emitAtomicLoadMinMax(MI, MBB, CompareOpcode: SystemZ::CR, KeepOldMask: SystemZ::CCMASK_CMP_LE);
11156 case SystemZ::ATOMIC_LOADW_MAX:
11157 return emitAtomicLoadMinMax(MI, MBB, CompareOpcode: SystemZ::CR, KeepOldMask: SystemZ::CCMASK_CMP_GE);
11158 case SystemZ::ATOMIC_LOADW_UMIN:
11159 return emitAtomicLoadMinMax(MI, MBB, CompareOpcode: SystemZ::CLR, KeepOldMask: SystemZ::CCMASK_CMP_LE);
11160 case SystemZ::ATOMIC_LOADW_UMAX:
11161 return emitAtomicLoadMinMax(MI, MBB, CompareOpcode: SystemZ::CLR, KeepOldMask: SystemZ::CCMASK_CMP_GE);
11162
11163 case SystemZ::ATOMIC_CMP_SWAPW:
11164 return emitAtomicCmpSwapW(MI, MBB);
11165 case SystemZ::MVCImm:
11166 case SystemZ::MVCReg:
11167 return emitMemMemWrapper(MI, MBB, Opcode: SystemZ::MVC);
11168 case SystemZ::NCImm:
11169 return emitMemMemWrapper(MI, MBB, Opcode: SystemZ::NC);
11170 case SystemZ::OCImm:
11171 return emitMemMemWrapper(MI, MBB, Opcode: SystemZ::OC);
11172 case SystemZ::XCImm:
11173 case SystemZ::XCReg:
11174 return emitMemMemWrapper(MI, MBB, Opcode: SystemZ::XC);
11175 case SystemZ::CLCImm:
11176 case SystemZ::CLCReg:
11177 return emitMemMemWrapper(MI, MBB, Opcode: SystemZ::CLC);
11178 case SystemZ::MemsetImmImm:
11179 case SystemZ::MemsetImmReg:
11180 case SystemZ::MemsetRegImm:
11181 case SystemZ::MemsetRegReg:
11182 return emitMemMemWrapper(MI, MBB, Opcode: SystemZ::MVC, IsMemset: true);
11183 case SystemZ::CLSTLoop:
11184 return emitStringWrapper(MI, MBB, Opcode: SystemZ::CLST);
11185 case SystemZ::MVSTLoop:
11186 return emitStringWrapper(MI, MBB, Opcode: SystemZ::MVST);
11187 case SystemZ::SRSTLoop:
11188 return emitStringWrapper(MI, MBB, Opcode: SystemZ::SRST);
11189 case SystemZ::TBEGIN:
11190 return emitTransactionBegin(MI, MBB, Opcode: SystemZ::TBEGIN, NoFloat: false);
11191 case SystemZ::TBEGIN_nofloat:
11192 return emitTransactionBegin(MI, MBB, Opcode: SystemZ::TBEGIN, NoFloat: true);
11193 case SystemZ::TBEGINC:
11194 return emitTransactionBegin(MI, MBB, Opcode: SystemZ::TBEGINC, NoFloat: true);
11195 case SystemZ::LTEBRCompare_Pseudo:
11196 return emitLoadAndTestCmp0(MI, MBB, Opcode: SystemZ::LTEBR);
11197 case SystemZ::LTDBRCompare_Pseudo:
11198 return emitLoadAndTestCmp0(MI, MBB, Opcode: SystemZ::LTDBR);
11199 case SystemZ::LTXBRCompare_Pseudo:
11200 return emitLoadAndTestCmp0(MI, MBB, Opcode: SystemZ::LTXBR);
11201
11202 case SystemZ::PROBED_ALLOCA:
11203 return emitProbedAlloca(MI, MBB);
11204 case SystemZ::EH_SjLj_SetJmp:
11205 return emitEHSjLjSetJmp(MI, MBB);
11206 case SystemZ::EH_SjLj_LongJmp:
11207 return emitEHSjLjLongJmp(MI, MBB);
11208
11209 case TargetOpcode::STACKMAP:
11210 case TargetOpcode::PATCHPOINT:
11211 return emitPatchPoint(MI, MBB);
11212
11213 default:
11214 llvm_unreachable("Unexpected instr type to insert");
11215 }
11216}
11217
11218 // This is used only by the isel schedulers, and is needed only to prevent the
11219 // compiler from crashing when the list-ilp scheduler is used.
11220const TargetRegisterClass *
11221SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
11222 if (VT == MVT::Untyped)
11223 return &SystemZ::ADDR128BitRegClass;
11224 return TargetLowering::getRepRegClassFor(VT);
11225}
11226
11227SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
11228 SelectionDAG &DAG) const {
11229 SDLoc dl(Op);
11230 /*
11231 The rounding method is in FPC Byte 3 bits 6-7, and has the following
11232 settings:
11233 00 Round to nearest
11234 01 Round to 0
11235 10 Round to +inf
11236 11 Round to -inf
11237
11238 FLT_ROUNDS, on the other hand, expects the following:
11239 -1 Undefined
11240 0 Round to 0
11241 1 Round to nearest
11242 2 Round to +inf
11243 3 Round to -inf
11244 */
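 /*
   A worked check of the transformation computed below,
   ((FPC & 3) ^ ((FPC & 3) >> 1)) ^ 1, against the table above:
     RM = 0 (nearest): (0 ^ 0) ^ 1 = 1
     RM = 1 (to 0):    (1 ^ 0) ^ 1 = 0
     RM = 2 (to +inf): (2 ^ 1) ^ 1 = 2
     RM = 3 (to -inf): (3 ^ 1) ^ 1 = 3
 */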
11245
11246 // Save FPC to register.
11247 SDValue Chain = Op.getOperand(i: 0);
11248 SDValue EFPC(
11249 DAG.getMachineNode(Opcode: SystemZ::EFPC, dl, ResultTys: {MVT::i32, MVT::Other}, Ops: Chain), 0);
11250 Chain = EFPC.getValue(R: 1);
11251
11252 // Transform as necessary
11253 SDValue CWD1 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: EFPC,
11254 N2: DAG.getConstant(Val: 3, DL: dl, VT: MVT::i32));
11255 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
11256 SDValue CWD2 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32, N1: CWD1,
11257 N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32, N1: CWD1,
11258 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32)));
11259
11260 SDValue RetVal = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32, N1: CWD2,
11261 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
11262 RetVal = DAG.getZExtOrTrunc(Op: RetVal, DL: dl, VT: Op.getValueType());
11263
11264 return DAG.getMergeValues(Ops: {RetVal, Chain}, dl);
11265}
11266
11267SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
11268 SelectionDAG &DAG) const {
11269 EVT VT = Op.getValueType();
11270 Op = Op.getOperand(i: 0);
11271 EVT OpVT = Op.getValueType();
11272
11273 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
11274
11275 SDLoc DL(Op);
11276
11277 // Load a zero vector for the third operand of VSUM.
11278 SDValue Zero = DAG.getSplatBuildVector(VT: OpVT, DL, Op: DAG.getConstant(Val: 0, DL, VT));
11279
11280 // Execute VSUM.
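// Elements narrower than 32 bits are first summed into 32-bit lanes (v4i32)
// and then fall through to the 32/64-bit path, which sums all lanes into a
// single i128.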
11281 switch (OpVT.getScalarSizeInBits()) {
11282 case 8:
11283 case 16:
11284 Op = DAG.getNode(Opcode: SystemZISD::VSUM, DL, VT: MVT::v4i32, N1: Op, N2: Zero);
11285 [[fallthrough]];
11286 case 32:
11287 case 64:
11288 Op = DAG.getNode(Opcode: SystemZISD::VSUM, DL, VT: MVT::i128, N1: Op,
11289 N2: DAG.getBitcast(VT: Op.getValueType(), V: Zero));
11290 break;
11291 case 128:
11292 break; // VSUM over v1i128 should not happen and would be a noop
11293 default:
11294 llvm_unreachable("Unexpected scalar size.");
11295 }
11296 // Cast to original vector type, retrieve last element.
11297 return DAG.getNode(
11298 Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT, N1: DAG.getBitcast(VT: OpVT, V: Op),
11299 N2: DAG.getConstant(Val: OpVT.getVectorNumElements() - 1, DL, VT: MVT::i32));
11300}
11301
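// Print the signature of F, including its return-value attributes and any
// SExt/ZExt/NoExt attributes on its parameters, for use in the ABI
// verification error messages below.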
11302static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS) {
11303 FunctionType *FT = F->getFunctionType();
11304 const AttributeList &Attrs = F->getAttributes();
11305 if (Attrs.hasRetAttrs())
11306 OS << Attrs.getAsString(Index: AttributeList::ReturnIndex) << " ";
11307 OS << *F->getReturnType() << " @" << F->getName() << "(";
11308 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
11309 if (I)
11310 OS << ", ";
11311 OS << *FT->getParamType(i: I);
11312 AttributeSet ArgAttrs = Attrs.getParamAttrs(ArgNo: I);
11313 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt})
11314 if (ArgAttrs.hasAttribute(Kind: A))
11315 OS << " " << Attribute::getNameFromAttrKind(AttrKind: A);
11316 }
11317 OS << ")\n";
11318}
11319
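// Return true if Fn has local linkage and never has its address taken; the
// callers below use this to skip the narrow-integer-argument checks for such
// functions. The result is cached in IsInternalCache.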
11320bool SystemZTargetLowering::isInternal(const Function *Fn) const {
11321 std::map<const Function *, bool>::iterator Itr = IsInternalCache.find(x: Fn);
11322 if (Itr == IsInternalCache.end())
11323 Itr = IsInternalCache
11324 .insert(x: std::pair<const Function *, bool>(
11325 Fn, (Fn->hasLocalLinkage() && !Fn->hasAddressTaken())))
11326 .first;
11327 return Itr->second;
11328}
11329
11330void SystemZTargetLowering::
11331verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
11332 const Function *F, SDValue Callee) const {
11333 // Temporarily only do the check when explicitly requested, until it can be
11334 // enabled by default.
11335 if (!EnableIntArgExtCheck)
11336 return;
11337
11338 bool IsInternal = false;
11339 const Function *CalleeFn = nullptr;
11340 if (auto *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee))
11341 if ((CalleeFn = dyn_cast<Function>(Val: G->getGlobal())))
11342 IsInternal = isInternal(Fn: CalleeFn);
11343 if (!IsInternal && !verifyNarrowIntegerArgs(Outs)) {
11344 errs() << "ERROR: Missing extension attribute of passed "
11345 << "value in call to function:\n" << "Callee: ";
11346 if (CalleeFn != nullptr)
11347 printFunctionArgExts(F: CalleeFn, OS&: errs());
11348 else
11349 errs() << "-\n";
11350 errs() << "Caller: ";
11351 printFunctionArgExts(F, OS&: errs());
11352 llvm_unreachable("");
11353 }
11354}
11355
11356void SystemZTargetLowering::
11357verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
11358 const Function *F) const {
11359 // Temporarily only do the check when explicitly requested, until it can be
11360 // enabled by default.
11361 if (!EnableIntArgExtCheck)
11362 return;
11363
11364 if (!isInternal(Fn: F) && !verifyNarrowIntegerArgs(Outs)) {
11365 errs() << "ERROR: Missing extension attribute of returned "
11366 << "value from function:\n";
11367 printFunctionArgExts(F, OS&: errs());
11368 llvm_unreachable("");
11369 }
11370}
11371
11372// Verify that narrow integer arguments are extended as required by the ABI.
11373// Return false if an error is found.
11374bool SystemZTargetLowering::verifyNarrowIntegerArgs(
11375 const SmallVectorImpl<ISD::OutputArg> &Outs) const {
11376 if (!Subtarget.isTargetELF())
11377 return true;
11378
11379 if (EnableIntArgExtCheck.getNumOccurrences()) {
11380 if (!EnableIntArgExtCheck)
11381 return true;
11382 } else if (!getTargetMachine().Options.VerifyArgABICompliance)
11383 return true;
11384
11385 for (unsigned i = 0; i < Outs.size(); ++i) {
11386 MVT VT = Outs[i].VT;
11387 ISD::ArgFlagsTy Flags = Outs[i].Flags;
11388 if (VT.isInteger()) {
11389 assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
11390 "Unexpected integer argument VT.");
11391 if (VT == MVT::i32 &&
11392 !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
11393 return false;
11394 }
11395 }
11396
11397 return true;
11398}
11399