//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that VE uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "VEISelLowering.h"
#include "MCTargetDesc/VEMCAsmInfo.h"
#include "VECustomDAG.h"
#include "VEInstrBuilder.h"
#include "VEMachineFunctionInfo.h"
#include "VERegisterInfo.h"
#include "VETargetMachine.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

#define DEBUG_TYPE "ve-lower"

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "VEGenCallingConv.inc"

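// Return the calling-convention assignment function (generated from
// VECallingConv.td) used for return values of the given calling convention.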
CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
  switch (CallConv) {
  default:
    return RetCC_VE_C;
  case CallingConv::Fast:
    return RetCC_VE_Fast;
  }
}

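// Return the assignment function used for formal arguments and call operands.
// Variadic calls use CC_VE2, which also assigns stack locations so that
// arguments can be passed both in registers and on the stack.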
CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
  if (IsVarArg)
    return CC_VE2;
  switch (CallConv) {
  default:
    return CC_VE_C;
  case CallingConv::Fast:
    return CC_VE_Fast;
  }
}

bool VETargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
    const Type *RetTy) const {
  CCAssignFn *RetCC = getReturnCC(CallConv);
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC);
}

static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
                                   MVT::v256f32, MVT::v512f32, MVT::v256f64};

static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};

static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};

void VETargetLowering::initRegisterClasses() {
  // Set up the register classes.
  addRegisterClass(MVT::i32, &VE::I32RegClass);
  addRegisterClass(MVT::i64, &VE::I64RegClass);
  addRegisterClass(MVT::f32, &VE::F32RegClass);
  addRegisterClass(MVT::f64, &VE::I64RegClass);
  addRegisterClass(MVT::f128, &VE::F128RegClass);

  if (Subtarget->enableVPU()) {
    for (MVT VecVT : AllVectorVTs)
      addRegisterClass(VecVT, &VE::V64RegClass);
    addRegisterClass(MVT::v256i1, &VE::VMRegClass);
    addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
  }
}

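// Set legalization actions for the scalar (SPU) operations: loads/stores,
// address nodes, varargs, stack, branches, integer and floating-point
// arithmetic, atomics, and SJLJ exception handling.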
void VETargetLowering::initSPUActions() {
  const auto &TM = getTargetMachine();
  /// Load & Store {

  // VE doesn't have i1 sign-extending loads.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }

  // VE doesn't have floating-point extload/truncstore, so expand them.
  for (MVT FPVT : MVT::fp_valuetypes()) {
    for (MVT OtherFPVT : MVT::fp_valuetypes()) {
      setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
      setTruncStoreAction(FPVT, OtherFPVT, Expand);
    }
  }

  // VE doesn't have fp128 load/store, so expand them in custom lowering.
  setOperationAction(ISD::LOAD, MVT::f128, Custom);
  setOperationAction(ISD::STORE, MVT::f128, Custom);

  /// } Load & Store

  // Custom legalize address nodes into LO/HI parts.
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);

  /// VAARG handling {
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  // VAARG needs to be lowered to an access with 8-byte alignment.
  setOperationAction(ISD::VAARG, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  /// } VAARG handling

  /// Stack {
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);

  // Use the default implementation.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  /// } Stack

  /// Branch {

  // VE doesn't have BRCOND.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // BR_JT is not implemented yet.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  /// } Branch

  /// Int Ops {
  for (MVT IntVT : {MVT::i32, MVT::i64}) {
    // VE has no REM or DIVREM operations.
    setOperationAction(ISD::UREM, IntVT, Expand);
    setOperationAction(ISD::SREM, IntVT, Expand);
    setOperationAction(ISD::SDIVREM, IntVT, Expand);
    setOperationAction(ISD::UDIVREM, IntVT, Expand);

    // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
    setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
    setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
    setOperationAction(ISD::SRL_PARTS, IntVT, Expand);

    // VE has no MULHU/S or U/SMUL_LOHI operations.
    // TODO: Use the MPD instruction to implement SMUL_LOHI for i32.
    setOperationAction(ISD::MULHU, IntVT, Expand);
    setOperationAction(ISD::MULHS, IntVT, Expand);
    setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
    setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);

    // VE has no CTTZ, ROTL, or ROTR operations.
    setOperationAction(ISD::CTTZ, IntVT, Expand);
    setOperationAction(ISD::ROTL, IntVT, Expand);
    setOperationAction(ISD::ROTR, IntVT, Expand);

    // VE has a 64-bit instruction that implements i64 BSWAP. The same
    // instruction also handles i32 BSWAP given an additional operand.
    // Use isel patterns to lower BSWAP.
    setOperationAction(ISD::BSWAP, IntVT, Legal);

    // VE has only 64-bit instructions for BITREVERSE/CTLZ/CTPOP.
    // Use isel patterns for i64; promote for i32.
    LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
    setOperationAction(ISD::BITREVERSE, IntVT, Act);
    setOperationAction(ISD::CTLZ, IntVT, Act);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
    setOperationAction(ISD::CTPOP, IntVT, Act);

    // VE has only 64-bit instructions for AND/OR/XOR.
    // Use isel patterns for i64; promote for i32.
    setOperationAction(ISD::AND, IntVT, Act);
    setOperationAction(ISD::OR, IntVT, Act);
    setOperationAction(ISD::XOR, IntVT, Act);

    // SMAX and SMIN are legal.
    setOperationAction(ISD::SMAX, IntVT, Legal);
    setOperationAction(ISD::SMIN, IntVT, Legal);
  }
  /// } Int Ops

  /// Conversion {
  // VE doesn't have instructions for fp<->uint, so let LLVM expand them.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

  // fp16 is not supported.
  for (MVT FPVT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
    setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
  }
  /// } Conversion

  /// Floating-point Ops {
  /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
  /// and fcmp.

  // VE doesn't have the following floating-point operations.
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FNEG, VT, Expand);
    setOperationAction(ISD::FREM, VT, Expand);
  }

  // VE doesn't have fdiv for f128.
  setOperationAction(ISD::FDIV, MVT::f128, Expand);

  for (MVT FPVT : {MVT::f32, MVT::f64}) {
    // f32 and f64 use ConstantFP; f128 uses the constant pool.
    setOperationAction(ISD::ConstantFP, FPVT, Legal);
  }
  /// } Floating-point Ops

  /// Floating-point math functions {

  // VE doesn't have the following floating-point math functions.
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FABS, VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    setOperationAction(ISD::FCOS, VT, Expand);
    setOperationAction(ISD::FMA, VT, Expand);
    setOperationAction(ISD::FPOW, VT, Expand);
    setOperationAction(ISD::FSIN, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
  }

  // VE has single- and double-precision FMINNUM and FMAXNUM.
  for (MVT VT : {MVT::f32, MVT::f64}) {
    setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, VT, Legal);
  }

  /// } Floating-point math functions

  /// Atomic instructions {

  setMaxAtomicSizeInBitsSupported(64);
  setMinCmpXchgSizeInBits(32);
  setSupportsUnalignedAtomics(false);

  // Use a custom inserter for ATOMIC_FENCE.
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Other atomic instructions.
  for (MVT VT : MVT::integer_valuetypes()) {
    // Support i8/i16 atomic swap.
    setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);

    // FIXME: Support "atmam" instructions.
    setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);

    // VE doesn't have the following instructions.
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
  }

  /// } Atomic instructions

  /// SJLJ instructions {
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
  /// } SJLJ instructions

  // Intrinsic instructions
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
}

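// Set legalization actions for vector (VPU) operations. Most vector
// operations with legal element types are custom-lowered to VVP_* nodes;
// mask and packed vector types get their own custom handling.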
void VETargetLowering::initVPUActions() {
  for (MVT LegalMaskVT : AllMaskVTs)
    setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);

  for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
    setOperationAction(Opc, MVT::v512i1, Custom);

  for (MVT LegalVecVT : AllVectorVTs) {
    setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
    // Translate all vector instructions with legal element types to VVP_*
    // nodes.
    // TODO: We will custom-widen into VVP_* nodes in the future. While we are
    // building the infrastructure for this, we only do this for legal vector
    // VTs.
#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME)                                     \
  setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
#define ADD_VVP_OP(VVP_NAME, ISD_NAME)                                         \
  setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
    setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);
    setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);
#include "VVPNodes.def"
  }

  for (MVT LegalPackedVT : AllPackedVTs) {
    setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
  }

  // vNt32, vNt64 ops (legal element types)
  for (MVT VT : MVT::vector_valuetypes()) {
    MVT ElemVT = VT.getVectorElementType();
    unsigned ElemBits = ElemVT.getScalarSizeInBits();
    if (ElemBits != 32 && ElemBits != 64)
      continue;

    for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
      setOperationAction(MemOpc, VT, Custom);

    const ISD::NodeType IntReductionOCs[] = {
        ISD::VECREDUCE_ADD,  ISD::VECREDUCE_MUL,  ISD::VECREDUCE_AND,
        ISD::VECREDUCE_OR,   ISD::VECREDUCE_XOR,  ISD::VECREDUCE_SMIN,
        ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};

    for (unsigned IntRedOpc : IntReductionOCs)
      setOperationAction(IntRedOpc, VT, Custom);
  }

  // v256i1 and v512i1 ops
  for (MVT MaskVT : AllMaskVTs) {
    // Custom lower mask ops.
    setOperationAction(ISD::STORE, MaskVT, Custom);
    setOperationAction(ISD::LOAD, MaskVT, Custom);
  }
}

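// Lower return values: extend or bitcast each value as the calling convention
// requires, copy it into its return register, and emit a VEISD::RET_GLUE node
// glued to those copies.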
SDValue
VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                              bool IsVarArg,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              const SmallVectorImpl<SDValue> &OutVals,
                              const SDLoc &DL, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    SDValue OutVal = OutVals[i];

    // Integer return values must be sign or zero extended by the callee.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::ZExt:
      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::AExt:
      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::BCvt: {
      // Convert a float return value to i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Undef = SDValue(
          DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                          MVT::i64, Undef, OutVal, Sub_f32),
                       0);
      break;
    }
    default:
      llvm_unreachable("Unknown loc info!");
    }

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Glue);

    // Guarantee that all emitted copies are stuck together with flags.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(VEISD::RET_GLUE, DL, MVT::Other, RetOps);
}

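// Lower incoming formal arguments: arguments in registers become live-in
// copies (with Assert[SZ]ext/bitcast fixups as needed), and stack arguments
// are loaded from fixed frame objects placed after the reserved area.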
SDValue VETargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Get the base offset of the incoming arguments stack space.
  unsigned ArgsBaseOffset = Subtarget->getRsaSize();
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 64;

  // Analyze arguments according to CC_VE.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE would be correct now.
  CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));

  for (const CCValAssign &VA : ArgLocs) {
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    if (VA.isRegLoc()) {
      // This argument is passed in a register.
      // All integer register arguments are promoted by the caller to i64.

      // Create a virtual register for the promoted live-in value.
      Register VReg =
          MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
      SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());

      // The caller promoted the argument, so insert an Assert?ext SDNode so we
      // won't promote the value again in this function.
      switch (VA.getLocInfo()) {
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::BCvt: {
        // Extract a float argument from i64 with padding.
        //     63     31   0
        //    +------+------+
        //    | float|   0  |
        //    +------+------+
        assert(VA.getLocVT() == MVT::i64);
        assert(VA.getValVT() == MVT::f32);
        SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
        Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                         MVT::f32, Arg, Sub_f32),
                      0);
        break;
      }
      default:
        break;
      }

      // Truncate the register down to the argument type.
      if (VA.isExtInLoc())
        Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);

      InVals.push_back(Arg);
      continue;
    }

    // The registers are exhausted. This argument was passed on the stack.
    assert(VA.isMemLoc());
    // The CC_VE_Full/Half functions compute stack offsets relative to the
    // beginning of the arguments area at %fp + the size of the reserved area.
    unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
    unsigned ValSize = VA.getValVT().getSizeInBits() / 8;

    // Adjust the offset for a float argument by adding 4, since the argument
    // is stored in an 8-byte slot as shown below and LLVM generates a 4-byte
    // load. This adjustment is needed only in LowerFormalArguments; in
    // LowerCall, a float argument is first converted to i64 and stored as 8
    // bytes of data, as the ABI requires, so no adjustment is needed there.
    //     0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
    if (VA.getValVT() == MVT::f32)
      Offset += 4;

    int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
    InVals.push_back(
        DAG.getLoad(VA.getValVT(), DL, Chain,
                    DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
                    MachinePointerInfo::getFixedStack(MF, FI)));
  }

  if (!IsVarArg)
    return Chain;

  // This function takes variable arguments, some of which may have been passed
  // in registers %s0-%s7.
  //
  // The va_start intrinsic needs to know the offset to the first variable
  // argument.
  // TODO: need to calculate offset correctly once we support f128.
  unsigned ArgOffset = ArgLocs.size() * 8;
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  // Skip the reserved area at the top of stack.
  FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);

  return Chain;
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                             const MachineFunction &MF) const {
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("sp", VE::SX11)    // Stack pointer
                     .Case("fp", VE::SX9)     // Frame pointer
                     .Case("sl", VE::SX8)     // Stack limit
                     .Case("lr", VE::SX10)    // Link register
                     .Case("tp", VE::SX14)    // Thread pointer
                     .Case("outer", VE::SX12) // Outer register
                     .Case("info", VE::SX17)  // Info area register
                     .Case("got", VE::SX15)   // Global offset table register
                     .Case("plt", VE::SX16)   // Procedure linkage table register
                     .Default(Register());
  return Reg;
}

//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//

SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                    SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // The VE target does not yet support tail call optimization.
  CLI.IsTailCall = false;

  // Get the base offset of the outgoing arguments stack space.
  unsigned ArgsBaseOffset = Subtarget->getRsaSize();
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 8 * 8u;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE would be correct now.
  CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));

  // VE requires both registers and the stack for varargs and unprototyped
  // functions.
  bool UseBoth = CLI.IsVarArg;

  // Analyze operands again if it is required to store BOTH.
  SmallVector<CCValAssign, 16> ArgLocs2;
  CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
                  ArgLocs2, *DAG.getContext());
  if (UseBoth)
    CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));

  // Get the size of the outgoing arguments stack space requirement.
  unsigned ArgsSize = CCInfo.getStackSize();

  // Keep stack frames 16-byte aligned.
  ArgsSize = alignTo(ArgsSize, 16);

  // Adjust the stack pointer to make room for the arguments.
  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
  // with more than 6 arguments.
  Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);

  // Collect the set of registers to pass to the function and their values.
  // This will be emitted as a sequence of CopyToReg nodes glued to the call
  // instruction.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

  // Collect chains from all the memory operations that copy arguments to the
  // stack. They must follow the stack pointer adjustment above and precede the
  // call instruction itself.
  SmallVector<SDValue, 8> MemOpChains;

  // VE needs the address of the callee function in a register,
  // so prepare to copy it to SX12 here.

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  // Likewise ExternalSymbol -> TargetExternalSymbol.
  SDValue Callee = CLI.Callee;

  bool IsPICCall = isPositionIndependent();

  // PC-relative references to external symbols should go through $stub.
  // If so, we need to prepare the GlobalBaseReg first.
  const TargetMachine &TM = DAG.getTarget();
  const GlobalValue *GV = nullptr;
  auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
  if (CalleeG)
    GV = CalleeG->getGlobal();
  bool Local = TM.shouldAssumeDSOLocal(GV);
  bool UsePlt = !Local;
  MachineFunction &MF = DAG.getMachineFunction();

  // Turn a GlobalAddress/ExternalSymbol node into a value node
  // containing its address here.
  if (CalleeG) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee = makeHiLoPair(Callee, VE::S_HI32, VE::S_LO32, DAG);
    }
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee = makeHiLoPair(Callee, VE::S_HI32, VE::S_LO32, DAG);
    }
  }

  RegsToPass.push_back(std::make_pair(VE::SX12, Callee));

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = CLI.OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown location info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt: {
      // Convert a float argument to i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Undef = SDValue(
          DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                       MVT::i64, Undef, Arg, Sub_f32),
                    0);
      break;
    }
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      if (!UseBoth)
        continue;
      VA = ArgLocs2[i];
    }

    assert(VA.isMemLoc());

    // Create a store off the stack pointer for this argument.
    SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
    // The argument area starts at %fp/%sp + the size of the reserved area.
    SDValue PtrOff =
        DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
    PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
    MemOpChains.push_back(
        DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
  }

  // Emit all stores, make sure they occur before the call.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Build a sequence of CopyToReg nodes glued together with token chain and
  // glue operands which copy the outgoing args into registers. The InGlue is
  // necessary since all emitted instructions must be stuck together in order
  // to pass the live physical registers.
  SDValue InGlue;
  for (const auto &[Reg, N] : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg, N, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Build the operands for the call instruction itself.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  for (const auto &[Reg, N] : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg, N.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Make sure the CopyToReg nodes are glued to the call instruction which
  // consumes the registers.
  if (InGlue.getNode())
    Ops.push_back(InGlue);

  // Now the call itself.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  // Revert the stack pointer immediately after the call.
  Chain = DAG.getCALLSEQ_END(Chain, ArgsSize, 0, InGlue, DL);
  InGlue = Chain.getValue(1);

  // Now extract the return values. This is more or less the same as
  // LowerFormalArguments.

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Set the inreg flag manually for codegen-generated library calls that
  // return float.
  if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
    CLI.Ins[0].Flags.setInReg();

  RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    Register Reg = VA.getLocReg();

    // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
    // reside in the same register in the high and low bits. Reuse the
    // CopyFromReg previous node to avoid duplicate copies.
    SDValue RV;
    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
        RV = Chain.getValue(0);

    // But usually we'll create a new CopyFromReg for a different register.
    if (!RV.getNode()) {
      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
      Chain = RV.getValue(1);
      InGlue = Chain.getValue(2);
    }

    // The callee promoted the return value, so insert an Assert?ext SDNode so
    // we won't promote the value again in this function.
    switch (VA.getLocInfo()) {
    case CCValAssign::SExt:
      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::ZExt:
      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::BCvt: {
      // Extract a float return value from i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                      MVT::f32, RV, Sub_f32),
                   0);
      break;
    }
    default:
      break;
    }

    // Truncate the register down to the return value type.
    if (VA.isExtInLoc())
      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);

    InVals.push_back(RV);
  }

  return Chain;
}

bool VETargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // VE uses 64-bit addressing, so generating an address takes multiple
  // instructions. Folding an address with an offset would increase the
  // instruction count, so we disable it here. Offsets are folded later in
  // DAG combine when it is worthwhile.
  return false;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                    bool ForCodeSize) const {
  return VT == MVT::f32 || VT == MVT::f64;
}

/// Determine if the target supports unaligned memory accesses.
///
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns
/// whether the unaligned memory access is "fast" in the last argument by
/// reference. This is used, for example, in situations where an array
/// copy/move/set is converted to a sequence of store operations. Its use
/// helps to ensure that such replacements don't generate code that causes an
/// alignment error (trap) on the target machine.
bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      Align A,
                                                      MachineMemOperand::Flags,
                                                      unsigned *Fast) const {
  if (Fast) {
    // Unaligned accesses are always fast on VE.
    *Fast = 1;
  }
  return true;
}

VETargetLowering::VETargetLowering(const TargetMachine &TM,
                                   const VESubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Instructions which use registers as conditionals examine all the
  // bits (as does the pseudo SELECT_CC expansion). I don't think it
  // matters much whether it's ZeroOrOneBooleanContent, or
  // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
  // former.
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);

  initRegisterClasses();
  initSPUActions();
  initVPUActions();

  setStackPointerRegisterToSaveRestore(VE::SX11);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::TRUNCATE);
  setTargetDAGCombine(ISD::SELECT);
  setTargetDAGCombine(ISD::SELECT_CC);

  // Set function alignment to 16 bytes.
  setMinFunctionAlignment(Align(16));

  // VE stores all arguments with 8-byte alignment.
  setMinStackArgumentAlignment(Align(8));

  computeRegisterProperties(Subtarget->getRegisterInfo());
}

const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
#define TARGET_NODE_CASE(NAME)                                                 \
  case VEISD::NAME:                                                            \
    return "VEISD::" #NAME;
  switch ((VEISD::NodeType)Opcode) {
  case VEISD::FIRST_NUMBER:
    break;
    TARGET_NODE_CASE(CMPI)
    TARGET_NODE_CASE(CMPU)
    TARGET_NODE_CASE(CMPF)
    TARGET_NODE_CASE(CMPQ)
    TARGET_NODE_CASE(CMOV)
    TARGET_NODE_CASE(CALL)
    TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
    TARGET_NODE_CASE(GETFUNPLT)
    TARGET_NODE_CASE(GETSTACKTOP)
    TARGET_NODE_CASE(GETTLSADDR)
    TARGET_NODE_CASE(GLOBAL_BASE_REG)
    TARGET_NODE_CASE(Hi)
    TARGET_NODE_CASE(Lo)
    TARGET_NODE_CASE(RET_GLUE)
    TARGET_NODE_CASE(TS1AM)
    TARGET_NODE_CASE(VEC_UNPACK_LO)
    TARGET_NODE_CASE(VEC_UNPACK_HI)
    TARGET_NODE_CASE(VEC_PACK)
    TARGET_NODE_CASE(VEC_BROADCAST)
    TARGET_NODE_CASE(REPL_I32)
    TARGET_NODE_CASE(REPL_F32)

    TARGET_NODE_CASE(LEGALAVL)

    // Register the VVP_* SDNodes.
#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
#include "VVPNodes.def"
  }
#undef TARGET_NODE_CASE
  return nullptr;
}

EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                         EVT VT) const {
  return MVT::i32;
}

// Convert to a target node and set target flags.
SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
                                          SelectionDAG &DAG) const {
  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
    return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
                                      GA->getValueType(0), GA->getOffset(), TF);

  if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
    return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
                                     0, TF);

  if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
    return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
                                     CP->getAlign(), CP->getOffset(), TF);

  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
    return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
                                       TF);

  if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
    return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);

  llvm_unreachable("Unhandled address SDNode");
}

// Split Op into high and low parts according to HiTF and LoTF.
// Return an ADD node combining the parts.
SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
                                       SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
  SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
  return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}

// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
// or ExternalSymbol SDNode.
SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = Op.getValueType();

  // Handle PIC mode first. VE needs a GOT load for every variable.
  if (isPositionIndependent()) {
    auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);

    if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
        (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
      // Create the following instructions for local-linkage PIC code.
      //   lea %reg, label@gotoff_lo
      //   and %reg, %reg, (32)0
      //   lea.sl %reg, label@gotoff_hi(%reg, %got)
      SDValue HiLo =
          makeHiLoPair(Op, VE::S_GOTOFF_HI32, VE::S_GOTOFF_LO32, DAG);
      SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
      return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    }
    // Create the following instructions for non-local-linkage PIC code.
    //   lea %reg, label@got_lo
    //   and %reg, %reg, (32)0
    //   lea.sl %reg, label@got_hi(%reg)
    //   ld %reg, (%reg, %got)
    SDValue HiLo = makeHiLoPair(Op, VE::S_GOT_HI32, VE::S_GOT_LO32, DAG);
    SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
    SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
                       MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // This is one of the absolute code models.
  switch (getTargetMachine().getCodeModel()) {
  default:
    llvm_unreachable("Unsupported absolute code model");
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Large:
    // abs64.
    return makeHiLoPair(Op, VE::S_HI32, VE::S_LO32, DAG);
  }
}

/// Custom Lower {

// The mappings for emitLeading/TrailingFence for VE are designed following
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
                                                Instruction *Inst,
                                                AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/non-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Acquire:
    return nullptr; // Nothing to do
  case AtomicOrdering::Release:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Release);
  case AtomicOrdering::SequentiallyConsistent:
    if (!Inst->hasAtomicStore())
      return nullptr; // Nothing to do
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
}

Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/not-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Release:
    return nullptr; // Nothing to do
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Acquire);
  case AtomicOrdering::SequentiallyConsistent:
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
}

SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicOrdering FenceOrdering =
      static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // VE uses Release consistency, so we need a fence instruction if this is a
  // cross-thread fence.
  if (FenceSSID == SyncScope::System) {
    switch (FenceOrdering) {
    case AtomicOrdering::NotAtomic:
    case AtomicOrdering::Unordered:
    case AtomicOrdering::Monotonic:
      // No need to generate a fencem instruction here.
      break;
    case AtomicOrdering::Acquire:
      // Generate "fencem 2" as an acquire fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(2, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::Release:
      // Generate "fencem 1" as a release fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(1, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      // Generate "fencem 3" as an acq_rel and seq_cst fence.
      // FIXME: "fencem 3" doesn't wait for accesses from PCIe devices,
      // so seq_cst may require additional instructions for them.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(3, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    }
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}

TargetLowering::AtomicExpansionKind
VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have a TS1AM implementation for i8/i16/i32/i64, so use it.
  if (AI->getOperation() == AtomicRMWInst::Xchg) {
    return AtomicExpansionKind::None;
  }
  // FIXME: Support the "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.

  // Otherwise, expand it using a compare-and-exchange loop so that we don't
  // call the __sync_fetch_and_* functions.
  return AtomicExpansionKind::CmpXChg;
}

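// Prepare the operands for a sub-word atomic swap implemented with TS1AM:
// derive the byte-enable flag and the bit shift amount from the low bits of
// the pointer, and return the value shifted into its byte position within the
// containing 4-byte word. Flag and Bits are returned by reference.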
static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
                            SDValue &Bits) {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);
  SDValue Ptr = N->getOperand(1);
  SDValue Val = N->getOperand(2);
  EVT PtrVT = Ptr.getValueType();
  bool Byte = N->getMemoryVT() == MVT::i8;
  //   Remainder = AND Ptr, 3
  //   Flag = 1 << Remainder   ; If Byte is true (1 byte swap flag)
  //   Flag = 3 << Remainder   ; If Byte is false (2 bytes swap flag)
  //   Bits = Remainder << 3
  //   NewVal = Val << Bits
  SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
  SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
  SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
                      : DAG.getConstant(3, DL, MVT::i32);
  Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
  Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
  return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
}

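// Extract the sub-word result of a TS1AM operation: shift the returned data
// back down by Bits and mask it to the accessed width (1 or 2 bytes).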
static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
                             SDValue Bits) {
  SDLoc DL(Op);
  EVT VT = Data.getValueType();
  bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
  //   NewData = Data >> Bits
  //   Result = NewData & 0xff   ; If Byte is true (1 byte)
  //   Result = NewData & 0xffff ; If Byte is false (2 bytes)

  SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
  return DAG.getNode(ISD::AND, DL, VT,
                     {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
}

SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);

  if (N->getMemoryVT() == MVT::i8) {
    // For i8, use "ts1am".
    //   Input:
    //     ATOMIC_SWAP Ptr, Val, Order
    //
    //   Output:
    //     Remainder = AND Ptr, 3
    //     Flag = 1 << Remainder  ; 1 byte swap flag for TS1AM inst.
    //     Bits = Remainder << 3
    //     NewVal = Val << Bits
    //
    //     Aligned = AND Ptr, -4
    //     Data = TS1AM Aligned, Flag, NewVal
    //
    //     NewData = Data >> Bits
    //     Result = NewData & 0xff ; 1 byte result
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned =
        DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                    {Ptr, DAG.getSignedConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  if (N->getMemoryVT() == MVT::i16) {
    // For i16, use "ts1am" as well.
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned =
        DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                    {Ptr, DAG.getSignedConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  // Otherwise, let LLVM legalize it.
  return Op;
}

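// GlobalAddress, BlockAddress, ConstantPool, and JumpTable nodes are all
// lowered through makeAddress().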
SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerConstantPool(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue
VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Generate the following code:
  //   t1: ch,glue = callseq_start t0, 0, 0
  //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
  //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
  //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
  SDValue Label = withTargetFlags(Op, 0, DAG);
  EVT PtrVT = Op.getValueType();

  // Lowering the machine isd will make sure everything is in the right
  // location.
  SDValue Chain = DAG.getEntryNode();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
      DAG.getMachineFunction(), CallingConv::C);
  Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
  SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask),
                    Chain.getValue(1)};
  Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
  Chain = DAG.getCALLSEQ_END(Chain, 64, 0, Chain.getValue(1), DL);
  Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));

  // GETTLSADDR will be codegen'ed as a call. Inform MFI that this function
  // has calls.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setHasCalls(true);

  // Also generate code to prepare a GOT register if it is PIC.
  if (isPositionIndependent()) {
    MachineFunction &MF = DAG.getMachineFunction();
    Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
  }

  return Chain;
}

SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't accept the local exec
  // model code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we
  // always generate the general dynamic model code sequence.
  //
  // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
  return lowerToTLSGeneralDynamicModel(Op, DAG);
}

SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

// Lower a f128 load into two f64 loads.
static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
  assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
  Align Alignment = LdNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  SDValue Lo64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);
  EVT AddrVT = LdNode->getBasePtr().getValueType();
  SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
                              DAG.getConstant(8, DL, AddrVT));
  SDValue Hi64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);

  SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
  SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);

  // VE stores Hi64 to 8(addr) and Lo64 to 0(addr).
  SDNode *InFP128 =
      DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Hi64, SubRegEven);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Lo64, SubRegOdd);
  SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
                          SDValue(Hi64.getNode(), 1)};
  SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
  return DAG.getMergeValues(Ops, DL);
}

// Lower a vXi1 load into the following instructions:
//   LDrii %1, (,%addr)
//   LVMxir %vm, 0, %1
//   LDrii %2, 8(,%addr)
//   LVMxir %vm, 0, %2
//   ...
static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
  assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");

  SDValue BasePtr = LdNode->getBasePtr();
  Align Alignment = LdNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  EVT AddrVT = BasePtr.getValueType();
  EVT MemVT = LdNode->getMemoryVT();
  if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
    SDValue OutChains[4];
    SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
    for (int i = 0; i < 4; ++i) {
      // Generate a load DAG and prepare chains.
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      SDValue Val =
          DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
                      LdNode->getPointerInfo(), Alignment,
                      LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                           : MachineMemOperand::MONone);
      OutChains[i] = SDValue(Val.getNode(), 1);

      VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
                              DAG.getTargetConstant(i, DL, MVT::i64), Val,
                              SDValue(VM, 0));
    }
    SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
    SDValue Ops[2] = {SDValue(VM, 0), OutChain};
    return DAG.getMergeValues(Ops, DL);
  } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
    SDValue OutChains[8];
    SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
    for (int i = 0; i < 8; ++i) {
      // Generate a load DAG and prepare chains.
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      SDValue Val =
          DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
                      LdNode->getPointerInfo(), Alignment,
                      LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                           : MachineMemOperand::MONone);
      OutChains[i] = SDValue(Val.getNode(), 1);

      VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
                              DAG.getTargetConstant(i, DL, MVT::i64), Val,
                              SDValue(VM, 0));
    }
    SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
    SDValue Ops[2] = {SDValue(VM, 0), OutChain};
    return DAG.getMergeValues(Ops, DL);
  } else {
    // Otherwise, ask LLVM to expand it.
    return SDValue();
  }
}
1417
1418SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1419 LoadSDNode *LdNode = cast<LoadSDNode>(Val: Op.getNode());
1420 EVT MemVT = LdNode->getMemoryVT();
1421
1422 // If VPU is enabled, always expand non-mask vector loads to VVP
1423 if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(SomeVT: MemVT))
1424 return lowerToVVP(Op, DAG);
1425
1426 SDValue BasePtr = LdNode->getBasePtr();
1427 if (isa<FrameIndexSDNode>(Val: BasePtr.getNode())) {
1428 // Do not expand load instructions with a frame index here because of
1429 // dependency problems. We expand them later in eliminateFrameIndex().
1430 return Op;
1431 }
1432
1433 if (MemVT == MVT::f128)
1434 return lowerLoadF128(Op, DAG);
1435 if (isMaskType(SomeVT: MemVT))
1436 return lowerLoadI1(Op, DAG);
1437
1438 return Op;
1439}
1440
1441// Lower a f128 store into two f64 stores.
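// As a sketch of the result (chains omitted), a store of an f128 value %val
// to %addr becomes:
//   %hi = EXTRACT_SUBREG %val, sub_even
//   %lo = EXTRACT_SUBREG %val, sub_odd
//   store f64 %lo, 0(%addr)
//   store f64 %hi, 8(%addr)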
1442static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
1443 SDLoc DL(Op);
1444 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Val: Op.getNode());
1445 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1446
1447 SDValue SubRegEven = DAG.getTargetConstant(Val: VE::sub_even, DL, VT: MVT::i32);
1448 SDValue SubRegOdd = DAG.getTargetConstant(Val: VE::sub_odd, DL, VT: MVT::i32);
1449
1450 SDNode *Hi64 = DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i64,
1451 Op1: StNode->getValue(), Op2: SubRegEven);
1452 SDNode *Lo64 = DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i64,
1453 Op1: StNode->getValue(), Op2: SubRegOdd);
1454
1455 Align Alignment = StNode->getAlign();
1456 if (Alignment > 8)
1457 Alignment = Align(8);
1458
1459 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1460 SDValue OutChains[2];
1461 OutChains[0] =
1462 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(Lo64, 0),
1463 Ptr: StNode->getBasePtr(), PtrInfo: MachinePointerInfo(), Alignment,
1464 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1465 : MachineMemOperand::MONone);
1466 EVT AddrVT = StNode->getBasePtr().getValueType();
1467 SDValue HiPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: StNode->getBasePtr(),
1468 N2: DAG.getConstant(Val: 8, DL, VT: AddrVT));
1469 OutChains[1] =
1470 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(Hi64, 0), Ptr: HiPtr,
1471 PtrInfo: MachinePointerInfo(), Alignment,
1472 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1473 : MachineMemOperand::MONone);
1474 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1475}
1476
1477 // Lower a vXi1 store into the following instructions:
1478// SVMi %1, %vm, 0
1479// STrii %1, (,%addr)
1480// SVMi %2, %vm, 1
1481// STrii %2, 8(,%addr)
1482// ...
1483static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
1484 SDLoc DL(Op);
1485 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Val: Op.getNode());
1486 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1487
1488 SDValue BasePtr = StNode->getBasePtr();
1489 Align Alignment = StNode->getAlign();
1490 if (Alignment > 8)
1491 Alignment = Align(8);
1492 EVT AddrVT = BasePtr.getValueType();
1493 EVT MemVT = StNode->getMemoryVT();
1494 if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1495 SDValue OutChains[4];
1496 for (int i = 0; i < 4; ++i) {
1497 SDNode *V =
1498 DAG.getMachineNode(Opcode: VE::SVMmi, dl: DL, VT: MVT::i64, Op1: StNode->getValue(),
1499 Op2: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64));
1500 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1501 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1502 OutChains[i] =
1503 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(V, 0), Ptr: Addr,
1504 PtrInfo: MachinePointerInfo(), Alignment,
1505 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1506 : MachineMemOperand::MONone);
1507 }
1508 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1509 } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1510 SDValue OutChains[8];
1511 for (int i = 0; i < 8; ++i) {
1512 SDNode *V =
1513 DAG.getMachineNode(Opcode: VE::SVMyi, dl: DL, VT: MVT::i64, Op1: StNode->getValue(),
1514 Op2: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64));
1515 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1516 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1517 OutChains[i] =
1518 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(V, 0), Ptr: Addr,
1519 PtrInfo: MachinePointerInfo(), Alignment,
1520 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1521 : MachineMemOperand::MONone);
1522 }
1523 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1524 } else {
1525 // Otherwise, ask llvm to expand it.
1526 return SDValue();
1527 }
1528}
1529
1530SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1531 StoreSDNode *StNode = cast<StoreSDNode>(Val: Op.getNode());
1532 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1533 EVT MemVT = StNode->getMemoryVT();
1534
1535 // If VPU is enabled, always expand non-mask vector stores to VVP
1536 if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(SomeVT: MemVT))
1537 return lowerToVVP(Op, DAG);
1538
1539 SDValue BasePtr = StNode->getBasePtr();
1540 if (isa<FrameIndexSDNode>(Val: BasePtr.getNode())) {
1541 // Do not expand store instruction with frame index here because of
1542 // dependency problems. We expand it later in eliminateFrameIndex().
1543 return Op;
1544 }
1545
1546 if (MemVT == MVT::f128)
1547 return lowerStoreF128(Op, DAG);
1548 if (isMaskType(SomeVT: MemVT))
1549 return lowerStoreI1(Op, DAG);
1550
1551 // Otherwise, ask llvm to expand it.
1552 return SDValue();
1553}
1554
1555SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1556 MachineFunction &MF = DAG.getMachineFunction();
1557 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1558 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1559
1560 // Need frame address to find the address of VarArgsFrameIndex.
1561 MF.getFrameInfo().setFrameAddressIsTaken(true);
1562
1563 // vastart just stores the address of the VarArgsFrameIndex slot into the
1564 // memory location argument.
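// As a sketch, for `va_start(ap)` this emits roughly:
//   %addr = add %s9, VarArgsFrameOffset   ; %s9 is the frame pointer
//   store %addr, <ap>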
1565 SDLoc DL(Op);
1566 SDValue Offset =
1567 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: DAG.getRegister(Reg: VE::SX9, VT: PtrVT),
1568 N2: DAG.getIntPtrConstant(Val: FuncInfo->getVarArgsFrameOffset(), DL));
1569 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
1570 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: Offset, Ptr: Op.getOperand(i: 1),
1571 PtrInfo: MachinePointerInfo(SV));
1572}
1573
1574SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1575 SDNode *Node = Op.getNode();
1576 EVT VT = Node->getValueType(ResNo: 0);
1577 SDValue InChain = Node->getOperand(Num: 0);
1578 SDValue VAListPtr = Node->getOperand(Num: 1);
1579 EVT PtrVT = VAListPtr.getValueType();
1580 const Value *SV = cast<SrcValueSDNode>(Val: Node->getOperand(Num: 2))->getValue();
1581 SDLoc DL(Node);
1582 SDValue VAList =
1583 DAG.getLoad(VT: PtrVT, dl: DL, Chain: InChain, Ptr: VAListPtr, PtrInfo: MachinePointerInfo(SV));
1584 SDValue Chain = VAList.getValue(R: 1);
1585 SDValue NextPtr;
1586
1587 if (VT == MVT::f128) {
1588 // VE f128 values must be stored with 16-byte alignment. We don't
1589 // know the actual alignment of VAList, so we align it to 16 bytes
1590 // dynamically here.
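// In effect, VAList is rounded up to the next 16-byte boundary:
//   VAList = (VAList + 15) & -16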
1591 int Align = 16;
1592 VAList = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList,
1593 N2: DAG.getConstant(Val: Align - 1, DL, VT: PtrVT));
1594 VAList = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, N1: VAList,
1595 N2: DAG.getSignedConstant(Val: -Align, DL, VT: PtrVT));
1596 // Increment the pointer, VAList, by 16 to the next vaarg.
1597 NextPtr =
1598 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: 16, DL));
1599 } else if (VT == MVT::f32) {
1600 // float --> need special handling like below.
1601 // 0 4
1602 // +------+------+
1603 // | empty| float|
1604 // +------+------+
1605 // Increment the pointer, VAList, by 8 to the next vaarg.
1606 NextPtr =
1607 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: 8, DL));
1608 // Then, adjust VAList.
1609 unsigned InternalOffset = 4;
1610 VAList = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList,
1611 N2: DAG.getConstant(Val: InternalOffset, DL, VT: PtrVT));
1612 } else {
1613 // Increment the pointer, VAList, by 8 to the next vaarg.
1614 NextPtr =
1615 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: 8, DL));
1616 }
1617
1618 // Store the incremented VAList to the legalized pointer.
1619 InChain = DAG.getStore(Chain, dl: DL, Val: NextPtr, Ptr: VAListPtr, PtrInfo: MachinePointerInfo(SV));
1620
1621 // Load the actual argument out of the pointer VAList.
1622 // We can't count on greater alignment than the word size.
1623 return DAG.getLoad(
1624 VT, dl: DL, Chain: InChain, Ptr: VAList, PtrInfo: MachinePointerInfo(),
1625 Alignment: Align(std::min(a: PtrVT.getSizeInBits(), b: VT.getSizeInBits()) / 8));
1626}
1627
1628SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1629 SelectionDAG &DAG) const {
1630 // Generate the following code:
1631 // (void)__ve_grow_stack(size); // or __ve_grow_stack_align(size, -align)
1632 // ret = GETSTACKTOP; // pseudo instruction
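// As a sketch, for an over-aligned `alloca i8, i64 %n, align 32` this emits
// roughly (helper name and operands taken from the code below):
//   call __ve_grow_stack_align(%n, -32)
//   %top = GETSTACKTOP
//   %ret = (%top + 31) & -32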
1633 SDLoc DL(Op);
1634
1635 // Get the inputs.
1636 SDNode *Node = Op.getNode();
1637 SDValue Chain = Op.getOperand(i: 0);
1638 SDValue Size = Op.getOperand(i: 1);
1639 MaybeAlign Alignment(Op.getConstantOperandVal(i: 2));
1640 EVT VT = Node->getValueType(ResNo: 0);
1641
1642 // Chain the dynamic stack allocation so that it doesn't modify the stack
1643 // pointer when other instructions are using the stack.
1644 Chain = DAG.getCALLSEQ_START(Chain, InSize: 0, OutSize: 0, DL);
1645
1646 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1647 Align StackAlign = TFI.getStackAlign();
1648 bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1649
1650 // Prepare arguments
1651 TargetLowering::ArgListTy Args;
1652 TargetLowering::ArgListEntry Entry;
1653 Entry.Node = Size;
1654 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext());
1655 Args.push_back(x: Entry);
1656 if (NeedsAlign) {
1657 Entry.Node = DAG.getConstant(Val: ~(Alignment->value() - 1ULL), DL, VT);
1658 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext());
1659 Args.push_back(x: Entry);
1660 }
1661 Type *RetTy = Type::getVoidTy(C&: *DAG.getContext());
1662
1663 EVT PtrVT = Op.getValueType();
1664 SDValue Callee;
1665 if (NeedsAlign) {
1666 Callee = DAG.getTargetExternalSymbol(Sym: "__ve_grow_stack_align", VT: PtrVT, TargetFlags: 0);
1667 } else {
1668 Callee = DAG.getTargetExternalSymbol(Sym: "__ve_grow_stack", VT: PtrVT, TargetFlags: 0);
1669 }
1670
1671 TargetLowering::CallLoweringInfo CLI(DAG);
1672 CLI.setDebugLoc(DL)
1673 .setChain(Chain)
1674 .setCallee(CC: CallingConv::PreserveAll, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
1675 .setDiscardResult(true);
1676 std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1677 Chain = pair.second;
1678 SDValue Result = DAG.getNode(Opcode: VEISD::GETSTACKTOP, DL, VT, Operand: Chain);
1679 if (NeedsAlign) {
1680 Result = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Result,
1681 N2: DAG.getConstant(Val: (Alignment->value() - 1ULL), DL, VT));
1682 Result = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Result,
1683 N2: DAG.getConstant(Val: ~(Alignment->value() - 1ULL), DL, VT));
1684 }
1685 // Chain = Result.getValue(1);
1686 Chain = DAG.getCALLSEQ_END(Chain, Size1: 0, Size2: 0, Glue: SDValue(), DL);
1687
1688 SDValue Ops[2] = {Result, Chain};
1689 return DAG.getMergeValues(Ops, dl: DL);
1690}
1691
1692SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1693 SelectionDAG &DAG) const {
1694 SDLoc DL(Op);
1695 return DAG.getNode(Opcode: VEISD::EH_SJLJ_LONGJMP, DL, VT: MVT::Other, N1: Op.getOperand(i: 0),
1696 N2: Op.getOperand(i: 1));
1697}
1698
1699SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1700 SelectionDAG &DAG) const {
1701 SDLoc DL(Op);
1702 return DAG.getNode(Opcode: VEISD::EH_SJLJ_SETJMP, DL,
1703 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other), N1: Op.getOperand(i: 0),
1704 N2: Op.getOperand(i: 1));
1705}
1706
1707SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1708 SelectionDAG &DAG) const {
1709 SDLoc DL(Op);
1710 return DAG.getNode(Opcode: VEISD::EH_SJLJ_SETUP_DISPATCH, DL, VT: MVT::Other,
1711 Operand: Op.getOperand(i: 0));
1712}
1713
1714static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1715 const VETargetLowering &TLI,
1716 const VESubtarget *Subtarget) {
1717 SDLoc DL(Op);
1718 MachineFunction &MF = DAG.getMachineFunction();
1719 EVT PtrVT = TLI.getPointerTy(DL: MF.getDataLayout());
1720
1721 MachineFrameInfo &MFI = MF.getFrameInfo();
1722 MFI.setFrameAddressIsTaken(true);
1723
1724 unsigned Depth = Op.getConstantOperandVal(i: 0);
1725 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1726 Register FrameReg = RegInfo->getFrameRegister(MF);
1727 SDValue FrameAddr =
1728 DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT: PtrVT);
1729 while (Depth--)
1730 FrameAddr = DAG.getLoad(VT: Op.getValueType(), dl: DL, Chain: DAG.getEntryNode(),
1731 Ptr: FrameAddr, PtrInfo: MachinePointerInfo());
1732 return FrameAddr;
1733}
1734
1735static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1736 const VETargetLowering &TLI,
1737 const VESubtarget *Subtarget) {
1738 MachineFunction &MF = DAG.getMachineFunction();
1739 MachineFrameInfo &MFI = MF.getFrameInfo();
1740 MFI.setReturnAddressIsTaken(true);
1741
1742 if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
1743 return SDValue();
1744
1745 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1746
1747 SDLoc DL(Op);
1748 EVT VT = Op.getValueType();
1749 SDValue Offset = DAG.getConstant(Val: 8, DL, VT);
1750 return DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(),
1751 Ptr: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, N2: Offset),
1752 PtrInfo: MachinePointerInfo());
1753}
1754
1755SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1756 SelectionDAG &DAG) const {
1757 SDLoc DL(Op);
1758 unsigned IntNo = Op.getConstantOperandVal(i: 0);
1759 switch (IntNo) {
1760 default: // Don't custom lower most intrinsics.
1761 return SDValue();
1762 case Intrinsic::eh_sjlj_lsda: {
1763 MachineFunction &MF = DAG.getMachineFunction();
1764 MVT VT = Op.getSimpleValueType();
1765 const VETargetMachine *TM =
1766 static_cast<const VETargetMachine *>(&DAG.getTarget());
1767
1768 // Create the GCC_except_tableXX string. The real symbol for that will be
1769 // generated in EHStreamer::emitExceptionTable() later. So, we just
1770 // borrow its name here.
1771 TM->getStrList()->push_back(x: std::string(
1772 (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1773 SDValue Addr =
1774 DAG.getTargetExternalSymbol(Sym: TM->getStrList()->back().c_str(), VT, TargetFlags: 0);
1775 if (isPositionIndependent()) {
1776 Addr = makeHiLoPair(Op: Addr, HiTF: VE::S_GOTOFF_HI32, LoTF: VE::S_GOTOFF_LO32, DAG);
1777 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT);
1778 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: GlobalBase, N2: Addr);
1779 }
1780 return makeHiLoPair(Op: Addr, HiTF: VE::S_HI32, LoTF: VE::S_LO32, DAG);
1781 }
1782 }
1783}
1784
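// Return true (and set \p UniqueIdx) if \p N is a BUILD_VECTOR with exactly
// one non-undef operand, e.g.
//   (build_vector undef, undef, %x, undef)  ->  UniqueIdx == 2.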
1785static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1786 if (!isa<BuildVectorSDNode>(Val: N))
1787 return false;
1788 const auto *BVN = cast<BuildVectorSDNode>(Val: N);
1789
1790 // Find first non-undef insertion.
1791 unsigned Idx;
1792 for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1793 auto ElemV = BVN->getOperand(Num: Idx);
1794 if (!ElemV->isUndef())
1795 break;
1796 }
1797 // Catch the (hypothetical) all-undef case.
1798 if (Idx == BVN->getNumOperands())
1799 return false;
1800 // Remember insertion.
1801 UniqueIdx = Idx++;
1802 // Verify that all other insertions are undef.
1803 for (; Idx < BVN->getNumOperands(); ++Idx) {
1804 auto ElemV = BVN->getOperand(Num: Idx);
1805 if (!ElemV->isUndef())
1806 return false;
1807 }
1808 return true;
1809}
1810
1811static SDValue getSplatValue(SDNode *N) {
1812 if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(Val: N)) {
1813 return BuildVec->getSplatValue();
1814 }
1815 return SDValue();
1816}
1817
1818SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1819 SelectionDAG &DAG) const {
1820 VECustomDAG CDAG(DAG, Op);
1821 MVT ResultVT = Op.getSimpleValueType();
1822
1823 // If there is just one non-undef element, expand to INSERT_VECTOR_ELT.
1824 unsigned UniqueIdx;
1825 if (getUniqueInsertion(N: Op.getNode(), UniqueIdx)) {
1826 SDValue AccuV = CDAG.getUNDEF(VT: Op.getValueType());
1827 auto ElemV = Op->getOperand(Num: UniqueIdx);
1828 SDValue IdxV = CDAG.getConstant(Val: UniqueIdx, VT: MVT::i64);
1829 return CDAG.getNode(OC: ISD::INSERT_VECTOR_ELT, ResVT: ResultVT, OpV: {AccuV, ElemV, IdxV});
1830 }
1831
1832 // Else emit a broadcast.
1833 if (SDValue ScalarV = getSplatValue(N: Op.getNode())) {
1834 unsigned NumEls = ResultVT.getVectorNumElements();
1835 auto AVL = CDAG.getConstant(Val: NumEls, VT: MVT::i32);
1836 return CDAG.getBroadcast(ResultVT, Scalar: ScalarV, AVL);
1837 }
1838
1839 // Expand
1840 return SDValue();
1841}
1842
1843TargetLowering::LegalizeAction
1844VETargetLowering::getCustomOperationAction(SDNode &Op) const {
1845 // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
1846 // these operations (transform nodes such that their AVL parameter refers to
1847 // packs of 64 bits instead of the number of elements).
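// For example, a VVP_ADD over v512i32 notionally works on 512 elements, but in
// packed mode each 64-bit vector element holds two i32 values, so (as a sketch)
// its AVL would be rewritten from 512 elements down to 256 64-bit packs and
// annotated with a LEGALAVL node.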
1848
1849 // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1850 // re-visit them.
1851 if (isPackingSupportOpcode(Opc: Op.getOpcode()))
1852 return Legal;
1853
1854 // Custom lower to legalize AVL for packed mode.
1855 if (isVVPOrVEC(Op.getOpcode()))
1856 return Custom;
1857 return Legal;
1858}
1859
1860SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1861 LLVM_DEBUG(dbgs() << "::LowerOperation "; Op.dump(&DAG));
1862 unsigned Opcode = Op.getOpcode();
1863
1864 /// Scalar isel.
1865 switch (Opcode) {
1866 case ISD::ATOMIC_FENCE:
1867 return lowerATOMIC_FENCE(Op, DAG);
1868 case ISD::ATOMIC_SWAP:
1869 return lowerATOMIC_SWAP(Op, DAG);
1870 case ISD::BlockAddress:
1871 return lowerBlockAddress(Op, DAG);
1872 case ISD::ConstantPool:
1873 return lowerConstantPool(Op, DAG);
1874 case ISD::DYNAMIC_STACKALLOC:
1875 return lowerDYNAMIC_STACKALLOC(Op, DAG);
1876 case ISD::EH_SJLJ_LONGJMP:
1877 return lowerEH_SJLJ_LONGJMP(Op, DAG);
1878 case ISD::EH_SJLJ_SETJMP:
1879 return lowerEH_SJLJ_SETJMP(Op, DAG);
1880 case ISD::EH_SJLJ_SETUP_DISPATCH:
1881 return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1882 case ISD::FRAMEADDR:
1883 return lowerFRAMEADDR(Op, DAG, TLI: *this, Subtarget);
1884 case ISD::GlobalAddress:
1885 return lowerGlobalAddress(Op, DAG);
1886 case ISD::GlobalTLSAddress:
1887 return lowerGlobalTLSAddress(Op, DAG);
1888 case ISD::INTRINSIC_WO_CHAIN:
1889 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1890 case ISD::JumpTable:
1891 return lowerJumpTable(Op, DAG);
1892 case ISD::LOAD:
1893 return lowerLOAD(Op, DAG);
1894 case ISD::RETURNADDR:
1895 return lowerRETURNADDR(Op, DAG, TLI: *this, Subtarget);
1896 case ISD::BUILD_VECTOR:
1897 return lowerBUILD_VECTOR(Op, DAG);
1898 case ISD::STORE:
1899 return lowerSTORE(Op, DAG);
1900 case ISD::VASTART:
1901 return lowerVASTART(Op, DAG);
1902 case ISD::VAARG:
1903 return lowerVAARG(Op, DAG);
1904
1905 case ISD::INSERT_VECTOR_ELT:
1906 return lowerINSERT_VECTOR_ELT(Op, DAG);
1907 case ISD::EXTRACT_VECTOR_ELT:
1908 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1909 }
1910
1911 /// Vector isel.
1912 if (ISD::isVPOpcode(Opcode))
1913 return lowerToVVP(Op, DAG);
1914
1915 switch (Opcode) {
1916 default:
1917 llvm_unreachable("Should not custom lower this!");
1918
1919 // Legalize the AVL of this internal node.
1920 case VEISD::VEC_BROADCAST:
1921#define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
1922#include "VVPNodes.def"
1923 // AVL already legalized.
1924 if (getAnnotatedNodeAVL(Op).second)
1925 return Op;
1926 return legalizeInternalVectorOp(Op, DAG);
1927
1928 // Translate into a VEC_*/VVP_* layer operation.
1929 case ISD::MLOAD:
1930 case ISD::MSTORE:
1931#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1932#include "VVPNodes.def"
1933 if (isMaskArithmetic(Op) && isPackedVectorType(SomeVT: Op.getValueType()))
1934 return splitMaskArithmetic(Op, DAG);
1935 return lowerToVVP(Op, DAG);
1936 }
1937}
1938/// } Custom Lower
1939
1940void VETargetLowering::ReplaceNodeResults(SDNode *N,
1941 SmallVectorImpl<SDValue> &Results,
1942 SelectionDAG &DAG) const {
1943 switch (N->getOpcode()) {
1944 case ISD::ATOMIC_SWAP:
1945 // Let LLVM expand atomic swap instruction through LowerOperation.
1946 return;
1947 default:
1948 LLVM_DEBUG(N->dumpr(&DAG));
1949 llvm_unreachable("Do not know how to custom type legalize this operation!");
1950 }
1951}
1952
1953/// JumpTable for VE.
1954///
1955 /// VE cannot generate relocatable symbols in jump tables; it cannot
1956 /// generate expressions using symbols from both the text segment and the
1957 /// data segment like below.
1958 /// .4byte .LBB0_2-.LJTI0_0
1959 /// So, we generate an offset from the start of the function like below as
1960 /// a custom label.
1961/// .4byte .LBB0_2-<function name>
1962
1963unsigned VETargetLowering::getJumpTableEncoding() const {
1964 // Use custom label for PIC.
1965 if (isPositionIndependent())
1966 return MachineJumpTableInfo::EK_Custom32;
1967
1968 // Otherwise, use the normal jump table encoding heuristics.
1969 return TargetLowering::getJumpTableEncoding();
1970}
1971
1972const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1973 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1974 unsigned Uid, MCContext &Ctx) const {
1975 assert(isPositionIndependent());
1976
1977 // Generate custom label for PIC like below.
1978 // .4bytes .LBB0_2-<function name>
1979 const auto *Value = MCSymbolRefExpr::create(Symbol: MBB->getSymbol(), Ctx);
1980 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: MBB->getParent()->getName().data());
1981 const auto *Base = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
1982 return MCBinaryExpr::createSub(LHS: Value, RHS: Base, Ctx);
1983}
1984
1985SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1986 SelectionDAG &DAG) const {
1987 assert(isPositionIndependent());
1988 SDLoc DL(Table);
1989 Function *Function = &DAG.getMachineFunction().getFunction();
1990 assert(Function != nullptr);
1991 auto PtrTy = getPointerTy(DL: DAG.getDataLayout(), AS: Function->getAddressSpace());
1992
1993 // In the jump table, we have following values in PIC mode.
1994 // .4bytes .LBB0_2-<function name>
1995 // We need to add this value and the address of this function to generate
1996 // .LBB0_2 label correctly under PIC mode. So, we want to generate following
1997 // instructions:
1998 // lea %reg, fun@gotoff_lo
1999 // and %reg, %reg, (32)0
2000 // lea.sl %reg, fun@gotoff_hi(%reg, %got)
2001 // In order to do so, we need to generate a correctly marked DAG node using
2002 // makeHiLoPair.
2003 SDValue Op = DAG.getGlobalAddress(GV: Function, DL, VT: PtrTy);
2004 SDValue HiLo = makeHiLoPair(Op, HiTF: VE::S_GOTOFF_HI32, LoTF: VE::S_GOTOFF_LO32, DAG);
2005 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrTy);
2006 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: GlobalBase, N2: HiLo);
2007}
2008
2009Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
2010 MachineBasicBlock::iterator I,
2011 MachineBasicBlock *TargetBB,
2012 const DebugLoc &DL) const {
2013 MachineFunction *MF = MBB.getParent();
2014 MachineRegisterInfo &MRI = MF->getRegInfo();
2015 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2016
2017 const TargetRegisterClass *RC = &VE::I64RegClass;
2018 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2019 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2020 Register Result = MRI.createVirtualRegister(RegClass: RC);
2021
2022 if (isPositionIndependent()) {
2023 // Create following instructions for local linkage PIC code.
2024 // lea %Tmp1, TargetBB@gotoff_lo
2025 // and %Tmp2, %Tmp1, (32)0
2026 // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2027 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2028 .addImm(Val: 0)
2029 .addImm(Val: 0)
2030 .addMBB(MBB: TargetBB, TargetFlags: VE::S_GOTOFF_LO32);
2031 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2032 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2033 .addImm(Val: M0(Val: 32));
2034 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Result)
2035 .addReg(RegNo: VE::SX15)
2036 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2037 .addMBB(MBB: TargetBB, TargetFlags: VE::S_GOTOFF_HI32);
2038 } else {
2039 // Create following instructions for non-PIC code.
2040 // lea %Tmp1, TargetBB@lo
2041 // and %Tmp2, %Tmp1, (32)0
2042 // lea.sl %Result, TargetBB@hi(%Tmp2)
2043 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2044 .addImm(Val: 0)
2045 .addImm(Val: 0)
2046 .addMBB(MBB: TargetBB, TargetFlags: VE::S_LO32);
2047 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2048 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2049 .addImm(Val: M0(Val: 32));
2050 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: Result)
2051 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2052 .addImm(Val: 0)
2053 .addMBB(MBB: TargetBB, TargetFlags: VE::S_HI32);
2054 }
2055 return Result;
2056}
2057
2058Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
2059 MachineBasicBlock::iterator I,
2060 StringRef Symbol, const DebugLoc &DL,
2061 bool IsLocal = false,
2062 bool IsCall = false) const {
2063 MachineFunction *MF = MBB.getParent();
2064 MachineRegisterInfo &MRI = MF->getRegInfo();
2065 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2066
2067 const TargetRegisterClass *RC = &VE::I64RegClass;
2068 Register Result = MRI.createVirtualRegister(RegClass: RC);
2069
2070 if (isPositionIndependent()) {
2071 if (IsCall && !IsLocal) {
2072 // Create following instructions for non-local linkage PIC code function
2073 // calls. These instructions use IC and the magic number -24, so we expand
2074 // them in VEAsmPrinter.cpp from GETFUNPLT pseudo instruction.
2075 // lea %Reg, Symbol@plt_lo(-24)
2076 // and %Reg, %Reg, (32)0
2077 // sic %s16
2078 // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
2079 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::GETFUNPLT), DestReg: Result)
2080 .addExternalSymbol(FnName: "abort");
2081 } else if (IsLocal) {
2082 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2083 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2084 // Create following instructions for local linkage PIC code.
2085 // lea %Tmp1, Symbol@gotoff_lo
2086 // and %Tmp2, %Tmp1, (32)0
2087 // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2088 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2089 .addImm(Val: 0)
2090 .addImm(Val: 0)
2091 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOTOFF_LO32);
2092 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2093 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2094 .addImm(Val: M0(Val: 32));
2095 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Result)
2096 .addReg(RegNo: VE::SX15)
2097 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2098 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOTOFF_HI32);
2099 } else {
2100 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2101 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2102 // Create following instructions for non-local linkage PIC code.
2103 // lea %Tmp1, Symbol@got_lo
2104 // and %Tmp2, %Tmp1, (32)0
2105 // lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2106 // ld %Result, 0(%Tmp3)
2107 Register Tmp3 = MRI.createVirtualRegister(RegClass: RC);
2108 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2109 .addImm(Val: 0)
2110 .addImm(Val: 0)
2111 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOT_LO32);
2112 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2113 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2114 .addImm(Val: M0(Val: 32));
2115 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Tmp3)
2116 .addReg(RegNo: VE::SX15)
2117 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2118 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOT_HI32);
2119 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: Result)
2120 .addReg(RegNo: Tmp3, flags: getKillRegState(B: true))
2121 .addImm(Val: 0)
2122 .addImm(Val: 0);
2123 }
2124 } else {
2125 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2126 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2127 // Create following instructions for non-PIC code.
2128 // lea %Tmp1, Symbol@lo
2129 // and %Tmp2, %Tmp1, (32)0
2130 // lea.sl %Result, Symbol@hi(%Tmp2)
2131 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2132 .addImm(Val: 0)
2133 .addImm(Val: 0)
2134 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_LO32);
2135 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2136 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2137 .addImm(Val: M0(Val: 32));
2138 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: Result)
2139 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2140 .addImm(Val: 0)
2141 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_HI32);
2142 }
2143 return Result;
2144}
2145
2146void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
2147 MachineBasicBlock *MBB,
2148 MachineBasicBlock *DispatchBB,
2149 int FI, int Offset) const {
2150 DebugLoc DL = MI.getDebugLoc();
2151 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2152
2153 Register LabelReg =
2154 prepareMBB(MBB&: *MBB, I: MachineBasicBlock::iterator(MI), TargetBB: DispatchBB, DL);
2155
2156 // Store the address of DispatchBB into the given jmpbuf[1], which holds the
2157 // next IC that longjmp (throw) jumps to later.
2158 MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2159 addFrameReference(MIB, FI, Offset); // jmpbuf[1]
2160 MIB.addReg(RegNo: LabelReg, flags: getKillRegState(B: true));
2161}
2162
2163MachineBasicBlock *
2164VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
2165 MachineBasicBlock *MBB) const {
2166 DebugLoc DL = MI.getDebugLoc();
2167 MachineFunction *MF = MBB->getParent();
2168 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2169 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
2170 MachineRegisterInfo &MRI = MF->getRegInfo();
2171
2172 const BasicBlock *BB = MBB->getBasicBlock();
2173 MachineFunction::iterator I = ++MBB->getIterator();
2174
2175 // Memory Reference.
2176 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
2177 Register BufReg = MI.getOperand(i: 1).getReg();
2178
2179 Register DstReg;
2180
2181 DstReg = MI.getOperand(i: 0).getReg();
2182 const TargetRegisterClass *RC = MRI.getRegClass(Reg: DstReg);
2183 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
2184 (void)TRI;
2185 Register MainDestReg = MRI.createVirtualRegister(RegClass: RC);
2186 Register RestoreDestReg = MRI.createVirtualRegister(RegClass: RC);
2187
2188 // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate the following
2189 // instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2190 //
2191 // ThisMBB:
2192 // buf[3] = %s17 iff %s17 is used as BP
2193 // buf[1] = RestoreMBB as IC after longjmp
2194 // # SjLjSetup RestoreMBB
2195 //
2196 // MainMBB:
2197 // v_main = 0
2198 //
2199 // SinkMBB:
2200 // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2201 // ...
2202 //
2203 // RestoreMBB:
2204 // %s17 = buf[3] iff %s17 is used as BP
2205 // v_restore = 1
2206 // goto SinkMBB
2207
2208 MachineBasicBlock *ThisMBB = MBB;
2209 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2210 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2211 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2212 MF->insert(MBBI: I, MBB: MainMBB);
2213 MF->insert(MBBI: I, MBB: SinkMBB);
2214 MF->push_back(MBB: RestoreMBB);
2215 RestoreMBB->setMachineBlockAddressTaken();
2216
2217 // Transfer the remainder of BB and its successor edges to SinkMBB.
2218 SinkMBB->splice(Where: SinkMBB->begin(), Other: MBB,
2219 From: std::next(x: MachineBasicBlock::iterator(MI)), To: MBB->end());
2220 SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
2221
2222 // ThisMBB:
2223 Register LabelReg =
2224 prepareMBB(MBB&: *MBB, I: MachineBasicBlock::iterator(MI), TargetBB: RestoreMBB, DL);
2225
2226 // Store BP in buf[3] iff this function is using BP.
2227 const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2228 if (TFI->hasBP(MF: *MF)) {
2229 MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2230 MIB.addReg(RegNo: BufReg);
2231 MIB.addImm(Val: 0);
2232 MIB.addImm(Val: 24);
2233 MIB.addReg(RegNo: VE::SX17);
2234 MIB.setMemRefs(MMOs);
2235 }
2236
2237 // Store IP in buf[1].
2238 MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2239 MIB.add(MO: MI.getOperand(i: 1)); // we can preserve the kill flags here.
2240 MIB.addImm(Val: 0);
2241 MIB.addImm(Val: 8);
2242 MIB.addReg(RegNo: LabelReg, flags: getKillRegState(B: true));
2243 MIB.setMemRefs(MMOs);
2244
2245 // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2246
2247 // Insert setup.
2248 MIB =
2249 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::EH_SjLj_Setup)).addMBB(MBB: RestoreMBB);
2250
2251 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2252 MIB.addRegMask(Mask: RegInfo->getNoPreservedMask());
2253 ThisMBB->addSuccessor(Succ: MainMBB);
2254 ThisMBB->addSuccessor(Succ: RestoreMBB);
2255
2256 // MainMBB:
2257 BuildMI(BB: MainMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: MainDestReg)
2258 .addImm(Val: 0)
2259 .addImm(Val: 0)
2260 .addImm(Val: 0);
2261 MainMBB->addSuccessor(Succ: SinkMBB);
2262
2263 // SinkMBB:
2264 BuildMI(BB&: *SinkMBB, I: SinkMBB->begin(), MIMD: DL, MCID: TII->get(Opcode: VE::PHI), DestReg: DstReg)
2265 .addReg(RegNo: MainDestReg)
2266 .addMBB(MBB: MainMBB)
2267 .addReg(RegNo: RestoreDestReg)
2268 .addMBB(MBB: RestoreMBB);
2269
2270 // RestoreMBB:
2271 // Restore BP from buf[3] iff this function is using BP. The address of
2272 // buf is in SX10.
2273 // FIXME: Better to not use SX10 here
2274 if (TFI->hasBP(MF: *MF)) {
2275 MachineInstrBuilder MIB =
2276 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: VE::SX17);
2277 MIB.addReg(RegNo: VE::SX10);
2278 MIB.addImm(Val: 0);
2279 MIB.addImm(Val: 24);
2280 MIB.setMemRefs(MMOs);
2281 }
2282 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: RestoreDestReg)
2283 .addImm(Val: 0)
2284 .addImm(Val: 0)
2285 .addImm(Val: 1);
2286 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLa_t)).addMBB(MBB: SinkMBB);
2287 RestoreMBB->addSuccessor(Succ: SinkMBB);
2288
2289 MI.eraseFromParent();
2290 return SinkMBB;
2291}
2292
2293MachineBasicBlock *
2294VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2295 MachineBasicBlock *MBB) const {
2296 DebugLoc DL = MI.getDebugLoc();
2297 MachineFunction *MF = MBB->getParent();
2298 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2299 MachineRegisterInfo &MRI = MF->getRegInfo();
2300
2301 // Memory Reference.
2302 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
2303 Register BufReg = MI.getOperand(i: 0).getReg();
2304
2305 Register Tmp = MRI.createVirtualRegister(RegClass: &VE::I64RegClass);
2306 // Since FP is only updated here but NOT referenced, it's treated as GPR.
2307 Register FP = VE::SX9;
2308 Register SP = VE::SX11;
2309
2310 MachineInstrBuilder MIB;
2311
2312 MachineBasicBlock *ThisMBB = MBB;
2313
2314 // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate the following instructions.
2315 //
2316 // ThisMBB:
2317 // %fp = load buf[0]
2318 // %jmp = load buf[1]
2319 // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2320 // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2321 // jmp %jmp
2322
2323 // Reload FP.
2324 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: FP);
2325 MIB.addReg(RegNo: BufReg);
2326 MIB.addImm(Val: 0);
2327 MIB.addImm(Val: 0);
2328 MIB.setMemRefs(MMOs);
2329
2330 // Reload IP.
2331 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: Tmp);
2332 MIB.addReg(RegNo: BufReg);
2333 MIB.addImm(Val: 0);
2334 MIB.addImm(Val: 8);
2335 MIB.setMemRefs(MMOs);
2336
2337 // Copy BufReg to SX10 so that RestoreMBB (emitted by setjmp) can find buf.
2338 // FIXME: Better to not use SX10 here
2339 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::ORri), DestReg: VE::SX10)
2340 .addReg(RegNo: BufReg)
2341 .addImm(Val: 0);
2342
2343 // Reload SP.
2344 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: SP);
2345 MIB.add(MO: MI.getOperand(i: 0)); // we can preserve the kill flags here.
2346 MIB.addImm(Val: 0);
2347 MIB.addImm(Val: 16);
2348 MIB.setMemRefs(MMOs);
2349
2350 // Jump.
2351 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2352 .addReg(RegNo: Tmp, flags: getKillRegState(B: true))
2353 .addImm(Val: 0);
2354
2355 MI.eraseFromParent();
2356 return ThisMBB;
2357}
2358
2359MachineBasicBlock *
2360VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2361 MachineBasicBlock *BB) const {
2362 DebugLoc DL = MI.getDebugLoc();
2363 MachineFunction *MF = BB->getParent();
2364 MachineFrameInfo &MFI = MF->getFrameInfo();
2365 MachineRegisterInfo &MRI = MF->getRegInfo();
2366 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2367 int FI = MFI.getFunctionContextIndex();
2368
2369 // Get a mapping of the call site numbers to all of the landing pads they're
2370 // associated with.
2371 DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
2372 unsigned MaxCSNum = 0;
2373 for (auto &MBB : *MF) {
2374 if (!MBB.isEHPad())
2375 continue;
2376
2377 MCSymbol *Sym = nullptr;
2378 for (const auto &MI : MBB) {
2379 if (MI.isDebugInstr())
2380 continue;
2381
2382 assert(MI.isEHLabel() && "expected EH_LABEL");
2383 Sym = MI.getOperand(i: 0).getMCSymbol();
2384 break;
2385 }
2386
2387 if (!MF->hasCallSiteLandingPad(Sym))
2388 continue;
2389
2390 for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2391 CallSiteNumToLPad[CSI].push_back(Elt: &MBB);
2392 MaxCSNum = std::max(a: MaxCSNum, b: CSI);
2393 }
2394 }
2395
2396 // Get an ordered list of the machine basic blocks for the jump table.
2397 std::vector<MachineBasicBlock *> LPadList;
2398 SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
2399 LPadList.reserve(n: CallSiteNumToLPad.size());
2400
2401 for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
2402 for (auto &LP : CallSiteNumToLPad[CSI]) {
2403 LPadList.push_back(x: LP);
2404 InvokeBBs.insert_range(R: LP->predecessors());
2405 }
2406 }
2407
2408 assert(!LPadList.empty() &&
2409 "No landing pad destinations for the dispatch jump table!");
2410
2411 // The %fn_context is allocated like below (from --print-after=sjljehprepare):
2412 // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2413 //
2414 // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
2415 // First `i64` is callsite, so callsite is FI+8.
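// With 8-byte pointers the offsets work out as:
//   i8*        at  0
//   i64        at  8   (callsite; OffsetCS below)
//   [4 x i64]  at 16..47
//   i8*        at 48
//   i8*        at 56
//   [5 x i8*]  at 64..103, so jmpbuf[1] is at 72 (OffsetIC below)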
2416 static const int OffsetIC = 72;
2417 static const int OffsetCS = 8;
2418
2419 // Create the MBBs for the dispatch code like following:
2420 //
2421 // ThisMBB:
2422 // Prepare DispatchBB address and store it to buf[1].
2423 // ...
2424 //
2425 // DispatchBB:
2426 // %s15 = GETGOT iff isPositionIndependent
2427 // %callsite = load callsite
2428 // brgt.l.t #size of callsites, %callsite, DispContBB
2429 //
2430 // TrapBB:
2431 // Call abort.
2432 //
2433 // DispContBB:
2434 // %breg = address of jump table
2435 // %pc = load and calculate next pc from %breg and %callsite
2436 // jmp %pc
2437
2438 // Shove the dispatch's address into the return slot in the function context.
2439 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2440 DispatchBB->setIsEHPad(true);
2441
2442 // TrapBB causes a trap, like `assert(0)`.
2443 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2444 DispatchBB->addSuccessor(Succ: TrapBB);
2445
2446 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2447 DispatchBB->addSuccessor(Succ: DispContBB);
2448
2449 // Insert MBBs.
2450 MF->push_back(MBB: DispatchBB);
2451 MF->push_back(MBB: DispContBB);
2452 MF->push_back(MBB: TrapBB);
2453
2454 // Insert code to call abort in the TrapBB.
2455 Register Abort = prepareSymbol(MBB&: *TrapBB, I: TrapBB->end(), Symbol: "abort", DL,
2456 /* Local */ IsLocal: false, /* Call */ IsCall: true);
2457 BuildMI(BB: TrapBB, MIMD: DL, MCID: TII->get(Opcode: VE::BSICrii), DestReg: VE::SX10)
2458 .addReg(RegNo: Abort, flags: getKillRegState(B: true))
2459 .addImm(Val: 0)
2460 .addImm(Val: 0);
2461
2462 // Insert code into the entry block that creates and registers the function
2463 // context.
2464 setupEntryBlockForSjLj(MI, MBB: BB, DispatchBB, FI, Offset: OffsetIC);
2465
2466 // Create the jump table and associated information
2467 unsigned JTE = getJumpTableEncoding();
2468 MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTEntryKind: JTE);
2469 unsigned MJTI = JTI->createJumpTableIndex(DestBBs: LPadList);
2470
2471 const VERegisterInfo &RI = TII->getRegisterInfo();
2472 // Add a register mask with no preserved registers. This results in all
2473 // registers being marked as clobbered.
2474 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::NOP))
2475 .addRegMask(Mask: RI.getNoPreservedMask());
2476
2477 if (isPositionIndependent()) {
2478 // Force generation of GETGOT, since the current implementation doesn't
2479 // preserve the GOT register.
2480 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::GETGOT), DestReg: VE::SX15);
2481 }
2482
2483 // IReg is used as an index in a memory operand and therefore can't be SP
2484 const TargetRegisterClass *RC = &VE::I64RegClass;
2485 Register IReg = MRI.createVirtualRegister(RegClass: RC);
2486 addFrameReference(MIB: BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDLZXrii), DestReg: IReg), FI,
2487 Offset: OffsetCS);
2488 if (LPadList.size() < 64) {
2489 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLir_t))
2490 .addImm(Val: VECC::CC_ILE)
2491 .addImm(Val: LPadList.size())
2492 .addReg(RegNo: IReg)
2493 .addMBB(MBB: TrapBB);
2494 } else {
2495 assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2496 Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
2497 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: TmpReg)
2498 .addImm(Val: 0)
2499 .addImm(Val: 0)
2500 .addImm(Val: LPadList.size());
2501 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLrr_t))
2502 .addImm(Val: VECC::CC_ILE)
2503 .addReg(RegNo: TmpReg, flags: getKillRegState(B: true))
2504 .addReg(RegNo: IReg)
2505 .addMBB(MBB: TrapBB);
2506 }
2507
2508 Register BReg = MRI.createVirtualRegister(RegClass: RC);
2509 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2510 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2511
2512 if (isPositionIndependent()) {
2513 // Create following instructions for local linkage PIC code.
2514 // lea %Tmp1, .LJTI0_0@gotoff_lo
2515 // and %Tmp2, %Tmp1, (32)0
2516 // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2517 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2518 .addImm(Val: 0)
2519 .addImm(Val: 0)
2520 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_GOTOFF_LO32);
2521 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2522 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2523 .addImm(Val: M0(Val: 32));
2524 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: BReg)
2525 .addReg(RegNo: VE::SX15)
2526 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2527 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_GOTOFF_HI32);
2528 } else {
2529 // Create following instructions for non-PIC code.
2530 // lea %Tmp1, .LJTI0_0@lo
2531 // and %Tmp2, %Tmp1, (32)0
2532 // lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2533 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2534 .addImm(Val: 0)
2535 .addImm(Val: 0)
2536 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_LO32);
2537 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2538 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2539 .addImm(Val: M0(Val: 32));
2540 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: BReg)
2541 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2542 .addImm(Val: 0)
2543 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_HI32);
2544 }
2545
2546 switch (JTE) {
2547 case MachineJumpTableInfo::EK_BlockAddress: {
2548 // Generate simple block address code for no-PIC model.
2549 // sll %Tmp1, %IReg, 3
2550 // lds %TReg, 0(%Tmp1, %BReg)
2551 // bcfla %TReg
2552
2553 Register TReg = MRI.createVirtualRegister(RegClass: RC);
2554 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2555
2556 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::SLLri), DestReg: Tmp1)
2557 .addReg(RegNo: IReg, flags: getKillRegState(B: true))
2558 .addImm(Val: 3);
2559 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDrri), DestReg: TReg)
2560 .addReg(RegNo: BReg, flags: getKillRegState(B: true))
2561 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2562 .addImm(Val: 0);
2563 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2564 .addReg(RegNo: TReg, flags: getKillRegState(B: true))
2565 .addImm(Val: 0);
2566 break;
2567 }
2568 case MachineJumpTableInfo::EK_Custom32: {
2569 // Generate block address code using differences from the function pointer
2570 // for PIC model.
2571 // sll %Tmp1, %IReg, 2
2572 // ldl.zx %OReg, 0(%Tmp1, %BReg)
2573 // Prepare function address in BReg2.
2574 // adds.l %TReg, %BReg2, %OReg
2575 // bcfla %TReg
2576
2577 assert(isPositionIndependent());
2578 Register OReg = MRI.createVirtualRegister(RegClass: RC);
2579 Register TReg = MRI.createVirtualRegister(RegClass: RC);
2580 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2581
2582 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::SLLri), DestReg: Tmp1)
2583 .addReg(RegNo: IReg, flags: getKillRegState(B: true))
2584 .addImm(Val: 2);
2585 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDLZXrri), DestReg: OReg)
2586 .addReg(RegNo: BReg, flags: getKillRegState(B: true))
2587 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2588 .addImm(Val: 0);
2589 Register BReg2 =
2590 prepareSymbol(MBB&: *DispContBB, I: DispContBB->end(),
2591 Symbol: DispContBB->getParent()->getName(), DL, /* Local */ IsLocal: true);
2592 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ADDSLrr), DestReg: TReg)
2593 .addReg(RegNo: OReg, flags: getKillRegState(B: true))
2594 .addReg(RegNo: BReg2, flags: getKillRegState(B: true));
2595 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2596 .addReg(RegNo: TReg, flags: getKillRegState(B: true))
2597 .addImm(Val: 0);
2598 break;
2599 }
2600 default:
2601 llvm_unreachable("Unexpected jump table encoding");
2602 }
2603
2604 // Add the jump table entries as successors to the MBB.
2605 SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
2606 for (auto &LP : LPadList)
2607 if (SeenMBBs.insert(Ptr: LP).second)
2608 DispContBB->addSuccessor(Succ: LP);
2609
2610 // N.B. the order the invoke BBs are processed in doesn't matter here.
2611 SmallVector<MachineBasicBlock *, 64> MBBLPads;
2612 const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2613 for (MachineBasicBlock *MBB : InvokeBBs) {
2614 // Remove the landing pad successor from the invoke block and replace it
2615 // with the new dispatch block.
2616 // Keep a copy of Successors since it's modified inside the loop.
2617 SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
2618 MBB->succ_rend());
2619 // FIXME: Avoid quadratic complexity.
2620 for (auto *MBBS : Successors) {
2621 if (MBBS->isEHPad()) {
2622 MBB->removeSuccessor(Succ: MBBS);
2623 MBBLPads.push_back(Elt: MBBS);
2624 }
2625 }
2626
2627 MBB->addSuccessor(Succ: DispatchBB);
2628
2629 // Find the invoke call and mark all of the callee-saved registers as
2630 // 'implicit defined' so that they're spilled. This prevents code from
2631 // moving instructions to before the EH block, where they will never be
2632 // executed.
2633 for (auto &II : reverse(C&: *MBB)) {
2634 if (!II.isCall())
2635 continue;
2636
2637 DenseSet<Register> DefRegs;
2638 for (auto &MOp : II.operands())
2639 if (MOp.isReg())
2640 DefRegs.insert(V: MOp.getReg());
2641
2642 MachineInstrBuilder MIB(*MF, &II);
2643 for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
2644 Register Reg = SavedRegs[RI];
2645 if (!DefRegs.contains(V: Reg))
2646 MIB.addReg(RegNo: Reg, flags: RegState::ImplicitDefine | RegState::Dead);
2647 }
2648
2649 break;
2650 }
2651 }
2652
2653 // Mark all former landing pads as non-landing pads. The dispatch is the only
2654 // landing pad now.
2655 for (auto &LP : MBBLPads)
2656 LP->setIsEHPad(false);
2657
2658 // The instruction is gone now.
2659 MI.eraseFromParent();
2660 return BB;
2661}
2662
2663MachineBasicBlock *
2664VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2665 MachineBasicBlock *BB) const {
2666 switch (MI.getOpcode()) {
2667 default:
2668 llvm_unreachable("Unknown Custom Instruction!");
2669 case VE::EH_SjLj_LongJmp:
2670 return emitEHSjLjLongJmp(MI, MBB: BB);
2671 case VE::EH_SjLj_SetJmp:
2672 return emitEHSjLjSetJmp(MI, MBB: BB);
2673 case VE::EH_SjLj_Setup_Dispatch:
2674 return emitSjLjDispatchBlock(MI, BB);
2675 }
2676}
2677
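// Return true if V is a scalar constant whose bit pattern fits in a signed
// 7-bit immediate. For example, the i64 constant 63 qualifies while 200 does
// not; for f32 the 32-bit pattern is shifted into the upper half first, so in
// practice only an all-zero pattern (+0.0f) passes the check.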
2678static bool isSimm7(SDValue V) {
2679 EVT VT = V.getValueType();
2680 if (VT.isVector())
2681 return false;
2682
2683 if (VT.isInteger()) {
2684 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: V))
2685 return isInt<7>(x: C->getSExtValue());
2686 } else if (VT.isFloatingPoint()) {
2687 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: V)) {
2688 if (VT == MVT::f32 || VT == MVT::f64) {
2689 const APInt &Imm = C->getValueAPF().bitcastToAPInt();
2690 uint64_t Val = Imm.getSExtValue();
2691 if (Imm.getBitWidth() == 32)
2692 Val <<= 32; // Float immediates are placed in the upper 32 bits on VE.
2693 return isInt<7>(x: Val);
2694 }
2695 }
2696 }
2697 return false;
2698}
2699
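// Return true if V is a scalar constant usable as one of VE's (M)1/(M)0
// immediates, i.e. M leading ones followed by zeros, or M leading zeros
// followed by ones (this is what isMImmVal/isMImm32Val check). For example,
// 0xFFFFFFFF00000000 is (32)1 and 0x00000000FFFFFFFF is (32)0, while
// 0x00FF00FF00FF00FF is neither.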
2700static bool isMImm(SDValue V) {
2701 EVT VT = V.getValueType();
2702 if (VT.isVector())
2703 return false;
2704
2705 if (VT.isInteger()) {
2706 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: V))
2707 return isMImmVal(Val: getImmVal(N: C));
2708 } else if (VT.isFloatingPoint()) {
2709 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: V)) {
2710 if (VT == MVT::f32) {
2711 // Float values are placed in the upper 32 bits, so ignore the lower 32 bits.
2712 return isMImm32Val(Val: getFpImmVal(N: C) >> 32);
2713 } else if (VT == MVT::f64) {
2714 return isMImmVal(Val: getFpImmVal(N: C));
2715 }
2716 }
2717 }
2718 return false;
2719}
2720
2721static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
2722 if (SrcVT.isFloatingPoint()) {
2723 if (SrcVT == MVT::f128)
2724 return VEISD::CMPQ;
2725 return VEISD::CMPF;
2726 }
2727 return isSignedIntSetCC(Code: CC) ? VEISD::CMPI : VEISD::CMPU;
2728}
2729
2730static EVT decideCompType(EVT SrcVT) {
2731 if (SrcVT == MVT::f128)
2732 return MVT::f64;
2733 return SrcVT;
2734}
2735
2736static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC,
2737 bool WithCMov) {
2738 if (SrcVT.isFloatingPoint()) {
2739 // For the case of floating point setcc, only unordered comparisons
2740 // or general comparisons with the -enable-no-nans-fp-math option reach
2741 // here, so it is safe even if the values are NaN. Only f128 is not
2742 // safe since VE uses the f64 result of the f128 comparison.
2743 return SrcVT != MVT::f128;
2744 }
2745 if (isIntEqualitySetCC(Code: CC)) {
2746 // For the case of equal or not equal, it is safe without comparison with 0.
2747 return true;
2748 }
2749 if (WithCMov) {
2750 // For the case of integer setcc with cmov, all signed comparisons with 0
2751 // are safe.
2752 return isSignedIntSetCC(Code: CC);
2753 }
2754 // For the case of integer setcc, only signed 64-bit comparisons are safe.
2755 // For unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it reads
2756 // as less than 0 without the CMPU. For 32 bits, the upper 32 bits of the
2757 // register are not guaranteed, so it is not safe without the CMPI either.
2758 return isSignedIntSetCC(Code: CC) && SrcVT == MVT::i64;
2759}
2760
2761static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS,
2762 ISD::CondCode CC, bool WithCMov,
2763 const SDLoc &DL, SelectionDAG &DAG) {
2764 // Compare values. If RHS is 0 and it is safe to calculate without
2765 // comparison, we don't generate an instruction for comparison.
2766 EVT CompVT = decideCompType(SrcVT: VT);
2767 if (CompVT == VT && safeWithoutCompWithNull(SrcVT: VT, CC, WithCMov) &&
2768 (isNullConstant(V: RHS) || isNullFPConstant(V: RHS))) {
2769 return LHS;
2770 }
2771 return DAG.getNode(Opcode: decideComp(SrcVT: VT, CC), DL, VT: CompVT, N1: LHS, N2: RHS);
2772}
2773
2774SDValue VETargetLowering::combineSelect(SDNode *N,
2775 DAGCombinerInfo &DCI) const {
2776 assert(N->getOpcode() == ISD::SELECT &&
2777 "Should be called with a SELECT node");
2778 ISD::CondCode CC = ISD::CondCode::SETNE;
2779 SDValue Cond = N->getOperand(Num: 0);
2780 SDValue True = N->getOperand(Num: 1);
2781 SDValue False = N->getOperand(Num: 2);
2782
2783 // We handle only scalar SELECT.
2784 EVT VT = N->getValueType(ResNo: 0);
2785 if (VT.isVector())
2786 return SDValue();
2787
2788 // Perform combineSelect after the DAG is legalized.
2789 if (!DCI.isAfterLegalizeDAG())
2790 return SDValue();
2791
2792 EVT VT0 = Cond.getValueType();
2793 if (isMImm(V: True)) {
2794 // VE's condition move can handle MImm in True clause, so nothing to do.
2795 } else if (isMImm(V: False)) {
2796 // VE's conditional move can handle MImm in the True clause, so swap the True
2797 // and False clauses if False has an MImm value, and update the condition code.
2798 std::swap(a&: True, b&: False);
2799 CC = getSetCCInverse(Operation: CC, Type: VT0);
2800 }
2801
2802 SDLoc DL(N);
2803 SelectionDAG &DAG = DCI.DAG;
2804 VECC::CondCode VECCVal;
2805 if (VT0.isFloatingPoint()) {
2806 VECCVal = fpCondCode2Fcc(CC);
2807 } else {
2808 VECCVal = intCondCode2Icc(CC);
2809 }
2810 SDValue Ops[] = {Cond, True, False,
2811 DAG.getConstant(Val: VECCVal, DL, VT: MVT::i32)};
2812 return DAG.getNode(Opcode: VEISD::CMOV, DL, VT, Ops);
2813}
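//
// The net effect (editor's illustration; %c, %t, %f are hypothetical values and
// 7 stands for any MImm-encodable constant):
//   (select %c, %t, 7)   ->  (VEISD::CMOV %c, 7, %t, cc)   ; cc = "equal",
//                                                          ; operands swapped
//   (select %c, 7, %f)   ->  (VEISD::CMOV %c, 7, %f, cc)   ; cc = "not equal"
//   (select %c, %t, %f)  ->  (VEISD::CMOV %c, %t, %f, cc)  ; cc = "not equal"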
2814
2815SDValue VETargetLowering::combineSelectCC(SDNode *N,
2816 DAGCombinerInfo &DCI) const {
2817 assert(N->getOpcode() == ISD::SELECT_CC &&
2818 "Should be called with a SELECT_CC node");
2819 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 4))->get();
2820 SDValue LHS = N->getOperand(Num: 0);
2821 SDValue RHS = N->getOperand(Num: 1);
2822 SDValue True = N->getOperand(Num: 2);
2823 SDValue False = N->getOperand(Num: 3);
2824
2825 // We handle only scalar SELECT_CC.
2826 EVT VT = N->getValueType(ResNo: 0);
2827 if (VT.isVector())
2828 return SDValue();
2829
2830  // Perform combineSelectCC only after DAG legalization.
2831 if (!DCI.isAfterLegalizeDAG())
2832 return SDValue();
2833
2834 // We handle only i32/i64/f32/f64/f128 comparisons.
2835 EVT LHSVT = LHS.getValueType();
2836 assert(LHSVT == RHS.getValueType());
2837 switch (LHSVT.getSimpleVT().SimpleTy) {
2838 case MVT::i32:
2839 case MVT::i64:
2840 case MVT::f32:
2841 case MVT::f64:
2842 case MVT::f128:
2843 break;
2844 default:
2845    // Return an empty SDValue to let LLVM handle other types.
2846 return SDValue();
2847 }
2848
2849 if (isMImm(V: RHS)) {
2850 // VE's comparison can handle MImm in RHS, so nothing to do.
2851 } else if (isSimm7(V: RHS)) {
2852    // VE's comparison can take a Simm7 only in LHS, so swap LHS and RHS and
2853    // use the swapped condition code.
2854 std::swap(a&: LHS, b&: RHS);
2855 CC = getSetCCSwappedOperands(Operation: CC);
2856 }
2857 if (isMImm(V: True)) {
2858    // VE's conditional move can take an MImm in the True operand; nothing to do.
2859 } else if (isMImm(V: False)) {
2860    // VE's conditional move can take an MImm only in the True operand, so
2861    // if False holds the MImm, swap True and False and invert the condition.
2862 std::swap(a&: True, b&: False);
2863 CC = getSetCCInverse(Operation: CC, Type: LHSVT);
2864 }
2865
2866 SDLoc DL(N);
2867 SelectionDAG &DAG = DCI.DAG;
2868
2869 bool WithCMov = true;
2870 SDValue CompNode = generateComparison(VT: LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);
2871
2872 VECC::CondCode VECCVal;
2873 if (LHSVT.isFloatingPoint()) {
2874 VECCVal = fpCondCode2Fcc(CC);
2875 } else {
2876 VECCVal = intCondCode2Icc(CC);
2877 }
2878 SDValue Ops[] = {CompNode, True, False,
2879 DAG.getConstant(Val: VECCVal, DL, VT: MVT::i32)};
2880 return DAG.getNode(Opcode: VEISD::CMOV, DL, VT, Ops);
2881}
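//
// End-to-end examples (editor's sketch; values hypothetical, and the second
// case assumes 0 is MImm-encodable so no operand swap happens first):
//   (select_cc i64 %a, %b, %t, %f, setlt)
//     -> (VEISD::CMOV (VEISD::CMPI %a, %b), %t, %f, cc=lt)
//   (select_cc i64 %a, 0, %t, %f, setlt)
//     -> (VEISD::CMOV %a, %t, %f, cc=lt)             ; compare with 0 elided
//   (select_cc f128 %p, %q, %t, %f, setolt)
//     -> (VEISD::CMOV (f64 (VEISD::CMPQ %p, %q)), %t, %f, cc=lt)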
2882
2883static bool isI32InsnAllUses(const SDNode *User, const SDNode *N);
2884static bool isI32Insn(const SDNode *User, const SDNode *N) {
2885 switch (User->getOpcode()) {
2886 default:
2887 return false;
2888 case ISD::ADD:
2889 case ISD::SUB:
2890 case ISD::MUL:
2891 case ISD::SDIV:
2892 case ISD::UDIV:
2893 case ISD::SETCC:
2894 case ISD::SMIN:
2895 case ISD::SMAX:
2896 case ISD::SHL:
2897 case ISD::SRA:
2898 case ISD::BSWAP:
2899 case ISD::SINT_TO_FP:
2900 case ISD::UINT_TO_FP:
2901 case ISD::BR_CC:
2902 case ISD::BITCAST:
2903 case ISD::ATOMIC_CMP_SWAP:
2904 case ISD::ATOMIC_SWAP:
2905 case VEISD::CMPU:
2906 case VEISD::CMPI:
2907 return true;
2908 case ISD::SRL:
2909 if (N->getOperand(Num: 0).getOpcode() != ISD::SRL)
2910 return true;
2911    // (srl (trunc (srl ...))) may be optimized by combining the srl nodes,
2912    // so don't optimize away the trunc here.
2913 return false;
2914 case ISD::SELECT_CC:
2915 if (User->getOperand(Num: 2).getNode() != N &&
2916 User->getOperand(Num: 3).getNode() != N)
2917 return true;
2918 return isI32InsnAllUses(User, N);
2919 case VEISD::CMOV:
2920    // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
2921    // However, a trunc in the true or false operand is not safe.
2922 if (User->getOperand(Num: 1).getNode() != N &&
2923 User->getOperand(Num: 2).getNode() != N &&
2924 isa<ConstantSDNode>(Val: User->getOperand(Num: 3))) {
2925 VECC::CondCode VECCVal =
2926 static_cast<VECC::CondCode>(User->getConstantOperandVal(Num: 3));
2927 return isIntVECondCode(CC: VECCVal);
2928 }
2929 [[fallthrough]];
2930 case ISD::AND:
2931 case ISD::OR:
2932 case ISD::XOR:
2933 case ISD::SELECT:
2934 case ISD::CopyToReg:
2935    // Check all uses of selects, bit operations, and copies. If all of them
2936    // are safe, optimize the truncate to an extract_subreg.
2937 return isI32InsnAllUses(User, N);
2938 }
2939}
2940
2941static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) {
2942  // Check all uses of the User node. If all of them are safe, optimize the
2943  // truncate to an extract_subreg.
2944 for (const SDNode *U : User->users()) {
2945 switch (U->getOpcode()) {
2946 default:
2947      // If the use is an instruction that treats the source operand as i32,
2948      // it is safe to omit the truncate here.
2949 if (isI32Insn(User: U, N))
2950 continue;
2951 break;
2952 case ISD::ANY_EXTEND:
2953 case ISD::SIGN_EXTEND:
2954 case ISD::ZERO_EXTEND: {
2955      // Special optimization for the combination of ext and trunc.
2956      // In (ext ... (select ... (trunc ...))) it is safe to omit the truncate
2957      // here, since the truncate only clears the upper 32 bits, which one of
2958      // the ext instructions will fill later anyway.
2959 assert(N->getValueType(0) == MVT::i32 &&
2960             "found truncate to a non-i32 integer");
2961 if (User->getOpcode() == ISD::SELECT_CC ||
2962 User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV)
2963 continue;
2964 break;
2965 }
2966 }
2967 return false;
2968 }
2969 return true;
2970}
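//
// Editor's illustration of the user check above (node shapes hypothetical):
//   %t = (i32 (truncate i64 %x))
//   user (add i32 %t, %y)                   -> safe (ADD is in the list above)
//   user (cmov %t, %true, %false, int-cc)   -> safe (trunc is the condition)
//   user (store i32 %t, ...)                -> not listed, so not safe;
//                                              keep the truncate
// Only if every user is "safe" does combineTRUNCATE below rewrite the
// truncate into an EXTRACT_SUBREG.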
2971
2972// Optimize TRUNCATE during DAG combining. Optimizing it in custom lowering is
2973// sometimes too early, and optimizing it in DAG pattern matching in
2974// VEInstrInfo.td is sometimes too late. So, do it here.
2975SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2976 DAGCombinerInfo &DCI) const {
2977 assert(N->getOpcode() == ISD::TRUNCATE &&
2978 "Should be called with a TRUNCATE node");
2979
2980 SelectionDAG &DAG = DCI.DAG;
2981 SDLoc DL(N);
2982 EVT VT = N->getValueType(ResNo: 0);
2983
2984 // We prefer to do this when all types are legal.
2985 if (!DCI.isAfterLegalizeDAG())
2986 return SDValue();
2987
2988  // For now, skip combining TRUNCATE if its operand might fold to a constant.
2989 if (N->getOperand(Num: 0)->getOpcode() == ISD::SELECT_CC &&
2990 isa<ConstantSDNode>(Val: N->getOperand(Num: 0)->getOperand(Num: 0)) &&
2991 isa<ConstantSDNode>(Val: N->getOperand(Num: 0)->getOperand(Num: 1)))
2992 return SDValue();
2993
2994  // Check all uses of this TRUNCATE.
2995 for (const SDNode *User : N->users()) {
2996    // Make sure that we're not going to replace the TRUNCATE when the user
2997    // is not an i32 instruction.
2998 //
2999 // FIXME: Although we could sometimes handle this, and it does occur in
3000 // practice that one of the condition inputs to the select is also one of
3001 // the outputs, we currently can't deal with this.
3002 if (isI32Insn(User, N))
3003 continue;
3004
3005 return SDValue();
3006 }
3007
3008 SDValue SubI32 = DAG.getTargetConstant(Val: VE::sub_i32, DL, VT: MVT::i32);
3009 return SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT,
3010 Op1: N->getOperand(Num: 0), Op2: SubI32),
3011 0);
3012}
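//
// Resulting rewrite (editor's sketch):
//   before: %lo = (i32 (truncate i64 %x)), with every user 32-bit safe
//   after:  %lo = (i32 (EXTRACT_SUBREG %x, sub_i32))
// i.e. the truncate becomes a plain subregister read of the low 32 bits, with
// no instruction emitted for it.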
3013
3014SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
3015 DAGCombinerInfo &DCI) const {
3016 switch (N->getOpcode()) {
3017 default:
3018 break;
3019 case ISD::SELECT:
3020 return combineSelect(N, DCI);
3021 case ISD::SELECT_CC:
3022 return combineSelectCC(N, DCI);
3023 case ISD::TRUNCATE:
3024 return combineTRUNCATE(N, DCI);
3025 }
3026
3027 return SDValue();
3028}
3029
3030//===----------------------------------------------------------------------===//
3031// VE Inline Assembly Support
3032//===----------------------------------------------------------------------===//
3033
3034VETargetLowering::ConstraintType
3035VETargetLowering::getConstraintType(StringRef Constraint) const {
3036 if (Constraint.size() == 1) {
3037 switch (Constraint[0]) {
3038 default:
3039 break;
3040 case 'v': // vector registers
3041 return C_RegisterClass;
3042 }
3043 }
3044 return TargetLowering::getConstraintType(Constraint);
3045}
3046
3047std::pair<unsigned, const TargetRegisterClass *>
3048VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3049 StringRef Constraint,
3050 MVT VT) const {
3051 const TargetRegisterClass *RC = nullptr;
3052 if (Constraint.size() == 1) {
3053 switch (Constraint[0]) {
3054 default:
3055 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3056 case 'r':
3057 RC = &VE::I64RegClass;
3058 break;
3059 case 'v':
3060 RC = &VE::V64RegClass;
3061 break;
3062 }
3063 return std::make_pair(x: 0U, y&: RC);
3064 }
3065
3066 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3067}
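//
// Editor's illustration (the asm string is purely hypothetical): for IR like
//   %r = call <256 x double> asm "some_vector_op $0, $1", "=v,v"(<256 x double> %a)
// the 'v' constraints are classified as C_RegisterClass by getConstraintType
// and resolved to VE::V64RegClass here, while ordinary 'r' operands resolve to
// VE::I64RegClass.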
3068
3069//===----------------------------------------------------------------------===//
3070// VE Target Optimization Support
3071//===----------------------------------------------------------------------===//
3072
3073unsigned VETargetLowering::getMinimumJumpTableEntries() const {
3074  // Require 8 entries for the PIC model to offset the cost of the PIC loads.
3075 if (isJumpTableRelative())
3076 return 8;
3077
3078 return TargetLowering::getMinimumJumpTableEntries();
3079}
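//
// Editor's note (approximate): with this threshold, a PIC-mode switch needs at
// least 8 jump-table-eligible cases before a jump table is emitted; smaller
// switches stay as compare-and-branch sequences. Non-PIC code keeps the
// generic default threshold.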
3080
3081bool VETargetLowering::hasAndNot(SDValue Y) const {
3082 EVT VT = Y.getValueType();
3083
3084  // VE doesn't have a vector and-not instruction.
3085 if (VT.isVector())
3086 return false;
3087
3088  // VE allows different kinds of immediates for X and Y in ~X & Y:
3089  // only a simm7 works for X and only an mimm works for Y. However, this
3090  // hook is asked whether a single immediate value is usable as both X and Y
3091  // of an and-not instruction. Emitting an extra instruction just to
3092  // materialize the immediate is not worthwhile, since the purpose of this
3093  // hook is to convert a sequence of 3 instructions into another sequence of
3094  // 3 instructions with better parallelism. Therefore, we return false
3095  // for all immediate values for now.
3096 // FIXME: Change hasAndNot function to have two operands to make it work
3097 // correctly with Aurora VE.
3098 if (isa<ConstantSDNode>(Val: Y))
3099 return false;
3100
3101  // It's fine for plain register operands.
3102 return true;
3103}
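//
// Editor's illustration of how DAGCombiner sees this hook (values hypothetical):
//   hasAndNot(%y)                 -> true   (plain scalar register operand)
//   hasAndNot(constant 255)       -> false  (see the FIXME above)
//   hasAndNot(any vector value)   -> false  (no vector and-not on VE)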
3104
3105SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3106 SelectionDAG &DAG) const {
3107 assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
3108 MVT VT = Op.getOperand(i: 0).getSimpleValueType();
3109
3110 // Special treatment for packed V64 types.
3111 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3112 (void)VT;
3113  // Example (pseudocode):
3114  //   %packed_v = extractelt %vr, %idx / 2
3115  //   %v = %packed_v >> (((%idx % 2) ^ 1) * 32)
3116  //   %res = %v & 0xffffffff
3117
3118 SDValue Vec = Op.getOperand(i: 0);
3119 SDValue Idx = Op.getOperand(i: 1);
3120 SDLoc DL(Op);
3121 SDValue Result = Op;
3122 if (false /* Idx->isConstant() */) {
3123 // TODO: optimized implementation using constant values
3124 } else {
3125 SDValue Const1 = DAG.getConstant(Val: 1, DL, VT: MVT::i64);
3126 SDValue HalfIdx = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {Idx, Const1});
3127 SDValue PackedElt =
3128 SDValue(DAG.getMachineNode(Opcode: VE::LVSvr, dl: DL, VT: MVT::i64, Ops: {Vec, HalfIdx}), 0);
3129 SDValue AndIdx = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {Idx, Const1});
3130 SDValue Shift = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, Ops: {AndIdx, Const1});
3131 SDValue Const5 = DAG.getConstant(Val: 5, DL, VT: MVT::i64);
3132 Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, Ops: {Shift, Const5});
3133 PackedElt = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {PackedElt, Shift});
3134 SDValue Mask = DAG.getConstant(Val: 0xFFFFFFFFL, DL, VT: MVT::i64);
3135 PackedElt = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {PackedElt, Mask});
3136 SDValue SubI32 = DAG.getTargetConstant(Val: VE::sub_i32, DL, VT: MVT::i32);
3137 Result = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL,
3138 VT: MVT::i32, Op1: PackedElt, Op2: SubI32),
3139 0);
3140
3141 if (Op.getSimpleValueType() == MVT::f32) {
3142 Result = DAG.getBitcast(VT: MVT::f32, V: Result);
3143 } else {
3144 assert(Op.getSimpleValueType() == MVT::i32);
3145 }
3146 }
3147 return Result;
3148}
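//
// Worked example (editor's sketch) for the dynamic path above, with %idx = 5 on
// a v512i32 vector:
//   HalfIdx = 5 >> 1 = 2                   ; LVSvr reads 64-bit lane 2
//   Shift   = ((5 & 1) ^ 1) << 5 = 0       ; odd index -> lower half
//   Result  = (lane >> 0) & 0xffffffff
// An even index (e.g. 4) gives Shift = 32 and extracts the upper half, matching
// the "elements start from the upper bits" layout noted in
// lowerINSERT_VECTOR_ELT below.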
3149
3150SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3151 SelectionDAG &DAG) const {
3152 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
3153 MVT VT = Op.getOperand(i: 0).getSimpleValueType();
3154
3155 // Special treatment for packed V64 types.
3156 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3157 (void)VT;
3158  // The elements of v512i32 and v512f32 start from the upper bits (0..31). These
3159  // "upper bits" correspond to `val << 32` from a C implementation's point of view.
3160 //
3161  // Example (pseudocode):
3162 // %packed_elt = extractelt %vr, (%idx >> 1)
3163 // %shift = ((%idx & 1) ^ 1) << 5
3164 // %packed_elt &= 0xffffffff00000000 >> shift
3165 // %packed_elt |= (zext %val) << shift
3166 // %vr = insertelt %vr, %packed_elt, (%idx >> 1)
3167
3168 SDLoc DL(Op);
3169 SDValue Vec = Op.getOperand(i: 0);
3170 SDValue Val = Op.getOperand(i: 1);
3171 SDValue Idx = Op.getOperand(i: 2);
3172 if (Idx.getSimpleValueType() == MVT::i32)
3173 Idx = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Idx);
3174 if (Val.getSimpleValueType() == MVT::f32)
3175 Val = DAG.getBitcast(VT: MVT::i32, V: Val);
3176 assert(Val.getSimpleValueType() == MVT::i32);
3177 Val = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Val);
3178
3179 SDValue Result = Op;
3180 if (false /* Idx->isConstant()*/) {
3181 // TODO: optimized implementation using constant values
3182 } else {
3183 SDValue Const1 = DAG.getConstant(Val: 1, DL, VT: MVT::i64);
3184 SDValue HalfIdx = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {Idx, Const1});
3185 SDValue PackedElt =
3186 SDValue(DAG.getMachineNode(Opcode: VE::LVSvr, dl: DL, VT: MVT::i64, Ops: {Vec, HalfIdx}), 0);
3187 SDValue AndIdx = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {Idx, Const1});
3188 SDValue Shift = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, Ops: {AndIdx, Const1});
3189 SDValue Const5 = DAG.getConstant(Val: 5, DL, VT: MVT::i64);
3190 Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, Ops: {Shift, Const5});
3191 SDValue Mask = DAG.getConstant(Val: 0xFFFFFFFF00000000L, DL, VT: MVT::i64);
3192 Mask = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {Mask, Shift});
3193 PackedElt = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {PackedElt, Mask});
3194 Val = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, Ops: {Val, Shift});
3195 PackedElt = DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i64, Ops: {PackedElt, Val});
3196 Result =
3197 SDValue(DAG.getMachineNode(Opcode: VE::LSVrr_v, dl: DL, VT: Vec.getSimpleValueType(),
3198 Ops: {HalfIdx, PackedElt, Vec}),
3199 0);
3200 }
3201 return Result;
3202}
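//
// Worked example (editor's sketch) for the dynamic path above, with %idx = 4 on
// a v512i32 vector:
//   HalfIdx = 4 >> 1 = 2
//   Shift   = ((4 & 1) ^ 1) << 5 = 32                ; even index -> upper half
//   Mask    = 0xffffffff00000000 >> 32 = 0x00000000ffffffff
//   lane    = (lane & Mask) | (zext(%val) << 32)     ; keep the odd-index half,
//                                                    ; replace the even-index half
//   %vr     = LSVrr_v(%vr, lane) at HalfIdx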
3203