1//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the interfaces that VE uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "VEISelLowering.h"
15#include "MCTargetDesc/VEMCExpr.h"
16#include "VECustomDAG.h"
17#include "VEInstrBuilder.h"
18#include "VEMachineFunctionInfo.h"
19#include "VERegisterInfo.h"
20#include "VETargetMachine.h"
21#include "llvm/ADT/StringSwitch.h"
22#include "llvm/CodeGen/CallingConvLower.h"
23#include "llvm/CodeGen/MachineFrameInfo.h"
24#include "llvm/CodeGen/MachineFunction.h"
25#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineJumpTableInfo.h"
27#include "llvm/CodeGen/MachineModuleInfo.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/CodeGen/SelectionDAG.h"
30#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
31#include "llvm/IR/DerivedTypes.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/IRBuilder.h"
34#include "llvm/IR/Module.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Support/KnownBits.h"
37using namespace llvm;
38
39#define DEBUG_TYPE "ve-lower"
40
41//===----------------------------------------------------------------------===//
42// Calling Convention Implementation
43//===----------------------------------------------------------------------===//
44
45#include "VEGenCallingConv.inc"
46
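// Select the calling-convention function used for return values: fastcc uses
// RetCC_VE_Fast and every other convention falls back to RetCC_VE_C.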
47CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
48 switch (CallConv) {
49 default:
50 return RetCC_VE_C;
51 case CallingConv::Fast:
52 return RetCC_VE_Fast;
53 }
54}
55
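// Select the calling-convention function used for call arguments. Variadic
// arguments always use CC_VE2; otherwise fastcc uses CC_VE_Fast and the
// default is CC_VE_C.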
56CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
57 if (IsVarArg)
58 return CC_VE2;
59 switch (CallConv) {
60 default:
61 return CC_VE_C;
62 case CallingConv::Fast:
63 return CC_VE_Fast;
64 }
65}
66
67bool VETargetLowering::CanLowerReturn(
68 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
69 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
70 CCAssignFn *RetCC = getReturnCC(CallConv);
71 SmallVector<CCValAssign, 16> RVLocs;
72 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
73 return CCInfo.CheckReturn(Outs, Fn: RetCC);
74}
75
76static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
77 MVT::v256f32, MVT::v512f32, MVT::v256f64};
78
79static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};
80
81static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
82
83void VETargetLowering::initRegisterClasses() {
84 // Set up the register classes.
85 addRegisterClass(VT: MVT::i32, RC: &VE::I32RegClass);
86 addRegisterClass(VT: MVT::i64, RC: &VE::I64RegClass);
87 addRegisterClass(VT: MVT::f32, RC: &VE::F32RegClass);
88 addRegisterClass(VT: MVT::f64, RC: &VE::I64RegClass);
89 addRegisterClass(VT: MVT::f128, RC: &VE::F128RegClass);
90
91 if (Subtarget->enableVPU()) {
92 for (MVT VecVT : AllVectorVTs)
93 addRegisterClass(VT: VecVT, RC: &VE::V64RegClass);
94 addRegisterClass(VT: MVT::v256i1, RC: &VE::VMRegClass);
95 addRegisterClass(VT: MVT::v512i1, RC: &VE::VM512RegClass);
96 }
97}
98
99void VETargetLowering::initSPUActions() {
100 const auto &TM = getTargetMachine();
101 /// Load & Store {
102
  // VE doesn't have i1 sign-extending loads.
104 for (MVT VT : MVT::integer_valuetypes()) {
105 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
106 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
107 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
108 setTruncStoreAction(ValVT: VT, MemVT: MVT::i1, Action: Expand);
109 }
110
  // VE doesn't have floating-point extload/truncstore, so expand them.
112 for (MVT FPVT : MVT::fp_valuetypes()) {
113 for (MVT OtherFPVT : MVT::fp_valuetypes()) {
114 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: FPVT, MemVT: OtherFPVT, Action: Expand);
115 setTruncStoreAction(ValVT: FPVT, MemVT: OtherFPVT, Action: Expand);
116 }
117 }
118
  // VE doesn't have fp128 load/store, so custom-lower them.
120 setOperationAction(Op: ISD::LOAD, VT: MVT::f128, Action: Custom);
121 setOperationAction(Op: ISD::STORE, VT: MVT::f128, Action: Custom);
122
123 /// } Load & Store
124
125 // Custom legalize address nodes into LO/HI parts.
126 MVT PtrVT = MVT::getIntegerVT(BitWidth: TM.getPointerSizeInBits(AS: 0));
127 setOperationAction(Op: ISD::BlockAddress, VT: PtrVT, Action: Custom);
128 setOperationAction(Op: ISD::GlobalAddress, VT: PtrVT, Action: Custom);
129 setOperationAction(Op: ISD::GlobalTLSAddress, VT: PtrVT, Action: Custom);
130 setOperationAction(Op: ISD::ConstantPool, VT: PtrVT, Action: Custom);
131 setOperationAction(Op: ISD::JumpTable, VT: PtrVT, Action: Custom);
132
133 /// VAARG handling {
134 setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom);
  // VAARG needs to be lowered to an access with 8-byte alignment.
136 setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Custom);
137 // Use the default implementation.
138 setOperationAction(Op: ISD::VACOPY, VT: MVT::Other, Action: Expand);
139 setOperationAction(Op: ISD::VAEND, VT: MVT::Other, Action: Expand);
140 /// } VAARG handling
141
142 /// Stack {
143 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i32, Action: Custom);
144 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i64, Action: Custom);
145
146 // Use the default implementation.
147 setOperationAction(Op: ISD::STACKSAVE, VT: MVT::Other, Action: Expand);
148 setOperationAction(Op: ISD::STACKRESTORE, VT: MVT::Other, Action: Expand);
149 /// } Stack
150
151 /// Branch {
152
153 // VE doesn't have BRCOND
154 setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Expand);
155
156 // BR_JT is not implemented yet.
157 setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand);
158
159 /// } Branch
160
161 /// Int Ops {
162 for (MVT IntVT : {MVT::i32, MVT::i64}) {
163 // VE has no REM or DIVREM operations.
164 setOperationAction(Op: ISD::UREM, VT: IntVT, Action: Expand);
165 setOperationAction(Op: ISD::SREM, VT: IntVT, Action: Expand);
166 setOperationAction(Op: ISD::SDIVREM, VT: IntVT, Action: Expand);
167 setOperationAction(Op: ISD::UDIVREM, VT: IntVT, Action: Expand);
168
169 // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
170 setOperationAction(Op: ISD::SHL_PARTS, VT: IntVT, Action: Expand);
171 setOperationAction(Op: ISD::SRA_PARTS, VT: IntVT, Action: Expand);
172 setOperationAction(Op: ISD::SRL_PARTS, VT: IntVT, Action: Expand);
173
174 // VE has no MULHU/S or U/SMUL_LOHI operations.
175 // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
176 setOperationAction(Op: ISD::MULHU, VT: IntVT, Action: Expand);
177 setOperationAction(Op: ISD::MULHS, VT: IntVT, Action: Expand);
178 setOperationAction(Op: ISD::UMUL_LOHI, VT: IntVT, Action: Expand);
179 setOperationAction(Op: ISD::SMUL_LOHI, VT: IntVT, Action: Expand);
180
181 // VE has no CTTZ, ROTL, ROTR operations.
182 setOperationAction(Op: ISD::CTTZ, VT: IntVT, Action: Expand);
183 setOperationAction(Op: ISD::ROTL, VT: IntVT, Action: Expand);
184 setOperationAction(Op: ISD::ROTR, VT: IntVT, Action: Expand);
185
    // VE has a 64-bit instruction that implements the i64 BSWAP operation. The
    // same instruction also handles i32 BSWAP with an additional parameter, so
    // use isel patterns to lower BSWAP.
189 setOperationAction(Op: ISD::BSWAP, VT: IntVT, Action: Legal);
190
    // VE has only 64-bit instructions for the i64 BITREVERSE/CTLZ/CTPOP
    // operations. Use isel patterns for i64 and promote i32.
193 LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
194 setOperationAction(Op: ISD::BITREVERSE, VT: IntVT, Action: Act);
195 setOperationAction(Op: ISD::CTLZ, VT: IntVT, Action: Act);
196 setOperationAction(Op: ISD::CTLZ_ZERO_UNDEF, VT: IntVT, Action: Act);
197 setOperationAction(Op: ISD::CTPOP, VT: IntVT, Action: Act);
198
    // VE has only 64-bit instructions for the i64 AND/OR/XOR operations.
    // Use isel patterns for i64 and promote i32.
201 setOperationAction(Op: ISD::AND, VT: IntVT, Action: Act);
202 setOperationAction(Op: ISD::OR, VT: IntVT, Action: Act);
203 setOperationAction(Op: ISD::XOR, VT: IntVT, Action: Act);
204
205 // Legal smax and smin
206 setOperationAction(Op: ISD::SMAX, VT: IntVT, Action: Legal);
207 setOperationAction(Op: ISD::SMIN, VT: IntVT, Action: Legal);
208 }
209 /// } Int Ops
210
211 /// Conversion {
  // VE doesn't have instructions for fp<->uint conversions, so let LLVM expand
  // them.
213 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Promote); // use i64
214 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i32, Action: Promote); // use i64
215 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i64, Action: Expand);
216 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Expand);
217
218 // fp16 not supported
219 for (MVT FPVT : MVT::fp_valuetypes()) {
220 setOperationAction(Op: ISD::FP16_TO_FP, VT: FPVT, Action: Expand);
221 setOperationAction(Op: ISD::FP_TO_FP16, VT: FPVT, Action: Expand);
222 }
223 /// } Conversion
224
225 /// Floating-point Ops {
226 /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
227 /// and fcmp.
228
  // VE doesn't have the following floating-point operations.
230 for (MVT VT : MVT::fp_valuetypes()) {
231 setOperationAction(Op: ISD::FNEG, VT, Action: Expand);
232 setOperationAction(Op: ISD::FREM, VT, Action: Expand);
233 }
234
235 // VE doesn't have fdiv of f128.
236 setOperationAction(Op: ISD::FDIV, VT: MVT::f128, Action: Expand);
237
238 for (MVT FPVT : {MVT::f32, MVT::f64}) {
    // f32 and f64 use ConstantFP; f128 uses the constant pool.
240 setOperationAction(Op: ISD::ConstantFP, VT: FPVT, Action: Legal);
241 }
242 /// } Floating-point Ops
243
244 /// Floating-point math functions {
245
  // VE doesn't have the following floating-point math functions.
247 for (MVT VT : MVT::fp_valuetypes()) {
248 setOperationAction(Op: ISD::FABS, VT, Action: Expand);
249 setOperationAction(Op: ISD::FCOPYSIGN, VT, Action: Expand);
250 setOperationAction(Op: ISD::FCOS, VT, Action: Expand);
251 setOperationAction(Op: ISD::FMA, VT, Action: Expand);
252 setOperationAction(Op: ISD::FPOW, VT, Action: Expand);
253 setOperationAction(Op: ISD::FSIN, VT, Action: Expand);
254 setOperationAction(Op: ISD::FSQRT, VT, Action: Expand);
255 }
256
  // VE has single- and double-precision FMINNUM and FMAXNUM.
258 for (MVT VT : {MVT::f32, MVT::f64}) {
259 setOperationAction(Ops: {ISD::FMAXNUM, ISD::FMINNUM}, VT, Action: Legal);
260 }
261
262 /// } Floating-point math functions
263
264 /// Atomic instructions {
265
266 setMaxAtomicSizeInBitsSupported(64);
267 setMinCmpXchgSizeInBits(32);
268 setSupportsUnalignedAtomics(false);
269
270 // Use custom inserter for ATOMIC_FENCE.
271 setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom);
272
273 // Other atomic instructions.
274 for (MVT VT : MVT::integer_valuetypes()) {
275 // Support i8/i16 atomic swap.
276 setOperationAction(Op: ISD::ATOMIC_SWAP, VT, Action: Custom);
277
278 // FIXME: Support "atmam" instructions.
279 setOperationAction(Op: ISD::ATOMIC_LOAD_ADD, VT, Action: Expand);
280 setOperationAction(Op: ISD::ATOMIC_LOAD_SUB, VT, Action: Expand);
281 setOperationAction(Op: ISD::ATOMIC_LOAD_AND, VT, Action: Expand);
282 setOperationAction(Op: ISD::ATOMIC_LOAD_OR, VT, Action: Expand);
283
    // VE doesn't have the following instructions.
285 setOperationAction(Op: ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Action: Expand);
286 setOperationAction(Op: ISD::ATOMIC_LOAD_CLR, VT, Action: Expand);
287 setOperationAction(Op: ISD::ATOMIC_LOAD_XOR, VT, Action: Expand);
288 setOperationAction(Op: ISD::ATOMIC_LOAD_NAND, VT, Action: Expand);
289 setOperationAction(Op: ISD::ATOMIC_LOAD_MIN, VT, Action: Expand);
290 setOperationAction(Op: ISD::ATOMIC_LOAD_MAX, VT, Action: Expand);
291 setOperationAction(Op: ISD::ATOMIC_LOAD_UMIN, VT, Action: Expand);
292 setOperationAction(Op: ISD::ATOMIC_LOAD_UMAX, VT, Action: Expand);
293 }
294
295 /// } Atomic instructions
296
297 /// SJLJ instructions {
298 setOperationAction(Op: ISD::EH_SJLJ_LONGJMP, VT: MVT::Other, Action: Custom);
299 setOperationAction(Op: ISD::EH_SJLJ_SETJMP, VT: MVT::i32, Action: Custom);
300 setOperationAction(Op: ISD::EH_SJLJ_SETUP_DISPATCH, VT: MVT::Other, Action: Custom);
301 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
302 setLibcallName(Call: RTLIB::UNWIND_RESUME, Name: "_Unwind_SjLj_Resume");
303 /// } SJLJ instructions
304
305 // Intrinsic instructions
306 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
307}
308
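// Set up legalization actions for vector operations: mask and packed vector
// types, custom lowering to VVP_* nodes, vector memory operations, and
// integer reductions.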
309void VETargetLowering::initVPUActions() {
310 for (MVT LegalMaskVT : AllMaskVTs)
311 setOperationAction(Op: ISD::BUILD_VECTOR, VT: LegalMaskVT, Action: Custom);
312
313 for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
314 setOperationAction(Op: Opc, VT: MVT::v512i1, Action: Custom);
315
316 for (MVT LegalVecVT : AllVectorVTs) {
317 setOperationAction(Op: ISD::BUILD_VECTOR, VT: LegalVecVT, Action: Custom);
318 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: LegalVecVT, Action: Legal);
319 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: LegalVecVT, Action: Legal);
320 // Translate all vector instructions with legal element types to VVP_*
321 // nodes.
    // TODO: We will custom-widen into VVP_* nodes in the future. While we are
    // building the infrastructure for this, we only do it for legal vector VTs.
325#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \
326 setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
327#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
328 setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
329 setOperationAction(Op: ISD::EXPERIMENTAL_VP_STRIDED_LOAD, VT: LegalVecVT, Action: Custom);
330 setOperationAction(Op: ISD::EXPERIMENTAL_VP_STRIDED_STORE, VT: LegalVecVT, Action: Custom);
331#include "VVPNodes.def"
332 }
333
334 for (MVT LegalPackedVT : AllPackedVTs) {
335 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: LegalPackedVT, Action: Custom);
336 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: LegalPackedVT, Action: Custom);
337 }
338
339 // vNt32, vNt64 ops (legal element types)
340 for (MVT VT : MVT::vector_valuetypes()) {
341 MVT ElemVT = VT.getVectorElementType();
342 unsigned ElemBits = ElemVT.getScalarSizeInBits();
343 if (ElemBits != 32 && ElemBits != 64)
344 continue;
345
346 for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
347 setOperationAction(Op: MemOpc, VT, Action: Custom);
348
349 const ISD::NodeType IntReductionOCs[] = {
350 ISD::VECREDUCE_ADD, ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND,
351 ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMIN,
352 ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};
353
354 for (unsigned IntRedOpc : IntReductionOCs)
355 setOperationAction(Op: IntRedOpc, VT, Action: Custom);
356 }
357
358 // v256i1 and v512i1 ops
359 for (MVT MaskVT : AllMaskVTs) {
360 // Custom lower mask ops
361 setOperationAction(Op: ISD::STORE, VT: MaskVT, Action: Custom);
362 setOperationAction(Op: ISD::LOAD, VT: MaskVT, Action: Custom);
363 }
364}
365
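// Lower outgoing return values: assign each value to its return register
// according to the return calling convention, extend or bitcast it as needed,
// and glue the register copies to the RET_GLUE node.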
366SDValue
367VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
368 bool IsVarArg,
369 const SmallVectorImpl<ISD::OutputArg> &Outs,
370 const SmallVectorImpl<SDValue> &OutVals,
371 const SDLoc &DL, SelectionDAG &DAG) const {
372 // CCValAssign - represent the assignment of the return value to locations.
373 SmallVector<CCValAssign, 16> RVLocs;
374
375 // CCState - Info about the registers and stack slot.
376 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
377 *DAG.getContext());
378
379 // Analyze return values.
380 CCInfo.AnalyzeReturn(Outs, Fn: getReturnCC(CallConv));
381
382 SDValue Glue;
383 SmallVector<SDValue, 4> RetOps(1, Chain);
384
385 // Copy the result values into the output registers.
386 for (unsigned i = 0; i != RVLocs.size(); ++i) {
387 CCValAssign &VA = RVLocs[i];
388 assert(VA.isRegLoc() && "Can only return in registers!");
389 assert(!VA.needsCustom() && "Unexpected custom lowering");
390 SDValue OutVal = OutVals[i];
391
392 // Integer return values must be sign or zero extended by the callee.
393 switch (VA.getLocInfo()) {
394 case CCValAssign::Full:
395 break;
396 case CCValAssign::SExt:
397 OutVal = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: VA.getLocVT(), Operand: OutVal);
398 break;
399 case CCValAssign::ZExt:
400 OutVal = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: VA.getLocVT(), Operand: OutVal);
401 break;
402 case CCValAssign::AExt:
403 OutVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: VA.getLocVT(), Operand: OutVal);
404 break;
405 case CCValAssign::BCvt: {
406 // Convert a float return value to i64 with padding.
407 // 63 31 0
408 // +------+------+
409 // | float| 0 |
410 // +------+------+
411 assert(VA.getLocVT() == MVT::i64);
412 assert(VA.getValVT() == MVT::f32);
413 SDValue Undef = SDValue(
414 DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::i64), 0);
415 SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
416 OutVal = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL,
417 VT: MVT::i64, Op1: Undef, Op2: OutVal, Op3: Sub_f32),
418 0);
419 break;
420 }
421 default:
422 llvm_unreachable("Unknown loc info!");
423 }
424
425 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: OutVal, Glue);
426
427 // Guarantee that all emitted copies are stuck together with flags.
428 Glue = Chain.getValue(R: 1);
429 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
430 }
431
432 RetOps[0] = Chain; // Update chain.
433
434 // Add the glue if we have it.
435 if (Glue.getNode())
436 RetOps.push_back(Elt: Glue);
437
438 return DAG.getNode(Opcode: VEISD::RET_GLUE, DL, VT: MVT::Other, Ops: RetOps);
439}
440
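// Lower incoming formal arguments: register arguments become live-in copies,
// stack arguments become loads from fixed frame objects, and for varargs the
// frame offset of the first variable argument is recorded for va_start.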
441SDValue VETargetLowering::LowerFormalArguments(
442 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
443 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
444 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
445 MachineFunction &MF = DAG.getMachineFunction();
446
447 // Get the base offset of the incoming arguments stack space.
448 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
449 // Get the size of the preserved arguments area
450 unsigned ArgsPreserved = 64;
451
452 // Analyze arguments according to CC_VE.
453 SmallVector<CCValAssign, 16> ArgLocs;
454 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
455 *DAG.getContext());
456 // Allocate the preserved area first.
457 CCInfo.AllocateStack(Size: ArgsPreserved, Alignment: Align(8));
  // We already allocated the preserved area, so the stack offsets computed
  // by CC_VE are now correct.
460 CCInfo.AnalyzeFormalArguments(Ins, Fn: getParamCC(CallConv, IsVarArg: false));
461
462 for (const CCValAssign &VA : ArgLocs) {
463 assert(!VA.needsCustom() && "Unexpected custom lowering");
464 if (VA.isRegLoc()) {
465 // This argument is passed in a register.
466 // All integer register arguments are promoted by the caller to i64.
467
468 // Create a virtual register for the promoted live-in value.
469 Register VReg =
470 MF.addLiveIn(PReg: VA.getLocReg(), RC: getRegClassFor(VT: VA.getLocVT()));
471 SDValue Arg = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: VA.getLocVT());
472
473 // The caller promoted the argument, so insert an Assert?ext SDNode so we
474 // won't promote the value again in this function.
475 switch (VA.getLocInfo()) {
476 case CCValAssign::SExt:
477 Arg = DAG.getNode(Opcode: ISD::AssertSext, DL, VT: VA.getLocVT(), N1: Arg,
478 N2: DAG.getValueType(VA.getValVT()));
479 break;
480 case CCValAssign::ZExt:
481 Arg = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: VA.getLocVT(), N1: Arg,
482 N2: DAG.getValueType(VA.getValVT()));
483 break;
484 case CCValAssign::BCvt: {
485 // Extract a float argument from i64 with padding.
486 // 63 31 0
487 // +------+------+
488 // | float| 0 |
489 // +------+------+
490 assert(VA.getLocVT() == MVT::i64);
491 assert(VA.getValVT() == MVT::f32);
492 SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
493 Arg = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL,
494 VT: MVT::f32, Op1: Arg, Op2: Sub_f32),
495 0);
496 break;
497 }
498 default:
499 break;
500 }
501
502 // Truncate the register down to the argument type.
503 if (VA.isExtInLoc())
504 Arg = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VA.getValVT(), Operand: Arg);
505
506 InVals.push_back(Elt: Arg);
507 continue;
508 }
509
510 // The registers are exhausted. This argument was passed on the stack.
511 assert(VA.isMemLoc());
    // The CC_VE_Full/Half functions compute stack offsets relative to the
    // beginning of the arguments area at %fp plus the size of the reserved area.
514 unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
515 unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
516
    // Adjust the offset for a float argument by adding 4, since the argument is
    // stored in an 8-byte slot as shown below and LLVM generates a 4-byte load
    // instruction. This adjustment is needed only in LowerFormalArguments; in
    // LowerCall, a float argument is first converted to i64 and stored as 8
    // bytes of data, as required by the ABI, so no adjustment is necessary.
523 // 0 4
524 // +------+------+
525 // | empty| float|
526 // +------+------+
527 if (VA.getValVT() == MVT::f32)
528 Offset += 4;
529
530 int FI = MF.getFrameInfo().CreateFixedObject(Size: ValSize, SPOffset: Offset, IsImmutable: true);
531 InVals.push_back(
532 Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain,
533 Ptr: DAG.getFrameIndex(FI, VT: getPointerTy(DL: MF.getDataLayout())),
534 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
535 }
536
537 if (!IsVarArg)
538 return Chain;
539
540 // This function takes variable arguments, some of which may have been passed
541 // in registers %s0-%s8.
542 //
543 // The va_start intrinsic needs to know the offset to the first variable
544 // argument.
  // TODO: Calculate the offset correctly once we support f128.
546 unsigned ArgOffset = ArgLocs.size() * 8;
547 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
548 // Skip the reserved area at the top of stack.
549 FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
550
551 return Chain;
552}
553
554// FIXME? Maybe this could be a TableGen attribute on some registers and
555// this table could be generated automatically from RegInfo.
556Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
557 const MachineFunction &MF) const {
558 Register Reg = StringSwitch<Register>(RegName)
559 .Case(S: "sp", Value: VE::SX11) // Stack pointer
560 .Case(S: "fp", Value: VE::SX9) // Frame pointer
561 .Case(S: "sl", Value: VE::SX8) // Stack limit
562 .Case(S: "lr", Value: VE::SX10) // Link register
563 .Case(S: "tp", Value: VE::SX14) // Thread pointer
                     .Case(S: "outer", Value: VE::SX12) // Outer register
565 .Case(S: "info", Value: VE::SX17) // Info area register
566 .Case(S: "got", Value: VE::SX15) // Global offset table register
567 .Case(S: "plt", Value: VE::SX16) // Procedure linkage table register
568 .Default(Value: 0);
569
570 if (Reg)
571 return Reg;
572
573 report_fatal_error(reason: "Invalid register name global variable");
574}
575
576//===----------------------------------------------------------------------===//
577// TargetLowering Implementation
578//===----------------------------------------------------------------------===//
579
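// Lower an outgoing call: the callee address is materialized into SX12,
// arguments are copied into registers and/or stored to the outgoing argument
// area, the CALL node is emitted, and results are copied back out of their
// return registers.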
580SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
581 SmallVectorImpl<SDValue> &InVals) const {
582 SelectionDAG &DAG = CLI.DAG;
583 SDLoc DL = CLI.DL;
584 SDValue Chain = CLI.Chain;
585 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
586
587 // VE target does not yet support tail call optimization.
588 CLI.IsTailCall = false;
589
590 // Get the base offset of the outgoing arguments stack space.
591 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
592 // Get the size of the preserved arguments area
593 unsigned ArgsPreserved = 8 * 8u;
594
595 // Analyze operands of the call, assigning locations to each operand.
596 SmallVector<CCValAssign, 16> ArgLocs;
597 CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
598 *DAG.getContext());
599 // Allocate the preserved area first.
600 CCInfo.AllocateStack(Size: ArgsPreserved, Alignment: Align(8));
601 // We already allocated the preserved area, so the stack offset computed
602 // by CC_VE would be correct now.
603 CCInfo.AnalyzeCallOperands(Outs: CLI.Outs, Fn: getParamCC(CallConv: CLI.CallConv, IsVarArg: false));
604
  // VE requires passing arguments both in registers and on the stack for
  // varargs and unprototyped functions.
607 bool UseBoth = CLI.IsVarArg;
608
  // Analyze the operands again if they must be stored BOTH in registers and
  // on the stack.
610 SmallVector<CCValAssign, 16> ArgLocs2;
611 CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
612 ArgLocs2, *DAG.getContext());
613 if (UseBoth)
614 CCInfo2.AnalyzeCallOperands(Outs: CLI.Outs, Fn: getParamCC(CallConv: CLI.CallConv, IsVarArg: true));
615
616 // Get the size of the outgoing arguments stack space requirement.
617 unsigned ArgsSize = CCInfo.getStackSize();
618
619 // Keep stack frames 16-byte aligned.
620 ArgsSize = alignTo(Value: ArgsSize, Align: 16);
621
622 // Adjust the stack pointer to make room for the arguments.
623 // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
624 // with more than 6 arguments.
625 Chain = DAG.getCALLSEQ_START(Chain, InSize: ArgsSize, OutSize: 0, DL);
626
627 // Collect the set of registers to pass to the function and their values.
628 // This will be emitted as a sequence of CopyToReg nodes glued to the call
629 // instruction.
630 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
631
  // Collect chains from all the memory operations that copy arguments to the
  // stack. They must follow the stack pointer adjustment above and precede the
  // call instruction itself.
635 SmallVector<SDValue, 8> MemOpChains;
636
  // VE needs the address of the callee function in a register, so prepare to
  // copy it to SX12 here.
639
  // If the callee is a GlobalAddress node (quite common, every direct call is),
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  // Likewise ExternalSymbol -> TargetExternalSymbol.
643 SDValue Callee = CLI.Callee;
644
645 bool IsPICCall = isPositionIndependent();
646
647 // PC-relative references to external symbols should go through $stub.
648 // If so, we need to prepare GlobalBaseReg first.
649 const TargetMachine &TM = DAG.getTarget();
650 const GlobalValue *GV = nullptr;
651 auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
652 if (CalleeG)
653 GV = CalleeG->getGlobal();
654 bool Local = TM.shouldAssumeDSOLocal(GV);
655 bool UsePlt = !Local;
656 MachineFunction &MF = DAG.getMachineFunction();
657
  // Turn the GlobalAddress/ExternalSymbol node into a value node containing
  // its address here.
660 if (CalleeG) {
661 if (IsPICCall) {
662 if (UsePlt)
663 Subtarget->getInstrInfo()->getGlobalBaseReg(MF: &MF);
664 Callee = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0, TargetFlags: 0);
665 Callee = DAG.getNode(Opcode: VEISD::GETFUNPLT, DL, VT: PtrVT, Operand: Callee);
666 } else {
667 Callee =
668 makeHiLoPair(Op: Callee, HiTF: VEMCExpr::VK_VE_HI32, LoTF: VEMCExpr::VK_VE_LO32, DAG);
669 }
670 } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
671 if (IsPICCall) {
672 if (UsePlt)
673 Subtarget->getInstrInfo()->getGlobalBaseReg(MF: &MF);
674 Callee = DAG.getTargetExternalSymbol(Sym: E->getSymbol(), VT: PtrVT, TargetFlags: 0);
675 Callee = DAG.getNode(Opcode: VEISD::GETFUNPLT, DL, VT: PtrVT, Operand: Callee);
676 } else {
677 Callee =
678 makeHiLoPair(Op: Callee, HiTF: VEMCExpr::VK_VE_HI32, LoTF: VEMCExpr::VK_VE_LO32, DAG);
679 }
680 }
681
682 RegsToPass.push_back(Elt: std::make_pair(x: VE::SX12, y&: Callee));
683
684 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
685 CCValAssign &VA = ArgLocs[i];
686 SDValue Arg = CLI.OutVals[i];
687
688 // Promote the value if needed.
689 switch (VA.getLocInfo()) {
690 default:
691 llvm_unreachable("Unknown location info!");
692 case CCValAssign::Full:
693 break;
694 case CCValAssign::SExt:
695 Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: VA.getLocVT(), Operand: Arg);
696 break;
697 case CCValAssign::ZExt:
698 Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: VA.getLocVT(), Operand: Arg);
699 break;
700 case CCValAssign::AExt:
701 Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: VA.getLocVT(), Operand: Arg);
702 break;
703 case CCValAssign::BCvt: {
704 // Convert a float argument to i64 with padding.
705 // 63 31 0
706 // +------+------+
707 // | float| 0 |
708 // +------+------+
709 assert(VA.getLocVT() == MVT::i64);
710 assert(VA.getValVT() == MVT::f32);
711 SDValue Undef = SDValue(
712 DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::i64), 0);
713 SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
714 Arg = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL,
715 VT: MVT::i64, Op1: Undef, Op2: Arg, Op3: Sub_f32),
716 0);
717 break;
718 }
719 }
720
721 if (VA.isRegLoc()) {
722 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
723 if (!UseBoth)
724 continue;
725 VA = ArgLocs2[i];
726 }
727
728 assert(VA.isMemLoc());
729
730 // Create a store off the stack pointer for this argument.
731 SDValue StackPtr = DAG.getRegister(Reg: VE::SX11, VT: PtrVT);
    // The argument area starts at %fp/%sp plus the size of the reserved area.
733 SDValue PtrOff =
734 DAG.getIntPtrConstant(Val: VA.getLocMemOffset() + ArgsBaseOffset, DL);
735 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: PtrOff);
736 MemOpChains.push_back(
737 Elt: DAG.getStore(Chain, dl: DL, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo()));
738 }
739
740 // Emit all stores, make sure they occur before the call.
741 if (!MemOpChains.empty())
742 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains);
743
744 // Build a sequence of CopyToReg nodes glued together with token chain and
745 // glue operands which copy the outgoing args into registers. The InGlue is
746 // necessary since all emitted instructions must be stuck together in order
747 // to pass the live physical registers.
748 SDValue InGlue;
749 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
750 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegsToPass[i].first,
751 N: RegsToPass[i].second, Glue: InGlue);
752 InGlue = Chain.getValue(R: 1);
753 }
754
755 // Build the operands for the call instruction itself.
756 SmallVector<SDValue, 8> Ops;
757 Ops.push_back(Elt: Chain);
758 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
759 Ops.push_back(Elt: DAG.getRegister(Reg: RegsToPass[i].first,
760 VT: RegsToPass[i].second.getValueType()));
761
762 // Add a register mask operand representing the call-preserved registers.
763 const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
764 const uint32_t *Mask =
765 TRI->getCallPreservedMask(MF: DAG.getMachineFunction(), CC: CLI.CallConv);
766 assert(Mask && "Missing call preserved mask for calling convention");
767 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
768
769 // Make sure the CopyToReg nodes are glued to the call instruction which
770 // consumes the registers.
771 if (InGlue.getNode())
772 Ops.push_back(Elt: InGlue);
773
774 // Now the call itself.
775 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
776 Chain = DAG.getNode(Opcode: VEISD::CALL, DL, VTList: NodeTys, Ops);
777 InGlue = Chain.getValue(R: 1);
778
779 // Revert the stack pointer immediately after the call.
780 Chain = DAG.getCALLSEQ_END(Chain, Size1: ArgsSize, Size2: 0, Glue: InGlue, DL);
781 InGlue = Chain.getValue(R: 1);
782
783 // Now extract the return values. This is more or less the same as
784 // LowerFormalArguments.
785
786 // Assign locations to each value returned by this call.
787 SmallVector<CCValAssign, 16> RVLocs;
788 CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
789 *DAG.getContext());
790
791 // Set inreg flag manually for codegen generated library calls that
792 // return float.
793 if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
794 CLI.Ins[0].Flags.setInReg();
795
796 RVInfo.AnalyzeCallResult(Ins: CLI.Ins, Fn: getReturnCC(CallConv: CLI.CallConv));
797
798 // Copy all of the result registers out of their specified physreg.
799 for (unsigned i = 0; i != RVLocs.size(); ++i) {
800 CCValAssign &VA = RVLocs[i];
801 assert(!VA.needsCustom() && "Unexpected custom lowering");
802 Register Reg = VA.getLocReg();
803
    // When returning 'inreg {i32, i32}', two consecutive i32 arguments can
    // reside in the same register in the high and low bits. Reuse the previous
    // CopyFromReg node to avoid duplicate copies.
807 SDValue RV;
808 if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Val: Chain.getOperand(i: 1)))
809 if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
810 RV = Chain.getValue(R: 0);
811
812 // But usually we'll create a new CopyFromReg for a different register.
813 if (!RV.getNode()) {
814 RV = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: RVLocs[i].getLocVT(), Glue: InGlue);
815 Chain = RV.getValue(R: 1);
816 InGlue = Chain.getValue(R: 2);
817 }
818
819 // The callee promoted the return value, so insert an Assert?ext SDNode so
820 // we won't promote the value again in this function.
821 switch (VA.getLocInfo()) {
822 case CCValAssign::SExt:
823 RV = DAG.getNode(Opcode: ISD::AssertSext, DL, VT: VA.getLocVT(), N1: RV,
824 N2: DAG.getValueType(VA.getValVT()));
825 break;
826 case CCValAssign::ZExt:
827 RV = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: VA.getLocVT(), N1: RV,
828 N2: DAG.getValueType(VA.getValVT()));
829 break;
830 case CCValAssign::BCvt: {
831 // Extract a float return value from i64 with padding.
832 // 63 31 0
833 // +------+------+
834 // | float| 0 |
835 // +------+------+
836 assert(VA.getLocVT() == MVT::i64);
837 assert(VA.getValVT() == MVT::f32);
838 SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
839 RV = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL,
840 VT: MVT::f32, Op1: RV, Op2: Sub_f32),
841 0);
842 break;
843 }
844 default:
845 break;
846 }
847
848 // Truncate the register down to the return value type.
849 if (VA.isExtInLoc())
850 RV = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VA.getValVT(), Operand: RV);
851
852 InVals.push_back(Elt: RV);
853 }
854
855 return Chain;
856}
857
858bool VETargetLowering::isOffsetFoldingLegal(
859 const GlobalAddressSDNode *GA) const {
  // VE uses 64-bit addressing, so we need multiple instructions to generate
  // an address. Folding an address with an offset increases the number of
  // instructions, so we disable it here. Offsets will be folded later in DAG
  // combine if it is worthwhile.
864 return false;
865}
866
867/// isFPImmLegal - Returns true if the target can instruction select the
868/// specified FP immediate natively. If false, the legalizer will
869/// materialize the FP immediate as a load from a constant pool.
870bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
871 bool ForCodeSize) const {
872 return VT == MVT::f32 || VT == MVT::f64;
873}
874
875/// Determine if the target supports unaligned memory accesses.
876///
877/// This function returns true if the target allows unaligned memory accesses
878/// of the specified type in the given address space. If true, it also returns
879/// whether the unaligned memory access is "fast" in the last argument by
880/// reference. This is used, for example, in situations where an array
881/// copy/move/set is converted to a sequence of store operations. Its use
882/// helps to ensure that such replacements don't generate code that causes an
883/// alignment error (trap) on the target machine.
884bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
885 unsigned AddrSpace,
886 Align A,
887 MachineMemOperand::Flags,
888 unsigned *Fast) const {
889 if (Fast) {
    // It's always fast on VE.
891 *Fast = 1;
892 }
893 return true;
894}
895
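// VETargetLowering constructor: configure boolean contents, register classes,
// scalar (SPU) and vector (VPU) legalization actions, target DAG combines,
// and alignment defaults for the VE target.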
896VETargetLowering::VETargetLowering(const TargetMachine &TM,
897 const VESubtarget &STI)
898 : TargetLowering(TM), Subtarget(&STI) {
899 // Instructions which use registers as conditionals examine all the
900 // bits (as does the pseudo SELECT_CC expansion). I don't think it
901 // matters much whether it's ZeroOrOneBooleanContent, or
902 // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
903 // former.
904 setBooleanContents(ZeroOrOneBooleanContent);
905 setBooleanVectorContents(ZeroOrOneBooleanContent);
906
907 initRegisterClasses();
908 initSPUActions();
909 initVPUActions();
910
911 setStackPointerRegisterToSaveRestore(VE::SX11);
912
913 // We have target-specific dag combine patterns for the following nodes:
914 setTargetDAGCombine(ISD::TRUNCATE);
915 setTargetDAGCombine(ISD::SELECT);
916 setTargetDAGCombine(ISD::SELECT_CC);
917
918 // Set function alignment to 16 bytes
919 setMinFunctionAlignment(Align(16));
920
  // VE stores all arguments with 8-byte alignment.
922 setMinStackArgumentAlignment(Align(8));
923
924 computeRegisterProperties(TRI: Subtarget->getRegisterInfo());
925}
926
927const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
928#define TARGET_NODE_CASE(NAME) \
929 case VEISD::NAME: \
930 return "VEISD::" #NAME;
931 switch ((VEISD::NodeType)Opcode) {
932 case VEISD::FIRST_NUMBER:
933 break;
934 TARGET_NODE_CASE(CMPI)
935 TARGET_NODE_CASE(CMPU)
936 TARGET_NODE_CASE(CMPF)
937 TARGET_NODE_CASE(CMPQ)
938 TARGET_NODE_CASE(CMOV)
939 TARGET_NODE_CASE(CALL)
940 TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
941 TARGET_NODE_CASE(EH_SJLJ_SETJMP)
942 TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
943 TARGET_NODE_CASE(GETFUNPLT)
944 TARGET_NODE_CASE(GETSTACKTOP)
945 TARGET_NODE_CASE(GETTLSADDR)
946 TARGET_NODE_CASE(GLOBAL_BASE_REG)
947 TARGET_NODE_CASE(Hi)
948 TARGET_NODE_CASE(Lo)
949 TARGET_NODE_CASE(RET_GLUE)
950 TARGET_NODE_CASE(TS1AM)
951 TARGET_NODE_CASE(VEC_UNPACK_LO)
952 TARGET_NODE_CASE(VEC_UNPACK_HI)
953 TARGET_NODE_CASE(VEC_PACK)
954 TARGET_NODE_CASE(VEC_BROADCAST)
955 TARGET_NODE_CASE(REPL_I32)
956 TARGET_NODE_CASE(REPL_F32)
957
958 TARGET_NODE_CASE(LEGALAVL)
959
960 // Register the VVP_* SDNodes.
961#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
962#include "VVPNodes.def"
963 }
964#undef TARGET_NODE_CASE
965 return nullptr;
966}
967
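// SETCC results are always produced as i32 on VE, regardless of the compared
// type.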
968EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
969 EVT VT) const {
970 return MVT::i32;
971}
972
973// Convert to a target node and set target flags.
974SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
975 SelectionDAG &DAG) const {
976 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op))
977 return DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc(GA),
978 VT: GA->getValueType(ResNo: 0), offset: GA->getOffset(), TargetFlags: TF);
979
980 if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Val&: Op))
981 return DAG.getTargetBlockAddress(BA: BA->getBlockAddress(), VT: Op.getValueType(),
982 Offset: 0, TargetFlags: TF);
983
984 if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Val&: Op))
985 return DAG.getTargetConstantPool(C: CP->getConstVal(), VT: CP->getValueType(ResNo: 0),
986 Align: CP->getAlign(), Offset: CP->getOffset(), TargetFlags: TF);
987
988 if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Val&: Op))
989 return DAG.getTargetExternalSymbol(Sym: ES->getSymbol(), VT: ES->getValueType(ResNo: 0),
990 TargetFlags: TF);
991
992 if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Val&: Op))
993 return DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: JT->getValueType(ResNo: 0), TargetFlags: TF);
994
995 llvm_unreachable("Unhandled address SDNode");
996}
997
998// Split Op into high and low parts according to HiTF and LoTF.
999// Return an ADD node combining the parts.
1000SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
1001 SelectionDAG &DAG) const {
1002 SDLoc DL(Op);
1003 EVT VT = Op.getValueType();
1004 SDValue Hi = DAG.getNode(Opcode: VEISD::Hi, DL, VT, Operand: withTargetFlags(Op, TF: HiTF, DAG));
1005 SDValue Lo = DAG.getNode(Opcode: VEISD::Lo, DL, VT, Operand: withTargetFlags(Op, TF: LoTF, DAG));
1006 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Hi, N2: Lo);
1007}
1008
1009// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
1010// or ExternalSymbol SDNode.
1011SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
1012 SDLoc DL(Op);
1013 EVT PtrVT = Op.getValueType();
1014
  // Handle PIC mode first. VE needs a GOT load for every variable.
1016 if (isPositionIndependent()) {
1017 auto GlobalN = dyn_cast<GlobalAddressSDNode>(Val&: Op);
1018
1019 if (isa<ConstantPoolSDNode>(Val: Op) || isa<JumpTableSDNode>(Val: Op) ||
1020 (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
      // Create the following instructions for local-linkage PIC code.
1022 // lea %reg, label@gotoff_lo
1023 // and %reg, %reg, (32)0
1024 // lea.sl %reg, label@gotoff_hi(%reg, %got)
1025 SDValue HiLo = makeHiLoPair(Op, HiTF: VEMCExpr::VK_VE_GOTOFF_HI32,
1026 LoTF: VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1027 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrVT);
1028 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: GlobalBase, N2: HiLo);
1029 }
    // Create the following instructions for non-local-linkage PIC code.
1031 // lea %reg, label@got_lo
1032 // and %reg, %reg, (32)0
1033 // lea.sl %reg, label@got_hi(%reg)
1034 // ld %reg, (%reg, %got)
1035 SDValue HiLo = makeHiLoPair(Op, HiTF: VEMCExpr::VK_VE_GOT_HI32,
1036 LoTF: VEMCExpr::VK_VE_GOT_LO32, DAG);
1037 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrVT);
1038 SDValue AbsAddr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: GlobalBase, N2: HiLo);
1039 return DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: AbsAddr,
1040 PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()));
1041 }
1042
1043 // This is one of the absolute code models.
1044 switch (getTargetMachine().getCodeModel()) {
1045 default:
1046 llvm_unreachable("Unsupported absolute code model");
1047 case CodeModel::Small:
1048 case CodeModel::Medium:
1049 case CodeModel::Large:
1050 // abs64.
1051 return makeHiLoPair(Op, HiTF: VEMCExpr::VK_VE_HI32, LoTF: VEMCExpr::VK_VE_LO32, DAG);
1052 }
1053}
1054
1055/// Custom Lower {
1056
// The emitLeadingFence/emitTrailingFence mappings for VE are designed
// following http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
1059Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
1060 Instruction *Inst,
1061 AtomicOrdering Ord) const {
1062 switch (Ord) {
1063 case AtomicOrdering::NotAtomic:
1064 case AtomicOrdering::Unordered:
1065 llvm_unreachable("Invalid fence: unordered/non-atomic");
1066 case AtomicOrdering::Monotonic:
1067 case AtomicOrdering::Acquire:
1068 return nullptr; // Nothing to do
1069 case AtomicOrdering::Release:
1070 case AtomicOrdering::AcquireRelease:
1071 return Builder.CreateFence(Ordering: AtomicOrdering::Release);
1072 case AtomicOrdering::SequentiallyConsistent:
1073 if (!Inst->hasAtomicStore())
1074 return nullptr; // Nothing to do
1075 return Builder.CreateFence(Ordering: AtomicOrdering::SequentiallyConsistent);
1076 }
1077 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
1078}
1079
1080Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
1081 Instruction *Inst,
1082 AtomicOrdering Ord) const {
1083 switch (Ord) {
1084 case AtomicOrdering::NotAtomic:
1085 case AtomicOrdering::Unordered:
1086 llvm_unreachable("Invalid fence: unordered/not-atomic");
1087 case AtomicOrdering::Monotonic:
1088 case AtomicOrdering::Release:
1089 return nullptr; // Nothing to do
1090 case AtomicOrdering::Acquire:
1091 case AtomicOrdering::AcquireRelease:
1092 return Builder.CreateFence(Ordering: AtomicOrdering::Acquire);
1093 case AtomicOrdering::SequentiallyConsistent:
1094 return Builder.CreateFence(Ordering: AtomicOrdering::SequentiallyConsistent);
1095 }
1096 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
1097}
1098
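// Custom lowering for ATOMIC_FENCE: emit a "fencem" instruction for
// cross-thread (system-scope) fences and fall back to a MEMBARRIER (compiler
// barrier) otherwise.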
1099SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
1100 SelectionDAG &DAG) const {
1101 SDLoc DL(Op);
1102 AtomicOrdering FenceOrdering =
1103 static_cast<AtomicOrdering>(Op.getConstantOperandVal(i: 1));
1104 SyncScope::ID FenceSSID =
1105 static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2));
1106
  // VE uses release consistency, so a fence instruction is needed for a
  // cross-thread fence.
1109 if (FenceSSID == SyncScope::System) {
1110 switch (FenceOrdering) {
1111 case AtomicOrdering::NotAtomic:
1112 case AtomicOrdering::Unordered:
1113 case AtomicOrdering::Monotonic:
1114 // No need to generate fencem instruction here.
1115 break;
1116 case AtomicOrdering::Acquire:
1117 // Generate "fencem 2" as acquire fence.
1118 return SDValue(DAG.getMachineNode(Opcode: VE::FENCEM, dl: DL, VT: MVT::Other,
1119 Op1: DAG.getTargetConstant(Val: 2, DL, VT: MVT::i32),
1120 Op2: Op.getOperand(i: 0)),
1121 0);
1122 case AtomicOrdering::Release:
1123 // Generate "fencem 1" as release fence.
1124 return SDValue(DAG.getMachineNode(Opcode: VE::FENCEM, dl: DL, VT: MVT::Other,
1125 Op1: DAG.getTargetConstant(Val: 1, DL, VT: MVT::i32),
1126 Op2: Op.getOperand(i: 0)),
1127 0);
1128 case AtomicOrdering::AcquireRelease:
1129 case AtomicOrdering::SequentiallyConsistent:
1130 // Generate "fencem 3" as acq_rel and seq_cst fence.
      // FIXME: "fencem 3" doesn't wait for accesses to PCIe devices,
      // so seq_cst may require more instructions for them.
1133 return SDValue(DAG.getMachineNode(Opcode: VE::FENCEM, dl: DL, VT: MVT::Other,
1134 Op1: DAG.getTargetConstant(Val: 3, DL, VT: MVT::i32),
1135 Op2: Op.getOperand(i: 0)),
1136 0);
1137 }
1138 }
1139
1140 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1141 return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0));
1142}
1143
1144TargetLowering::AtomicExpansionKind
1145VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have a TS1AM implementation for i8/i16/i32/i64, so use it.
1147 if (AI->getOperation() == AtomicRMWInst::Xchg) {
1148 return AtomicExpansionKind::None;
1149 }
1150 // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
1151
  // Otherwise, expand it using a compare-and-exchange instruction so that we
  // don't call the __sync_fetch_and_* functions.
1154 return AtomicExpansionKind::CmpXChg;
1155}
1156
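// Compute the TS1AM operands for an i8/i16 atomic swap: the byte-enable flag,
// the bit offset within the containing 4-byte word, and the value shifted
// into position.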
1157static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
1158 SDValue &Bits) {
1159 SDLoc DL(Op);
1160 AtomicSDNode *N = cast<AtomicSDNode>(Val&: Op);
1161 SDValue Ptr = N->getOperand(Num: 1);
1162 SDValue Val = N->getOperand(Num: 2);
1163 EVT PtrVT = Ptr.getValueType();
1164 bool Byte = N->getMemoryVT() == MVT::i8;
1165 // Remainder = AND Ptr, 3
1166 // Flag = 1 << Remainder ; If Byte is true (1 byte swap flag)
1167 // Flag = 3 << Remainder ; If Byte is false (2 bytes swap flag)
1168 // Bits = Remainder << 3
1169 // NewVal = Val << Bits
1170 SDValue Const3 = DAG.getConstant(Val: 3, DL, VT: PtrVT);
1171 SDValue Remainder = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, Ops: {Ptr, Const3});
1172 SDValue Mask = Byte ? DAG.getConstant(Val: 1, DL, VT: MVT::i32)
1173 : DAG.getConstant(Val: 3, DL, VT: MVT::i32);
1174 Flag = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i32, Ops: {Mask, Remainder});
1175 Bits = DAG.getNode(Opcode: ISD::SHL, DL, VT: PtrVT, Ops: {Remainder, Const3});
1176 return DAG.getNode(Opcode: ISD::SHL, DL, VT: Val.getValueType(), Ops: {Val, Bits});
1177}
1178
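// Shift the word returned by TS1AM back down and mask it to the original
// i8/i16 access size.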
1179static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
1180 SDValue Bits) {
1181 SDLoc DL(Op);
1182 EVT VT = Data.getValueType();
1183 bool Byte = cast<AtomicSDNode>(Val&: Op)->getMemoryVT() == MVT::i8;
1184 // NewData = Data >> Bits
1185 // Result = NewData & 0xff ; If Byte is true (1 byte)
1186 // Result = NewData & 0xffff ; If Byte is false (2 bytes)
1187
1188 SDValue NewData = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Data, N2: Bits);
1189 return DAG.getNode(Opcode: ISD::AND, DL, VT,
1190 Ops: {NewData, DAG.getConstant(Val: Byte ? 0xff : 0xffff, DL, VT)});
1191}
1192
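// Custom lowering for ATOMIC_SWAP: i8 and i16 swaps are implemented with the
// TS1AM instruction on the containing aligned word; other widths are left to
// the default legalization.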
1193SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
1194 SelectionDAG &DAG) const {
1195 SDLoc DL(Op);
1196 AtomicSDNode *N = cast<AtomicSDNode>(Val&: Op);
1197
1198 if (N->getMemoryVT() == MVT::i8) {
1199 // For i8, use "ts1am"
1200 // Input:
1201 // ATOMIC_SWAP Ptr, Val, Order
1202 //
1203 // Output:
1204 // Remainder = AND Ptr, 3
1205 // Flag = 1 << Remainder ; 1 byte swap flag for TS1AM inst.
1206 // Bits = Remainder << 3
1207 // NewVal = Val << Bits
1208 //
1209 // Aligned = AND Ptr, -4
1210 // Data = TS1AM Aligned, Flag, NewVal
1211 //
1212 // NewData = Data >> Bits
1213 // Result = NewData & 0xff ; 1 byte result
1214 SDValue Flag;
1215 SDValue Bits;
1216 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1217
1218 SDValue Ptr = N->getOperand(Num: 1);
1219 SDValue Aligned = DAG.getNode(Opcode: ISD::AND, DL, VT: Ptr.getValueType(),
1220 Ops: {Ptr, DAG.getConstant(Val: -4, DL, VT: MVT::i64)});
1221 SDValue TS1AM = DAG.getAtomic(Opcode: VEISD::TS1AM, dl: DL, MemVT: N->getMemoryVT(),
1222 VTList: DAG.getVTList(VT1: Op.getNode()->getValueType(ResNo: 0),
1223 VT2: Op.getNode()->getValueType(ResNo: 1)),
1224 Ops: {N->getChain(), Aligned, Flag, NewVal},
1225 MMO: N->getMemOperand());
1226
1227 SDValue Result = finalizeTS1AM(Op, DAG, Data: TS1AM, Bits);
1228 SDValue Chain = TS1AM.getValue(R: 1);
1229 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
1230 }
1231 if (N->getMemoryVT() == MVT::i16) {
1232 // For i16, use "ts1am"
1233 SDValue Flag;
1234 SDValue Bits;
1235 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1236
1237 SDValue Ptr = N->getOperand(Num: 1);
1238 SDValue Aligned = DAG.getNode(Opcode: ISD::AND, DL, VT: Ptr.getValueType(),
1239 Ops: {Ptr, DAG.getConstant(Val: -4, DL, VT: MVT::i64)});
1240 SDValue TS1AM = DAG.getAtomic(Opcode: VEISD::TS1AM, dl: DL, MemVT: N->getMemoryVT(),
1241 VTList: DAG.getVTList(VT1: Op.getNode()->getValueType(ResNo: 0),
1242 VT2: Op.getNode()->getValueType(ResNo: 1)),
1243 Ops: {N->getChain(), Aligned, Flag, NewVal},
1244 MMO: N->getMemOperand());
1245
1246 SDValue Result = finalizeTS1AM(Op, DAG, Data: TS1AM, Bits);
1247 SDValue Chain = TS1AM.getValue(R: 1);
1248 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
1249 }
  // Otherwise, let LLVM legalize it.
1251 return Op;
1252}
1253
1254SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
1255 SelectionDAG &DAG) const {
1256 return makeAddress(Op, DAG);
1257}
1258
1259SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
1260 SelectionDAG &DAG) const {
1261 return makeAddress(Op, DAG);
1262}
1263
1264SDValue VETargetLowering::lowerConstantPool(SDValue Op,
1265 SelectionDAG &DAG) const {
1266 return makeAddress(Op, DAG);
1267}
1268
1269SDValue
1270VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
1271 SelectionDAG &DAG) const {
1272 SDLoc DL(Op);
1273
1274 // Generate the following code:
1275 // t1: ch,glue = callseq_start t0, 0, 0
1276 // t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
1277 // t3: ch,glue = callseq_end t2, 0, 0, t2:2
1278 // t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
1279 SDValue Label = withTargetFlags(Op, TF: 0, DAG);
1280 EVT PtrVT = Op.getValueType();
1281
  // Lowering the machine ISD will make sure everything is in the right
  // location.
1284 SDValue Chain = DAG.getEntryNode();
1285 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
1286 const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
1287 MF: DAG.getMachineFunction(), CC: CallingConv::C);
1288 Chain = DAG.getCALLSEQ_START(Chain, InSize: 64, OutSize: 0, DL);
1289 SDValue Args[] = {Chain, Label, DAG.getRegisterMask(RegMask: Mask), Chain.getValue(R: 1)};
1290 Chain = DAG.getNode(Opcode: VEISD::GETTLSADDR, DL, VTList: NodeTys, Ops: Args);
1291 Chain = DAG.getCALLSEQ_END(Chain, Size1: 64, Size2: 0, Glue: Chain.getValue(R: 1), DL);
1292 Chain = DAG.getCopyFromReg(Chain, dl: DL, Reg: VE::SX0, VT: PtrVT, Glue: Chain.getValue(R: 1));
1293
  // GETTLSADDR will be codegen'ed as a call. Inform MFI that this function has
  // calls.
1295 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1296 MFI.setHasCalls(true);
1297
1298 // Also generate code to prepare a GOT register if it is PIC.
1299 if (isPositionIndependent()) {
1300 MachineFunction &MF = DAG.getMachineFunction();
1301 Subtarget->getInstrInfo()->getGlobalBaseReg(MF: &MF);
1302 }
1303
1304 return Chain;
1305}
1306
1307SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
1308 SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't accept local-exec-model
  // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
  // generate the general-dynamic-model code sequence.
1312 //
1313 // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
1314 return lowerToTLSGeneralDynamicModel(Op, DAG);
1315}
1316
1317SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1318 return makeAddress(Op, DAG);
1319}
1320
// Lower an f128 load into two f64 loads.
1322static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
1323 SDLoc DL(Op);
1324 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Val: Op.getNode());
1325 assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1326 Align Alignment = LdNode->getAlign();
1327 if (Alignment > 8)
1328 Alignment = Align(8);
1329
1330 SDValue Lo64 =
1331 DAG.getLoad(VT: MVT::f64, dl: DL, Chain: LdNode->getChain(), Ptr: LdNode->getBasePtr(),
1332 PtrInfo: LdNode->getPointerInfo(), Alignment,
1333 MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1334 : MachineMemOperand::MONone);
1335 EVT AddrVT = LdNode->getBasePtr().getValueType();
1336 SDValue HiPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: LdNode->getBasePtr(),
1337 N2: DAG.getConstant(Val: 8, DL, VT: AddrVT));
1338 SDValue Hi64 =
1339 DAG.getLoad(VT: MVT::f64, dl: DL, Chain: LdNode->getChain(), Ptr: HiPtr,
1340 PtrInfo: LdNode->getPointerInfo(), Alignment,
1341 MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1342 : MachineMemOperand::MONone);
1343
1344 SDValue SubRegEven = DAG.getTargetConstant(Val: VE::sub_even, DL, VT: MVT::i32);
1345 SDValue SubRegOdd = DAG.getTargetConstant(Val: VE::sub_odd, DL, VT: MVT::i32);
1346
1347 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1348 SDNode *InFP128 =
1349 DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::f128);
1350 InFP128 = DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL, VT: MVT::f128,
1351 Op1: SDValue(InFP128, 0), Op2: Hi64, Op3: SubRegEven);
1352 InFP128 = DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL, VT: MVT::f128,
1353 Op1: SDValue(InFP128, 0), Op2: Lo64, Op3: SubRegOdd);
1354 SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
1355 SDValue(Hi64.getNode(), 1)};
1356 SDValue OutChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1357 SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
1358 return DAG.getMergeValues(Ops, dl: DL);
1359}
1360
// Lower a vXi1 load into the following instruction sequence:
1362// LDrii %1, (,%addr)
1363// LVMxir %vm, 0, %1
1364// LDrii %2, 8(,%addr)
1365// LVMxir %vm, 0, %2
1366// ...
1367static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
1368 SDLoc DL(Op);
1369 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Val: Op.getNode());
1370 assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1371
1372 SDValue BasePtr = LdNode->getBasePtr();
1373 Align Alignment = LdNode->getAlign();
1374 if (Alignment > 8)
1375 Alignment = Align(8);
1376
1377 EVT AddrVT = BasePtr.getValueType();
1378 EVT MemVT = LdNode->getMemoryVT();
1379 if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1380 SDValue OutChains[4];
1381 SDNode *VM = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MemVT);
1382 for (int i = 0; i < 4; ++i) {
1383 // Generate load dag and prepare chains.
1384 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1385 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1386 SDValue Val =
1387 DAG.getLoad(VT: MVT::i64, dl: DL, Chain: LdNode->getChain(), Ptr: Addr,
1388 PtrInfo: LdNode->getPointerInfo(), Alignment,
1389 MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1390 : MachineMemOperand::MONone);
1391 OutChains[i] = SDValue(Val.getNode(), 1);
1392
1393 VM = DAG.getMachineNode(Opcode: VE::LVMir_m, dl: DL, VT: MVT::i64,
1394 Op1: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64), Op2: Val,
1395 Op3: SDValue(VM, 0));
1396 }
1397 SDValue OutChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1398 SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1399 return DAG.getMergeValues(Ops, dl: DL);
1400 } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
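 // A v512i1 mask occupies eight 64-bit chunks and lives in the wider VM512
 // register class, hence the LVMyir_y variant.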
1401 SDValue OutChains[8];
1402 SDNode *VM = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MemVT);
1403 for (int i = 0; i < 8; ++i) {
1404 // Generate load dag and prepare chains.
1405 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1406 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1407 SDValue Val =
1408 DAG.getLoad(VT: MVT::i64, dl: DL, Chain: LdNode->getChain(), Ptr: Addr,
1409 PtrInfo: LdNode->getPointerInfo(), Alignment,
1410 MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1411 : MachineMemOperand::MONone);
1412 OutChains[i] = SDValue(Val.getNode(), 1);
1413
1414 VM = DAG.getMachineNode(Opcode: VE::LVMyir_y, dl: DL, VT: MVT::i64,
1415 Op1: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64), Op2: Val,
1416 Op3: SDValue(VM, 0));
1417 }
1418 SDValue OutChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1419 SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1420 return DAG.getMergeValues(Ops, dl: DL);
1421 } else {
1422 // Otherwise, ask llvm to expand it.
1423 return SDValue();
1424 }
1425}
1426
1427SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1428 LoadSDNode *LdNode = cast<LoadSDNode>(Val: Op.getNode());
1429 EVT MemVT = LdNode->getMemoryVT();
1430
1431 // If VPU is enabled, always expand non-mask vector loads to VVP
1432 if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(SomeVT: MemVT))
1433 return lowerToVVP(Op, DAG);
1434
1435 SDValue BasePtr = LdNode->getBasePtr();
1436 if (isa<FrameIndexSDNode>(Val: BasePtr.getNode())) {
1437 // Do not expand a load with a frame-index base address here because of
1438 // dependency problems. We expand it later in eliminateFrameIndex().
1439 return Op;
1440 }
1441
1442 if (MemVT == MVT::f128)
1443 return lowerLoadF128(Op, DAG);
1444 if (isMaskType(SomeVT: MemVT))
1445 return lowerLoadI1(Op, DAG);
1446
1447 return Op;
1448}
1449
1450// Lower a f128 store into two f64 stores.
1451static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
1452 SDLoc DL(Op);
1453 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Val: Op.getNode());
1454 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1455
1456 SDValue SubRegEven = DAG.getTargetConstant(Val: VE::sub_even, DL, VT: MVT::i32);
1457 SDValue SubRegOdd = DAG.getTargetConstant(Val: VE::sub_odd, DL, VT: MVT::i32);
1458
1459 SDNode *Hi64 = DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i64,
1460 Op1: StNode->getValue(), Op2: SubRegEven);
1461 SDNode *Lo64 = DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i64,
1462 Op1: StNode->getValue(), Op2: SubRegOdd);
1463
1464 Align Alignment = StNode->getAlign();
1465 if (Alignment > 8)
1466 Alignment = Align(8);
1467
1468 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1469 SDValue OutChains[2];
1470 OutChains[0] =
1471 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(Lo64, 0),
1472 Ptr: StNode->getBasePtr(), PtrInfo: MachinePointerInfo(), Alignment,
1473 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1474 : MachineMemOperand::MONone);
1475 EVT AddrVT = StNode->getBasePtr().getValueType();
1476 SDValue HiPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: StNode->getBasePtr(),
1477 N2: DAG.getConstant(Val: 8, DL, VT: AddrVT));
1478 OutChains[1] =
1479 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(Hi64, 0), Ptr: HiPtr,
1480 PtrInfo: MachinePointerInfo(), Alignment,
1481 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1482 : MachineMemOperand::MONone);
1483 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1484}
1485
1486// Lower a vXi1 store into the following instructions:
1487// SVMi %1, %vm, 0
1488// STrii %1, (,%addr)
1489// SVMi %2, %vm, 1
1490// STrii %2, 8(,%addr)
1491// ...
1492static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
1493 SDLoc DL(Op);
1494 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Val: Op.getNode());
1495 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1496
1497 SDValue BasePtr = StNode->getBasePtr();
1498 Align Alignment = StNode->getAlign();
1499 if (Alignment > 8)
1500 Alignment = Align(8);
1501 EVT AddrVT = BasePtr.getValueType();
1502 EVT MemVT = StNode->getMemoryVT();
1503 if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1504 SDValue OutChains[4];
1505 for (int i = 0; i < 4; ++i) {
1506 SDNode *V =
1507 DAG.getMachineNode(Opcode: VE::SVMmi, dl: DL, VT: MVT::i64, Op1: StNode->getValue(),
1508 Op2: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64));
1509 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1510 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1511 OutChains[i] =
1512 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(V, 0), Ptr: Addr,
1513 PtrInfo: MachinePointerInfo(), Alignment,
1514 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1515 : MachineMemOperand::MONone);
1516 }
1517 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1518 } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1519 SDValue OutChains[8];
1520 for (int i = 0; i < 8; ++i) {
1521 SDNode *V =
1522 DAG.getMachineNode(Opcode: VE::SVMyi, dl: DL, VT: MVT::i64, Op1: StNode->getValue(),
1523 Op2: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64));
1524 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1525 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1526 OutChains[i] =
1527 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(V, 0), Ptr: Addr,
1528 PtrInfo: MachinePointerInfo(), Alignment,
1529 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1530 : MachineMemOperand::MONone);
1531 }
1532 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1533 } else {
1534 // Otherwise, ask llvm to expand it.
1535 return SDValue();
1536 }
1537}
1538
1539SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1540 StoreSDNode *StNode = cast<StoreSDNode>(Val: Op.getNode());
1541 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1542 EVT MemVT = StNode->getMemoryVT();
1543
1544 // If VPU is enabled, always expand non-mask vector stores to VVP
1545 if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(SomeVT: MemVT))
1546 return lowerToVVP(Op, DAG);
1547
1548 SDValue BasePtr = StNode->getBasePtr();
1549 if (isa<FrameIndexSDNode>(Val: BasePtr.getNode())) {
1550 // Do not expand store instruction with frame index here because of
1551 // dependency problems. We expand it later in eliminateFrameIndex().
1552 return Op;
1553 }
1554
1555 if (MemVT == MVT::f128)
1556 return lowerStoreF128(Op, DAG);
1557 if (isMaskType(SomeVT: MemVT))
1558 return lowerStoreI1(Op, DAG);
1559
1560 // Otherwise, ask llvm to expand it.
1561 return SDValue();
1562}
1563
1564SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1565 MachineFunction &MF = DAG.getMachineFunction();
1566 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1567 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1568
1569 // Need frame address to find the address of VarArgsFrameIndex.
1570 MF.getFrameInfo().setFrameAddressIsTaken(true);
1571
1572 // vastart just stores the address of the VarArgsFrameIndex slot into the
1573 // memory location argument.
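 // SX9 is the frame pointer here (the same register used as FP in
 // emitEHSjLjLongJmp below); the va_list area starts at VarArgsFrameOffset
 // from it.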
1574 SDLoc DL(Op);
1575 SDValue Offset =
1576 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: DAG.getRegister(Reg: VE::SX9, VT: PtrVT),
1577 N2: DAG.getIntPtrConstant(Val: FuncInfo->getVarArgsFrameOffset(), DL));
1578 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
1579 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: Offset, Ptr: Op.getOperand(i: 1),
1580 PtrInfo: MachinePointerInfo(SV));
1581}
1582
1583SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1584 SDNode *Node = Op.getNode();
1585 EVT VT = Node->getValueType(ResNo: 0);
1586 SDValue InChain = Node->getOperand(Num: 0);
1587 SDValue VAListPtr = Node->getOperand(Num: 1);
1588 EVT PtrVT = VAListPtr.getValueType();
1589 const Value *SV = cast<SrcValueSDNode>(Val: Node->getOperand(Num: 2))->getValue();
1590 SDLoc DL(Node);
1591 SDValue VAList =
1592 DAG.getLoad(VT: PtrVT, dl: DL, Chain: InChain, Ptr: VAListPtr, PtrInfo: MachinePointerInfo(SV));
1593 SDValue Chain = VAList.getValue(R: 1);
1594 SDValue NextPtr;
1595
1596 if (VT == MVT::f128) {
1597 // VE f128 values must be stored with 16-byte alignment. We don't
1598 // know the actual alignment of VAList, so we align the pointer
1599 // dynamically here.
1600 int Align = 16;
1601 VAList = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList,
1602 N2: DAG.getConstant(Val: Align - 1, DL, VT: PtrVT));
1603 VAList = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, N1: VAList,
1604 N2: DAG.getConstant(Val: -Align, DL, VT: PtrVT));
1605 // Increment the pointer, VAList, by 16 to the next vaarg.
1606 NextPtr =
1607 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: 16, DL));
1608 } else if (VT == MVT::f32) {
1609 // float --> need special handling like below.
1610 // 0 4
1611 // +------+------+
1612 // | empty| float|
1613 // +------+------+
1614 // Increment the pointer, VAList, by 8 to the next vaarg.
1615 NextPtr =
1616 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: 8, DL));
1617 // Then, adjust VAList.
1618 unsigned InternalOffset = 4;
1619 VAList = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList,
1620 N2: DAG.getConstant(Val: InternalOffset, DL, VT: PtrVT));
1621 } else {
1622 // Increment the pointer, VAList, by 8 to the next vaarg.
1623 NextPtr =
1624 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: 8, DL));
1625 }
1626
1627 // Store the incremented VAList to the legalized pointer.
1628 InChain = DAG.getStore(Chain, dl: DL, Val: NextPtr, Ptr: VAListPtr, PtrInfo: MachinePointerInfo(SV));
1629
1630 // Load the actual argument out of the pointer VAList.
1631 // We can't count on greater alignment than the word size.
1632 return DAG.getLoad(
1633 VT, dl: DL, Chain: InChain, Ptr: VAList, PtrInfo: MachinePointerInfo(),
1634 Alignment: Align(std::min(a: PtrVT.getSizeInBits(), b: VT.getSizeInBits()) / 8));
1635}
1636
1637SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1638 SelectionDAG &DAG) const {
1639 // Generate following code.
1640 // (void)__llvm_grow_stack(size);
1641 // ret = GETSTACKTOP; // pseudo instruction
1642 SDLoc DL(Op);
1643
1644 // Get the inputs.
1645 SDNode *Node = Op.getNode();
1646 SDValue Chain = Op.getOperand(i: 0);
1647 SDValue Size = Op.getOperand(i: 1);
1648 MaybeAlign Alignment(Op.getConstantOperandVal(i: 2));
1649 EVT VT = Node->getValueType(ResNo: 0);
1650
1651 // Chain the dynamic stack allocation so that it doesn't modify the stack
1652 // pointer when other instructions are using the stack.
1653 Chain = DAG.getCALLSEQ_START(Chain, InSize: 0, OutSize: 0, DL);
1654
1655 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1656 Align StackAlign = TFI.getStackAlign();
1657 bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1658
1659 // Prepare arguments
1660 TargetLowering::ArgListTy Args;
1661 TargetLowering::ArgListEntry Entry;
1662 Entry.Node = Size;
1663 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext());
1664 Args.push_back(x: Entry);
1665 if (NeedsAlign) {
1666 Entry.Node = DAG.getConstant(Val: ~(Alignment->value() - 1ULL), DL, VT);
1667 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext());
1668 Args.push_back(x: Entry);
1669 }
1670 Type *RetTy = Type::getVoidTy(C&: *DAG.getContext());
1671
1672 EVT PtrVT = Op.getValueType();
1673 SDValue Callee;
1674 if (NeedsAlign) {
1675 Callee = DAG.getTargetExternalSymbol(Sym: "__ve_grow_stack_align", VT: PtrVT, TargetFlags: 0);
1676 } else {
1677 Callee = DAG.getTargetExternalSymbol(Sym: "__ve_grow_stack", VT: PtrVT, TargetFlags: 0);
1678 }
1679
1680 TargetLowering::CallLoweringInfo CLI(DAG);
1681 CLI.setDebugLoc(DL)
1682 .setChain(Chain)
1683 .setCallee(CC: CallingConv::PreserveAll, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
1684 .setDiscardResult(true);
1685 std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1686 Chain = pair.second;
1687 SDValue Result = DAG.getNode(Opcode: VEISD::GETSTACKTOP, DL, VT, Operand: Chain);
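 // If extra alignment was requested, round the returned stack top up to it:
 // (Result + Align - 1) & ~(Align - 1).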
1688 if (NeedsAlign) {
1689 Result = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Result,
1690 N2: DAG.getConstant(Val: (Alignment->value() - 1ULL), DL, VT));
1691 Result = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Result,
1692 N2: DAG.getConstant(Val: ~(Alignment->value() - 1ULL), DL, VT));
1693 }
1694 // Chain = Result.getValue(1);
1695 Chain = DAG.getCALLSEQ_END(Chain, Size1: 0, Size2: 0, Glue: SDValue(), DL);
1696
1697 SDValue Ops[2] = {Result, Chain};
1698 return DAG.getMergeValues(Ops, dl: DL);
1699}
1700
1701SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1702 SelectionDAG &DAG) const {
1703 SDLoc DL(Op);
1704 return DAG.getNode(Opcode: VEISD::EH_SJLJ_LONGJMP, DL, VT: MVT::Other, N1: Op.getOperand(i: 0),
1705 N2: Op.getOperand(i: 1));
1706}
1707
1708SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1709 SelectionDAG &DAG) const {
1710 SDLoc DL(Op);
1711 return DAG.getNode(Opcode: VEISD::EH_SJLJ_SETJMP, DL,
1712 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other), N1: Op.getOperand(i: 0),
1713 N2: Op.getOperand(i: 1));
1714}
1715
1716SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1717 SelectionDAG &DAG) const {
1718 SDLoc DL(Op);
1719 return DAG.getNode(Opcode: VEISD::EH_SJLJ_SETUP_DISPATCH, DL, VT: MVT::Other,
1720 Operand: Op.getOperand(i: 0));
1721}
1722
1723static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1724 const VETargetLowering &TLI,
1725 const VESubtarget *Subtarget) {
1726 SDLoc DL(Op);
1727 MachineFunction &MF = DAG.getMachineFunction();
1728 EVT PtrVT = TLI.getPointerTy(DL: MF.getDataLayout());
1729
1730 MachineFrameInfo &MFI = MF.getFrameInfo();
1731 MFI.setFrameAddressIsTaken(true);
1732
1733 unsigned Depth = Op.getConstantOperandVal(i: 0);
1734 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1735 Register FrameReg = RegInfo->getFrameRegister(MF);
1736 SDValue FrameAddr =
1737 DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT: PtrVT);
1738 while (Depth--)
1739 FrameAddr = DAG.getLoad(VT: Op.getValueType(), dl: DL, Chain: DAG.getEntryNode(),
1740 Ptr: FrameAddr, PtrInfo: MachinePointerInfo());
1741 return FrameAddr;
1742}
1743
1744static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1745 const VETargetLowering &TLI,
1746 const VESubtarget *Subtarget) {
1747 MachineFunction &MF = DAG.getMachineFunction();
1748 MachineFrameInfo &MFI = MF.getFrameInfo();
1749 MFI.setReturnAddressIsTaken(true);
1750
1751 if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
1752 return SDValue();
1753
1754 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1755
1756 SDLoc DL(Op);
1757 EVT VT = Op.getValueType();
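 // The return address is kept 8 bytes above the frame address, so load it
 // from FrameAddr + 8.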
1758 SDValue Offset = DAG.getConstant(Val: 8, DL, VT);
1759 return DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(),
1760 Ptr: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, N2: Offset),
1761 PtrInfo: MachinePointerInfo());
1762}
1763
1764SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1765 SelectionDAG &DAG) const {
1766 SDLoc DL(Op);
1767 unsigned IntNo = Op.getConstantOperandVal(i: 0);
1768 switch (IntNo) {
1769 default: // Don't custom lower most intrinsics.
1770 return SDValue();
1771 case Intrinsic::eh_sjlj_lsda: {
1772 MachineFunction &MF = DAG.getMachineFunction();
1773 MVT VT = Op.getSimpleValueType();
1774 const VETargetMachine *TM =
1775 static_cast<const VETargetMachine *>(&DAG.getTarget());
1776
1777 // Create GCC_except_tableXX string. The real symbol for that will be
1778 // generated in EHStreamer::emitExceptionTable() later. So, we just
1779 // borrow its name here.
1780 TM->getStrList()->push_back(x: std::string(
1781 (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1782 SDValue Addr =
1783 DAG.getTargetExternalSymbol(Sym: TM->getStrList()->back().c_str(), VT, TargetFlags: 0);
1784 if (isPositionIndependent()) {
1785 Addr = makeHiLoPair(Op: Addr, HiTF: VEMCExpr::VK_VE_GOTOFF_HI32,
1786 LoTF: VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1787 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT);
1788 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: GlobalBase, N2: Addr);
1789 }
1790 return makeHiLoPair(Op: Addr, HiTF: VEMCExpr::VK_VE_HI32, LoTF: VEMCExpr::VK_VE_LO32, DAG);
1791 }
1792 }
1793}
1794
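/// Return true if \p N is a BUILD_VECTOR with exactly one non-undef operand,
/// and report that operand's index in \p UniqueIdx.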
1795static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1796 if (!isa<BuildVectorSDNode>(Val: N))
1797 return false;
1798 const auto *BVN = cast<BuildVectorSDNode>(Val: N);
1799
1800 // Find first non-undef insertion.
1801 unsigned Idx;
1802 for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1803 auto ElemV = BVN->getOperand(Num: Idx);
1804 if (!ElemV->isUndef())
1805 break;
1806 }
1807 // Catch the (hypothetical) all-undef case.
1808 if (Idx == BVN->getNumOperands())
1809 return false;
1810 // Remember insertion.
1811 UniqueIdx = Idx++;
1812 // Verify that all other insertions are undef.
1813 for (; Idx < BVN->getNumOperands(); ++Idx) {
1814 auto ElemV = BVN->getOperand(Num: Idx);
1815 if (!ElemV->isUndef())
1816 return false;
1817 }
1818 return true;
1819}
1820
1821static SDValue getSplatValue(SDNode *N) {
1822 if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(Val: N)) {
1823 return BuildVec->getSplatValue();
1824 }
1825 return SDValue();
1826}
1827
1828SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1829 SelectionDAG &DAG) const {
1830 VECustomDAG CDAG(DAG, Op);
1831 MVT ResultVT = Op.getSimpleValueType();
1832
1833 // If there is just one element, expand to INSERT_VECTOR_ELT.
1834 unsigned UniqueIdx;
1835 if (getUniqueInsertion(N: Op.getNode(), UniqueIdx)) {
1836 SDValue AccuV = CDAG.getUNDEF(VT: Op.getValueType());
1837 auto ElemV = Op->getOperand(Num: UniqueIdx);
1838 SDValue IdxV = CDAG.getConstant(Val: UniqueIdx, VT: MVT::i64);
1839 return CDAG.getNode(OC: ISD::INSERT_VECTOR_ELT, ResVT: ResultVT, OpV: {AccuV, ElemV, IdxV});
1840 }
1841
1842 // Else emit a broadcast.
1843 if (SDValue ScalarV = getSplatValue(N: Op.getNode())) {
1844 unsigned NumEls = ResultVT.getVectorNumElements();
1845 auto AVL = CDAG.getConstant(Val: NumEls, VT: MVT::i32);
1846 return CDAG.getBroadcast(ResultVT, Scalar: ScalarV, AVL);
1847 }
1848
1849 // Expand
1850 return SDValue();
1851}
1852
1853TargetLowering::LegalizeAction
1854VETargetLowering::getCustomOperationAction(SDNode &Op) const {
1855 // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
1856 // these operations (transform nodes such that their AVL parameter refers to
1857 // packs of 64 bits, instead of the number of elements).
1858
1859 // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1860 // re-visit them.
1861 if (isPackingSupportOpcode(Opc: Op.getOpcode()))
1862 return Legal;
1863
1864 // Custom lower to legalize AVL for packed mode.
1865 if (isVVPOrVEC(Op.getOpcode()))
1866 return Custom;
1867 return Legal;
1868}
1869
1870SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1871 LLVM_DEBUG(dbgs() << "::LowerOperation "; Op.dump(&DAG));
1872 unsigned Opcode = Op.getOpcode();
1873
1874 /// Scalar isel.
1875 switch (Opcode) {
1876 case ISD::ATOMIC_FENCE:
1877 return lowerATOMIC_FENCE(Op, DAG);
1878 case ISD::ATOMIC_SWAP:
1879 return lowerATOMIC_SWAP(Op, DAG);
1880 case ISD::BlockAddress:
1881 return lowerBlockAddress(Op, DAG);
1882 case ISD::ConstantPool:
1883 return lowerConstantPool(Op, DAG);
1884 case ISD::DYNAMIC_STACKALLOC:
1885 return lowerDYNAMIC_STACKALLOC(Op, DAG);
1886 case ISD::EH_SJLJ_LONGJMP:
1887 return lowerEH_SJLJ_LONGJMP(Op, DAG);
1888 case ISD::EH_SJLJ_SETJMP:
1889 return lowerEH_SJLJ_SETJMP(Op, DAG);
1890 case ISD::EH_SJLJ_SETUP_DISPATCH:
1891 return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1892 case ISD::FRAMEADDR:
1893 return lowerFRAMEADDR(Op, DAG, TLI: *this, Subtarget);
1894 case ISD::GlobalAddress:
1895 return lowerGlobalAddress(Op, DAG);
1896 case ISD::GlobalTLSAddress:
1897 return lowerGlobalTLSAddress(Op, DAG);
1898 case ISD::INTRINSIC_WO_CHAIN:
1899 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1900 case ISD::JumpTable:
1901 return lowerJumpTable(Op, DAG);
1902 case ISD::LOAD:
1903 return lowerLOAD(Op, DAG);
1904 case ISD::RETURNADDR:
1905 return lowerRETURNADDR(Op, DAG, TLI: *this, Subtarget);
1906 case ISD::BUILD_VECTOR:
1907 return lowerBUILD_VECTOR(Op, DAG);
1908 case ISD::STORE:
1909 return lowerSTORE(Op, DAG);
1910 case ISD::VASTART:
1911 return lowerVASTART(Op, DAG);
1912 case ISD::VAARG:
1913 return lowerVAARG(Op, DAG);
1914
1915 case ISD::INSERT_VECTOR_ELT:
1916 return lowerINSERT_VECTOR_ELT(Op, DAG);
1917 case ISD::EXTRACT_VECTOR_ELT:
1918 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1919 }
1920
1921 /// Vector isel.
1922 if (ISD::isVPOpcode(Opcode))
1923 return lowerToVVP(Op, DAG);
1924
1925 switch (Opcode) {
1926 default:
1927 llvm_unreachable("Should not custom lower this!");
1928
1929 // Legalize the AVL of this internal node.
1930 case VEISD::VEC_BROADCAST:
1931#define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
1932#include "VVPNodes.def"
1933 // AVL already legalized.
1934 if (getAnnotatedNodeAVL(Op).second)
1935 return Op;
1936 return legalizeInternalVectorOp(Op, DAG);
1937
1938 // Translate into a VEC_*/VVP_* layer operation.
1939 case ISD::MLOAD:
1940 case ISD::MSTORE:
1941#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1942#include "VVPNodes.def"
1943 if (isMaskArithmetic(Op) && isPackedVectorType(SomeVT: Op.getValueType()))
1944 return splitMaskArithmetic(Op, DAG);
1945 return lowerToVVP(Op, DAG);
1946 }
1947}
1948/// } Custom Lower
1949
1950void VETargetLowering::ReplaceNodeResults(SDNode *N,
1951 SmallVectorImpl<SDValue> &Results,
1952 SelectionDAG &DAG) const {
1953 switch (N->getOpcode()) {
1954 case ISD::ATOMIC_SWAP:
1955 // Let LLVM expand atomic swap instruction through LowerOperation.
1956 return;
1957 default:
1958 LLVM_DEBUG(N->dumpr(&DAG));
1959 llvm_unreachable("Do not know how to custom type legalize this operation!");
1960 }
1961}
1962
1963/// JumpTable for VE.
1964///
1965/// VE cannot generate relocatable symbols in jump tables. It cannot
1966/// generate expressions that use symbols from both the text segment and
1967/// the data segment, like the one below.
1968/// .4byte .LBB0_2-.LJTI0_0
1969/// So, we instead generate the offset from the top of the function, as
1970/// below, using a custom label.
1971/// .4byte .LBB0_2-<function name>
1972
1973unsigned VETargetLowering::getJumpTableEncoding() const {
1974 // Use custom label for PIC.
1975 if (isPositionIndependent())
1976 return MachineJumpTableInfo::EK_Custom32;
1977
1978 // Otherwise, use the normal jump table encoding heuristics.
1979 return TargetLowering::getJumpTableEncoding();
1980}
1981
1982const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1983 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1984 unsigned Uid, MCContext &Ctx) const {
1985 assert(isPositionIndependent());
1986
1987 // Generate custom label for PIC like below.
1988 // .4bytes .LBB0_2-<function name>
1989 const auto *Value = MCSymbolRefExpr::create(Symbol: MBB->getSymbol(), Ctx);
1990 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: MBB->getParent()->getName().data());
1991 const auto *Base = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
1992 return MCBinaryExpr::createSub(LHS: Value, RHS: Base, Ctx);
1993}
1994
1995SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1996 SelectionDAG &DAG) const {
1997 assert(isPositionIndependent());
1998 SDLoc DL(Table);
1999 Function *Function = &DAG.getMachineFunction().getFunction();
2000 assert(Function != nullptr);
2001 auto PtrTy = getPointerTy(DL: DAG.getDataLayout(), AS: Function->getAddressSpace());
2002
2003 // In the jump table, we have the following values in PIC mode.
2004 // .4bytes .LBB0_2-<function name>
2005 // We need to add this value and the address of this function to generate
2006 // the .LBB0_2 label correctly in PIC mode. So, we want to generate the
2007 // following instructions:
2008 // lea %reg, fun@gotoff_lo
2009 // and %reg, %reg, (32)0
2010 // lea.sl %reg, fun@gotoff_hi(%reg, %got)
2011 // In order to do so, we need to generate a correctly marked DAG node using
2012 // makeHiLoPair.
2013 SDValue Op = DAG.getGlobalAddress(GV: Function, DL, VT: PtrTy);
2014 SDValue HiLo = makeHiLoPair(Op, HiTF: VEMCExpr::VK_VE_GOTOFF_HI32,
2015 LoTF: VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
2016 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrTy);
2017 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: GlobalBase, N2: HiLo);
2018}
2019
2020Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
2021 MachineBasicBlock::iterator I,
2022 MachineBasicBlock *TargetBB,
2023 const DebugLoc &DL) const {
2024 MachineFunction *MF = MBB.getParent();
2025 MachineRegisterInfo &MRI = MF->getRegInfo();
2026 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2027
2028 const TargetRegisterClass *RC = &VE::I64RegClass;
2029 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2030 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2031 Register Result = MRI.createVirtualRegister(RegClass: RC);
2032
2033 if (isPositionIndependent()) {
2034 // Create the following instructions for local-linkage PIC code.
2035 // lea %Tmp1, TargetBB@gotoff_lo
2036 // and %Tmp2, %Tmp1, (32)0
2037 // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2038 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2039 .addImm(Val: 0)
2040 .addImm(Val: 0)
2041 .addMBB(MBB: TargetBB, TargetFlags: VEMCExpr::VK_VE_GOTOFF_LO32);
2042 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2043 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2044 .addImm(Val: M0(Val: 32));
2045 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Result)
2046 .addReg(RegNo: VE::SX15)
2047 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2048 .addMBB(MBB: TargetBB, TargetFlags: VEMCExpr::VK_VE_GOTOFF_HI32);
2049 } else {
2050 // Create the following instructions for non-PIC code.
2051 // lea %Tmp1, TargetBB@lo
2052 // and %Tmp2, %Tmp1, (32)0
2053 // lea.sl %Result, TargetBB@hi(%Tmp2)
2054 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2055 .addImm(Val: 0)
2056 .addImm(Val: 0)
2057 .addMBB(MBB: TargetBB, TargetFlags: VEMCExpr::VK_VE_LO32);
2058 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2059 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2060 .addImm(Val: M0(Val: 32));
2061 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: Result)
2062 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2063 .addImm(Val: 0)
2064 .addMBB(MBB: TargetBB, TargetFlags: VEMCExpr::VK_VE_HI32);
2065 }
2066 return Result;
2067}
2068
2069Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
2070 MachineBasicBlock::iterator I,
2071 StringRef Symbol, const DebugLoc &DL,
2072 bool IsLocal = false,
2073 bool IsCall = false) const {
2074 MachineFunction *MF = MBB.getParent();
2075 MachineRegisterInfo &MRI = MF->getRegInfo();
2076 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2077
2078 const TargetRegisterClass *RC = &VE::I64RegClass;
2079 Register Result = MRI.createVirtualRegister(RegClass: RC);
2080
2081 if (isPositionIndependent()) {
2082 if (IsCall && !IsLocal) {
2083 // Create the following instructions for non-local-linkage PIC function
2084 // calls. These instructions use the IC and the magic number -24, so we
2085 // expand them in VEAsmPrinter.cpp from the GETFUNPLT pseudo instruction.
2086 // lea %Reg, Symbol@plt_lo(-24)
2087 // and %Reg, %Reg, (32)0
2088 // sic %s16
2089 // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
2090 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::GETFUNPLT), DestReg: Result)
2091 .addExternalSymbol(FnName: "abort");
2092 } else if (IsLocal) {
2093 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2094 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2095 // Create the following instructions for local-linkage PIC code.
2096 // lea %Tmp1, Symbol@gotoff_lo
2097 // and %Tmp2, %Tmp1, (32)0
2098 // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2099 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2100 .addImm(Val: 0)
2101 .addImm(Val: 0)
2102 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VEMCExpr::VK_VE_GOTOFF_LO32);
2103 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2104 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2105 .addImm(Val: M0(Val: 32));
2106 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Result)
2107 .addReg(RegNo: VE::SX15)
2108 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2109 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VEMCExpr::VK_VE_GOTOFF_HI32);
2110 } else {
2111 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2112 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2113 // Create the following instructions for non-local-linkage PIC code.
2114 // lea %Tmp1, Symbol@got_lo
2115 // and %Tmp2, %Tmp1, (32)0
2116 // lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2117 // ld %Result, 0(%Tmp3)
2118 Register Tmp3 = MRI.createVirtualRegister(RegClass: RC);
2119 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2120 .addImm(Val: 0)
2121 .addImm(Val: 0)
2122 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VEMCExpr::VK_VE_GOT_LO32);
2123 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2124 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2125 .addImm(Val: M0(Val: 32));
2126 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Tmp3)
2127 .addReg(RegNo: VE::SX15)
2128 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2129 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VEMCExpr::VK_VE_GOT_HI32);
2130 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: Result)
2131 .addReg(RegNo: Tmp3, flags: getKillRegState(B: true))
2132 .addImm(Val: 0)
2133 .addImm(Val: 0);
2134 }
2135 } else {
2136 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2137 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2138 // Create the following instructions for non-PIC code.
2139 // lea %Tmp1, Symbol@lo
2140 // and %Tmp2, %Tmp1, (32)0
2141 // lea.sl %Result, Symbol@hi(%Tmp2)
2142 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2143 .addImm(Val: 0)
2144 .addImm(Val: 0)
2145 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VEMCExpr::VK_VE_LO32);
2146 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2147 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2148 .addImm(Val: M0(Val: 32));
2149 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: Result)
2150 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2151 .addImm(Val: 0)
2152 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VEMCExpr::VK_VE_HI32);
2153 }
2154 return Result;
2155}
2156
2157void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
2158 MachineBasicBlock *MBB,
2159 MachineBasicBlock *DispatchBB,
2160 int FI, int Offset) const {
2161 DebugLoc DL = MI.getDebugLoc();
2162 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2163
2164 Register LabelReg =
2165 prepareMBB(MBB&: *MBB, I: MachineBasicBlock::iterator(MI), TargetBB: DispatchBB, DL);
2166
2167 // Store the address of DispatchBB into the given jmpbuf[1], which holds the
2168 // next IC referenced later by longjmp (throw).
2169 MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2170 addFrameReference(MIB, FI, Offset); // jmpbuf[1]
2171 MIB.addReg(RegNo: LabelReg, flags: getKillRegState(B: true));
2172}
2173
2174MachineBasicBlock *
2175VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
2176 MachineBasicBlock *MBB) const {
2177 DebugLoc DL = MI.getDebugLoc();
2178 MachineFunction *MF = MBB->getParent();
2179 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2180 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
2181 MachineRegisterInfo &MRI = MF->getRegInfo();
2182
2183 const BasicBlock *BB = MBB->getBasicBlock();
2184 MachineFunction::iterator I = ++MBB->getIterator();
2185
2186 // Memory Reference.
2187 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
2188 MI.memoperands_end());
2189 Register BufReg = MI.getOperand(i: 1).getReg();
2190
2191 Register DstReg;
2192
2193 DstReg = MI.getOperand(i: 0).getReg();
2194 const TargetRegisterClass *RC = MRI.getRegClass(Reg: DstReg);
2195 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
2196 (void)TRI;
2197 Register MainDestReg = MRI.createVirtualRegister(RegClass: RC);
2198 Register RestoreDestReg = MRI.createVirtualRegister(RegClass: RC);
2199
2200 // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate the following
2201 // instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2202 //
2203 // ThisMBB:
2204 // buf[3] = %s17 iff %s17 is used as BP
2205 // buf[1] = RestoreMBB as IC after longjmp
2206 // # SjLjSetup RestoreMBB
2207 //
2208 // MainMBB:
2209 // v_main = 0
2210 //
2211 // SinkMBB:
2212 // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2213 // ...
2214 //
2215 // RestoreMBB:
2216 //   %s17 = buf[3] iff %s17 is used as BP
2217 // v_restore = 1
2218 // goto SinkMBB
2219
2220 MachineBasicBlock *ThisMBB = MBB;
2221 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2222 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2223 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2224 MF->insert(MBBI: I, MBB: MainMBB);
2225 MF->insert(MBBI: I, MBB: SinkMBB);
2226 MF->push_back(MBB: RestoreMBB);
2227 RestoreMBB->setMachineBlockAddressTaken();
2228
2229 // Transfer the remainder of BB and its successor edges to SinkMBB.
2230 SinkMBB->splice(Where: SinkMBB->begin(), Other: MBB,
2231 From: std::next(x: MachineBasicBlock::iterator(MI)), To: MBB->end());
2232 SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
2233
2234 // ThisMBB:
2235 Register LabelReg =
2236 prepareMBB(MBB&: *MBB, I: MachineBasicBlock::iterator(MI), TargetBB: RestoreMBB, DL);
2237
2238 // Store BP in buf[3] iff this function is using BP.
2239 const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2240 if (TFI->hasBP(MF: *MF)) {
2241 MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2242 MIB.addReg(RegNo: BufReg);
2243 MIB.addImm(Val: 0);
2244 MIB.addImm(Val: 24);
2245 MIB.addReg(RegNo: VE::SX17);
2246 MIB.setMemRefs(MMOs);
2247 }
2248
2249 // Store IP in buf[1].
2250 MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2251 MIB.add(MO: MI.getOperand(i: 1)); // we can preserve the kill flags here.
2252 MIB.addImm(Val: 0);
2253 MIB.addImm(Val: 8);
2254 MIB.addReg(RegNo: LabelReg, flags: getKillRegState(B: true));
2255 MIB.setMemRefs(MMOs);
2256
2257 // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2258
2259 // Insert setup.
2260 MIB =
2261 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::EH_SjLj_Setup)).addMBB(MBB: RestoreMBB);
2262
2263 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2264 MIB.addRegMask(Mask: RegInfo->getNoPreservedMask());
2265 ThisMBB->addSuccessor(Succ: MainMBB);
2266 ThisMBB->addSuccessor(Succ: RestoreMBB);
2267
2268 // MainMBB:
2269 BuildMI(BB: MainMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: MainDestReg)
2270 .addImm(Val: 0)
2271 .addImm(Val: 0)
2272 .addImm(Val: 0);
2273 MainMBB->addSuccessor(Succ: SinkMBB);
2274
2275 // SinkMBB:
2276 BuildMI(BB&: *SinkMBB, I: SinkMBB->begin(), MIMD: DL, MCID: TII->get(Opcode: VE::PHI), DestReg: DstReg)
2277 .addReg(RegNo: MainDestReg)
2278 .addMBB(MBB: MainMBB)
2279 .addReg(RegNo: RestoreDestReg)
2280 .addMBB(MBB: RestoreMBB);
2281
2282 // RestoreMBB:
2283 // Restore BP from buf[3] iff this function is using BP. The address of
2284 // buf is in SX10.
2285 // FIXME: Better to not use SX10 here
2286 if (TFI->hasBP(MF: *MF)) {
2287 MachineInstrBuilder MIB =
2288 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: VE::SX17);
2289 MIB.addReg(RegNo: VE::SX10);
2290 MIB.addImm(Val: 0);
2291 MIB.addImm(Val: 24);
2292 MIB.setMemRefs(MMOs);
2293 }
2294 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: RestoreDestReg)
2295 .addImm(Val: 0)
2296 .addImm(Val: 0)
2297 .addImm(Val: 1);
2298 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLa_t)).addMBB(MBB: SinkMBB);
2299 RestoreMBB->addSuccessor(Succ: SinkMBB);
2300
2301 MI.eraseFromParent();
2302 return SinkMBB;
2303}
2304
2305MachineBasicBlock *
2306VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2307 MachineBasicBlock *MBB) const {
2308 DebugLoc DL = MI.getDebugLoc();
2309 MachineFunction *MF = MBB->getParent();
2310 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2311 MachineRegisterInfo &MRI = MF->getRegInfo();
2312
2313 // Memory Reference.
2314 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
2315 MI.memoperands_end());
2316 Register BufReg = MI.getOperand(i: 0).getReg();
2317
2318 Register Tmp = MRI.createVirtualRegister(RegClass: &VE::I64RegClass);
2319 // Since FP is only updated here but NOT referenced, it's treated as GPR.
2320 Register FP = VE::SX9;
2321 Register SP = VE::SX11;
2322
2323 MachineInstrBuilder MIB;
2324
2325 MachineBasicBlock *ThisMBB = MBB;
2326
2327 // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate the following instructions.
2328 //
2329 // ThisMBB:
2330 // %fp = load buf[0]
2331 // %jmp = load buf[1]
2332 // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2333 // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2334 // jmp %jmp
2335
2336 // Reload FP.
2337 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: FP);
2338 MIB.addReg(RegNo: BufReg);
2339 MIB.addImm(Val: 0);
2340 MIB.addImm(Val: 0);
2341 MIB.setMemRefs(MMOs);
2342
2343 // Reload IP.
2344 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: Tmp);
2345 MIB.addReg(RegNo: BufReg);
2346 MIB.addImm(Val: 0);
2347 MIB.addImm(Val: 8);
2348 MIB.setMemRefs(MMOs);
2349
2350 // Copy BufReg to SX10 so that the setjmp-side RestoreMBB can find the buffer.
2351 // FIXME: Better to not use SX10 here
2352 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::ORri), DestReg: VE::SX10)
2353 .addReg(RegNo: BufReg)
2354 .addImm(Val: 0);
2355
2356 // Reload SP.
2357 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: SP);
2358 MIB.add(MO: MI.getOperand(i: 0)); // we can preserve the kill flags here.
2359 MIB.addImm(Val: 0);
2360 MIB.addImm(Val: 16);
2361 MIB.setMemRefs(MMOs);
2362
2363 // Jump.
2364 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2365 .addReg(RegNo: Tmp, flags: getKillRegState(B: true))
2366 .addImm(Val: 0);
2367
2368 MI.eraseFromParent();
2369 return ThisMBB;
2370}
2371
2372MachineBasicBlock *
2373VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2374 MachineBasicBlock *BB) const {
2375 DebugLoc DL = MI.getDebugLoc();
2376 MachineFunction *MF = BB->getParent();
2377 MachineFrameInfo &MFI = MF->getFrameInfo();
2378 MachineRegisterInfo &MRI = MF->getRegInfo();
2379 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2380 int FI = MFI.getFunctionContextIndex();
2381
2382 // Get a mapping of the call site numbers to all of the landing pads they're
2383 // associated with.
2384 DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
2385 unsigned MaxCSNum = 0;
2386 for (auto &MBB : *MF) {
2387 if (!MBB.isEHPad())
2388 continue;
2389
2390 MCSymbol *Sym = nullptr;
2391 for (const auto &MI : MBB) {
2392 if (MI.isDebugInstr())
2393 continue;
2394
2395 assert(MI.isEHLabel() && "expected EH_LABEL");
2396 Sym = MI.getOperand(i: 0).getMCSymbol();
2397 break;
2398 }
2399
2400 if (!MF->hasCallSiteLandingPad(Sym))
2401 continue;
2402
2403 for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2404 CallSiteNumToLPad[CSI].push_back(Elt: &MBB);
2405 MaxCSNum = std::max(a: MaxCSNum, b: CSI);
2406 }
2407 }
2408
2409 // Get an ordered list of the machine basic blocks for the jump table.
2410 std::vector<MachineBasicBlock *> LPadList;
2411 SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
2412 LPadList.reserve(n: CallSiteNumToLPad.size());
2413
2414 for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
2415 for (auto &LP : CallSiteNumToLPad[CSI]) {
2416 LPadList.push_back(x: LP);
2417 InvokeBBs.insert(I: LP->pred_begin(), E: LP->pred_end());
2418 }
2419 }
2420
2421 assert(!LPadList.empty() &&
2422 "No landing pad destinations for the dispatch jump table!");
2423
2424 // The %fn_context is allocated like below (from --print-after=sjljehprepare):
2425 // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2426 //
2427 // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
2428 // First `i64` is callsite, so callsite is FI+8.
2429 static const int OffsetIC = 72;
2430 static const int OffsetCS = 8;
2431
2432 // Create the MBBs for the dispatch code as follows:
2433 //
2434 // ThisMBB:
2435 // Prepare DispatchBB address and store it to buf[1].
2436 // ...
2437 //
2438 // DispatchBB:
2439 // %s15 = GETGOT iff isPositionIndependent
2440 // %callsite = load callsite
2441 // brgt.l.t #size of callsites, %callsite, DispContBB
2442 //
2443 // TrapBB:
2444 // Call abort.
2445 //
2446 // DispContBB:
2447 // %breg = address of jump table
2448 // %pc = load and calculate next pc from %breg and %callsite
2449 // jmp %pc
2450
2451 // Shove the dispatch's address into the return slot in the function context.
2452 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2453 DispatchBB->setIsEHPad(true);
2454
2455 // TrapBB causes a trap, like `assert(0)`.
2456 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2457 DispatchBB->addSuccessor(Succ: TrapBB);
2458
2459 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2460 DispatchBB->addSuccessor(Succ: DispContBB);
2461
2462 // Insert MBBs.
2463 MF->push_back(MBB: DispatchBB);
2464 MF->push_back(MBB: DispContBB);
2465 MF->push_back(MBB: TrapBB);
2466
2467 // Insert code to call abort in the TrapBB.
2468 Register Abort = prepareSymbol(MBB&: *TrapBB, I: TrapBB->end(), Symbol: "abort", DL,
2469 /* Local */ IsLocal: false, /* Call */ IsCall: true);
2470 BuildMI(BB: TrapBB, MIMD: DL, MCID: TII->get(Opcode: VE::BSICrii), DestReg: VE::SX10)
2471 .addReg(RegNo: Abort, flags: getKillRegState(B: true))
2472 .addImm(Val: 0)
2473 .addImm(Val: 0);
2474
2475 // Insert code into the entry block that creates and registers the function
2476 // context.
2477 setupEntryBlockForSjLj(MI, MBB: BB, DispatchBB, FI, Offset: OffsetIC);
2478
2479 // Create the jump table and associated information
2480 unsigned JTE = getJumpTableEncoding();
2481 MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTEntryKind: JTE);
2482 unsigned MJTI = JTI->createJumpTableIndex(DestBBs: LPadList);
2483
2484 const VERegisterInfo &RI = TII->getRegisterInfo();
2485 // Add a register mask with no preserved registers. This results in all
2486 // registers being marked as clobbered.
2487 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::NOP))
2488 .addRegMask(Mask: RI.getNoPreservedMask());
2489
2490 if (isPositionIndependent()) {
2491 // Force generation of GETGOT, since the current implementation doesn't
2492 // store the GOT register.
2493 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::GETGOT), DestReg: VE::SX15);
2494 }
2495
2496 // IReg is used as an index in a memory operand and therefore can't be SP
2497 const TargetRegisterClass *RC = &VE::I64RegClass;
2498 Register IReg = MRI.createVirtualRegister(RegClass: RC);
2499 addFrameReference(MIB: BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDLZXrii), DestReg: IReg), FI,
2500 Offset: OffsetCS);
2501 if (LPadList.size() < 64) {
2502 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLir_t))
2503 .addImm(Val: VECC::CC_ILE)
2504 .addImm(Val: LPadList.size())
2505 .addReg(RegNo: IReg)
2506 .addMBB(MBB: TrapBB);
2507 } else {
2508 assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2509 Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
2510 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: TmpReg)
2511 .addImm(Val: 0)
2512 .addImm(Val: 0)
2513 .addImm(Val: LPadList.size());
2514 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLrr_t))
2515 .addImm(Val: VECC::CC_ILE)
2516 .addReg(RegNo: TmpReg, flags: getKillRegState(B: true))
2517 .addReg(RegNo: IReg)
2518 .addMBB(MBB: TrapBB);
2519 }
2520
2521 Register BReg = MRI.createVirtualRegister(RegClass: RC);
2522 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2523 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2524
2525 if (isPositionIndependent()) {
2526 // Create the following instructions for local-linkage PIC code.
2527 // lea %Tmp1, .LJTI0_0@gotoff_lo
2528 // and %Tmp2, %Tmp1, (32)0
2529 // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2530 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2531 .addImm(Val: 0)
2532 .addImm(Val: 0)
2533 .addJumpTableIndex(Idx: MJTI, TargetFlags: VEMCExpr::VK_VE_GOTOFF_LO32);
2534 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2535 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2536 .addImm(Val: M0(Val: 32));
2537 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: BReg)
2538 .addReg(RegNo: VE::SX15)
2539 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2540 .addJumpTableIndex(Idx: MJTI, TargetFlags: VEMCExpr::VK_VE_GOTOFF_HI32);
2541 } else {
2542 // Create the following instructions for non-PIC code.
2543 // lea %Tmp1, .LJTI0_0@lo
2544 // and %Tmp2, %Tmp1, (32)0
2545 // lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2546 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2547 .addImm(Val: 0)
2548 .addImm(Val: 0)
2549 .addJumpTableIndex(Idx: MJTI, TargetFlags: VEMCExpr::VK_VE_LO32);
2550 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2551 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2552 .addImm(Val: M0(Val: 32));
2553 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: BReg)
2554 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2555 .addImm(Val: 0)
2556 .addJumpTableIndex(Idx: MJTI, TargetFlags: VEMCExpr::VK_VE_HI32);
2557 }
2558
2559 switch (JTE) {
2560 case MachineJumpTableInfo::EK_BlockAddress: {
2561 // Generate simple block address code for no-PIC model.
2562 // sll %Tmp1, %IReg, 3
2563 // lds %TReg, 0(%Tmp1, %BReg)
2564 // bcfla %TReg
2565
2566 Register TReg = MRI.createVirtualRegister(RegClass: RC);
2567 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2568
2569 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::SLLri), DestReg: Tmp1)
2570 .addReg(RegNo: IReg, flags: getKillRegState(B: true))
2571 .addImm(Val: 3);
2572 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDrri), DestReg: TReg)
2573 .addReg(RegNo: BReg, flags: getKillRegState(B: true))
2574 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2575 .addImm(Val: 0);
2576 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2577 .addReg(RegNo: TReg, flags: getKillRegState(B: true))
2578 .addImm(Val: 0);
2579 break;
2580 }
2581 case MachineJumpTableInfo::EK_Custom32: {
2582 // Generate block address code using differences from the function pointer
2583 // for PIC model.
2584 // sll %Tmp1, %IReg, 2
2585 // ldl.zx %OReg, 0(%Tmp1, %BReg)
2586 // Prepare function address in BReg2.
2587 // adds.l %TReg, %BReg2, %OReg
2588 // bcfla %TReg
2589
2590 assert(isPositionIndependent());
2591 Register OReg = MRI.createVirtualRegister(RegClass: RC);
2592 Register TReg = MRI.createVirtualRegister(RegClass: RC);
2593 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2594
2595 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::SLLri), DestReg: Tmp1)
2596 .addReg(RegNo: IReg, flags: getKillRegState(B: true))
2597 .addImm(Val: 2);
2598 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDLZXrri), DestReg: OReg)
2599 .addReg(RegNo: BReg, flags: getKillRegState(B: true))
2600 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2601 .addImm(Val: 0);
2602 Register BReg2 =
2603 prepareSymbol(MBB&: *DispContBB, I: DispContBB->end(),
2604 Symbol: DispContBB->getParent()->getName(), DL, /* Local */ IsLocal: true);
2605 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ADDSLrr), DestReg: TReg)
2606 .addReg(RegNo: OReg, flags: getKillRegState(B: true))
2607 .addReg(RegNo: BReg2, flags: getKillRegState(B: true));
2608 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2609 .addReg(RegNo: TReg, flags: getKillRegState(B: true))
2610 .addImm(Val: 0);
2611 break;
2612 }
2613 default:
2614 llvm_unreachable("Unexpected jump table encoding");
2615 }
2616
2617 // Add the jump table entries as successors to the MBB.
2618 SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
2619 for (auto &LP : LPadList)
2620 if (SeenMBBs.insert(Ptr: LP).second)
2621 DispContBB->addSuccessor(Succ: LP);
2622
2623 // N.B. the order the invoke BBs are processed in doesn't matter here.
2624 SmallVector<MachineBasicBlock *, 64> MBBLPads;
2625 const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2626 for (MachineBasicBlock *MBB : InvokeBBs) {
2627 // Remove the landing pad successor from the invoke block and replace it
2628 // with the new dispatch block.
2629 // Keep a copy of Successors since it's modified inside the loop.
2630 SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
2631 MBB->succ_rend());
2632 // FIXME: Avoid quadratic complexity.
2633 for (auto *MBBS : Successors) {
2634 if (MBBS->isEHPad()) {
2635 MBB->removeSuccessor(Succ: MBBS);
2636 MBBLPads.push_back(Elt: MBBS);
2637 }
2638 }
2639
2640 MBB->addSuccessor(Succ: DispatchBB);
2641
2642 // Find the invoke call and mark all of the callee-saved registers as
2643 // 'implicit defined' so that they're spilled. This prevents code from
2644 // moving instructions to before the EH block, where they will never be
2645 // executed.
2646 for (auto &II : reverse(C&: *MBB)) {
2647 if (!II.isCall())
2648 continue;
2649
2650 DenseMap<Register, bool> DefRegs;
2651 for (auto &MOp : II.operands())
2652 if (MOp.isReg())
2653 DefRegs[MOp.getReg()] = true;
2654
2655 MachineInstrBuilder MIB(*MF, &II);
2656 for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
2657 Register Reg = SavedRegs[RI];
2658 if (!DefRegs[Reg])
2659 MIB.addReg(RegNo: Reg, flags: RegState::ImplicitDefine | RegState::Dead);
2660 }
2661
2662 break;
2663 }
2664 }
2665
2666 // Mark all former landing pads as non-landing pads. The dispatch is the only
2667 // landing pad now.
2668 for (auto &LP : MBBLPads)
2669 LP->setIsEHPad(false);
2670
2671 // The instruction is gone now.
2672 MI.eraseFromParent();
2673 return BB;
2674}
2675
2676MachineBasicBlock *
2677VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2678 MachineBasicBlock *BB) const {
2679 switch (MI.getOpcode()) {
2680 default:
2681 llvm_unreachable("Unknown Custom Instruction!");
2682 case VE::EH_SjLj_LongJmp:
2683 return emitEHSjLjLongJmp(MI, MBB: BB);
2684 case VE::EH_SjLj_SetJmp:
2685 return emitEHSjLjSetJmp(MI, MBB: BB);
2686 case VE::EH_SjLj_Setup_Dispatch:
2687 return emitSjLjDispatchBlock(MI, BB);
2688 }
2689}
2690
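/// Return true if \p V is a scalar constant whose value (for floats, the bit
/// pattern in the position VE uses) fits in a signed 7-bit immediate.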
2691static bool isSimm7(SDValue V) {
2692 EVT VT = V.getValueType();
2693 if (VT.isVector())
2694 return false;
2695
2696 if (VT.isInteger()) {
2697 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: V))
2698 return isInt<7>(x: C->getSExtValue());
2699 } else if (VT.isFloatingPoint()) {
2700 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: V)) {
2701 if (VT == MVT::f32 || VT == MVT::f64) {
2702 const APInt &Imm = C->getValueAPF().bitcastToAPInt();
2703 uint64_t Val = Imm.getSExtValue();
2704 if (Imm.getBitWidth() == 32)
2705 Val <<= 32; // The immediate value of a float is placed in the higher bits on VE.
2706 return isInt<7>(x: Val);
2707 }
2708 }
2709 }
2710 return false;
2711}
2712
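/// Return true if \p V is a scalar constant that can be encoded as a VE MImm
/// operand.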
2713static bool isMImm(SDValue V) {
2714 EVT VT = V.getValueType();
2715 if (VT.isVector())
2716 return false;
2717
2718 if (VT.isInteger()) {
2719 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: V))
2720 return isMImmVal(Val: getImmVal(N: C));
2721 } else if (VT.isFloatingPoint()) {
2722 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: V)) {
2723 if (VT == MVT::f32) {
2724 // Float values are placed in the higher bits, so ignore the lower 32 bits.
2725 return isMImm32Val(Val: getFpImmVal(N: C) >> 32);
2726 } else if (VT == MVT::f64) {
2727 return isMImmVal(Val: getFpImmVal(N: C));
2728 }
2729 }
2730 }
2731 return false;
2732}
2733
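/// Choose the VE comparison opcode for the given operand type: CMPQ for f128,
/// CMPF for other floating-point types, and CMPI/CMPU for signed/unsigned
/// integer compares.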
2734static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
2735 if (SrcVT.isFloatingPoint()) {
2736 if (SrcVT == MVT::f128)
2737 return VEISD::CMPQ;
2738 return VEISD::CMPF;
2739 }
2740 return isSignedIntSetCC(Code: CC) ? VEISD::CMPI : VEISD::CMPU;
2741}
2742
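/// VE produces the result of an f128 comparison in an f64 register; all other
/// types compare in their own type.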
2743static EVT decideCompType(EVT SrcVT) {
2744 if (SrcVT == MVT::f128)
2745 return MVT::f64;
2746 return SrcVT;
2747}
2748
2749static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC,
2750 bool WithCMov) {
2751 if (SrcVT.isFloatingPoint()) {
2752 // For floating-point setcc, only unordered comparisons, or ordinary
2753 // comparisons under -enable-no-nans-fp-math, reach here, so this is safe
2754 // even if the values are NaN. Only f128 is not safe, since VE uses the
2755 // f64 result of an f128 comparison.
2756 return SrcVT != MVT::f128;
2757 }
2758 if (isIntEqualitySetCC(Code: CC)) {
2759 // Equality comparisons (eq/ne) are safe without a comparison against 0.
2760 return true;
2761 }
2762 if (WithCMov) {
2763 // For integer setcc combined with a cmov, all signed comparisons with 0
2764 // are safe.
2765 return isSignedIntSetCC(Code: CC);
2766 }
2767 // For integer setcc, only signed 64-bit comparisons are safe. For
2768 // unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it becomes
2769 // less than 0 without the CMPU. For 32 bits, the other half of the
2770 // register holds unspecified bits, so it is not safe either without CMPI.
2771 return isSignedIntSetCC(Code: CC) && SrcVT == MVT::i64;
2772}
2773
2774static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS,
2775 ISD::CondCode CC, bool WithCMov,
2776 const SDLoc &DL, SelectionDAG &DAG) {
2777 // Compare values. If RHS is 0 and it is safe to calculate without
2778 // comparison, we don't generate an instruction for comparison.
2779 EVT CompVT = decideCompType(SrcVT: VT);
2780 if (CompVT == VT && safeWithoutCompWithNull(SrcVT: VT, CC, WithCMov) &&
2781 (isNullConstant(V: RHS) || isNullFPConstant(V: RHS))) {
2782 return LHS;
2783 }
2784 return DAG.getNode(Opcode: decideComp(SrcVT: VT, CC), DL, VT: CompVT, N1: LHS, N2: RHS);
2785}
2786
SDValue VETargetLowering::combineSelect(SDNode *N,
                                        DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SELECT &&
         "Should be called with a SELECT node");
  ISD::CondCode CC = ISD::CondCode::SETNE;
  SDValue Cond = N->getOperand(0);
  SDValue True = N->getOperand(1);
  SDValue False = N->getOperand(2);

  // We handle only scalar SELECT.
  EVT VT = N->getValueType(0);
  if (VT.isVector())
    return SDValue();

  // Perform combineSelect after legalizing the DAG.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  EVT VT0 = Cond.getValueType();
  if (isMImm(True)) {
    // VE's conditional move can handle MImm in the True clause, so nothing to
    // do.
  } else if (isMImm(False)) {
    // VE's conditional move can handle MImm only in the True clause, so swap
    // the True and False clauses if False has an MImm value, and update the
    // condition code.
    std::swap(True, False);
    CC = getSetCCInverse(CC, VT0);
  }

  SDLoc DL(N);
  SelectionDAG &DAG = DCI.DAG;
  VECC::CondCode VECCVal;
  if (VT0.isFloatingPoint()) {
    VECCVal = fpCondCode2Fcc(CC);
  } else {
    VECCVal = intCondCode2Icc(CC);
  }
  SDValue Ops[] = {Cond, True, False,
                   DAG.getConstant(VECCVal, DL, MVT::i32)};
  return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
}

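// Fold a scalar SELECT_CC into a VE compare (when one is needed) followed by
// a conditional move. Illustrative example of the rewrite performed below:
//   (select_cc %lhs, %rhs, %true, %false, cc)
//     => (VEISD::CMOV (VEISD::CMPx %lhs, %rhs), %true, %false, cc')
// where CMPx is chosen by decideComp and cc' is the VE condition code.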
SDValue VETargetLowering::combineSelectCC(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SELECT_CC &&
         "Should be called with a SELECT_CC node");
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue True = N->getOperand(2);
  SDValue False = N->getOperand(3);

  // We handle only scalar SELECT_CC.
  EVT VT = N->getValueType(0);
  if (VT.isVector())
    return SDValue();

  // Perform combineSelectCC after legalizing the DAG.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // We handle only i32/i64/f32/f64/f128 comparisons.
  EVT LHSVT = LHS.getValueType();
  assert(LHSVT == RHS.getValueType());
  switch (LHSVT.getSimpleVT().SimpleTy) {
  case MVT::i32:
  case MVT::i64:
  case MVT::f32:
  case MVT::f64:
  case MVT::f128:
    break;
  default:
    // Return SDValue to let LLVM handle other types.
    return SDValue();
  }

  if (isMImm(RHS)) {
    // VE's comparison can handle MImm in RHS, so nothing to do.
  } else if (isSimm7(RHS)) {
    // VE's comparison can handle Simm7 only in LHS, so swap LHS and RHS, and
    // update the condition code.
    std::swap(LHS, RHS);
    CC = getSetCCSwappedOperands(CC);
  }
  if (isMImm(True)) {
    // VE's conditional move can handle MImm in the True clause, so nothing to
    // do.
  } else if (isMImm(False)) {
    // VE's conditional move can handle MImm only in the True clause, so swap
    // the True and False clauses if False has an MImm value, and update the
    // condition code.
    std::swap(True, False);
    CC = getSetCCInverse(CC, LHSVT);
  }

  SDLoc DL(N);
  SelectionDAG &DAG = DCI.DAG;

  bool WithCMov = true;
  SDValue CompNode = generateComparison(LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);

  VECC::CondCode VECCVal;
  if (LHSVT.isFloatingPoint()) {
    VECCVal = fpCondCode2Fcc(CC);
  } else {
    VECCVal = intCondCode2Icc(CC);
  }
  SDValue Ops[] = {CompNode, True, False,
                   DAG.getConstant(VECCVal, DL, MVT::i32)};
  return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
}

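// Helpers for combineTRUNCATE below: decide whether every use of an i64 value
// truncated to i32 only looks at the low 32 bits, in which case the TRUNCATE
// can be replaced by an EXTRACT_SUBREG of the sub_i32 subregister.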
static bool isI32InsnAllUses(const SDNode *User, const SDNode *N);
static bool isI32Insn(const SDNode *User, const SDNode *N) {
  switch (User->getOpcode()) {
  default:
    return false;
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SETCC:
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::SHL:
  case ISD::SRA:
  case ISD::BSWAP:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::BR_CC:
  case ISD::BITCAST:
  case ISD::ATOMIC_CMP_SWAP:
  case ISD::ATOMIC_SWAP:
  case VEISD::CMPU:
  case VEISD::CMPI:
    return true;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() != ISD::SRL)
      return true;
    // (srl (trunc (srl ...))) may be optimized by combining srl, so
    // don't optimize the trunc now.
    return false;
  case ISD::SELECT_CC:
    if (User->getOperand(2).getNode() != N &&
        User->getOperand(3).getNode() != N)
      return true;
    return isI32InsnAllUses(User, N);
  case VEISD::CMOV:
    // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
    // However, a trunc in the true or false clause is not safe.
    if (User->getOperand(1).getNode() != N &&
        User->getOperand(2).getNode() != N &&
        isa<ConstantSDNode>(User->getOperand(3))) {
      VECC::CondCode VECCVal =
          static_cast<VECC::CondCode>(User->getConstantOperandVal(3));
      return isIntVECondCode(VECCVal);
    }
    [[fallthrough]];
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
  case ISD::SELECT:
  case ISD::CopyToReg:
    // Check all uses of selections, bit operations, and copies. If all of
    // them are safe, optimize the truncate to extract_subreg.
    return isI32InsnAllUses(User, N);
  }
}

static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) {
  // Check all uses of the User node. If all of them are safe, optimize the
  // truncate to extract_subreg.
  for (const SDNode *U : User->uses()) {
    switch (U->getOpcode()) {
    default:
      // If the use is an instruction which treats the source operand as i32,
      // it is safe to avoid the truncate here.
      if (isI32Insn(U, N))
        continue;
      break;
    case ISD::ANY_EXTEND:
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND: {
      // Special optimization for the combination of ext and trunc.
      // (ext ... (select ... (trunc ...))) is safe to avoid the truncate here
      // since this truncate instruction clears the higher 32 bits, which are
      // filled by one of the ext instructions later.
      assert(N->getValueType(0) == MVT::i32 &&
             "found a truncate to a non-i32 integer");
      if (User->getOpcode() == ISD::SELECT_CC ||
          User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV)
        continue;
      break;
    }
    }
    return false;
  }
  return true;
}

// Optimize TRUNCATE in DAG combining. Optimizing it in custom lowering is
// sometimes too early. Optimizing it in DAG pattern matching in
// VEInstrInfo.td is sometimes too late. So, do it here.
SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::TRUNCATE &&
         "Should be called with a TRUNCATE node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // We prefer to do this when all types are legal.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // Skip combining TRUNCATE for now if the operand of TRUNCATE might be a
  // constant.
  if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&
      isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&
      isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
    return SDValue();

  // Check all uses of this TRUNCATE.
  for (const SDNode *User : N->uses()) {
    // Make sure that we're not going to replace TRUNCATE for non-i32
    // instructions.
    //
    // FIXME: Although we could sometimes handle this, and it does occur in
    // practice that one of the condition inputs to the select is also one of
    // the outputs, we currently can't deal with this.
    if (isI32Insn(User, N))
      continue;

    return SDValue();
  }

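  // Every use only cares about the low 32 bits, so replace the TRUNCATE with
  // a plain subregister extraction. Illustrative rewrite:
  //   (truncate i64:%x to i32) => (EXTRACT_SUBREG %x, sub_i32)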
  SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
  return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,
                                    N->getOperand(0), SubI32),
                 0);
}

SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
                                            DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default:
    break;
  case ISD::SELECT:
    return combineSelect(N, DCI);
  case ISD::SELECT_CC:
    return combineSelectCC(N, DCI);
  case ISD::TRUNCATE:
    return combineTRUNCATE(N, DCI);
  }

  return SDValue();
}

//===----------------------------------------------------------------------===//
// VE Inline Assembly Support
//===----------------------------------------------------------------------===//

VETargetLowering::ConstraintType
VETargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'v': // vector registers
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

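// Map single-letter register constraints to VE register classes: 'r' selects
// the 64-bit scalar registers and 'v' selects the vector registers. Anything
// else is deferred to the generic TargetLowering handling.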
std::pair<unsigned, const TargetRegisterClass *>
VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                               StringRef Constraint,
                                               MVT VT) const {
  const TargetRegisterClass *RC = nullptr;
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
    case 'r':
      RC = &VE::I64RegClass;
      break;
    case 'v':
      RC = &VE::V64RegClass;
      break;
    }
    return std::make_pair(0U, RC);
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

//===----------------------------------------------------------------------===//
// VE Target Optimization Support
//===----------------------------------------------------------------------===//

unsigned VETargetLowering::getMinimumJumpTableEntries() const {
  // Specify 8 for the PIC model to reduce the impact of PIC load instructions.
  if (isJumpTableRelative())
    return 8;

  return TargetLowering::getMinimumJumpTableEntries();
}

bool VETargetLowering::hasAndNot(SDValue Y) const {
  EVT VT = Y.getValueType();

  // VE doesn't have a vector and-not instruction.
  if (VT.isVector())
    return false;

  // VE allows different immediate values for X and Y in ~X & Y. Only simm7
  // works for X, and only mimm works for Y on VE. However, this function is
  // used to check whether an immediate value is OK for an and-not instruction
  // as both X and Y. Generating an additional instruction to retrieve an
  // immediate value is no good, since the purpose of this function is to
  // convert a series of 3 instructions into another series of 3 instructions
  // with better parallelism. Therefore, we return false for all immediate
  // values for now.
  // FIXME: Change the hasAndNot function to take two operands to make it work
  // correctly with Aurora VE.
  if (isa<ConstantSDNode>(Y))
    return false;

  // It's ok for generic registers.
  return true;
}

SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
  MVT VT = Op.getOperand(0).getSimpleValueType();

  // Special treatment for packed V64 types.
  assert(VT == MVT::v512i32 || VT == MVT::v512f32);
  (void)VT;
  // Example of generated code:
  //   %packed_v = extractelt %vr, %idx / 2
  //   %v = %packed_v >> (%idx % 2 * 32)
  //   %res = %v & 0xffffffff

  SDValue Vec = Op.getOperand(0);
  SDValue Idx = Op.getOperand(1);
  SDLoc DL(Op);
  SDValue Result = Op;
  if (false /* Idx->isConstant() */) {
    // TODO: optimized implementation using constant values
  } else {
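    // General case: read the 64-bit element that holds the requested pair
    // with LVS, shift the selected 32-bit half down to the low bits (an even
    // index selects the upper half, hence the XOR on the index parity), mask
    // it off, and extract the i32 subregister.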
    SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
    SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
    SDValue PackedElt =
        SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
    SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
    SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
    SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
    Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
    PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
    SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
    PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
    SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
    Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                        MVT::i32, PackedElt, SubI32),
                     0);

    if (Op.getSimpleValueType() == MVT::f32) {
      Result = DAG.getBitcast(MVT::f32, Result);
    } else {
      assert(Op.getSimpleValueType() == MVT::i32);
    }
  }
  return Result;
}

SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
  MVT VT = Op.getOperand(0).getSimpleValueType();

  // Special treatment for packed V64 types.
  assert(VT == MVT::v512i32 || VT == MVT::v512f32);
  (void)VT;
  // The v512i32 and v512f32 types start from the upper bits (0..31). These
  // "upper bits" require `val << 32` from the C implementation's point of
  // view.
  //
  // Example of generated code:
  //   %packed_elt = extractelt %vr, (%idx >> 1)
  //   %shift = ((%idx & 1) ^ 1) << 5
  //   %packed_elt &= 0xffffffff00000000 >> shift
  //   %packed_elt |= (zext %val) << shift
  //   %vr = insertelt %vr, %packed_elt, (%idx >> 1)

  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);
  if (Idx.getSimpleValueType() == MVT::i32)
    Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
  if (Val.getSimpleValueType() == MVT::f32)
    Val = DAG.getBitcast(MVT::i32, Val);
  assert(Val.getSimpleValueType() == MVT::i32);
  Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);

  SDValue Result = Op;
  if (false /* Idx->isConstant() */) {
    // TODO: optimized implementation using constant values
  } else {
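    // General case: a read-modify-write of the 64-bit element. Read the pair
    // with LVS, clear the 32-bit half being replaced, OR in the zero-extended
    // value shifted into place, and write the element back with LSVrr_v.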
    SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
    SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
    SDValue PackedElt =
        SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
    SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
    SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
    SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
    Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
    SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
    Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
    PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
    Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
    PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
    Result =
        SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
                                   {HalfIdx, PackedElt, Vec}),
                0);
  }
  return Result;
}
