1//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the interfaces that VE uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "VEISelLowering.h"
15#include "MCTargetDesc/VEMCAsmInfo.h"
16#include "VECustomDAG.h"
17#include "VEInstrBuilder.h"
18#include "VEMachineFunctionInfo.h"
19#include "VERegisterInfo.h"
20#include "VESelectionDAGInfo.h"
21#include "VETargetMachine.h"
22#include "llvm/ADT/StringSwitch.h"
23#include "llvm/CodeGen/CallingConvLower.h"
24#include "llvm/CodeGen/MachineFrameInfo.h"
25#include "llvm/CodeGen/MachineFunction.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineJumpTableInfo.h"
28#include "llvm/CodeGen/MachineModuleInfo.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/CodeGen/SelectionDAG.h"
31#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
32#include "llvm/IR/DerivedTypes.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/IRBuilder.h"
35#include "llvm/IR/Module.h"
36#include "llvm/Support/ErrorHandling.h"
37using namespace llvm;
38
39#define DEBUG_TYPE "ve-lower"
40
41//===----------------------------------------------------------------------===//
42// Calling Convention Implementation
43//===----------------------------------------------------------------------===//
44
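// Pulls in the TableGen-generated calling-convention assignment functions
// (CC_VE_C, CC_VE_Fast, CC_VE2, RetCC_VE_C, RetCC_VE_Fast) used below.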
45#include "VEGenCallingConv.inc"
46
47CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
48 switch (CallConv) {
49 default:
50 return RetCC_VE_C;
51 case CallingConv::Fast:
52 return RetCC_VE_Fast;
53 }
54}
55
56CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
57 if (IsVarArg)
58 return CC_VE2;
59 switch (CallConv) {
60 default:
61 return CC_VE_C;
62 case CallingConv::Fast:
63 return CC_VE_Fast;
64 }
65}
66
67bool VETargetLowering::CanLowerReturn(
68 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
69 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
70 const Type *RetTy) const {
71 CCAssignFn *RetCC = getReturnCC(CallConv);
72 SmallVector<CCValAssign, 16> RVLocs;
73 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
74 return CCInfo.CheckReturn(Outs, Fn: RetCC);
75}
76
77static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
78 MVT::v256f32, MVT::v512f32, MVT::v256f64};
79
80static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};
81
82static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
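// Note: the packed types hold 512 elements by packing two 32-bit values into
// each 64-bit lane of a 256-element vector register.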
83
84void VETargetLowering::initRegisterClasses() {
85 // Set up the register classes.
86 addRegisterClass(VT: MVT::i32, RC: &VE::I32RegClass);
87 addRegisterClass(VT: MVT::i64, RC: &VE::I64RegClass);
88 addRegisterClass(VT: MVT::f32, RC: &VE::F32RegClass);
89 addRegisterClass(VT: MVT::f64, RC: &VE::I64RegClass);
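  // Note that f64 shares I64RegClass: VE keeps doubles in the 64-bit scalar
  // registers rather than in a separate floating-point register file.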
90 addRegisterClass(VT: MVT::f128, RC: &VE::F128RegClass);
91
92 if (Subtarget->enableVPU()) {
93 for (MVT VecVT : AllVectorVTs)
94 addRegisterClass(VT: VecVT, RC: &VE::V64RegClass);
95 addRegisterClass(VT: MVT::v256i1, RC: &VE::VMRegClass);
96 addRegisterClass(VT: MVT::v512i1, RC: &VE::VM512RegClass);
97 }
98}
99
100void VETargetLowering::initSPUActions() {
101 const auto &TM = getTargetMachine();
102 /// Load & Store {
103
104 // VE doesn't have i1 sign extending load.
105 for (MVT VT : MVT::integer_valuetypes()) {
106 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
107 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
108 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
109 setTruncStoreAction(ValVT: VT, MemVT: MVT::i1, Action: Expand);
110 }
111
112 // VE doesn't have floating point extload/truncstore, so expand them.
113 for (MVT FPVT : MVT::fp_valuetypes()) {
114 for (MVT OtherFPVT : MVT::fp_valuetypes()) {
115 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: FPVT, MemVT: OtherFPVT, Action: Expand);
116 setTruncStoreAction(ValVT: FPVT, MemVT: OtherFPVT, Action: Expand);
117 }
118 }
119
  // VE doesn't have fp128 load/store, so handle them in custom lowering.
121 setOperationAction(Op: ISD::LOAD, VT: MVT::f128, Action: Custom);
122 setOperationAction(Op: ISD::STORE, VT: MVT::f128, Action: Custom);
123
124 /// } Load & Store
125
126 // Custom legalize address nodes into LO/HI parts.
127 MVT PtrVT = MVT::getIntegerVT(BitWidth: TM.getPointerSizeInBits(AS: 0));
128 setOperationAction(Op: ISD::BlockAddress, VT: PtrVT, Action: Custom);
129 setOperationAction(Op: ISD::GlobalAddress, VT: PtrVT, Action: Custom);
130 setOperationAction(Op: ISD::GlobalTLSAddress, VT: PtrVT, Action: Custom);
131 setOperationAction(Op: ISD::ConstantPool, VT: PtrVT, Action: Custom);
132 setOperationAction(Op: ISD::JumpTable, VT: PtrVT, Action: Custom);
133
134 /// VAARG handling {
135 setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom);
  // VAARG needs to be lowered to an access with 8-byte alignment.
137 setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Custom);
138 // Use the default implementation.
139 setOperationAction(Op: ISD::VACOPY, VT: MVT::Other, Action: Expand);
140 setOperationAction(Op: ISD::VAEND, VT: MVT::Other, Action: Expand);
141 /// } VAARG handling
142
143 /// Stack {
144 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i32, Action: Custom);
145 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i64, Action: Custom);
146
147 // Use the default implementation.
148 setOperationAction(Op: ISD::STACKSAVE, VT: MVT::Other, Action: Expand);
149 setOperationAction(Op: ISD::STACKRESTORE, VT: MVT::Other, Action: Expand);
150 /// } Stack
151
152 /// Branch {
153
154 // VE doesn't have BRCOND
155 setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Expand);
156
157 // BR_JT is not implemented yet.
158 setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand);
159
160 /// } Branch
161
162 /// Int Ops {
163 for (MVT IntVT : {MVT::i32, MVT::i64}) {
164 // VE has no REM or DIVREM operations.
165 setOperationAction(Op: ISD::UREM, VT: IntVT, Action: Expand);
166 setOperationAction(Op: ISD::SREM, VT: IntVT, Action: Expand);
167 setOperationAction(Op: ISD::SDIVREM, VT: IntVT, Action: Expand);
168 setOperationAction(Op: ISD::UDIVREM, VT: IntVT, Action: Expand);
169
170 // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
171 setOperationAction(Op: ISD::SHL_PARTS, VT: IntVT, Action: Expand);
172 setOperationAction(Op: ISD::SRA_PARTS, VT: IntVT, Action: Expand);
173 setOperationAction(Op: ISD::SRL_PARTS, VT: IntVT, Action: Expand);
174
175 // VE has no MULHU/S or U/SMUL_LOHI operations.
176 // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
177 setOperationAction(Op: ISD::MULHU, VT: IntVT, Action: Expand);
178 setOperationAction(Op: ISD::MULHS, VT: IntVT, Action: Expand);
179 setOperationAction(Op: ISD::UMUL_LOHI, VT: IntVT, Action: Expand);
180 setOperationAction(Op: ISD::SMUL_LOHI, VT: IntVT, Action: Expand);
181
182 // VE has no CTTZ, ROTL, ROTR operations.
183 setOperationAction(Op: ISD::CTTZ, VT: IntVT, Action: Expand);
184 setOperationAction(Op: ISD::ROTL, VT: IntVT, Action: Expand);
185 setOperationAction(Op: ISD::ROTR, VT: IntVT, Action: Expand);
186
    // VE has a 64-bit instruction that implements i64 BSWAP. The same
    // instruction also works as an i32 BSWAP when given an additional
    // operand. Use isel patterns to lower BSWAP.
190 setOperationAction(Op: ISD::BSWAP, VT: IntVT, Action: Legal);
191
    // VE has only 64-bit instructions for the BITREVERSE/CTLZ/CTPOP
    // operations. Use isel patterns for i64, promote for i32.
194 LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
195 setOperationAction(Op: ISD::BITREVERSE, VT: IntVT, Action: Act);
196 setOperationAction(Op: ISD::CTLZ, VT: IntVT, Action: Act);
197 setOperationAction(Op: ISD::CTLZ_ZERO_UNDEF, VT: IntVT, Action: Act);
198 setOperationAction(Op: ISD::CTPOP, VT: IntVT, Action: Act);
199
    // VE has only 64-bit instructions for the AND/OR/XOR operations.
    // Use isel patterns for i64, promote for i32.
202 setOperationAction(Op: ISD::AND, VT: IntVT, Action: Act);
203 setOperationAction(Op: ISD::OR, VT: IntVT, Action: Act);
204 setOperationAction(Op: ISD::XOR, VT: IntVT, Action: Act);
205
206 // Legal smax and smin
207 setOperationAction(Op: ISD::SMAX, VT: IntVT, Action: Legal);
208 setOperationAction(Op: ISD::SMIN, VT: IntVT, Action: Legal);
209 }
210 /// } Int Ops
211
212 /// Conversion {
  // VE doesn't have fp<->uint conversion instructions, so let LLVM expand them.
214 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Promote); // use i64
215 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i32, Action: Promote); // use i64
216 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i64, Action: Expand);
217 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Expand);
218
219 // fp16 not supported
220 for (MVT FPVT : MVT::fp_valuetypes()) {
221 setOperationAction(Op: ISD::FP16_TO_FP, VT: FPVT, Action: Expand);
222 setOperationAction(Op: ISD::FP_TO_FP16, VT: FPVT, Action: Expand);
223 }
224 /// } Conversion
225
226 /// Floating-point Ops {
227 /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
228 /// and fcmp.
229
  // VE doesn't have the following floating-point operations.
231 for (MVT VT : MVT::fp_valuetypes()) {
232 setOperationAction(Op: ISD::FNEG, VT, Action: Expand);
233 setOperationAction(Op: ISD::FREM, VT, Action: LibCall);
234 }
235
236 // VE doesn't have fdiv of f128.
237 setOperationAction(Op: ISD::FDIV, VT: MVT::f128, Action: Expand);
238
239 for (MVT FPVT : {MVT::f32, MVT::f64}) {
    // f32 and f64 use ConstantFP. f128 uses a ConstantPool.
241 setOperationAction(Op: ISD::ConstantFP, VT: FPVT, Action: Legal);
242 }
243 /// } Floating-point Ops
244
245 /// Floating-point math functions {
246
  // VE doesn't have the following floating-point math functions.
248 for (MVT VT : MVT::fp_valuetypes()) {
249 setOperationAction(Op: ISD::FABS, VT, Action: Expand);
250 setOperationAction(Op: ISD::FCOPYSIGN, VT, Action: Expand);
251 setOperationAction(Op: ISD::FCOS, VT, Action: Expand);
252 setOperationAction(Op: ISD::FMA, VT, Action: Expand);
253 setOperationAction(Op: ISD::FPOW, VT, Action: Expand);
254 setOperationAction(Op: ISD::FSIN, VT, Action: Expand);
255 setOperationAction(Op: ISD::FSQRT, VT, Action: Expand);
256 }
257
  // VE has single- and double-precision FMINNUM and FMAXNUM.
259 for (MVT VT : {MVT::f32, MVT::f64}) {
260 setOperationAction(Ops: {ISD::FMAXNUM, ISD::FMINNUM}, VT, Action: Legal);
261 }
262
263 /// } Floating-point math functions
264
265 /// Atomic instructions {
266
267 setMaxAtomicSizeInBitsSupported(64);
268 setMinCmpXchgSizeInBits(32);
269 setSupportsUnalignedAtomics(false);
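  // Roughly: AtomicExpand widens i8/i16 compare-and-swap to 32 bits, and
  // atomics wider than 64 bits or on unaligned addresses become libcalls.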
270
271 // Use custom inserter for ATOMIC_FENCE.
272 setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom);
273
274 // Other atomic instructions.
275 for (MVT VT : MVT::integer_valuetypes()) {
276 // Support i8/i16 atomic swap.
277 setOperationAction(Op: ISD::ATOMIC_SWAP, VT, Action: Custom);
278
279 // FIXME: Support "atmam" instructions.
280 setOperationAction(Op: ISD::ATOMIC_LOAD_ADD, VT, Action: Expand);
281 setOperationAction(Op: ISD::ATOMIC_LOAD_SUB, VT, Action: Expand);
282 setOperationAction(Op: ISD::ATOMIC_LOAD_AND, VT, Action: Expand);
283 setOperationAction(Op: ISD::ATOMIC_LOAD_OR, VT, Action: Expand);
284
    // VE doesn't have the following instructions.
286 setOperationAction(Op: ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Action: Expand);
287 setOperationAction(Op: ISD::ATOMIC_LOAD_CLR, VT, Action: Expand);
288 setOperationAction(Op: ISD::ATOMIC_LOAD_XOR, VT, Action: Expand);
289 setOperationAction(Op: ISD::ATOMIC_LOAD_NAND, VT, Action: Expand);
290 setOperationAction(Op: ISD::ATOMIC_LOAD_MIN, VT, Action: Expand);
291 setOperationAction(Op: ISD::ATOMIC_LOAD_MAX, VT, Action: Expand);
292 setOperationAction(Op: ISD::ATOMIC_LOAD_UMIN, VT, Action: Expand);
293 setOperationAction(Op: ISD::ATOMIC_LOAD_UMAX, VT, Action: Expand);
294 }
295
296 /// } Atomic instructions
297
298 /// SJLJ instructions {
299 setOperationAction(Op: ISD::EH_SJLJ_LONGJMP, VT: MVT::Other, Action: Custom);
300 setOperationAction(Op: ISD::EH_SJLJ_SETJMP, VT: MVT::i32, Action: Custom);
301 setOperationAction(Op: ISD::EH_SJLJ_SETUP_DISPATCH, VT: MVT::Other, Action: Custom);
302 /// } SJLJ instructions
303
304 // Intrinsic instructions
305 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
306}
307
308void VETargetLowering::initVPUActions() {
309 for (MVT LegalMaskVT : AllMaskVTs)
310 setOperationAction(Op: ISD::BUILD_VECTOR, VT: LegalMaskVT, Action: Custom);
311
312 for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
313 setOperationAction(Op: Opc, VT: MVT::v512i1, Action: Custom);
314
315 for (MVT LegalVecVT : AllVectorVTs) {
316 setOperationAction(Op: ISD::BUILD_VECTOR, VT: LegalVecVT, Action: Custom);
317 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: LegalVecVT, Action: Legal);
318 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: LegalVecVT, Action: Legal);
319 // Translate all vector instructions with legal element types to VVP_*
320 // nodes.
    // TODO We will custom-widen into VVP_* nodes in the future. While we are
    // building the infrastructure for this, we only do so for legal vector
    // VTs.
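    // Including VVPNodes.def below expands the following two macros once per
    // VP/VVP opcode, marking each listed operation as Custom for this VT.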
324#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \
325 setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
326#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
327 setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
328 setOperationAction(Op: ISD::EXPERIMENTAL_VP_STRIDED_LOAD, VT: LegalVecVT, Action: Custom);
329 setOperationAction(Op: ISD::EXPERIMENTAL_VP_STRIDED_STORE, VT: LegalVecVT, Action: Custom);
330#include "VVPNodes.def"
331 }
332
333 for (MVT LegalPackedVT : AllPackedVTs) {
334 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: LegalPackedVT, Action: Custom);
335 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: LegalPackedVT, Action: Custom);
336 }
337
338 // vNt32, vNt64 ops (legal element types)
339 for (MVT VT : MVT::vector_valuetypes()) {
340 MVT ElemVT = VT.getVectorElementType();
341 unsigned ElemBits = ElemVT.getScalarSizeInBits();
342 if (ElemBits != 32 && ElemBits != 64)
343 continue;
344
345 for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
346 setOperationAction(Op: MemOpc, VT, Action: Custom);
347
348 const ISD::NodeType IntReductionOCs[] = {
349 ISD::VECREDUCE_ADD, ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND,
350 ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMIN,
351 ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};
352
353 for (unsigned IntRedOpc : IntReductionOCs)
354 setOperationAction(Op: IntRedOpc, VT, Action: Custom);
355 }
356
357 // v256i1 and v512i1 ops
358 for (MVT MaskVT : AllMaskVTs) {
359 // Custom lower mask ops
360 setOperationAction(Op: ISD::STORE, VT: MaskVT, Action: Custom);
361 setOperationAction(Op: ISD::LOAD, VT: MaskVT, Action: Custom);
362 }
363}
364
365SDValue
366VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
367 bool IsVarArg,
368 const SmallVectorImpl<ISD::OutputArg> &Outs,
369 const SmallVectorImpl<SDValue> &OutVals,
370 const SDLoc &DL, SelectionDAG &DAG) const {
371 // CCValAssign - represent the assignment of the return value to locations.
372 SmallVector<CCValAssign, 16> RVLocs;
373
374 // CCState - Info about the registers and stack slot.
375 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
376 *DAG.getContext());
377
378 // Analyze return values.
379 CCInfo.AnalyzeReturn(Outs, Fn: getReturnCC(CallConv));
380
381 SDValue Glue;
382 SmallVector<SDValue, 4> RetOps(1, Chain);
383
384 // Copy the result values into the output registers.
385 for (unsigned i = 0; i != RVLocs.size(); ++i) {
386 CCValAssign &VA = RVLocs[i];
387 assert(VA.isRegLoc() && "Can only return in registers!");
388 assert(!VA.needsCustom() && "Unexpected custom lowering");
389 SDValue OutVal = OutVals[i];
390
391 // Integer return values must be sign or zero extended by the callee.
392 switch (VA.getLocInfo()) {
393 case CCValAssign::Full:
394 break;
395 case CCValAssign::SExt:
396 OutVal = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: VA.getLocVT(), Operand: OutVal);
397 break;
398 case CCValAssign::ZExt:
399 OutVal = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: VA.getLocVT(), Operand: OutVal);
400 break;
401 case CCValAssign::AExt:
402 OutVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: VA.getLocVT(), Operand: OutVal);
403 break;
404 case CCValAssign::BCvt: {
405 // Convert a float return value to i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
410 assert(VA.getLocVT() == MVT::i64);
411 assert(VA.getValVT() == MVT::f32);
412 SDValue Undef = SDValue(
413 DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::i64), 0);
414 SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
415 OutVal = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL,
416 VT: MVT::i64, Op1: Undef, Op2: OutVal, Op3: Sub_f32),
417 0);
418 break;
419 }
420 default:
421 llvm_unreachable("Unknown loc info!");
422 }
423
424 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: OutVal, Glue);
425
426 // Guarantee that all emitted copies are stuck together with flags.
427 Glue = Chain.getValue(R: 1);
428 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
429 }
430
431 RetOps[0] = Chain; // Update chain.
432
433 // Add the glue if we have it.
434 if (Glue.getNode())
435 RetOps.push_back(Elt: Glue);
436
437 return DAG.getNode(Opcode: VEISD::RET_GLUE, DL, VT: MVT::Other, Ops: RetOps);
438}
439
440SDValue VETargetLowering::LowerFormalArguments(
441 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
442 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
443 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
444 MachineFunction &MF = DAG.getMachineFunction();
445
446 // Get the base offset of the incoming arguments stack space.
447 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
448 // Get the size of the preserved arguments area
449 unsigned ArgsPreserved = 64;
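  // (64 bytes: one 8-byte slot shadowing each parameter register %s0-%s7.)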
450
451 // Analyze arguments according to CC_VE.
452 SmallVector<CCValAssign, 16> ArgLocs;
453 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
454 *DAG.getContext());
455 // Allocate the preserved area first.
456 CCInfo.AllocateStack(Size: ArgsPreserved, Alignment: Align(8));
457 // We already allocated the preserved area, so the stack offset computed
458 // by CC_VE would be correct now.
459 CCInfo.AnalyzeFormalArguments(Ins, Fn: getParamCC(CallConv, IsVarArg: false));
460
461 for (const CCValAssign &VA : ArgLocs) {
462 assert(!VA.needsCustom() && "Unexpected custom lowering");
463 if (VA.isRegLoc()) {
464 // This argument is passed in a register.
465 // All integer register arguments are promoted by the caller to i64.
466
467 // Create a virtual register for the promoted live-in value.
468 Register VReg =
469 MF.addLiveIn(PReg: VA.getLocReg(), RC: getRegClassFor(VT: VA.getLocVT()));
470 SDValue Arg = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: VA.getLocVT());
471
472 // The caller promoted the argument, so insert an Assert?ext SDNode so we
473 // won't promote the value again in this function.
474 switch (VA.getLocInfo()) {
475 case CCValAssign::SExt:
476 Arg = DAG.getNode(Opcode: ISD::AssertSext, DL, VT: VA.getLocVT(), N1: Arg,
477 N2: DAG.getValueType(VA.getValVT()));
478 break;
479 case CCValAssign::ZExt:
480 Arg = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: VA.getLocVT(), N1: Arg,
481 N2: DAG.getValueType(VA.getValVT()));
482 break;
483 case CCValAssign::BCvt: {
484 // Extract a float argument from i64 with padding.
        //     63     31   0
        //    +------+------+
        //    | float|   0  |
        //    +------+------+
489 assert(VA.getLocVT() == MVT::i64);
490 assert(VA.getValVT() == MVT::f32);
491 SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
492 Arg = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL,
493 VT: MVT::f32, Op1: Arg, Op2: Sub_f32),
494 0);
495 break;
496 }
497 default:
498 break;
499 }
500
501 // Truncate the register down to the argument type.
502 if (VA.isExtInLoc())
503 Arg = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VA.getValVT(), Operand: Arg);
504
505 InVals.push_back(Elt: Arg);
506 continue;
507 }
508
509 // The registers are exhausted. This argument was passed on the stack.
510 assert(VA.isMemLoc());
    // The CC_VE_Full/Half functions compute stack offsets relative to the
    // beginning of the arguments area, at %fp + the size of the reserved area.
513 unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
514 unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
515
    // Adjust the offset of a float argument by adding 4, since the argument
    // is stored in an 8-byte slot with the layout below and LLVM generates a
    // 4-byte load instruction. This adjustment is required only in
    // LowerFormalArguments. In LowerCall, a float argument is first converted
    // to i64 and stored as 8 bytes of data, as required by the ABI, so no
    // adjustment is needed there.
    //     0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
526 if (VA.getValVT() == MVT::f32)
527 Offset += 4;
528
529 int FI = MF.getFrameInfo().CreateFixedObject(Size: ValSize, SPOffset: Offset, IsImmutable: true);
530 InVals.push_back(
531 Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain,
532 Ptr: DAG.getFrameIndex(FI, VT: getPointerTy(DL: MF.getDataLayout())),
533 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
534 }
535
536 if (!IsVarArg)
537 return Chain;
538
539 // This function takes variable arguments, some of which may have been passed
  // in registers %s0-%s7.
541 //
542 // The va_start intrinsic needs to know the offset to the first variable
543 // argument.
544 // TODO: need to calculate offset correctly once we support f128.
545 unsigned ArgOffset = ArgLocs.size() * 8;
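  // Every fixed argument occupies one 8-byte slot, so the first variable
  // argument starts immediately after the named arguments.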
546 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
547 // Skip the reserved area at the top of stack.
548 FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
549
550 return Chain;
551}
552
553// FIXME? Maybe this could be a TableGen attribute on some registers and
554// this table could be generated automatically from RegInfo.
555Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
556 const MachineFunction &MF) const {
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("sp", VE::SX11)    // Stack pointer
                     .Case("fp", VE::SX9)     // Frame pointer
                     .Case("sl", VE::SX8)     // Stack limit
                     .Case("lr", VE::SX10)    // Link register
                     .Case("tp", VE::SX14)    // Thread pointer
                     .Case("outer", VE::SX12) // Outer register
                     .Case("info", VE::SX17)  // Info area register
                     .Case("got", VE::SX15)   // Global offset table register
                     .Case("plt", VE::SX16)   // Procedure linkage table register
                     .Default(Register());
568 return Reg;
569}
570
571//===----------------------------------------------------------------------===//
572// TargetLowering Implementation
573//===----------------------------------------------------------------------===//
574
575SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
576 SmallVectorImpl<SDValue> &InVals) const {
577 SelectionDAG &DAG = CLI.DAG;
578 SDLoc DL = CLI.DL;
579 SDValue Chain = CLI.Chain;
580 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
581
582 // VE target does not yet support tail call optimization.
583 CLI.IsTailCall = false;
584
585 // Get the base offset of the outgoing arguments stack space.
586 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
587 // Get the size of the preserved arguments area
588 unsigned ArgsPreserved = 8 * 8u;
589
590 // Analyze operands of the call, assigning locations to each operand.
591 SmallVector<CCValAssign, 16> ArgLocs;
592 CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
593 *DAG.getContext());
594 // Allocate the preserved area first.
595 CCInfo.AllocateStack(Size: ArgsPreserved, Alignment: Align(8));
596 // We already allocated the preserved area, so the stack offset computed
597 // by CC_VE would be correct now.
598 CCInfo.AnalyzeCallOperands(Outs: CLI.Outs, Fn: getParamCC(CallConv: CLI.CallConv, IsVarArg: false));
599
  // VE requires both registers and the stack to be used for varargs or
  // unprototyped functions.
602 bool UseBoth = CLI.IsVarArg;
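  // When set, each register argument is also stored to its stack slot so that
  // the callee's va_arg processing can find it in memory.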
603
  // Analyze the operands again if they are required to be stored BOTH in
  // registers and on the stack.
605 SmallVector<CCValAssign, 16> ArgLocs2;
606 CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
607 ArgLocs2, *DAG.getContext());
608 if (UseBoth)
609 CCInfo2.AnalyzeCallOperands(Outs: CLI.Outs, Fn: getParamCC(CallConv: CLI.CallConv, IsVarArg: true));
610
611 // Get the size of the outgoing arguments stack space requirement.
612 unsigned ArgsSize = CCInfo.getStackSize();
613
614 // Keep stack frames 16-byte aligned.
615 ArgsSize = alignTo(Value: ArgsSize, Align: 16);
616
617 // Adjust the stack pointer to make room for the arguments.
618 // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
619 // with more than 6 arguments.
620 Chain = DAG.getCALLSEQ_START(Chain, InSize: ArgsSize, OutSize: 0, DL);
621
622 // Collect the set of registers to pass to the function and their values.
623 // This will be emitted as a sequence of CopyToReg nodes glued to the call
624 // instruction.
625 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
626
  // Collect chains from all the memory operations that copy arguments to the
628 // stack. They must follow the stack pointer adjustment above and precede the
629 // call instruction itself.
630 SmallVector<SDValue, 8> MemOpChains;
631
  // VE needs the address of the callee function in a register, so prepare to
  // copy it to SX12 here.
634
635 // If the callee is a GlobalAddress node (quite common, every direct call is)
636 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
637 // Likewise ExternalSymbol -> TargetExternalSymbol.
638 SDValue Callee = CLI.Callee;
639
640 bool IsPICCall = isPositionIndependent();
641
642 // PC-relative references to external symbols should go through $stub.
643 // If so, we need to prepare GlobalBaseReg first.
644 const TargetMachine &TM = DAG.getTarget();
645 const GlobalValue *GV = nullptr;
646 auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
647 if (CalleeG)
648 GV = CalleeG->getGlobal();
649 bool Local = TM.shouldAssumeDSOLocal(GV);
650 bool UsePlt = !Local;
651 MachineFunction &MF = DAG.getMachineFunction();
652
653 // Turn GlobalAddress/ExternalSymbol node into a value node
654 // containing the address of them here.
655 if (CalleeG) {
656 if (IsPICCall) {
657 if (UsePlt)
658 Subtarget->getInstrInfo()->getGlobalBaseReg(MF: &MF);
659 Callee = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0, TargetFlags: 0);
660 Callee = DAG.getNode(Opcode: VEISD::GETFUNPLT, DL, VT: PtrVT, Operand: Callee);
661 } else {
662 Callee = makeHiLoPair(Op: Callee, HiTF: VE::S_HI32, LoTF: VE::S_LO32, DAG);
663 }
664 } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
665 if (IsPICCall) {
666 if (UsePlt)
667 Subtarget->getInstrInfo()->getGlobalBaseReg(MF: &MF);
668 Callee = DAG.getTargetExternalSymbol(Sym: E->getSymbol(), VT: PtrVT, TargetFlags: 0);
669 Callee = DAG.getNode(Opcode: VEISD::GETFUNPLT, DL, VT: PtrVT, Operand: Callee);
670 } else {
671 Callee = makeHiLoPair(Op: Callee, HiTF: VE::S_HI32, LoTF: VE::S_LO32, DAG);
672 }
673 }
674
675 RegsToPass.push_back(Elt: std::make_pair(x: VE::SX12, y&: Callee));
676
677 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
678 CCValAssign &VA = ArgLocs[i];
679 SDValue Arg = CLI.OutVals[i];
680
681 // Promote the value if needed.
682 switch (VA.getLocInfo()) {
683 default:
684 llvm_unreachable("Unknown location info!");
685 case CCValAssign::Full:
686 break;
687 case CCValAssign::SExt:
688 Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: VA.getLocVT(), Operand: Arg);
689 break;
690 case CCValAssign::ZExt:
691 Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: VA.getLocVT(), Operand: Arg);
692 break;
693 case CCValAssign::AExt:
694 Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: VA.getLocVT(), Operand: Arg);
695 break;
696 case CCValAssign::BCvt: {
697 // Convert a float argument to i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
702 assert(VA.getLocVT() == MVT::i64);
703 assert(VA.getValVT() == MVT::f32);
704 SDValue Undef = SDValue(
705 DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::i64), 0);
706 SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
707 Arg = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL,
708 VT: MVT::i64, Op1: Undef, Op2: Arg, Op3: Sub_f32),
709 0);
710 break;
711 }
712 }
713
714 if (VA.isRegLoc()) {
715 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
716 if (!UseBoth)
717 continue;
718 VA = ArgLocs2[i];
719 }
720
721 assert(VA.isMemLoc());
722
723 // Create a store off the stack pointer for this argument.
724 SDValue StackPtr = DAG.getRegister(Reg: VE::SX11, VT: PtrVT);
    // The argument area starts at %fp/%sp + the size of the reserved area.
726 SDValue PtrOff =
727 DAG.getIntPtrConstant(Val: VA.getLocMemOffset() + ArgsBaseOffset, DL);
728 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: PtrOff);
729 MemOpChains.push_back(
730 Elt: DAG.getStore(Chain, dl: DL, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo()));
731 }
732
733 // Emit all stores, make sure they occur before the call.
734 if (!MemOpChains.empty())
735 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains);
736
737 // Build a sequence of CopyToReg nodes glued together with token chain and
738 // glue operands which copy the outgoing args into registers. The InGlue is
739 // necessary since all emitted instructions must be stuck together in order
740 // to pass the live physical registers.
741 SDValue InGlue;
742 for (const auto &[Reg, N] : RegsToPass) {
743 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg, N, Glue: InGlue);
744 InGlue = Chain.getValue(R: 1);
745 }
746
747 // Build the operands for the call instruction itself.
748 SmallVector<SDValue, 8> Ops;
749 Ops.push_back(Elt: Chain);
750 for (const auto &[Reg, N] : RegsToPass)
751 Ops.push_back(Elt: DAG.getRegister(Reg, VT: N.getValueType()));
752
753 // Add a register mask operand representing the call-preserved registers.
754 const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
755 const uint32_t *Mask =
756 TRI->getCallPreservedMask(MF: DAG.getMachineFunction(), CC: CLI.CallConv);
757 assert(Mask && "Missing call preserved mask for calling convention");
758 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
759
760 // Make sure the CopyToReg nodes are glued to the call instruction which
761 // consumes the registers.
762 if (InGlue.getNode())
763 Ops.push_back(Elt: InGlue);
764
765 // Now the call itself.
766 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
767 Chain = DAG.getNode(Opcode: VEISD::CALL, DL, VTList: NodeTys, Ops);
768 InGlue = Chain.getValue(R: 1);
769
770 // Revert the stack pointer immediately after the call.
771 Chain = DAG.getCALLSEQ_END(Chain, Size1: ArgsSize, Size2: 0, Glue: InGlue, DL);
772 InGlue = Chain.getValue(R: 1);
773
774 // Now extract the return values. This is more or less the same as
775 // LowerFormalArguments.
776
777 // Assign locations to each value returned by this call.
778 SmallVector<CCValAssign, 16> RVLocs;
779 CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
780 *DAG.getContext());
781
782 // Set inreg flag manually for codegen generated library calls that
783 // return float.
784 if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
785 CLI.Ins[0].Flags.setInReg();
786
787 RVInfo.AnalyzeCallResult(Ins: CLI.Ins, Fn: getReturnCC(CallConv: CLI.CallConv));
788
789 // Copy all of the result registers out of their specified physreg.
790 for (unsigned i = 0; i != RVLocs.size(); ++i) {
791 CCValAssign &VA = RVLocs[i];
792 assert(!VA.needsCustom() && "Unexpected custom lowering");
793 Register Reg = VA.getLocReg();
794
795 // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
796 // reside in the same register in the high and low bits. Reuse the
797 // CopyFromReg previous node to avoid duplicate copies.
798 SDValue RV;
799 if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Val: Chain.getOperand(i: 1)))
800 if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
801 RV = Chain.getValue(R: 0);
802
803 // But usually we'll create a new CopyFromReg for a different register.
804 if (!RV.getNode()) {
805 RV = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: RVLocs[i].getLocVT(), Glue: InGlue);
806 Chain = RV.getValue(R: 1);
807 InGlue = Chain.getValue(R: 2);
808 }
809
810 // The callee promoted the return value, so insert an Assert?ext SDNode so
811 // we won't promote the value again in this function.
812 switch (VA.getLocInfo()) {
813 case CCValAssign::SExt:
814 RV = DAG.getNode(Opcode: ISD::AssertSext, DL, VT: VA.getLocVT(), N1: RV,
815 N2: DAG.getValueType(VA.getValVT()));
816 break;
817 case CCValAssign::ZExt:
818 RV = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: VA.getLocVT(), N1: RV,
819 N2: DAG.getValueType(VA.getValVT()));
820 break;
821 case CCValAssign::BCvt: {
822 // Extract a float return value from i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
827 assert(VA.getLocVT() == MVT::i64);
828 assert(VA.getValVT() == MVT::f32);
829 SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
830 RV = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL,
831 VT: MVT::f32, Op1: RV, Op2: Sub_f32),
832 0);
833 break;
834 }
835 default:
836 break;
837 }
838
839 // Truncate the register down to the return value type.
840 if (VA.isExtInLoc())
841 RV = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VA.getValVT(), Operand: RV);
842
843 InVals.push_back(Elt: RV);
844 }
845
846 return Chain;
847}
848
849bool VETargetLowering::isOffsetFoldingLegal(
850 const GlobalAddressSDNode *GA) const {
  // VE uses 64-bit addressing, so generating an address requires multiple
  // instructions. Folding an offset into an address would increase the number
  // of instructions, so we disable it here. Offsets will be folded later in
  // the DAG combine if it is worthwhile.
855 return false;
856}
857
858/// isFPImmLegal - Returns true if the target can instruction select the
859/// specified FP immediate natively. If false, the legalizer will
860/// materialize the FP immediate as a load from a constant pool.
861bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
862 bool ForCodeSize) const {
863 return VT == MVT::f32 || VT == MVT::f64;
864}
865
866/// Determine if the target supports unaligned memory accesses.
867///
868/// This function returns true if the target allows unaligned memory accesses
869/// of the specified type in the given address space. If true, it also returns
870/// whether the unaligned memory access is "fast" in the last argument by
871/// reference. This is used, for example, in situations where an array
872/// copy/move/set is converted to a sequence of store operations. Its use
873/// helps to ensure that such replacements don't generate code that causes an
874/// alignment error (trap) on the target machine.
875bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
876 unsigned AddrSpace,
877 Align A,
878 MachineMemOperand::Flags,
879 unsigned *Fast) const {
880 if (Fast) {
    // Unaligned accesses are always fast on VE.
882 *Fast = 1;
883 }
884 return true;
885}
886
887VETargetLowering::VETargetLowering(const TargetMachine &TM,
888 const VESubtarget &STI)
889 : TargetLowering(TM, STI), Subtarget(&STI) {
890 // Instructions which use registers as conditionals examine all the
891 // bits (as does the pseudo SELECT_CC expansion). I don't think it
892 // matters much whether it's ZeroOrOneBooleanContent, or
893 // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
894 // former.
895 setBooleanContents(ZeroOrOneBooleanContent);
896 setBooleanVectorContents(ZeroOrOneBooleanContent);
897
898 initRegisterClasses();
899 initSPUActions();
900 initVPUActions();
901
902 setStackPointerRegisterToSaveRestore(VE::SX11);
903
904 // We have target-specific dag combine patterns for the following nodes:
905 setTargetDAGCombine(ISD::TRUNCATE);
906 setTargetDAGCombine(ISD::SELECT);
907 setTargetDAGCombine(ISD::SELECT_CC);
908
909 // Set function alignment to 16 bytes
910 setMinFunctionAlignment(Align(16));
911
  // VE stores all arguments with 8-byte alignment.
913 setMinStackArgumentAlignment(Align(8));
914
915 computeRegisterProperties(TRI: Subtarget->getRegisterInfo());
916}
917
918EVT VETargetLowering::getSetCCResultType(const DataLayout &,
919 LLVMContext &Context, EVT VT) const {
920 if (VT.isVector())
921 return VT.changeVectorElementType(Context, EltVT: MVT::i1);
922 return MVT::i32;
923}
924
925// Convert to a target node and set target flags.
926SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
927 SelectionDAG &DAG) const {
928 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op))
929 return DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc(GA),
930 VT: GA->getValueType(ResNo: 0), offset: GA->getOffset(), TargetFlags: TF);
931
932 if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Val&: Op))
933 return DAG.getTargetBlockAddress(BA: BA->getBlockAddress(), VT: Op.getValueType(),
934 Offset: 0, TargetFlags: TF);
935
936 if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Val&: Op))
937 return DAG.getTargetConstantPool(C: CP->getConstVal(), VT: CP->getValueType(ResNo: 0),
938 Align: CP->getAlign(), Offset: CP->getOffset(), TargetFlags: TF);
939
940 if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Val&: Op))
941 return DAG.getTargetExternalSymbol(Sym: ES->getSymbol(), VT: ES->getValueType(ResNo: 0),
942 TargetFlags: TF);
943
944 if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Val&: Op))
945 return DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: JT->getValueType(ResNo: 0), TargetFlags: TF);
946
947 llvm_unreachable("Unhandled address SDNode");
948}
949
950// Split Op into high and low parts according to HiTF and LoTF.
951// Return an ADD node combining the parts.
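// For example, with the S_HI32/S_LO32 flags this roughly materializes as:
//   lea     %reg, sym@lo
//   and     %reg, %reg, (32)0
//   lea.sl  %reg, sym@hi(, %reg)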
952SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
953 SelectionDAG &DAG) const {
954 SDLoc DL(Op);
955 EVT VT = Op.getValueType();
956 SDValue Hi = DAG.getNode(Opcode: VEISD::Hi, DL, VT, Operand: withTargetFlags(Op, TF: HiTF, DAG));
957 SDValue Lo = DAG.getNode(Opcode: VEISD::Lo, DL, VT, Operand: withTargetFlags(Op, TF: LoTF, DAG));
958 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Hi, N2: Lo);
959}
960
961// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
962// or ExternalSymbol SDNode.
963SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
964 SDLoc DL(Op);
965 EVT PtrVT = Op.getValueType();
966
  // Handle PIC mode first. VE needs a GOT load for every variable!
968 if (isPositionIndependent()) {
969 auto GlobalN = dyn_cast<GlobalAddressSDNode>(Val&: Op);
970
971 if (isa<ConstantPoolSDNode>(Val: Op) || isa<JumpTableSDNode>(Val: Op) ||
972 (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
      // Create the following instructions for local-linkage PIC code.
974 // lea %reg, label@gotoff_lo
975 // and %reg, %reg, (32)0
976 // lea.sl %reg, label@gotoff_hi(%reg, %got)
977 SDValue HiLo =
978 makeHiLoPair(Op, HiTF: VE::S_GOTOFF_HI32, LoTF: VE::S_GOTOFF_LO32, DAG);
979 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrVT);
980 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: GlobalBase, N2: HiLo);
981 }
    // Create the following instructions for non-local-linkage PIC code.
983 // lea %reg, label@got_lo
984 // and %reg, %reg, (32)0
985 // lea.sl %reg, label@got_hi(%reg)
986 // ld %reg, (%reg, %got)
987 SDValue HiLo = makeHiLoPair(Op, HiTF: VE::S_GOT_HI32, LoTF: VE::S_GOT_LO32, DAG);
988 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrVT);
989 SDValue AbsAddr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: GlobalBase, N2: HiLo);
990 return DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: AbsAddr,
991 PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()));
992 }
993
994 // This is one of the absolute code models.
995 switch (getTargetMachine().getCodeModel()) {
996 default:
997 llvm_unreachable("Unsupported absolute code model");
998 case CodeModel::Small:
999 case CodeModel::Medium:
1000 case CodeModel::Large:
1001 // abs64.
1002 return makeHiLoPair(Op, HiTF: VE::S_HI32, LoTF: VE::S_LO32, DAG);
1003 }
1004}
1005
1006/// Custom Lower {
1007
// The emitLeadingFence/emitTrailingFence mappings for VE are designed
// following http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
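// In short: release and acq_rel operations get a leading release fence,
// seq_cst operations with an atomic store get a leading seq_cst fence, and
// acquire/acq_rel (seq_cst) operations get a trailing acquire (seq_cst) fence.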
1010Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
1011 Instruction *Inst,
1012 AtomicOrdering Ord) const {
1013 switch (Ord) {
1014 case AtomicOrdering::NotAtomic:
1015 case AtomicOrdering::Unordered:
1016 llvm_unreachable("Invalid fence: unordered/non-atomic");
1017 case AtomicOrdering::Monotonic:
1018 case AtomicOrdering::Acquire:
1019 return nullptr; // Nothing to do
1020 case AtomicOrdering::Release:
1021 case AtomicOrdering::AcquireRelease:
1022 return Builder.CreateFence(Ordering: AtomicOrdering::Release);
1023 case AtomicOrdering::SequentiallyConsistent:
1024 if (!Inst->hasAtomicStore())
1025 return nullptr; // Nothing to do
1026 return Builder.CreateFence(Ordering: AtomicOrdering::SequentiallyConsistent);
1027 }
1028 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
1029}
1030
1031Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
1032 Instruction *Inst,
1033 AtomicOrdering Ord) const {
1034 switch (Ord) {
1035 case AtomicOrdering::NotAtomic:
1036 case AtomicOrdering::Unordered:
1037 llvm_unreachable("Invalid fence: unordered/not-atomic");
1038 case AtomicOrdering::Monotonic:
1039 case AtomicOrdering::Release:
1040 return nullptr; // Nothing to do
1041 case AtomicOrdering::Acquire:
1042 case AtomicOrdering::AcquireRelease:
1043 return Builder.CreateFence(Ordering: AtomicOrdering::Acquire);
1044 case AtomicOrdering::SequentiallyConsistent:
1045 return Builder.CreateFence(Ordering: AtomicOrdering::SequentiallyConsistent);
1046 }
1047 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
1048}
1049
1050SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
1051 SelectionDAG &DAG) const {
1052 SDLoc DL(Op);
1053 AtomicOrdering FenceOrdering =
1054 static_cast<AtomicOrdering>(Op.getConstantOperandVal(i: 1));
1055 SyncScope::ID FenceSSID =
1056 static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2));
1057
  // VE uses release consistency, so we need a fence instruction if this is a
  // cross-thread fence.
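  // The fencem immediate below selects what to wait for: 1 acts as a release
  // (store) fence, 2 as an acquire (load) fence, and 3 as a full fence.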
1060 if (FenceSSID == SyncScope::System) {
1061 switch (FenceOrdering) {
1062 case AtomicOrdering::NotAtomic:
1063 case AtomicOrdering::Unordered:
1064 case AtomicOrdering::Monotonic:
1065 // No need to generate fencem instruction here.
1066 break;
1067 case AtomicOrdering::Acquire:
1068 // Generate "fencem 2" as acquire fence.
1069 return SDValue(DAG.getMachineNode(Opcode: VE::FENCEM, dl: DL, VT: MVT::Other,
1070 Op1: DAG.getTargetConstant(Val: 2, DL, VT: MVT::i32),
1071 Op2: Op.getOperand(i: 0)),
1072 0);
1073 case AtomicOrdering::Release:
1074 // Generate "fencem 1" as release fence.
1075 return SDValue(DAG.getMachineNode(Opcode: VE::FENCEM, dl: DL, VT: MVT::Other,
1076 Op1: DAG.getTargetConstant(Val: 1, DL, VT: MVT::i32),
1077 Op2: Op.getOperand(i: 0)),
1078 0);
1079 case AtomicOrdering::AcquireRelease:
1080 case AtomicOrdering::SequentiallyConsistent:
1081 // Generate "fencem 3" as acq_rel and seq_cst fence.
      // FIXME: "fencem 3" doesn't wait for accesses by PCIe devices, so
      // seq_cst may require more instructions for them.
1084 return SDValue(DAG.getMachineNode(Opcode: VE::FENCEM, dl: DL, VT: MVT::Other,
1085 Op1: DAG.getTargetConstant(Val: 3, DL, VT: MVT::i32),
1086 Op2: Op.getOperand(i: 0)),
1087 0);
1088 }
1089 }
1090
1091 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1092 return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0));
1093}
1094
1095TargetLowering::AtomicExpansionKind
1096VETargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const {
1097 // We have TS1AM implementation for i8/i16/i32/i64, so use it.
1098 if (AI->getOperation() == AtomicRMWInst::Xchg) {
1099 return AtomicExpansionKind::None;
1100 }
1101 // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
1102
  // Otherwise, expand it using a compare-and-exchange instruction so that the
  // __sync_fetch_and_* functions are not called.
1105 return AtomicExpansionKind::CmpXChg;
1106}
1107
1108static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
1109 SDValue &Bits) {
1110 SDLoc DL(Op);
1111 AtomicSDNode *N = cast<AtomicSDNode>(Val&: Op);
1112 SDValue Ptr = N->getOperand(Num: 1);
1113 SDValue Val = N->getOperand(Num: 2);
1114 EVT PtrVT = Ptr.getValueType();
1115 bool Byte = N->getMemoryVT() == MVT::i8;
1116 // Remainder = AND Ptr, 3
1117 // Flag = 1 << Remainder ; If Byte is true (1 byte swap flag)
1118 // Flag = 3 << Remainder ; If Byte is false (2 bytes swap flag)
1119 // Bits = Remainder << 3
1120 // NewVal = Val << Bits
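  // For example, a 2-byte access at Ptr % 4 == 2 gives Remainder = 2,
  // Flag = 3 << 2 = 0xc (selecting bytes 2-3 for the swap), Bits = 16, and
  // NewVal = Val << 16.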
1121 SDValue Const3 = DAG.getConstant(Val: 3, DL, VT: PtrVT);
1122 SDValue Remainder = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, Ops: {Ptr, Const3});
1123 SDValue Mask = Byte ? DAG.getConstant(Val: 1, DL, VT: MVT::i32)
1124 : DAG.getConstant(Val: 3, DL, VT: MVT::i32);
1125 Flag = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i32, Ops: {Mask, Remainder});
1126 Bits = DAG.getNode(Opcode: ISD::SHL, DL, VT: PtrVT, Ops: {Remainder, Const3});
1127 return DAG.getNode(Opcode: ISD::SHL, DL, VT: Val.getValueType(), Ops: {Val, Bits});
1128}
1129
1130static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
1131 SDValue Bits) {
1132 SDLoc DL(Op);
1133 EVT VT = Data.getValueType();
1134 bool Byte = cast<AtomicSDNode>(Val&: Op)->getMemoryVT() == MVT::i8;
1135 // NewData = Data >> Bits
1136 // Result = NewData & 0xff ; If Byte is true (1 byte)
1137 // Result = NewData & 0xffff ; If Byte is false (2 bytes)
1138
1139 SDValue NewData = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Data, N2: Bits);
1140 return DAG.getNode(Opcode: ISD::AND, DL, VT,
1141 Ops: {NewData, DAG.getConstant(Val: Byte ? 0xff : 0xffff, DL, VT)});
1142}
1143
1144SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
1145 SelectionDAG &DAG) const {
1146 SDLoc DL(Op);
1147 AtomicSDNode *N = cast<AtomicSDNode>(Val&: Op);
1148
1149 if (N->getMemoryVT() == MVT::i8) {
1150 // For i8, use "ts1am"
1151 // Input:
1152 // ATOMIC_SWAP Ptr, Val, Order
1153 //
1154 // Output:
1155 // Remainder = AND Ptr, 3
1156 // Flag = 1 << Remainder ; 1 byte swap flag for TS1AM inst.
1157 // Bits = Remainder << 3
1158 // NewVal = Val << Bits
1159 //
1160 // Aligned = AND Ptr, -4
1161 // Data = TS1AM Aligned, Flag, NewVal
1162 //
1163 // NewData = Data >> Bits
1164 // Result = NewData & 0xff ; 1 byte result
1165 SDValue Flag;
1166 SDValue Bits;
1167 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1168
1169 SDValue Ptr = N->getOperand(Num: 1);
1170 SDValue Aligned =
1171 DAG.getNode(Opcode: ISD::AND, DL, VT: Ptr.getValueType(),
1172 Ops: {Ptr, DAG.getSignedConstant(Val: -4, DL, VT: MVT::i64)});
1173 SDValue TS1AM = DAG.getAtomic(Opcode: VEISD::TS1AM, dl: DL, MemVT: N->getMemoryVT(),
1174 VTList: DAG.getVTList(VT1: Op.getNode()->getValueType(ResNo: 0),
1175 VT2: Op.getNode()->getValueType(ResNo: 1)),
1176 Ops: {N->getChain(), Aligned, Flag, NewVal},
1177 MMO: N->getMemOperand());
1178
1179 SDValue Result = finalizeTS1AM(Op, DAG, Data: TS1AM, Bits);
1180 SDValue Chain = TS1AM.getValue(R: 1);
1181 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
1182 }
1183 if (N->getMemoryVT() == MVT::i16) {
1184 // For i16, use "ts1am"
1185 SDValue Flag;
1186 SDValue Bits;
1187 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1188
1189 SDValue Ptr = N->getOperand(Num: 1);
1190 SDValue Aligned =
1191 DAG.getNode(Opcode: ISD::AND, DL, VT: Ptr.getValueType(),
1192 Ops: {Ptr, DAG.getSignedConstant(Val: -4, DL, VT: MVT::i64)});
1193 SDValue TS1AM = DAG.getAtomic(Opcode: VEISD::TS1AM, dl: DL, MemVT: N->getMemoryVT(),
1194 VTList: DAG.getVTList(VT1: Op.getNode()->getValueType(ResNo: 0),
1195 VT2: Op.getNode()->getValueType(ResNo: 1)),
1196 Ops: {N->getChain(), Aligned, Flag, NewVal},
1197 MMO: N->getMemOperand());
1198
1199 SDValue Result = finalizeTS1AM(Op, DAG, Data: TS1AM, Bits);
1200 SDValue Chain = TS1AM.getValue(R: 1);
1201 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
1202 }
1203 // Otherwise, let llvm legalize it.
1204 return Op;
1205}
1206
1207SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
1208 SelectionDAG &DAG) const {
1209 return makeAddress(Op, DAG);
1210}
1211
1212SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
1213 SelectionDAG &DAG) const {
1214 return makeAddress(Op, DAG);
1215}
1216
1217SDValue VETargetLowering::lowerConstantPool(SDValue Op,
1218 SelectionDAG &DAG) const {
1219 return makeAddress(Op, DAG);
1220}
1221
1222SDValue
1223VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
1224 SelectionDAG &DAG) const {
1225 SDLoc DL(Op);
1226
1227 // Generate the following code:
1228 // t1: ch,glue = callseq_start t0, 0, 0
1229 // t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
1230 // t3: ch,glue = callseq_end t2, 0, 0, t2:2
1231 // t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
1232 SDValue Label = withTargetFlags(Op, TF: 0, DAG);
1233 EVT PtrVT = Op.getValueType();
1234
1235 // Lowering the machine isd will make sure everything is in the right
1236 // location.
1237 SDValue Chain = DAG.getEntryNode();
1238 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
1239 const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
1240 MF: DAG.getMachineFunction(), CC: CallingConv::C);
1241 Chain = DAG.getCALLSEQ_START(Chain, InSize: 64, OutSize: 0, DL);
1242 SDValue Args[] = {Chain, Label, DAG.getRegisterMask(RegMask: Mask), Chain.getValue(R: 1)};
1243 Chain = DAG.getNode(Opcode: VEISD::GETTLSADDR, DL, VTList: NodeTys, Ops: Args);
1244 Chain = DAG.getCALLSEQ_END(Chain, Size1: 64, Size2: 0, Glue: Chain.getValue(R: 1), DL);
1245 Chain = DAG.getCopyFromReg(Chain, dl: DL, Reg: VE::SX0, VT: PtrVT, Glue: Chain.getValue(R: 1));
1246
1247 // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
1248 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1249 MFI.setHasCalls(true);
1250
1251 // Also generate code to prepare a GOT register if it is PIC.
1252 if (isPositionIndependent()) {
1253 MachineFunction &MF = DAG.getMachineFunction();
1254 Subtarget->getInstrInfo()->getGlobalBaseReg(MF: &MF);
1255 }
1256
1257 return Chain;
1258}
1259
1260SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
1261 SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't accept the local exec
  // model code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we
  // always generate the general dynamic model code sequence.
1265 //
1266 // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
1267 return lowerToTLSGeneralDynamicModel(Op, DAG);
1268}
1269
1270SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1271 return makeAddress(Op, DAG);
1272}
1273
// Lower an f128 load into two f64 loads.
1275static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
1276 SDLoc DL(Op);
1277 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Val: Op.getNode());
1278 assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1279 Align Alignment = LdNode->getAlign();
1280 if (Alignment > 8)
1281 Alignment = Align(8);
1282
1283 SDValue Lo64 =
1284 DAG.getLoad(VT: MVT::f64, dl: DL, Chain: LdNode->getChain(), Ptr: LdNode->getBasePtr(),
1285 PtrInfo: LdNode->getPointerInfo(), Alignment,
1286 MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1287 : MachineMemOperand::MONone);
1288 EVT AddrVT = LdNode->getBasePtr().getValueType();
1289 SDValue HiPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: LdNode->getBasePtr(),
1290 N2: DAG.getConstant(Val: 8, DL, VT: AddrVT));
1291 SDValue Hi64 =
1292 DAG.getLoad(VT: MVT::f64, dl: DL, Chain: LdNode->getChain(), Ptr: HiPtr,
1293 PtrInfo: LdNode->getPointerInfo(), Alignment,
1294 MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1295 : MachineMemOperand::MONone);
1296
1297 SDValue SubRegEven = DAG.getTargetConstant(Val: VE::sub_even, DL, VT: MVT::i32);
1298 SDValue SubRegOdd = DAG.getTargetConstant(Val: VE::sub_odd, DL, VT: MVT::i32);
1299
1300 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
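  // Within the f128 register pair, sub_even receives the high 64 bits and
  // sub_odd receives the low 64 bits.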
1301 SDNode *InFP128 =
1302 DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::f128);
1303 InFP128 = DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL, VT: MVT::f128,
1304 Op1: SDValue(InFP128, 0), Op2: Hi64, Op3: SubRegEven);
1305 InFP128 = DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL, VT: MVT::f128,
1306 Op1: SDValue(InFP128, 0), Op2: Lo64, Op3: SubRegOdd);
1307 SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
1308 SDValue(Hi64.getNode(), 1)};
1309 SDValue OutChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1310 SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
1311 return DAG.getMergeValues(Ops, dl: DL);
1312}
1313
// Lower a vXi1 load into the following instructions:
1315// LDrii %1, (,%addr)
1316// LVMxir %vm, 0, %1
1317// LDrii %2, 8(,%addr)
1318// LVMxir %vm, 0, %2
1319// ...
1320static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
1321 SDLoc DL(Op);
1322 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Val: Op.getNode());
1323 assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1324
1325 SDValue BasePtr = LdNode->getBasePtr();
1326 Align Alignment = LdNode->getAlign();
1327 if (Alignment > 8)
1328 Alignment = Align(8);
1329
1330 EVT AddrVT = BasePtr.getValueType();
1331 EVT MemVT = LdNode->getMemoryVT();
1332 if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1333 SDValue OutChains[4];
1334 SDNode *VM = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MemVT);
1335 for (int i = 0; i < 4; ++i) {
1336 // Generate load dag and prepare chains.
1337 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1338 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1339 SDValue Val =
1340 DAG.getLoad(VT: MVT::i64, dl: DL, Chain: LdNode->getChain(), Ptr: Addr,
1341 PtrInfo: LdNode->getPointerInfo(), Alignment,
1342 MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1343 : MachineMemOperand::MONone);
1344 OutChains[i] = SDValue(Val.getNode(), 1);
1345
1346 VM = DAG.getMachineNode(Opcode: VE::LVMir_m, dl: DL, VT: MVT::i64,
1347 Op1: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64), Op2: Val,
1348 Op3: SDValue(VM, 0));
1349 }
1350 SDValue OutChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1351 SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1352 return DAG.getMergeValues(Ops, dl: DL);
1353 } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1354 SDValue OutChains[8];
1355 SDNode *VM = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MemVT);
1356 for (int i = 0; i < 8; ++i) {
1357 // Generate load dag and prepare chains.
1358 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1359 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1360 SDValue Val =
1361 DAG.getLoad(VT: MVT::i64, dl: DL, Chain: LdNode->getChain(), Ptr: Addr,
1362 PtrInfo: LdNode->getPointerInfo(), Alignment,
1363 MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1364 : MachineMemOperand::MONone);
1365 OutChains[i] = SDValue(Val.getNode(), 1);
1366
1367 VM = DAG.getMachineNode(Opcode: VE::LVMyir_y, dl: DL, VT: MVT::i64,
1368 Op1: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64), Op2: Val,
1369 Op3: SDValue(VM, 0));
1370 }
1371 SDValue OutChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1372 SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1373 return DAG.getMergeValues(Ops, dl: DL);
1374 } else {
1375 // Otherwise, ask llvm to expand it.
1376 return SDValue();
1377 }
1378}
1379
1380SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1381 LoadSDNode *LdNode = cast<LoadSDNode>(Val: Op.getNode());
1382 EVT MemVT = LdNode->getMemoryVT();
1383
1384 // If VPU is enabled, always expand non-mask vector loads to VVP
1385 if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(SomeVT: MemVT))
1386 return lowerToVVP(Op, DAG);
1387
1388 SDValue BasePtr = LdNode->getBasePtr();
1389 if (isa<FrameIndexSDNode>(Val: BasePtr.getNode())) {
1390 // Do not expand a load instruction with a frame index here because of
1391 // dependency problems. We expand it later in eliminateFrameIndex().
1392 return Op;
1393 }
1394
1395 if (MemVT == MVT::f128)
1396 return lowerLoadF128(Op, DAG);
1397 if (isMaskType(SomeVT: MemVT))
1398 return lowerLoadI1(Op, DAG);
1399
1400 return Op;
1401}
1402
1403// Lower a f128 store into two f64 stores.
1404static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
1405 SDLoc DL(Op);
1406 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Val: Op.getNode());
1407 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1408
1409 SDValue SubRegEven = DAG.getTargetConstant(Val: VE::sub_even, DL, VT: MVT::i32);
1410 SDValue SubRegOdd = DAG.getTargetConstant(Val: VE::sub_odd, DL, VT: MVT::i32);
1411
1412 SDNode *Hi64 = DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i64,
1413 Op1: StNode->getValue(), Op2: SubRegEven);
1414 SDNode *Lo64 = DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i64,
1415 Op1: StNode->getValue(), Op2: SubRegOdd);
1416
1417 Align Alignment = StNode->getAlign();
1418 if (Alignment > 8)
1419 Alignment = Align(8);
1420
1421 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1422 SDValue OutChains[2];
1423 OutChains[0] =
1424 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(Lo64, 0),
1425 Ptr: StNode->getBasePtr(), PtrInfo: MachinePointerInfo(), Alignment,
1426 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1427 : MachineMemOperand::MONone);
1428 EVT AddrVT = StNode->getBasePtr().getValueType();
1429 SDValue HiPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: StNode->getBasePtr(),
1430 N2: DAG.getConstant(Val: 8, DL, VT: AddrVT));
1431 OutChains[1] =
1432 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(Hi64, 0), Ptr: HiPtr,
1433 PtrInfo: MachinePointerInfo(), Alignment,
1434 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1435 : MachineMemOperand::MONone);
1436 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1437}
1438
1439// Lower a vXi1 store into the following instruction sequence:
1440// SVMi %1, %vm, 0
1441// STrii %1, (,%addr)
1442// SVMi %2, %vm, 1
1443// STrii %2, 8(,%addr)
1444// ...
1445static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
1446 SDLoc DL(Op);
1447 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Val: Op.getNode());
1448 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1449
1450 SDValue BasePtr = StNode->getBasePtr();
1451 Align Alignment = StNode->getAlign();
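 // Cap the recorded alignment at 8 bytes; the access is split into 64-bit pieces below.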
1452 if (Alignment > 8)
1453 Alignment = Align(8);
1454 EVT AddrVT = BasePtr.getValueType();
1455 EVT MemVT = StNode->getMemoryVT();
1456 if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1457 SDValue OutChains[4];
1458 for (int i = 0; i < 4; ++i) {
1459 SDNode *V =
1460 DAG.getMachineNode(Opcode: VE::SVMmi, dl: DL, VT: MVT::i64, Op1: StNode->getValue(),
1461 Op2: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64));
1462 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1463 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1464 OutChains[i] =
1465 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(V, 0), Ptr: Addr,
1466 PtrInfo: MachinePointerInfo(), Alignment,
1467 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1468 : MachineMemOperand::MONone);
1469 }
1470 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1471 } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1472 SDValue OutChains[8];
1473 for (int i = 0; i < 8; ++i) {
1474 SDNode *V =
1475 DAG.getMachineNode(Opcode: VE::SVMyi, dl: DL, VT: MVT::i64, Op1: StNode->getValue(),
1476 Op2: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64));
1477 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1478 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1479 OutChains[i] =
1480 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(V, 0), Ptr: Addr,
1481 PtrInfo: MachinePointerInfo(), Alignment,
1482 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1483 : MachineMemOperand::MONone);
1484 }
1485 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1486 } else {
1487 // Otherwise, ask llvm to expand it.
1488 return SDValue();
1489 }
1490}
1491
1492SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1493 StoreSDNode *StNode = cast<StoreSDNode>(Val: Op.getNode());
1494 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1495 EVT MemVT = StNode->getMemoryVT();
1496
1497 // If VPU is enabled, always expand non-mask vector stores to VVP
1498 if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(SomeVT: MemVT))
1499 return lowerToVVP(Op, DAG);
1500
1501 SDValue BasePtr = StNode->getBasePtr();
1502 if (isa<FrameIndexSDNode>(Val: BasePtr.getNode())) {
1503 // Do not expand a store instruction with a frame index here because of
1504 // dependency problems. We expand it later in eliminateFrameIndex().
1505 return Op;
1506 }
1507
1508 if (MemVT == MVT::f128)
1509 return lowerStoreF128(Op, DAG);
1510 if (isMaskType(SomeVT: MemVT))
1511 return lowerStoreI1(Op, DAG);
1512
1513 // Otherwise, ask llvm to expand it.
1514 return SDValue();
1515}
1516
1517SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1518 MachineFunction &MF = DAG.getMachineFunction();
1519 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1520 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1521
1522 // Need frame address to find the address of VarArgsFrameIndex.
1523 MF.getFrameInfo().setFrameAddressIsTaken(true);
1524
1525 // vastart just stores the address of the VarArgsFrameIndex slot into the
1526 // memory location argument.
1527 SDLoc DL(Op);
1528 SDValue Offset =
1529 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: DAG.getRegister(Reg: VE::SX9, VT: PtrVT),
1530 N2: DAG.getIntPtrConstant(Val: FuncInfo->getVarArgsFrameOffset(), DL));
1531 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
1532 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: Offset, Ptr: Op.getOperand(i: 1),
1533 PtrInfo: MachinePointerInfo(SV));
1534}
1535
1536SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1537 SDNode *Node = Op.getNode();
1538 EVT VT = Node->getValueType(ResNo: 0);
1539 SDValue InChain = Node->getOperand(Num: 0);
1540 SDValue VAListPtr = Node->getOperand(Num: 1);
1541 EVT PtrVT = VAListPtr.getValueType();
1542 const Value *SV = cast<SrcValueSDNode>(Val: Node->getOperand(Num: 2))->getValue();
1543 SDLoc DL(Node);
1544 SDValue VAList =
1545 DAG.getLoad(VT: PtrVT, dl: DL, Chain: InChain, Ptr: VAListPtr, PtrInfo: MachinePointerInfo(SV));
1546 SDValue Chain = VAList.getValue(R: 1);
1547 SDValue NextPtr;
1548
1549 if (VT == MVT::f128) {
1550 // VE f128 values must be stored with 16-byte alignment. We don't
1551 // know the actual alignment of VAList, so we enforce the alignment
1552 // dynamically.
1553 int Align = 16;
1554 VAList = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList,
1555 N2: DAG.getConstant(Val: Align - 1, DL, VT: PtrVT));
1556 VAList = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, N1: VAList,
1557 N2: DAG.getSignedConstant(Val: -Align, DL, VT: PtrVT));
1558 // Increment the pointer, VAList, by 16 to the next vaarg.
1559 NextPtr =
1560 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: 16, DL));
1561 } else if (VT == MVT::f32) {
1562 // float --> need special handling like below.
1563 // 0 4
1564 // +------+------+
1565 // | empty| float|
1566 // +------+------+
1567 // Increment the pointer, VAList, by 8 to the next vaarg.
1568 NextPtr =
1569 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: 8, DL));
1570 // Then, adjust VAList.
1571 unsigned InternalOffset = 4;
1572 VAList = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList,
1573 N2: DAG.getConstant(Val: InternalOffset, DL, VT: PtrVT));
1574 } else {
1575 // Increment the pointer, VAList, by 8 to the next vaarg.
1576 NextPtr =
1577 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: 8, DL));
1578 }
1579
1580 // Store the incremented VAList to the legalized pointer.
1581 InChain = DAG.getStore(Chain, dl: DL, Val: NextPtr, Ptr: VAListPtr, PtrInfo: MachinePointerInfo(SV));
1582
1583 // Load the actual argument out of the pointer VAList.
1584 // We can't count on greater alignment than the word size.
1585 return DAG.getLoad(
1586 VT, dl: DL, Chain: InChain, Ptr: VAList, PtrInfo: MachinePointerInfo(),
1587 Alignment: Align(std::min(a: PtrVT.getSizeInBits(), b: VT.getSizeInBits()) / 8));
1588}
1589
1590SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1591 SelectionDAG &DAG) const {
1592 // Generate the following code:
1593 // (void)__ve_grow_stack(size); // or __ve_grow_stack_align(size, align)
1594 // ret = GETSTACKTOP; // pseudo instruction
1595 SDLoc DL(Op);
1596
1597 // Get the inputs.
1598 SDNode *Node = Op.getNode();
1599 SDValue Chain = Op.getOperand(i: 0);
1600 SDValue Size = Op.getOperand(i: 1);
1601 MaybeAlign Alignment(Op.getConstantOperandVal(i: 2));
1602 EVT VT = Node->getValueType(ResNo: 0);
1603
1604 // Chain the dynamic stack allocation so that it doesn't modify the stack
1605 // pointer when other instructions are using the stack.
1606 Chain = DAG.getCALLSEQ_START(Chain, InSize: 0, OutSize: 0, DL);
1607
1608 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1609 Align StackAlign = TFI.getStackAlign();
1610 bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1611
1612 // Prepare arguments
1613 TargetLowering::ArgListTy Args;
1614 Args.emplace_back(args&: Size, args: Size.getValueType().getTypeForEVT(Context&: *DAG.getContext()));
1615 if (NeedsAlign) {
1616 SDValue Align = DAG.getConstant(Val: ~(Alignment->value() - 1ULL), DL, VT);
1617 Args.emplace_back(args&: Align,
1618 args: Align.getValueType().getTypeForEVT(Context&: *DAG.getContext()));
1619 }
1620 Type *RetTy = Type::getVoidTy(C&: *DAG.getContext());
1621
1622 EVT PtrVT = Op.getValueType();
1623 SDValue Callee;
1624 if (NeedsAlign) {
1625 Callee = DAG.getTargetExternalSymbol(Sym: "__ve_grow_stack_align", VT: PtrVT, TargetFlags: 0);
1626 } else {
1627 Callee = DAG.getTargetExternalSymbol(Sym: "__ve_grow_stack", VT: PtrVT, TargetFlags: 0);
1628 }
1629
1630 TargetLowering::CallLoweringInfo CLI(DAG);
1631 CLI.setDebugLoc(DL)
1632 .setChain(Chain)
1633 .setCallee(CC: CallingConv::PreserveAll, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
1634 .setDiscardResult(true);
1635 std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1636 Chain = pair.second;
1637 SDValue Result = DAG.getNode(Opcode: VEISD::GETSTACKTOP, DL, VT, Operand: Chain);
1638 if (NeedsAlign) {
1639 Result = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Result,
1640 N2: DAG.getConstant(Val: (Alignment->value() - 1ULL), DL, VT));
1641 Result = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Result,
1642 N2: DAG.getConstant(Val: ~(Alignment->value() - 1ULL), DL, VT));
1643 }
1644 // Chain = Result.getValue(1);
1645 Chain = DAG.getCALLSEQ_END(Chain, Size1: 0, Size2: 0, Glue: SDValue(), DL);
1646
1647 SDValue Ops[2] = {Result, Chain};
1648 return DAG.getMergeValues(Ops, dl: DL);
1649}
1650
1651SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1652 SelectionDAG &DAG) const {
1653 SDLoc DL(Op);
1654 return DAG.getNode(Opcode: VEISD::EH_SJLJ_LONGJMP, DL, VT: MVT::Other, N1: Op.getOperand(i: 0),
1655 N2: Op.getOperand(i: 1));
1656}
1657
1658SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1659 SelectionDAG &DAG) const {
1660 SDLoc DL(Op);
1661 return DAG.getNode(Opcode: VEISD::EH_SJLJ_SETJMP, DL,
1662 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other), N1: Op.getOperand(i: 0),
1663 N2: Op.getOperand(i: 1));
1664}
1665
1666SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1667 SelectionDAG &DAG) const {
1668 SDLoc DL(Op);
1669 return DAG.getNode(Opcode: VEISD::EH_SJLJ_SETUP_DISPATCH, DL, VT: MVT::Other,
1670 Operand: Op.getOperand(i: 0));
1671}
1672
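// Lower FRAMEADDR by reading the frame register and, for non-zero depths,
// following the saved frame pointer chain with one load per level.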
1673static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1674 const VETargetLowering &TLI,
1675 const VESubtarget *Subtarget) {
1676 SDLoc DL(Op);
1677 MachineFunction &MF = DAG.getMachineFunction();
1678 EVT PtrVT = TLI.getPointerTy(DL: MF.getDataLayout());
1679
1680 MachineFrameInfo &MFI = MF.getFrameInfo();
1681 MFI.setFrameAddressIsTaken(true);
1682
1683 unsigned Depth = Op.getConstantOperandVal(i: 0);
1684 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1685 Register FrameReg = RegInfo->getFrameRegister(MF);
1686 SDValue FrameAddr =
1687 DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT: PtrVT);
1688 while (Depth--)
1689 FrameAddr = DAG.getLoad(VT: Op.getValueType(), dl: DL, Chain: DAG.getEntryNode(),
1690 Ptr: FrameAddr, PtrInfo: MachinePointerInfo());
1691 return FrameAddr;
1692}
1693
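// Lower RETURNADDR by loading the return address, which is kept at offset 8
// from the frame address of the requested depth.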
1694static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1695 const VETargetLowering &TLI,
1696 const VESubtarget *Subtarget) {
1697 MachineFunction &MF = DAG.getMachineFunction();
1698 MachineFrameInfo &MFI = MF.getFrameInfo();
1699 MFI.setReturnAddressIsTaken(true);
1700
1701 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1702
1703 SDLoc DL(Op);
1704 EVT VT = Op.getValueType();
1705 SDValue Offset = DAG.getConstant(Val: 8, DL, VT);
1706 return DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(),
1707 Ptr: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, N2: Offset),
1708 PtrInfo: MachinePointerInfo());
1709}
1710
1711SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1712 SelectionDAG &DAG) const {
1713 SDLoc DL(Op);
1714 unsigned IntNo = Op.getConstantOperandVal(i: 0);
1715 switch (IntNo) {
1716 default: // Don't custom lower most intrinsics.
1717 return SDValue();
1718 case Intrinsic::eh_sjlj_lsda: {
1719 MachineFunction &MF = DAG.getMachineFunction();
1720 MVT VT = Op.getSimpleValueType();
1721 const VETargetMachine *TM =
1722 static_cast<const VETargetMachine *>(&DAG.getTarget());
1723
1724 // Create the GCC_except_tableXX string. The real symbol for it will be
1725 // generated in EHStreamer::emitExceptionTable() later, so we just
1726 // borrow its name here.
1727 TM->getStrList()->push_back(x: std::string(
1728 (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1729 SDValue Addr =
1730 DAG.getTargetExternalSymbol(Sym: TM->getStrList()->back().c_str(), VT, TargetFlags: 0);
1731 if (isPositionIndependent()) {
1732 Addr = makeHiLoPair(Op: Addr, HiTF: VE::S_GOTOFF_HI32, LoTF: VE::S_GOTOFF_LO32, DAG);
1733 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT);
1734 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: GlobalBase, N2: Addr);
1735 }
1736 return makeHiLoPair(Op: Addr, HiTF: VE::S_HI32, LoTF: VE::S_LO32, DAG);
1737 }
1738 }
1739}
1740
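// Return true if N is a BUILD_VECTOR with exactly one non-undef operand, and
// report that operand's index in UniqueIdx.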
1741static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1742 if (!isa<BuildVectorSDNode>(Val: N))
1743 return false;
1744 const auto *BVN = cast<BuildVectorSDNode>(Val: N);
1745
1746 // Find first non-undef insertion.
1747 unsigned Idx;
1748 for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1749 auto ElemV = BVN->getOperand(Num: Idx);
1750 if (!ElemV->isUndef())
1751 break;
1752 }
1753 // Catch the (hypothetical) all-undef case.
1754 if (Idx == BVN->getNumOperands())
1755 return false;
1756 // Remember insertion.
1757 UniqueIdx = Idx++;
1758 // Verify that all other insertions are undef.
1759 for (; Idx < BVN->getNumOperands(); ++Idx) {
1760 auto ElemV = BVN->getOperand(Num: Idx);
1761 if (!ElemV->isUndef())
1762 return false;
1763 }
1764 return true;
1765}
1766
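// Return the splat value if N is a BUILD_VECTOR splat, otherwise an empty SDValue.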
1767static SDValue getSplatValue(SDNode *N) {
1768 if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(Val: N)) {
1769 return BuildVec->getSplatValue();
1770 }
1771 return SDValue();
1772}
1773
1774SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1775 SelectionDAG &DAG) const {
1776 VECustomDAG CDAG(DAG, Op);
1777 MVT ResultVT = Op.getSimpleValueType();
1778
1779 // If there is just one non-undef element, expand to INSERT_VECTOR_ELT.
1780 unsigned UniqueIdx;
1781 if (getUniqueInsertion(N: Op.getNode(), UniqueIdx)) {
1782 SDValue AccuV = CDAG.getUNDEF(VT: Op.getValueType());
1783 auto ElemV = Op->getOperand(Num: UniqueIdx);
1784 SDValue IdxV = CDAG.getConstant(Val: UniqueIdx, VT: MVT::i64);
1785 return CDAG.getNode(OC: ISD::INSERT_VECTOR_ELT, ResVT: ResultVT, OpV: {AccuV, ElemV, IdxV});
1786 }
1787
1788 // Else emit a broadcast.
1789 if (SDValue ScalarV = getSplatValue(N: Op.getNode())) {
1790 unsigned NumEls = ResultVT.getVectorNumElements();
1791 auto AVL = CDAG.getConstant(Val: NumEls, VT: MVT::i32);
1792 return CDAG.getBroadcast(ResultVT, Scalar: ScalarV, AVL);
1793 }
1794
1795 // Expand
1796 return SDValue();
1797}
1798
1799TargetLowering::LegalizeAction
1800VETargetLowering::getCustomOperationAction(SDNode &Op) const {
1801 // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
1802 // these operations (transform nodes such that their AVL parameter refers to
1803 // packs of 64 bits instead of the number of elements).
1804
1805 // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1806 // re-visit them.
1807 if (isPackingSupportOpcode(Opc: Op.getOpcode()))
1808 return Legal;
1809
1810 // Custom lower to legalize AVL for packed mode.
1811 if (isVVPOrVEC(Op.getOpcode()))
1812 return Custom;
1813 return Legal;
1814}
1815
1816SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1817 LLVM_DEBUG(dbgs() << "::LowerOperation "; Op.dump(&DAG));
1818 unsigned Opcode = Op.getOpcode();
1819
1820 /// Scalar isel.
1821 switch (Opcode) {
1822 case ISD::ATOMIC_FENCE:
1823 return lowerATOMIC_FENCE(Op, DAG);
1824 case ISD::ATOMIC_SWAP:
1825 return lowerATOMIC_SWAP(Op, DAG);
1826 case ISD::BlockAddress:
1827 return lowerBlockAddress(Op, DAG);
1828 case ISD::ConstantPool:
1829 return lowerConstantPool(Op, DAG);
1830 case ISD::DYNAMIC_STACKALLOC:
1831 return lowerDYNAMIC_STACKALLOC(Op, DAG);
1832 case ISD::EH_SJLJ_LONGJMP:
1833 return lowerEH_SJLJ_LONGJMP(Op, DAG);
1834 case ISD::EH_SJLJ_SETJMP:
1835 return lowerEH_SJLJ_SETJMP(Op, DAG);
1836 case ISD::EH_SJLJ_SETUP_DISPATCH:
1837 return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1838 case ISD::FRAMEADDR:
1839 return lowerFRAMEADDR(Op, DAG, TLI: *this, Subtarget);
1840 case ISD::GlobalAddress:
1841 return lowerGlobalAddress(Op, DAG);
1842 case ISD::GlobalTLSAddress:
1843 return lowerGlobalTLSAddress(Op, DAG);
1844 case ISD::INTRINSIC_WO_CHAIN:
1845 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1846 case ISD::JumpTable:
1847 return lowerJumpTable(Op, DAG);
1848 case ISD::LOAD:
1849 return lowerLOAD(Op, DAG);
1850 case ISD::RETURNADDR:
1851 return lowerRETURNADDR(Op, DAG, TLI: *this, Subtarget);
1852 case ISD::BUILD_VECTOR:
1853 return lowerBUILD_VECTOR(Op, DAG);
1854 case ISD::STORE:
1855 return lowerSTORE(Op, DAG);
1856 case ISD::VASTART:
1857 return lowerVASTART(Op, DAG);
1858 case ISD::VAARG:
1859 return lowerVAARG(Op, DAG);
1860
1861 case ISD::INSERT_VECTOR_ELT:
1862 return lowerINSERT_VECTOR_ELT(Op, DAG);
1863 case ISD::EXTRACT_VECTOR_ELT:
1864 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1865 }
1866
1867 /// Vector isel.
1868 if (ISD::isVPOpcode(Opcode))
1869 return lowerToVVP(Op, DAG);
1870
1871 switch (Opcode) {
1872 default:
1873 llvm_unreachable("Should not custom lower this!");
1874
1875 // Legalize the AVL of this internal node.
1876 case VEISD::VEC_BROADCAST:
1877#define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
1878#include "VVPNodes.def"
1879 // AVL already legalized.
1880 if (getAnnotatedNodeAVL(Op).second)
1881 return Op;
1882 return legalizeInternalVectorOp(Op, DAG);
1883
1884 // Translate into a VEC_*/VVP_* layer operation.
1885 case ISD::MLOAD:
1886 case ISD::MSTORE:
1887#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1888#include "VVPNodes.def"
1889 if (isMaskArithmetic(Op) && isPackedVectorType(SomeVT: Op.getValueType()))
1890 return splitMaskArithmetic(Op, DAG);
1891 return lowerToVVP(Op, DAG);
1892 }
1893}
1894/// } Custom Lower
1895
1896void VETargetLowering::ReplaceNodeResults(SDNode *N,
1897 SmallVectorImpl<SDValue> &Results,
1898 SelectionDAG &DAG) const {
1899 switch (N->getOpcode()) {
1900 case ISD::ATOMIC_SWAP:
1901 // Let LLVM expand atomic swap instruction through LowerOperation.
1902 return;
1903 default:
1904 LLVM_DEBUG(N->dumpr(&DAG));
1905 llvm_unreachable("Do not know how to custom type legalize this operation!");
1906 }
1907}
1908
1909/// JumpTable for VE.
1910///
1911/// VE cannot generate relocatable symbols in a jump table, and it cannot
1912/// generate expressions using symbols from both the text segment and the
1913/// data segment, like below.
1914/// .4byte .LBB0_2-.LJTI0_0
1915/// So, we instead generate an offset from the top of the function, like
1916/// below, as a custom label.
1917/// .4byte .LBB0_2-<function name>
1918
1919unsigned VETargetLowering::getJumpTableEncoding() const {
1920 // Use custom label for PIC.
1921 if (isPositionIndependent())
1922 return MachineJumpTableInfo::EK_Custom32;
1923
1924 // Otherwise, use the normal jump table encoding heuristics.
1925 return TargetLowering::getJumpTableEncoding();
1926}
1927
1928const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1929 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1930 unsigned Uid, MCContext &Ctx) const {
1931 assert(isPositionIndependent());
1932
1933 // Generate a custom label for PIC like below.
1934 // .4byte .LBB0_2-<function name>
1935 const auto *Value = MCSymbolRefExpr::create(Symbol: MBB->getSymbol(), Ctx);
1936 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: MBB->getParent()->getName().data());
1937 const auto *Base = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
1938 return MCBinaryExpr::createSub(LHS: Value, RHS: Base, Ctx);
1939}
1940
1941SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1942 SelectionDAG &DAG) const {
1943 assert(isPositionIndependent());
1944 SDLoc DL(Table);
1945 Function *Function = &DAG.getMachineFunction().getFunction();
1946 assert(Function != nullptr);
1947 auto PtrTy = getPointerTy(DL: DAG.getDataLayout(), AS: Function->getAddressSpace());
1948
1949 // In the jump table, we have the following values in PIC mode.
1950 // .4byte .LBB0_2-<function name>
1951 // We need to add this value and the address of this function to generate
1952 // the .LBB0_2 label correctly under PIC mode. So, we want to generate the
1953 // following instructions:
1954 // lea %reg, fun@gotoff_lo
1955 // and %reg, %reg, (32)0
1956 // lea.sl %reg, fun@gotoff_hi(%reg, %got)
1957 // In order to do so, we need to generate a correctly marked DAG node using
1958 // makeHiLoPair.
1959 SDValue Op = DAG.getGlobalAddress(GV: Function, DL, VT: PtrTy);
1960 SDValue HiLo = makeHiLoPair(Op, HiTF: VE::S_GOTOFF_HI32, LoTF: VE::S_GOTOFF_LO32, DAG);
1961 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrTy);
1962 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: GlobalBase, N2: HiLo);
1963}
1964
1965Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
1966 MachineBasicBlock::iterator I,
1967 MachineBasicBlock *TargetBB,
1968 const DebugLoc &DL) const {
1969 MachineFunction *MF = MBB.getParent();
1970 MachineRegisterInfo &MRI = MF->getRegInfo();
1971 const VEInstrInfo *TII = Subtarget->getInstrInfo();
1972
1973 const TargetRegisterClass *RC = &VE::I64RegClass;
1974 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
1975 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
1976 Register Result = MRI.createVirtualRegister(RegClass: RC);
1977
1978 if (isPositionIndependent()) {
1979 // Create following instructions for local linkage PIC code.
1980 // lea %Tmp1, TargetBB@gotoff_lo
1981 // and %Tmp2, %Tmp1, (32)0
1982 // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
1983 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
1984 .addImm(Val: 0)
1985 .addImm(Val: 0)
1986 .addMBB(MBB: TargetBB, TargetFlags: VE::S_GOTOFF_LO32);
1987 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
1988 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
1989 .addImm(Val: M0(Val: 32));
1990 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Result)
1991 .addReg(RegNo: VE::SX15)
1992 .addReg(RegNo: Tmp2, Flags: getKillRegState(B: true))
1993 .addMBB(MBB: TargetBB, TargetFlags: VE::S_GOTOFF_HI32);
1994 } else {
1995 // Create following instructions for non-PIC code.
1996 // lea %Tmp1, TargetBB@lo
1997 // and %Tmp2, %Tmp1, (32)0
1998 // lea.sl %Result, TargetBB@hi(%Tmp2)
1999 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2000 .addImm(Val: 0)
2001 .addImm(Val: 0)
2002 .addMBB(MBB: TargetBB, TargetFlags: VE::S_LO32);
2003 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2004 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2005 .addImm(Val: M0(Val: 32));
2006 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: Result)
2007 .addReg(RegNo: Tmp2, Flags: getKillRegState(B: true))
2008 .addImm(Val: 0)
2009 .addMBB(MBB: TargetBB, TargetFlags: VE::S_HI32);
2010 }
2011 return Result;
2012}
2013
2014Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
2015 MachineBasicBlock::iterator I,
2016 StringRef Symbol, const DebugLoc &DL,
2017 bool IsLocal = false,
2018 bool IsCall = false) const {
2019 MachineFunction *MF = MBB.getParent();
2020 MachineRegisterInfo &MRI = MF->getRegInfo();
2021 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2022
2023 const TargetRegisterClass *RC = &VE::I64RegClass;
2024 Register Result = MRI.createVirtualRegister(RegClass: RC);
2025
2026 if (isPositionIndependent()) {
2027 if (IsCall && !IsLocal) {
2028 // Create following instructions for non-local linkage PIC code function
2029 // calls. These instructions use the IC and the magic number -24, so we expand
2030 // them in VEAsmPrinter.cpp from the GETFUNPLT pseudo instruction.
2031 // lea %Reg, Symbol@plt_lo(-24)
2032 // and %Reg, %Reg, (32)0
2033 // sic %s16
2034 // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
2035 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::GETFUNPLT), DestReg: Result)
2036 .addExternalSymbol(FnName: "abort");
2037 } else if (IsLocal) {
2038 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2039 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2040 // Create following instructions for local linkage PIC code.
2041 // lea %Tmp1, Symbol@gotoff_lo
2042 // and %Tmp2, %Tmp1, (32)0
2043 // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2044 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2045 .addImm(Val: 0)
2046 .addImm(Val: 0)
2047 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOTOFF_LO32);
2048 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2049 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2050 .addImm(Val: M0(Val: 32));
2051 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Result)
2052 .addReg(RegNo: VE::SX15)
2053 .addReg(RegNo: Tmp2, Flags: getKillRegState(B: true))
2054 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOTOFF_HI32);
2055 } else {
2056 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2057 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2058 // Create following instructions for non-local linkage PIC code.
2059 // lea %Tmp1, Symbol@got_lo
2060 // and %Tmp2, %Tmp1, (32)0
2061 // lea.sl %Tmp3, Symbol@got_hi(%Tmp2, %s15) ; %s15 is GOT
2062 // ld %Result, 0(%Tmp3)
2063 Register Tmp3 = MRI.createVirtualRegister(RegClass: RC);
2064 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2065 .addImm(Val: 0)
2066 .addImm(Val: 0)
2067 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOT_LO32);
2068 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2069 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2070 .addImm(Val: M0(Val: 32));
2071 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Tmp3)
2072 .addReg(RegNo: VE::SX15)
2073 .addReg(RegNo: Tmp2, Flags: getKillRegState(B: true))
2074 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOT_HI32);
2075 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: Result)
2076 .addReg(RegNo: Tmp3, Flags: getKillRegState(B: true))
2077 .addImm(Val: 0)
2078 .addImm(Val: 0);
2079 }
2080 } else {
2081 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2082 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2083 // Create following instructions for non-PIC code.
2084 // lea %Tmp1, Symbol@lo
2085 // and %Tmp2, %Tmp1, (32)0
2086 // lea.sl %Result, Symbol@hi(%Tmp2)
2087 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2088 .addImm(Val: 0)
2089 .addImm(Val: 0)
2090 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_LO32);
2091 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2092 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2093 .addImm(Val: M0(Val: 32));
2094 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: Result)
2095 .addReg(RegNo: Tmp2, Flags: getKillRegState(B: true))
2096 .addImm(Val: 0)
2097 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_HI32);
2098 }
2099 return Result;
2100}
2101
2102void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
2103 MachineBasicBlock *MBB,
2104 MachineBasicBlock *DispatchBB,
2105 int FI, int Offset) const {
2106 DebugLoc DL = MI.getDebugLoc();
2107 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2108
2109 Register LabelReg =
2110 prepareMBB(MBB&: *MBB, I: MachineBasicBlock::iterator(MI), TargetBB: DispatchBB, DL);
2111
2112 // Store the address of DispatchBB into the given jmpbuf[1], which holds the
2113 // next IC referenced by longjmp (throw) later.
2114 MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2115 addFrameReference(MIB, FI, Offset); // jmpbuf[1]
2116 MIB.addReg(RegNo: LabelReg, Flags: getKillRegState(B: true));
2117}
2118
2119MachineBasicBlock *
2120VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
2121 MachineBasicBlock *MBB) const {
2122 DebugLoc DL = MI.getDebugLoc();
2123 MachineFunction *MF = MBB->getParent();
2124 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2125 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
2126 MachineRegisterInfo &MRI = MF->getRegInfo();
2127
2128 const BasicBlock *BB = MBB->getBasicBlock();
2129 MachineFunction::iterator I = ++MBB->getIterator();
2130
2131 // Memory Reference.
2132 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
2133 Register BufReg = MI.getOperand(i: 1).getReg();
2134
2135 Register DstReg;
2136
2137 DstReg = MI.getOperand(i: 0).getReg();
2138 const TargetRegisterClass *RC = MRI.getRegClass(Reg: DstReg);
2139 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
2140 (void)TRI;
2141 Register MainDestReg = MRI.createVirtualRegister(RegClass: RC);
2142 Register RestoreDestReg = MRI.createVirtualRegister(RegClass: RC);
2143
2144 // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate the following
2145 // instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2146 //
2147 // ThisMBB:
2148 // buf[3] = %s17 iff %s17 is used as BP
2149 // buf[1] = RestoreMBB as IC after longjmp
2150 // # SjLjSetup RestoreMBB
2151 //
2152 // MainMBB:
2153 // v_main = 0
2154 //
2155 // SinkMBB:
2156 // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2157 // ...
2158 //
2159 // RestoreMBB:
2160 // %s17 = buf[3] iff %s17 is used as BP
2161 // v_restore = 1
2162 // goto SinkMBB
2163
2164 MachineBasicBlock *ThisMBB = MBB;
2165 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2166 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2167 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2168 MF->insert(MBBI: I, MBB: MainMBB);
2169 MF->insert(MBBI: I, MBB: SinkMBB);
2170 MF->push_back(MBB: RestoreMBB);
2171 RestoreMBB->setMachineBlockAddressTaken();
2172
2173 // Transfer the remainder of BB and its successor edges to SinkMBB.
2174 SinkMBB->splice(Where: SinkMBB->begin(), Other: MBB,
2175 From: std::next(x: MachineBasicBlock::iterator(MI)), To: MBB->end());
2176 SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
2177
2178 // ThisMBB:
2179 Register LabelReg =
2180 prepareMBB(MBB&: *MBB, I: MachineBasicBlock::iterator(MI), TargetBB: RestoreMBB, DL);
2181
2182 // Store BP in buf[3] iff this function is using BP.
2183 const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2184 if (TFI->hasBP(MF: *MF)) {
2185 MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2186 MIB.addReg(RegNo: BufReg);
2187 MIB.addImm(Val: 0);
2188 MIB.addImm(Val: 24);
2189 MIB.addReg(RegNo: VE::SX17);
2190 MIB.setMemRefs(MMOs);
2191 }
2192
2193 // Store IP in buf[1].
2194 MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2195 MIB.add(MO: MI.getOperand(i: 1)); // we can preserve the kill flags here.
2196 MIB.addImm(Val: 0);
2197 MIB.addImm(Val: 8);
2198 MIB.addReg(RegNo: LabelReg, Flags: getKillRegState(B: true));
2199 MIB.setMemRefs(MMOs);
2200
2201 // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2202
2203 // Insert setup.
2204 MIB =
2205 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::EH_SjLj_Setup)).addMBB(MBB: RestoreMBB);
2206
2207 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2208 MIB.addRegMask(Mask: RegInfo->getNoPreservedMask());
2209 ThisMBB->addSuccessor(Succ: MainMBB);
2210 ThisMBB->addSuccessor(Succ: RestoreMBB);
2211
2212 // MainMBB:
2213 BuildMI(BB: MainMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: MainDestReg)
2214 .addImm(Val: 0)
2215 .addImm(Val: 0)
2216 .addImm(Val: 0);
2217 MainMBB->addSuccessor(Succ: SinkMBB);
2218
2219 // SinkMBB:
2220 BuildMI(BB&: *SinkMBB, I: SinkMBB->begin(), MIMD: DL, MCID: TII->get(Opcode: VE::PHI), DestReg: DstReg)
2221 .addReg(RegNo: MainDestReg)
2222 .addMBB(MBB: MainMBB)
2223 .addReg(RegNo: RestoreDestReg)
2224 .addMBB(MBB: RestoreMBB);
2225
2226 // RestoreMBB:
2227 // Restore BP from buf[3] iff this function is using BP. The address of
2228 // buf is in SX10.
2229 // FIXME: Better to not use SX10 here
2230 if (TFI->hasBP(MF: *MF)) {
2231 MachineInstrBuilder MIB =
2232 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: VE::SX17);
2233 MIB.addReg(RegNo: VE::SX10);
2234 MIB.addImm(Val: 0);
2235 MIB.addImm(Val: 24);
2236 MIB.setMemRefs(MMOs);
2237 }
2238 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: RestoreDestReg)
2239 .addImm(Val: 0)
2240 .addImm(Val: 0)
2241 .addImm(Val: 1);
2242 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLa_t)).addMBB(MBB: SinkMBB);
2243 RestoreMBB->addSuccessor(Succ: SinkMBB);
2244
2245 MI.eraseFromParent();
2246 return SinkMBB;
2247}
2248
2249MachineBasicBlock *
2250VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2251 MachineBasicBlock *MBB) const {
2252 DebugLoc DL = MI.getDebugLoc();
2253 MachineFunction *MF = MBB->getParent();
2254 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2255 MachineRegisterInfo &MRI = MF->getRegInfo();
2256
2257 // Memory Reference.
2258 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
2259 Register BufReg = MI.getOperand(i: 0).getReg();
2260
2261 Register Tmp = MRI.createVirtualRegister(RegClass: &VE::I64RegClass);
2262 // Since FP is only updated here but NOT referenced, it's treated as GPR.
2263 Register FP = VE::SX9;
2264 Register SP = VE::SX11;
2265
2266 MachineInstrBuilder MIB;
2267
2268 MachineBasicBlock *ThisMBB = MBB;
2269
2270 // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate the following instructions.
2271 //
2272 // ThisMBB:
2273 // %fp = load buf[0]
2274 // %jmp = load buf[1]
2275 // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2276 // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2277 // jmp %jmp
2278
2279 // Reload FP.
2280 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: FP);
2281 MIB.addReg(RegNo: BufReg);
2282 MIB.addImm(Val: 0);
2283 MIB.addImm(Val: 0);
2284 MIB.setMemRefs(MMOs);
2285
2286 // Reload IP.
2287 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: Tmp);
2288 MIB.addReg(RegNo: BufReg);
2289 MIB.addImm(Val: 0);
2290 MIB.addImm(Val: 8);
2291 MIB.setMemRefs(MMOs);
2292
2293 // Copy BufReg to SX10 for later use in setjmp.
2294 // FIXME: Better to not use SX10 here
2295 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::ORri), DestReg: VE::SX10)
2296 .addReg(RegNo: BufReg)
2297 .addImm(Val: 0);
2298
2299 // Reload SP.
2300 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: SP);
2301 MIB.add(MO: MI.getOperand(i: 0)); // we can preserve the kill flags here.
2302 MIB.addImm(Val: 0);
2303 MIB.addImm(Val: 16);
2304 MIB.setMemRefs(MMOs);
2305
2306 // Jump.
2307 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2308 .addReg(RegNo: Tmp, Flags: getKillRegState(B: true))
2309 .addImm(Val: 0);
2310
2311 MI.eraseFromParent();
2312 return ThisMBB;
2313}
2314
2315MachineBasicBlock *
2316VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2317 MachineBasicBlock *BB) const {
2318 DebugLoc DL = MI.getDebugLoc();
2319 MachineFunction *MF = BB->getParent();
2320 MachineFrameInfo &MFI = MF->getFrameInfo();
2321 MachineRegisterInfo &MRI = MF->getRegInfo();
2322 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2323 int FI = MFI.getFunctionContextIndex();
2324
2325 // Get a mapping of the call site numbers to all of the landing pads they're
2326 // associated with.
2327 DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
2328 unsigned MaxCSNum = 0;
2329 for (auto &MBB : *MF) {
2330 if (!MBB.isEHPad())
2331 continue;
2332
2333 MCSymbol *Sym = nullptr;
2334 for (const auto &MI : MBB) {
2335 if (MI.isDebugInstr())
2336 continue;
2337
2338 assert(MI.isEHLabel() && "expected EH_LABEL");
2339 Sym = MI.getOperand(i: 0).getMCSymbol();
2340 break;
2341 }
2342
2343 if (!MF->hasCallSiteLandingPad(Sym))
2344 continue;
2345
2346 for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2347 CallSiteNumToLPad[CSI].push_back(Elt: &MBB);
2348 MaxCSNum = std::max(a: MaxCSNum, b: CSI);
2349 }
2350 }
2351
2352 // Get an ordered list of the machine basic blocks for the jump table.
2353 std::vector<MachineBasicBlock *> LPadList;
2354 SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
2355 LPadList.reserve(n: CallSiteNumToLPad.size());
2356
2357 for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
2358 for (auto &LP : CallSiteNumToLPad[CSI]) {
2359 LPadList.push_back(x: LP);
2360 InvokeBBs.insert_range(R: LP->predecessors());
2361 }
2362 }
2363
2364 assert(!LPadList.empty() &&
2365 "No landing pad destinations for the dispatch jump table!");
2366
2367 // The %fn_context is allocated like below (from --print-after=sjljehprepare):
2368 // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2369 //
2370 // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
2371 // First `i64` is callsite, so callsite is FI+8.
2372 static const int OffsetIC = 72;
2373 static const int OffsetCS = 8;
2374
2375 // Create the MBBs for the dispatch code like following:
2376 //
2377 // ThisMBB:
2378 // Prepare DispatchBB address and store it to buf[1].
2379 // ...
2380 //
2381 // DispatchBB:
2382 // %s15 = GETGOT iff isPositionIndependent
2383 // %callsite = load callsite
2384 // brgt.l.t #size of callsites, %callsite, DispContBB
2385 //
2386 // TrapBB:
2387 // Call abort.
2388 //
2389 // DispContBB:
2390 // %breg = address of jump table
2391 // %pc = load and calculate next pc from %breg and %callsite
2392 // jmp %pc
2393
2394 // Shove the dispatch's address into the return slot in the function context.
2395 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2396 DispatchBB->setIsEHPad(true);
2397
2398 // TrapBB will cause a trap like `assert(0)`.
2399 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2400 DispatchBB->addSuccessor(Succ: TrapBB);
2401
2402 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2403 DispatchBB->addSuccessor(Succ: DispContBB);
2404
2405 // Insert MBBs.
2406 MF->push_back(MBB: DispatchBB);
2407 MF->push_back(MBB: DispContBB);
2408 MF->push_back(MBB: TrapBB);
2409
2410 // Insert code to call abort in the TrapBB.
2411 Register Abort = prepareSymbol(MBB&: *TrapBB, I: TrapBB->end(), Symbol: "abort", DL,
2412 /* Local */ IsLocal: false, /* Call */ IsCall: true);
2413 BuildMI(BB: TrapBB, MIMD: DL, MCID: TII->get(Opcode: VE::BSICrii), DestReg: VE::SX10)
2414 .addReg(RegNo: Abort, Flags: getKillRegState(B: true))
2415 .addImm(Val: 0)
2416 .addImm(Val: 0);
2417
2418 // Insert code into the entry block that creates and registers the function
2419 // context.
2420 setupEntryBlockForSjLj(MI, MBB: BB, DispatchBB, FI, Offset: OffsetIC);
2421
2422 // Create the jump table and associated information
2423 unsigned JTE = getJumpTableEncoding();
2424 MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTEntryKind: JTE);
2425 unsigned MJTI = JTI->createJumpTableIndex(DestBBs: LPadList);
2426
2427 const VERegisterInfo &RI = TII->getRegisterInfo();
2428 // Add a register mask with no preserved registers. This results in all
2429 // registers being marked as clobbered.
2430 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::NOP))
2431 .addRegMask(Mask: RI.getNoPreservedMask());
2432
2433 if (isPositionIndependent()) {
2434 // Force generation of GETGOT, since the current implementation doesn't
2435 // preserve the GOT register.
2436 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::GETGOT), DestReg: VE::SX15);
2437 }
2438
2439 // IReg is used as an index in a memory operand and therefore can't be SP
2440 const TargetRegisterClass *RC = &VE::I64RegClass;
2441 Register IReg = MRI.createVirtualRegister(RegClass: RC);
2442 addFrameReference(MIB: BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDLZXrii), DestReg: IReg), FI,
2443 Offset: OffsetCS);
2444 if (LPadList.size() < 64) {
2445 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLir_t))
2446 .addImm(Val: VECC::CC_ILE)
2447 .addImm(Val: LPadList.size())
2448 .addReg(RegNo: IReg)
2449 .addMBB(MBB: TrapBB);
2450 } else {
2451 assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2452 Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
2453 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: TmpReg)
2454 .addImm(Val: 0)
2455 .addImm(Val: 0)
2456 .addImm(Val: LPadList.size());
2457 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLrr_t))
2458 .addImm(Val: VECC::CC_ILE)
2459 .addReg(RegNo: TmpReg, Flags: getKillRegState(B: true))
2460 .addReg(RegNo: IReg)
2461 .addMBB(MBB: TrapBB);
2462 }
2463
2464 Register BReg = MRI.createVirtualRegister(RegClass: RC);
2465 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2466 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2467
2468 if (isPositionIndependent()) {
2469 // Create following instructions for local linkage PIC code.
2470 // lea %Tmp1, .LJTI0_0@gotoff_lo
2471 // and %Tmp2, %Tmp1, (32)0
2472 // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2473 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2474 .addImm(Val: 0)
2475 .addImm(Val: 0)
2476 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_GOTOFF_LO32);
2477 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2478 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2479 .addImm(Val: M0(Val: 32));
2480 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: BReg)
2481 .addReg(RegNo: VE::SX15)
2482 .addReg(RegNo: Tmp2, Flags: getKillRegState(B: true))
2483 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_GOTOFF_HI32);
2484 } else {
2485 // Create following instructions for non-PIC code.
2486 // lea %Tmp1, .LJTI0_0@lo
2487 // and %Tmp2, %Tmp1, (32)0
2488 // lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2489 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2490 .addImm(Val: 0)
2491 .addImm(Val: 0)
2492 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_LO32);
2493 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2494 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2495 .addImm(Val: M0(Val: 32));
2496 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: BReg)
2497 .addReg(RegNo: Tmp2, Flags: getKillRegState(B: true))
2498 .addImm(Val: 0)
2499 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_HI32);
2500 }
2501
2502 switch (JTE) {
2503 case MachineJumpTableInfo::EK_BlockAddress: {
2504 // Generate simple block address code for the non-PIC model.
2505 // sll %Tmp1, %IReg, 3
2506 // lds %TReg, 0(%Tmp1, %BReg)
2507 // bcfla %TReg
2508
2509 Register TReg = MRI.createVirtualRegister(RegClass: RC);
2510 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2511
2512 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::SLLri), DestReg: Tmp1)
2513 .addReg(RegNo: IReg, Flags: getKillRegState(B: true))
2514 .addImm(Val: 3);
2515 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDrri), DestReg: TReg)
2516 .addReg(RegNo: BReg, Flags: getKillRegState(B: true))
2517 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2518 .addImm(Val: 0);
2519 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2520 .addReg(RegNo: TReg, Flags: getKillRegState(B: true))
2521 .addImm(Val: 0);
2522 break;
2523 }
2524 case MachineJumpTableInfo::EK_Custom32: {
2525 // Generate block address code using differences from the function pointer
2526 // for PIC model.
2527 // sll %Tmp1, %IReg, 2
2528 // ldl.zx %OReg, 0(%Tmp1, %BReg)
2529 // Prepare function address in BReg2.
2530 // adds.l %TReg, %BReg2, %OReg
2531 // bcfla %TReg
2532
2533 assert(isPositionIndependent());
2534 Register OReg = MRI.createVirtualRegister(RegClass: RC);
2535 Register TReg = MRI.createVirtualRegister(RegClass: RC);
2536 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2537
2538 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::SLLri), DestReg: Tmp1)
2539 .addReg(RegNo: IReg, Flags: getKillRegState(B: true))
2540 .addImm(Val: 2);
2541 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDLZXrri), DestReg: OReg)
2542 .addReg(RegNo: BReg, Flags: getKillRegState(B: true))
2543 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2544 .addImm(Val: 0);
2545 Register BReg2 =
2546 prepareSymbol(MBB&: *DispContBB, I: DispContBB->end(),
2547 Symbol: DispContBB->getParent()->getName(), DL, /* Local */ IsLocal: true);
2548 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ADDSLrr), DestReg: TReg)
2549 .addReg(RegNo: OReg, Flags: getKillRegState(B: true))
2550 .addReg(RegNo: BReg2, Flags: getKillRegState(B: true));
2551 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2552 .addReg(RegNo: TReg, Flags: getKillRegState(B: true))
2553 .addImm(Val: 0);
2554 break;
2555 }
2556 default:
2557 llvm_unreachable("Unexpected jump table encoding");
2558 }
2559
2560 // Add the jump table entries as successors to the MBB.
2561 SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
2562 for (auto &LP : LPadList)
2563 if (SeenMBBs.insert(Ptr: LP).second)
2564 DispContBB->addSuccessor(Succ: LP);
2565
2566 // N.B. the order the invoke BBs are processed in doesn't matter here.
2567 SmallVector<MachineBasicBlock *, 64> MBBLPads;
2568 const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2569 for (MachineBasicBlock *MBB : InvokeBBs) {
2570 // Remove the landing pad successor from the invoke block and replace it
2571 // with the new dispatch block.
2572 // Keep a copy of Successors since it's modified inside the loop.
2573 SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
2574 MBB->succ_rend());
2575 // FIXME: Avoid quadratic complexity.
2576 for (auto *MBBS : Successors) {
2577 if (MBBS->isEHPad()) {
2578 MBB->removeSuccessor(Succ: MBBS);
2579 MBBLPads.push_back(Elt: MBBS);
2580 }
2581 }
2582
2583 MBB->addSuccessor(Succ: DispatchBB);
2584
2585 // Find the invoke call and mark all of the callee-saved registers as
2586 // 'implicit defined' so that they're spilled. This prevents code from
2587 // moving instructions to before the EH block, where they will never be
2588 // executed.
2589 for (auto &II : reverse(C&: *MBB)) {
2590 if (!II.isCall())
2591 continue;
2592
2593 DenseSet<Register> DefRegs;
2594 for (auto &MOp : II.operands())
2595 if (MOp.isReg())
2596 DefRegs.insert(V: MOp.getReg());
2597
2598 MachineInstrBuilder MIB(*MF, &II);
2599 for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
2600 Register Reg = SavedRegs[RI];
2601 if (!DefRegs.contains(V: Reg))
2602 MIB.addReg(RegNo: Reg, Flags: RegState::ImplicitDefine | RegState::Dead);
2603 }
2604
2605 break;
2606 }
2607 }
2608
2609 // Mark all former landing pads as non-landing pads. The dispatch is the only
2610 // landing pad now.
2611 for (auto &LP : MBBLPads)
2612 LP->setIsEHPad(false);
2613
2614 // The instruction is gone now.
2615 MI.eraseFromParent();
2616 return BB;
2617}
2618
2619MachineBasicBlock *
2620VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2621 MachineBasicBlock *BB) const {
2622 switch (MI.getOpcode()) {
2623 default:
2624 llvm_unreachable("Unknown Custom Instruction!");
2625 case VE::EH_SjLj_LongJmp:
2626 return emitEHSjLjLongJmp(MI, MBB: BB);
2627 case VE::EH_SjLj_SetJmp:
2628 return emitEHSjLjSetJmp(MI, MBB: BB);
2629 case VE::EH_SjLj_Setup_Dispatch:
2630 return emitSjLjDispatchBlock(MI, BB);
2631 }
2632}
2633
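// Return true if V is a scalar constant whose value fits in VE's signed 7-bit
// immediate field (for f32/f64 constants, the bit pattern as placed in the
// register is checked).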
2634static bool isSimm7(SDValue V) {
2635 EVT VT = V.getValueType();
2636 if (VT.isVector())
2637 return false;
2638
2639 if (VT.isInteger()) {
2640 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: V))
2641 return isInt<7>(x: C->getSExtValue());
2642 } else if (VT.isFloatingPoint()) {
2643 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: V)) {
2644 if (VT == MVT::f32 || VT == MVT::f64) {
2645 const APInt &Imm = C->getValueAPF().bitcastToAPInt();
2646 uint64_t Val = Imm.getSExtValue();
2647 if (Imm.getBitWidth() == 32)
2648 Val <<= 32; // The immediate value of a float is placed in the higher bits on VE.
2649 return isInt<7>(x: Val);
2650 }
2651 }
2652 }
2653 return false;
2654}
2655
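// Return true if V is a scalar constant that can be encoded as VE's MImm
// operand form.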
2656static bool isMImm(SDValue V) {
2657 EVT VT = V.getValueType();
2658 if (VT.isVector())
2659 return false;
2660
2661 if (VT.isInteger()) {
2662 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: V))
2663 return isMImmVal(Val: getImmVal(N: C));
2664 } else if (VT.isFloatingPoint()) {
2665 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: V)) {
2666 if (VT == MVT::f32) {
2667 // The float value is placed in the higher bits, so ignore the lower 32 bits.
2668 return isMImm32Val(Val: getFpImmVal(N: C) >> 32);
2669 } else if (VT == MVT::f64) {
2670 return isMImmVal(Val: getFpImmVal(N: C));
2671 }
2672 }
2673 }
2674 return false;
2675}
2676
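// Choose the VE comparison opcode for the given operand type and condition
// code: CMPQ for f128, CMPF for other FP types, and CMPI/CMPU for
// signed/unsigned integer comparisons.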
2677static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
2678 if (SrcVT.isFloatingPoint()) {
2679 if (SrcVT == MVT::f128)
2680 return VEISD::CMPQ;
2681 return VEISD::CMPF;
2682 }
2683 return isSignedIntSetCC(Code: CC) ? VEISD::CMPI : VEISD::CMPU;
2684}
2685
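// VE computes an f128 comparison result in an f64 register, so use f64 as the
// comparison type for f128; all other types compare in their own type.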
2686static EVT decideCompType(EVT SrcVT) {
2687 if (SrcVT == MVT::f128)
2688 return MVT::f64;
2689 return SrcVT;
2690}
2691
2692static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC,
2693 bool WithCMov) {
2694 if (SrcVT.isFloatingPoint()) {
2695 // For floating-point setcc, only unordered comparisons or general
2696 // comparisons with the -enable-no-nans-fp-math option reach here, so it
2697 // is safe even if the values are NaN. Only f128 is not safe, since VE
2698 // uses the f64 result of the f128 comparison.
2699 return SrcVT != MVT::f128;
2700 }
2701 if (isIntEqualitySetCC(Code: CC)) {
2702 // For equal or not-equal comparisons, it is safe without the comparison with 0.
2703 return true;
2704 }
2705 if (WithCMov) {
2706 // For integer setcc with cmov, all signed comparisons with 0
2707 // are safe.
2708 return isSignedIntSetCC(Code: CC);
2709 }
2710 // For plain integer setcc, only signed 64-bit comparisons are safe.
2711 // For unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it becomes
2712 // less than 0 without CMPU. For 32 bits, the other half of the register is
2713 // not well defined, so it is not safe either without CMPI.
2714 return isSignedIntSetCC(Code: CC) && SrcVT == MVT::i64;
2715}
2716
2717static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS,
2718 ISD::CondCode CC, bool WithCMov,
2719 const SDLoc &DL, SelectionDAG &DAG) {
2720 // Compare values. If RHS is 0 and it is safe to calculate without
2721 // comparison, we don't generate an instruction for comparison.
2722 EVT CompVT = decideCompType(SrcVT: VT);
2723 if (CompVT == VT && safeWithoutCompWithNull(SrcVT: VT, CC, WithCMov) &&
2724 (isNullConstant(V: RHS) || isNullFPConstant(V: RHS))) {
2725 return LHS;
2726 }
2727 return DAG.getNode(Opcode: decideComp(SrcVT: VT, CC), DL, VT: CompVT, N1: LHS, N2: RHS);
2728}
2729
2730SDValue VETargetLowering::combineSelect(SDNode *N,
2731 DAGCombinerInfo &DCI) const {
2732 assert(N->getOpcode() == ISD::SELECT &&
2733 "Should be called with a SELECT node");
2734 ISD::CondCode CC = ISD::CondCode::SETNE;
2735 SDValue Cond = N->getOperand(Num: 0);
2736 SDValue True = N->getOperand(Num: 1);
2737 SDValue False = N->getOperand(Num: 2);
2738
2739 // We handle only scalar SELECT.
2740 EVT VT = N->getValueType(ResNo: 0);
2741 if (VT.isVector())
2742 return SDValue();
2743
2744 // Perform combineSelect after legalizing the DAG.
2745 if (!DCI.isAfterLegalizeDAG())
2746 return SDValue();
2747
2748 EVT VT0 = Cond.getValueType();
2749 if (isMImm(V: True)) {
2750 // VE's conditional move can handle MImm in the True clause, so nothing to do.
2751 } else if (isMImm(V: False)) {
2752 // VE's conditional move can handle MImm in the True clause, so swap the True
2753 // and False clauses if False has an MImm value, and update the condition code.
2754 std::swap(a&: True, b&: False);
2755 CC = getSetCCInverse(Operation: CC, Type: VT0);
2756 }
2757
2758 SDLoc DL(N);
2759 SelectionDAG &DAG = DCI.DAG;
2760 VECC::CondCode VECCVal;
2761 if (VT0.isFloatingPoint()) {
2762 VECCVal = fpCondCode2Fcc(CC);
2763 } else {
2764 VECCVal = intCondCode2Icc(CC);
2765 }
2766 SDValue Ops[] = {Cond, True, False,
2767 DAG.getConstant(Val: VECCVal, DL, VT: MVT::i32)};
2768 return DAG.getNode(Opcode: VEISD::CMOV, DL, VT, Ops);
2769}
2770
2771SDValue VETargetLowering::combineSelectCC(SDNode *N,
2772 DAGCombinerInfo &DCI) const {
2773 assert(N->getOpcode() == ISD::SELECT_CC &&
2774 "Should be called with a SELECT_CC node");
2775 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 4))->get();
2776 SDValue LHS = N->getOperand(Num: 0);
2777 SDValue RHS = N->getOperand(Num: 1);
2778 SDValue True = N->getOperand(Num: 2);
2779 SDValue False = N->getOperand(Num: 3);
2780
2781 // We handle only scalar SELECT_CC.
2782 EVT VT = N->getValueType(ResNo: 0);
2783 if (VT.isVector())
2784 return SDValue();
2785
2786 // Perform combineSelectCC after legalizing the DAG.
2787 if (!DCI.isAfterLegalizeDAG())
2788 return SDValue();
2789
2790 // We handle only i32/i64/f32/f64/f128 comparisons.
2791 EVT LHSVT = LHS.getValueType();
2792 assert(LHSVT == RHS.getValueType());
2793 switch (LHSVT.getSimpleVT().SimpleTy) {
2794 case MVT::i32:
2795 case MVT::i64:
2796 case MVT::f32:
2797 case MVT::f64:
2798 case MVT::f128:
2799 break;
2800 default:
2801 // Return SDValue to let llvm handle other types.
2802 return SDValue();
2803 }
2804
2805 if (isMImm(V: RHS)) {
2806 // VE's comparison can handle MImm in RHS, so nothing to do.
2807 } else if (isSimm7(V: RHS)) {
    // VE's comparison can handle Simm7 in LHS, so if RHS is a Simm7 value,
    // swap LHS and RHS and update the condition code.
2810 std::swap(a&: LHS, b&: RHS);
2811 CC = getSetCCSwappedOperands(Operation: CC);
2812 }
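  // E.g. for (select_cc %x, %simm7, ..., setlt) the comparison is rewritten as
  // (%simm7, %x) with setgt, moving the small immediate into the LHS slot that
  // the compare instruction can encode directly.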
2813 if (isMImm(V: True)) {
2814 // VE's condition move can handle MImm in True clause, so nothing to do.
2815 } else if (isMImm(V: False)) {
    // VE's condition move can handle MImm in the True clause, so swap the True
    // and False clauses if False holds an MImm value, and invert the condition
    // code.
2818 std::swap(a&: True, b&: False);
2819 CC = getSetCCInverse(Operation: CC, Type: LHSVT);
2820 }
2821
2822 SDLoc DL(N);
2823 SelectionDAG &DAG = DCI.DAG;
2824
2825 bool WithCMov = true;
2826 SDValue CompNode = generateComparison(VT: LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);
2827
2828 VECC::CondCode VECCVal;
2829 if (LHSVT.isFloatingPoint()) {
2830 VECCVal = fpCondCode2Fcc(CC);
2831 } else {
2832 VECCVal = intCondCode2Icc(CC);
2833 }
2834 SDValue Ops[] = {CompNode, True, False,
2835 DAG.getConstant(Val: VECCVal, DL, VT: MVT::i32)};
2836 return DAG.getNode(Opcode: VEISD::CMOV, DL, VT, Ops);
2837}
2838
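// The helpers below decide whether a (truncate i64 %x to i32) can be replaced
// by a plain sub-register extraction. Roughly, a use is considered safe when
// it only ever reads the low 32 bits of %x, e.g.
//   %t = truncate i64 %x to i32
//   %a = add i32 %t, %y   ; the 32-bit add ignores the upper half, so safe
// whereas uses such as an SRL that may itself be combined with an inner SRL
// keep the explicit truncate (see the ISD::SRL case below).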
2839static bool isI32InsnAllUses(const SDNode *User, const SDNode *N);
2840static bool isI32Insn(const SDNode *User, const SDNode *N) {
2841 switch (User->getOpcode()) {
2842 default:
2843 return false;
2844 case ISD::ADD:
2845 case ISD::SUB:
2846 case ISD::MUL:
2847 case ISD::SDIV:
2848 case ISD::UDIV:
2849 case ISD::SETCC:
2850 case ISD::SMIN:
2851 case ISD::SMAX:
2852 case ISD::SHL:
2853 case ISD::SRA:
2854 case ISD::BSWAP:
2855 case ISD::SINT_TO_FP:
2856 case ISD::UINT_TO_FP:
2857 case ISD::BR_CC:
2858 case ISD::BITCAST:
2859 case ISD::ATOMIC_CMP_SWAP:
2860 case ISD::ATOMIC_SWAP:
2861 case VEISD::CMPU:
2862 case VEISD::CMPI:
2863 return true;
2864 case ISD::SRL:
2865 if (N->getOperand(Num: 0).getOpcode() != ISD::SRL)
2866 return true;
    // (srl (trunc (srl ...))) may be optimized by combining the srl nodes, so
    // don't optimize the trunc here.
2869 return false;
2870 case ISD::SELECT_CC:
2871 if (User->getOperand(Num: 2).getNode() != N &&
2872 User->getOperand(Num: 3).getNode() != N)
2873 return true;
2874 return isI32InsnAllUses(User, N);
2875 case VEISD::CMOV:
2876 // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
2877 // However, trunc in true or false clauses is not safe.
2878 if (User->getOperand(Num: 1).getNode() != N &&
2879 User->getOperand(Num: 2).getNode() != N &&
2880 isa<ConstantSDNode>(Val: User->getOperand(Num: 3))) {
2881 VECC::CondCode VECCVal =
2882 static_cast<VECC::CondCode>(User->getConstantOperandVal(Num: 3));
2883 return isIntVECondCode(CC: VECCVal);
2884 }
2885 [[fallthrough]];
2886 case ISD::AND:
2887 case ISD::OR:
2888 case ISD::XOR:
2889 case ISD::SELECT:
2890 case ISD::CopyToReg:
    // Check all uses of selections, bit operations, and copies. If all of
    // them are safe, optimize the truncate to an extract_subreg.
2893 return isI32InsnAllUses(User, N);
2894 }
2895}
2896
2897static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) {
  // Check all uses of the User node. If all of them are safe, optimize the
  // truncate to an extract_subreg.
2900 for (const SDNode *U : User->users()) {
2901 switch (U->getOpcode()) {
2902 default:
      // If the use is an instruction which treats the source operand as i32,
      // it is safe to avoid the truncate here.
2905 if (isI32Insn(User: U, N))
2906 continue;
2907 break;
2908 case ISD::ANY_EXTEND:
2909 case ISD::SIGN_EXTEND:
2910 case ISD::ZERO_EXTEND: {
      // Special optimization for the combination of ext and trunc.
      // (ext ... (select ... (trunc ...))) is safe to avoid the truncate here
      // since the truncate clears the higher 32 bits, which are filled by one
      // of the ext instructions later anyway.
      assert(N->getValueType(0) == MVT::i32 &&
             "found truncate to a non-i32 integer");
2917 if (User->getOpcode() == ISD::SELECT_CC ||
2918 User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV)
2919 continue;
2920 break;
2921 }
2922 }
2923 return false;
2924 }
2925 return true;
2926}
2927
// Optimize TRUNCATE during DAG combining. Optimizing it in custom lowering is
// sometimes too early, and optimizing it during DAG pattern matching in
// VEInstrInfo.td is sometimes too late. So, do it here.
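// Roughly, when every use of the result only needs the low 32 bits:
//   (truncate i64 %x to i32)  ->  (EXTRACT_SUBREG %x, sub_i32)
// which removes the explicit truncation instruction.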
2931SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2932 DAGCombinerInfo &DCI) const {
2933 assert(N->getOpcode() == ISD::TRUNCATE &&
2934 "Should be called with a TRUNCATE node");
2935
2936 SelectionDAG &DAG = DCI.DAG;
2937 SDLoc DL(N);
2938 EVT VT = N->getValueType(ResNo: 0);
2939
2940 // We prefer to do this when all types are legal.
2941 if (!DCI.isAfterLegalizeDAG())
2942 return SDValue();
2943
  // For now, skip combining TRUNCATE if the operand of TRUNCATE might be a
  // constant.
2945 if (N->getOperand(Num: 0)->getOpcode() == ISD::SELECT_CC &&
2946 isa<ConstantSDNode>(Val: N->getOperand(Num: 0)->getOperand(Num: 0)) &&
2947 isa<ConstantSDNode>(Val: N->getOperand(Num: 0)->getOperand(Num: 1)))
2948 return SDValue();
2949
  // Check all uses of this TRUNCATE.
2951 for (const SDNode *User : N->users()) {
    // Make sure that we're not going to replace this TRUNCATE for non-i32
    // instructions.
2954 //
2955 // FIXME: Although we could sometimes handle this, and it does occur in
2956 // practice that one of the condition inputs to the select is also one of
2957 // the outputs, we currently can't deal with this.
2958 if (isI32Insn(User, N))
2959 continue;
2960
2961 return SDValue();
2962 }
2963
2964 SDValue SubI32 = DAG.getTargetConstant(Val: VE::sub_i32, DL, VT: MVT::i32);
2965 return SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT,
2966 Op1: N->getOperand(Num: 0), Op2: SubI32),
2967 0);
2968}
2969
2970SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
2971 DAGCombinerInfo &DCI) const {
2972 switch (N->getOpcode()) {
2973 default:
2974 break;
2975 case ISD::SELECT:
2976 return combineSelect(N, DCI);
2977 case ISD::SELECT_CC:
2978 return combineSelectCC(N, DCI);
2979 case ISD::TRUNCATE:
2980 return combineTRUNCATE(N, DCI);
2981 }
2982
2983 return SDValue();
2984}
2985
2986//===----------------------------------------------------------------------===//
2987// VE Inline Assembly Support
2988//===----------------------------------------------------------------------===//
2989
2990VETargetLowering::ConstraintType
2991VETargetLowering::getConstraintType(StringRef Constraint) const {
2992 if (Constraint.size() == 1) {
2993 switch (Constraint[0]) {
2994 default:
2995 break;
2996 case 'v': // vector registers
2997 return C_RegisterClass;
2998 }
2999 }
3000 return TargetLowering::getConstraintType(Constraint);
3001}
3002
3003std::pair<unsigned, const TargetRegisterClass *>
3004VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3005 StringRef Constraint,
3006 MVT VT) const {
3007 const TargetRegisterClass *RC = nullptr;
3008 if (Constraint.size() == 1) {
3009 switch (Constraint[0]) {
3010 default:
3011 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3012 case 'r':
3013 RC = &VE::I64RegClass;
3014 break;
3015 case 'v':
3016 RC = &VE::V64RegClass;
3017 break;
3018 }
3019 return std::make_pair(x: 0U, y&: RC);
3020 }
3021
3022 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3023}
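// Illustrative (hypothetical) use: an inline-asm operand written with the 'v'
// constraint, e.g. __asm__("..." : "=v"(out) : "v"(in)), is assigned to the
// V64 vector register class above, while the usual 'r' maps to I64RegClass.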
3024
3025//===----------------------------------------------------------------------===//
3026// VE Target Optimization Support
3027//===----------------------------------------------------------------------===//
3028
3029unsigned VETargetLowering::getMinimumJumpTableEntries() const {
  // Specify 8 for the PIC model to reduce the impact of PIC load instructions.
3031 if (isJumpTableRelative())
3032 return 8;
3033
3034 return TargetLowering::getMinimumJumpTableEntries();
3035}
3036
3037bool VETargetLowering::hasAndNot(SDValue Y) const {
3038 EVT VT = Y.getValueType();
3039
  // VE doesn't have a vector and-not instruction.
3041 if (VT.isVector())
3042 return false;
3043
  // VE allows different immediate values for X and Y in ~X & Y. Only simm7
  // works for X, and only mimm works for Y on VE. However, this function is
  // used to check whether an immediate value is OK as both X and Y of an
  // and-not instruction. Generating an additional instruction to materialize
  // an immediate value is not worthwhile, since the purpose of this function
  // is to convert a series of 3 instructions into another series of 3
  // instructions with better parallelism. Therefore, we return false for all
  // immediate values for now.
3052 // FIXME: Change hasAndNot function to have two operands to make it work
3053 // correctly with Aurora VE.
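  // E.g. when DAGCombiner asks about forming ~%x & 42, Y here is the constant
  // 42; since this hook cannot tell whether the constant would end up in the
  // simm7 or the mimm position, we conservatively reject all constants below.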
3054 if (isa<ConstantSDNode>(Val: Y))
3055 return false;
3056
3057 // It's ok for generic registers.
3058 return true;
3059}
3060
3061SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3062 SelectionDAG &DAG) const {
3063 assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
3064 MVT VT = Op.getOperand(i: 0).getSimpleValueType();
3065
3066 // Special treatment for packed V64 types.
3067 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3068 (void)VT;
  // Example code:
3070 // %packed_v = extractelt %vr, %idx / 2
3071 // %v = %packed_v >> (%idx % 2 * 32)
3072 // %res = %v & 0xffffffff
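  //
  // For instance, with %idx = 3 the code below reads packed element 1 and uses
  // Shift = ((3 & 1) ^ 1) << 5 = 0, selecting the lower 32 bits; with %idx = 2
  // it uses Shift = 32 and selects the upper 32 bits.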
3073
3074 SDValue Vec = Op.getOperand(i: 0);
3075 SDValue Idx = Op.getOperand(i: 1);
3076 SDLoc DL(Op);
3077 SDValue Result = Op;
3078 if (false /* Idx->isConstant() */) {
3079 // TODO: optimized implementation using constant values
3080 } else {
3081 SDValue Const1 = DAG.getConstant(Val: 1, DL, VT: MVT::i64);
3082 SDValue HalfIdx = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {Idx, Const1});
3083 SDValue PackedElt =
3084 SDValue(DAG.getMachineNode(Opcode: VE::LVSvr, dl: DL, VT: MVT::i64, Ops: {Vec, HalfIdx}), 0);
3085 SDValue AndIdx = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {Idx, Const1});
3086 SDValue Shift = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, Ops: {AndIdx, Const1});
3087 SDValue Const5 = DAG.getConstant(Val: 5, DL, VT: MVT::i64);
3088 Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, Ops: {Shift, Const5});
3089 PackedElt = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {PackedElt, Shift});
3090 SDValue Mask = DAG.getConstant(Val: 0xFFFFFFFFL, DL, VT: MVT::i64);
3091 PackedElt = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {PackedElt, Mask});
3092 SDValue SubI32 = DAG.getTargetConstant(Val: VE::sub_i32, DL, VT: MVT::i32);
3093 Result = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL,
3094 VT: MVT::i32, Op1: PackedElt, Op2: SubI32),
3095 0);
3096
3097 if (Op.getSimpleValueType() == MVT::f32) {
3098 Result = DAG.getBitcast(VT: MVT::f32, V: Result);
3099 } else {
3100 assert(Op.getSimpleValueType() == MVT::i32);
3101 }
3102 }
3103 return Result;
3104}
3105
3106SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3107 SelectionDAG &DAG) const {
3108 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
3109 MVT VT = Op.getOperand(i: 0).getSimpleValueType();
3110
3111 // Special treatment for packed V64 types.
3112 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3113 (void)VT;
  // The elements of v512i32 and v512f32 start from the upper bits (0..31).
  // These "upper bits" require `val << 32` from the C implementation's point
  // of view.
  //
  // Example code:
3118 // %packed_elt = extractelt %vr, (%idx >> 1)
3119 // %shift = ((%idx & 1) ^ 1) << 5
3120 // %packed_elt &= 0xffffffff00000000 >> shift
3121 // %packed_elt |= (zext %val) << shift
3122 // %vr = insertelt %vr, %packed_elt, (%idx >> 1)
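  //
  // For instance, inserting at %idx = 2 uses shift = 32: the mask becomes
  // 0x00000000ffffffff, the upper half of packed element 1 (the target slot)
  // is cleared, and (zext %val) << 32 is OR'ed in before LSVrr_v writes the
  // element back.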
3123
3124 SDLoc DL(Op);
3125 SDValue Vec = Op.getOperand(i: 0);
3126 SDValue Val = Op.getOperand(i: 1);
3127 SDValue Idx = Op.getOperand(i: 2);
3128 if (Idx.getSimpleValueType() == MVT::i32)
3129 Idx = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Idx);
3130 if (Val.getSimpleValueType() == MVT::f32)
3131 Val = DAG.getBitcast(VT: MVT::i32, V: Val);
3132 assert(Val.getSimpleValueType() == MVT::i32);
3133 Val = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Val);
3134
3135 SDValue Result = Op;
3136 if (false /* Idx->isConstant()*/) {
3137 // TODO: optimized implementation using constant values
3138 } else {
3139 SDValue Const1 = DAG.getConstant(Val: 1, DL, VT: MVT::i64);
3140 SDValue HalfIdx = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {Idx, Const1});
3141 SDValue PackedElt =
3142 SDValue(DAG.getMachineNode(Opcode: VE::LVSvr, dl: DL, VT: MVT::i64, Ops: {Vec, HalfIdx}), 0);
3143 SDValue AndIdx = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {Idx, Const1});
3144 SDValue Shift = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, Ops: {AndIdx, Const1});
3145 SDValue Const5 = DAG.getConstant(Val: 5, DL, VT: MVT::i64);
3146 Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, Ops: {Shift, Const5});
3147 SDValue Mask = DAG.getConstant(Val: 0xFFFFFFFF00000000L, DL, VT: MVT::i64);
3148 Mask = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {Mask, Shift});
3149 PackedElt = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {PackedElt, Mask});
3150 Val = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, Ops: {Val, Shift});
3151 PackedElt = DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i64, Ops: {PackedElt, Val});
3152 Result =
3153 SDValue(DAG.getMachineNode(Opcode: VE::LSVrr_v, dl: DL, VT: Vec.getSimpleValueType(),
3154 Ops: {HalfIdx, PackedElt, Vec}),
3155 0);
3156 }
3157 return Result;
3158}
3159