1//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the interfaces that VE uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "VEISelLowering.h"
15#include "MCTargetDesc/VEMCAsmInfo.h"
16#include "VECustomDAG.h"
17#include "VEInstrBuilder.h"
18#include "VEMachineFunctionInfo.h"
19#include "VERegisterInfo.h"
20#include "VESelectionDAGInfo.h"
21#include "VETargetMachine.h"
22#include "llvm/ADT/StringSwitch.h"
23#include "llvm/CodeGen/CallingConvLower.h"
24#include "llvm/CodeGen/MachineFrameInfo.h"
25#include "llvm/CodeGen/MachineFunction.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineJumpTableInfo.h"
28#include "llvm/CodeGen/MachineModuleInfo.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/CodeGen/SelectionDAG.h"
31#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
32#include "llvm/IR/DerivedTypes.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/IRBuilder.h"
35#include "llvm/IR/Module.h"
36#include "llvm/Support/ErrorHandling.h"
37using namespace llvm;
38
39#define DEBUG_TYPE "ve-lower"
40
41//===----------------------------------------------------------------------===//
42// Calling Convention Implementation
43//===----------------------------------------------------------------------===//
44
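// Pulls in the TableGen-generated calling-convention assignment functions
// (CC_VE_C, CC_VE_Fast, CC_VE2, RetCC_VE_C, RetCC_VE_Fast) used below.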
45#include "VEGenCallingConv.inc"
46
47CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
48 switch (CallConv) {
49 default:
50 return RetCC_VE_C;
51 case CallingConv::Fast:
52 return RetCC_VE_Fast;
53 }
54}
55
56CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
57 if (IsVarArg)
58 return CC_VE2;
59 switch (CallConv) {
60 default:
61 return CC_VE_C;
62 case CallingConv::Fast:
63 return CC_VE_Fast;
64 }
65}
66
67bool VETargetLowering::CanLowerReturn(
68 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
69 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
70 const Type *RetTy) const {
71 CCAssignFn *RetCC = getReturnCC(CallConv);
72 SmallVector<CCValAssign, 16> RVLocs;
73 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
74 return CCInfo.CheckReturn(Outs, Fn: RetCC);
75}
76
77static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
78 MVT::v256f32, MVT::v512f32, MVT::v256f64};
79
80static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};
81
82static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
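// Note: the packed types hold 512 elements by packing two 32-bit values into
// each 64-bit lane of a 256-element vector register.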
83
84void VETargetLowering::initRegisterClasses() {
85 // Set up the register classes.
86 addRegisterClass(VT: MVT::i32, RC: &VE::I32RegClass);
87 addRegisterClass(VT: MVT::i64, RC: &VE::I64RegClass);
88 addRegisterClass(VT: MVT::f32, RC: &VE::F32RegClass);
89 addRegisterClass(VT: MVT::f64, RC: &VE::I64RegClass);
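  // Note that f64 shares I64RegClass: VE keeps doubles in the 64-bit scalar
  // registers rather than in a separate floating-point register file.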
90 addRegisterClass(VT: MVT::f128, RC: &VE::F128RegClass);
91
92 if (Subtarget->enableVPU()) {
93 for (MVT VecVT : AllVectorVTs)
94 addRegisterClass(VT: VecVT, RC: &VE::V64RegClass);
95 addRegisterClass(VT: MVT::v256i1, RC: &VE::VMRegClass);
96 addRegisterClass(VT: MVT::v512i1, RC: &VE::VM512RegClass);
97 }
98}
99
100void VETargetLowering::initSPUActions() {
101 const auto &TM = getTargetMachine();
102 /// Load & Store {
103
104 // VE doesn't have i1 sign extending load.
105 for (MVT VT : MVT::integer_valuetypes()) {
106 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
107 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
108 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
109 setTruncStoreAction(ValVT: VT, MemVT: MVT::i1, Action: Expand);
110 }
111
112 // VE doesn't have floating point extload/truncstore, so expand them.
113 for (MVT FPVT : MVT::fp_valuetypes()) {
114 for (MVT OtherFPVT : MVT::fp_valuetypes()) {
115 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: FPVT, MemVT: OtherFPVT, Action: Expand);
116 setTruncStoreAction(ValVT: FPVT, MemVT: OtherFPVT, Action: Expand);
117 }
118 }
119
  // VE doesn't have fp128 load/store, so handle them in custom lowering.
121 setOperationAction(Op: ISD::LOAD, VT: MVT::f128, Action: Custom);
122 setOperationAction(Op: ISD::STORE, VT: MVT::f128, Action: Custom);
123
124 /// } Load & Store
125
126 // Custom legalize address nodes into LO/HI parts.
127 MVT PtrVT = MVT::getIntegerVT(BitWidth: TM.getPointerSizeInBits(AS: 0));
128 setOperationAction(Op: ISD::BlockAddress, VT: PtrVT, Action: Custom);
129 setOperationAction(Op: ISD::GlobalAddress, VT: PtrVT, Action: Custom);
130 setOperationAction(Op: ISD::GlobalTLSAddress, VT: PtrVT, Action: Custom);
131 setOperationAction(Op: ISD::ConstantPool, VT: PtrVT, Action: Custom);
132 setOperationAction(Op: ISD::JumpTable, VT: PtrVT, Action: Custom);
133
134 /// VAARG handling {
135 setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom);
  // VAARG needs to be lowered to an access with 8-byte alignment.
137 setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Custom);
138 // Use the default implementation.
139 setOperationAction(Op: ISD::VACOPY, VT: MVT::Other, Action: Expand);
140 setOperationAction(Op: ISD::VAEND, VT: MVT::Other, Action: Expand);
141 /// } VAARG handling
142
143 /// Stack {
144 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i32, Action: Custom);
145 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i64, Action: Custom);
146
147 // Use the default implementation.
148 setOperationAction(Op: ISD::STACKSAVE, VT: MVT::Other, Action: Expand);
149 setOperationAction(Op: ISD::STACKRESTORE, VT: MVT::Other, Action: Expand);
150 /// } Stack
151
152 /// Branch {
153
154 // VE doesn't have BRCOND
155 setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Expand);
156
157 // BR_JT is not implemented yet.
158 setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand);
159
160 /// } Branch
161
162 /// Int Ops {
163 for (MVT IntVT : {MVT::i32, MVT::i64}) {
164 // VE has no REM or DIVREM operations.
165 setOperationAction(Op: ISD::UREM, VT: IntVT, Action: Expand);
166 setOperationAction(Op: ISD::SREM, VT: IntVT, Action: Expand);
167 setOperationAction(Op: ISD::SDIVREM, VT: IntVT, Action: Expand);
168 setOperationAction(Op: ISD::UDIVREM, VT: IntVT, Action: Expand);
169
170 // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
171 setOperationAction(Op: ISD::SHL_PARTS, VT: IntVT, Action: Expand);
172 setOperationAction(Op: ISD::SRA_PARTS, VT: IntVT, Action: Expand);
173 setOperationAction(Op: ISD::SRL_PARTS, VT: IntVT, Action: Expand);
174
175 // VE has no MULHU/S or U/SMUL_LOHI operations.
176 // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
177 setOperationAction(Op: ISD::MULHU, VT: IntVT, Action: Expand);
178 setOperationAction(Op: ISD::MULHS, VT: IntVT, Action: Expand);
179 setOperationAction(Op: ISD::UMUL_LOHI, VT: IntVT, Action: Expand);
180 setOperationAction(Op: ISD::SMUL_LOHI, VT: IntVT, Action: Expand);
181
182 // VE has no CTTZ, ROTL, ROTR operations.
183 setOperationAction(Op: ISD::CTTZ, VT: IntVT, Action: Expand);
184 setOperationAction(Op: ISD::ROTL, VT: IntVT, Action: Expand);
185 setOperationAction(Op: ISD::ROTR, VT: IntVT, Action: Expand);
186
    // VE has a 64-bit instruction that implements i64 BSWAP. The same
    // instruction also works as an i32 BSWAP when given an additional
    // operand. Use isel patterns to lower BSWAP.
190 setOperationAction(Op: ISD::BSWAP, VT: IntVT, Action: Legal);
191
    // VE has only 64-bit instructions for the BITREVERSE/CTLZ/CTPOP
    // operations. Use isel patterns for i64, promote for i32.
194 LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
195 setOperationAction(Op: ISD::BITREVERSE, VT: IntVT, Action: Act);
196 setOperationAction(Op: ISD::CTLZ, VT: IntVT, Action: Act);
197 setOperationAction(Op: ISD::CTLZ_ZERO_UNDEF, VT: IntVT, Action: Act);
198 setOperationAction(Op: ISD::CTPOP, VT: IntVT, Action: Act);
199
    // VE has only 64-bit instructions for the AND/OR/XOR operations.
    // Use isel patterns for i64, promote for i32.
202 setOperationAction(Op: ISD::AND, VT: IntVT, Action: Act);
203 setOperationAction(Op: ISD::OR, VT: IntVT, Action: Act);
204 setOperationAction(Op: ISD::XOR, VT: IntVT, Action: Act);
205
206 // Legal smax and smin
207 setOperationAction(Op: ISD::SMAX, VT: IntVT, Action: Legal);
208 setOperationAction(Op: ISD::SMIN, VT: IntVT, Action: Legal);
209 }
210 /// } Int Ops
211
212 /// Conversion {
  // VE doesn't have fp<->uint conversion instructions, so let LLVM expand them.
214 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Promote); // use i64
215 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i32, Action: Promote); // use i64
216 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i64, Action: Expand);
217 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Expand);
218
219 // fp16 not supported
220 for (MVT FPVT : MVT::fp_valuetypes()) {
221 setOperationAction(Op: ISD::FP16_TO_FP, VT: FPVT, Action: Expand);
222 setOperationAction(Op: ISD::FP_TO_FP16, VT: FPVT, Action: Expand);
223 }
224 /// } Conversion
225
226 /// Floating-point Ops {
227 /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
228 /// and fcmp.
229
  // VE doesn't have the following floating-point operations.
231 for (MVT VT : MVT::fp_valuetypes()) {
232 setOperationAction(Op: ISD::FNEG, VT, Action: Expand);
233 setOperationAction(Op: ISD::FREM, VT, Action: LibCall);
234 }
235
236 // VE doesn't have fdiv of f128.
237 setOperationAction(Op: ISD::FDIV, VT: MVT::f128, Action: Expand);
238
239 for (MVT FPVT : {MVT::f32, MVT::f64}) {
    // f32 and f64 use ConstantFP. f128 uses a ConstantPool.
241 setOperationAction(Op: ISD::ConstantFP, VT: FPVT, Action: Legal);
242 }
243 /// } Floating-point Ops
244
245 /// Floating-point math functions {
246
  // VE doesn't have the following floating-point math functions.
248 for (MVT VT : MVT::fp_valuetypes()) {
249 setOperationAction(Op: ISD::FABS, VT, Action: Expand);
250 setOperationAction(Op: ISD::FCOPYSIGN, VT, Action: Expand);
251 setOperationAction(Op: ISD::FCOS, VT, Action: Expand);
252 setOperationAction(Op: ISD::FMA, VT, Action: Expand);
253 setOperationAction(Op: ISD::FPOW, VT, Action: Expand);
254 setOperationAction(Op: ISD::FSIN, VT, Action: Expand);
255 setOperationAction(Op: ISD::FSQRT, VT, Action: Expand);
256 }
257
  // VE has single- and double-precision FMINNUM and FMAXNUM.
259 for (MVT VT : {MVT::f32, MVT::f64}) {
260 setOperationAction(Ops: {ISD::FMAXNUM, ISD::FMINNUM}, VT, Action: Legal);
261 }
262
263 /// } Floating-point math functions
264
265 /// Atomic instructions {
266
267 setMaxAtomicSizeInBitsSupported(64);
268 setMinCmpXchgSizeInBits(32);
269 setSupportsUnalignedAtomics(false);
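  // Roughly: AtomicExpand widens i8/i16 compare-and-swap to 32 bits, and
  // atomics wider than 64 bits or on unaligned addresses become libcalls.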
270
271 // Use custom inserter for ATOMIC_FENCE.
272 setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom);
273
274 // Other atomic instructions.
275 for (MVT VT : MVT::integer_valuetypes()) {
276 // Support i8/i16 atomic swap.
277 setOperationAction(Op: ISD::ATOMIC_SWAP, VT, Action: Custom);
278
279 // FIXME: Support "atmam" instructions.
280 setOperationAction(Op: ISD::ATOMIC_LOAD_ADD, VT, Action: Expand);
281 setOperationAction(Op: ISD::ATOMIC_LOAD_SUB, VT, Action: Expand);
282 setOperationAction(Op: ISD::ATOMIC_LOAD_AND, VT, Action: Expand);
283 setOperationAction(Op: ISD::ATOMIC_LOAD_OR, VT, Action: Expand);
284
    // VE doesn't have the following instructions.
286 setOperationAction(Op: ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Action: Expand);
287 setOperationAction(Op: ISD::ATOMIC_LOAD_CLR, VT, Action: Expand);
288 setOperationAction(Op: ISD::ATOMIC_LOAD_XOR, VT, Action: Expand);
289 setOperationAction(Op: ISD::ATOMIC_LOAD_NAND, VT, Action: Expand);
290 setOperationAction(Op: ISD::ATOMIC_LOAD_MIN, VT, Action: Expand);
291 setOperationAction(Op: ISD::ATOMIC_LOAD_MAX, VT, Action: Expand);
292 setOperationAction(Op: ISD::ATOMIC_LOAD_UMIN, VT, Action: Expand);
293 setOperationAction(Op: ISD::ATOMIC_LOAD_UMAX, VT, Action: Expand);
294 }
295
296 /// } Atomic instructions
297
298 /// SJLJ instructions {
299 setOperationAction(Op: ISD::EH_SJLJ_LONGJMP, VT: MVT::Other, Action: Custom);
300 setOperationAction(Op: ISD::EH_SJLJ_SETJMP, VT: MVT::i32, Action: Custom);
301 setOperationAction(Op: ISD::EH_SJLJ_SETUP_DISPATCH, VT: MVT::Other, Action: Custom);
302 /// } SJLJ instructions
303
304 // Intrinsic instructions
305 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
306}
307
308void VETargetLowering::initVPUActions() {
309 for (MVT LegalMaskVT : AllMaskVTs)
310 setOperationAction(Op: ISD::BUILD_VECTOR, VT: LegalMaskVT, Action: Custom);
311
312 for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
313 setOperationAction(Op: Opc, VT: MVT::v512i1, Action: Custom);
314
315 for (MVT LegalVecVT : AllVectorVTs) {
316 setOperationAction(Op: ISD::BUILD_VECTOR, VT: LegalVecVT, Action: Custom);
317 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: LegalVecVT, Action: Legal);
318 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: LegalVecVT, Action: Legal);
319 // Translate all vector instructions with legal element types to VVP_*
320 // nodes.
    // TODO We will custom-widen into VVP_* nodes in the future. While we are
    // building the infrastructure for this, we only do so for legal vector
    // VTs.
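    // Including VVPNodes.def below expands the following two macros once per
    // VP/VVP opcode, marking each listed operation as Custom for this VT.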
324#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \
325 setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
326#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
327 setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
328 setOperationAction(Op: ISD::EXPERIMENTAL_VP_STRIDED_LOAD, VT: LegalVecVT, Action: Custom);
329 setOperationAction(Op: ISD::EXPERIMENTAL_VP_STRIDED_STORE, VT: LegalVecVT, Action: Custom);
330#include "VVPNodes.def"
331 }
332
333 for (MVT LegalPackedVT : AllPackedVTs) {
334 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: LegalPackedVT, Action: Custom);
335 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: LegalPackedVT, Action: Custom);
336 }
337
338 // vNt32, vNt64 ops (legal element types)
339 for (MVT VT : MVT::vector_valuetypes()) {
340 MVT ElemVT = VT.getVectorElementType();
341 unsigned ElemBits = ElemVT.getScalarSizeInBits();
342 if (ElemBits != 32 && ElemBits != 64)
343 continue;
344
345 for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
346 setOperationAction(Op: MemOpc, VT, Action: Custom);
347
348 const ISD::NodeType IntReductionOCs[] = {
349 ISD::VECREDUCE_ADD, ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND,
350 ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMIN,
351 ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};
352
353 for (unsigned IntRedOpc : IntReductionOCs)
354 setOperationAction(Op: IntRedOpc, VT, Action: Custom);
355 }
356
357 // v256i1 and v512i1 ops
358 for (MVT MaskVT : AllMaskVTs) {
359 // Custom lower mask ops
360 setOperationAction(Op: ISD::STORE, VT: MaskVT, Action: Custom);
361 setOperationAction(Op: ISD::LOAD, VT: MaskVT, Action: Custom);
362 }
363}
364
365SDValue
366VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
367 bool IsVarArg,
368 const SmallVectorImpl<ISD::OutputArg> &Outs,
369 const SmallVectorImpl<SDValue> &OutVals,
370 const SDLoc &DL, SelectionDAG &DAG) const {
371 // CCValAssign - represent the assignment of the return value to locations.
372 SmallVector<CCValAssign, 16> RVLocs;
373
374 // CCState - Info about the registers and stack slot.
375 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
376 *DAG.getContext());
377
378 // Analyze return values.
379 CCInfo.AnalyzeReturn(Outs, Fn: getReturnCC(CallConv));
380
381 SDValue Glue;
382 SmallVector<SDValue, 4> RetOps(1, Chain);
383
384 // Copy the result values into the output registers.
385 for (unsigned i = 0; i != RVLocs.size(); ++i) {
386 CCValAssign &VA = RVLocs[i];
387 assert(VA.isRegLoc() && "Can only return in registers!");
388 assert(!VA.needsCustom() && "Unexpected custom lowering");
389 SDValue OutVal = OutVals[i];
390
391 // Integer return values must be sign or zero extended by the callee.
392 switch (VA.getLocInfo()) {
393 case CCValAssign::Full:
394 break;
395 case CCValAssign::SExt:
396 OutVal = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: VA.getLocVT(), Operand: OutVal);
397 break;
398 case CCValAssign::ZExt:
399 OutVal = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: VA.getLocVT(), Operand: OutVal);
400 break;
401 case CCValAssign::AExt:
402 OutVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: VA.getLocVT(), Operand: OutVal);
403 break;
404 case CCValAssign::BCvt: {
405 // Convert a float return value to i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
410 assert(VA.getLocVT() == MVT::i64);
411 assert(VA.getValVT() == MVT::f32);
412 SDValue Undef = SDValue(
413 DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::i64), 0);
414 SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
415 OutVal = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL,
416 VT: MVT::i64, Op1: Undef, Op2: OutVal, Op3: Sub_f32),
417 0);
418 break;
419 }
420 default:
421 llvm_unreachable("Unknown loc info!");
422 }
423
424 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: OutVal, Glue);
425
426 // Guarantee that all emitted copies are stuck together with flags.
427 Glue = Chain.getValue(R: 1);
428 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
429 }
430
431 RetOps[0] = Chain; // Update chain.
432
433 // Add the glue if we have it.
434 if (Glue.getNode())
435 RetOps.push_back(Elt: Glue);
436
437 return DAG.getNode(Opcode: VEISD::RET_GLUE, DL, VT: MVT::Other, Ops: RetOps);
438}
439
440SDValue VETargetLowering::LowerFormalArguments(
441 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
442 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
443 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
444 MachineFunction &MF = DAG.getMachineFunction();
445
446 // Get the base offset of the incoming arguments stack space.
447 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
448 // Get the size of the preserved arguments area
449 unsigned ArgsPreserved = 64;
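  // (64 bytes: one 8-byte slot shadowing each parameter register %s0-%s7.)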
450
451 // Analyze arguments according to CC_VE.
452 SmallVector<CCValAssign, 16> ArgLocs;
453 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
454 *DAG.getContext());
455 // Allocate the preserved area first.
456 CCInfo.AllocateStack(Size: ArgsPreserved, Alignment: Align(8));
457 // We already allocated the preserved area, so the stack offset computed
458 // by CC_VE would be correct now.
459 CCInfo.AnalyzeFormalArguments(Ins, Fn: getParamCC(CallConv, IsVarArg: false));
460
461 for (const CCValAssign &VA : ArgLocs) {
462 assert(!VA.needsCustom() && "Unexpected custom lowering");
463 if (VA.isRegLoc()) {
464 // This argument is passed in a register.
465 // All integer register arguments are promoted by the caller to i64.
466
467 // Create a virtual register for the promoted live-in value.
468 Register VReg =
469 MF.addLiveIn(PReg: VA.getLocReg(), RC: getRegClassFor(VT: VA.getLocVT()));
470 SDValue Arg = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: VA.getLocVT());
471
472 // The caller promoted the argument, so insert an Assert?ext SDNode so we
473 // won't promote the value again in this function.
474 switch (VA.getLocInfo()) {
475 case CCValAssign::SExt:
476 Arg = DAG.getNode(Opcode: ISD::AssertSext, DL, VT: VA.getLocVT(), N1: Arg,
477 N2: DAG.getValueType(VA.getValVT()));
478 break;
479 case CCValAssign::ZExt:
480 Arg = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: VA.getLocVT(), N1: Arg,
481 N2: DAG.getValueType(VA.getValVT()));
482 break;
483 case CCValAssign::BCvt: {
484 // Extract a float argument from i64 with padding.
        //     63     31   0
        //    +------+------+
        //    | float|   0  |
        //    +------+------+
489 assert(VA.getLocVT() == MVT::i64);
490 assert(VA.getValVT() == MVT::f32);
491 SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
492 Arg = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL,
493 VT: MVT::f32, Op1: Arg, Op2: Sub_f32),
494 0);
495 break;
496 }
497 default:
498 break;
499 }
500
501 // Truncate the register down to the argument type.
502 if (VA.isExtInLoc())
503 Arg = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VA.getValVT(), Operand: Arg);
504
505 InVals.push_back(Elt: Arg);
506 continue;
507 }
508
509 // The registers are exhausted. This argument was passed on the stack.
510 assert(VA.isMemLoc());
    // The CC_VE_Full/Half functions compute stack offsets relative to the
    // beginning of the arguments area, at %fp + the size of the reserved area.
513 unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
514 unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
515
    // Adjust the offset of a float argument by adding 4, since the argument
    // is stored in an 8-byte slot with the layout below and LLVM generates a
    // 4-byte load instruction. This adjustment is required only in
    // LowerFormalArguments. In LowerCall, a float argument is first converted
    // to i64 and stored as 8 bytes of data, as required by the ABI, so no
    // adjustment is needed there.
    //     0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
526 if (VA.getValVT() == MVT::f32)
527 Offset += 4;
528
529 int FI = MF.getFrameInfo().CreateFixedObject(Size: ValSize, SPOffset: Offset, IsImmutable: true);
530 InVals.push_back(
531 Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain,
532 Ptr: DAG.getFrameIndex(FI, VT: getPointerTy(DL: MF.getDataLayout())),
533 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
534 }
535
536 if (!IsVarArg)
537 return Chain;
538
539 // This function takes variable arguments, some of which may have been passed
  // in registers %s0-%s7.
541 //
542 // The va_start intrinsic needs to know the offset to the first variable
543 // argument.
544 // TODO: need to calculate offset correctly once we support f128.
545 unsigned ArgOffset = ArgLocs.size() * 8;
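  // Every fixed argument occupies one 8-byte slot, so the first variable
  // argument starts immediately after the named arguments.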
546 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
547 // Skip the reserved area at the top of stack.
548 FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
549
550 return Chain;
551}
552
553// FIXME? Maybe this could be a TableGen attribute on some registers and
554// this table could be generated automatically from RegInfo.
555Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
556 const MachineFunction &MF) const {
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("sp", VE::SX11)    // Stack pointer
                     .Case("fp", VE::SX9)     // Frame pointer
                     .Case("sl", VE::SX8)     // Stack limit
                     .Case("lr", VE::SX10)    // Link register
                     .Case("tp", VE::SX14)    // Thread pointer
                     .Case("outer", VE::SX12) // Outer register
                     .Case("info", VE::SX17)  // Info area register
                     .Case("got", VE::SX15)   // Global offset table register
                     .Case("plt", VE::SX16)   // Procedure linkage table register
                     .Default(Register());
568 return Reg;
569}
570
571//===----------------------------------------------------------------------===//
572// TargetLowering Implementation
573//===----------------------------------------------------------------------===//
574
575SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
576 SmallVectorImpl<SDValue> &InVals) const {
577 SelectionDAG &DAG = CLI.DAG;
578 SDLoc DL = CLI.DL;
579 SDValue Chain = CLI.Chain;
580 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
581
582 // VE target does not yet support tail call optimization.
583 CLI.IsTailCall = false;
584
585 // Get the base offset of the outgoing arguments stack space.
586 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
587 // Get the size of the preserved arguments area
588 unsigned ArgsPreserved = 8 * 8u;
589
590 // Analyze operands of the call, assigning locations to each operand.
591 SmallVector<CCValAssign, 16> ArgLocs;
592 CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
593 *DAG.getContext());
594 // Allocate the preserved area first.
595 CCInfo.AllocateStack(Size: ArgsPreserved, Alignment: Align(8));
596 // We already allocated the preserved area, so the stack offset computed
597 // by CC_VE would be correct now.
598 CCInfo.AnalyzeCallOperands(Outs: CLI.Outs, Fn: getParamCC(CallConv: CLI.CallConv, IsVarArg: false));
599
  // VE requires both registers and the stack to be used for varargs or
  // unprototyped functions.
602 bool UseBoth = CLI.IsVarArg;
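  // When set, each register argument is also stored to its stack slot so that
  // the callee's va_arg processing can find it in memory.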
603
  // Analyze the operands again if they are required to be stored BOTH in
  // registers and on the stack.
605 SmallVector<CCValAssign, 16> ArgLocs2;
606 CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
607 ArgLocs2, *DAG.getContext());
608 if (UseBoth)
609 CCInfo2.AnalyzeCallOperands(Outs: CLI.Outs, Fn: getParamCC(CallConv: CLI.CallConv, IsVarArg: true));
610
611 // Get the size of the outgoing arguments stack space requirement.
612 unsigned ArgsSize = CCInfo.getStackSize();
613
614 // Keep stack frames 16-byte aligned.
615 ArgsSize = alignTo(Value: ArgsSize, Align: 16);
616
617 // Adjust the stack pointer to make room for the arguments.
618 // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
619 // with more than 6 arguments.
620 Chain = DAG.getCALLSEQ_START(Chain, InSize: ArgsSize, OutSize: 0, DL);
621
622 // Collect the set of registers to pass to the function and their values.
623 // This will be emitted as a sequence of CopyToReg nodes glued to the call
624 // instruction.
625 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
626
  // Collect chains from all the memory operations that copy arguments to the
628 // stack. They must follow the stack pointer adjustment above and precede the
629 // call instruction itself.
630 SmallVector<SDValue, 8> MemOpChains;
631
  // VE needs the address of the callee function in a register, so prepare to
  // copy it to SX12 here.
634
635 // If the callee is a GlobalAddress node (quite common, every direct call is)
636 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
637 // Likewise ExternalSymbol -> TargetExternalSymbol.
638 SDValue Callee = CLI.Callee;
639
640 bool IsPICCall = isPositionIndependent();
641
642 // PC-relative references to external symbols should go through $stub.
643 // If so, we need to prepare GlobalBaseReg first.
644 const TargetMachine &TM = DAG.getTarget();
645 const GlobalValue *GV = nullptr;
646 auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
647 if (CalleeG)
648 GV = CalleeG->getGlobal();
649 bool Local = TM.shouldAssumeDSOLocal(GV);
650 bool UsePlt = !Local;
651 MachineFunction &MF = DAG.getMachineFunction();
652
653 // Turn GlobalAddress/ExternalSymbol node into a value node
654 // containing the address of them here.
655 if (CalleeG) {
656 if (IsPICCall) {
657 if (UsePlt)
658 Subtarget->getInstrInfo()->getGlobalBaseReg(MF: &MF);
659 Callee = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0, TargetFlags: 0);
660 Callee = DAG.getNode(Opcode: VEISD::GETFUNPLT, DL, VT: PtrVT, Operand: Callee);
661 } else {
662 Callee = makeHiLoPair(Op: Callee, HiTF: VE::S_HI32, LoTF: VE::S_LO32, DAG);
663 }
664 } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
665 if (IsPICCall) {
666 if (UsePlt)
667 Subtarget->getInstrInfo()->getGlobalBaseReg(MF: &MF);
668 Callee = DAG.getTargetExternalSymbol(Sym: E->getSymbol(), VT: PtrVT, TargetFlags: 0);
669 Callee = DAG.getNode(Opcode: VEISD::GETFUNPLT, DL, VT: PtrVT, Operand: Callee);
670 } else {
671 Callee = makeHiLoPair(Op: Callee, HiTF: VE::S_HI32, LoTF: VE::S_LO32, DAG);
672 }
673 }
674
675 RegsToPass.push_back(Elt: std::make_pair(x: VE::SX12, y&: Callee));
676
677 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
678 CCValAssign &VA = ArgLocs[i];
679 SDValue Arg = CLI.OutVals[i];
680
681 // Promote the value if needed.
682 switch (VA.getLocInfo()) {
683 default:
684 llvm_unreachable("Unknown location info!");
685 case CCValAssign::Full:
686 break;
687 case CCValAssign::SExt:
688 Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: VA.getLocVT(), Operand: Arg);
689 break;
690 case CCValAssign::ZExt:
691 Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: VA.getLocVT(), Operand: Arg);
692 break;
693 case CCValAssign::AExt:
694 Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: VA.getLocVT(), Operand: Arg);
695 break;
696 case CCValAssign::BCvt: {
697 // Convert a float argument to i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
702 assert(VA.getLocVT() == MVT::i64);
703 assert(VA.getValVT() == MVT::f32);
704 SDValue Undef = SDValue(
705 DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::i64), 0);
706 SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
707 Arg = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL,
708 VT: MVT::i64, Op1: Undef, Op2: Arg, Op3: Sub_f32),
709 0);
710 break;
711 }
712 }
713
714 if (VA.isRegLoc()) {
715 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
716 if (!UseBoth)
717 continue;
718 VA = ArgLocs2[i];
719 }
720
721 assert(VA.isMemLoc());
722
723 // Create a store off the stack pointer for this argument.
724 SDValue StackPtr = DAG.getRegister(Reg: VE::SX11, VT: PtrVT);
    // The argument area starts at %fp/%sp + the size of the reserved area.
726 SDValue PtrOff =
727 DAG.getIntPtrConstant(Val: VA.getLocMemOffset() + ArgsBaseOffset, DL);
728 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: PtrOff);
729 MemOpChains.push_back(
730 Elt: DAG.getStore(Chain, dl: DL, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo()));
731 }
732
733 // Emit all stores, make sure they occur before the call.
734 if (!MemOpChains.empty())
735 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains);
736
737 // Build a sequence of CopyToReg nodes glued together with token chain and
738 // glue operands which copy the outgoing args into registers. The InGlue is
739 // necessary since all emitted instructions must be stuck together in order
740 // to pass the live physical registers.
741 SDValue InGlue;
742 for (const auto &[Reg, N] : RegsToPass) {
743 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg, N, Glue: InGlue);
744 InGlue = Chain.getValue(R: 1);
745 }
746
747 // Build the operands for the call instruction itself.
748 SmallVector<SDValue, 8> Ops;
749 Ops.push_back(Elt: Chain);
750 for (const auto &[Reg, N] : RegsToPass)
751 Ops.push_back(Elt: DAG.getRegister(Reg, VT: N.getValueType()));
752
753 // Add a register mask operand representing the call-preserved registers.
754 const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
755 const uint32_t *Mask =
756 TRI->getCallPreservedMask(MF: DAG.getMachineFunction(), CC: CLI.CallConv);
757 assert(Mask && "Missing call preserved mask for calling convention");
758 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
759
760 // Make sure the CopyToReg nodes are glued to the call instruction which
761 // consumes the registers.
762 if (InGlue.getNode())
763 Ops.push_back(Elt: InGlue);
764
765 // Now the call itself.
766 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
767 Chain = DAG.getNode(Opcode: VEISD::CALL, DL, VTList: NodeTys, Ops);
768 InGlue = Chain.getValue(R: 1);
769
770 // Revert the stack pointer immediately after the call.
771 Chain = DAG.getCALLSEQ_END(Chain, Size1: ArgsSize, Size2: 0, Glue: InGlue, DL);
772 InGlue = Chain.getValue(R: 1);
773
774 // Now extract the return values. This is more or less the same as
775 // LowerFormalArguments.
776
777 // Assign locations to each value returned by this call.
778 SmallVector<CCValAssign, 16> RVLocs;
779 CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
780 *DAG.getContext());
781
782 // Set inreg flag manually for codegen generated library calls that
783 // return float.
784 if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
785 CLI.Ins[0].Flags.setInReg();
786
787 RVInfo.AnalyzeCallResult(Ins: CLI.Ins, Fn: getReturnCC(CallConv: CLI.CallConv));
788
789 // Copy all of the result registers out of their specified physreg.
790 for (unsigned i = 0; i != RVLocs.size(); ++i) {
791 CCValAssign &VA = RVLocs[i];
792 assert(!VA.needsCustom() && "Unexpected custom lowering");
793 Register Reg = VA.getLocReg();
794
795 // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
796 // reside in the same register in the high and low bits. Reuse the
797 // CopyFromReg previous node to avoid duplicate copies.
798 SDValue RV;
799 if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Val: Chain.getOperand(i: 1)))
800 if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
801 RV = Chain.getValue(R: 0);
802
803 // But usually we'll create a new CopyFromReg for a different register.
804 if (!RV.getNode()) {
805 RV = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: RVLocs[i].getLocVT(), Glue: InGlue);
806 Chain = RV.getValue(R: 1);
807 InGlue = Chain.getValue(R: 2);
808 }
809
810 // The callee promoted the return value, so insert an Assert?ext SDNode so
811 // we won't promote the value again in this function.
812 switch (VA.getLocInfo()) {
813 case CCValAssign::SExt:
814 RV = DAG.getNode(Opcode: ISD::AssertSext, DL, VT: VA.getLocVT(), N1: RV,
815 N2: DAG.getValueType(VA.getValVT()));
816 break;
817 case CCValAssign::ZExt:
818 RV = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: VA.getLocVT(), N1: RV,
819 N2: DAG.getValueType(VA.getValVT()));
820 break;
821 case CCValAssign::BCvt: {
822 // Extract a float return value from i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
827 assert(VA.getLocVT() == MVT::i64);
828 assert(VA.getValVT() == MVT::f32);
829 SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
830 RV = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL,
831 VT: MVT::f32, Op1: RV, Op2: Sub_f32),
832 0);
833 break;
834 }
835 default:
836 break;
837 }
838
839 // Truncate the register down to the return value type.
840 if (VA.isExtInLoc())
841 RV = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VA.getValVT(), Operand: RV);
842
843 InVals.push_back(Elt: RV);
844 }
845
846 return Chain;
847}
848
849bool VETargetLowering::isOffsetFoldingLegal(
850 const GlobalAddressSDNode *GA) const {
  // VE uses 64-bit addressing, so generating an address requires multiple
  // instructions. Folding an offset into an address would increase the number
  // of instructions, so we disable it here. Offsets will be folded later in
  // the DAG combine if it is worthwhile.
855 return false;
856}
857
858/// isFPImmLegal - Returns true if the target can instruction select the
859/// specified FP immediate natively. If false, the legalizer will
860/// materialize the FP immediate as a load from a constant pool.
861bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
862 bool ForCodeSize) const {
863 return VT == MVT::f32 || VT == MVT::f64;
864}
865
866/// Determine if the target supports unaligned memory accesses.
867///
868/// This function returns true if the target allows unaligned memory accesses
869/// of the specified type in the given address space. If true, it also returns
870/// whether the unaligned memory access is "fast" in the last argument by
871/// reference. This is used, for example, in situations where an array
872/// copy/move/set is converted to a sequence of store operations. Its use
873/// helps to ensure that such replacements don't generate code that causes an
874/// alignment error (trap) on the target machine.
875bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
876 unsigned AddrSpace,
877 Align A,
878 MachineMemOperand::Flags,
879 unsigned *Fast) const {
880 if (Fast) {
    // Unaligned accesses are always fast on VE.
882 *Fast = 1;
883 }
884 return true;
885}
886
887VETargetLowering::VETargetLowering(const TargetMachine &TM,
888 const VESubtarget &STI)
889 : TargetLowering(TM, STI), Subtarget(&STI) {
890 // Instructions which use registers as conditionals examine all the
891 // bits (as does the pseudo SELECT_CC expansion). I don't think it
892 // matters much whether it's ZeroOrOneBooleanContent, or
893 // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
894 // former.
895 setBooleanContents(ZeroOrOneBooleanContent);
896 setBooleanVectorContents(ZeroOrOneBooleanContent);
897
898 initRegisterClasses();
899 initSPUActions();
900 initVPUActions();
901
902 setStackPointerRegisterToSaveRestore(VE::SX11);
903
904 // We have target-specific dag combine patterns for the following nodes:
905 setTargetDAGCombine(ISD::TRUNCATE);
906 setTargetDAGCombine(ISD::SELECT);
907 setTargetDAGCombine(ISD::SELECT_CC);
908
909 // Set function alignment to 16 bytes
910 setMinFunctionAlignment(Align(16));
911
  // VE stores all arguments with 8-byte alignment.
913 setMinStackArgumentAlignment(Align(8));
914
915 computeRegisterProperties(TRI: Subtarget->getRegisterInfo());
916}
917
918EVT VETargetLowering::getSetCCResultType(const DataLayout &,
919 LLVMContext &Context, EVT VT) const {
920 if (VT.isVector())
921 return VT.changeVectorElementType(Context, EltVT: MVT::i1);
922 return MVT::i32;
923}
924
925// Convert to a target node and set target flags.
926SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
927 SelectionDAG &DAG) const {
928 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op))
929 return DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc(GA),
930 VT: GA->getValueType(ResNo: 0), offset: GA->getOffset(), TargetFlags: TF);
931
932 if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Val&: Op))
933 return DAG.getTargetBlockAddress(BA: BA->getBlockAddress(), VT: Op.getValueType(),
934 Offset: 0, TargetFlags: TF);
935
936 if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Val&: Op))
937 return DAG.getTargetConstantPool(C: CP->getConstVal(), VT: CP->getValueType(ResNo: 0),
938 Align: CP->getAlign(), Offset: CP->getOffset(), TargetFlags: TF);
939
940 if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Val&: Op))
941 return DAG.getTargetExternalSymbol(Sym: ES->getSymbol(), VT: ES->getValueType(ResNo: 0),
942 TargetFlags: TF);
943
944 if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Val&: Op))
945 return DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: JT->getValueType(ResNo: 0), TargetFlags: TF);
946
947 llvm_unreachable("Unhandled address SDNode");
948}
949
950// Split Op into high and low parts according to HiTF and LoTF.
951// Return an ADD node combining the parts.
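// For example, with the S_HI32/S_LO32 flags this roughly materializes as:
//   lea     %reg, sym@lo
//   and     %reg, %reg, (32)0
//   lea.sl  %reg, sym@hi(, %reg)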
952SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
953 SelectionDAG &DAG) const {
954 SDLoc DL(Op);
955 EVT VT = Op.getValueType();
956 SDValue Hi = DAG.getNode(Opcode: VEISD::Hi, DL, VT, Operand: withTargetFlags(Op, TF: HiTF, DAG));
957 SDValue Lo = DAG.getNode(Opcode: VEISD::Lo, DL, VT, Operand: withTargetFlags(Op, TF: LoTF, DAG));
958 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Hi, N2: Lo);
959}
960
961// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
962// or ExternalSymbol SDNode.
963SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
964 SDLoc DL(Op);
965 EVT PtrVT = Op.getValueType();
966
  // Handle PIC mode first. VE needs a GOT load for every variable!
968 if (isPositionIndependent()) {
969 auto GlobalN = dyn_cast<GlobalAddressSDNode>(Val&: Op);
970
971 if (isa<ConstantPoolSDNode>(Val: Op) || isa<JumpTableSDNode>(Val: Op) ||
972 (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
      // Create the following instructions for local-linkage PIC code.
974 // lea %reg, label@gotoff_lo
975 // and %reg, %reg, (32)0
976 // lea.sl %reg, label@gotoff_hi(%reg, %got)
977 SDValue HiLo =
978 makeHiLoPair(Op, HiTF: VE::S_GOTOFF_HI32, LoTF: VE::S_GOTOFF_LO32, DAG);
979 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrVT);
980 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: GlobalBase, N2: HiLo);
981 }
    // Create the following instructions for non-local-linkage PIC code.
983 // lea %reg, label@got_lo
984 // and %reg, %reg, (32)0
985 // lea.sl %reg, label@got_hi(%reg)
986 // ld %reg, (%reg, %got)
987 SDValue HiLo = makeHiLoPair(Op, HiTF: VE::S_GOT_HI32, LoTF: VE::S_GOT_LO32, DAG);
988 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrVT);
989 SDValue AbsAddr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: GlobalBase, N2: HiLo);
990 return DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: AbsAddr,
991 PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()));
992 }
993
994 // This is one of the absolute code models.
995 switch (getTargetMachine().getCodeModel()) {
996 default:
997 llvm_unreachable("Unsupported absolute code model");
998 case CodeModel::Small:
999 case CodeModel::Medium:
1000 case CodeModel::Large:
1001 // abs64.
1002 return makeHiLoPair(Op, HiTF: VE::S_HI32, LoTF: VE::S_LO32, DAG);
1003 }
1004}
1005
1006/// Custom Lower {
1007
// The emitLeadingFence/emitTrailingFence mappings for VE are designed
// following http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
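// In short: release and acq_rel operations get a leading release fence,
// seq_cst operations with an atomic store get a leading seq_cst fence, and
// acquire/acq_rel (seq_cst) operations get a trailing acquire (seq_cst) fence.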
1010Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
1011 Instruction *Inst,
1012 AtomicOrdering Ord) const {
1013 switch (Ord) {
1014 case AtomicOrdering::NotAtomic:
1015 case AtomicOrdering::Unordered:
1016 llvm_unreachable("Invalid fence: unordered/non-atomic");
1017 case AtomicOrdering::Monotonic:
1018 case AtomicOrdering::Acquire:
1019 return nullptr; // Nothing to do
1020 case AtomicOrdering::Release:
1021 case AtomicOrdering::AcquireRelease:
1022 return Builder.CreateFence(Ordering: AtomicOrdering::Release);
1023 case AtomicOrdering::SequentiallyConsistent:
1024 if (!Inst->hasAtomicStore())
1025 return nullptr; // Nothing to do
1026 return Builder.CreateFence(Ordering: AtomicOrdering::SequentiallyConsistent);
1027 }
1028 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
1029}
1030
1031Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
1032 Instruction *Inst,
1033 AtomicOrdering Ord) const {
1034 switch (Ord) {
1035 case AtomicOrdering::NotAtomic:
1036 case AtomicOrdering::Unordered:
1037 llvm_unreachable("Invalid fence: unordered/not-atomic");
1038 case AtomicOrdering::Monotonic:
1039 case AtomicOrdering::Release:
1040 return nullptr; // Nothing to do
1041 case AtomicOrdering::Acquire:
1042 case AtomicOrdering::AcquireRelease:
1043 return Builder.CreateFence(Ordering: AtomicOrdering::Acquire);
1044 case AtomicOrdering::SequentiallyConsistent:
1045 return Builder.CreateFence(Ordering: AtomicOrdering::SequentiallyConsistent);
1046 }
1047 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
1048}
1049
1050SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
1051 SelectionDAG &DAG) const {
1052 SDLoc DL(Op);
1053 AtomicOrdering FenceOrdering =
1054 static_cast<AtomicOrdering>(Op.getConstantOperandVal(i: 1));
1055 SyncScope::ID FenceSSID =
1056 static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2));
1057
  // VE uses release consistency, so we need a fence instruction if this is a
  // cross-thread fence.
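  // The fencem immediate below selects what to wait for: 1 acts as a release
  // (store) fence, 2 as an acquire (load) fence, and 3 as a full fence.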
1060 if (FenceSSID == SyncScope::System) {
1061 switch (FenceOrdering) {
1062 case AtomicOrdering::NotAtomic:
1063 case AtomicOrdering::Unordered:
1064 case AtomicOrdering::Monotonic:
1065 // No need to generate fencem instruction here.
1066 break;
1067 case AtomicOrdering::Acquire:
1068 // Generate "fencem 2" as acquire fence.
1069 return SDValue(DAG.getMachineNode(Opcode: VE::FENCEM, dl: DL, VT: MVT::Other,
1070 Op1: DAG.getTargetConstant(Val: 2, DL, VT: MVT::i32),
1071 Op2: Op.getOperand(i: 0)),
1072 0);
1073 case AtomicOrdering::Release:
1074 // Generate "fencem 1" as release fence.
1075 return SDValue(DAG.getMachineNode(Opcode: VE::FENCEM, dl: DL, VT: MVT::Other,
1076 Op1: DAG.getTargetConstant(Val: 1, DL, VT: MVT::i32),
1077 Op2: Op.getOperand(i: 0)),
1078 0);
1079 case AtomicOrdering::AcquireRelease:
1080 case AtomicOrdering::SequentiallyConsistent:
1081 // Generate "fencem 3" as acq_rel and seq_cst fence.
      // FIXME: "fencem 3" doesn't wait for accesses by PCIe devices, so
      // seq_cst may require more instructions for them.
1084 return SDValue(DAG.getMachineNode(Opcode: VE::FENCEM, dl: DL, VT: MVT::Other,
1085 Op1: DAG.getTargetConstant(Val: 3, DL, VT: MVT::i32),
1086 Op2: Op.getOperand(i: 0)),
1087 0);
1088 }
1089 }
1090
1091 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1092 return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0));
1093}
1094
1095TargetLowering::AtomicExpansionKind
1096VETargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const {
1097 // We have TS1AM implementation for i8/i16/i32/i64, so use it.
1098 if (AI->getOperation() == AtomicRMWInst::Xchg) {
1099 return AtomicExpansionKind::None;
1100 }
1101 // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
1102
  // Otherwise, expand it using a compare-and-exchange instruction so that the
  // __sync_fetch_and_* functions are not called.
1105 return AtomicExpansionKind::CmpXChg;
1106}
1107
1108static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
1109 SDValue &Bits) {
1110 SDLoc DL(Op);
1111 AtomicSDNode *N = cast<AtomicSDNode>(Val&: Op);
1112 SDValue Ptr = N->getOperand(Num: 1);
1113 SDValue Val = N->getOperand(Num: 2);
1114 EVT PtrVT = Ptr.getValueType();
1115 bool Byte = N->getMemoryVT() == MVT::i8;
1116 // Remainder = AND Ptr, 3
1117 // Flag = 1 << Remainder ; If Byte is true (1 byte swap flag)
1118 // Flag = 3 << Remainder ; If Byte is false (2 bytes swap flag)
1119 // Bits = Remainder << 3
1120 // NewVal = Val << Bits
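  // For example, a 2-byte access at Ptr % 4 == 2 gives Remainder = 2,
  // Flag = 3 << 2 = 0xc (selecting bytes 2-3 for the swap), Bits = 16, and
  // NewVal = Val << 16.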
1121 SDValue Const3 = DAG.getConstant(Val: 3, DL, VT: PtrVT);
1122 SDValue Remainder = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, Ops: {Ptr, Const3});
1123 SDValue Mask = Byte ? DAG.getConstant(Val: 1, DL, VT: MVT::i32)
1124 : DAG.getConstant(Val: 3, DL, VT: MVT::i32);
1125 Flag = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i32, Ops: {Mask, Remainder});
1126 Bits = DAG.getNode(Opcode: ISD::SHL, DL, VT: PtrVT, Ops: {Remainder, Const3});
1127 return DAG.getNode(Opcode: ISD::SHL, DL, VT: Val.getValueType(), Ops: {Val, Bits});
1128}
1129
1130static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
1131 SDValue Bits) {
1132 SDLoc DL(Op);
1133 EVT VT = Data.getValueType();
1134 bool Byte = cast<AtomicSDNode>(Val&: Op)->getMemoryVT() == MVT::i8;
1135 // NewData = Data >> Bits
1136 // Result = NewData & 0xff ; If Byte is true (1 byte)
1137 // Result = NewData & 0xffff ; If Byte is false (2 bytes)
1138
1139 SDValue NewData = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Data, N2: Bits);
1140 return DAG.getNode(Opcode: ISD::AND, DL, VT,
1141 Ops: {NewData, DAG.getConstant(Val: Byte ? 0xff : 0xffff, DL, VT)});
1142}
1143
1144SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
1145 SelectionDAG &DAG) const {
1146 SDLoc DL(Op);
1147 AtomicSDNode *N = cast<AtomicSDNode>(Val&: Op);
1148
1149 if (N->getMemoryVT() == MVT::i8) {
1150 // For i8, use "ts1am"
1151 // Input:
1152 // ATOMIC_SWAP Ptr, Val, Order
1153 //
1154 // Output:
1155 // Remainder = AND Ptr, 3
1156 // Flag = 1 << Remainder ; 1 byte swap flag for TS1AM inst.
1157 // Bits = Remainder << 3
1158 // NewVal = Val << Bits
1159 //
1160 // Aligned = AND Ptr, -4
1161 // Data = TS1AM Aligned, Flag, NewVal
1162 //
1163 // NewData = Data >> Bits
1164 // Result = NewData & 0xff ; 1 byte result
1165 SDValue Flag;
1166 SDValue Bits;
1167 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1168
1169 SDValue Ptr = N->getOperand(Num: 1);
1170 SDValue Aligned =
1171 DAG.getNode(Opcode: ISD::AND, DL, VT: Ptr.getValueType(),
1172 Ops: {Ptr, DAG.getSignedConstant(Val: -4, DL, VT: MVT::i64)});
1173 SDValue TS1AM = DAG.getAtomic(Opcode: VEISD::TS1AM, dl: DL, MemVT: N->getMemoryVT(),
1174 VTList: DAG.getVTList(VT1: Op.getNode()->getValueType(ResNo: 0),
1175 VT2: Op.getNode()->getValueType(ResNo: 1)),
1176 Ops: {N->getChain(), Aligned, Flag, NewVal},
1177 MMO: N->getMemOperand());
1178
1179 SDValue Result = finalizeTS1AM(Op, DAG, Data: TS1AM, Bits);
1180 SDValue Chain = TS1AM.getValue(R: 1);
1181 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
1182 }
1183 if (N->getMemoryVT() == MVT::i16) {
1184 // For i16, use "ts1am"
1185 SDValue Flag;
1186 SDValue Bits;
1187 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1188
1189 SDValue Ptr = N->getOperand(Num: 1);
1190 SDValue Aligned =
1191 DAG.getNode(Opcode: ISD::AND, DL, VT: Ptr.getValueType(),
1192 Ops: {Ptr, DAG.getSignedConstant(Val: -4, DL, VT: MVT::i64)});
1193 SDValue TS1AM = DAG.getAtomic(Opcode: VEISD::TS1AM, dl: DL, MemVT: N->getMemoryVT(),
1194 VTList: DAG.getVTList(VT1: Op.getNode()->getValueType(ResNo: 0),
1195 VT2: Op.getNode()->getValueType(ResNo: 1)),
1196 Ops: {N->getChain(), Aligned, Flag, NewVal},
1197 MMO: N->getMemOperand());
1198
1199 SDValue Result = finalizeTS1AM(Op, DAG, Data: TS1AM, Bits);
1200 SDValue Chain = TS1AM.getValue(R: 1);
1201 return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
1202 }
1203 // Otherwise, let llvm legalize it.
1204 return Op;
1205}
1206
1207SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
1208 SelectionDAG &DAG) const {
1209 return makeAddress(Op, DAG);
1210}
1211
1212SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
1213 SelectionDAG &DAG) const {
1214 return makeAddress(Op, DAG);
1215}
1216
1217SDValue VETargetLowering::lowerConstantPool(SDValue Op,
1218 SelectionDAG &DAG) const {
1219 return makeAddress(Op, DAG);
1220}
1221
1222SDValue
1223VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
1224 SelectionDAG &DAG) const {
1225 SDLoc DL(Op);
1226
1227 // Generate the following code:
1228 // t1: ch,glue = callseq_start t0, 0, 0
1229 // t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
1230 // t3: ch,glue = callseq_end t2, 0, 0, t2:2
1231 // t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
1232 SDValue Label = withTargetFlags(Op, TF: 0, DAG);
1233 EVT PtrVT = Op.getValueType();
1234
1235 // Lowering the machine isd will make sure everything is in the right
1236 // location.
1237 SDValue Chain = DAG.getEntryNode();
1238 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
1239 const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
1240 MF: DAG.getMachineFunction(), CC: CallingConv::C);
1241 Chain = DAG.getCALLSEQ_START(Chain, InSize: 64, OutSize: 0, DL);
1242 SDValue Args[] = {Chain, Label, DAG.getRegisterMask(RegMask: Mask), Chain.getValue(R: 1)};
1243 Chain = DAG.getNode(Opcode: VEISD::GETTLSADDR, DL, VTList: NodeTys, Ops: Args);
1244 Chain = DAG.getCALLSEQ_END(Chain, Size1: 64, Size2: 0, Glue: Chain.getValue(R: 1), DL);
1245 Chain = DAG.getCopyFromReg(Chain, dl: DL, Reg: VE::SX0, VT: PtrVT, Glue: Chain.getValue(R: 1));
1246
1247 // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
1248 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1249 MFI.setHasCalls(true);
1250
1251 // Also generate code to prepare a GOT register if it is PIC.
1252 if (isPositionIndependent()) {
1253 MachineFunction &MF = DAG.getMachineFunction();
1254 Subtarget->getInstrInfo()->getGlobalBaseReg(MF: &MF);
1255 }
1256
1257 return Chain;
1258}
1259
1260SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
1261 SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't accept the local exec
  // model code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we
  // always generate the general dynamic model code sequence.
1265 //
1266 // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
1267 return lowerToTLSGeneralDynamicModel(Op, DAG);
1268}
1269
1270SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1271 return makeAddress(Op, DAG);
1272}
1273
// Lower an f128 load into two f64 loads.
1275static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
1276 SDLoc DL(Op);
1277 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Val: Op.getNode());
1278 assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1279 Align Alignment = LdNode->getAlign();
1280 if (Alignment > 8)
1281 Alignment = Align(8);
1282
1283 SDValue Lo64 =
1284 DAG.getLoad(VT: MVT::f64, dl: DL, Chain: LdNode->getChain(), Ptr: LdNode->getBasePtr(),
1285 PtrInfo: LdNode->getPointerInfo(), Alignment,
1286 MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1287 : MachineMemOperand::MONone);
1288 EVT AddrVT = LdNode->getBasePtr().getValueType();
1289 SDValue HiPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: LdNode->getBasePtr(),
1290 N2: DAG.getConstant(Val: 8, DL, VT: AddrVT));
1291 SDValue Hi64 =
1292 DAG.getLoad(VT: MVT::f64, dl: DL, Chain: LdNode->getChain(), Ptr: HiPtr,
1293 PtrInfo: LdNode->getPointerInfo(), Alignment,
1294 MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1295 : MachineMemOperand::MONone);
1296
1297 SDValue SubRegEven = DAG.getTargetConstant(Val: VE::sub_even, DL, VT: MVT::i32);
1298 SDValue SubRegOdd = DAG.getTargetConstant(Val: VE::sub_odd, DL, VT: MVT::i32);
1299
1300 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
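  // Within the f128 register pair, sub_even receives the high 64 bits and
  // sub_odd receives the low 64 bits.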
1301 SDNode *InFP128 =
1302 DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::f128);
1303 InFP128 = DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL, VT: MVT::f128,
1304 Op1: SDValue(InFP128, 0), Op2: Hi64, Op3: SubRegEven);
1305 InFP128 = DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL, VT: MVT::f128,
1306 Op1: SDValue(InFP128, 0), Op2: Lo64, Op3: SubRegOdd);
1307 SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
1308 SDValue(Hi64.getNode(), 1)};
1309 SDValue OutChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1310 SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
1311 return DAG.getMergeValues(Ops, dl: DL);
1312}
1313
// Lower a vXi1 load into the following instructions:
1315// LDrii %1, (,%addr)
1316// LVMxir %vm, 0, %1
1317// LDrii %2, 8(,%addr)
1318// LVMxir %vm, 0, %2
1319// ...
1320static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
1321 SDLoc DL(Op);
1322 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Val: Op.getNode());
1323 assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1324
1325 SDValue BasePtr = LdNode->getBasePtr();
1326 Align Alignment = LdNode->getAlign();
1327 if (Alignment > 8)
1328 Alignment = Align(8);
1329
1330 EVT AddrVT = BasePtr.getValueType();
1331 EVT MemVT = LdNode->getMemoryVT();
1332 if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1333 SDValue OutChains[4];
1334 SDNode *VM = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MemVT);
1335 for (int i = 0; i < 4; ++i) {
1336 // Generate load dag and prepare chains.
1337 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1338 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1339 SDValue Val =
1340 DAG.getLoad(VT: MVT::i64, dl: DL, Chain: LdNode->getChain(), Ptr: Addr,
1341 PtrInfo: LdNode->getPointerInfo(), Alignment,
1342 MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1343 : MachineMemOperand::MONone);
1344 OutChains[i] = SDValue(Val.getNode(), 1);
1345
1346 VM = DAG.getMachineNode(Opcode: VE::LVMir_m, dl: DL, VT: MVT::i64,
1347 Op1: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64), Op2: Val,
1348 Op3: SDValue(VM, 0));
1349 }
1350 SDValue OutChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1351 SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1352 return DAG.getMergeValues(Ops, dl: DL);
1353 } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1354 SDValue OutChains[8];
1355 SDNode *VM = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MemVT);
1356 for (int i = 0; i < 8; ++i) {
1357 // Generate load dag and prepare chains.
1358 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1359 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1360 SDValue Val =
1361 DAG.getLoad(VT: MVT::i64, dl: DL, Chain: LdNode->getChain(), Ptr: Addr,
1362 PtrInfo: LdNode->getPointerInfo(), Alignment,
1363 MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1364 : MachineMemOperand::MONone);
1365 OutChains[i] = SDValue(Val.getNode(), 1);
1366
1367 VM = DAG.getMachineNode(Opcode: VE::LVMyir_y, dl: DL, VT: MVT::i64,
1368 Op1: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64), Op2: Val,
1369 Op3: SDValue(VM, 0));
1370 }
1371 SDValue OutChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1372 SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1373 return DAG.getMergeValues(Ops, dl: DL);
1374 } else {
1375 // Otherwise, ask llvm to expand it.
1376 return SDValue();
1377 }
1378}
1379
1380SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1381 LoadSDNode *LdNode = cast<LoadSDNode>(Val: Op.getNode());
1382 EVT MemVT = LdNode->getMemoryVT();
1383
1384 // If VPU is enabled, always expand non-mask vector loads to VVP
1385 if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(SomeVT: MemVT))
1386 return lowerToVVP(Op, DAG);
1387
1388 SDValue BasePtr = LdNode->getBasePtr();
1389 if (isa<FrameIndexSDNode>(Val: BasePtr.getNode())) {
1390 // Do not expand a load instruction with a frame index here because of
1391 // dependency problems. We expand it later in eliminateFrameIndex().
1392 return Op;
1393 }
1394
1395 if (MemVT == MVT::f128)
1396 return lowerLoadF128(Op, DAG);
1397 if (isMaskType(SomeVT: MemVT))
1398 return lowerLoadI1(Op, DAG);
1399
1400 return Op;
1401}
1402
1403// Lower a f128 store into two f64 stores.
1404static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
1405 SDLoc DL(Op);
1406 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Val: Op.getNode());
1407 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1408
1409 SDValue SubRegEven = DAG.getTargetConstant(Val: VE::sub_even, DL, VT: MVT::i32);
1410 SDValue SubRegOdd = DAG.getTargetConstant(Val: VE::sub_odd, DL, VT: MVT::i32);
1411
1412 SDNode *Hi64 = DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i64,
1413 Op1: StNode->getValue(), Op2: SubRegEven);
1414 SDNode *Lo64 = DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i64,
1415 Op1: StNode->getValue(), Op2: SubRegOdd);
1416
1417 Align Alignment = StNode->getAlign();
1418 if (Alignment > 8)
1419 Alignment = Align(8);
1420
1421 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1422 SDValue OutChains[2];
1423 OutChains[0] =
1424 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(Lo64, 0),
1425 Ptr: StNode->getBasePtr(), PtrInfo: MachinePointerInfo(), Alignment,
1426 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1427 : MachineMemOperand::MONone);
1428 EVT AddrVT = StNode->getBasePtr().getValueType();
1429 SDValue HiPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: StNode->getBasePtr(),
1430 N2: DAG.getConstant(Val: 8, DL, VT: AddrVT));
1431 OutChains[1] =
1432 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(Hi64, 0), Ptr: HiPtr,
1433 PtrInfo: MachinePointerInfo(), Alignment,
1434 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1435 : MachineMemOperand::MONone);
1436 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1437}
1438
1439// Lower a vXi1 store into the following instruction sequence:
1440// SVMi %1, %vm, 0
1441// STrii %1, (,%addr)
1442// SVMi %2, %vm, 1
1443// STrii %2, 8(,%addr)
1444// ...
1445static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
1446 SDLoc DL(Op);
1447 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Val: Op.getNode());
1448 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1449
1450 SDValue BasePtr = StNode->getBasePtr();
1451 Align Alignment = StNode->getAlign();
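 // Cap the recorded alignment at 8 bytes; the access is split into 64-bit pieces below.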
1452 if (Alignment > 8)
1453 Alignment = Align(8);
1454 EVT AddrVT = BasePtr.getValueType();
1455 EVT MemVT = StNode->getMemoryVT();
1456 if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1457 SDValue OutChains[4];
1458 for (int i = 0; i < 4; ++i) {
1459 SDNode *V =
1460 DAG.getMachineNode(Opcode: VE::SVMmi, dl: DL, VT: MVT::i64, Op1: StNode->getValue(),
1461 Op2: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64));
1462 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1463 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1464 OutChains[i] =
1465 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(V, 0), Ptr: Addr,
1466 PtrInfo: MachinePointerInfo(), Alignment,
1467 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1468 : MachineMemOperand::MONone);
1469 }
1470 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1471 } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1472 SDValue OutChains[8];
1473 for (int i = 0; i < 8; ++i) {
1474 SDNode *V =
1475 DAG.getMachineNode(Opcode: VE::SVMyi, dl: DL, VT: MVT::i64, Op1: StNode->getValue(),
1476 Op2: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64));
1477 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1478 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1479 OutChains[i] =
1480 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(V, 0), Ptr: Addr,
1481 PtrInfo: MachinePointerInfo(), Alignment,
1482 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1483 : MachineMemOperand::MONone);
1484 }
1485 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1486 } else {
1487 // Otherwise, ask llvm to expand it.
1488 return SDValue();
1489 }
1490}
1491
1492SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1493 StoreSDNode *StNode = cast<StoreSDNode>(Val: Op.getNode());
1494 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1495 EVT MemVT = StNode->getMemoryVT();
1496
1497 // If VPU is enabled, always expand non-mask vector stores to VVP
1498 if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(SomeVT: MemVT))
1499 return lowerToVVP(Op, DAG);
1500
1501 SDValue BasePtr = StNode->getBasePtr();
1502 if (isa<FrameIndexSDNode>(Val: BasePtr.getNode())) {
1503 // Do not expand a store instruction with a frame index here because of
1504 // dependency problems. We expand it later in eliminateFrameIndex().
1505 return Op;
1506 }
1507
1508 if (MemVT == MVT::f128)
1509 return lowerStoreF128(Op, DAG);
1510 if (isMaskType(SomeVT: MemVT))
1511 return lowerStoreI1(Op, DAG);
1512
1513 // Otherwise, ask llvm to expand it.
1514 return SDValue();
1515}
1516
1517SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1518 MachineFunction &MF = DAG.getMachineFunction();
1519 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1520 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1521
1522 // Need frame address to find the address of VarArgsFrameIndex.
1523 MF.getFrameInfo().setFrameAddressIsTaken(true);
1524
1525 // vastart just stores the address of the VarArgsFrameIndex slot into the
1526 // memory location argument.
1527 SDLoc DL(Op);
1528 SDValue Offset =
1529 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: DAG.getRegister(Reg: VE::SX9, VT: PtrVT),
1530 N2: DAG.getIntPtrConstant(Val: FuncInfo->getVarArgsFrameOffset(), DL));
1531 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
1532 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: Offset, Ptr: Op.getOperand(i: 1),
1533 PtrInfo: MachinePointerInfo(SV));
1534}
1535
1536SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1537 SDNode *Node = Op.getNode();
1538 EVT VT = Node->getValueType(ResNo: 0);
1539 SDValue InChain = Node->getOperand(Num: 0);
1540 SDValue VAListPtr = Node->getOperand(Num: 1);
1541 EVT PtrVT = VAListPtr.getValueType();
1542 const Value *SV = cast<SrcValueSDNode>(Val: Node->getOperand(Num: 2))->getValue();
1543 SDLoc DL(Node);
1544 SDValue VAList =
1545 DAG.getLoad(VT: PtrVT, dl: DL, Chain: InChain, Ptr: VAListPtr, PtrInfo: MachinePointerInfo(SV));
1546 SDValue Chain = VAList.getValue(R: 1);
1547 SDValue NextPtr;
1548
1549 if (VT == MVT::f128) {
1550 // VE f128 values must be stored with 16-byte alignment. We don't
1551 // know the actual alignment of VAList, so we enforce the alignment
1552 // dynamically.
1553 int Align = 16;
1554 VAList = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList,
1555 N2: DAG.getConstant(Val: Align - 1, DL, VT: PtrVT));
1556 VAList = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, N1: VAList,
1557 N2: DAG.getSignedConstant(Val: -Align, DL, VT: PtrVT));
1558 // Increment the pointer, VAList, by 16 to the next vaarg.
1559 NextPtr =
1560 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: 16, DL));
1561 } else if (VT == MVT::f32) {
1562 // float --> need special handling like below.
1563 // 0 4
1564 // +------+------+
1565 // | empty| float|
1566 // +------+------+
1567 // Increment the pointer, VAList, by 8 to the next vaarg.
1568 NextPtr =
1569 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: 8, DL));
1570 // Then, adjust VAList.
1571 unsigned InternalOffset = 4;
1572 VAList = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList,
1573 N2: DAG.getConstant(Val: InternalOffset, DL, VT: PtrVT));
1574 } else {
1575 // Increment the pointer, VAList, by 8 to the next vaarg.
1576 NextPtr =
1577 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: 8, DL));
1578 }
1579
1580 // Store the incremented VAList to the legalized pointer.
1581 InChain = DAG.getStore(Chain, dl: DL, Val: NextPtr, Ptr: VAListPtr, PtrInfo: MachinePointerInfo(SV));
1582
1583 // Load the actual argument out of the pointer VAList.
1584 // We can't count on greater alignment than the word size.
1585 return DAG.getLoad(
1586 VT, dl: DL, Chain: InChain, Ptr: VAList, PtrInfo: MachinePointerInfo(),
1587 Alignment: Align(std::min(a: PtrVT.getSizeInBits(), b: VT.getSizeInBits()) / 8));
1588}
1589
1590SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1591 SelectionDAG &DAG) const {
1592 // Generate the following code:
1593 // (void)__ve_grow_stack(size); // or __ve_grow_stack_align(size, align)
1594 // ret = GETSTACKTOP; // pseudo instruction
1595 SDLoc DL(Op);
1596
1597 // Get the inputs.
1598 SDNode *Node = Op.getNode();
1599 SDValue Chain = Op.getOperand(i: 0);
1600 SDValue Size = Op.getOperand(i: 1);
1601 MaybeAlign Alignment(Op.getConstantOperandVal(i: 2));
1602 EVT VT = Node->getValueType(ResNo: 0);
1603
1604 // Chain the dynamic stack allocation so that it doesn't modify the stack
1605 // pointer when other instructions are using the stack.
1606 Chain = DAG.getCALLSEQ_START(Chain, InSize: 0, OutSize: 0, DL);
1607
1608 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1609 Align StackAlign = TFI.getStackAlign();
1610 bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1611
1612 // Prepare arguments
1613 TargetLowering::ArgListTy Args;
1614 Args.emplace_back(args&: Size, args: Size.getValueType().getTypeForEVT(Context&: *DAG.getContext()));
1615 if (NeedsAlign) {
1616 SDValue Align = DAG.getConstant(Val: ~(Alignment->value() - 1ULL), DL, VT);
1617 Args.emplace_back(args&: Align,
1618 args: Align.getValueType().getTypeForEVT(Context&: *DAG.getContext()));
1619 }
1620 Type *RetTy = Type::getVoidTy(C&: *DAG.getContext());
1621
1622 EVT PtrVT = Op.getValueType();
1623 SDValue Callee;
1624 if (NeedsAlign) {
1625 Callee = DAG.getTargetExternalSymbol(Sym: "__ve_grow_stack_align", VT: PtrVT, TargetFlags: 0);
1626 } else {
1627 Callee = DAG.getTargetExternalSymbol(Sym: "__ve_grow_stack", VT: PtrVT, TargetFlags: 0);
1628 }
1629
1630 TargetLowering::CallLoweringInfo CLI(DAG);
1631 CLI.setDebugLoc(DL)
1632 .setChain(Chain)
1633 .setCallee(CC: CallingConv::PreserveAll, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
1634 .setDiscardResult(true);
1635 std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1636 Chain = pair.second;
1637 SDValue Result = DAG.getNode(Opcode: VEISD::GETSTACKTOP, DL, VT, Operand: Chain);
1638 if (NeedsAlign) {
1639 Result = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Result,
1640 N2: DAG.getConstant(Val: (Alignment->value() - 1ULL), DL, VT));
1641 Result = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Result,
1642 N2: DAG.getConstant(Val: ~(Alignment->value() - 1ULL), DL, VT));
1643 }
1644 // Chain = Result.getValue(1);
1645 Chain = DAG.getCALLSEQ_END(Chain, Size1: 0, Size2: 0, Glue: SDValue(), DL);
1646
1647 SDValue Ops[2] = {Result, Chain};
1648 return DAG.getMergeValues(Ops, dl: DL);
1649}
1650
1651SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1652 SelectionDAG &DAG) const {
1653 SDLoc DL(Op);
1654 return DAG.getNode(Opcode: VEISD::EH_SJLJ_LONGJMP, DL, VT: MVT::Other, N1: Op.getOperand(i: 0),
1655 N2: Op.getOperand(i: 1));
1656}
1657
1658SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1659 SelectionDAG &DAG) const {
1660 SDLoc DL(Op);
1661 return DAG.getNode(Opcode: VEISD::EH_SJLJ_SETJMP, DL,
1662 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other), N1: Op.getOperand(i: 0),
1663 N2: Op.getOperand(i: 1));
1664}
1665
1666SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1667 SelectionDAG &DAG) const {
1668 SDLoc DL(Op);
1669 return DAG.getNode(Opcode: VEISD::EH_SJLJ_SETUP_DISPATCH, DL, VT: MVT::Other,
1670 Operand: Op.getOperand(i: 0));
1671}
1672
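// Lower FRAMEADDR by reading the frame register and, for non-zero depths,
// following the saved frame pointer chain with one load per level.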
1673static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1674 const VETargetLowering &TLI,
1675 const VESubtarget *Subtarget) {
1676 SDLoc DL(Op);
1677 MachineFunction &MF = DAG.getMachineFunction();
1678 EVT PtrVT = TLI.getPointerTy(DL: MF.getDataLayout());
1679
1680 MachineFrameInfo &MFI = MF.getFrameInfo();
1681 MFI.setFrameAddressIsTaken(true);
1682
1683 unsigned Depth = Op.getConstantOperandVal(i: 0);
1684 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1685 Register FrameReg = RegInfo->getFrameRegister(MF);
1686 SDValue FrameAddr =
1687 DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT: PtrVT);
1688 while (Depth--)
1689 FrameAddr = DAG.getLoad(VT: Op.getValueType(), dl: DL, Chain: DAG.getEntryNode(),
1690 Ptr: FrameAddr, PtrInfo: MachinePointerInfo());
1691 return FrameAddr;
1692}
1693
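// Lower RETURNADDR by loading the return address, which is kept at offset 8
// from the frame address of the requested depth.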
1694static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1695 const VETargetLowering &TLI,
1696 const VESubtarget *Subtarget) {
1697 MachineFunction &MF = DAG.getMachineFunction();
1698 MachineFrameInfo &MFI = MF.getFrameInfo();
1699 MFI.setReturnAddressIsTaken(true);
1700
1701 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1702
1703 SDLoc DL(Op);
1704 EVT VT = Op.getValueType();
1705 SDValue Offset = DAG.getConstant(Val: 8, DL, VT);
1706 return DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(),
1707 Ptr: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, N2: Offset),
1708 PtrInfo: MachinePointerInfo());
1709}
1710
1711SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1712 SelectionDAG &DAG) const {
1713 SDLoc DL(Op);
1714 unsigned IntNo = Op.getConstantOperandVal(i: 0);
1715 switch (IntNo) {
1716 default: // Don't custom lower most intrinsics.
1717 return SDValue();
1718 case Intrinsic::eh_sjlj_lsda: {
1719 MachineFunction &MF = DAG.getMachineFunction();
1720 MVT VT = Op.getSimpleValueType();
1721 const VETargetMachine *TM =
1722 static_cast<const VETargetMachine *>(&DAG.getTarget());
1723
1724 // Create the GCC_except_tableXX string. The real symbol for it will be
1725 // generated in EHStreamer::emitExceptionTable() later, so we just
1726 // borrow its name here.
1727 TM->getStrList()->push_back(x: std::string(
1728 (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1729 SDValue Addr =
1730 DAG.getTargetExternalSymbol(Sym: TM->getStrList()->back().c_str(), VT, TargetFlags: 0);
1731 if (isPositionIndependent()) {
1732 Addr = makeHiLoPair(Op: Addr, HiTF: VE::S_GOTOFF_HI32, LoTF: VE::S_GOTOFF_LO32, DAG);
1733 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT);
1734 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: GlobalBase, N2: Addr);
1735 }
1736 return makeHiLoPair(Op: Addr, HiTF: VE::S_HI32, LoTF: VE::S_LO32, DAG);
1737 }
1738 }
1739}
1740
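// Return true if N is a BUILD_VECTOR with exactly one non-undef operand, and
// report that operand's index in UniqueIdx.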
1741static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1742 if (!isa<BuildVectorSDNode>(Val: N))
1743 return false;
1744 const auto *BVN = cast<BuildVectorSDNode>(Val: N);
1745
1746 // Find first non-undef insertion.
1747 unsigned Idx;
1748 for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1749 auto ElemV = BVN->getOperand(Num: Idx);
1750 if (!ElemV->isUndef())
1751 break;
1752 }
1753 // Catch the (hypothetical) all-undef case.
1754 if (Idx == BVN->getNumOperands())
1755 return false;
1756 // Remember insertion.
1757 UniqueIdx = Idx++;
1758 // Verify that all other insertions are undef.
1759 for (; Idx < BVN->getNumOperands(); ++Idx) {
1760 auto ElemV = BVN->getOperand(Num: Idx);
1761 if (!ElemV->isUndef())
1762 return false;
1763 }
1764 return true;
1765}
1766
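// Return the splat value if N is a BUILD_VECTOR splat, otherwise an empty SDValue.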
1767static SDValue getSplatValue(SDNode *N) {
1768 if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(Val: N)) {
1769 return BuildVec->getSplatValue();
1770 }
1771 return SDValue();
1772}
1773
1774SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1775 SelectionDAG &DAG) const {
1776 VECustomDAG CDAG(DAG, Op);
1777 MVT ResultVT = Op.getSimpleValueType();
1778
1779 // If there is just one non-undef element, expand to INSERT_VECTOR_ELT.
1780 unsigned UniqueIdx;
1781 if (getUniqueInsertion(N: Op.getNode(), UniqueIdx)) {
1782 SDValue AccuV = CDAG.getUNDEF(VT: Op.getValueType());
1783 auto ElemV = Op->getOperand(Num: UniqueIdx);
1784 SDValue IdxV = CDAG.getConstant(Val: UniqueIdx, VT: MVT::i64);
1785 return CDAG.getNode(OC: ISD::INSERT_VECTOR_ELT, ResVT: ResultVT, OpV: {AccuV, ElemV, IdxV});
1786 }
1787
1788 // Else emit a broadcast.
1789 if (SDValue ScalarV = getSplatValue(N: Op.getNode())) {
1790 unsigned NumEls = ResultVT.getVectorNumElements();
1791 auto AVL = CDAG.getConstant(Val: NumEls, VT: MVT::i32);
1792 return CDAG.getBroadcast(ResultVT, Scalar: ScalarV, AVL);
1793 }
1794
1795 // Expand
1796 return SDValue();
1797}
1798
1799TargetLowering::LegalizeAction
1800VETargetLowering::getCustomOperationAction(SDNode &Op) const {
1801 // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
1802 // these operations (transform nodes such that their AVL parameter refers to
1803 // packs of 64 bits instead of the number of elements).
1804
1805 // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1806 // re-visit them.
1807 if (isPackingSupportOpcode(Opc: Op.getOpcode()))
1808 return Legal;
1809
1810 // Custom lower to legalize AVL for packed mode.
1811 if (isVVPOrVEC(Op.getOpcode()))
1812 return Custom;
1813 return Legal;
1814}
1815
1816SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1817 LLVM_DEBUG(dbgs() << "::LowerOperation "; Op.dump(&DAG));
1818 unsigned Opcode = Op.getOpcode();
1819
1820 /// Scalar isel.
1821 switch (Opcode) {
1822 case ISD::ATOMIC_FENCE:
1823 return lowerATOMIC_FENCE(Op, DAG);
1824 case ISD::ATOMIC_SWAP:
1825 return lowerATOMIC_SWAP(Op, DAG);
1826 case ISD::BlockAddress:
1827 return lowerBlockAddress(Op, DAG);
1828 case ISD::ConstantPool:
1829 return lowerConstantPool(Op, DAG);
1830 case ISD::DYNAMIC_STACKALLOC:
1831 return lowerDYNAMIC_STACKALLOC(Op, DAG);
1832 case ISD::EH_SJLJ_LONGJMP:
1833 return lowerEH_SJLJ_LONGJMP(Op, DAG);
1834 case ISD::EH_SJLJ_SETJMP:
1835 return lowerEH_SJLJ_SETJMP(Op, DAG);
1836 case ISD::EH_SJLJ_SETUP_DISPATCH:
1837 return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1838 case ISD::FRAMEADDR:
1839 return lowerFRAMEADDR(Op, DAG, TLI: *this, Subtarget);
1840 case ISD::GlobalAddress:
1841 return lowerGlobalAddress(Op, DAG);
1842 case ISD::GlobalTLSAddress:
1843 return lowerGlobalTLSAddress(Op, DAG);
1844 case ISD::INTRINSIC_WO_CHAIN:
1845 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1846 case ISD::JumpTable:
1847 return lowerJumpTable(Op, DAG);
1848 case ISD::LOAD:
1849 return lowerLOAD(Op, DAG);
1850 case ISD::RETURNADDR:
1851 return lowerRETURNADDR(Op, DAG, TLI: *this, Subtarget);
1852 case ISD::BUILD_VECTOR:
1853 return lowerBUILD_VECTOR(Op, DAG);
1854 case ISD::STORE:
1855 return lowerSTORE(Op, DAG);
1856 case ISD::VASTART:
1857 return lowerVASTART(Op, DAG);
1858 case ISD::VAARG:
1859 return lowerVAARG(Op, DAG);
1860
1861 case ISD::INSERT_VECTOR_ELT:
1862 return lowerINSERT_VECTOR_ELT(Op, DAG);
1863 case ISD::EXTRACT_VECTOR_ELT:
1864 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1865 }
1866
1867 /// Vector isel.
1868 if (ISD::isVPOpcode(Opcode))
1869 return lowerToVVP(Op, DAG);
1870
1871 switch (Opcode) {
1872 default:
1873 llvm_unreachable("Should not custom lower this!");
1874
1875 // Legalize the AVL of this internal node.
1876 case VEISD::VEC_BROADCAST:
1877#define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
1878#include "VVPNodes.def"
1879 // AVL already legalized.
1880 if (getAnnotatedNodeAVL(Op).second)
1881 return Op;
1882 return legalizeInternalVectorOp(Op, DAG);
1883
1884 // Translate into a VEC_*/VVP_* layer operation.
1885 case ISD::MLOAD:
1886 case ISD::MSTORE:
1887#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1888#include "VVPNodes.def"
1889 if (isMaskArithmetic(Op) && isPackedVectorType(SomeVT: Op.getValueType()))
1890 return splitMaskArithmetic(Op, DAG);
1891 return lowerToVVP(Op, DAG);
1892 }
1893}
1894/// } Custom Lower
1895
1896void VETargetLowering::ReplaceNodeResults(SDNode *N,
1897 SmallVectorImpl<SDValue> &Results,
1898 SelectionDAG &DAG) const {
1899 switch (N->getOpcode()) {
1900 case ISD::ATOMIC_SWAP:
1901 // Let LLVM expand atomic swap instruction through LowerOperation.
1902 return;
1903 default:
1904 LLVM_DEBUG(N->dumpr(&DAG));
1905 llvm_unreachable("Do not know how to custom type legalize this operation!");
1906 }
1907}
1908
1909/// JumpTable for VE.
1910///
1911/// VE cannot generate relocatable symbols in a jump table, and it cannot
1912/// generate expressions using symbols from both the text segment and the
1913/// data segment, like below.
1914/// .4byte .LBB0_2-.LJTI0_0
1915/// So, we instead generate an offset from the top of the function, like
1916/// below, as a custom label.
1917/// .4byte .LBB0_2-<function name>
1918
1919unsigned VETargetLowering::getJumpTableEncoding() const {
1920 // Use custom label for PIC.
1921 if (isPositionIndependent())
1922 return MachineJumpTableInfo::EK_Custom32;
1923
1924 // Otherwise, use the normal jump table encoding heuristics.
1925 return TargetLowering::getJumpTableEncoding();
1926}
1927
1928const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1929 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1930 unsigned Uid, MCContext &Ctx) const {
1931 assert(isPositionIndependent());
1932
1933 // Generate a custom label for PIC like below.
1934 // .4byte .LBB0_2-<function name>
1935 const auto *Value = MCSymbolRefExpr::create(Symbol: MBB->getSymbol(), Ctx);
1936 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: MBB->getParent()->getName().data());
1937 const auto *Base = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
1938 return MCBinaryExpr::createSub(LHS: Value, RHS: Base, Ctx);
1939}
1940
1941SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1942 SelectionDAG &DAG) const {
1943 assert(isPositionIndependent());
1944 SDLoc DL(Table);
1945 Function *Function = &DAG.getMachineFunction().getFunction();
1946 assert(Function != nullptr);
1947 auto PtrTy = getPointerTy(DL: DAG.getDataLayout(), AS: Function->getAddressSpace());
1948
1949 // In the jump table, we have the following values in PIC mode.
1950 // .4byte .LBB0_2-<function name>
1951 // We need to add this value and the address of this function to generate
1952 // the .LBB0_2 label correctly under PIC mode. So, we want to generate the
1953 // following instructions:
1954 // lea %reg, fun@gotoff_lo
1955 // and %reg, %reg, (32)0
1956 // lea.sl %reg, fun@gotoff_hi(%reg, %got)
1957 // In order to do so, we need to generate a correctly marked DAG node using
1958 // makeHiLoPair.
1959 SDValue Op = DAG.getGlobalAddress(GV: Function, DL, VT: PtrTy);
1960 SDValue HiLo = makeHiLoPair(Op, HiTF: VE::S_GOTOFF_HI32, LoTF: VE::S_GOTOFF_LO32, DAG);
1961 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrTy);
1962 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: GlobalBase, N2: HiLo);
1963}
1964
1965Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
1966 MachineBasicBlock::iterator I,
1967 MachineBasicBlock *TargetBB,
1968 const DebugLoc &DL) const {
1969 MachineFunction *MF = MBB.getParent();
1970 MachineRegisterInfo &MRI = MF->getRegInfo();
1971 const VEInstrInfo *TII = Subtarget->getInstrInfo();
1972
1973 const TargetRegisterClass *RC = &VE::I64RegClass;
1974 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
1975 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
1976 Register Result = MRI.createVirtualRegister(RegClass: RC);
1977
1978 if (isPositionIndependent()) {
1979 // Create following instructions for local linkage PIC code.
1980 // lea %Tmp1, TargetBB@gotoff_lo
1981 // and %Tmp2, %Tmp1, (32)0
1982 // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
1983 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
1984 .addImm(Val: 0)
1985 .addImm(Val: 0)
1986 .addMBB(MBB: TargetBB, TargetFlags: VE::S_GOTOFF_LO32);
1987 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
1988 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
1989 .addImm(Val: M0(Val: 32));
1990 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Result)
1991 .addReg(RegNo: VE::SX15)
1992 .addReg(RegNo: Tmp2, Flags: getKillRegState(B: true))
1993 .addMBB(MBB: TargetBB, TargetFlags: VE::S_GOTOFF_HI32);
1994 } else {
1995 // Create following instructions for non-PIC code.
1996 // lea %Tmp1, TargetBB@lo
1997 // and %Tmp2, %Tmp1, (32)0
1998 // lea.sl %Result, TargetBB@hi(%Tmp2)
1999 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2000 .addImm(Val: 0)
2001 .addImm(Val: 0)
2002 .addMBB(MBB: TargetBB, TargetFlags: VE::S_LO32);
2003 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2004 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2005 .addImm(Val: M0(Val: 32));
2006 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: Result)
2007 .addReg(RegNo: Tmp2, Flags: getKillRegState(B: true))
2008 .addImm(Val: 0)
2009 .addMBB(MBB: TargetBB, TargetFlags: VE::S_HI32);
2010 }
2011 return Result;
2012}
2013
2014Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
2015 MachineBasicBlock::iterator I,
2016 StringRef Symbol, const DebugLoc &DL,
2017 bool IsLocal = false,
2018 bool IsCall = false) const {
2019 MachineFunction *MF = MBB.getParent();
2020 MachineRegisterInfo &MRI = MF->getRegInfo();
2021 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2022
2023 const TargetRegisterClass *RC = &VE::I64RegClass;
2024 Register Result = MRI.createVirtualRegister(RegClass: RC);
2025
2026 if (isPositionIndependent()) {
2027 if (IsCall && !IsLocal) {
2028 // Create following instructions for non-local linkage PIC code function
2029 // calls. These instructions use the IC and the magic number -24, so we expand
2030 // them in VEAsmPrinter.cpp from the GETFUNPLT pseudo instruction.
2031 // lea %Reg, Symbol@plt_lo(-24)
2032 // and %Reg, %Reg, (32)0
2033 // sic %s16
2034 // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
2035 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::GETFUNPLT), DestReg: Result)
2036 .addExternalSymbol(FnName: "abort");
2037 } else if (IsLocal) {
2038 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2039 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2040 // Create following instructions for local linkage PIC code.
2041 // lea %Tmp1, Symbol@gotoff_lo
2042 // and %Tmp2, %Tmp1, (32)0
2043 // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2044 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2045 .addImm(Val: 0)
2046 .addImm(Val: 0)
2047 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOTOFF_LO32);
2048 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2049 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2050 .addImm(Val: M0(Val: 32));
2051 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Result)
2052 .addReg(RegNo: VE::SX15)
2053 .addReg(RegNo: Tmp2, Flags: getKillRegState(B: true))
2054 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOTOFF_HI32);
2055 } else {
2056 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2057 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2058 // Create following instructions for non-local linkage PIC code.
2059 // lea %Tmp1, Symbol@got_lo
2060 // and %Tmp2, %Tmp1, (32)0
2061 // lea.sl %Tmp3, Symbol@got_hi(%Tmp2, %s15) ; %s15 is GOT
2062 // ld %Result, 0(%Tmp3)
2063 Register Tmp3 = MRI.createVirtualRegister(RegClass: RC);
2064 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2065 .addImm(Val: 0)
2066 .addImm(Val: 0)
2067 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOT_LO32);
2068 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2069 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2070 .addImm(Val: M0(Val: 32));
2071 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Tmp3)
2072 .addReg(RegNo: VE::SX15)
2073 .addReg(RegNo: Tmp2, Flags: getKillRegState(B: true))
2074 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOT_HI32);
2075 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: Result)
2076 .addReg(RegNo: Tmp3, Flags: getKillRegState(B: true))
2077 .addImm(Val: 0)
2078 .addImm(Val: 0);
2079 }
2080 } else {
2081 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2082 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2083 // Create following instructions for non-PIC code.
2084 // lea %Tmp1, Symbol@lo
2085 // and %Tmp2, %Tmp1, (32)0
2086 // lea.sl %Result, Symbol@hi(%Tmp2)
2087 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2088 .addImm(Val: 0)
2089 .addImm(Val: 0)
2090 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_LO32);
2091 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2092 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2093 .addImm(Val: M0(Val: 32));
2094 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: Result)
2095 .addReg(RegNo: Tmp2, Flags: getKillRegState(B: true))
2096 .addImm(Val: 0)
2097 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_HI32);
2098 }
2099 return Result;
2100}
2101
2102void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
2103 MachineBasicBlock *MBB,
2104 MachineBasicBlock *DispatchBB,
2105 int FI, int Offset) const {
2106 DebugLoc DL = MI.getDebugLoc();
2107 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2108
2109 Register LabelReg =
2110 prepareMBB(MBB&: *MBB, I: MachineBasicBlock::iterator(MI), TargetBB: DispatchBB, DL);
2111
2112 // Store the address of DispatchBB into the given jmpbuf[1], which holds the
2113 // next IC referenced by longjmp (throw) later.
2114 MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2115 addFrameReference(MIB, FI, Offset); // jmpbuf[1]
2116 MIB.addReg(RegNo: LabelReg, Flags: getKillRegState(B: true));
2117}
2118
2119MachineBasicBlock *
2120VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
2121 MachineBasicBlock *MBB) const {
2122 DebugLoc DL = MI.getDebugLoc();
2123 MachineFunction *MF = MBB->getParent();
2124 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2125 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
2126 MachineRegisterInfo &MRI = MF->getRegInfo();
2127
2128 const BasicBlock *BB = MBB->getBasicBlock();
2129 MachineFunction::iterator I = ++MBB->getIterator();
2130
2131 // Memory Reference.
2132 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
2133 Register BufReg = MI.getOperand(i: 1).getReg();
2134
2135 Register DstReg;
2136
2137 DstReg = MI.getOperand(i: 0).getReg();
2138 const TargetRegisterClass *RC = MRI.getRegClass(Reg: DstReg);
2139 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
2140 (void)TRI;
2141 Register MainDestReg = MRI.createVirtualRegister(RegClass: RC);
2142 Register RestoreDestReg = MRI.createVirtualRegister(RegClass: RC);
2143
2144 // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate the following
2145 // instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2146 //
2147 // ThisMBB:
2148 // buf[3] = %s17 iff %s17 is used as BP
2149 // buf[1] = RestoreMBB as IC after longjmp
2150 // # SjLjSetup RestoreMBB
2151 //
2152 // MainMBB:
2153 // v_main = 0
2154 //
2155 // SinkMBB:
2156 // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2157 // ...
2158 //
2159 // RestoreMBB:
2160 // %s17 = buf[3] iff %s17 is used as BP
2161 // v_restore = 1
2162 // goto SinkMBB
2163
2164 MachineBasicBlock *ThisMBB = MBB;
2165 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2166 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2167 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2168 MF->insert(MBBI: I, MBB: MainMBB);
2169 MF->insert(MBBI: I, MBB: SinkMBB);
2170 MF->push_back(MBB: RestoreMBB);
2171 RestoreMBB->setMachineBlockAddressTaken();
2172
2173 // Transfer the remainder of BB and its successor edges to SinkMBB.
2174 SinkMBB->splice(Where: SinkMBB->begin(), Other: MBB,
2175 From: std::next(x: MachineBasicBlock::iterator(MI)), To: MBB->end());
2176 SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
2177
2178 // ThisMBB:
2179 Register LabelReg =
2180 prepareMBB(MBB&: *MBB, I: MachineBasicBlock::iterator(MI), TargetBB: RestoreMBB, DL);
2181
2182 // Store BP in buf[3] iff this function is using BP.
2183 const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2184 if (TFI->hasBP(MF: *MF)) {
2185 MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2186 MIB.addReg(RegNo: BufReg);
2187 MIB.addImm(Val: 0);
2188 MIB.addImm(Val: 24);
2189 MIB.addReg(RegNo: VE::SX17);
2190 MIB.setMemRefs(MMOs);
2191 }
2192
2193 // Store IP in buf[1].
2194 MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2195 MIB.add(MO: MI.getOperand(i: 1)); // we can preserve the kill flags here.
2196 MIB.addImm(Val: 0);
2197 MIB.addImm(Val: 8);
2198 MIB.addReg(RegNo: LabelReg, Flags: getKillRegState(B: true));
2199 MIB.setMemRefs(MMOs);
2200
2201 // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2202
2203 // Insert setup.
2204 MIB =
2205 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::EH_SjLj_Setup)).addMBB(MBB: RestoreMBB);
2206
2207 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2208 MIB.addRegMask(Mask: RegInfo->getNoPreservedMask());
2209 ThisMBB->addSuccessor(Succ: MainMBB);
2210 ThisMBB->addSuccessor(Succ: RestoreMBB);
2211
2212 // MainMBB:
2213 BuildMI(BB: MainMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: MainDestReg)
2214 .addImm(Val: 0)
2215 .addImm(Val: 0)
2216 .addImm(Val: 0);
2217 MainMBB->addSuccessor(Succ: SinkMBB);
2218
2219 // SinkMBB:
2220 BuildMI(BB&: *SinkMBB, I: SinkMBB->begin(), MIMD: DL, MCID: TII->get(Opcode: VE::PHI), DestReg: DstReg)
2221 .addReg(RegNo: MainDestReg)
2222 .addMBB(MBB: MainMBB)
2223 .addReg(RegNo: RestoreDestReg)
2224 .addMBB(MBB: RestoreMBB);
2225
2226 // RestoreMBB:
2227 // Restore BP from buf[3] iff this function is using BP. The address of
2228 // buf is in SX10.
2229 // FIXME: Better to not use SX10 here
2230 if (TFI->hasBP(MF: *MF)) {
2231 MachineInstrBuilder MIB =
2232 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: VE::SX17);
2233 MIB.addReg(RegNo: VE::SX10);
2234 MIB.addImm(Val: 0);
2235 MIB.addImm(Val: 24);
2236 MIB.setMemRefs(MMOs);
2237 }
2238 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: RestoreDestReg)
2239 .addImm(Val: 0)
2240 .addImm(Val: 0)
2241 .addImm(Val: 1);
2242 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLa_t)).addMBB(MBB: SinkMBB);
2243 RestoreMBB->addSuccessor(Succ: SinkMBB);
2244
2245 MI.eraseFromParent();
2246 return SinkMBB;
2247}
2248
2249MachineBasicBlock *
2250VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2251 MachineBasicBlock *MBB) const {
2252 DebugLoc DL = MI.getDebugLoc();
2253 MachineFunction *MF = MBB->getParent();
2254 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2255 MachineRegisterInfo &MRI = MF->getRegInfo();
2256
2257 // Memory Reference.
2258 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
2259 Register BufReg = MI.getOperand(i: 0).getReg();
2260
2261 Register Tmp = MRI.createVirtualRegister(RegClass: &VE::I64RegClass);
2262 // Since FP is only updated here but NOT referenced, it's treated as GPR.
2263 Register FP = VE::SX9;
2264 Register SP = VE::SX11;
2265
2266 MachineInstrBuilder MIB;
2267
2268 MachineBasicBlock *ThisMBB = MBB;
2269
2270 // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate the following instructions.
2271 //
2272 // ThisMBB:
2273 // %fp = load buf[0]
2274 // %jmp = load buf[1]
2275 // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2276 // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2277 // jmp %jmp
2278
2279 // Reload FP.
2280 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: FP);
2281 MIB.addReg(RegNo: BufReg);
2282 MIB.addImm(Val: 0);
2283 MIB.addImm(Val: 0);
2284 MIB.setMemRefs(MMOs);
2285
2286 // Reload IP.
2287 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: Tmp);
2288 MIB.addReg(RegNo: BufReg);
2289 MIB.addImm(Val: 0);
2290 MIB.addImm(Val: 8);
2291 MIB.setMemRefs(MMOs);
2292
2293 // Copy BufReg to SX10 for later use in setjmp.
2294 // FIXME: Better to not use SX10 here
2295 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::ORri), DestReg: VE::SX10)
2296 .addReg(RegNo: BufReg)
2297 .addImm(Val: 0);
2298
2299 // Reload SP.
2300 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: SP);
2301 MIB.add(MO: MI.getOperand(i: 0)); // we can preserve the kill flags here.
2302 MIB.addImm(Val: 0);
2303 MIB.addImm(Val: 16);
2304 MIB.setMemRefs(MMOs);
2305
2306 // Jump.
2307 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2308 .addReg(RegNo: Tmp, Flags: getKillRegState(B: true))
2309 .addImm(Val: 0);
2310
2311 MI.eraseFromParent();
2312 return ThisMBB;
2313}
2314
2315MachineBasicBlock *
2316VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2317 MachineBasicBlock *BB) const {
2318 DebugLoc DL = MI.getDebugLoc();
2319 MachineFunction *MF = BB->getParent();
2320 MachineFrameInfo &MFI = MF->getFrameInfo();
2321 MachineRegisterInfo &MRI = MF->getRegInfo();
2322 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2323 int FI = MFI.getFunctionContextIndex();
2324
2325 // Get a mapping of the call site numbers to all of the landing pads they're
2326 // associated with.
2327 DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
2328 unsigned MaxCSNum = 0;
2329 for (auto &MBB : *MF) {
2330 if (!MBB.isEHPad())
2331 continue;
2332
2333 MCSymbol *Sym = nullptr;
2334 for (const auto &MI : MBB) {
2335 if (MI.isDebugInstr())
2336 continue;
2337
2338 assert(MI.isEHLabel() && "expected EH_LABEL");
2339 Sym = MI.getOperand(i: 0).getMCSymbol();
2340 break;
2341 }
2342
2343 if (!MF->hasCallSiteLandingPad(Sym))
2344 continue;
2345
2346 for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2347 CallSiteNumToLPad[CSI].push_back(Elt: &MBB);
2348 MaxCSNum = std::max(a: MaxCSNum, b: CSI);
2349 }
2350 }
2351
2352 // Get an ordered list of the machine basic blocks for the jump table.
2353 std::vector<MachineBasicBlock *> LPadList;
2354 SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
2355 LPadList.reserve(n: CallSiteNumToLPad.size());
2356
2357 for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
2358 for (auto &LP : CallSiteNumToLPad[CSI]) {
2359 LPadList.push_back(x: LP);
2360 InvokeBBs.insert_range(R: LP->predecessors());
2361 }
2362 }
2363
2364 assert(!LPadList.empty() &&
2365 "No landing pad destinations for the dispatch jump table!");
2366
2367 // The %fn_context is allocated like below (from --print-after=sjljehprepare):
2368 // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2369 //
2370 // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
2371 // First `i64` is callsite, so callsite is FI+8.
2372 static const int OffsetIC = 72;
2373 static const int OffsetCS = 8;
2374
2375 // Create the MBBs for the dispatch code like following:
2376 //
2377 // ThisMBB:
2378 // Prepare DispatchBB address and store it to buf[1].
2379 // ...
2380 //
2381 // DispatchBB:
2382 // %s15 = GETGOT iff isPositionIndependent
2383 // %callsite = load callsite
2384 // brgt.l.t #size of callsites, %callsite, DispContBB
2385 //
2386 // TrapBB:
2387 // Call abort.
2388 //
2389 // DispContBB:
2390 // %breg = address of jump table
2391 // %pc = load and calculate next pc from %breg and %callsite
2392 // jmp %pc
2393
2394 // Shove the dispatch's address into the return slot in the function context.
2395 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2396 DispatchBB->setIsEHPad(true);
2397
2398 // TrapBB will cause a trap like `assert(0)`.
2399 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2400 DispatchBB->addSuccessor(Succ: TrapBB);
2401
2402 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2403 DispatchBB->addSuccessor(Succ: DispContBB);
2404
2405 // Insert MBBs.
2406 MF->push_back(MBB: DispatchBB);
2407 MF->push_back(MBB: DispContBB);
2408 MF->push_back(MBB: TrapBB);
2409
2410 // Insert code to call abort in the TrapBB.
2411 Register Abort = prepareSymbol(MBB&: *TrapBB, I: TrapBB->end(), Symbol: "abort", DL,
2412 /* Local */ IsLocal: false, /* Call */ IsCall: true);
2413 BuildMI(BB: TrapBB, MIMD: DL, MCID: TII->get(Opcode: VE::BSICrii), DestReg: VE::SX10)
2414 .addReg(RegNo: Abort, Flags: getKillRegState(B: true))
2415 .addImm(Val: 0)
2416 .addImm(Val: 0);
2417
2418 // Insert code into the entry block that creates and registers the function
2419 // context.
2420 setupEntryBlockForSjLj(MI, MBB: BB, DispatchBB, FI, Offset: OffsetIC);
2421
2422 // Create the jump table and associated information
2423 unsigned JTE = getJumpTableEncoding();
2424 MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTEntryKind: JTE);
2425 unsigned MJTI = JTI->createJumpTableIndex(DestBBs: LPadList);
2426
2427 const VERegisterInfo &RI = TII->getRegisterInfo();
2428 // Add a register mask with no preserved registers. This results in all
2429 // registers being marked as clobbered.
2430 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::NOP))
2431 .addRegMask(Mask: RI.getNoPreservedMask());
2432
2433 if (isPositionIndependent()) {
2434 // Force generation of GETGOT, since the current implementation doesn't
2435 // preserve the GOT register.
2436 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::GETGOT), DestReg: VE::SX15);
2437 }
2438
2439 // IReg is used as an index in a memory operand and therefore can't be SP
2440 const TargetRegisterClass *RC = &VE::I64RegClass;
2441 Register IReg = MRI.createVirtualRegister(RegClass: RC);
2442 addFrameReference(MIB: BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDLZXrii), DestReg: IReg), FI,
2443 Offset: OffsetCS);
2444 if (LPadList.size() < 64) {
2445 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLir_t))
2446 .addImm(Val: VECC::CC_ILE)
2447 .addImm(Val: LPadList.size())
2448 .addReg(RegNo: IReg)
2449 .addMBB(MBB: TrapBB);
2450 } else {
2451 assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2452 Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
2453 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: TmpReg)
2454 .addImm(Val: 0)
2455 .addImm(Val: 0)
2456 .addImm(Val: LPadList.size());
2457 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLrr_t))
2458 .addImm(Val: VECC::CC_ILE)
2459 .addReg(RegNo: TmpReg, Flags: getKillRegState(B: true))
2460 .addReg(RegNo: IReg)
2461 .addMBB(MBB: TrapBB);
2462 }
2463
2464 Register BReg = MRI.createVirtualRegister(RegClass: RC);
2465 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2466 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2467
2468 if (isPositionIndependent()) {
2469 // Create following instructions for local linkage PIC code.
2470 // lea %Tmp1, .LJTI0_0@gotoff_lo
2471 // and %Tmp2, %Tmp1, (32)0
2472 // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2473 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2474 .addImm(Val: 0)
2475 .addImm(Val: 0)
2476 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_GOTOFF_LO32);
2477 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2478 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2479 .addImm(Val: M0(Val: 32));
2480 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: BReg)
2481 .addReg(RegNo: VE::SX15)
2482 .addReg(RegNo: Tmp2, Flags: getKillRegState(B: true))
2483 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_GOTOFF_HI32);
2484 } else {
2485 // Create following instructions for non-PIC code.
2486 // lea %Tmp1, .LJTI0_0@lo
2487 // and %Tmp2, %Tmp1, (32)0
2488 // lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2489 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2490 .addImm(Val: 0)
2491 .addImm(Val: 0)
2492 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_LO32);
2493 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2494 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2495 .addImm(Val: M0(Val: 32));
2496 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: BReg)
2497 .addReg(RegNo: Tmp2, Flags: getKillRegState(B: true))
2498 .addImm(Val: 0)
2499 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_HI32);
2500 }
2501
2502 switch (JTE) {
2503 case MachineJumpTableInfo::EK_BlockAddress: {
2504 // Generate simple block address code for the non-PIC model.
2505 // sll %Tmp1, %IReg, 3
2506 // lds %TReg, 0(%Tmp1, %BReg)
2507 // bcfla %TReg
2508
2509 Register TReg = MRI.createVirtualRegister(RegClass: RC);
2510 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2511
2512 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::SLLri), DestReg: Tmp1)
2513 .addReg(RegNo: IReg, Flags: getKillRegState(B: true))
2514 .addImm(Val: 3);
2515 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDrri), DestReg: TReg)
2516 .addReg(RegNo: BReg, Flags: getKillRegState(B: true))
2517 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2518 .addImm(Val: 0);
2519 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2520 .addReg(RegNo: TReg, Flags: getKillRegState(B: true))
2521 .addImm(Val: 0);
2522 break;
2523 }
2524 case MachineJumpTableInfo::EK_Custom32: {
2525 // Generate block address code using differences from the function pointer
2526 // for PIC model.
2527 // sll %Tmp1, %IReg, 2
2528 // ldl.zx %OReg, 0(%Tmp1, %BReg)
2529 // Prepare function address in BReg2.
2530 // adds.l %TReg, %BReg2, %OReg
2531 // bcfla %TReg
2532
2533 assert(isPositionIndependent());
2534 Register OReg = MRI.createVirtualRegister(RegClass: RC);
2535 Register TReg = MRI.createVirtualRegister(RegClass: RC);
2536 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2537
2538 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::SLLri), DestReg: Tmp1)
2539 .addReg(RegNo: IReg, Flags: getKillRegState(B: true))
2540 .addImm(Val: 2);
2541 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDLZXrri), DestReg: OReg)
2542 .addReg(RegNo: BReg, Flags: getKillRegState(B: true))
2543 .addReg(RegNo: Tmp1, Flags: getKillRegState(B: true))
2544 .addImm(Val: 0);
2545 Register BReg2 =
2546 prepareSymbol(MBB&: *DispContBB, I: DispContBB->end(),
2547 Symbol: DispContBB->getParent()->getName(), DL, /* Local */ IsLocal: true);
2548 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ADDSLrr), DestReg: TReg)
2549 .addReg(RegNo: OReg, Flags: getKillRegState(B: true))
2550 .addReg(RegNo: BReg2, Flags: getKillRegState(B: true));
2551 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2552 .addReg(RegNo: TReg, Flags: getKillRegState(B: true))
2553 .addImm(Val: 0);
2554 break;
2555 }
2556 default:
2557 llvm_unreachable("Unexpected jump table encoding");
2558 }
2559
2560 // Add the jump table entries as successors to the MBB.
2561 SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
2562 for (auto &LP : LPadList)
2563 if (SeenMBBs.insert(Ptr: LP).second)
2564 DispContBB->addSuccessor(Succ: LP);
2565
2566 // N.B. the order the invoke BBs are processed in doesn't matter here.
2567 SmallVector<MachineBasicBlock *, 64> MBBLPads;
2568 const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2569 for (MachineBasicBlock *MBB : InvokeBBs) {
2570 // Remove the landing pad successor from the invoke block and replace it
2571 // with the new dispatch block.
2572 // Keep a copy of Successors since it's modified inside the loop.
2573 SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
2574 MBB->succ_rend());
2575 // FIXME: Avoid quadratic complexity.
2576 for (auto *MBBS : Successors) {
2577 if (MBBS->isEHPad()) {
2578 MBB->removeSuccessor(Succ: MBBS);
2579 MBBLPads.push_back(Elt: MBBS);
2580 }
2581 }
2582
2583 MBB->addSuccessor(Succ: DispatchBB);
2584
2585 // Find the invoke call and mark all of the callee-saved registers as
2586 // 'implicit defined' so that they're spilled. This prevents code from
2587 // moving instructions to before the EH block, where they will never be
2588 // executed.
2589 for (auto &II : reverse(C&: *MBB)) {
2590 if (!II.isCall())
2591 continue;
2592
2593 DenseSet<Register> DefRegs;
2594 for (auto &MOp : II.operands())
2595 if (MOp.isReg())
2596 DefRegs.insert(V: MOp.getReg());
2597
2598 MachineInstrBuilder MIB(*MF, &II);
2599 for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
2600 Register Reg = SavedRegs[RI];
2601 if (!DefRegs.contains(V: Reg))
2602 MIB.addReg(RegNo: Reg, Flags: RegState::ImplicitDefine | RegState::Dead);
2603 }
2604
2605 break;
2606 }
2607 }
2608
2609 // Mark all former landing pads as non-landing pads. The dispatch is the only
2610 // landing pad now.
2611 for (auto &LP : MBBLPads)
2612 LP->setIsEHPad(false);
2613
2614 // The instruction is gone now.
2615 MI.eraseFromParent();
2616 return BB;
2617}
2618
2619MachineBasicBlock *
2620VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2621 MachineBasicBlock *BB) const {
2622 switch (MI.getOpcode()) {
2623 default:
2624 llvm_unreachable("Unknown Custom Instruction!");
2625 case VE::EH_SjLj_LongJmp:
2626 return emitEHSjLjLongJmp(MI, MBB: BB);
2627 case VE::EH_SjLj_SetJmp:
2628 return emitEHSjLjSetJmp(MI, MBB: BB);
2629 case VE::EH_SjLj_Setup_Dispatch:
2630 return emitSjLjDispatchBlock(MI, BB);
2631 }
2632}
2633
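// Return true if V is a scalar constant whose value fits in VE's signed 7-bit
// immediate field (for f32/f64 constants, the bit pattern as placed in the
// register is checked).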
2634static bool isSimm7(SDValue V) {
2635 EVT VT = V.getValueType();
2636 if (VT.isVector())
2637 return false;
2638
2639 if (VT.isInteger()) {
2640 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: V))
2641 return isInt<7>(x: C->getSExtValue());
2642 } else if (VT.isFloatingPoint()) {
2643 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: V)) {
2644 if (VT == MVT::f32 || VT == MVT::f64) {
2645 const APInt &Imm = C->getValueAPF().bitcastToAPInt();
2646 uint64_t Val = Imm.getSExtValue();
2647 if (Imm.getBitWidth() == 32)
2648 Val <<= 32; // The immediate value of a float is placed in the higher bits on VE.
2649 return isInt<7>(x: Val);
2650 }
2651 }
2652 }
2653 return false;
2654}
2655
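// Return true if V is a scalar constant that can be encoded as VE's MImm
// operand form.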
2656static bool isMImm(SDValue V) {
2657 EVT VT = V.getValueType();
2658 if (VT.isVector())
2659 return false;
2660
2661 if (VT.isInteger()) {
2662 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: V))
2663 return isMImmVal(Val: getImmVal(N: C));
2664 } else if (VT.isFloatingPoint()) {
2665 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: V)) {
2666 if (VT == MVT::f32) {
2667 // The float value is placed in the higher bits, so ignore the lower 32 bits.
2668 return isMImm32Val(Val: getFpImmVal(N: C) >> 32);
2669 } else if (VT == MVT::f64) {
2670 return isMImmVal(Val: getFpImmVal(N: C));
2671 }
2672 }
2673 }
2674 return false;
2675}
2676
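// Choose the VE comparison opcode for the given operand type and condition
// code: CMPQ for f128, CMPF for other FP types, and CMPI/CMPU for
// signed/unsigned integer comparisons.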
2677static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
2678 if (SrcVT.isFloatingPoint()) {
2679 if (SrcVT == MVT::f128)
2680 return VEISD::CMPQ;
2681 return VEISD::CMPF;
2682 }
2683 return isSignedIntSetCC(Code: CC) ? VEISD::CMPI : VEISD::CMPU;
2684}
2685
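// VE computes an f128 comparison result in an f64 register, so use f64 as the
// comparison type for f128; all other types compare in their own type.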
2686static EVT decideCompType(EVT SrcVT) {
2687 if (SrcVT == MVT::f128)
2688 return MVT::f64;
2689 return SrcVT;
2690}
2691
2692static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC,
2693 bool WithCMov) {
2694 if (SrcVT.isFloatingPoint()) {
2695 // For floating-point setcc, only unordered comparisons or general
2696 // comparisons with the -enable-no-nans-fp-math option reach here, so it
2697 // is safe even if the values are NaN. Only f128 is not safe, since VE
2698 // uses the f64 result of the f128 comparison.
2699 return SrcVT != MVT::f128;
2700 }
2701 if (isIntEqualitySetCC(Code: CC)) {
2702 // For equal or not-equal comparisons, it is safe without the comparison with 0.
2703 return true;
2704 }
2705 if (WithCMov) {
2706 // For integer setcc with cmov, all signed comparisons with 0
2707 // are safe.
2708 return isSignedIntSetCC(Code: CC);
2709 }
2710 // For plain integer setcc, only signed 64-bit comparisons are safe.
2711 // For unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it becomes
2712 // less than 0 without CMPU. For 32 bits, the other half of the register is
2713 // not well defined, so it is not safe either without CMPI.
2714 return isSignedIntSetCC(Code: CC) && SrcVT == MVT::i64;
2715}
2716
2717static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS,
2718 ISD::CondCode CC, bool WithCMov,
2719 const SDLoc &DL, SelectionDAG &DAG) {
2720 // Compare values. If RHS is 0 and it is safe to calculate without
2721 // comparison, we don't generate an instruction for comparison.
2722 EVT CompVT = decideCompType(SrcVT: VT);
2723 if (CompVT == VT && safeWithoutCompWithNull(SrcVT: VT, CC, WithCMov) &&
2724 (isNullConstant(V: RHS) || isNullFPConstant(V: RHS))) {
2725 return LHS;
2726 }
2727 return DAG.getNode(Opcode: decideComp(SrcVT: VT, CC), DL, VT: CompVT, N1: LHS, N2: RHS);
2728}
2729
2730SDValue VETargetLowering::combineSelect(SDNode *N,
2731 DAGCombinerInfo &DCI) const {
2732 assert(N->getOpcode() == ISD::SELECT &&
2733 "Should be called with a SELECT node");
2734 ISD::CondCode CC = ISD::CondCode::SETNE;
2735 SDValue Cond = N->getOperand(Num: 0);
2736 SDValue True = N->getOperand(Num: 1);
2737 SDValue False = N->getOperand(Num: 2);
2738
2739 // We handle only scalar SELECT.
2740 EVT VT = N->getValueType(ResNo: 0);
2741 if (VT.isVector())
2742 return SDValue();
2743
2744 // Perform combineSelect after legalizing the DAG.
2745 if (!DCI.isAfterLegalizeDAG())
2746 return SDValue();
2747
2748 EVT VT0 = Cond.getValueType();
2749 if (isMImm(V: True)) {
2750 // VE's conditional move can handle MImm in the True clause, so nothing to do.
2751 } else if (isMImm(V: False)) {
2752 // VE's conditional move can handle MImm in the True clause, so swap the True
2753 // and False clauses if False has an MImm value, and update the condition code.
2754 std::swap(a&: True, b&: False);
2755 CC = getSetCCInverse(Operation: CC, Type: VT0);
2756 }
2757
2758 SDLoc DL(N);
2759 SelectionDAG &DAG = DCI.DAG;
2760 VECC::CondCode VECCVal;
2761 if (VT0.isFloatingPoint()) {
2762 VECCVal = fpCondCode2Fcc(CC);
2763 } else {
2764 VECCVal = intCondCode2Icc(CC);
2765 }
2766 SDValue Ops[] = {Cond, True, False,
2767 DAG.getConstant(Val: VECCVal, DL, VT: MVT::i32)};
2768 return DAG.getNode(Opcode: VEISD::CMOV, DL, VT, Ops);
2769}
2770
2771SDValue VETargetLowering::combineSelectCC(SDNode *N,
2772 DAGCombinerInfo &DCI) const {
2773 assert(N->getOpcode() == ISD::SELECT_CC &&
2774 "Should be called with a SELECT_CC node");
2775 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 4))->get();
2776 SDValue LHS = N->getOperand(Num: 0);
2777 SDValue RHS = N->getOperand(Num: 1);
2778 SDValue True = N->getOperand(Num: 2);
2779 SDValue False = N->getOperand(Num: 3);
2780
2781 // We handle only scalar SELECT_CC.
2782 EVT VT = N->getValueType(ResNo: 0);
2783 if (VT.isVector())
2784 return SDValue();
2785
2786 // Perform combineSelectCC after legalizing the DAG.
2787 if (!DCI.isAfterLegalizeDAG())
2788 return SDValue();
2789
2790 // We handle only i32/i64/f32/f64/f128 comparisons.
2791 EVT LHSVT = LHS.getValueType();
2792 assert(LHSVT == RHS.getValueType());
2793 switch (LHSVT.getSimpleVT().SimpleTy) {
2794 case MVT::i32:
2795 case MVT::i64:
2796 case MVT::f32:
2797 case MVT::f64:
2798 case MVT::f128:
2799 break;
2800 default:
2801 // Return SDValue to let llvm handle other types.
2802 return SDValue();
2803 }
2804
2805 if (isMImm(V: RHS)) {
2806 // VE's comparison can handle MImm in RHS, so nothing to do.
2807 } else if (isSimm7(V: RHS)) {
    // VE's comparison can handle Simm7 in LHS, so if RHS is a Simm7 value,
    // swap LHS and RHS and update the condition code.
2810 std::swap(a&: LHS, b&: RHS);
2811 CC = getSetCCSwappedOperands(Operation: CC);
2812 }
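  // E.g. for (select_cc %x, %simm7, ..., setlt) the comparison is rewritten as
  // (%simm7, %x) with setgt, moving the small immediate into the LHS slot that
  // the compare instruction can encode directly.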
2813 if (isMImm(V: True)) {
2814 // VE's condition move can handle MImm in True clause, so nothing to do.
2815 } else if (isMImm(V: False)) {
    // VE's condition move can handle MImm in the True clause, so swap the True
    // and False clauses if False holds an MImm value, and invert the condition
    // code.
2818 std::swap(a&: True, b&: False);
2819 CC = getSetCCInverse(Operation: CC, Type: LHSVT);
2820 }
2821
2822 SDLoc DL(N);
2823 SelectionDAG &DAG = DCI.DAG;
2824
2825 bool WithCMov = true;
2826 SDValue CompNode = generateComparison(VT: LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);
2827
2828 VECC::CondCode VECCVal;
2829 if (LHSVT.isFloatingPoint()) {
2830 VECCVal = fpCondCode2Fcc(CC);
2831 } else {
2832 VECCVal = intCondCode2Icc(CC);
2833 }
2834 SDValue Ops[] = {CompNode, True, False,
2835 DAG.getConstant(Val: VECCVal, DL, VT: MVT::i32)};
2836 return DAG.getNode(Opcode: VEISD::CMOV, DL, VT, Ops);
2837}
2838
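// The helpers below decide whether a (truncate i64 %x to i32) can be replaced
// by a plain sub-register extraction. Roughly, a use is considered safe when
// it only ever reads the low 32 bits of %x, e.g.
//   %t = truncate i64 %x to i32
//   %a = add i32 %t, %y   ; the 32-bit add ignores the upper half, so safe
// whereas uses such as an SRL that may itself be combined with an inner SRL
// keep the explicit truncate (see the ISD::SRL case below).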
2839static bool isI32InsnAllUses(const SDNode *User, const SDNode *N);
2840static bool isI32Insn(const SDNode *User, const SDNode *N) {
2841 switch (User->getOpcode()) {
2842 default:
2843 return false;
2844 case ISD::ADD:
2845 case ISD::SUB:
2846 case ISD::MUL:
2847 case ISD::SDIV:
2848 case ISD::UDIV:
2849 case ISD::SETCC:
2850 case ISD::SMIN:
2851 case ISD::SMAX:
2852 case ISD::SHL:
2853 case ISD::SRA:
2854 case ISD::BSWAP:
2855 case ISD::SINT_TO_FP:
2856 case ISD::UINT_TO_FP:
2857 case ISD::BR_CC:
2858 case ISD::BITCAST:
2859 case ISD::ATOMIC_CMP_SWAP:
2860 case ISD::ATOMIC_SWAP:
2861 case VEISD::CMPU:
2862 case VEISD::CMPI:
2863 return true;
2864 case ISD::SRL:
2865 if (N->getOperand(Num: 0).getOpcode() != ISD::SRL)
2866 return true;
    // (srl (trunc (srl ...))) may be optimized by combining the srl nodes, so
    // don't optimize the trunc here.
2869 return false;
2870 case ISD::SELECT_CC:
2871 if (User->getOperand(Num: 2).getNode() != N &&
2872 User->getOperand(Num: 3).getNode() != N)
2873 return true;
2874 return isI32InsnAllUses(User, N);
2875 case VEISD::CMOV:
2876 // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
2877 // However, trunc in true or false clauses is not safe.
2878 if (User->getOperand(Num: 1).getNode() != N &&
2879 User->getOperand(Num: 2).getNode() != N &&
2880 isa<ConstantSDNode>(Val: User->getOperand(Num: 3))) {
2881 VECC::CondCode VECCVal =
2882 static_cast<VECC::CondCode>(User->getConstantOperandVal(Num: 3));
2883 return isIntVECondCode(CC: VECCVal);
2884 }
2885 [[fallthrough]];
2886 case ISD::AND:
2887 case ISD::OR:
2888 case ISD::XOR:
2889 case ISD::SELECT:
2890 case ISD::CopyToReg:
    // Check all uses of selections, bit operations, and copies. If all of
    // them are safe, optimize the truncate to an extract_subreg.
2893 return isI32InsnAllUses(User, N);
2894 }
2895}
2896
2897static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) {
  // Check all uses of the User node. If all of them are safe, optimize the
  // truncate to an extract_subreg.
2900 for (const SDNode *U : User->users()) {
2901 switch (U->getOpcode()) {
2902 default:
      // If the use is an instruction which treats the source operand as i32,
      // it is safe to avoid the truncate here.
2905 if (isI32Insn(User: U, N))
2906 continue;
2907 break;
2908 case ISD::ANY_EXTEND:
2909 case ISD::SIGN_EXTEND:
2910 case ISD::ZERO_EXTEND: {
      // Special optimization for the combination of ext and trunc.
      // (ext ... (select ... (trunc ...))) is safe to avoid the truncate here
      // since the truncate clears the higher 32 bits, which are filled by one
      // of the ext instructions later anyway.
      assert(N->getValueType(0) == MVT::i32 &&
             "found truncate to a non-i32 integer");
2917 if (User->getOpcode() == ISD::SELECT_CC ||
2918 User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV)
2919 continue;
2920 break;
2921 }
2922 }
2923 return false;
2924 }
2925 return true;
2926}
2927
// Optimize TRUNCATE during DAG combining. Optimizing it in custom lowering is
// sometimes too early, and optimizing it during DAG pattern matching in
// VEInstrInfo.td is sometimes too late. So, do it here.
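// Roughly, when every use of the result only needs the low 32 bits:
//   (truncate i64 %x to i32)  ->  (EXTRACT_SUBREG %x, sub_i32)
// which removes the explicit truncation instruction.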
2931SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2932 DAGCombinerInfo &DCI) const {
2933 assert(N->getOpcode() == ISD::TRUNCATE &&
2934 "Should be called with a TRUNCATE node");
2935
2936 SelectionDAG &DAG = DCI.DAG;
2937 SDLoc DL(N);
2938 EVT VT = N->getValueType(ResNo: 0);
2939
2940 // We prefer to do this when all types are legal.
2941 if (!DCI.isAfterLegalizeDAG())
2942 return SDValue();
2943
  // For now, skip combining TRUNCATE if the operand of TRUNCATE might be a
  // constant.
2945 if (N->getOperand(Num: 0)->getOpcode() == ISD::SELECT_CC &&
2946 isa<ConstantSDNode>(Val: N->getOperand(Num: 0)->getOperand(Num: 0)) &&
2947 isa<ConstantSDNode>(Val: N->getOperand(Num: 0)->getOperand(Num: 1)))
2948 return SDValue();
2949
  // Check all uses of this TRUNCATE.
2951 for (const SDNode *User : N->users()) {
    // Make sure that we're not going to replace this TRUNCATE for non-i32
    // instructions.
2954 //
2955 // FIXME: Although we could sometimes handle this, and it does occur in
2956 // practice that one of the condition inputs to the select is also one of
2957 // the outputs, we currently can't deal with this.
2958 if (isI32Insn(User, N))
2959 continue;
2960
2961 return SDValue();
2962 }
2963
2964 SDValue SubI32 = DAG.getTargetConstant(Val: VE::sub_i32, DL, VT: MVT::i32);
2965 return SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT,
2966 Op1: N->getOperand(Num: 0), Op2: SubI32),
2967 0);
2968}
2969
2970SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
2971 DAGCombinerInfo &DCI) const {
2972 switch (N->getOpcode()) {
2973 default:
2974 break;
2975 case ISD::SELECT:
2976 return combineSelect(N, DCI);
2977 case ISD::SELECT_CC:
2978 return combineSelectCC(N, DCI);
2979 case ISD::TRUNCATE:
2980 return combineTRUNCATE(N, DCI);
2981 }
2982
2983 return SDValue();
2984}
2985
2986//===----------------------------------------------------------------------===//
2987// VE Inline Assembly Support
2988//===----------------------------------------------------------------------===//
2989
2990VETargetLowering::ConstraintType
2991VETargetLowering::getConstraintType(StringRef Constraint) const {
2992 if (Constraint.size() == 1) {
2993 switch (Constraint[0]) {
2994 default:
2995 break;
2996 case 'v': // vector registers
2997 return C_RegisterClass;
2998 }
2999 }
3000 return TargetLowering::getConstraintType(Constraint);
3001}
3002
3003std::pair<unsigned, const TargetRegisterClass *>
3004VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3005 StringRef Constraint,
3006 MVT VT) const {
3007 const TargetRegisterClass *RC = nullptr;
3008 if (Constraint.size() == 1) {
3009 switch (Constraint[0]) {
3010 default:
3011 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3012 case 'r':
3013 RC = &VE::I64RegClass;
3014 break;
3015 case 'v':
3016 RC = &VE::V64RegClass;
3017 break;
3018 }
3019 return std::make_pair(x: 0U, y&: RC);
3020 }
3021
3022 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3023}
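// Illustrative (hypothetical) use: an inline-asm operand written with the 'v'
// constraint, e.g. __asm__("..." : "=v"(out) : "v"(in)), is assigned to the
// V64 vector register class above, while the usual 'r' maps to I64RegClass.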
3024
3025//===----------------------------------------------------------------------===//
3026// VE Target Optimization Support
3027//===----------------------------------------------------------------------===//
3028
3029unsigned VETargetLowering::getMinimumJumpTableEntries() const {
  // Specify 8 for the PIC model to reduce the impact of PIC load instructions.
3031 if (isJumpTableRelative())
3032 return 8;
3033
3034 return TargetLowering::getMinimumJumpTableEntries();
3035}
3036
3037bool VETargetLowering::hasAndNot(SDValue Y) const {
3038 EVT VT = Y.getValueType();
3039
  // VE doesn't have a vector and-not instruction.
3041 if (VT.isVector())
3042 return false;
3043
  // VE allows different immediate values for X and Y in ~X & Y. Only simm7
  // works for X, and only mimm works for Y on VE. However, this function is
  // used to check whether an immediate value is OK as both X and Y of an
  // and-not instruction. Generating an additional instruction to materialize
  // an immediate value is not worthwhile, since the purpose of this function
  // is to convert a series of 3 instructions into another series of 3
  // instructions with better parallelism. Therefore, we return false for all
  // immediate values for now.
3052 // FIXME: Change hasAndNot function to have two operands to make it work
3053 // correctly with Aurora VE.
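  // E.g. when DAGCombiner asks about forming ~%x & 42, Y here is the constant
  // 42; since this hook cannot tell whether the constant would end up in the
  // simm7 or the mimm position, we conservatively reject all constants below.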
3054 if (isa<ConstantSDNode>(Val: Y))
3055 return false;
3056
3057 // It's ok for generic registers.
3058 return true;
3059}
3060
3061SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3062 SelectionDAG &DAG) const {
3063 assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
3064 MVT VT = Op.getOperand(i: 0).getSimpleValueType();
3065
3066 // Special treatment for packed V64 types.
3067 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3068 (void)VT;
  // Example code:
3070 // %packed_v = extractelt %vr, %idx / 2
3071 // %v = %packed_v >> (%idx % 2 * 32)
3072 // %res = %v & 0xffffffff
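  //
  // For instance, with %idx = 3 the code below reads packed element 1 and uses
  // Shift = ((3 & 1) ^ 1) << 5 = 0, selecting the lower 32 bits; with %idx = 2
  // it uses Shift = 32 and selects the upper 32 bits.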
3073
3074 SDValue Vec = Op.getOperand(i: 0);
3075 SDValue Idx = Op.getOperand(i: 1);
3076 SDLoc DL(Op);
3077 SDValue Result = Op;
3078 if (false /* Idx->isConstant() */) {
3079 // TODO: optimized implementation using constant values
3080 } else {
3081 SDValue Const1 = DAG.getConstant(Val: 1, DL, VT: MVT::i64);
3082 SDValue HalfIdx = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {Idx, Const1});
3083 SDValue PackedElt =
3084 SDValue(DAG.getMachineNode(Opcode: VE::LVSvr, dl: DL, VT: MVT::i64, Ops: {Vec, HalfIdx}), 0);
3085 SDValue AndIdx = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {Idx, Const1});
3086 SDValue Shift = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, Ops: {AndIdx, Const1});
3087 SDValue Const5 = DAG.getConstant(Val: 5, DL, VT: MVT::i64);
3088 Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, Ops: {Shift, Const5});
3089 PackedElt = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {PackedElt, Shift});
3090 SDValue Mask = DAG.getConstant(Val: 0xFFFFFFFFL, DL, VT: MVT::i64);
3091 PackedElt = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {PackedElt, Mask});
3092 SDValue SubI32 = DAG.getTargetConstant(Val: VE::sub_i32, DL, VT: MVT::i32);
3093 Result = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL,
3094 VT: MVT::i32, Op1: PackedElt, Op2: SubI32),
3095 0);
3096
3097 if (Op.getSimpleValueType() == MVT::f32) {
3098 Result = DAG.getBitcast(VT: MVT::f32, V: Result);
3099 } else {
3100 assert(Op.getSimpleValueType() == MVT::i32);
3101 }
3102 }
3103 return Result;
3104}
3105
3106SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3107 SelectionDAG &DAG) const {
3108 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
3109 MVT VT = Op.getOperand(i: 0).getSimpleValueType();
3110
3111 // Special treatment for packed V64 types.
3112 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3113 (void)VT;
  // The elements of v512i32 and v512f32 start from the upper bits (0..31).
  // These "upper bits" require `val << 32` from the C implementation's point
  // of view.
  //
  // Example code:
3118 // %packed_elt = extractelt %vr, (%idx >> 1)
3119 // %shift = ((%idx & 1) ^ 1) << 5
3120 // %packed_elt &= 0xffffffff00000000 >> shift
3121 // %packed_elt |= (zext %val) << shift
3122 // %vr = insertelt %vr, %packed_elt, (%idx >> 1)
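  //
  // For instance, inserting at %idx = 2 uses shift = 32: the mask becomes
  // 0x00000000ffffffff, the upper half of packed element 1 (the target slot)
  // is cleared, and (zext %val) << 32 is OR'ed in before LSVrr_v writes the
  // element back.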
3123
3124 SDLoc DL(Op);
3125 SDValue Vec = Op.getOperand(i: 0);
3126 SDValue Val = Op.getOperand(i: 1);
3127 SDValue Idx = Op.getOperand(i: 2);
3128 if (Idx.getSimpleValueType() == MVT::i32)
3129 Idx = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Idx);
3130 if (Val.getSimpleValueType() == MVT::f32)
3131 Val = DAG.getBitcast(VT: MVT::i32, V: Val);
3132 assert(Val.getSimpleValueType() == MVT::i32);
3133 Val = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Val);
3134
3135 SDValue Result = Op;
3136 if (false /* Idx->isConstant()*/) {
3137 // TODO: optimized implementation using constant values
3138 } else {
3139 SDValue Const1 = DAG.getConstant(Val: 1, DL, VT: MVT::i64);
3140 SDValue HalfIdx = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {Idx, Const1});
3141 SDValue PackedElt =
3142 SDValue(DAG.getMachineNode(Opcode: VE::LVSvr, dl: DL, VT: MVT::i64, Ops: {Vec, HalfIdx}), 0);
3143 SDValue AndIdx = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {Idx, Const1});
3144 SDValue Shift = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, Ops: {AndIdx, Const1});
3145 SDValue Const5 = DAG.getConstant(Val: 5, DL, VT: MVT::i64);
3146 Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, Ops: {Shift, Const5});
3147 SDValue Mask = DAG.getConstant(Val: 0xFFFFFFFF00000000L, DL, VT: MVT::i64);
3148 Mask = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {Mask, Shift});
3149 PackedElt = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {PackedElt, Mask});
3150 Val = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, Ops: {Val, Shift});
3151 PackedElt = DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i64, Ops: {PackedElt, Val});
3152 Result =
3153 SDValue(DAG.getMachineNode(Opcode: VE::LSVrr_v, dl: DL, VT: Vec.getSimpleValueType(),
3154 Ops: {HalfIdx, PackedElt, Vec}),
3155 0);
3156 }
3157 return Result;
3158}
3159