//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that VE uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "VEISelLowering.h"
#include "MCTargetDesc/VEMCAsmInfo.h"
#include "VECustomDAG.h"
#include "VEInstrBuilder.h"
#include "VEMachineFunctionInfo.h"
#include "VERegisterInfo.h"
#include "VETargetMachine.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

#define DEBUG_TYPE "ve-lower"

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "VEGenCallingConv.inc"

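// Return the calling-convention assignment function (generated from
// VECallingConv.td) used for return values of the given calling convention.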
CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
  switch (CallConv) {
  default:
    return RetCC_VE_C;
  case CallingConv::Fast:
    return RetCC_VE_Fast;
  }
}

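// Return the assignment function used for formal arguments and call operands.
// Variadic calls use CC_VE2, which also assigns stack locations so that
// arguments can be passed both in registers and on the stack.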
CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
  if (IsVarArg)
    return CC_VE2;
  switch (CallConv) {
  default:
    return CC_VE_C;
  case CallingConv::Fast:
    return CC_VE_Fast;
  }
}

bool VETargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
    const Type *RetTy) const {
  CCAssignFn *RetCC = getReturnCC(CallConv);
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC);
}

static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
                                   MVT::v256f32, MVT::v512f32, MVT::v256f64};

static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};

static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};

void VETargetLowering::initRegisterClasses() {
  // Set up the register classes.
  addRegisterClass(MVT::i32, &VE::I32RegClass);
  addRegisterClass(MVT::i64, &VE::I64RegClass);
  addRegisterClass(MVT::f32, &VE::F32RegClass);
  addRegisterClass(MVT::f64, &VE::I64RegClass);
  addRegisterClass(MVT::f128, &VE::F128RegClass);

  if (Subtarget->enableVPU()) {
    for (MVT VecVT : AllVectorVTs)
      addRegisterClass(VecVT, &VE::V64RegClass);
    addRegisterClass(MVT::v256i1, &VE::VMRegClass);
    addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
  }
}

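// Set legalization actions for the scalar (SPU) operations: loads/stores,
// address nodes, varargs, stack, branches, integer and floating-point
// arithmetic, atomics, and SJLJ exception handling.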
void VETargetLowering::initSPUActions() {
  const auto &TM = getTargetMachine();
  /// Load & Store {

  // VE doesn't have i1 sign-extending loads.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }

  // VE doesn't have floating-point extload/truncstore, so expand them.
  for (MVT FPVT : MVT::fp_valuetypes()) {
    for (MVT OtherFPVT : MVT::fp_valuetypes()) {
      setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
      setTruncStoreAction(FPVT, OtherFPVT, Expand);
    }
  }

  // VE doesn't have fp128 load/store, so expand them in custom lowering.
  setOperationAction(ISD::LOAD, MVT::f128, Custom);
  setOperationAction(ISD::STORE, MVT::f128, Custom);

  /// } Load & Store

  // Custom legalize address nodes into LO/HI parts.
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);

  /// VAARG handling {
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  // VAARG needs to be lowered to an access with 8-byte alignment.
  setOperationAction(ISD::VAARG, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  /// } VAARG handling

  /// Stack {
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);

  // Use the default implementation.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  /// } Stack

  /// Branch {

  // VE doesn't have BRCOND.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // BR_JT is not implemented yet.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  /// } Branch

  /// Int Ops {
  for (MVT IntVT : {MVT::i32, MVT::i64}) {
    // VE has no REM or DIVREM operations.
    setOperationAction(ISD::UREM, IntVT, Expand);
    setOperationAction(ISD::SREM, IntVT, Expand);
    setOperationAction(ISD::SDIVREM, IntVT, Expand);
    setOperationAction(ISD::UDIVREM, IntVT, Expand);

    // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
    setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
    setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
    setOperationAction(ISD::SRL_PARTS, IntVT, Expand);

    // VE has no MULHU/S or U/SMUL_LOHI operations.
    // TODO: Use the MPD instruction to implement SMUL_LOHI for i32.
    setOperationAction(ISD::MULHU, IntVT, Expand);
    setOperationAction(ISD::MULHS, IntVT, Expand);
    setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
    setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);

    // VE has no CTTZ, ROTL, or ROTR operations.
    setOperationAction(ISD::CTTZ, IntVT, Expand);
    setOperationAction(ISD::ROTL, IntVT, Expand);
    setOperationAction(ISD::ROTR, IntVT, Expand);

    // VE has a 64-bit instruction that implements i64 BSWAP. The same
    // instruction also handles i32 BSWAP given an additional operand.
    // Use isel patterns to lower BSWAP.
    setOperationAction(ISD::BSWAP, IntVT, Legal);

    // VE has only 64-bit instructions for BITREVERSE/CTLZ/CTPOP.
    // Use isel patterns for i64; promote for i32.
    LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
    setOperationAction(ISD::BITREVERSE, IntVT, Act);
    setOperationAction(ISD::CTLZ, IntVT, Act);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
    setOperationAction(ISD::CTPOP, IntVT, Act);

    // VE has only 64-bit instructions for AND/OR/XOR.
    // Use isel patterns for i64; promote for i32.
    setOperationAction(ISD::AND, IntVT, Act);
    setOperationAction(ISD::OR, IntVT, Act);
    setOperationAction(ISD::XOR, IntVT, Act);

    // SMAX and SMIN are legal.
    setOperationAction(ISD::SMAX, IntVT, Legal);
    setOperationAction(ISD::SMIN, IntVT, Legal);
  }
  /// } Int Ops

  /// Conversion {
  // VE doesn't have instructions for fp<->uint, so let LLVM expand them.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

  // fp16 is not supported.
  for (MVT FPVT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
    setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
  }
  /// } Conversion

  /// Floating-point Ops {
  /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
  /// and fcmp.

  // VE doesn't have the following floating-point operations.
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FNEG, VT, Expand);
    setOperationAction(ISD::FREM, VT, Expand);
  }

  // VE doesn't have fdiv for f128.
  setOperationAction(ISD::FDIV, MVT::f128, Expand);

  for (MVT FPVT : {MVT::f32, MVT::f64}) {
    // f32 and f64 use ConstantFP; f128 uses the constant pool.
    setOperationAction(ISD::ConstantFP, FPVT, Legal);
  }
  /// } Floating-point Ops

  /// Floating-point math functions {

  // VE doesn't have the following floating-point math functions.
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FABS, VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    setOperationAction(ISD::FCOS, VT, Expand);
    setOperationAction(ISD::FMA, VT, Expand);
    setOperationAction(ISD::FPOW, VT, Expand);
    setOperationAction(ISD::FSIN, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
  }

  // VE has single- and double-precision FMINNUM and FMAXNUM.
  for (MVT VT : {MVT::f32, MVT::f64}) {
    setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, VT, Legal);
  }

  /// } Floating-point math functions

  /// Atomic instructions {

  setMaxAtomicSizeInBitsSupported(64);
  setMinCmpXchgSizeInBits(32);
  setSupportsUnalignedAtomics(false);

  // Use a custom inserter for ATOMIC_FENCE.
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Other atomic instructions.
  for (MVT VT : MVT::integer_valuetypes()) {
    // Support i8/i16 atomic swap.
    setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);

    // FIXME: Support "atmam" instructions.
    setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);

    // VE doesn't have the following instructions.
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
  }

  /// } Atomic instructions

  /// SJLJ instructions {
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
  /// } SJLJ instructions

  // Intrinsic instructions
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
}

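// Set legalization actions for vector (VPU) operations. Most vector
// operations with legal element types are custom-lowered to VVP_* nodes;
// mask and packed vector types get their own custom handling.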
void VETargetLowering::initVPUActions() {
  for (MVT LegalMaskVT : AllMaskVTs)
    setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);

  for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
    setOperationAction(Opc, MVT::v512i1, Custom);

  for (MVT LegalVecVT : AllVectorVTs) {
    setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
    // Translate all vector instructions with legal element types to VVP_*
    // nodes.
    // TODO: We will custom-widen into VVP_* nodes in the future. While we are
    // building the infrastructure for this, we only do this for legal vector
    // VTs.
#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME)                                     \
  setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
#define ADD_VVP_OP(VVP_NAME, ISD_NAME)                                         \
  setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
    setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);
    setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);
#include "VVPNodes.def"
  }

  for (MVT LegalPackedVT : AllPackedVTs) {
    setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
  }

  // vNt32, vNt64 ops (legal element types)
  for (MVT VT : MVT::vector_valuetypes()) {
    MVT ElemVT = VT.getVectorElementType();
    unsigned ElemBits = ElemVT.getScalarSizeInBits();
    if (ElemBits != 32 && ElemBits != 64)
      continue;

    for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
      setOperationAction(MemOpc, VT, Custom);

    const ISD::NodeType IntReductionOCs[] = {
        ISD::VECREDUCE_ADD,  ISD::VECREDUCE_MUL,  ISD::VECREDUCE_AND,
        ISD::VECREDUCE_OR,   ISD::VECREDUCE_XOR,  ISD::VECREDUCE_SMIN,
        ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};

    for (unsigned IntRedOpc : IntReductionOCs)
      setOperationAction(IntRedOpc, VT, Custom);
  }

  // v256i1 and v512i1 ops
  for (MVT MaskVT : AllMaskVTs) {
    // Custom lower mask ops.
    setOperationAction(ISD::STORE, MaskVT, Custom);
    setOperationAction(ISD::LOAD, MaskVT, Custom);
  }
}

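// Lower return values: extend or bitcast each value as the calling convention
// requires, copy it into its return register, and emit a VEISD::RET_GLUE node
// glued to those copies.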
SDValue
VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                              bool IsVarArg,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              const SmallVectorImpl<SDValue> &OutVals,
                              const SDLoc &DL, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    SDValue OutVal = OutVals[i];

    // Integer return values must be sign or zero extended by the callee.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::ZExt:
      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::AExt:
      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::BCvt: {
      // Convert a float return value to i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Undef = SDValue(
          DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                          MVT::i64, Undef, OutVal, Sub_f32),
                       0);
      break;
    }
    default:
      llvm_unreachable("Unknown loc info!");
    }

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Glue);

    // Guarantee that all emitted copies are stuck together with flags.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(VEISD::RET_GLUE, DL, MVT::Other, RetOps);
}

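// Lower incoming formal arguments: arguments in registers become live-in
// copies (with Assert[SZ]ext/bitcast fixups as needed), and stack arguments
// are loaded from fixed frame objects placed after the reserved area.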
SDValue VETargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Get the base offset of the incoming arguments stack space.
  unsigned ArgsBaseOffset = Subtarget->getRsaSize();
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 64;

  // Analyze arguments according to CC_VE.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE would be correct now.
  CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));

  for (const CCValAssign &VA : ArgLocs) {
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    if (VA.isRegLoc()) {
      // This argument is passed in a register.
      // All integer register arguments are promoted by the caller to i64.

      // Create a virtual register for the promoted live-in value.
      Register VReg =
          MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
      SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());

      // The caller promoted the argument, so insert an Assert?ext SDNode so we
      // won't promote the value again in this function.
      switch (VA.getLocInfo()) {
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::BCvt: {
        // Extract a float argument from i64 with padding.
        //     63     31   0
        //    +------+------+
        //    | float|   0  |
        //    +------+------+
        assert(VA.getLocVT() == MVT::i64);
        assert(VA.getValVT() == MVT::f32);
        SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
        Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                         MVT::f32, Arg, Sub_f32),
                      0);
        break;
      }
      default:
        break;
      }

      // Truncate the register down to the argument type.
      if (VA.isExtInLoc())
        Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);

      InVals.push_back(Arg);
      continue;
    }

    // The registers are exhausted. This argument was passed on the stack.
    assert(VA.isMemLoc());
    // The CC_VE_Full/Half functions compute stack offsets relative to the
    // beginning of the arguments area at %fp + the size of the reserved area.
    unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
    unsigned ValSize = VA.getValVT().getSizeInBits() / 8;

    // Adjust the offset for a float argument by adding 4, since the argument
    // is stored in an 8-byte slot as shown below and LLVM generates a 4-byte
    // load. This adjustment is needed only in LowerFormalArguments; in
    // LowerCall, a float argument is first converted to i64 and stored as 8
    // bytes of data, as the ABI requires, so no adjustment is needed there.
    //     0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
    if (VA.getValVT() == MVT::f32)
      Offset += 4;

    int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
    InVals.push_back(
        DAG.getLoad(VA.getValVT(), DL, Chain,
                    DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
                    MachinePointerInfo::getFixedStack(MF, FI)));
  }

  if (!IsVarArg)
    return Chain;

  // This function takes variable arguments, some of which may have been passed
  // in registers %s0-%s7.
  //
  // The va_start intrinsic needs to know the offset to the first variable
  // argument.
  // TODO: need to calculate offset correctly once we support f128.
  unsigned ArgOffset = ArgLocs.size() * 8;
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  // Skip the reserved area at the top of stack.
  FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);

  return Chain;
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                             const MachineFunction &MF) const {
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("sp", VE::SX11)    // Stack pointer
                     .Case("fp", VE::SX9)     // Frame pointer
                     .Case("sl", VE::SX8)     // Stack limit
                     .Case("lr", VE::SX10)    // Link register
                     .Case("tp", VE::SX14)    // Thread pointer
                     .Case("outer", VE::SX12) // Outer register
                     .Case("info", VE::SX17)  // Info area register
                     .Case("got", VE::SX15)   // Global offset table register
                     .Case("plt", VE::SX16)   // Procedure linkage table register
                     .Default(Register());
  return Reg;
}

//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//

SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                    SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // The VE target does not yet support tail call optimization.
  CLI.IsTailCall = false;

  // Get the base offset of the outgoing arguments stack space.
  unsigned ArgsBaseOffset = Subtarget->getRsaSize();
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 8 * 8u;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE would be correct now.
  CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));

  // VE requires both registers and the stack for varargs and unprototyped
  // functions.
  bool UseBoth = CLI.IsVarArg;

  // Analyze operands again if it is required to store BOTH.
  SmallVector<CCValAssign, 16> ArgLocs2;
  CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
                  ArgLocs2, *DAG.getContext());
  if (UseBoth)
    CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));

  // Get the size of the outgoing arguments stack space requirement.
  unsigned ArgsSize = CCInfo.getStackSize();

  // Keep stack frames 16-byte aligned.
  ArgsSize = alignTo(ArgsSize, 16);

  // Adjust the stack pointer to make room for the arguments.
  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
  // with more than 6 arguments.
  Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);

  // Collect the set of registers to pass to the function and their values.
  // This will be emitted as a sequence of CopyToReg nodes glued to the call
  // instruction.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

  // Collect chains from all the memory operations that copy arguments to the
  // stack. They must follow the stack pointer adjustment above and precede the
  // call instruction itself.
  SmallVector<SDValue, 8> MemOpChains;

  // VE needs the address of the callee function in a register,
  // so prepare to copy it to SX12 here.

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  // Likewise ExternalSymbol -> TargetExternalSymbol.
  SDValue Callee = CLI.Callee;

  bool IsPICCall = isPositionIndependent();

  // PC-relative references to external symbols should go through $stub.
  // If so, we need to prepare the GlobalBaseReg first.
  const TargetMachine &TM = DAG.getTarget();
  const GlobalValue *GV = nullptr;
  auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
  if (CalleeG)
    GV = CalleeG->getGlobal();
  bool Local = TM.shouldAssumeDSOLocal(GV);
  bool UsePlt = !Local;
  MachineFunction &MF = DAG.getMachineFunction();

  // Turn a GlobalAddress/ExternalSymbol node into a value node
  // containing its address here.
  if (CalleeG) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee = makeHiLoPair(Callee, VE::S_HI32, VE::S_LO32, DAG);
    }
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee = makeHiLoPair(Callee, VE::S_HI32, VE::S_LO32, DAG);
    }
  }

  RegsToPass.push_back(std::make_pair(VE::SX12, Callee));

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = CLI.OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown location info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt: {
      // Convert a float argument to i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Undef = SDValue(
          DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                       MVT::i64, Undef, Arg, Sub_f32),
                    0);
      break;
    }
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      if (!UseBoth)
        continue;
      VA = ArgLocs2[i];
    }

    assert(VA.isMemLoc());

    // Create a store off the stack pointer for this argument.
    SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
    // The argument area starts at %fp/%sp + the size of the reserved area.
    SDValue PtrOff =
        DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
    PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
    MemOpChains.push_back(
        DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
  }

  // Emit all stores, make sure they occur before the call.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Build a sequence of CopyToReg nodes glued together with token chain and
  // glue operands which copy the outgoing args into registers. The InGlue is
  // necessary since all emitted instructions must be stuck together in order
  // to pass the live physical registers.
  SDValue InGlue;
  for (const auto &[Reg, N] : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg, N, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Build the operands for the call instruction itself.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  for (const auto &[Reg, N] : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg, N.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Make sure the CopyToReg nodes are glued to the call instruction which
  // consumes the registers.
  if (InGlue.getNode())
    Ops.push_back(InGlue);

  // Now the call itself.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  // Revert the stack pointer immediately after the call.
  Chain = DAG.getCALLSEQ_END(Chain, ArgsSize, 0, InGlue, DL);
  InGlue = Chain.getValue(1);

  // Now extract the return values. This is more or less the same as
  // LowerFormalArguments.

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Set the inreg flag manually for codegen-generated library calls that
  // return float.
  if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
    CLI.Ins[0].Flags.setInReg();

  RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    Register Reg = VA.getLocReg();

    // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
    // reside in the same register in the high and low bits. Reuse the
    // CopyFromReg previous node to avoid duplicate copies.
    SDValue RV;
    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
        RV = Chain.getValue(0);

    // But usually we'll create a new CopyFromReg for a different register.
    if (!RV.getNode()) {
      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
      Chain = RV.getValue(1);
      InGlue = Chain.getValue(2);
    }

    // The callee promoted the return value, so insert an Assert?ext SDNode so
    // we won't promote the value again in this function.
    switch (VA.getLocInfo()) {
    case CCValAssign::SExt:
      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::ZExt:
      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::BCvt: {
      // Extract a float return value from i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                      MVT::f32, RV, Sub_f32),
                   0);
      break;
    }
    default:
      break;
    }

    // Truncate the register down to the return value type.
    if (VA.isExtInLoc())
      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);

    InVals.push_back(RV);
  }

  return Chain;
}

bool VETargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // VE uses 64-bit addressing, so generating an address takes multiple
  // instructions. Folding an address with an offset would increase the
  // instruction count, so we disable it here. Offsets are folded later in
  // DAG combine when it is worthwhile.
  return false;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                    bool ForCodeSize) const {
  return VT == MVT::f32 || VT == MVT::f64;
}

/// Determine if the target supports unaligned memory accesses.
///
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns
/// whether the unaligned memory access is "fast" in the last argument by
/// reference. This is used, for example, in situations where an array
/// copy/move/set is converted to a sequence of store operations. Its use
/// helps to ensure that such replacements don't generate code that causes an
/// alignment error (trap) on the target machine.
bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      Align A,
                                                      MachineMemOperand::Flags,
                                                      unsigned *Fast) const {
  if (Fast) {
    // Unaligned accesses are always fast on VE.
    *Fast = 1;
  }
  return true;
}

VETargetLowering::VETargetLowering(const TargetMachine &TM,
                                   const VESubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Instructions which use registers as conditionals examine all the
  // bits (as does the pseudo SELECT_CC expansion). I don't think it
  // matters much whether it's ZeroOrOneBooleanContent, or
  // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
  // former.
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);

  initRegisterClasses();
  initSPUActions();
  initVPUActions();

  setStackPointerRegisterToSaveRestore(VE::SX11);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::TRUNCATE);
  setTargetDAGCombine(ISD::SELECT);
  setTargetDAGCombine(ISD::SELECT_CC);

  // Set function alignment to 16 bytes.
  setMinFunctionAlignment(Align(16));

  // VE stores all arguments with 8-byte alignment.
  setMinStackArgumentAlignment(Align(8));

  computeRegisterProperties(Subtarget->getRegisterInfo());
}

const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
#define TARGET_NODE_CASE(NAME)                                                 \
  case VEISD::NAME:                                                            \
    return "VEISD::" #NAME;
  switch ((VEISD::NodeType)Opcode) {
  case VEISD::FIRST_NUMBER:
    break;
    TARGET_NODE_CASE(CMPI)
    TARGET_NODE_CASE(CMPU)
    TARGET_NODE_CASE(CMPF)
    TARGET_NODE_CASE(CMPQ)
    TARGET_NODE_CASE(CMOV)
    TARGET_NODE_CASE(CALL)
    TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
    TARGET_NODE_CASE(GETFUNPLT)
    TARGET_NODE_CASE(GETSTACKTOP)
    TARGET_NODE_CASE(GETTLSADDR)
    TARGET_NODE_CASE(GLOBAL_BASE_REG)
    TARGET_NODE_CASE(Hi)
    TARGET_NODE_CASE(Lo)
    TARGET_NODE_CASE(RET_GLUE)
    TARGET_NODE_CASE(TS1AM)
    TARGET_NODE_CASE(VEC_UNPACK_LO)
    TARGET_NODE_CASE(VEC_UNPACK_HI)
    TARGET_NODE_CASE(VEC_PACK)
    TARGET_NODE_CASE(VEC_BROADCAST)
    TARGET_NODE_CASE(REPL_I32)
    TARGET_NODE_CASE(REPL_F32)

    TARGET_NODE_CASE(LEGALAVL)

    // Register the VVP_* SDNodes.
#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
#include "VVPNodes.def"
  }
#undef TARGET_NODE_CASE
  return nullptr;
}

EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                         EVT VT) const {
  return MVT::i32;
}

// Convert to a target node and set target flags.
SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
                                          SelectionDAG &DAG) const {
  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
    return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
                                      GA->getValueType(0), GA->getOffset(), TF);

  if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
    return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
                                     0, TF);

  if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
    return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
                                     CP->getAlign(), CP->getOffset(), TF);

  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
    return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
                                       TF);

  if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
    return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);

  llvm_unreachable("Unhandled address SDNode");
}

// Split Op into high and low parts according to HiTF and LoTF.
// Return an ADD node combining the parts.
SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
                                       SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
  SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
  return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}

// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
// or ExternalSymbol SDNode.
SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = Op.getValueType();

  // Handle PIC mode first. VE needs a GOT load for every variable.
  if (isPositionIndependent()) {
    auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);

    if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
        (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
      // Create the following instructions for local-linkage PIC code.
      //   lea %reg, label@gotoff_lo
      //   and %reg, %reg, (32)0
      //   lea.sl %reg, label@gotoff_hi(%reg, %got)
      SDValue HiLo =
          makeHiLoPair(Op, VE::S_GOTOFF_HI32, VE::S_GOTOFF_LO32, DAG);
      SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
      return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    }
    // Create the following instructions for non-local-linkage PIC code.
    //   lea %reg, label@got_lo
    //   and %reg, %reg, (32)0
    //   lea.sl %reg, label@got_hi(%reg)
    //   ld %reg, (%reg, %got)
    SDValue HiLo = makeHiLoPair(Op, VE::S_GOT_HI32, VE::S_GOT_LO32, DAG);
    SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
    SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
                       MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // This is one of the absolute code models.
  switch (getTargetMachine().getCodeModel()) {
  default:
    llvm_unreachable("Unsupported absolute code model");
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Large:
    // abs64.
    return makeHiLoPair(Op, VE::S_HI32, VE::S_LO32, DAG);
  }
}

/// Custom Lower {

// The mappings for emitLeading/TrailingFence for VE are designed following
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
                                                Instruction *Inst,
                                                AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/non-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Acquire:
    return nullptr; // Nothing to do
  case AtomicOrdering::Release:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Release);
  case AtomicOrdering::SequentiallyConsistent:
    if (!Inst->hasAtomicStore())
      return nullptr; // Nothing to do
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
}

Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/not-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Release:
    return nullptr; // Nothing to do
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Acquire);
  case AtomicOrdering::SequentiallyConsistent:
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
}

SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicOrdering FenceOrdering =
      static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // VE uses Release consistency, so we need a fence instruction if this is a
  // cross-thread fence.
  if (FenceSSID == SyncScope::System) {
    switch (FenceOrdering) {
    case AtomicOrdering::NotAtomic:
    case AtomicOrdering::Unordered:
    case AtomicOrdering::Monotonic:
      // No need to generate a fencem instruction here.
      break;
    case AtomicOrdering::Acquire:
      // Generate "fencem 2" as an acquire fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(2, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::Release:
      // Generate "fencem 1" as a release fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(1, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      // Generate "fencem 3" as an acq_rel and seq_cst fence.
      // FIXME: "fencem 3" doesn't wait for accesses from PCIe devices,
      // so seq_cst may require additional instructions for them.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(3, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    }
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}

TargetLowering::AtomicExpansionKind
VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have a TS1AM implementation for i8/i16/i32/i64, so use it.
  if (AI->getOperation() == AtomicRMWInst::Xchg) {
    return AtomicExpansionKind::None;
  }
  // FIXME: Support the "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.

  // Otherwise, expand it using a compare-and-exchange loop so that we don't
  // call the __sync_fetch_and_* functions.
  return AtomicExpansionKind::CmpXChg;
}

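// Prepare the operands for a sub-word atomic swap implemented with TS1AM:
// derive the byte-enable flag and the bit shift amount from the low bits of
// the pointer, and return the value shifted into its byte position within the
// containing 4-byte word. Flag and Bits are returned by reference.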
static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
                            SDValue &Bits) {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);
  SDValue Ptr = N->getOperand(1);
  SDValue Val = N->getOperand(2);
  EVT PtrVT = Ptr.getValueType();
  bool Byte = N->getMemoryVT() == MVT::i8;
  //   Remainder = AND Ptr, 3
  //   Flag = 1 << Remainder   ; If Byte is true (1 byte swap flag)
  //   Flag = 3 << Remainder   ; If Byte is false (2 bytes swap flag)
  //   Bits = Remainder << 3
  //   NewVal = Val << Bits
  SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
  SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
  SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
                      : DAG.getConstant(3, DL, MVT::i32);
  Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
  Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
  return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
}

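// Extract the sub-word result of a TS1AM operation: shift the returned data
// back down by Bits and mask it to the accessed width (1 or 2 bytes).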
static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
                             SDValue Bits) {
  SDLoc DL(Op);
  EVT VT = Data.getValueType();
  bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
  //   NewData = Data >> Bits
  //   Result = NewData & 0xff   ; If Byte is true (1 byte)
  //   Result = NewData & 0xffff ; If Byte is false (2 bytes)

  SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
  return DAG.getNode(ISD::AND, DL, VT,
                     {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
}

SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);

  if (N->getMemoryVT() == MVT::i8) {
    // For i8, use "ts1am".
    //   Input:
    //     ATOMIC_SWAP Ptr, Val, Order
    //
    //   Output:
    //     Remainder = AND Ptr, 3
    //     Flag = 1 << Remainder  ; 1 byte swap flag for TS1AM inst.
    //     Bits = Remainder << 3
    //     NewVal = Val << Bits
    //
    //     Aligned = AND Ptr, -4
    //     Data = TS1AM Aligned, Flag, NewVal
    //
    //     NewData = Data >> Bits
    //     Result = NewData & 0xff ; 1 byte result
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned =
        DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                    {Ptr, DAG.getSignedConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  if (N->getMemoryVT() == MVT::i16) {
    // For i16, use "ts1am" as well.
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned =
        DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                    {Ptr, DAG.getSignedConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  // Otherwise, let LLVM legalize it.
  return Op;
}

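// GlobalAddress, BlockAddress, ConstantPool, and JumpTable nodes are all
// lowered through makeAddress().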
SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerConstantPool(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue
VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Generate the following code:
  //   t1: ch,glue = callseq_start t0, 0, 0
  //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
  //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
  //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
  SDValue Label = withTargetFlags(Op, 0, DAG);
  EVT PtrVT = Op.getValueType();

  // Lowering the machine isd will make sure everything is in the right
  // location.
  SDValue Chain = DAG.getEntryNode();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
      DAG.getMachineFunction(), CallingConv::C);
  Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
  SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask),
                    Chain.getValue(1)};
  Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
  Chain = DAG.getCALLSEQ_END(Chain, 64, 0, Chain.getValue(1), DL);
  Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));

  // GETTLSADDR will be codegen'ed as a call. Inform MFI that this function
  // has calls.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setHasCalls(true);

  // Also generate code to prepare a GOT register if it is PIC.
  if (isPositionIndependent()) {
    MachineFunction &MF = DAG.getMachineFunction();
    Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
  }

  return Chain;
}

SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't accept the local exec
  // model code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we
  // always generate the general dynamic model code sequence.
  //
  // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
  return lowerToTLSGeneralDynamicModel(Op, DAG);
}

SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

// Lower a f128 load into two f64 loads.
static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
  assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
  Align Alignment = LdNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  SDValue Lo64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);
  EVT AddrVT = LdNode->getBasePtr().getValueType();
  SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
                              DAG.getConstant(8, DL, AddrVT));
  SDValue Hi64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);

  SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
  SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);

  // VE stores Hi64 to 8(addr) and Lo64 to 0(addr).
  SDNode *InFP128 =
      DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Hi64, SubRegEven);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Lo64, SubRegOdd);
  SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
                          SDValue(Hi64.getNode(), 1)};
  SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
  return DAG.getMergeValues(Ops, DL);
}

// Lower a vXi1 load into the following instructions:
//   LDrii %1, (,%addr)
//   LVMxir %vm, 0, %1
//   LDrii %2, 8(,%addr)
//   LVMxir %vm, 0, %2
//   ...
static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
  assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");

  SDValue BasePtr = LdNode->getBasePtr();
  Align Alignment = LdNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  EVT AddrVT = BasePtr.getValueType();
  EVT MemVT = LdNode->getMemoryVT();
  if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
    SDValue OutChains[4];
    SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
    for (int i = 0; i < 4; ++i) {
      // Generate a load DAG and prepare chains.
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      SDValue Val =
          DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
                      LdNode->getPointerInfo(), Alignment,
                      LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                           : MachineMemOperand::MONone);
      OutChains[i] = SDValue(Val.getNode(), 1);

      VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
                              DAG.getTargetConstant(i, DL, MVT::i64), Val,
                              SDValue(VM, 0));
    }
    SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
    SDValue Ops[2] = {SDValue(VM, 0), OutChain};
    return DAG.getMergeValues(Ops, DL);
  } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
    SDValue OutChains[8];
    SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
    for (int i = 0; i < 8; ++i) {
      // Generate a load DAG and prepare chains.
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      SDValue Val =
          DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
                      LdNode->getPointerInfo(), Alignment,
                      LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                           : MachineMemOperand::MONone);
      OutChains[i] = SDValue(Val.getNode(), 1);

      VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
                              DAG.getTargetConstant(i, DL, MVT::i64), Val,
                              SDValue(VM, 0));
    }
    SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
    SDValue Ops[2] = {SDValue(VM, 0), OutChain};
    return DAG.getMergeValues(Ops, DL);
  } else {
    // Otherwise, ask LLVM to expand it.
    return SDValue();
  }
}
1417
1418SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1419 LoadSDNode *LdNode = cast<LoadSDNode>(Val: Op.getNode());
1420 EVT MemVT = LdNode->getMemoryVT();
1421
1422 // If VPU is enabled, always expand non-mask vector loads to VVP
1423 if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(SomeVT: MemVT))
1424 return lowerToVVP(Op, DAG);
1425
1426 SDValue BasePtr = LdNode->getBasePtr();
1427 if (isa<FrameIndexSDNode>(Val: BasePtr.getNode())) {
1428 // Do not expand load instructions with a frame index here because of
1429 // dependency problems. We expand them later in eliminateFrameIndex().
1430 return Op;
1431 }
1432
1433 if (MemVT == MVT::f128)
1434 return lowerLoadF128(Op, DAG);
1435 if (isMaskType(SomeVT: MemVT))
1436 return lowerLoadI1(Op, DAG);
1437
1438 return Op;
1439}
1440
1441// Lower a f128 store into two f64 stores.
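// As a sketch of the result (chains omitted), a store of an f128 value %val
// to %addr becomes:
//   %hi = EXTRACT_SUBREG %val, sub_even
//   %lo = EXTRACT_SUBREG %val, sub_odd
//   store f64 %lo, 0(%addr)
//   store f64 %hi, 8(%addr)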
1442static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
1443 SDLoc DL(Op);
1444 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Val: Op.getNode());
1445 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1446
1447 SDValue SubRegEven = DAG.getTargetConstant(Val: VE::sub_even, DL, VT: MVT::i32);
1448 SDValue SubRegOdd = DAG.getTargetConstant(Val: VE::sub_odd, DL, VT: MVT::i32);
1449
1450 SDNode *Hi64 = DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i64,
1451 Op1: StNode->getValue(), Op2: SubRegEven);
1452 SDNode *Lo64 = DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i64,
1453 Op1: StNode->getValue(), Op2: SubRegOdd);
1454
1455 Align Alignment = StNode->getAlign();
1456 if (Alignment > 8)
1457 Alignment = Align(8);
1458
1459 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1460 SDValue OutChains[2];
1461 OutChains[0] =
1462 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(Lo64, 0),
1463 Ptr: StNode->getBasePtr(), PtrInfo: MachinePointerInfo(), Alignment,
1464 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1465 : MachineMemOperand::MONone);
1466 EVT AddrVT = StNode->getBasePtr().getValueType();
1467 SDValue HiPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: StNode->getBasePtr(),
1468 N2: DAG.getConstant(Val: 8, DL, VT: AddrVT));
1469 OutChains[1] =
1470 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(Hi64, 0), Ptr: HiPtr,
1471 PtrInfo: MachinePointerInfo(), Alignment,
1472 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1473 : MachineMemOperand::MONone);
1474 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1475}
1476
1477 // Lower a vXi1 store into the following instructions:
1478// SVMi %1, %vm, 0
1479// STrii %1, (,%addr)
1480// SVMi %2, %vm, 1
1481// STrii %2, 8(,%addr)
1482// ...
1483static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
1484 SDLoc DL(Op);
1485 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Val: Op.getNode());
1486 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1487
1488 SDValue BasePtr = StNode->getBasePtr();
1489 Align Alignment = StNode->getAlign();
1490 if (Alignment > 8)
1491 Alignment = Align(8);
1492 EVT AddrVT = BasePtr.getValueType();
1493 EVT MemVT = StNode->getMemoryVT();
1494 if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1495 SDValue OutChains[4];
1496 for (int i = 0; i < 4; ++i) {
1497 SDNode *V =
1498 DAG.getMachineNode(Opcode: VE::SVMmi, dl: DL, VT: MVT::i64, Op1: StNode->getValue(),
1499 Op2: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64));
1500 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1501 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1502 OutChains[i] =
1503 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(V, 0), Ptr: Addr,
1504 PtrInfo: MachinePointerInfo(), Alignment,
1505 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1506 : MachineMemOperand::MONone);
1507 }
1508 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1509 } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1510 SDValue OutChains[8];
1511 for (int i = 0; i < 8; ++i) {
1512 SDNode *V =
1513 DAG.getMachineNode(Opcode: VE::SVMyi, dl: DL, VT: MVT::i64, Op1: StNode->getValue(),
1514 Op2: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64));
1515 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1516 N2: DAG.getConstant(Val: 8 * i, DL, VT: AddrVT));
1517 OutChains[i] =
1518 DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue(V, 0), Ptr: Addr,
1519 PtrInfo: MachinePointerInfo(), Alignment,
1520 MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1521 : MachineMemOperand::MONone);
1522 }
1523 return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1524 } else {
1525 // Otherwise, ask llvm to expand it.
1526 return SDValue();
1527 }
1528}
1529
1530SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1531 StoreSDNode *StNode = cast<StoreSDNode>(Val: Op.getNode());
1532 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1533 EVT MemVT = StNode->getMemoryVT();
1534
1535 // If VPU is enabled, always expand non-mask vector stores to VVP
1536 if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(SomeVT: MemVT))
1537 return lowerToVVP(Op, DAG);
1538
1539 SDValue BasePtr = StNode->getBasePtr();
1540 if (isa<FrameIndexSDNode>(Val: BasePtr.getNode())) {
1541 // Do not expand store instruction with frame index here because of
1542 // dependency problems. We expand it later in eliminateFrameIndex().
1543 return Op;
1544 }
1545
1546 if (MemVT == MVT::f128)
1547 return lowerStoreF128(Op, DAG);
1548 if (isMaskType(SomeVT: MemVT))
1549 return lowerStoreI1(Op, DAG);
1550
1551 // Otherwise, ask llvm to expand it.
1552 return SDValue();
1553}
1554
1555SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1556 MachineFunction &MF = DAG.getMachineFunction();
1557 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1558 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1559
1560 // Need frame address to find the address of VarArgsFrameIndex.
1561 MF.getFrameInfo().setFrameAddressIsTaken(true);
1562
1563 // vastart just stores the address of the VarArgsFrameIndex slot into the
1564 // memory location argument.
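// As a sketch, for `va_start(ap)` this emits roughly:
//   %addr = add %s9, VarArgsFrameOffset   ; %s9 is the frame pointer
//   store %addr, <ap>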
1565 SDLoc DL(Op);
1566 SDValue Offset =
1567 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: DAG.getRegister(Reg: VE::SX9, VT: PtrVT),
1568 N2: DAG.getIntPtrConstant(Val: FuncInfo->getVarArgsFrameOffset(), DL));
1569 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
1570 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: Offset, Ptr: Op.getOperand(i: 1),
1571 PtrInfo: MachinePointerInfo(SV));
1572}
1573
1574SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1575 SDNode *Node = Op.getNode();
1576 EVT VT = Node->getValueType(ResNo: 0);
1577 SDValue InChain = Node->getOperand(Num: 0);
1578 SDValue VAListPtr = Node->getOperand(Num: 1);
1579 EVT PtrVT = VAListPtr.getValueType();
1580 const Value *SV = cast<SrcValueSDNode>(Val: Node->getOperand(Num: 2))->getValue();
1581 SDLoc DL(Node);
1582 SDValue VAList =
1583 DAG.getLoad(VT: PtrVT, dl: DL, Chain: InChain, Ptr: VAListPtr, PtrInfo: MachinePointerInfo(SV));
1584 SDValue Chain = VAList.getValue(R: 1);
1585 SDValue NextPtr;
1586
1587 if (VT == MVT::f128) {
1588 // VE f128 values must be stored with 16-byte alignment. We don't
1589 // know the actual alignment of VAList, so we align it to 16 bytes
1590 // dynamically here.
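// In effect, VAList is rounded up to the next 16-byte boundary:
//   VAList = (VAList + 15) & -16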
1591 int Align = 16;
1592 VAList = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList,
1593 N2: DAG.getConstant(Val: Align - 1, DL, VT: PtrVT));
1594 VAList = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, N1: VAList,
1595 N2: DAG.getSignedConstant(Val: -Align, DL, VT: PtrVT));
1596 // Increment the pointer, VAList, by 16 to the next vaarg.
1597 NextPtr =
1598 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: 16, DL));
1599 } else if (VT == MVT::f32) {
1600 // float --> need special handling like below.
1601 // 0 4
1602 // +------+------+
1603 // | empty| float|
1604 // +------+------+
1605 // Increment the pointer, VAList, by 8 to the next vaarg.
1606 NextPtr =
1607 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: 8, DL));
1608 // Then, adjust VAList.
1609 unsigned InternalOffset = 4;
1610 VAList = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList,
1611 N2: DAG.getConstant(Val: InternalOffset, DL, VT: PtrVT));
1612 } else {
1613 // Increment the pointer, VAList, by 8 to the next vaarg.
1614 NextPtr =
1615 DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: 8, DL));
1616 }
1617
1618 // Store the incremented VAList to the legalized pointer.
1619 InChain = DAG.getStore(Chain, dl: DL, Val: NextPtr, Ptr: VAListPtr, PtrInfo: MachinePointerInfo(SV));
1620
1621 // Load the actual argument out of the pointer VAList.
1622 // We can't count on greater alignment than the word size.
1623 return DAG.getLoad(
1624 VT, dl: DL, Chain: InChain, Ptr: VAList, PtrInfo: MachinePointerInfo(),
1625 Alignment: Align(std::min(a: PtrVT.getSizeInBits(), b: VT.getSizeInBits()) / 8));
1626}
1627
1628SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1629 SelectionDAG &DAG) const {
1630 // Generate the following code:
1631 // (void)__ve_grow_stack(size); // or __ve_grow_stack_align(size, -align)
1632 // ret = GETSTACKTOP; // pseudo instruction
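// As a sketch, for an over-aligned `alloca i8, i64 %n, align 32` this emits
// roughly (helper name and operands taken from the code below):
//   call __ve_grow_stack_align(%n, -32)
//   %top = GETSTACKTOP
//   %ret = (%top + 31) & -32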
1633 SDLoc DL(Op);
1634
1635 // Get the inputs.
1636 SDNode *Node = Op.getNode();
1637 SDValue Chain = Op.getOperand(i: 0);
1638 SDValue Size = Op.getOperand(i: 1);
1639 MaybeAlign Alignment(Op.getConstantOperandVal(i: 2));
1640 EVT VT = Node->getValueType(ResNo: 0);
1641
1642 // Chain the dynamic stack allocation so that it doesn't modify the stack
1643 // pointer when other instructions are using the stack.
1644 Chain = DAG.getCALLSEQ_START(Chain, InSize: 0, OutSize: 0, DL);
1645
1646 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1647 Align StackAlign = TFI.getStackAlign();
1648 bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1649
1650 // Prepare arguments
1651 TargetLowering::ArgListTy Args;
1652 TargetLowering::ArgListEntry Entry;
1653 Entry.Node = Size;
1654 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext());
1655 Args.push_back(x: Entry);
1656 if (NeedsAlign) {
1657 Entry.Node = DAG.getConstant(Val: ~(Alignment->value() - 1ULL), DL, VT);
1658 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext());
1659 Args.push_back(x: Entry);
1660 }
1661 Type *RetTy = Type::getVoidTy(C&: *DAG.getContext());
1662
1663 EVT PtrVT = Op.getValueType();
1664 SDValue Callee;
1665 if (NeedsAlign) {
1666 Callee = DAG.getTargetExternalSymbol(Sym: "__ve_grow_stack_align", VT: PtrVT, TargetFlags: 0);
1667 } else {
1668 Callee = DAG.getTargetExternalSymbol(Sym: "__ve_grow_stack", VT: PtrVT, TargetFlags: 0);
1669 }
1670
1671 TargetLowering::CallLoweringInfo CLI(DAG);
1672 CLI.setDebugLoc(DL)
1673 .setChain(Chain)
1674 .setCallee(CC: CallingConv::PreserveAll, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
1675 .setDiscardResult(true);
1676 std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1677 Chain = pair.second;
1678 SDValue Result = DAG.getNode(Opcode: VEISD::GETSTACKTOP, DL, VT, Operand: Chain);
1679 if (NeedsAlign) {
1680 Result = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Result,
1681 N2: DAG.getConstant(Val: (Alignment->value() - 1ULL), DL, VT));
1682 Result = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Result,
1683 N2: DAG.getConstant(Val: ~(Alignment->value() - 1ULL), DL, VT));
1684 }
1685 // Chain = Result.getValue(1);
1686 Chain = DAG.getCALLSEQ_END(Chain, Size1: 0, Size2: 0, Glue: SDValue(), DL);
1687
1688 SDValue Ops[2] = {Result, Chain};
1689 return DAG.getMergeValues(Ops, dl: DL);
1690}
1691
1692SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1693 SelectionDAG &DAG) const {
1694 SDLoc DL(Op);
1695 return DAG.getNode(Opcode: VEISD::EH_SJLJ_LONGJMP, DL, VT: MVT::Other, N1: Op.getOperand(i: 0),
1696 N2: Op.getOperand(i: 1));
1697}
1698
1699SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1700 SelectionDAG &DAG) const {
1701 SDLoc DL(Op);
1702 return DAG.getNode(Opcode: VEISD::EH_SJLJ_SETJMP, DL,
1703 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other), N1: Op.getOperand(i: 0),
1704 N2: Op.getOperand(i: 1));
1705}
1706
1707SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1708 SelectionDAG &DAG) const {
1709 SDLoc DL(Op);
1710 return DAG.getNode(Opcode: VEISD::EH_SJLJ_SETUP_DISPATCH, DL, VT: MVT::Other,
1711 Operand: Op.getOperand(i: 0));
1712}
1713
1714static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1715 const VETargetLowering &TLI,
1716 const VESubtarget *Subtarget) {
1717 SDLoc DL(Op);
1718 MachineFunction &MF = DAG.getMachineFunction();
1719 EVT PtrVT = TLI.getPointerTy(DL: MF.getDataLayout());
1720
1721 MachineFrameInfo &MFI = MF.getFrameInfo();
1722 MFI.setFrameAddressIsTaken(true);
1723
1724 unsigned Depth = Op.getConstantOperandVal(i: 0);
1725 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1726 Register FrameReg = RegInfo->getFrameRegister(MF);
1727 SDValue FrameAddr =
1728 DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT: PtrVT);
1729 while (Depth--)
1730 FrameAddr = DAG.getLoad(VT: Op.getValueType(), dl: DL, Chain: DAG.getEntryNode(),
1731 Ptr: FrameAddr, PtrInfo: MachinePointerInfo());
1732 return FrameAddr;
1733}
1734
1735static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1736 const VETargetLowering &TLI,
1737 const VESubtarget *Subtarget) {
1738 MachineFunction &MF = DAG.getMachineFunction();
1739 MachineFrameInfo &MFI = MF.getFrameInfo();
1740 MFI.setReturnAddressIsTaken(true);
1741
1742 if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
1743 return SDValue();
1744
1745 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1746
1747 SDLoc DL(Op);
1748 EVT VT = Op.getValueType();
1749 SDValue Offset = DAG.getConstant(Val: 8, DL, VT);
1750 return DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(),
1751 Ptr: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, N2: Offset),
1752 PtrInfo: MachinePointerInfo());
1753}
1754
1755SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1756 SelectionDAG &DAG) const {
1757 SDLoc DL(Op);
1758 unsigned IntNo = Op.getConstantOperandVal(i: 0);
1759 switch (IntNo) {
1760 default: // Don't custom lower most intrinsics.
1761 return SDValue();
1762 case Intrinsic::eh_sjlj_lsda: {
1763 MachineFunction &MF = DAG.getMachineFunction();
1764 MVT VT = Op.getSimpleValueType();
1765 const VETargetMachine *TM =
1766 static_cast<const VETargetMachine *>(&DAG.getTarget());
1767
1768 // Create the GCC_except_tableXX string. The real symbol for that will be
1769 // generated in EHStreamer::emitExceptionTable() later. So, we just
1770 // borrow its name here.
1771 TM->getStrList()->push_back(x: std::string(
1772 (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1773 SDValue Addr =
1774 DAG.getTargetExternalSymbol(Sym: TM->getStrList()->back().c_str(), VT, TargetFlags: 0);
1775 if (isPositionIndependent()) {
1776 Addr = makeHiLoPair(Op: Addr, HiTF: VE::S_GOTOFF_HI32, LoTF: VE::S_GOTOFF_LO32, DAG);
1777 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT);
1778 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: GlobalBase, N2: Addr);
1779 }
1780 return makeHiLoPair(Op: Addr, HiTF: VE::S_HI32, LoTF: VE::S_LO32, DAG);
1781 }
1782 }
1783}
1784
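// Return true (and set \p UniqueIdx) if \p N is a BUILD_VECTOR with exactly
// one non-undef operand, e.g.
//   (build_vector undef, undef, %x, undef)  ->  UniqueIdx == 2.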
1785static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1786 if (!isa<BuildVectorSDNode>(Val: N))
1787 return false;
1788 const auto *BVN = cast<BuildVectorSDNode>(Val: N);
1789
1790 // Find first non-undef insertion.
1791 unsigned Idx;
1792 for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1793 auto ElemV = BVN->getOperand(Num: Idx);
1794 if (!ElemV->isUndef())
1795 break;
1796 }
1797 // Catch the (hypothetical) all-undef case.
1798 if (Idx == BVN->getNumOperands())
1799 return false;
1800 // Remember insertion.
1801 UniqueIdx = Idx++;
1802 // Verify that all other insertions are undef.
1803 for (; Idx < BVN->getNumOperands(); ++Idx) {
1804 auto ElemV = BVN->getOperand(Num: Idx);
1805 if (!ElemV->isUndef())
1806 return false;
1807 }
1808 return true;
1809}
1810
1811static SDValue getSplatValue(SDNode *N) {
1812 if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(Val: N)) {
1813 return BuildVec->getSplatValue();
1814 }
1815 return SDValue();
1816}
1817
1818SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1819 SelectionDAG &DAG) const {
1820 VECustomDAG CDAG(DAG, Op);
1821 MVT ResultVT = Op.getSimpleValueType();
1822
1823 // If there is just one non-undef element, expand to INSERT_VECTOR_ELT.
1824 unsigned UniqueIdx;
1825 if (getUniqueInsertion(N: Op.getNode(), UniqueIdx)) {
1826 SDValue AccuV = CDAG.getUNDEF(VT: Op.getValueType());
1827 auto ElemV = Op->getOperand(Num: UniqueIdx);
1828 SDValue IdxV = CDAG.getConstant(Val: UniqueIdx, VT: MVT::i64);
1829 return CDAG.getNode(OC: ISD::INSERT_VECTOR_ELT, ResVT: ResultVT, OpV: {AccuV, ElemV, IdxV});
1830 }
1831
1832 // Else emit a broadcast.
1833 if (SDValue ScalarV = getSplatValue(N: Op.getNode())) {
1834 unsigned NumEls = ResultVT.getVectorNumElements();
1835 auto AVL = CDAG.getConstant(Val: NumEls, VT: MVT::i32);
1836 return CDAG.getBroadcast(ResultVT, Scalar: ScalarV, AVL);
1837 }
1838
1839 // Expand
1840 return SDValue();
1841}
1842
1843TargetLowering::LegalizeAction
1844VETargetLowering::getCustomOperationAction(SDNode &Op) const {
1845 // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
1846 // these operations (transform nodes such that their AVL parameter refers to
1847 // packs of 64 bits instead of the number of elements).
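// For example, a VVP_ADD over v512i32 notionally works on 512 elements, but in
// packed mode each 64-bit vector element holds two i32 values, so (as a sketch)
// its AVL would be rewritten from 512 elements down to 256 64-bit packs and
// annotated with a LEGALAVL node.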
1848
1849 // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1850 // re-visit them.
1851 if (isPackingSupportOpcode(Opc: Op.getOpcode()))
1852 return Legal;
1853
1854 // Custom lower to legalize AVL for packed mode.
1855 if (isVVPOrVEC(Op.getOpcode()))
1856 return Custom;
1857 return Legal;
1858}
1859
1860SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1861 LLVM_DEBUG(dbgs() << "::LowerOperation "; Op.dump(&DAG));
1862 unsigned Opcode = Op.getOpcode();
1863
1864 /// Scalar isel.
1865 switch (Opcode) {
1866 case ISD::ATOMIC_FENCE:
1867 return lowerATOMIC_FENCE(Op, DAG);
1868 case ISD::ATOMIC_SWAP:
1869 return lowerATOMIC_SWAP(Op, DAG);
1870 case ISD::BlockAddress:
1871 return lowerBlockAddress(Op, DAG);
1872 case ISD::ConstantPool:
1873 return lowerConstantPool(Op, DAG);
1874 case ISD::DYNAMIC_STACKALLOC:
1875 return lowerDYNAMIC_STACKALLOC(Op, DAG);
1876 case ISD::EH_SJLJ_LONGJMP:
1877 return lowerEH_SJLJ_LONGJMP(Op, DAG);
1878 case ISD::EH_SJLJ_SETJMP:
1879 return lowerEH_SJLJ_SETJMP(Op, DAG);
1880 case ISD::EH_SJLJ_SETUP_DISPATCH:
1881 return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1882 case ISD::FRAMEADDR:
1883 return lowerFRAMEADDR(Op, DAG, TLI: *this, Subtarget);
1884 case ISD::GlobalAddress:
1885 return lowerGlobalAddress(Op, DAG);
1886 case ISD::GlobalTLSAddress:
1887 return lowerGlobalTLSAddress(Op, DAG);
1888 case ISD::INTRINSIC_WO_CHAIN:
1889 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1890 case ISD::JumpTable:
1891 return lowerJumpTable(Op, DAG);
1892 case ISD::LOAD:
1893 return lowerLOAD(Op, DAG);
1894 case ISD::RETURNADDR:
1895 return lowerRETURNADDR(Op, DAG, TLI: *this, Subtarget);
1896 case ISD::BUILD_VECTOR:
1897 return lowerBUILD_VECTOR(Op, DAG);
1898 case ISD::STORE:
1899 return lowerSTORE(Op, DAG);
1900 case ISD::VASTART:
1901 return lowerVASTART(Op, DAG);
1902 case ISD::VAARG:
1903 return lowerVAARG(Op, DAG);
1904
1905 case ISD::INSERT_VECTOR_ELT:
1906 return lowerINSERT_VECTOR_ELT(Op, DAG);
1907 case ISD::EXTRACT_VECTOR_ELT:
1908 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1909 }
1910
1911 /// Vector isel.
1912 if (ISD::isVPOpcode(Opcode))
1913 return lowerToVVP(Op, DAG);
1914
1915 switch (Opcode) {
1916 default:
1917 llvm_unreachable("Should not custom lower this!");
1918
1919 // Legalize the AVL of this internal node.
1920 case VEISD::VEC_BROADCAST:
1921#define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
1922#include "VVPNodes.def"
1923 // AVL already legalized.
1924 if (getAnnotatedNodeAVL(Op).second)
1925 return Op;
1926 return legalizeInternalVectorOp(Op, DAG);
1927
1928 // Translate into a VEC_*/VVP_* layer operation.
1929 case ISD::MLOAD:
1930 case ISD::MSTORE:
1931#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1932#include "VVPNodes.def"
1933 if (isMaskArithmetic(Op) && isPackedVectorType(SomeVT: Op.getValueType()))
1934 return splitMaskArithmetic(Op, DAG);
1935 return lowerToVVP(Op, DAG);
1936 }
1937}
1938/// } Custom Lower
1939
1940void VETargetLowering::ReplaceNodeResults(SDNode *N,
1941 SmallVectorImpl<SDValue> &Results,
1942 SelectionDAG &DAG) const {
1943 switch (N->getOpcode()) {
1944 case ISD::ATOMIC_SWAP:
1945 // Let LLVM expand atomic swap instruction through LowerOperation.
1946 return;
1947 default:
1948 LLVM_DEBUG(N->dumpr(&DAG));
1949 llvm_unreachable("Do not know how to custom type legalize this operation!");
1950 }
1951}
1952
1953/// JumpTable for VE.
1954///
1955 /// VE cannot generate relocatable symbols in jump tables; it cannot
1956 /// generate expressions using symbols from both the text segment and the
1957 /// data segment like below.
1958 /// .4byte .LBB0_2-.LJTI0_0
1959 /// So, we generate an offset from the start of the function like below as
1960 /// a custom label.
1961/// .4byte .LBB0_2-<function name>
1962
1963unsigned VETargetLowering::getJumpTableEncoding() const {
1964 // Use custom label for PIC.
1965 if (isPositionIndependent())
1966 return MachineJumpTableInfo::EK_Custom32;
1967
1968 // Otherwise, use the normal jump table encoding heuristics.
1969 return TargetLowering::getJumpTableEncoding();
1970}
1971
1972const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1973 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1974 unsigned Uid, MCContext &Ctx) const {
1975 assert(isPositionIndependent());
1976
1977 // Generate custom label for PIC like below.
1978 // .4bytes .LBB0_2-<function name>
1979 const auto *Value = MCSymbolRefExpr::create(Symbol: MBB->getSymbol(), Ctx);
1980 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: MBB->getParent()->getName().data());
1981 const auto *Base = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
1982 return MCBinaryExpr::createSub(LHS: Value, RHS: Base, Ctx);
1983}
1984
1985SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1986 SelectionDAG &DAG) const {
1987 assert(isPositionIndependent());
1988 SDLoc DL(Table);
1989 Function *Function = &DAG.getMachineFunction().getFunction();
1990 assert(Function != nullptr);
1991 auto PtrTy = getPointerTy(DL: DAG.getDataLayout(), AS: Function->getAddressSpace());
1992
1993 // In the jump table, we have following values in PIC mode.
1994 // .4bytes .LBB0_2-<function name>
1995 // We need to add this value and the address of this function to generate
1996 // .LBB0_2 label correctly under PIC mode. So, we want to generate following
1997 // instructions:
1998 // lea %reg, fun@gotoff_lo
1999 // and %reg, %reg, (32)0
2000 // lea.sl %reg, fun@gotoff_hi(%reg, %got)
2001 // In order to do so, we need to generate a correctly marked DAG node using
2002 // makeHiLoPair.
2003 SDValue Op = DAG.getGlobalAddress(GV: Function, DL, VT: PtrTy);
2004 SDValue HiLo = makeHiLoPair(Op, HiTF: VE::S_GOTOFF_HI32, LoTF: VE::S_GOTOFF_LO32, DAG);
2005 SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrTy);
2006 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: GlobalBase, N2: HiLo);
2007}
2008
2009Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
2010 MachineBasicBlock::iterator I,
2011 MachineBasicBlock *TargetBB,
2012 const DebugLoc &DL) const {
2013 MachineFunction *MF = MBB.getParent();
2014 MachineRegisterInfo &MRI = MF->getRegInfo();
2015 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2016
2017 const TargetRegisterClass *RC = &VE::I64RegClass;
2018 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2019 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2020 Register Result = MRI.createVirtualRegister(RegClass: RC);
2021
2022 if (isPositionIndependent()) {
2023 // Create following instructions for local linkage PIC code.
2024 // lea %Tmp1, TargetBB@gotoff_lo
2025 // and %Tmp2, %Tmp1, (32)0
2026 // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2027 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2028 .addImm(Val: 0)
2029 .addImm(Val: 0)
2030 .addMBB(MBB: TargetBB, TargetFlags: VE::S_GOTOFF_LO32);
2031 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2032 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2033 .addImm(Val: M0(Val: 32));
2034 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Result)
2035 .addReg(RegNo: VE::SX15)
2036 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2037 .addMBB(MBB: TargetBB, TargetFlags: VE::S_GOTOFF_HI32);
2038 } else {
2039 // Create following instructions for non-PIC code.
2040 // lea %Tmp1, TargetBB@lo
2041 // and %Tmp2, %Tmp1, (32)0
2042 // lea.sl %Result, TargetBB@hi(%Tmp2)
2043 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2044 .addImm(Val: 0)
2045 .addImm(Val: 0)
2046 .addMBB(MBB: TargetBB, TargetFlags: VE::S_LO32);
2047 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2048 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2049 .addImm(Val: M0(Val: 32));
2050 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: Result)
2051 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2052 .addImm(Val: 0)
2053 .addMBB(MBB: TargetBB, TargetFlags: VE::S_HI32);
2054 }
2055 return Result;
2056}
2057
2058Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
2059 MachineBasicBlock::iterator I,
2060 StringRef Symbol, const DebugLoc &DL,
2061 bool IsLocal = false,
2062 bool IsCall = false) const {
2063 MachineFunction *MF = MBB.getParent();
2064 MachineRegisterInfo &MRI = MF->getRegInfo();
2065 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2066
2067 const TargetRegisterClass *RC = &VE::I64RegClass;
2068 Register Result = MRI.createVirtualRegister(RegClass: RC);
2069
2070 if (isPositionIndependent()) {
2071 if (IsCall && !IsLocal) {
2072 // Create following instructions for non-local linkage PIC code function
2073 // calls. These instructions use IC and the magic number -24, so we expand
2074 // them in VEAsmPrinter.cpp from GETFUNPLT pseudo instruction.
2075 // lea %Reg, Symbol@plt_lo(-24)
2076 // and %Reg, %Reg, (32)0
2077 // sic %s16
2078 // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
2079 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::GETFUNPLT), DestReg: Result)
2080 .addExternalSymbol(FnName: "abort");
2081 } else if (IsLocal) {
2082 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2083 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2084 // Create following instructions for local linkage PIC code.
2085 // lea %Tmp1, Symbol@gotoff_lo
2086 // and %Tmp2, %Tmp1, (32)0
2087 // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2088 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2089 .addImm(Val: 0)
2090 .addImm(Val: 0)
2091 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOTOFF_LO32);
2092 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2093 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2094 .addImm(Val: M0(Val: 32));
2095 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Result)
2096 .addReg(RegNo: VE::SX15)
2097 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2098 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOTOFF_HI32);
2099 } else {
2100 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2101 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2102 // Create following instructions for non-local linkage PIC code.
2103 // lea %Tmp1, Symbol@got_lo
2104 // and %Tmp2, %Tmp1, (32)0
2105 // lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2106 // ld %Result, 0(%Tmp3)
2107 Register Tmp3 = MRI.createVirtualRegister(RegClass: RC);
2108 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2109 .addImm(Val: 0)
2110 .addImm(Val: 0)
2111 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOT_LO32);
2112 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2113 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2114 .addImm(Val: M0(Val: 32));
2115 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Tmp3)
2116 .addReg(RegNo: VE::SX15)
2117 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2118 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_GOT_HI32);
2119 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: Result)
2120 .addReg(RegNo: Tmp3, flags: getKillRegState(B: true))
2121 .addImm(Val: 0)
2122 .addImm(Val: 0);
2123 }
2124 } else {
2125 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2126 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2127 // Create following instructions for non-PIC code.
2128 // lea %Tmp1, Symbol@lo
2129 // and %Tmp2, %Tmp1, (32)0
2130 // lea.sl %Result, Symbol@hi(%Tmp2)
2131 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2132 .addImm(Val: 0)
2133 .addImm(Val: 0)
2134 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_LO32);
2135 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2136 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2137 .addImm(Val: M0(Val: 32));
2138 BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: Result)
2139 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2140 .addImm(Val: 0)
2141 .addExternalSymbol(FnName: Symbol.data(), TargetFlags: VE::S_HI32);
2142 }
2143 return Result;
2144}
2145
2146void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
2147 MachineBasicBlock *MBB,
2148 MachineBasicBlock *DispatchBB,
2149 int FI, int Offset) const {
2150 DebugLoc DL = MI.getDebugLoc();
2151 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2152
2153 Register LabelReg =
2154 prepareMBB(MBB&: *MBB, I: MachineBasicBlock::iterator(MI), TargetBB: DispatchBB, DL);
2155
2156 // Store the address of DispatchBB into the given jmpbuf[1], which holds the
2157 // next IC that longjmp (throw) jumps to later.
2158 MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2159 addFrameReference(MIB, FI, Offset); // jmpbuf[1]
2160 MIB.addReg(RegNo: LabelReg, flags: getKillRegState(B: true));
2161}
2162
2163MachineBasicBlock *
2164VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
2165 MachineBasicBlock *MBB) const {
2166 DebugLoc DL = MI.getDebugLoc();
2167 MachineFunction *MF = MBB->getParent();
2168 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2169 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
2170 MachineRegisterInfo &MRI = MF->getRegInfo();
2171
2172 const BasicBlock *BB = MBB->getBasicBlock();
2173 MachineFunction::iterator I = ++MBB->getIterator();
2174
2175 // Memory Reference.
2176 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
2177 Register BufReg = MI.getOperand(i: 1).getReg();
2178
2179 Register DstReg;
2180
2181 DstReg = MI.getOperand(i: 0).getReg();
2182 const TargetRegisterClass *RC = MRI.getRegClass(Reg: DstReg);
2183 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
2184 (void)TRI;
2185 Register MainDestReg = MRI.createVirtualRegister(RegClass: RC);
2186 Register RestoreDestReg = MRI.createVirtualRegister(RegClass: RC);
2187
2188 // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate the following
2189 // instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2190 //
2191 // ThisMBB:
2192 // buf[3] = %s17 iff %s17 is used as BP
2193 // buf[1] = RestoreMBB as IC after longjmp
2194 // # SjLjSetup RestoreMBB
2195 //
2196 // MainMBB:
2197 // v_main = 0
2198 //
2199 // SinkMBB:
2200 // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2201 // ...
2202 //
2203 // RestoreMBB:
2204 // %s17 = buf[3] iff %s17 is used as BP
2205 // v_restore = 1
2206 // goto SinkMBB
2207
2208 MachineBasicBlock *ThisMBB = MBB;
2209 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2210 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2211 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2212 MF->insert(MBBI: I, MBB: MainMBB);
2213 MF->insert(MBBI: I, MBB: SinkMBB);
2214 MF->push_back(MBB: RestoreMBB);
2215 RestoreMBB->setMachineBlockAddressTaken();
2216
2217 // Transfer the remainder of BB and its successor edges to SinkMBB.
2218 SinkMBB->splice(Where: SinkMBB->begin(), Other: MBB,
2219 From: std::next(x: MachineBasicBlock::iterator(MI)), To: MBB->end());
2220 SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
2221
2222 // ThisMBB:
2223 Register LabelReg =
2224 prepareMBB(MBB&: *MBB, I: MachineBasicBlock::iterator(MI), TargetBB: RestoreMBB, DL);
2225
2226 // Store BP in buf[3] iff this function is using BP.
2227 const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2228 if (TFI->hasBP(MF: *MF)) {
2229 MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2230 MIB.addReg(RegNo: BufReg);
2231 MIB.addImm(Val: 0);
2232 MIB.addImm(Val: 24);
2233 MIB.addReg(RegNo: VE::SX17);
2234 MIB.setMemRefs(MMOs);
2235 }
2236
2237 // Store IP in buf[1].
2238 MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2239 MIB.add(MO: MI.getOperand(i: 1)); // we can preserve the kill flags here.
2240 MIB.addImm(Val: 0);
2241 MIB.addImm(Val: 8);
2242 MIB.addReg(RegNo: LabelReg, flags: getKillRegState(B: true));
2243 MIB.setMemRefs(MMOs);
2244
2245 // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2246
2247 // Insert setup.
2248 MIB =
2249 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::EH_SjLj_Setup)).addMBB(MBB: RestoreMBB);
2250
2251 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2252 MIB.addRegMask(Mask: RegInfo->getNoPreservedMask());
2253 ThisMBB->addSuccessor(Succ: MainMBB);
2254 ThisMBB->addSuccessor(Succ: RestoreMBB);
2255
2256 // MainMBB:
2257 BuildMI(BB: MainMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: MainDestReg)
2258 .addImm(Val: 0)
2259 .addImm(Val: 0)
2260 .addImm(Val: 0);
2261 MainMBB->addSuccessor(Succ: SinkMBB);
2262
2263 // SinkMBB:
2264 BuildMI(BB&: *SinkMBB, I: SinkMBB->begin(), MIMD: DL, MCID: TII->get(Opcode: VE::PHI), DestReg: DstReg)
2265 .addReg(RegNo: MainDestReg)
2266 .addMBB(MBB: MainMBB)
2267 .addReg(RegNo: RestoreDestReg)
2268 .addMBB(MBB: RestoreMBB);
2269
2270 // RestoreMBB:
2271 // Restore BP from buf[3] iff this function is using BP. The address of
2272 // buf is in SX10.
2273 // FIXME: Better to not use SX10 here
2274 if (TFI->hasBP(MF: *MF)) {
2275 MachineInstrBuilder MIB =
2276 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: VE::SX17);
2277 MIB.addReg(RegNo: VE::SX10);
2278 MIB.addImm(Val: 0);
2279 MIB.addImm(Val: 24);
2280 MIB.setMemRefs(MMOs);
2281 }
2282 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: RestoreDestReg)
2283 .addImm(Val: 0)
2284 .addImm(Val: 0)
2285 .addImm(Val: 1);
2286 BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLa_t)).addMBB(MBB: SinkMBB);
2287 RestoreMBB->addSuccessor(Succ: SinkMBB);
2288
2289 MI.eraseFromParent();
2290 return SinkMBB;
2291}
2292
2293MachineBasicBlock *
2294VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2295 MachineBasicBlock *MBB) const {
2296 DebugLoc DL = MI.getDebugLoc();
2297 MachineFunction *MF = MBB->getParent();
2298 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2299 MachineRegisterInfo &MRI = MF->getRegInfo();
2300
2301 // Memory Reference.
2302 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
2303 Register BufReg = MI.getOperand(i: 0).getReg();
2304
2305 Register Tmp = MRI.createVirtualRegister(RegClass: &VE::I64RegClass);
2306 // Since FP is only updated here but NOT referenced, it's treated as GPR.
2307 Register FP = VE::SX9;
2308 Register SP = VE::SX11;
2309
2310 MachineInstrBuilder MIB;
2311
2312 MachineBasicBlock *ThisMBB = MBB;
2313
2314 // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate the following instructions.
2315 //
2316 // ThisMBB:
2317 // %fp = load buf[0]
2318 // %jmp = load buf[1]
2319 // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2320 // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2321 // jmp %jmp
2322
2323 // Reload FP.
2324 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: FP);
2325 MIB.addReg(RegNo: BufReg);
2326 MIB.addImm(Val: 0);
2327 MIB.addImm(Val: 0);
2328 MIB.setMemRefs(MMOs);
2329
2330 // Reload IP.
2331 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: Tmp);
2332 MIB.addReg(RegNo: BufReg);
2333 MIB.addImm(Val: 0);
2334 MIB.addImm(Val: 8);
2335 MIB.setMemRefs(MMOs);
2336
2337 // Copy BufReg to SX10 so that RestoreMBB (emitted by setjmp) can find buf.
2338 // FIXME: Better to not use SX10 here
2339 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::ORri), DestReg: VE::SX10)
2340 .addReg(RegNo: BufReg)
2341 .addImm(Val: 0);
2342
2343 // Reload SP.
2344 MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: SP);
2345 MIB.add(MO: MI.getOperand(i: 0)); // we can preserve the kill flags here.
2346 MIB.addImm(Val: 0);
2347 MIB.addImm(Val: 16);
2348 MIB.setMemRefs(MMOs);
2349
2350 // Jump.
2351 BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2352 .addReg(RegNo: Tmp, flags: getKillRegState(B: true))
2353 .addImm(Val: 0);
2354
2355 MI.eraseFromParent();
2356 return ThisMBB;
2357}
2358
2359MachineBasicBlock *
2360VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2361 MachineBasicBlock *BB) const {
2362 DebugLoc DL = MI.getDebugLoc();
2363 MachineFunction *MF = BB->getParent();
2364 MachineFrameInfo &MFI = MF->getFrameInfo();
2365 MachineRegisterInfo &MRI = MF->getRegInfo();
2366 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2367 int FI = MFI.getFunctionContextIndex();
2368
2369 // Get a mapping of the call site numbers to all of the landing pads they're
2370 // associated with.
2371 DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
2372 unsigned MaxCSNum = 0;
2373 for (auto &MBB : *MF) {
2374 if (!MBB.isEHPad())
2375 continue;
2376
2377 MCSymbol *Sym = nullptr;
2378 for (const auto &MI : MBB) {
2379 if (MI.isDebugInstr())
2380 continue;
2381
2382 assert(MI.isEHLabel() && "expected EH_LABEL");
2383 Sym = MI.getOperand(i: 0).getMCSymbol();
2384 break;
2385 }
2386
2387 if (!MF->hasCallSiteLandingPad(Sym))
2388 continue;
2389
2390 for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2391 CallSiteNumToLPad[CSI].push_back(Elt: &MBB);
2392 MaxCSNum = std::max(a: MaxCSNum, b: CSI);
2393 }
2394 }
2395
2396 // Get an ordered list of the machine basic blocks for the jump table.
2397 std::vector<MachineBasicBlock *> LPadList;
2398 SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
2399 LPadList.reserve(n: CallSiteNumToLPad.size());
2400
2401 for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
2402 for (auto &LP : CallSiteNumToLPad[CSI]) {
2403 LPadList.push_back(x: LP);
2404 InvokeBBs.insert_range(R: LP->predecessors());
2405 }
2406 }
2407
2408 assert(!LPadList.empty() &&
2409 "No landing pad destinations for the dispatch jump table!");
2410
2411 // The %fn_context is allocated like below (from --print-after=sjljehprepare):
2412 // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2413 //
2414 // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
2415 // First `i64` is callsite, so callsite is FI+8.
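// With 8-byte pointers the offsets work out as:
//   i8*        at  0
//   i64        at  8   (callsite; OffsetCS below)
//   [4 x i64]  at 16..47
//   i8*        at 48
//   i8*        at 56
//   [5 x i8*]  at 64..103, so jmpbuf[1] is at 72 (OffsetIC below)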
2416 static const int OffsetIC = 72;
2417 static const int OffsetCS = 8;
2418
2419 // Create the MBBs for the dispatch code like following:
2420 //
2421 // ThisMBB:
2422 // Prepare DispatchBB address and store it to buf[1].
2423 // ...
2424 //
2425 // DispatchBB:
2426 // %s15 = GETGOT iff isPositionIndependent
2427 // %callsite = load callsite
2428 // brgt.l.t #size of callsites, %callsite, DispContBB
2429 //
2430 // TrapBB:
2431 // Call abort.
2432 //
2433 // DispContBB:
2434 // %breg = address of jump table
2435 // %pc = load and calculate next pc from %breg and %callsite
2436 // jmp %pc
2437
2438 // Shove the dispatch's address into the return slot in the function context.
2439 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2440 DispatchBB->setIsEHPad(true);
2441
2442 // TrapBB causes a trap, like `assert(0)`.
2443 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2444 DispatchBB->addSuccessor(Succ: TrapBB);
2445
2446 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2447 DispatchBB->addSuccessor(Succ: DispContBB);
2448
2449 // Insert MBBs.
2450 MF->push_back(MBB: DispatchBB);
2451 MF->push_back(MBB: DispContBB);
2452 MF->push_back(MBB: TrapBB);
2453
2454 // Insert code to call abort in the TrapBB.
2455 Register Abort = prepareSymbol(MBB&: *TrapBB, I: TrapBB->end(), Symbol: "abort", DL,
2456 /* Local */ IsLocal: false, /* Call */ IsCall: true);
2457 BuildMI(BB: TrapBB, MIMD: DL, MCID: TII->get(Opcode: VE::BSICrii), DestReg: VE::SX10)
2458 .addReg(RegNo: Abort, flags: getKillRegState(B: true))
2459 .addImm(Val: 0)
2460 .addImm(Val: 0);
2461
2462 // Insert code into the entry block that creates and registers the function
2463 // context.
2464 setupEntryBlockForSjLj(MI, MBB: BB, DispatchBB, FI, Offset: OffsetIC);
2465
2466 // Create the jump table and associated information
2467 unsigned JTE = getJumpTableEncoding();
2468 MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTEntryKind: JTE);
2469 unsigned MJTI = JTI->createJumpTableIndex(DestBBs: LPadList);
2470
2471 const VERegisterInfo &RI = TII->getRegisterInfo();
2472 // Add a register mask with no preserved registers. This results in all
2473 // registers being marked as clobbered.
2474 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::NOP))
2475 .addRegMask(Mask: RI.getNoPreservedMask());
2476
2477 if (isPositionIndependent()) {
2478 // Force generation of GETGOT, since the current implementation doesn't
2479 // preserve the GOT register.
2480 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::GETGOT), DestReg: VE::SX15);
2481 }
2482
2483 // IReg is used as an index in a memory operand and therefore can't be SP
2484 const TargetRegisterClass *RC = &VE::I64RegClass;
2485 Register IReg = MRI.createVirtualRegister(RegClass: RC);
2486 addFrameReference(MIB: BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDLZXrii), DestReg: IReg), FI,
2487 Offset: OffsetCS);
2488 if (LPadList.size() < 64) {
2489 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLir_t))
2490 .addImm(Val: VECC::CC_ILE)
2491 .addImm(Val: LPadList.size())
2492 .addReg(RegNo: IReg)
2493 .addMBB(MBB: TrapBB);
2494 } else {
2495 assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2496 Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
2497 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: TmpReg)
2498 .addImm(Val: 0)
2499 .addImm(Val: 0)
2500 .addImm(Val: LPadList.size());
2501 BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLrr_t))
2502 .addImm(Val: VECC::CC_ILE)
2503 .addReg(RegNo: TmpReg, flags: getKillRegState(B: true))
2504 .addReg(RegNo: IReg)
2505 .addMBB(MBB: TrapBB);
2506 }
2507
2508 Register BReg = MRI.createVirtualRegister(RegClass: RC);
2509 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2510 Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2511
2512 if (isPositionIndependent()) {
2513 // Create following instructions for local linkage PIC code.
2514 // lea %Tmp1, .LJTI0_0@gotoff_lo
2515 // and %Tmp2, %Tmp1, (32)0
2516 // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2517 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2518 .addImm(Val: 0)
2519 .addImm(Val: 0)
2520 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_GOTOFF_LO32);
2521 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2522 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2523 .addImm(Val: M0(Val: 32));
2524 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: BReg)
2525 .addReg(RegNo: VE::SX15)
2526 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2527 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_GOTOFF_HI32);
2528 } else {
2529 // Create following instructions for non-PIC code.
2530 // lea %Tmp1, .LJTI0_0@lo
2531 // and %Tmp2, %Tmp1, (32)0
2532 // lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2533 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2534 .addImm(Val: 0)
2535 .addImm(Val: 0)
2536 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_LO32);
2537 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2538 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2539 .addImm(Val: M0(Val: 32));
2540 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: BReg)
2541 .addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2542 .addImm(Val: 0)
2543 .addJumpTableIndex(Idx: MJTI, TargetFlags: VE::S_HI32);
2544 }
2545
2546 switch (JTE) {
2547 case MachineJumpTableInfo::EK_BlockAddress: {
2548 // Generate simple block address code for no-PIC model.
2549 // sll %Tmp1, %IReg, 3
2550 // lds %TReg, 0(%Tmp1, %BReg)
2551 // bcfla %TReg
2552
2553 Register TReg = MRI.createVirtualRegister(RegClass: RC);
2554 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2555
2556 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::SLLri), DestReg: Tmp1)
2557 .addReg(RegNo: IReg, flags: getKillRegState(B: true))
2558 .addImm(Val: 3);
2559 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDrri), DestReg: TReg)
2560 .addReg(RegNo: BReg, flags: getKillRegState(B: true))
2561 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2562 .addImm(Val: 0);
2563 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2564 .addReg(RegNo: TReg, flags: getKillRegState(B: true))
2565 .addImm(Val: 0);
2566 break;
2567 }
2568 case MachineJumpTableInfo::EK_Custom32: {
2569 // Generate block address code using differences from the function pointer
2570 // for PIC model.
2571 // sll %Tmp1, %IReg, 2
2572 // ldl.zx %OReg, 0(%Tmp1, %BReg)
2573 // Prepare function address in BReg2.
2574 // adds.l %TReg, %BReg2, %OReg
2575 // bcfla %TReg
2576
2577 assert(isPositionIndependent());
2578 Register OReg = MRI.createVirtualRegister(RegClass: RC);
2579 Register TReg = MRI.createVirtualRegister(RegClass: RC);
2580 Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2581
2582 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::SLLri), DestReg: Tmp1)
2583 .addReg(RegNo: IReg, flags: getKillRegState(B: true))
2584 .addImm(Val: 2);
2585 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDLZXrri), DestReg: OReg)
2586 .addReg(RegNo: BReg, flags: getKillRegState(B: true))
2587 .addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2588 .addImm(Val: 0);
2589 Register BReg2 =
2590 prepareSymbol(MBB&: *DispContBB, I: DispContBB->end(),
2591 Symbol: DispContBB->getParent()->getName(), DL, /* Local */ IsLocal: true);
2592 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ADDSLrr), DestReg: TReg)
2593 .addReg(RegNo: OReg, flags: getKillRegState(B: true))
2594 .addReg(RegNo: BReg2, flags: getKillRegState(B: true));
2595 BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2596 .addReg(RegNo: TReg, flags: getKillRegState(B: true))
2597 .addImm(Val: 0);
2598 break;
2599 }
2600 default:
2601 llvm_unreachable("Unexpected jump table encoding");
2602 }
2603
2604 // Add the jump table entries as successors to the MBB.
2605 SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
2606 for (auto &LP : LPadList)
2607 if (SeenMBBs.insert(Ptr: LP).second)
2608 DispContBB->addSuccessor(Succ: LP);
2609
2610 // N.B. the order the invoke BBs are processed in doesn't matter here.
2611 SmallVector<MachineBasicBlock *, 64> MBBLPads;
2612 const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2613 for (MachineBasicBlock *MBB : InvokeBBs) {
2614 // Remove the landing pad successor from the invoke block and replace it
2615 // with the new dispatch block.
2616 // Keep a copy of Successors since it's modified inside the loop.
2617 SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
2618 MBB->succ_rend());
2619 // FIXME: Avoid quadratic complexity.
2620 for (auto *MBBS : Successors) {
2621 if (MBBS->isEHPad()) {
2622 MBB->removeSuccessor(Succ: MBBS);
2623 MBBLPads.push_back(Elt: MBBS);
2624 }
2625 }
2626
2627 MBB->addSuccessor(Succ: DispatchBB);
2628
2629 // Find the invoke call and mark all of the callee-saved registers as
2630 // 'implicit defined' so that they're spilled. This prevents code from
2631 // moving instructions to before the EH block, where they will never be
2632 // executed.
2633 for (auto &II : reverse(C&: *MBB)) {
2634 if (!II.isCall())
2635 continue;
2636
2637 DenseSet<Register> DefRegs;
2638 for (auto &MOp : II.operands())
2639 if (MOp.isReg())
2640 DefRegs.insert(V: MOp.getReg());
2641
2642 MachineInstrBuilder MIB(*MF, &II);
2643 for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
2644 Register Reg = SavedRegs[RI];
2645 if (!DefRegs.contains(V: Reg))
2646 MIB.addReg(RegNo: Reg, flags: RegState::ImplicitDefine | RegState::Dead);
2647 }
2648
2649 break;
2650 }
2651 }
2652
2653 // Mark all former landing pads as non-landing pads. The dispatch is the only
2654 // landing pad now.
2655 for (auto &LP : MBBLPads)
2656 LP->setIsEHPad(false);
2657
2658 // The instruction is gone now.
2659 MI.eraseFromParent();
2660 return BB;
2661}
2662
2663MachineBasicBlock *
2664VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2665 MachineBasicBlock *BB) const {
2666 switch (MI.getOpcode()) {
2667 default:
2668 llvm_unreachable("Unknown Custom Instruction!");
2669 case VE::EH_SjLj_LongJmp:
2670 return emitEHSjLjLongJmp(MI, MBB: BB);
2671 case VE::EH_SjLj_SetJmp:
2672 return emitEHSjLjSetJmp(MI, MBB: BB);
2673 case VE::EH_SjLj_Setup_Dispatch:
2674 return emitSjLjDispatchBlock(MI, BB);
2675 }
2676}
2677
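// Return true if V is a scalar constant whose bit pattern fits in a signed
// 7-bit immediate. For example, the i64 constant 63 qualifies while 200 does
// not; for f32 the 32-bit pattern is shifted into the upper half first, so in
// practice only an all-zero pattern (+0.0f) passes the check.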
2678static bool isSimm7(SDValue V) {
2679 EVT VT = V.getValueType();
2680 if (VT.isVector())
2681 return false;
2682
2683 if (VT.isInteger()) {
2684 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: V))
2685 return isInt<7>(x: C->getSExtValue());
2686 } else if (VT.isFloatingPoint()) {
2687 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: V)) {
2688 if (VT == MVT::f32 || VT == MVT::f64) {
2689 const APInt &Imm = C->getValueAPF().bitcastToAPInt();
2690 uint64_t Val = Imm.getSExtValue();
2691 if (Imm.getBitWidth() == 32)
2692 Val <<= 32; // Float immediates are placed in the upper 32 bits on VE.
2693 return isInt<7>(x: Val);
2694 }
2695 }
2696 }
2697 return false;
2698}
2699
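// Return true if V is a scalar constant usable as one of VE's (M)1/(M)0
// immediates, i.e. M leading ones followed by zeros, or M leading zeros
// followed by ones (this is what isMImmVal/isMImm32Val check). For example,
// 0xFFFFFFFF00000000 is (32)1 and 0x00000000FFFFFFFF is (32)0, while
// 0x00FF00FF00FF00FF is neither.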
2700static bool isMImm(SDValue V) {
2701 EVT VT = V.getValueType();
2702 if (VT.isVector())
2703 return false;
2704
2705 if (VT.isInteger()) {
2706 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: V))
2707 return isMImmVal(Val: getImmVal(N: C));
2708 } else if (VT.isFloatingPoint()) {
2709 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: V)) {
2710 if (VT == MVT::f32) {
2711 // Float values are placed in the upper 32 bits, so ignore the lower 32 bits.
2712 return isMImm32Val(Val: getFpImmVal(N: C) >> 32);
2713 } else if (VT == MVT::f64) {
2714 return isMImmVal(Val: getFpImmVal(N: C));
2715 }
2716 }
2717 }
2718 return false;
2719}
2720
2721static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
2722 if (SrcVT.isFloatingPoint()) {
2723 if (SrcVT == MVT::f128)
2724 return VEISD::CMPQ;
2725 return VEISD::CMPF;
2726 }
2727 return isSignedIntSetCC(Code: CC) ? VEISD::CMPI : VEISD::CMPU;
2728}
2729
2730static EVT decideCompType(EVT SrcVT) {
2731 if (SrcVT == MVT::f128)
2732 return MVT::f64;
2733 return SrcVT;
2734}
2735
2736static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC,
2737 bool WithCMov) {
2738 if (SrcVT.isFloatingPoint()) {
2739 // For the case of floating point setcc, only unordered comparisons
2740 // or general comparisons with the -enable-no-nans-fp-math option reach
2741 // here, so it is safe even if the values are NaN. Only f128 is not
2742 // safe since VE uses the f64 result of the f128 comparison.
2743 return SrcVT != MVT::f128;
2744 }
2745 if (isIntEqualitySetCC(Code: CC)) {
2746 // For the case of equal or not equal, it is safe without comparison with 0.
2747 return true;
2748 }
2749 if (WithCMov) {
2750 // For the case of integer setcc with cmov, all signed comparisons with 0
2751 // are safe.
2752 return isSignedIntSetCC(Code: CC);
2753 }
2754 // For the case of integer setcc, only signed 64-bit comparisons are safe.
2755 // For unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it reads
2756 // as less than 0 without the CMPU. For 32 bits, the upper 32 bits of the
2757 // register are not guaranteed, so it is not safe without the CMPI either.
2758 return isSignedIntSetCC(Code: CC) && SrcVT == MVT::i64;
2759}
2760
2761static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS,
2762 ISD::CondCode CC, bool WithCMov,
2763 const SDLoc &DL, SelectionDAG &DAG) {
2764 // Compare values. If RHS is 0 and it is safe to calculate without
2765 // comparison, we don't generate an instruction for comparison.
2766 EVT CompVT = decideCompType(SrcVT: VT);
2767 if (CompVT == VT && safeWithoutCompWithNull(SrcVT: VT, CC, WithCMov) &&
2768 (isNullConstant(V: RHS) || isNullFPConstant(V: RHS))) {
2769 return LHS;
2770 }
2771 return DAG.getNode(Opcode: decideComp(SrcVT: VT, CC), DL, VT: CompVT, N1: LHS, N2: RHS);
2772}
2773
2774SDValue VETargetLowering::combineSelect(SDNode *N,
2775 DAGCombinerInfo &DCI) const {
2776 assert(N->getOpcode() == ISD::SELECT &&
2777 "Should be called with a SELECT node");
2778 ISD::CondCode CC = ISD::CondCode::SETNE;
2779 SDValue Cond = N->getOperand(Num: 0);
2780 SDValue True = N->getOperand(Num: 1);
2781 SDValue False = N->getOperand(Num: 2);
2782
2783 // We handle only scalar SELECT.
2784 EVT VT = N->getValueType(ResNo: 0);
2785 if (VT.isVector())
2786 return SDValue();
2787
2788 // Perform combineSelect after the DAG is legalized.
2789 if (!DCI.isAfterLegalizeDAG())
2790 return SDValue();
2791
2792 EVT VT0 = Cond.getValueType();
2793 if (isMImm(V: True)) {
2794 // VE's condition move can handle MImm in True clause, so nothing to do.
2795 } else if (isMImm(V: False)) {
2796 // VE's conditional move can handle MImm in the True clause, so swap the True
2797 // and False clauses if False has an MImm value, and update the condition code.
2798 std::swap(a&: True, b&: False);
2799 CC = getSetCCInverse(Operation: CC, Type: VT0);
2800 }
2801
2802 SDLoc DL(N);
2803 SelectionDAG &DAG = DCI.DAG;
2804 VECC::CondCode VECCVal;
2805 if (VT0.isFloatingPoint()) {
2806 VECCVal = fpCondCode2Fcc(CC);
2807 } else {
2808 VECCVal = intCondCode2Icc(CC);
2809 }
2810 SDValue Ops[] = {Cond, True, False,
2811 DAG.getConstant(Val: VECCVal, DL, VT: MVT::i32)};
2812 return DAG.getNode(Opcode: VEISD::CMOV, DL, VT, Ops);
2813}
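//
// The net effect (editor's illustration; %c, %t, %f are hypothetical values and
// 7 stands for any MImm-encodable constant):
//   (select %c, %t, 7)   ->  (VEISD::CMOV %c, 7, %t, cc)   ; cc = "equal",
//                                                          ; operands swapped
//   (select %c, 7, %f)   ->  (VEISD::CMOV %c, 7, %f, cc)   ; cc = "not equal"
//   (select %c, %t, %f)  ->  (VEISD::CMOV %c, %t, %f, cc)  ; cc = "not equal"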
2814
2815SDValue VETargetLowering::combineSelectCC(SDNode *N,
2816 DAGCombinerInfo &DCI) const {
2817 assert(N->getOpcode() == ISD::SELECT_CC &&
2818 "Should be called with a SELECT_CC node");
2819 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 4))->get();
2820 SDValue LHS = N->getOperand(Num: 0);
2821 SDValue RHS = N->getOperand(Num: 1);
2822 SDValue True = N->getOperand(Num: 2);
2823 SDValue False = N->getOperand(Num: 3);
2824
2825 // We handle only scalar SELECT_CC.
2826 EVT VT = N->getValueType(ResNo: 0);
2827 if (VT.isVector())
2828 return SDValue();
2829
2830  // Perform combineSelectCC only after DAG legalization.
2831 if (!DCI.isAfterLegalizeDAG())
2832 return SDValue();
2833
2834 // We handle only i32/i64/f32/f64/f128 comparisons.
2835 EVT LHSVT = LHS.getValueType();
2836 assert(LHSVT == RHS.getValueType());
2837 switch (LHSVT.getSimpleVT().SimpleTy) {
2838 case MVT::i32:
2839 case MVT::i64:
2840 case MVT::f32:
2841 case MVT::f64:
2842 case MVT::f128:
2843 break;
2844 default:
2845    // Return an empty SDValue to let LLVM handle other types.
2846 return SDValue();
2847 }
2848
2849 if (isMImm(V: RHS)) {
2850 // VE's comparison can handle MImm in RHS, so nothing to do.
2851 } else if (isSimm7(V: RHS)) {
2852    // VE's comparison can take a Simm7 only in LHS, so swap LHS and RHS and
2853    // use the swapped condition code.
2854 std::swap(a&: LHS, b&: RHS);
2855 CC = getSetCCSwappedOperands(Operation: CC);
2856 }
2857 if (isMImm(V: True)) {
2858    // VE's conditional move can take an MImm in the True operand; nothing to do.
2859 } else if (isMImm(V: False)) {
2860    // VE's conditional move can take an MImm only in the True operand, so
2861    // if False holds the MImm, swap True and False and invert the condition.
2862 std::swap(a&: True, b&: False);
2863 CC = getSetCCInverse(Operation: CC, Type: LHSVT);
2864 }
2865
2866 SDLoc DL(N);
2867 SelectionDAG &DAG = DCI.DAG;
2868
2869 bool WithCMov = true;
2870 SDValue CompNode = generateComparison(VT: LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);
2871
2872 VECC::CondCode VECCVal;
2873 if (LHSVT.isFloatingPoint()) {
2874 VECCVal = fpCondCode2Fcc(CC);
2875 } else {
2876 VECCVal = intCondCode2Icc(CC);
2877 }
2878 SDValue Ops[] = {CompNode, True, False,
2879 DAG.getConstant(Val: VECCVal, DL, VT: MVT::i32)};
2880 return DAG.getNode(Opcode: VEISD::CMOV, DL, VT, Ops);
2881}
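//
// End-to-end examples (editor's sketch; values hypothetical, and the second
// case assumes 0 is MImm-encodable so no operand swap happens first):
//   (select_cc i64 %a, %b, %t, %f, setlt)
//     -> (VEISD::CMOV (VEISD::CMPI %a, %b), %t, %f, cc=lt)
//   (select_cc i64 %a, 0, %t, %f, setlt)
//     -> (VEISD::CMOV %a, %t, %f, cc=lt)             ; compare with 0 elided
//   (select_cc f128 %p, %q, %t, %f, setolt)
//     -> (VEISD::CMOV (f64 (VEISD::CMPQ %p, %q)), %t, %f, cc=lt)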
2882
2883static bool isI32InsnAllUses(const SDNode *User, const SDNode *N);
2884static bool isI32Insn(const SDNode *User, const SDNode *N) {
2885 switch (User->getOpcode()) {
2886 default:
2887 return false;
2888 case ISD::ADD:
2889 case ISD::SUB:
2890 case ISD::MUL:
2891 case ISD::SDIV:
2892 case ISD::UDIV:
2893 case ISD::SETCC:
2894 case ISD::SMIN:
2895 case ISD::SMAX:
2896 case ISD::SHL:
2897 case ISD::SRA:
2898 case ISD::BSWAP:
2899 case ISD::SINT_TO_FP:
2900 case ISD::UINT_TO_FP:
2901 case ISD::BR_CC:
2902 case ISD::BITCAST:
2903 case ISD::ATOMIC_CMP_SWAP:
2904 case ISD::ATOMIC_SWAP:
2905 case VEISD::CMPU:
2906 case VEISD::CMPI:
2907 return true;
2908 case ISD::SRL:
2909 if (N->getOperand(Num: 0).getOpcode() != ISD::SRL)
2910 return true;
2911    // (srl (trunc (srl ...))) may be optimized by combining the srl nodes,
2912    // so don't optimize away the trunc here.
2913 return false;
2914 case ISD::SELECT_CC:
2915 if (User->getOperand(Num: 2).getNode() != N &&
2916 User->getOperand(Num: 3).getNode() != N)
2917 return true;
2918 return isI32InsnAllUses(User, N);
2919 case VEISD::CMOV:
2920    // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
2921    // However, a trunc in the true or false operand is not safe.
2922 if (User->getOperand(Num: 1).getNode() != N &&
2923 User->getOperand(Num: 2).getNode() != N &&
2924 isa<ConstantSDNode>(Val: User->getOperand(Num: 3))) {
2925 VECC::CondCode VECCVal =
2926 static_cast<VECC::CondCode>(User->getConstantOperandVal(Num: 3));
2927 return isIntVECondCode(CC: VECCVal);
2928 }
2929 [[fallthrough]];
2930 case ISD::AND:
2931 case ISD::OR:
2932 case ISD::XOR:
2933 case ISD::SELECT:
2934 case ISD::CopyToReg:
2935    // Check all uses of selects, bit operations, and copies. If all of them
2936    // are safe, optimize the truncate to an extract_subreg.
2937 return isI32InsnAllUses(User, N);
2938 }
2939}
2940
2941static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) {
2942  // Check all uses of the User node. If all of them are safe, optimize the
2943  // truncate to an extract_subreg.
2944 for (const SDNode *U : User->users()) {
2945 switch (U->getOpcode()) {
2946 default:
2947      // If the use is an instruction that treats the source operand as i32,
2948      // it is safe to omit the truncate here.
2949 if (isI32Insn(User: U, N))
2950 continue;
2951 break;
2952 case ISD::ANY_EXTEND:
2953 case ISD::SIGN_EXTEND:
2954 case ISD::ZERO_EXTEND: {
2955      // Special optimization for the combination of ext and trunc.
2956      // In (ext ... (select ... (trunc ...))) it is safe to omit the truncate
2957      // here, since the truncate only clears the upper 32 bits, which one of
2958      // the ext instructions will fill later anyway.
2959 assert(N->getValueType(0) == MVT::i32 &&
2960             "found truncate to a non-i32 integer");
2961 if (User->getOpcode() == ISD::SELECT_CC ||
2962 User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV)
2963 continue;
2964 break;
2965 }
2966 }
2967 return false;
2968 }
2969 return true;
2970}
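//
// Editor's illustration of the user check above (node shapes hypothetical):
//   %t = (i32 (truncate i64 %x))
//   user (add i32 %t, %y)                   -> safe (ADD is in the list above)
//   user (cmov %t, %true, %false, int-cc)   -> safe (trunc is the condition)
//   user (store i32 %t, ...)                -> not listed, so not safe;
//                                              keep the truncate
// Only if every user is "safe" does combineTRUNCATE below rewrite the
// truncate into an EXTRACT_SUBREG.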
2971
2972// Optimize TRUNCATE during DAG combining. Optimizing it in custom lowering is
2973// sometimes too early, and optimizing it in DAG pattern matching in
2974// VEInstrInfo.td is sometimes too late. So, do it here.
2975SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2976 DAGCombinerInfo &DCI) const {
2977 assert(N->getOpcode() == ISD::TRUNCATE &&
2978 "Should be called with a TRUNCATE node");
2979
2980 SelectionDAG &DAG = DCI.DAG;
2981 SDLoc DL(N);
2982 EVT VT = N->getValueType(ResNo: 0);
2983
2984 // We prefer to do this when all types are legal.
2985 if (!DCI.isAfterLegalizeDAG())
2986 return SDValue();
2987
2988  // For now, skip combining TRUNCATE if its operand might fold to a constant.
2989 if (N->getOperand(Num: 0)->getOpcode() == ISD::SELECT_CC &&
2990 isa<ConstantSDNode>(Val: N->getOperand(Num: 0)->getOperand(Num: 0)) &&
2991 isa<ConstantSDNode>(Val: N->getOperand(Num: 0)->getOperand(Num: 1)))
2992 return SDValue();
2993
2994  // Check all uses of this TRUNCATE.
2995 for (const SDNode *User : N->users()) {
2996    // Make sure that we're not going to replace the TRUNCATE when the user
2997    // is not an i32 instruction.
2998 //
2999 // FIXME: Although we could sometimes handle this, and it does occur in
3000 // practice that one of the condition inputs to the select is also one of
3001 // the outputs, we currently can't deal with this.
3002 if (isI32Insn(User, N))
3003 continue;
3004
3005 return SDValue();
3006 }
3007
3008 SDValue SubI32 = DAG.getTargetConstant(Val: VE::sub_i32, DL, VT: MVT::i32);
3009 return SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT,
3010 Op1: N->getOperand(Num: 0), Op2: SubI32),
3011 0);
3012}
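//
// Resulting rewrite (editor's sketch):
//   before: %lo = (i32 (truncate i64 %x)), with every user 32-bit safe
//   after:  %lo = (i32 (EXTRACT_SUBREG %x, sub_i32))
// i.e. the truncate becomes a plain subregister read of the low 32 bits, with
// no instruction emitted for it.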
3013
3014SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
3015 DAGCombinerInfo &DCI) const {
3016 switch (N->getOpcode()) {
3017 default:
3018 break;
3019 case ISD::SELECT:
3020 return combineSelect(N, DCI);
3021 case ISD::SELECT_CC:
3022 return combineSelectCC(N, DCI);
3023 case ISD::TRUNCATE:
3024 return combineTRUNCATE(N, DCI);
3025 }
3026
3027 return SDValue();
3028}
3029
3030//===----------------------------------------------------------------------===//
3031// VE Inline Assembly Support
3032//===----------------------------------------------------------------------===//
3033
3034VETargetLowering::ConstraintType
3035VETargetLowering::getConstraintType(StringRef Constraint) const {
3036 if (Constraint.size() == 1) {
3037 switch (Constraint[0]) {
3038 default:
3039 break;
3040 case 'v': // vector registers
3041 return C_RegisterClass;
3042 }
3043 }
3044 return TargetLowering::getConstraintType(Constraint);
3045}
3046
3047std::pair<unsigned, const TargetRegisterClass *>
3048VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3049 StringRef Constraint,
3050 MVT VT) const {
3051 const TargetRegisterClass *RC = nullptr;
3052 if (Constraint.size() == 1) {
3053 switch (Constraint[0]) {
3054 default:
3055 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3056 case 'r':
3057 RC = &VE::I64RegClass;
3058 break;
3059 case 'v':
3060 RC = &VE::V64RegClass;
3061 break;
3062 }
3063 return std::make_pair(x: 0U, y&: RC);
3064 }
3065
3066 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3067}
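//
// Editor's illustration (the asm string is purely hypothetical): for IR like
//   %r = call <256 x double> asm "some_vector_op $0, $1", "=v,v"(<256 x double> %a)
// the 'v' constraints are classified as C_RegisterClass by getConstraintType
// and resolved to VE::V64RegClass here, while ordinary 'r' operands resolve to
// VE::I64RegClass.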
3068
3069//===----------------------------------------------------------------------===//
3070// VE Target Optimization Support
3071//===----------------------------------------------------------------------===//
3072
3073unsigned VETargetLowering::getMinimumJumpTableEntries() const {
3074  // Require 8 entries for the PIC model to offset the cost of the PIC loads.
3075 if (isJumpTableRelative())
3076 return 8;
3077
3078 return TargetLowering::getMinimumJumpTableEntries();
3079}
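//
// Editor's note (approximate): with this threshold, a PIC-mode switch needs at
// least 8 jump-table-eligible cases before a jump table is emitted; smaller
// switches stay as compare-and-branch sequences. Non-PIC code keeps the
// generic default threshold.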
3080
3081bool VETargetLowering::hasAndNot(SDValue Y) const {
3082 EVT VT = Y.getValueType();
3083
3084  // VE doesn't have a vector and-not instruction.
3085 if (VT.isVector())
3086 return false;
3087
3088  // VE allows different kinds of immediates for X and Y in ~X & Y:
3089  // only a simm7 works for X and only an mimm works for Y. However, this
3090  // hook is asked whether a single immediate value is usable as both X and Y
3091  // of an and-not instruction. Emitting an extra instruction just to
3092  // materialize the immediate is not worthwhile, since the purpose of this
3093  // hook is to convert a sequence of 3 instructions into another sequence of
3094  // 3 instructions with better parallelism. Therefore, we return false
3095  // for all immediate values for now.
3096 // FIXME: Change hasAndNot function to have two operands to make it work
3097 // correctly with Aurora VE.
3098 if (isa<ConstantSDNode>(Val: Y))
3099 return false;
3100
3101  // It's fine for plain register operands.
3102 return true;
3103}
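//
// Editor's illustration of how DAGCombiner sees this hook (values hypothetical):
//   hasAndNot(%y)                 -> true   (plain scalar register operand)
//   hasAndNot(constant 255)       -> false  (see the FIXME above)
//   hasAndNot(any vector value)   -> false  (no vector and-not on VE)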
3104
3105SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3106 SelectionDAG &DAG) const {
3107 assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
3108 MVT VT = Op.getOperand(i: 0).getSimpleValueType();
3109
3110 // Special treatment for packed V64 types.
3111 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3112 (void)VT;
3113  // Example (pseudocode):
3114  //   %packed_v = extractelt %vr, %idx / 2
3115  //   %v = %packed_v >> (((%idx % 2) ^ 1) * 32)
3116  //   %res = %v & 0xffffffff
3117
3118 SDValue Vec = Op.getOperand(i: 0);
3119 SDValue Idx = Op.getOperand(i: 1);
3120 SDLoc DL(Op);
3121 SDValue Result = Op;
3122 if (false /* Idx->isConstant() */) {
3123 // TODO: optimized implementation using constant values
3124 } else {
3125 SDValue Const1 = DAG.getConstant(Val: 1, DL, VT: MVT::i64);
3126 SDValue HalfIdx = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {Idx, Const1});
3127 SDValue PackedElt =
3128 SDValue(DAG.getMachineNode(Opcode: VE::LVSvr, dl: DL, VT: MVT::i64, Ops: {Vec, HalfIdx}), 0);
3129 SDValue AndIdx = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {Idx, Const1});
3130 SDValue Shift = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, Ops: {AndIdx, Const1});
3131 SDValue Const5 = DAG.getConstant(Val: 5, DL, VT: MVT::i64);
3132 Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, Ops: {Shift, Const5});
3133 PackedElt = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {PackedElt, Shift});
3134 SDValue Mask = DAG.getConstant(Val: 0xFFFFFFFFL, DL, VT: MVT::i64);
3135 PackedElt = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {PackedElt, Mask});
3136 SDValue SubI32 = DAG.getTargetConstant(Val: VE::sub_i32, DL, VT: MVT::i32);
3137 Result = SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL,
3138 VT: MVT::i32, Op1: PackedElt, Op2: SubI32),
3139 0);
3140
3141 if (Op.getSimpleValueType() == MVT::f32) {
3142 Result = DAG.getBitcast(VT: MVT::f32, V: Result);
3143 } else {
3144 assert(Op.getSimpleValueType() == MVT::i32);
3145 }
3146 }
3147 return Result;
3148}
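//
// Worked example (editor's sketch) for the dynamic path above, with %idx = 5 on
// a v512i32 vector:
//   HalfIdx = 5 >> 1 = 2                   ; LVSvr reads 64-bit lane 2
//   Shift   = ((5 & 1) ^ 1) << 5 = 0       ; odd index -> lower half
//   Result  = (lane >> 0) & 0xffffffff
// An even index (e.g. 4) gives Shift = 32 and extracts the upper half, matching
// the "elements start from the upper bits" layout noted in
// lowerINSERT_VECTOR_ELT below.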
3149
3150SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3151 SelectionDAG &DAG) const {
3152 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
3153 MVT VT = Op.getOperand(i: 0).getSimpleValueType();
3154
3155 // Special treatment for packed V64 types.
3156 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3157 (void)VT;
3158  // The elements of v512i32 and v512f32 start from the upper bits (0..31). These
3159  // "upper bits" correspond to `val << 32` from a C implementation's point of view.
3160 //
3161  // Example (pseudocode):
3162 // %packed_elt = extractelt %vr, (%idx >> 1)
3163 // %shift = ((%idx & 1) ^ 1) << 5
3164 // %packed_elt &= 0xffffffff00000000 >> shift
3165 // %packed_elt |= (zext %val) << shift
3166 // %vr = insertelt %vr, %packed_elt, (%idx >> 1)
3167
3168 SDLoc DL(Op);
3169 SDValue Vec = Op.getOperand(i: 0);
3170 SDValue Val = Op.getOperand(i: 1);
3171 SDValue Idx = Op.getOperand(i: 2);
3172 if (Idx.getSimpleValueType() == MVT::i32)
3173 Idx = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Idx);
3174 if (Val.getSimpleValueType() == MVT::f32)
3175 Val = DAG.getBitcast(VT: MVT::i32, V: Val);
3176 assert(Val.getSimpleValueType() == MVT::i32);
3177 Val = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Val);
3178
3179 SDValue Result = Op;
3180 if (false /* Idx->isConstant()*/) {
3181 // TODO: optimized implementation using constant values
3182 } else {
3183 SDValue Const1 = DAG.getConstant(Val: 1, DL, VT: MVT::i64);
3184 SDValue HalfIdx = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {Idx, Const1});
3185 SDValue PackedElt =
3186 SDValue(DAG.getMachineNode(Opcode: VE::LVSvr, dl: DL, VT: MVT::i64, Ops: {Vec, HalfIdx}), 0);
3187 SDValue AndIdx = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {Idx, Const1});
3188 SDValue Shift = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, Ops: {AndIdx, Const1});
3189 SDValue Const5 = DAG.getConstant(Val: 5, DL, VT: MVT::i64);
3190 Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, Ops: {Shift, Const5});
3191 SDValue Mask = DAG.getConstant(Val: 0xFFFFFFFF00000000L, DL, VT: MVT::i64);
3192 Mask = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {Mask, Shift});
3193 PackedElt = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {PackedElt, Mask});
3194 Val = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, Ops: {Val, Shift});
3195 PackedElt = DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i64, Ops: {PackedElt, Val});
3196 Result =
3197 SDValue(DAG.getMachineNode(Opcode: VE::LSVrr_v, dl: DL, VT: Vec.getSimpleValueType(),
3198 Ops: {HalfIdx, PackedElt, Vec}),
3199 0);
3200 }
3201 return Result;
3202}
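//
// Worked example (editor's sketch) for the dynamic path above, with %idx = 4 on
// a v512i32 vector:
//   HalfIdx = 4 >> 1 = 2
//   Shift   = ((4 & 1) ^ 1) << 5 = 32                ; even index -> upper half
//   Mask    = 0xffffffff00000000 >> 32 = 0x00000000ffffffff
//   lane    = (lane & Mask) | (zext(%val) << 32)     ; keep the odd-index half,
//                                                    ; replace the even-index half
//   %vr     = LSVrr_v(%vr, lane) at HalfIdx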
3203