VEISelLowering.cpp source code [llvm_projects/llvm/lib/Target/VE/VEISelLowering.cpp]

1	//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the interfaces that VE uses to lower LLVM code into a
10	// selection DAG.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "VEISelLowering.h"
15	#include "MCTargetDesc/VEMCExpr.h"
16	#include "VECustomDAG.h"
17	#include "VEInstrBuilder.h"
18	#include "VEMachineFunctionInfo.h"
19	#include "VERegisterInfo.h"
20	#include "VETargetMachine.h"
21	#include "llvm/ADT/StringSwitch.h"
22	#include "llvm/CodeGen/CallingConvLower.h"
23	#include "llvm/CodeGen/MachineFrameInfo.h"
24	#include "llvm/CodeGen/MachineFunction.h"
25	#include "llvm/CodeGen/MachineInstrBuilder.h"
26	#include "llvm/CodeGen/MachineJumpTableInfo.h"
27	#include "llvm/CodeGen/MachineModuleInfo.h"
28	#include "llvm/CodeGen/MachineRegisterInfo.h"
29	#include "llvm/CodeGen/SelectionDAG.h"
30	#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
31	#include "llvm/IR/DerivedTypes.h"
32	#include "llvm/IR/Function.h"
33	#include "llvm/IR/IRBuilder.h"
34	#include "llvm/IR/Module.h"
35	#include "llvm/Support/ErrorHandling.h"
36	#include "llvm/Support/KnownBits.h"
37	using namespace llvm;
38
39	#define DEBUG_TYPE "ve-lower"
40
41	//===----------------------------------------------------------------------===//
42	// Calling Convention Implementation
43	//===----------------------------------------------------------------------===//
44
45	#include "VEGenCallingConv.inc"
46
47	CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
48	switch (CallConv) {
49	default:
50	return RetCC_VE_C;
51	case CallingConv::Fast:
52	return RetCC_VE_Fast;
53	}
54	}
55
56	CCAssignFn getParamCC(CallingConv::ID CallConv, bool* IsVarArg) {
57	if (IsVarArg)
58	return CC_VE2;
59	switch (CallConv) {
60	default:
61	return CC_VE_C;
62	case CallingConv::Fast:
63	return CC_VE_Fast;
64	}
65	}
66
67	bool VETargetLowering::CanLowerReturn(
68	CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
69	const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
70	CCAssignFn *RetCC = getReturnCC(CallConv);
71	SmallVector<CCValAssign, `16`> RVLocs;
72	CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
73	return CCInfo.CheckReturn(Outs, Fn: RetCC);
74	}
75
76	static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
77	MVT::v256f32, MVT::v512f32, MVT::v256f64};
78
79	static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};
80
81	static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
82
83	void VETargetLowering::initRegisterClasses() {
84	// Set up the register classes.
85	addRegisterClass(VT: MVT::i32, RC: &VE::I32RegClass);
86	addRegisterClass(VT: MVT::i64, RC: &VE::I64RegClass);
87	addRegisterClass(VT: MVT::f32, RC: &VE::F32RegClass);
88	addRegisterClass(VT: MVT::f64, RC: &VE::I64RegClass);
89	addRegisterClass(VT: MVT::f128, RC: &VE::F128RegClass);
90
91	if (Subtarget->enableVPU()) {
92	for (MVT VecVT : AllVectorVTs)
93	addRegisterClass(VT: VecVT, RC: &VE::V64RegClass);
94	addRegisterClass(VT: MVT::v256i1, RC: &VE::VMRegClass);
95	addRegisterClass(VT: MVT::v512i1, RC: &VE::VM512RegClass);
96	}
97	}
98
99	void VETargetLowering::initSPUActions() {
100	const auto &TM = getTargetMachine();
101	/// Load & Store {
102
103	// VE doesn't have i1 sign extending load.
104	for (MVT VT : MVT::integer_valuetypes()) {
105	setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
106	setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
107	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
108	setTruncStoreAction(ValVT: VT, MemVT: MVT::i1, Action: Expand);
109	}
110
111	// VE doesn't have floating point extload/truncstore, so expand them.
112	for (MVT FPVT : MVT::fp_valuetypes()) {
113	for (MVT OtherFPVT : MVT::fp_valuetypes()) {
114	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: FPVT, MemVT: OtherFPVT, Action: Expand);
115	setTruncStoreAction(ValVT: FPVT, MemVT: OtherFPVT, Action: Expand);
116	}
117	}
118
119	// VE doesn't have fp128 load/store, so expand them in custom lower.
120	setOperationAction(Op: ISD::LOAD, VT: MVT::f128, Action: Custom);
121	setOperationAction(Op: ISD::STORE, VT: MVT::f128, Action: Custom);
122
123	/// } Load & Store
124
125	// Custom legalize address nodes into LO/HI parts.
126	MVT PtrVT = MVT::getIntegerVT(BitWidth: TM.getPointerSizeInBits(AS: `0`));
127	setOperationAction(Op: ISD::BlockAddress, VT: PtrVT, Action: Custom);
128	setOperationAction(Op: ISD::GlobalAddress, VT: PtrVT, Action: Custom);
129	setOperationAction(Op: ISD::GlobalTLSAddress, VT: PtrVT, Action: Custom);
130	setOperationAction(Op: ISD::ConstantPool, VT: PtrVT, Action: Custom);
131	setOperationAction(Op: ISD::JumpTable, VT: PtrVT, Action: Custom);
132
133	/// VAARG handling {
134	setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom);
135	// VAARG needs to be lowered to access with 8 bytes alignment.
136	setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Custom);
137	// Use the default implementation.
138	setOperationAction(Op: ISD::VACOPY, VT: MVT::Other, Action: Expand);
139	setOperationAction(Op: ISD::VAEND, VT: MVT::Other, Action: Expand);
140	/// } VAARG handling
141
142	/// Stack {
143	setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i32, Action: Custom);
144	setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i64, Action: Custom);
145
146	// Use the default implementation.
147	setOperationAction(Op: ISD::STACKSAVE, VT: MVT::Other, Action: Expand);
148	setOperationAction(Op: ISD::STACKRESTORE, VT: MVT::Other, Action: Expand);
149	/// } Stack
150
151	/// Branch {
152
153	// VE doesn't have BRCOND
154	setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Expand);
155
156	// BR_JT is not implemented yet.
157	setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand);
158
159	/// } Branch
160
161	/// Int Ops {
162	for (MVT IntVT : {MVT::i32, MVT::i64}) {
163	// VE has no REM or DIVREM operations.
164	setOperationAction(Op: ISD::UREM, VT: IntVT, Action: Expand);
165	setOperationAction(Op: ISD::SREM, VT: IntVT, Action: Expand);
166	setOperationAction(Op: ISD::SDIVREM, VT: IntVT, Action: Expand);
167	setOperationAction(Op: ISD::UDIVREM, VT: IntVT, Action: Expand);
168
169	// VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
170	setOperationAction(Op: ISD::SHL_PARTS, VT: IntVT, Action: Expand);
171	setOperationAction(Op: ISD::SRA_PARTS, VT: IntVT, Action: Expand);
172	setOperationAction(Op: ISD::SRL_PARTS, VT: IntVT, Action: Expand);
173
174	// VE has no MULHU/S or U/SMUL_LOHI operations.
175	// TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
176	setOperationAction(Op: ISD::MULHU, VT: IntVT, Action: Expand);
177	setOperationAction(Op: ISD::MULHS, VT: IntVT, Action: Expand);
178	setOperationAction(Op: ISD::UMUL_LOHI, VT: IntVT, Action: Expand);
179	setOperationAction(Op: ISD::SMUL_LOHI, VT: IntVT, Action: Expand);
180
181	// VE has no CTTZ, ROTL, ROTR operations.
182	setOperationAction(Op: ISD::CTTZ, VT: IntVT, Action: Expand);
183	setOperationAction(Op: ISD::ROTL, VT: IntVT, Action: Expand);
184	setOperationAction(Op: ISD::ROTR, VT: IntVT, Action: Expand);
185
186	// VE has 64 bits instruction which works as i64 BSWAP operation. This
187	// instruction works fine as i32 BSWAP operation with an additional
188	// parameter. Use isel patterns to lower BSWAP.
189	setOperationAction(Op: ISD::BSWAP, VT: IntVT, Action: Legal);
190
191	// VE has only 64 bits instructions which work as i64 BITREVERSE/CTLZ/CTPOP
192	// operations. Use isel patterns for i64, promote for i32.
193	LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
194	setOperationAction(Op: ISD::BITREVERSE, VT: IntVT, Action: Act);
195	setOperationAction(Op: ISD::CTLZ, VT: IntVT, Action: Act);
196	setOperationAction(Op: ISD::CTLZ_ZERO_UNDEF, VT: IntVT, Action: Act);
197	setOperationAction(Op: ISD::CTPOP, VT: IntVT, Action: Act);
198
199	// VE has only 64 bits instructions which work as i64 AND/OR/XOR operations.
200	// Use isel patterns for i64, promote for i32.
201	setOperationAction(Op: ISD::AND, VT: IntVT, Action: Act);
202	setOperationAction(Op: ISD::OR, VT: IntVT, Action: Act);
203	setOperationAction(Op: ISD::XOR, VT: IntVT, Action: Act);
204
205	// Legal smax and smin
206	setOperationAction(Op: ISD::SMAX, VT: IntVT, Action: Legal);
207	setOperationAction(Op: ISD::SMIN, VT: IntVT, Action: Legal);
208	}
209	/// } Int Ops
210
211	/// Conversion {
212	// VE doesn't have instructions for fp<->uint, so expand them by llvm
213	setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i32, Action: Promote); // use i64
214	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i32, Action: Promote); // use i64
215	setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::i64, Action: Expand);
216	setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::i64, Action: Expand);
217
218	// fp16 not supported
219	for (MVT FPVT : MVT::fp_valuetypes()) {
220	setOperationAction(Op: ISD::FP16_TO_FP, VT: FPVT, Action: Expand);
221	setOperationAction(Op: ISD::FP_TO_FP16, VT: FPVT, Action: Expand);
222	}
223	/// } Conversion
224
225	/// Floating-point Ops {
226	/// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
227	/// and fcmp.
228
229	// VE doesn't have following floating point operations.
230	for (MVT VT : MVT::fp_valuetypes()) {
231	setOperationAction(Op: ISD::FNEG, VT, Action: Expand);
232	setOperationAction(Op: ISD::FREM, VT, Action: Expand);
233	}
234
235	// VE doesn't have fdiv of f128.
236	setOperationAction(Op: ISD::FDIV, VT: MVT::f128, Action: Expand);
237
238	for (MVT FPVT : {MVT::f32, MVT::f64}) {
239	// f32 and f64 uses ConstantFP. f128 uses ConstantPool.
240	setOperationAction(Op: ISD::ConstantFP, VT: FPVT, Action: Legal);
241	}
242	/// } Floating-point Ops
243
244	/// Floating-point math functions {
245
246	// VE doesn't have following floating point math functions.
247	for (MVT VT : MVT::fp_valuetypes()) {
248	setOperationAction(Op: ISD::FABS, VT, Action: Expand);
249	setOperationAction(Op: ISD::FCOPYSIGN, VT, Action: Expand);
250	setOperationAction(Op: ISD::FCOS, VT, Action: Expand);
251	setOperationAction(Op: ISD::FMA, VT, Action: Expand);
252	setOperationAction(Op: ISD::FPOW, VT, Action: Expand);
253	setOperationAction(Op: ISD::FSIN, VT, Action: Expand);
254	setOperationAction(Op: ISD::FSQRT, VT, Action: Expand);
255	}
256
257	// VE has single and double FMINNUM and FMAXNUM
258	for (MVT VT : {MVT::f32, MVT::f64}) {
259	setOperationAction(Ops: {ISD::FMAXNUM, ISD::FMINNUM}, VT, Action: Legal);
260	}
261
262	/// } Floating-point math functions
263
264	/// Atomic instructions {
265
266	setMaxAtomicSizeInBitsSupported(`64`);
267	setMinCmpXchgSizeInBits(`32`);
268	setSupportsUnalignedAtomics(false);
269
270	// Use custom inserter for ATOMIC_FENCE.
271	setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom);
272
273	// Other atomic instructions.
274	for (MVT VT : MVT::integer_valuetypes()) {
275	// Support i8/i16 atomic swap.
276	setOperationAction(Op: ISD::ATOMIC_SWAP, VT, Action: Custom);
277
278	// FIXME: Support "atmam" instructions.
279	setOperationAction(Op: ISD::ATOMIC_LOAD_ADD, VT, Action: Expand);
280	setOperationAction(Op: ISD::ATOMIC_LOAD_SUB, VT, Action: Expand);
281	setOperationAction(Op: ISD::ATOMIC_LOAD_AND, VT, Action: Expand);
282	setOperationAction(Op: ISD::ATOMIC_LOAD_OR, VT, Action: Expand);
283
284	// VE doesn't have follwing instructions.
285	setOperationAction(Op: ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Action: Expand);
286	setOperationAction(Op: ISD::ATOMIC_LOAD_CLR, VT, Action: Expand);
287	setOperationAction(Op: ISD::ATOMIC_LOAD_XOR, VT, Action: Expand);
288	setOperationAction(Op: ISD::ATOMIC_LOAD_NAND, VT, Action: Expand);
289	setOperationAction(Op: ISD::ATOMIC_LOAD_MIN, VT, Action: Expand);
290	setOperationAction(Op: ISD::ATOMIC_LOAD_MAX, VT, Action: Expand);
291	setOperationAction(Op: ISD::ATOMIC_LOAD_UMIN, VT, Action: Expand);
292	setOperationAction(Op: ISD::ATOMIC_LOAD_UMAX, VT, Action: Expand);
293	}
294
295	/// } Atomic instructions
296
297	/// SJLJ instructions {
298	setOperationAction(Op: ISD::EH_SJLJ_LONGJMP, VT: MVT::Other, Action: Custom);
299	setOperationAction(Op: ISD::EH_SJLJ_SETJMP, VT: MVT::i32, Action: Custom);
300	setOperationAction(Op: ISD::EH_SJLJ_SETUP_DISPATCH, VT: MVT::Other, Action: Custom);
301	if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
302	setLibcallName(Call: RTLIB::UNWIND_RESUME, Name: "_Unwind_SjLj_Resume");
303	/// } SJLJ instructions
304
305	// Intrinsic instructions
306	setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
307	}
308
309	void VETargetLowering::initVPUActions() {
310	for (MVT LegalMaskVT : AllMaskVTs)
311	setOperationAction(Op: ISD::BUILD_VECTOR, VT: LegalMaskVT, Action: Custom);
312
313	for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
314	setOperationAction(Op: Opc, VT: MVT::v512i1, Action: Custom);
315
316	for (MVT LegalVecVT : AllVectorVTs) {
317	setOperationAction(Op: ISD::BUILD_VECTOR, VT: LegalVecVT, Action: Custom);
318	setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: LegalVecVT, Action: Legal);
319	setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: LegalVecVT, Action: Legal);
320	// Translate all vector instructions with legal element types to VVP_*
321	// nodes.
322	// TODO We will custom-widen into VVP_ nodes in the future. While we are*
323	// buildling the infrastructure for this, we only do this for legal vector
324	// VTs.
325	#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \
326	setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
327	#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
328	setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
329	setOperationAction(Op: ISD::EXPERIMENTAL_VP_STRIDED_LOAD, VT: LegalVecVT, Action: Custom);
330	setOperationAction(Op: ISD::EXPERIMENTAL_VP_STRIDED_STORE, VT: LegalVecVT, Action: Custom);
331	#include "VVPNodes.def"
332	}
333
334	for (MVT LegalPackedVT : AllPackedVTs) {
335	setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: LegalPackedVT, Action: Custom);
336	setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: LegalPackedVT, Action: Custom);
337	}
338
339	// vNt32, vNt64 ops (legal element types)
340	for (MVT VT : MVT::vector_valuetypes()) {
341	MVT ElemVT = VT.getVectorElementType();
342	unsigned ElemBits = ElemVT.getScalarSizeInBits();
343	if (ElemBits != `32` && ElemBits != `64`)
344	continue;
345
346	for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
347	setOperationAction(Op: MemOpc, VT, Action: Custom);
348
349	const ISD::NodeType IntReductionOCs[] = {
350	ISD::VECREDUCE_ADD, ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND,
351	ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMIN,
352	ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};
353
354	for (unsigned IntRedOpc : IntReductionOCs)
355	setOperationAction(Op: IntRedOpc, VT, Action: Custom);
356	}
357
358	// v256i1 and v512i1 ops
359	for (MVT MaskVT : AllMaskVTs) {
360	// Custom lower mask ops
361	setOperationAction(Op: ISD::STORE, VT: MaskVT, Action: Custom);
362	setOperationAction(Op: ISD::LOAD, VT: MaskVT, Action: Custom);
363	}
364	}
365
366	SDValue
367	VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
368	bool IsVarArg,
369	const SmallVectorImpl<ISD::OutputArg> &Outs,
370	const SmallVectorImpl<SDValue> &OutVals,
371	const SDLoc &DL, SelectionDAG &DAG) const {
372	// CCValAssign - represent the assignment of the return value to locations.
373	SmallVector<CCValAssign, `16`> RVLocs;
374
375	// CCState - Info about the registers and stack slot.
376	CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
377	*DAG.getContext());
378
379	// Analyze return values.
380	CCInfo.AnalyzeReturn(Outs, Fn: getReturnCC(CallConv));
381
382	SDValue Glue;
383	SmallVector<SDValue, `4`> RetOps(`1`, Chain);
384
385	// Copy the result values into the output registers.
386	for (unsigned i = `0`; i != RVLocs.size(); ++i) {
387	CCValAssign &VA = RVLocs [i];
388	assert(VA.isRegLoc() && "Can only return in registers!");
389	assert(!VA.needsCustom() && "Unexpected custom lowering");
390	SDValue OutVal = OutVals [i];
391
392	// Integer return values must be sign or zero extended by the callee.
393	switch (VA.getLocInfo()) {
394	case CCValAssign::Full:
395	break;
396	case CCValAssign::SExt:
397	OutVal = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: VA.getLocVT(), Operand: OutVal);
398	break;
399	case CCValAssign::ZExt:
400	OutVal = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: VA.getLocVT(), Operand: OutVal);
401	break;
402	case CCValAssign::AExt:
403	OutVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: VA.getLocVT(), Operand: OutVal);
404	break;
405	case CCValAssign::BCvt: {
406	// Convert a float return value to i64 with padding.
407	// 63 31 0
408	// +------+------+
409	// \| float\| 0 \|
410	// +------+------+
411	assert(VA.getLocVT() == MVT::i64);
412	assert(VA.getValVT() == MVT::f32);
413	SDValue Undef = SDValue (
414	DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::i64), `0`);
415	SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
416	OutVal = SDValue (DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL,
417	VT: MVT::i64, Op1: Undef, Op2: OutVal, Op3: Sub_f32),
418	`0`);
419	break;
420	}
421	default:
422	llvm_unreachable("Unknown loc info!");
423	}
424
425	Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: OutVal, Glue);
426
427	// Guarantee that all emitted copies are stuck together with flags.
428	Glue = Chain.getValue(R: `1`);
429	RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
430	}
431
432	RetOps [`0`] = Chain; // Update chain.
433
434	// Add the glue if we have it.
435	if (Glue.getNode())
436	RetOps.push_back(Elt: Glue);
437
438	return DAG.getNode(Opcode: VEISD::RET_GLUE, DL, VT: MVT::Other, Ops: RetOps);
439	}
440
441	SDValue VETargetLowering::LowerFormalArguments(
442	SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
443	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
444	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
445	MachineFunction &MF = DAG.getMachineFunction();
446
447	// Get the base offset of the incoming arguments stack space.
448	unsigned ArgsBaseOffset = Subtarget->getRsaSize();
449	// Get the size of the preserved arguments area
450	unsigned ArgsPreserved = `64`;
451
452	// Analyze arguments according to CC_VE.
453	SmallVector<CCValAssign, `16`> ArgLocs;
454	CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
455	*DAG.getContext());
456	// Allocate the preserved area first.
457	CCInfo.AllocateStack(Size: ArgsPreserved, Alignment: Align (`8`));
458	// We already allocated the preserved area, so the stack offset computed
459	// by CC_VE would be correct now.
460	CCInfo.AnalyzeFormalArguments(Ins, Fn: getParamCC(CallConv, IsVarArg: false));
461
462	for (const CCValAssign &VA : ArgLocs) {
463	assert(!VA.needsCustom() && "Unexpected custom lowering");
464	if (VA.isRegLoc()) {
465	// This argument is passed in a register.
466	// All integer register arguments are promoted by the caller to i64.
467
468	// Create a virtual register for the promoted live-in value.
469	Register VReg =
470	MF.addLiveIn(PReg: VA.getLocReg(), RC: getRegClassFor(VT: VA.getLocVT()));
471	SDValue Arg = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: VA.getLocVT());
472
473	// The caller promoted the argument, so insert an Assert?ext SDNode so we
474	// won't promote the value again in this function.
475	switch (VA.getLocInfo()) {
476	case CCValAssign::SExt:
477	Arg = DAG.getNode(Opcode: ISD::AssertSext, DL, VT: VA.getLocVT(), N1: Arg,
478	N2: DAG.getValueType(VA.getValVT()));
479	break;
480	case CCValAssign::ZExt:
481	Arg = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: VA.getLocVT(), N1: Arg,
482	N2: DAG.getValueType(VA.getValVT()));
483	break;
484	case CCValAssign::BCvt: {
485	// Extract a float argument from i64 with padding.
486	// 63 31 0
487	// +------+------+
488	// \| float\| 0 \|
489	// +------+------+
490	assert(VA.getLocVT() == MVT::i64);
491	assert(VA.getValVT() == MVT::f32);
492	SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
493	Arg = SDValue (DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL,
494	VT: MVT::f32, Op1: Arg, Op2: Sub_f32),
495	`0`);
496	break;
497	}
498	default:
499	break;
500	}
501
502	// Truncate the register down to the argument type.
503	if (VA.isExtInLoc())
504	Arg = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VA.getValVT(), Operand: Arg);
505
506	InVals.push_back(Elt: Arg);
507	continue;
508	}
509
510	// The registers are exhausted. This argument was passed on the stack.
511	assert(VA.isMemLoc());
512	// The CC_VE_Full/Half functions compute stack offsets relative to the
513	// beginning of the arguments area at %fp + the size of reserved area.
514	unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
515	unsigned ValSize = VA.getValVT().getSizeInBits() / `8`;
516
517	// Adjust offset for a float argument by adding 4 since the argument is
518	// stored in 8 bytes buffer with offset like below. LLVM generates
519	// 4 bytes load instruction, so need to adjust offset here. This
520	// adjustment is required in only LowerFormalArguments. In LowerCall,
521	// a float argument is converted to i64 first, and stored as 8 bytes
522	// data, which is required by ABI, so no need for adjustment.
523	// 0 4
524	// +------+------+
525	// \| empty\| float\|
526	// +------+------+
527	if (VA.getValVT() == MVT::f32)
528	Offset += `4`;
529
530	int FI = MF.getFrameInfo().CreateFixedObject(Size: ValSize, SPOffset: Offset, IsImmutable: true);
531	InVals.push_back(
532	Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain,
533	Ptr: DAG.getFrameIndex(FI, VT: getPointerTy(DL: MF.getDataLayout())),
534	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
535	}
536
537	if (!IsVarArg)
538	return Chain;
539
540	// This function takes variable arguments, some of which may have been passed
541	// in registers %s0-%s8.
542	//
543	// The va_start intrinsic needs to know the offset to the first variable
544	// argument.
545	// TODO: need to calculate offset correctly once we support f128.
546	unsigned ArgOffset = ArgLocs.size() * `8`;
547	VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
548	// Skip the reserved area at the top of stack.
549	FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
550
551	return Chain;
552	}
553
554	// FIXME? Maybe this could be a TableGen attribute on some registers and
555	// this table could be generated automatically from RegInfo.
556	Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
557	const MachineFunction &MF) const {
558	Register Reg = StringSwitch<Register>(RegName)
559	.Case(S: "sp", Value: VE::SX11) // Stack pointer
560	.Case(S: "fp", Value: VE::SX9) // Frame pointer
561	.Case(S: "sl", Value: VE::SX8) // Stack limit
562	.Case(S: "lr", Value: VE::SX10) // Link register
563	.Case(S: "tp", Value: VE::SX14) // Thread pointer
564	.Case(S: "outer", Value: VE::SX12) // Outer regiser
565	.Case(S: "info", Value: VE::SX17) // Info area register
566	.Case(S: "got", Value: VE::SX15) // Global offset table register
567	.Case(S: "plt", Value: VE::SX16) // Procedure linkage table register
568	.Default(Value: `0`);
569
570	if (Reg)
571	return Reg;
572
573	report_fatal_error(reason: "Invalid register name global variable");
574	}
575
576	//===----------------------------------------------------------------------===//
577	// TargetLowering Implementation
578	//===----------------------------------------------------------------------===//
579
580	SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
581	SmallVectorImpl<SDValue> &InVals) const {
582	SelectionDAG &DAG = CLI.DAG;
583	SDLoc DL = CLI.DL;
584	SDValue Chain = CLI.Chain;
585	auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
586
587	// VE target does not yet support tail call optimization.
588	CLI.IsTailCall = false;
589
590	// Get the base offset of the outgoing arguments stack space.
591	unsigned ArgsBaseOffset = Subtarget->getRsaSize();
592	// Get the size of the preserved arguments area
593	unsigned ArgsPreserved = `8` * `8u`;
594
595	// Analyze operands of the call, assigning locations to each operand.
596	SmallVector<CCValAssign, `16`> ArgLocs;
597	CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
598	*DAG.getContext());
599	// Allocate the preserved area first.
600	CCInfo.AllocateStack(Size: ArgsPreserved, Alignment: Align (`8`));
601	// We already allocated the preserved area, so the stack offset computed
602	// by CC_VE would be correct now.
603	CCInfo.AnalyzeCallOperands(Outs: CLI.Outs, Fn: getParamCC(CallConv: CLI.CallConv, IsVarArg: false));
604
605	// VE requires to use both register and stack for varargs or no-prototyped
606	// functions.
607	bool UseBoth = CLI.IsVarArg;
608
609	// Analyze operands again if it is required to store BOTH.
610	SmallVector<CCValAssign, `16`> ArgLocs2;
611	CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
612	ArgLocs2, *DAG.getContext());
613	if (UseBoth)
614	CCInfo2.AnalyzeCallOperands(Outs: CLI.Outs, Fn: getParamCC(CallConv: CLI.CallConv, IsVarArg: true));
615
616	// Get the size of the outgoing arguments stack space requirement.
617	unsigned ArgsSize = CCInfo.getStackSize();
618
619	// Keep stack frames 16-byte aligned.
620	ArgsSize = alignTo(Value: ArgsSize, Align: `16`);
621
622	// Adjust the stack pointer to make room for the arguments.
623	// FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
624	// with more than 6 arguments.
625	Chain = DAG.getCALLSEQ_START(Chain, InSize: ArgsSize, OutSize: `0`, DL);
626
627	// Collect the set of registers to pass to the function and their values.
628	// This will be emitted as a sequence of CopyToReg nodes glued to the call
629	// instruction.
630	SmallVector<std::pair<unsigned, SDValue>, `8`> RegsToPass;
631
632	// Collect chains from all the memory operations that copy arguments to the
633	// stack. They must follow the stack pointer adjustment above and precede the
634	// call instruction itself.
635	SmallVector<SDValue, `8`> MemOpChains;
636
637	// VE needs to get address of callee function in a register
638	// So, prepare to copy it to SX12 here.
639
640	// If the callee is a GlobalAddress node (quite common, every direct call is)
641	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
642	// Likewise ExternalSymbol -> TargetExternalSymbol.
643	SDValue Callee = CLI.Callee;
644
645	bool IsPICCall = isPositionIndependent();
646
647	// PC-relative references to external symbols should go through $stub.
648	// If so, we need to prepare GlobalBaseReg first.
649	const TargetMachine &TM = DAG.getTarget();
650	const GlobalValue GV = nullptr*;
651	auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
652	if (CalleeG)
653	GV = CalleeG->getGlobal();
654	bool Local = TM.shouldAssumeDSOLocal(GV);
655	bool UsePlt = !Local;
656	MachineFunction &MF = DAG.getMachineFunction();
657
658	// Turn GlobalAddress/ExternalSymbol node into a value node
659	// containing the address of them here.
660	if (CalleeG) {
661	if (IsPICCall) {
662	if (UsePlt)
663	Subtarget->getInstrInfo()->getGlobalBaseReg(MF: &MF);
664	Callee = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: `0`, TargetFlags: `0`);
665	Callee = DAG.getNode(Opcode: VEISD::GETFUNPLT, DL, VT: PtrVT, Operand: Callee);
666	} else {
667	Callee =
668	makeHiLoPair(Op: Callee, HiTF: VEMCExpr::VK_VE_HI32, LoTF: VEMCExpr::VK_VE_LO32, DAG);
669	}
670	} else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
671	if (IsPICCall) {
672	if (UsePlt)
673	Subtarget->getInstrInfo()->getGlobalBaseReg(MF: &MF);
674	Callee = DAG.getTargetExternalSymbol(Sym: E->getSymbol(), VT: PtrVT, TargetFlags: `0`);
675	Callee = DAG.getNode(Opcode: VEISD::GETFUNPLT, DL, VT: PtrVT, Operand: Callee);
676	} else {
677	Callee =
678	makeHiLoPair(Op: Callee, HiTF: VEMCExpr::VK_VE_HI32, LoTF: VEMCExpr::VK_VE_LO32, DAG);
679	}
680	}
681
682	RegsToPass.push_back(Elt: std::make_pair(x: VE::SX12, y&: Callee));
683
684	for (unsigned i = `0`, e = ArgLocs.size(); i != e; ++i) {
685	CCValAssign &VA = ArgLocs [i];
686	SDValue Arg = CLI.OutVals [i];
687
688	// Promote the value if needed.
689	switch (VA.getLocInfo()) {
690	default:
691	llvm_unreachable("Unknown location info!");
692	case CCValAssign::Full:
693	break;
694	case CCValAssign::SExt:
695	Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: VA.getLocVT(), Operand: Arg);
696	break;
697	case CCValAssign::ZExt:
698	Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: VA.getLocVT(), Operand: Arg);
699	break;
700	case CCValAssign::AExt:
701	Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: VA.getLocVT(), Operand: Arg);
702	break;
703	case CCValAssign::BCvt: {
704	// Convert a float argument to i64 with padding.
705	// 63 31 0
706	// +------+------+
707	// \| float\| 0 \|
708	// +------+------+
709	assert(VA.getLocVT() == MVT::i64);
710	assert(VA.getValVT() == MVT::f32);
711	SDValue Undef = SDValue (
712	DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::i64), `0`);
713	SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
714	Arg = SDValue (DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL,
715	VT: MVT::i64, Op1: Undef, Op2: Arg, Op3: Sub_f32),
716	`0`);
717	break;
718	}
719	}
720
721	if (VA.isRegLoc()) {
722	RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
723	if (!UseBoth)
724	continue;
725	VA = ArgLocs2 [i];
726	}
727
728	assert(VA.isMemLoc());
729
730	// Create a store off the stack pointer for this argument.
731	SDValue StackPtr = DAG.getRegister(Reg: VE::SX11, VT: PtrVT);
732	// The argument area starts at %fp/%sp + the size of reserved area.
733	SDValue PtrOff =
734	DAG.getIntPtrConstant(Val: VA.getLocMemOffset() + ArgsBaseOffset, DL);
735	PtrOff = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: PtrOff);
736	MemOpChains.push_back(
737	Elt: DAG.getStore(Chain, dl: DL, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo ()));
738	}
739
740	// Emit all stores, make sure they occur before the call.
741	if (!MemOpChains.empty())
742	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOpChains);
743
744	// Build a sequence of CopyToReg nodes glued together with token chain and
745	// glue operands which copy the outgoing args into registers. The InGlue is
746	// necessary since all emitted instructions must be stuck together in order
747	// to pass the live physical registers.
748	SDValue InGlue;
749	for (unsigned i = `0`, e = RegsToPass.size(); i != e; ++i) {
750	Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegsToPass [i].first,
751	N: RegsToPass [i].second, Glue: InGlue);
752	InGlue = Chain.getValue(R: `1`);
753	}
754
755	// Build the operands for the call instruction itself.
756	SmallVector<SDValue, `8`> Ops;
757	Ops.push_back(Elt: Chain);
758	for (unsigned i = `0`, e = RegsToPass.size(); i != e; ++i)
759	Ops.push_back(Elt: DAG.getRegister(Reg: RegsToPass [i].first,
760	VT: RegsToPass [i].second.getValueType()));
761
762	// Add a register mask operand representing the call-preserved registers.
763	const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
764	const uint32_t *Mask =
765	TRI->getCallPreservedMask(MF: DAG.getMachineFunction(), CC: CLI.CallConv);
766	assert(Mask && "Missing call preserved mask for calling convention");
767	Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
768
769	// Make sure the CopyToReg nodes are glued to the call instruction which
770	// consumes the registers.
771	if (InGlue.getNode())
772	Ops.push_back(Elt: InGlue);
773
774	// Now the call itself.
775	SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
776	Chain = DAG.getNode(Opcode: VEISD::CALL, DL, VTList: NodeTys, Ops);
777	InGlue = Chain.getValue(R: `1`);
778
779	// Revert the stack pointer immediately after the call.
780	Chain = DAG.getCALLSEQ_END(Chain, Size1: ArgsSize, Size2: `0`, Glue: InGlue, DL);
781	InGlue = Chain.getValue(R: `1`);
782
783	// Now extract the return values. This is more or less the same as
784	// LowerFormalArguments.
785
786	// Assign locations to each value returned by this call.
787	SmallVector<CCValAssign, `16`> RVLocs;
788	CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
789	*DAG.getContext());
790
791	// Set inreg flag manually for codegen generated library calls that
792	// return float.
793	if (CLI.Ins.size() == `1` && CLI.Ins [`0`].VT == MVT::f32 && !CLI.CB)
794	CLI.Ins [`0`].Flags.setInReg();
795
796	RVInfo.AnalyzeCallResult(Ins: CLI.Ins, Fn: getReturnCC(CallConv: CLI.CallConv));
797
798	// Copy all of the result registers out of their specified physreg.
799	for (unsigned i = `0`; i != RVLocs.size(); ++i) {
800	CCValAssign &VA = RVLocs [i];
801	assert(!VA.needsCustom() && "Unexpected custom lowering");
802	Register Reg = VA.getLocReg();
803
804	// When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
805	// reside in the same register in the high and low bits. Reuse the
806	// CopyFromReg previous node to avoid duplicate copies.
807	SDValue RV;
808	if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Val: Chain.getOperand(i: `1`)))
809	if (SrcReg->getReg() == Reg && Chain ->getOpcode() == ISD::CopyFromReg)
810	RV = Chain.getValue(R: `0`);
811
812	// But usually we'll create a new CopyFromReg for a different register.
813	if (!RV.getNode()) {
814	RV = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: RVLocs [i].getLocVT(), Glue: InGlue);
815	Chain = RV.getValue(R: `1`);
816	InGlue = Chain.getValue(R: `2`);
817	}
818
819	// The callee promoted the return value, so insert an Assert?ext SDNode so
820	// we won't promote the value again in this function.
821	switch (VA.getLocInfo()) {
822	case CCValAssign::SExt:
823	RV = DAG.getNode(Opcode: ISD::AssertSext, DL, VT: VA.getLocVT(), N1: RV,
824	N2: DAG.getValueType(VA.getValVT()));
825	break;
826	case CCValAssign::ZExt:
827	RV = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: VA.getLocVT(), N1: RV,
828	N2: DAG.getValueType(VA.getValVT()));
829	break;
830	case CCValAssign::BCvt: {
831	// Extract a float return value from i64 with padding.
832	// 63 31 0
833	// +------+------+
834	// \| float\| 0 \|
835	// +------+------+
836	assert(VA.getLocVT() == MVT::i64);
837	assert(VA.getValVT() == MVT::f32);
838	SDValue Sub_f32 = DAG.getTargetConstant(Val: VE::sub_f32, DL, VT: MVT::i32);
839	RV = SDValue (DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL,
840	VT: MVT::f32, Op1: RV, Op2: Sub_f32),
841	`0`);
842	break;
843	}
844	default:
845	break;
846	}
847
848	// Truncate the register down to the return value type.
849	if (VA.isExtInLoc())
850	RV = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VA.getValVT(), Operand: RV);
851
852	InVals.push_back(Elt: RV);
853	}
854
855	return Chain;
856	}
857
858	bool VETargetLowering::isOffsetFoldingLegal(
859	const GlobalAddressSDNode GA) const* {
860	// VE uses 64 bit addressing, so we need multiple instructions to generate
861	// an address. Folding address with offset increases the number of
862	// instructions, so that we disable it here. Offsets will be folded in
863	// the DAG combine later if it worth to do so.
864	return false;
865	}
866
867	/// isFPImmLegal - Returns true if the target can instruction select the
868	/// specified FP immediate natively. If false, the legalizer will
869	/// materialize the FP immediate as a load from a constant pool.
870	bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
871	bool ForCodeSize) const {
872	return VT == MVT::f32 \|\| VT == MVT::f64;
873	}
874
875	/// Determine if the target supports unaligned memory accesses.
876	///
877	/// This function returns true if the target allows unaligned memory accesses
878	/// of the specified type in the given address space. If true, it also returns
879	/// whether the unaligned memory access is "fast" in the last argument by
880	/// reference. This is used, for example, in situations where an array
881	/// copy/move/set is converted to a sequence of store operations. Its use
882	/// helps to ensure that such replacements don't generate code that causes an
883	/// alignment error (trap) on the target machine.
884	bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
885	unsigned AddrSpace,
886	Align A,
887	MachineMemOperand::Flags,
888	unsigned Fast) const* {
889	if (Fast) {
890	// It's fast anytime on VE
891	*Fast = `1`;
892	}
893	return true;
894	}
895
896	VETargetLowering::VETargetLowering(const TargetMachine &TM,
897	const VESubtarget &STI)
898	: TargetLowering (TM), Subtarget(&STI) {
899	// Instructions which use registers as conditionals examine all the
900	// bits (as does the pseudo SELECT_CC expansion). I don't think it
901	// matters much whether it's ZeroOrOneBooleanContent, or
902	// ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
903	// former.
904	setBooleanContents(ZeroOrOneBooleanContent);
905	setBooleanVectorContents(ZeroOrOneBooleanContent);
906
907	initRegisterClasses();
908	initSPUActions();
909	initVPUActions();
910
911	setStackPointerRegisterToSaveRestore(VE::SX11);
912
913	// We have target-specific dag combine patterns for the following nodes:
914	setTargetDAGCombine(ISD::TRUNCATE);
915	setTargetDAGCombine(ISD::SELECT);
916	setTargetDAGCombine(ISD::SELECT_CC);
917
918	// Set function alignment to 16 bytes
919	setMinFunctionAlignment(Align (`16`));
920
921	// VE stores all argument by 8 bytes alignment
922	setMinStackArgumentAlignment(Align (`8`));
923
924	computeRegisterProperties(TRI: Subtarget->getRegisterInfo());
925	}
926
927	const char VETargetLowering::getTargetNodeName(unsigned* Opcode) const {
928	#define TARGET_NODE_CASE(NAME) \
929	case VEISD::NAME: \
930	return "VEISD::" #NAME;
931	switch ((VEISD::NodeType)Opcode) {
932	case VEISD::FIRST_NUMBER:
933	break;
934	TARGET_NODE_CASE(CMPI)
935	TARGET_NODE_CASE(CMPU)
936	TARGET_NODE_CASE(CMPF)
937	TARGET_NODE_CASE(CMPQ)
938	TARGET_NODE_CASE(CMOV)
939	TARGET_NODE_CASE(CALL)
940	TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
941	TARGET_NODE_CASE(EH_SJLJ_SETJMP)
942	TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
943	TARGET_NODE_CASE(GETFUNPLT)
944	TARGET_NODE_CASE(GETSTACKTOP)
945	TARGET_NODE_CASE(GETTLSADDR)
946	TARGET_NODE_CASE(GLOBAL_BASE_REG)
947	TARGET_NODE_CASE(Hi)
948	TARGET_NODE_CASE(Lo)
949	TARGET_NODE_CASE(RET_GLUE)
950	TARGET_NODE_CASE(TS1AM)
951	TARGET_NODE_CASE(VEC_UNPACK_LO)
952	TARGET_NODE_CASE(VEC_UNPACK_HI)
953	TARGET_NODE_CASE(VEC_PACK)
954	TARGET_NODE_CASE(VEC_BROADCAST)
955	TARGET_NODE_CASE(REPL_I32)
956	TARGET_NODE_CASE(REPL_F32)
957
958	TARGET_NODE_CASE(LEGALAVL)
959
960	// Register the VVP_ SDNodes.*
961	#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
962	#include "VVPNodes.def"
963	}
964	#undef TARGET_NODE_CASE
965	return nullptr;
966	}
967
968	EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
969	EVT VT) const {
970	return MVT::i32;
971	}
972
973	// Convert to a target node and set target flags.
974	SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
975	SelectionDAG &DAG) const {
976	if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op))
977	return DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc (GA),
978	VT: GA->getValueType(ResNo: `0`), offset: GA->getOffset(), TargetFlags: TF);
979
980	if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Val&: Op))
981	return DAG.getTargetBlockAddress(BA: BA->getBlockAddress(), VT: Op.getValueType(),
982	Offset: `0`, TargetFlags: TF);
983
984	if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Val&: Op))
985	return DAG.getTargetConstantPool(C: CP->getConstVal(), VT: CP->getValueType(ResNo: `0`),
986	Align: CP->getAlign(), Offset: CP->getOffset(), TargetFlags: TF);
987
988	if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Val&: Op))
989	return DAG.getTargetExternalSymbol(Sym: ES->getSymbol(), VT: ES->getValueType(ResNo: `0`),
990	TargetFlags: TF);
991
992	if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Val&: Op))
993	return DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: JT->getValueType(ResNo: `0`), TargetFlags: TF);
994
995	llvm_unreachable("Unhandled address SDNode");
996	}
997
998	// Split Op into high and low parts according to HiTF and LoTF.
999	// Return an ADD node combining the parts.
1000	SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
1001	SelectionDAG &DAG) const {
1002	SDLoc DL(Op);
1003	EVT VT = Op.getValueType();
1004	SDValue Hi = DAG.getNode(Opcode: VEISD::Hi, DL, VT, Operand: withTargetFlags(Op, TF: HiTF, DAG));
1005	SDValue Lo = DAG.getNode(Opcode: VEISD::Lo, DL, VT, Operand: withTargetFlags(Op, TF: LoTF, DAG));
1006	return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Hi, N2: Lo);
1007	}
1008
1009	// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
1010	// or ExternalSymbol SDNode.
1011	SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
1012	SDLoc DL(Op);
1013	EVT PtrVT = Op.getValueType();
1014
1015	// Handle PIC mode first. VE needs a got load for every variable!
1016	if (isPositionIndependent()) {
1017	auto GlobalN = dyn_cast<GlobalAddressSDNode>(Val&: Op);
1018
1019	if (isa<ConstantPoolSDNode>(Val: Op) \|\| isa<JumpTableSDNode>(Val: Op) \|\|
1020	(GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
1021	// Create following instructions for local linkage PIC code.
1022	// lea %reg, label@gotoff_lo
1023	// and %reg, %reg, (32)0
1024	// lea.sl %reg, label@gotoff_hi(%reg, %got)
1025	SDValue HiLo = makeHiLoPair(Op, HiTF: VEMCExpr::VK_VE_GOTOFF_HI32,
1026	LoTF: VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1027	SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrVT);
1028	return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: GlobalBase, N2: HiLo);
1029	}
1030	// Create following instructions for not local linkage PIC code.
1031	// lea %reg, label@got_lo
1032	// and %reg, %reg, (32)0
1033	// lea.sl %reg, label@got_hi(%reg)
1034	// ld %reg, (%reg, %got)
1035	SDValue HiLo = makeHiLoPair(Op, HiTF: VEMCExpr::VK_VE_GOT_HI32,
1036	LoTF: VEMCExpr::VK_VE_GOT_LO32, DAG);
1037	SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrVT);
1038	SDValue AbsAddr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: GlobalBase, N2: HiLo);
1039	return DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: AbsAddr,
1040	PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()));
1041	}
1042
1043	// This is one of the absolute code models.
1044	switch (getTargetMachine().getCodeModel()) {
1045	default:
1046	llvm_unreachable("Unsupported absolute code model");
1047	case CodeModel::Small:
1048	case CodeModel::Medium:
1049	case CodeModel::Large:
1050	// abs64.
1051	return makeHiLoPair(Op, HiTF: VEMCExpr::VK_VE_HI32, LoTF: VEMCExpr::VK_VE_LO32, DAG);
1052	}
1053	}
1054
1055	/// Custom Lower {
1056
1057	// The mappings for emitLeading/TrailingFence for VE is designed by following
1058	// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
1059	Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
1060	Instruction *Inst,
1061	AtomicOrdering Ord) const {
1062	switch (Ord) {
1063	case AtomicOrdering::NotAtomic:
1064	case AtomicOrdering::Unordered:
1065	llvm_unreachable("Invalid fence: unordered/non-atomic");
1066	case AtomicOrdering::Monotonic:
1067	case AtomicOrdering::Acquire:
1068	return nullptr; // Nothing to do
1069	case AtomicOrdering::Release:
1070	case AtomicOrdering::AcquireRelease:
1071	return Builder.CreateFence(Ordering: AtomicOrdering::Release);
1072	case AtomicOrdering::SequentiallyConsistent:
1073	if (!Inst->hasAtomicStore())
1074	return nullptr; // Nothing to do
1075	return Builder.CreateFence(Ordering: AtomicOrdering::SequentiallyConsistent);
1076	}
1077	llvm_unreachable("Unknown fence ordering in emitLeadingFence");
1078	}
1079
1080	Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
1081	Instruction *Inst,
1082	AtomicOrdering Ord) const {
1083	switch (Ord) {
1084	case AtomicOrdering::NotAtomic:
1085	case AtomicOrdering::Unordered:
1086	llvm_unreachable("Invalid fence: unordered/not-atomic");
1087	case AtomicOrdering::Monotonic:
1088	case AtomicOrdering::Release:
1089	return nullptr; // Nothing to do
1090	case AtomicOrdering::Acquire:
1091	case AtomicOrdering::AcquireRelease:
1092	return Builder.CreateFence(Ordering: AtomicOrdering::Acquire);
1093	case AtomicOrdering::SequentiallyConsistent:
1094	return Builder.CreateFence(Ordering: AtomicOrdering::SequentiallyConsistent);
1095	}
1096	llvm_unreachable("Unknown fence ordering in emitTrailingFence");
1097	}
1098
1099	SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
1100	SelectionDAG &DAG) const {
1101	SDLoc DL(Op);
1102	AtomicOrdering FenceOrdering =
1103	static_cast<AtomicOrdering>(Op.getConstantOperandVal(i: `1`));
1104	SyncScope::ID FenceSSID =
1105	static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: `2`));
1106
1107	// VE uses Release consistency, so need a fence instruction if it is a
1108	// cross-thread fence.
1109	if (FenceSSID == SyncScope::System) {
1110	switch (FenceOrdering) {
1111	case AtomicOrdering::NotAtomic:
1112	case AtomicOrdering::Unordered:
1113	case AtomicOrdering::Monotonic:
1114	// No need to generate fencem instruction here.
1115	break;
1116	case AtomicOrdering::Acquire:
1117	// Generate "fencem 2" as acquire fence.
1118	return SDValue (DAG.getMachineNode(Opcode: VE::FENCEM, dl: DL, VT: MVT::Other,
1119	Op1: DAG.getTargetConstant(Val: `2`, DL, VT: MVT::i32),
1120	Op2: Op.getOperand(i: `0`)),
1121	`0`);
1122	case AtomicOrdering::Release:
1123	// Generate "fencem 1" as release fence.
1124	return SDValue (DAG.getMachineNode(Opcode: VE::FENCEM, dl: DL, VT: MVT::Other,
1125	Op1: DAG.getTargetConstant(Val: `1`, DL, VT: MVT::i32),
1126	Op2: Op.getOperand(i: `0`)),
1127	`0`);
1128	case AtomicOrdering::AcquireRelease:
1129	case AtomicOrdering::SequentiallyConsistent:
1130	// Generate "fencem 3" as acq_rel and seq_cst fence.
1131	// FIXME: "fencem 3" doesn't wait for PCIe deveices accesses,
1132	// so seq_cst may require more instruction for them.
1133	return SDValue (DAG.getMachineNode(Opcode: VE::FENCEM, dl: DL, VT: MVT::Other,
1134	Op1: DAG.getTargetConstant(Val: `3`, DL, VT: MVT::i32),
1135	Op2: Op.getOperand(i: `0`)),
1136	`0`);
1137	}
1138	}
1139
1140	// MEMBARRIER is a compiler barrier; it codegens to a no-op.
1141	return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: `0`));
1142	}
1143
1144	TargetLowering::AtomicExpansionKind
1145	VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst AI) const* {
1146	// We have TS1AM implementation for i8/i16/i32/i64, so use it.
1147	if (AI->getOperation() == AtomicRMWInst::Xchg) {
1148	return AtomicExpansionKind::None;
1149	}
1150	// FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
1151
1152	// Otherwise, expand it using compare and exchange instruction to not call
1153	// __sync_fetch_and_ functions.*
1154	return AtomicExpansionKind::CmpXChg;
1155	}
1156
1157	static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
1158	SDValue &Bits) {
1159	SDLoc DL(Op);
1160	AtomicSDNode *N = cast<AtomicSDNode>(Val&: Op);
1161	SDValue Ptr = N->getOperand(Num: `1`);
1162	SDValue Val = N->getOperand(Num: `2`);
1163	EVT PtrVT = Ptr.getValueType();
1164	bool Byte = N->getMemoryVT() == MVT::i8;
1165	// Remainder = AND Ptr, 3
1166	// Flag = 1 << Remainder ; If Byte is true (1 byte swap flag)
1167	// Flag = 3 << Remainder ; If Byte is false (2 bytes swap flag)
1168	// Bits = Remainder << 3
1169	// NewVal = Val << Bits
1170	SDValue Const3 = DAG.getConstant(Val: `3`, DL, VT: PtrVT);
1171	SDValue Remainder = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, Ops: {Ptr, Const3});
1172	SDValue Mask = Byte ? DAG.getConstant(Val: `1`, DL, VT: MVT::i32)
1173	: DAG.getConstant(Val: `3`, DL, VT: MVT::i32);
1174	Flag = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i32, Ops: {Mask, Remainder});
1175	Bits = DAG.getNode(Opcode: ISD::SHL, DL, VT: PtrVT, Ops: {Remainder, Const3});
1176	return DAG.getNode(Opcode: ISD::SHL, DL, VT: Val.getValueType(), Ops: {Val, Bits});
1177	}
1178
1179	static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
1180	SDValue Bits) {
1181	SDLoc DL(Op);
1182	EVT VT = Data.getValueType();
1183	bool Byte = cast<AtomicSDNode>(Val&: Op)->getMemoryVT() == MVT::i8;
1184	// NewData = Data >> Bits
1185	// Result = NewData & 0xff ; If Byte is true (1 byte)
1186	// Result = NewData & 0xffff ; If Byte is false (2 bytes)
1187
1188	SDValue NewData = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Data, N2: Bits);
1189	return DAG.getNode(Opcode: ISD::AND, DL, VT,
1190	Ops: {NewData, DAG.getConstant(Val: Byte ? `0xff` : `0xffff`, DL, VT)});
1191	}
1192
1193	SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
1194	SelectionDAG &DAG) const {
1195	SDLoc DL(Op);
1196	AtomicSDNode *N = cast<AtomicSDNode>(Val&: Op);
1197
1198	if (N->getMemoryVT() == MVT::i8) {
1199	// For i8, use "ts1am"
1200	// Input:
1201	// ATOMIC_SWAP Ptr, Val, Order
1202	//
1203	// Output:
1204	// Remainder = AND Ptr, 3
1205	// Flag = 1 << Remainder ; 1 byte swap flag for TS1AM inst.
1206	// Bits = Remainder << 3
1207	// NewVal = Val << Bits
1208	//
1209	// Aligned = AND Ptr, -4
1210	// Data = TS1AM Aligned, Flag, NewVal
1211	//
1212	// NewData = Data >> Bits
1213	// Result = NewData & 0xff ; 1 byte result
1214	SDValue Flag;
1215	SDValue Bits;
1216	SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1217
1218	SDValue Ptr = N->getOperand(Num: `1`);
1219	SDValue Aligned = DAG.getNode(Opcode: ISD::AND, DL, VT: Ptr.getValueType(),
1220	Ops: {Ptr, DAG.getConstant(Val: -`4`, DL, VT: MVT::i64)});
1221	SDValue TS1AM = DAG.getAtomic(Opcode: VEISD::TS1AM, dl: DL, MemVT: N->getMemoryVT(),
1222	VTList: DAG.getVTList(VT1: Op.getNode()->getValueType(ResNo: `0`),
1223	VT2: Op.getNode()->getValueType(ResNo: `1`)),
1224	Ops: {N->getChain(), Aligned, Flag, NewVal},
1225	MMO: N->getMemOperand());
1226
1227	SDValue Result = finalizeTS1AM(Op, DAG, Data: TS1AM, Bits);
1228	SDValue Chain = TS1AM.getValue(R: `1`);
1229	return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
1230	}
1231	if (N->getMemoryVT() == MVT::i16) {
1232	// For i16, use "ts1am"
1233	SDValue Flag;
1234	SDValue Bits;
1235	SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1236
1237	SDValue Ptr = N->getOperand(Num: `1`);
1238	SDValue Aligned = DAG.getNode(Opcode: ISD::AND, DL, VT: Ptr.getValueType(),
1239	Ops: {Ptr, DAG.getConstant(Val: -`4`, DL, VT: MVT::i64)});
1240	SDValue TS1AM = DAG.getAtomic(Opcode: VEISD::TS1AM, dl: DL, MemVT: N->getMemoryVT(),
1241	VTList: DAG.getVTList(VT1: Op.getNode()->getValueType(ResNo: `0`),
1242	VT2: Op.getNode()->getValueType(ResNo: `1`)),
1243	Ops: {N->getChain(), Aligned, Flag, NewVal},
1244	MMO: N->getMemOperand());
1245
1246	SDValue Result = finalizeTS1AM(Op, DAG, Data: TS1AM, Bits);
1247	SDValue Chain = TS1AM.getValue(R: `1`);
1248	return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
1249	}
1250	// Otherwise, let llvm legalize it.
1251	return Op;
1252	}
1253
1254	SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
1255	SelectionDAG &DAG) const {
1256	return makeAddress(Op, DAG);
1257	}
1258
1259	SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
1260	SelectionDAG &DAG) const {
1261	return makeAddress(Op, DAG);
1262	}
1263
1264	SDValue VETargetLowering::lowerConstantPool(SDValue Op,
1265	SelectionDAG &DAG) const {
1266	return makeAddress(Op, DAG);
1267	}
1268
1269	SDValue
1270	VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
1271	SelectionDAG &DAG) const {
1272	SDLoc DL(Op);
1273
1274	// Generate the following code:
1275	// t1: ch,glue = callseq_start t0, 0, 0
1276	// t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
1277	// t3: ch,glue = callseq_end t2, 0, 0, t2:2
1278	// t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
1279	SDValue Label = withTargetFlags(Op, TF: `0`, DAG);
1280	EVT PtrVT = Op.getValueType();
1281
1282	// Lowering the machine isd will make sure everything is in the right
1283	// location.
1284	SDValue Chain = DAG.getEntryNode();
1285	SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
1286	const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
1287	MF: DAG.getMachineFunction(), CC: CallingConv::C);
1288	Chain = DAG.getCALLSEQ_START(Chain, InSize: `64`, OutSize: `0`, DL);
1289	SDValue Args[] = {Chain, Label, DAG.getRegisterMask(RegMask: Mask), Chain.getValue(R: `1`)};
1290	Chain = DAG.getNode(Opcode: VEISD::GETTLSADDR, DL, VTList: NodeTys, Ops: Args);
1291	Chain = DAG.getCALLSEQ_END(Chain, Size1: `64`, Size2: `0`, Glue: Chain.getValue(R: `1`), DL);
1292	Chain = DAG.getCopyFromReg(Chain, dl: DL, Reg: VE::SX0, VT: PtrVT, Glue: Chain.getValue(R: `1`));
1293
1294	// GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
1295	MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1296	MFI.setHasCalls(true);
1297
1298	// Also generate code to prepare a GOT register if it is PIC.
1299	if (isPositionIndependent()) {
1300	MachineFunction &MF = DAG.getMachineFunction();
1301	Subtarget->getInstrInfo()->getGlobalBaseReg(MF: &MF);
1302	}
1303
1304	return Chain;
1305	}
1306
1307	SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
1308	SelectionDAG &DAG) const {
1309	// The current implementation of nld (2.26) doesn't allow local exec model
1310	// code described in VE-tls_v1.1.pdf (1) as its input. Instead, we always*
1311	// generate the general dynamic model code sequence.
1312	//
1313	// 1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf*
1314	return lowerToTLSGeneralDynamicModel(Op, DAG);
1315	}
1316
1317	SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1318	return makeAddress(Op, DAG);
1319	}
1320
1321	// Lower a f128 load into two f64 loads.
1322	static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
1323	SDLoc DL(Op);
1324	LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Val: Op.getNode());
1325	assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1326	Align Alignment = LdNode->getAlign();
1327	if (Alignment > `8`)
1328	Alignment = Align (`8`);
1329
1330	SDValue Lo64 =
1331	DAG.getLoad(VT: MVT::f64, dl: DL, Chain: LdNode->getChain(), Ptr: LdNode->getBasePtr(),
1332	PtrInfo: LdNode->getPointerInfo(), Alignment,
1333	MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1334	: MachineMemOperand::MONone);
1335	EVT AddrVT = LdNode->getBasePtr().getValueType();
1336	SDValue HiPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: LdNode->getBasePtr(),
1337	N2: DAG.getConstant(Val: `8`, DL, VT: AddrVT));
1338	SDValue Hi64 =
1339	DAG.getLoad(VT: MVT::f64, dl: DL, Chain: LdNode->getChain(), Ptr: HiPtr,
1340	PtrInfo: LdNode->getPointerInfo(), Alignment,
1341	MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1342	: MachineMemOperand::MONone);
1343
1344	SDValue SubRegEven = DAG.getTargetConstant(Val: VE::sub_even, DL, VT: MVT::i32);
1345	SDValue SubRegOdd = DAG.getTargetConstant(Val: VE::sub_odd, DL, VT: MVT::i32);
1346
1347	// VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1348	SDNode *InFP128 =
1349	DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MVT::f128);
1350	InFP128 = DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL, VT: MVT::f128,
1351	Op1: SDValue (InFP128, `0`), Op2: Hi64, Op3: SubRegEven);
1352	InFP128 = DAG.getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: DL, VT: MVT::f128,
1353	Op1: SDValue (InFP128, `0`), Op2: Lo64, Op3: SubRegOdd);
1354	SDValue OutChains[`2`] = {SDValue (Lo64.getNode(), `1`),
1355	SDValue (Hi64.getNode(), `1`)};
1356	SDValue OutChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1357	SDValue Ops[`2`] = {SDValue (InFP128, `0`), OutChain};
1358	return DAG.getMergeValues(Ops, dl: DL);
1359	}
1360
1361	// Lower a vXi1 load into following instructions
1362	// LDrii %1, (,%addr)
1363	// LVMxir %vm, 0, %1
1364	// LDrii %2, 8(,%addr)
1365	// LVMxir %vm, 0, %2
1366	// ...
1367	static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
1368	SDLoc DL(Op);
1369	LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Val: Op.getNode());
1370	assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1371
1372	SDValue BasePtr = LdNode->getBasePtr();
1373	Align Alignment = LdNode->getAlign();
1374	if (Alignment > `8`)
1375	Alignment = Align (`8`);
1376
1377	EVT AddrVT = BasePtr.getValueType();
1378	EVT MemVT = LdNode->getMemoryVT();
1379	if (MemVT == MVT::v256i1 \|\| MemVT == MVT::v4i64) {
1380	SDValue OutChains[`4`];
1381	SDNode *VM = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MemVT);
1382	for (int i = `0`; i < `4`; ++i) {
1383	// Generate load dag and prepare chains.
1384	SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1385	N2: DAG.getConstant(Val: `8` * i, DL, VT: AddrVT));
1386	SDValue Val =
1387	DAG.getLoad(VT: MVT::i64, dl: DL, Chain: LdNode->getChain(), Ptr: Addr,
1388	PtrInfo: LdNode->getPointerInfo(), Alignment,
1389	MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1390	: MachineMemOperand::MONone);
1391	OutChains[i] = SDValue (Val.getNode(), `1`);
1392
1393	VM = DAG.getMachineNode(Opcode: VE::LVMir_m, dl: DL, VT: MVT::i64,
1394	Op1: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64), Op2: Val,
1395	Op3: SDValue (VM, `0`));
1396	}
1397	SDValue OutChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1398	SDValue Ops[`2`] = {SDValue (VM, `0`), OutChain};
1399	return DAG.getMergeValues(Ops, dl: DL);
1400	} else if (MemVT == MVT::v512i1 \|\| MemVT == MVT::v8i64) {
1401	SDValue OutChains[`8`];
1402	SDNode *VM = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MemVT);
1403	for (int i = `0`; i < `8`; ++i) {
1404	// Generate load dag and prepare chains.
1405	SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1406	N2: DAG.getConstant(Val: `8` * i, DL, VT: AddrVT));
1407	SDValue Val =
1408	DAG.getLoad(VT: MVT::i64, dl: DL, Chain: LdNode->getChain(), Ptr: Addr,
1409	PtrInfo: LdNode->getPointerInfo(), Alignment,
1410	MMOFlags: LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1411	: MachineMemOperand::MONone);
1412	OutChains[i] = SDValue (Val.getNode(), `1`);
1413
1414	VM = DAG.getMachineNode(Opcode: VE::LVMyir_y, dl: DL, VT: MVT::i64,
1415	Op1: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64), Op2: Val,
1416	Op3: SDValue (VM, `0`));
1417	}
1418	SDValue OutChain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1419	SDValue Ops[`2`] = {SDValue (VM, `0`), OutChain};
1420	return DAG.getMergeValues(Ops, dl: DL);
1421	} else {
1422	// Otherwise, ask llvm to expand it.
1423	return SDValue ();
1424	}
1425	}
1426
1427	SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1428	LoadSDNode *LdNode = cast<LoadSDNode>(Val: Op.getNode());
1429	EVT MemVT = LdNode->getMemoryVT();
1430
1431	// If VPU is enabled, always expand non-mask vector loads to VVP
1432	if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(SomeVT: MemVT))
1433	return lowerToVVP(Op, DAG);
1434
1435	SDValue BasePtr = LdNode->getBasePtr();
1436	if (isa<FrameIndexSDNode>(Val: BasePtr.getNode())) {
1437	// Do not expand store instruction with frame index here because of
1438	// dependency problems. We expand it later in eliminateFrameIndex().
1439	return Op;
1440	}
1441
1442	if (MemVT == MVT::f128)
1443	return lowerLoadF128(Op, DAG);
1444	if (isMaskType(SomeVT: MemVT))
1445	return lowerLoadI1(Op, DAG);
1446
1447	return Op;
1448	}
1449
1450	// Lower a f128 store into two f64 stores.
1451	static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
1452	SDLoc DL(Op);
1453	StoreSDNode *StNode = dyn_cast<StoreSDNode>(Val: Op.getNode());
1454	assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1455
1456	SDValue SubRegEven = DAG.getTargetConstant(Val: VE::sub_even, DL, VT: MVT::i32);
1457	SDValue SubRegOdd = DAG.getTargetConstant(Val: VE::sub_odd, DL, VT: MVT::i32);
1458
1459	SDNode *Hi64 = DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i64,
1460	Op1: StNode->getValue(), Op2: SubRegEven);
1461	SDNode *Lo64 = DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i64,
1462	Op1: StNode->getValue(), Op2: SubRegOdd);
1463
1464	Align Alignment = StNode->getAlign();
1465	if (Alignment > `8`)
1466	Alignment = Align (`8`);
1467
1468	// VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1469	SDValue OutChains[`2`];
1470	OutChains[`0`] =
1471	DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue (Lo64, `0`),
1472	Ptr: StNode->getBasePtr(), PtrInfo: MachinePointerInfo (), Alignment,
1473	MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1474	: MachineMemOperand::MONone);
1475	EVT AddrVT = StNode->getBasePtr().getValueType();
1476	SDValue HiPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: StNode->getBasePtr(),
1477	N2: DAG.getConstant(Val: `8`, DL, VT: AddrVT));
1478	OutChains[`1`] =
1479	DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue (Hi64, `0`), Ptr: HiPtr,
1480	PtrInfo: MachinePointerInfo (), Alignment,
1481	MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1482	: MachineMemOperand::MONone);
1483	return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1484	}
1485
1486	// Lower a vXi1 store into following instructions
1487	// SVMi %1, %vm, 0
1488	// STrii %1, (,%addr)
1489	// SVMi %2, %vm, 1
1490	// STrii %2, 8(,%addr)
1491	// ...
1492	static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
1493	SDLoc DL(Op);
1494	StoreSDNode *StNode = dyn_cast<StoreSDNode>(Val: Op.getNode());
1495	assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1496
1497	SDValue BasePtr = StNode->getBasePtr();
1498	Align Alignment = StNode->getAlign();
1499	if (Alignment > `8`)
1500	Alignment = Align (`8`);
1501	EVT AddrVT = BasePtr.getValueType();
1502	EVT MemVT = StNode->getMemoryVT();
1503	if (MemVT == MVT::v256i1 \|\| MemVT == MVT::v4i64) {
1504	SDValue OutChains[`4`];
1505	for (int i = `0`; i < `4`; ++i) {
1506	SDNode *V =
1507	DAG.getMachineNode(Opcode: VE::SVMmi, dl: DL, VT: MVT::i64, Op1: StNode->getValue(),
1508	Op2: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64));
1509	SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1510	N2: DAG.getConstant(Val: `8` * i, DL, VT: AddrVT));
1511	OutChains[i] =
1512	DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue (V, `0`), Ptr: Addr,
1513	PtrInfo: MachinePointerInfo (), Alignment,
1514	MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1515	: MachineMemOperand::MONone);
1516	}
1517	return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1518	} else if (MemVT == MVT::v512i1 \|\| MemVT == MVT::v8i64) {
1519	SDValue OutChains[`8`];
1520	for (int i = `0`; i < `8`; ++i) {
1521	SDNode *V =
1522	DAG.getMachineNode(Opcode: VE::SVMyi, dl: DL, VT: MVT::i64, Op1: StNode->getValue(),
1523	Op2: DAG.getTargetConstant(Val: i, DL, VT: MVT::i64));
1524	SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1525	N2: DAG.getConstant(Val: `8` * i, DL, VT: AddrVT));
1526	OutChains[i] =
1527	DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue (V, `0`), Ptr: Addr,
1528	PtrInfo: MachinePointerInfo (), Alignment,
1529	MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1530	: MachineMemOperand::MONone);
1531	}
1532	return DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: OutChains);
1533	} else {
1534	// Otherwise, ask llvm to expand it.
1535	return SDValue ();
1536	}
1537	}
1538
1539	SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1540	StoreSDNode *StNode = cast<StoreSDNode>(Val: Op.getNode());
1541	assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1542	EVT MemVT = StNode->getMemoryVT();
1543
1544	// If VPU is enabled, always expand non-mask vector stores to VVP
1545	if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(SomeVT: MemVT))
1546	return lowerToVVP(Op, DAG);
1547
1548	SDValue BasePtr = StNode->getBasePtr();
1549	if (isa<FrameIndexSDNode>(Val: BasePtr.getNode())) {
1550	// Do not expand store instruction with frame index here because of
1551	// dependency problems. We expand it later in eliminateFrameIndex().
1552	return Op;
1553	}
1554
1555	if (MemVT == MVT::f128)
1556	return lowerStoreF128(Op, DAG);
1557	if (isMaskType(SomeVT: MemVT))
1558	return lowerStoreI1(Op, DAG);
1559
1560	// Otherwise, ask llvm to expand it.
1561	return SDValue ();
1562	}
1563
1564	SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1565	MachineFunction &MF = DAG.getMachineFunction();
1566	VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1567	auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1568
1569	// Need frame address to find the address of VarArgsFrameIndex.
1570	MF.getFrameInfo().setFrameAddressIsTaken(true);
1571
1572	// vastart just stores the address of the VarArgsFrameIndex slot into the
1573	// memory location argument.
1574	SDLoc DL(Op);
1575	SDValue Offset =
1576	DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: DAG.getRegister(Reg: VE::SX9, VT: PtrVT),
1577	N2: DAG.getIntPtrConstant(Val: FuncInfo->getVarArgsFrameOffset(), DL));
1578	const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: `2`))->getValue();
1579	return DAG.getStore(Chain: Op.getOperand(i: `0`), dl: DL, Val: Offset, Ptr: Op.getOperand(i: `1`),
1580	PtrInfo: MachinePointerInfo (SV));
1581	}
1582
1583	SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1584	SDNode *Node = Op.getNode();
1585	EVT VT = Node->getValueType(ResNo: `0`);
1586	SDValue InChain = Node->getOperand(Num: `0`);
1587	SDValue VAListPtr = Node->getOperand(Num: `1`);
1588	EVT PtrVT = VAListPtr.getValueType();
1589	const Value *SV = cast<SrcValueSDNode>(Val: Node->getOperand(Num: `2`))->getValue();
1590	SDLoc DL(Node);
1591	SDValue VAList =
1592	DAG.getLoad(VT: PtrVT, dl: DL, Chain: InChain, Ptr: VAListPtr, PtrInfo: MachinePointerInfo (SV));
1593	SDValue Chain = VAList.getValue(R: `1`);
1594	SDValue NextPtr;
1595
1596	if (VT == MVT::f128) {
1597	// VE f128 values must be stored with 16 bytes alignment. We don't
1598	// know the actual alignment of VAList, so we take alignment of it
1599	// dynamically.
1600	int Align = `16`;
1601	VAList = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList,
1602	N2: DAG.getConstant(Val: Align - `1`, DL, VT: PtrVT));
1603	VAList = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, N1: VAList,
1604	N2: DAG.getConstant(Val: -Align, DL, VT: PtrVT));
1605	// Increment the pointer, VAList, by 16 to the next vaarg.
1606	NextPtr =
1607	DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: `16`, DL));
1608	} else if (VT == MVT::f32) {
1609	// float --> need special handling like below.
1610	// 0 4
1611	// +------+------+
1612	// \| empty\| float\|
1613	// +------+------+
1614	// Increment the pointer, VAList, by 8 to the next vaarg.
1615	NextPtr =
1616	DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: `8`, DL));
1617	// Then, adjust VAList.
1618	unsigned InternalOffset = `4`;
1619	VAList = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList,
1620	N2: DAG.getConstant(Val: InternalOffset, DL, VT: PtrVT));
1621	} else {
1622	// Increment the pointer, VAList, by 8 to the next vaarg.
1623	NextPtr =
1624	DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: `8`, DL));
1625	}
1626
1627	// Store the incremented VAList to the legalized pointer.
1628	InChain = DAG.getStore(Chain, dl: DL, Val: NextPtr, Ptr: VAListPtr, PtrInfo: MachinePointerInfo (SV));
1629
1630	// Load the actual argument out of the pointer VAList.
1631	// We can't count on greater alignment than the word size.
1632	return DAG.getLoad(
1633	VT, dl: DL, Chain: InChain, Ptr: VAList, PtrInfo: MachinePointerInfo (),
1634	Alignment: Align (std::min(a: PtrVT.getSizeInBits(), b: VT.getSizeInBits()) / `8`));
1635	}
1636
1637	SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1638	SelectionDAG &DAG) const {
1639	// Generate following code.
1640	// (void)__llvm_grow_stack(size);
1641	// ret = GETSTACKTOP; // pseudo instruction
1642	SDLoc DL(Op);
1643
1644	// Get the inputs.
1645	SDNode *Node = Op.getNode();
1646	SDValue Chain = Op.getOperand(i: `0`);
1647	SDValue Size = Op.getOperand(i: `1`);
1648	MaybeAlign Alignment(Op.getConstantOperandVal(i: `2`));
1649	EVT VT = Node->getValueType(ResNo: `0`);
1650
1651	// Chain the dynamic stack allocation so that it doesn't modify the stack
1652	// pointer when other instructions are using the stack.
1653	Chain = DAG.getCALLSEQ_START(Chain, InSize: `0`, OutSize: `0`, DL);
1654
1655	const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1656	Align StackAlign = TFI.getStackAlign();
1657	bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1658
1659	// Prepare arguments
1660	TargetLowering::ArgListTy Args;
1661	TargetLowering::ArgListEntry Entry;
1662	Entry.Node = Size;
1663	Entry.Ty = Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext());
1664	Args.push_back(x: Entry);
1665	if (NeedsAlign) {
1666	Entry.Node = DAG.getConstant(Val: ~(Alignment ->value() - `1ULL`), DL, VT);
1667	Entry.Ty = Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext());
1668	Args.push_back(x: Entry);
1669	}
1670	Type RetTy = Type::getVoidTy(C&: DAG.getContext());
1671
1672	EVT PtrVT = Op.getValueType();
1673	SDValue Callee;
1674	if (NeedsAlign) {
1675	Callee = DAG.getTargetExternalSymbol(Sym: "__ve_grow_stack_align", VT: PtrVT, TargetFlags: `0`);
1676	} else {
1677	Callee = DAG.getTargetExternalSymbol(Sym: "__ve_grow_stack", VT: PtrVT, TargetFlags: `0`);
1678	}
1679
1680	TargetLowering::CallLoweringInfo CLI(DAG);
1681	CLI.setDebugLoc(DL)
1682	.setChain(Chain)
1683	.setCallee(CC: CallingConv::PreserveAll, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
1684	.setDiscardResult(true);
1685	std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1686	Chain = pair.second;
1687	SDValue Result = DAG.getNode(Opcode: VEISD::GETSTACKTOP, DL, VT, Operand: Chain);
1688	if (NeedsAlign) {
1689	Result = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Result,
1690	N2: DAG.getConstant(Val: (Alignment ->value() - `1ULL`), DL, VT));
1691	Result = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Result,
1692	N2: DAG.getConstant(Val: ~(Alignment ->value() - `1ULL`), DL, VT));
1693	}
1694	// Chain = Result.getValue(1);
1695	Chain = DAG.getCALLSEQ_END(Chain, Size1: `0`, Size2: `0`, Glue: SDValue (), DL);
1696
1697	SDValue Ops[`2`] = {Result, Chain};
1698	return DAG.getMergeValues(Ops, dl: DL);
1699	}
1700
1701	SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1702	SelectionDAG &DAG) const {
1703	SDLoc DL(Op);
1704	return DAG.getNode(Opcode: VEISD::EH_SJLJ_LONGJMP, DL, VT: MVT::Other, N1: Op.getOperand(i: `0`),
1705	N2: Op.getOperand(i: `1`));
1706	}
1707
1708	SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1709	SelectionDAG &DAG) const {
1710	SDLoc DL(Op);
1711	return DAG.getNode(Opcode: VEISD::EH_SJLJ_SETJMP, DL,
1712	VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other), N1: Op.getOperand(i: `0`),
1713	N2: Op.getOperand(i: `1`));
1714	}
1715
1716	SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1717	SelectionDAG &DAG) const {
1718	SDLoc DL(Op);
1719	return DAG.getNode(Opcode: VEISD::EH_SJLJ_SETUP_DISPATCH, DL, VT: MVT::Other,
1720	Operand: Op.getOperand(i: `0`));
1721	}
1722
1723	static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1724	const VETargetLowering &TLI,
1725	const VESubtarget *Subtarget) {
1726	SDLoc DL(Op);
1727	MachineFunction &MF = DAG.getMachineFunction();
1728	EVT PtrVT = TLI.getPointerTy(DL: MF.getDataLayout());
1729
1730	MachineFrameInfo &MFI = MF.getFrameInfo();
1731	MFI.setFrameAddressIsTaken(true);
1732
1733	unsigned Depth = Op.getConstantOperandVal(i: `0`);
1734	const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1735	Register FrameReg = RegInfo->getFrameRegister(MF);
1736	SDValue FrameAddr =
1737	DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT: PtrVT);
1738	while (Depth--)
1739	FrameAddr = DAG.getLoad(VT: Op.getValueType(), dl: DL, Chain: DAG.getEntryNode(),
1740	Ptr: FrameAddr, PtrInfo: MachinePointerInfo ());
1741	return FrameAddr;
1742	}
1743
1744	static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1745	const VETargetLowering &TLI,
1746	const VESubtarget *Subtarget) {
1747	MachineFunction &MF = DAG.getMachineFunction();
1748	MachineFrameInfo &MFI = MF.getFrameInfo();
1749	MFI.setReturnAddressIsTaken(true);
1750
1751	if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
1752	return SDValue ();
1753
1754	SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1755
1756	SDLoc DL(Op);
1757	EVT VT = Op.getValueType();
1758	SDValue Offset = DAG.getConstant(Val: `8`, DL, VT);
1759	return DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(),
1760	Ptr: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, N2: Offset),
1761	PtrInfo: MachinePointerInfo ());
1762	}
1763
1764	SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1765	SelectionDAG &DAG) const {
1766	SDLoc DL(Op);
1767	unsigned IntNo = Op.getConstantOperandVal(i: `0`);
1768	switch (IntNo) {
1769	default: // Don't custom lower most intrinsics.
1770	return SDValue ();
1771	case Intrinsic::eh_sjlj_lsda: {
1772	MachineFunction &MF = DAG.getMachineFunction();
1773	MVT VT = Op.getSimpleValueType();
1774	const VETargetMachine *TM =
1775	static_cast<const VETargetMachine *>(&DAG.getTarget());
1776
1777	// Create GCC_except_tableXX string. The real symbol for that will be
1778	// generated in EHStreamer::emitExceptionTable() later. So, we just
1779	// borrow it's name here.
1780	TM->getStrList()->push_back(x: std::string(
1781	(Twine ("GCC_except_table") + Twine (MF.getFunctionNumber())).str()));
1782	SDValue Addr =
1783	DAG.getTargetExternalSymbol(Sym: TM->getStrList()->back().c_str(), VT, TargetFlags: `0`);
1784	if (isPositionIndependent()) {
1785	Addr = makeHiLoPair(Op: Addr, HiTF: VEMCExpr::VK_VE_GOTOFF_HI32,
1786	LoTF: VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1787	SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT);
1788	return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: GlobalBase, N2: Addr);
1789	}
1790	return makeHiLoPair(Op: Addr, HiTF: VEMCExpr::VK_VE_HI32, LoTF: VEMCExpr::VK_VE_LO32, DAG);
1791	}
1792	}
1793	}
1794
1795	static bool getUniqueInsertion(SDNode N, unsigned* &UniqueIdx) {
1796	if (!isa<BuildVectorSDNode>(Val: N))
1797	return false;
1798	const auto *BVN = cast<BuildVectorSDNode>(Val: N);
1799
1800	// Find first non-undef insertion.
1801	unsigned Idx;
1802	for (Idx = `0`; Idx < BVN->getNumOperands(); ++Idx) {
1803	auto ElemV = BVN->getOperand(Num: Idx);
1804	if (!ElemV ->isUndef())
1805	break;
1806	}
1807	// Catch the (hypothetical) all-undef case.
1808	if (Idx == BVN->getNumOperands())
1809	return false;
1810	// Remember insertion.
1811	UniqueIdx = Idx++;
1812	// Verify that all other insertions are undef.
1813	for (; Idx < BVN->getNumOperands(); ++Idx) {
1814	auto ElemV = BVN->getOperand(Num: Idx);
1815	if (!ElemV ->isUndef())
1816	return false;
1817	}
1818	return true;
1819	}
1820
1821	static SDValue getSplatValue(SDNode *N) {
1822	if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(Val: N)) {
1823	return BuildVec->getSplatValue();
1824	}
1825	return SDValue ();
1826	}
1827
1828	SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1829	SelectionDAG &DAG) const {
1830	VECustomDAG CDAG(DAG, Op);
1831	MVT ResultVT = Op.getSimpleValueType();
1832
1833	// If there is just one element, expand to INSERT_VECTOR_ELT.
1834	unsigned UniqueIdx;
1835	if (getUniqueInsertion(N: Op.getNode(), UniqueIdx)) {
1836	SDValue AccuV = CDAG.getUNDEF(VT: Op.getValueType());
1837	auto ElemV = Op ->getOperand(Num: UniqueIdx);
1838	SDValue IdxV = CDAG.getConstant(Val: UniqueIdx, VT: MVT::i64);
1839	return CDAG.getNode(OC: ISD::INSERT_VECTOR_ELT, ResVT: ResultVT, OpV: {AccuV, ElemV, IdxV});
1840	}
1841
1842	// Else emit a broadcast.
1843	if (SDValue ScalarV = getSplatValue(N: Op.getNode())) {
1844	unsigned NumEls = ResultVT.getVectorNumElements();
1845	auto AVL = CDAG.getConstant(Val: NumEls, VT: MVT::i32);
1846	return CDAG.getBroadcast(ResultVT, Scalar: ScalarV, AVL);
1847	}
1848
1849	// Expand
1850	return SDValue ();
1851	}
1852
1853	TargetLowering::LegalizeAction
1854	VETargetLowering::getCustomOperationAction(SDNode &Op) const {
1855	// Custom legalization on VVP_ and VEC_* opcodes is required to pack-legalize*
1856	// these operations (transform nodes such that their AVL parameter refers to
1857	// packs of 64bit, instead of number of elements.
1858
1859	// Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1860	// re-visit them.
1861	if (isPackingSupportOpcode(Opc: Op.getOpcode()))
1862	return Legal;
1863
1864	// Custom lower to legalize AVL for packed mode.
1865	if (isVVPOrVEC(Op.getOpcode()))
1866	return Custom;
1867	return Legal;
1868	}
1869
1870	SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1871	LLVM_DEBUG(dbgs() << "::LowerOperation "; Op.dump(&DAG));
1872	unsigned Opcode = Op.getOpcode();
1873
1874	/// Scalar isel.
1875	switch (Opcode) {
1876	case ISD::ATOMIC_FENCE:
1877	return lowerATOMIC_FENCE(Op, DAG);
1878	case ISD::ATOMIC_SWAP:
1879	return lowerATOMIC_SWAP(Op, DAG);
1880	case ISD::BlockAddress:
1881	return lowerBlockAddress(Op, DAG);
1882	case ISD::ConstantPool:
1883	return lowerConstantPool(Op, DAG);
1884	case ISD::DYNAMIC_STACKALLOC:
1885	return lowerDYNAMIC_STACKALLOC(Op, DAG);
1886	case ISD::EH_SJLJ_LONGJMP:
1887	return lowerEH_SJLJ_LONGJMP(Op, DAG);
1888	case ISD::EH_SJLJ_SETJMP:
1889	return lowerEH_SJLJ_SETJMP(Op, DAG);
1890	case ISD::EH_SJLJ_SETUP_DISPATCH:
1891	return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1892	case ISD::FRAMEADDR:
1893	return lowerFRAMEADDR(Op, DAG, TLI: *this, Subtarget);
1894	case ISD::GlobalAddress:
1895	return lowerGlobalAddress(Op, DAG);
1896	case ISD::GlobalTLSAddress:
1897	return lowerGlobalTLSAddress(Op, DAG);
1898	case ISD::INTRINSIC_WO_CHAIN:
1899	return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1900	case ISD::JumpTable:
1901	return lowerJumpTable(Op, DAG);
1902	case ISD::LOAD:
1903	return lowerLOAD(Op, DAG);
1904	case ISD::RETURNADDR:
1905	return lowerRETURNADDR(Op, DAG, TLI: *this, Subtarget);
1906	case ISD::BUILD_VECTOR:
1907	return lowerBUILD_VECTOR(Op, DAG);
1908	case ISD::STORE:
1909	return lowerSTORE(Op, DAG);
1910	case ISD::VASTART:
1911	return lowerVASTART(Op, DAG);
1912	case ISD::VAARG:
1913	return lowerVAARG(Op, DAG);
1914
1915	case ISD::INSERT_VECTOR_ELT:
1916	return lowerINSERT_VECTOR_ELT(Op, DAG);
1917	case ISD::EXTRACT_VECTOR_ELT:
1918	return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1919	}
1920
1921	/// Vector isel.
1922	if (ISD::isVPOpcode(Opcode))
1923	return lowerToVVP(Op, DAG);
1924
1925	switch (Opcode) {
1926	default:
1927	llvm_unreachable("Should not custom lower this!");
1928
1929	// Legalize the AVL of this internal node.
1930	case VEISD::VEC_BROADCAST:
1931	#define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
1932	#include "VVPNodes.def"
1933	// AVL already legalized.
1934	if (getAnnotatedNodeAVL(Op).second)
1935	return Op;
1936	return legalizeInternalVectorOp(Op, DAG);
1937
1938	// Translate into a VEC_/VVP_* layer operation.*
1939	case ISD::MLOAD:
1940	case ISD::MSTORE:
1941	#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1942	#include "VVPNodes.def"
1943	if (isMaskArithmetic(Op) && isPackedVectorType(SomeVT: Op.getValueType()))
1944	return splitMaskArithmetic(Op, DAG);
1945	return lowerToVVP(Op, DAG);
1946	}
1947	}
1948	/// } Custom Lower
1949
1950	void VETargetLowering::ReplaceNodeResults(SDNode *N,
1951	SmallVectorImpl<SDValue> &Results,
1952	SelectionDAG &DAG) const {
1953	switch (N->getOpcode()) {
1954	case ISD::ATOMIC_SWAP:
1955	// Let LLVM expand atomic swap instruction through LowerOperation.
1956	return;
1957	default:
1958	LLVM_DEBUG(N->dumpr(&DAG));
1959	llvm_unreachable("Do not know how to custom type legalize this operation!");
1960	}
1961	}
1962
1963	/// JumpTable for VE.
1964	///
1965	/// VE cannot generate relocatable symbol in jump table. VE cannot
1966	/// generate expressions using symbols in both text segment and data
1967	/// segment like below.
1968	/// .4byte .LBB0_2-.LJTI0_0
1969	/// So, we generate offset from the top of function like below as
1970	/// a custom label.
1971	/// .4byte .LBB0_2-<function name>
1972
1973	unsigned VETargetLowering::getJumpTableEncoding() const {
1974	// Use custom label for PIC.
1975	if (isPositionIndependent())
1976	return MachineJumpTableInfo::EK_Custom32;
1977
1978	// Otherwise, use the normal jump table encoding heuristics.
1979	return TargetLowering::getJumpTableEncoding();
1980	}
1981
1982	const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1983	const MachineJumpTableInfo MJTI, const* MachineBasicBlock *MBB,
1984	unsigned Uid, MCContext &Ctx) const {
1985	assert(isPositionIndependent());
1986
1987	// Generate custom label for PIC like below.
1988	// .4bytes .LBB0_2-<function name>
1989	const auto *Value = MCSymbolRefExpr::create(Symbol: MBB->getSymbol(), Ctx);
1990	MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: MBB->getParent()->getName().data());
1991	const auto *Base = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
1992	return MCBinaryExpr::createSub(LHS: Value, RHS: Base, Ctx);
1993	}
1994
1995	SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1996	SelectionDAG &DAG) const {
1997	assert(isPositionIndependent());
1998	SDLoc DL(Table);
1999	Function *Function = &DAG.getMachineFunction().getFunction();
2000	assert(Function != nullptr);
2001	auto PtrTy = getPointerTy(DL: DAG.getDataLayout(), AS: Function->getAddressSpace());
2002
2003	// In the jump table, we have following values in PIC mode.
2004	// .4bytes .LBB0_2-<function name>
2005	// We need to add this value and the address of this function to generate
2006	// .LBB0_2 label correctly under PIC mode. So, we want to generate following
2007	// instructions:
2008	// lea %reg, fun@gotoff_lo
2009	// and %reg, %reg, (32)0
2010	// lea.sl %reg, fun@gotoff_hi(%reg, %got)
2011	// In order to do so, we need to genarate correctly marked DAG node using
2012	// makeHiLoPair.
2013	SDValue Op = DAG.getGlobalAddress(GV: Function, DL, VT: PtrTy);
2014	SDValue HiLo = makeHiLoPair(Op, HiTF: VEMCExpr::VK_VE_GOTOFF_HI32,
2015	LoTF: VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
2016	SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrTy);
2017	return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: GlobalBase, N2: HiLo);
2018	}
2019
2020	Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
2021	MachineBasicBlock::iterator I,
2022	MachineBasicBlock *TargetBB,
2023	const DebugLoc &DL) const {
2024	MachineFunction *MF = MBB.getParent();
2025	MachineRegisterInfo &MRI = MF->getRegInfo();
2026	const VEInstrInfo *TII = Subtarget->getInstrInfo();
2027
2028	const TargetRegisterClass *RC = &VE::I64RegClass;
2029	Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2030	Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2031	Register Result = MRI.createVirtualRegister(RegClass: RC);
2032
2033	if (isPositionIndependent()) {
2034	// Create following instructions for local linkage PIC code.
2035	// lea %Tmp1, TargetBB@gotoff_lo
2036	// and %Tmp2, %Tmp1, (32)0
2037	// lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2038	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2039	.addImm(Val: `0`)
2040	.addImm(Val: `0`)
2041	.addMBB(MBB: TargetBB, TargetFlags: VEMCExpr::VK_VE_GOTOFF_LO32);
2042	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2043	.addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2044	.addImm(Val: M0(Val: `32`));
2045	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Result)
2046	.addReg(RegNo: VE::SX15)
2047	.addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2048	.addMBB(MBB: TargetBB, TargetFlags: VEMCExpr::VK_VE_GOTOFF_HI32);
2049	} else {
2050	// Create following instructions for non-PIC code.
2051	// lea %Tmp1, TargetBB@lo
2052	// and %Tmp2, %Tmp1, (32)0
2053	// lea.sl %Result, TargetBB@hi(%Tmp2)
2054	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2055	.addImm(Val: `0`)
2056	.addImm(Val: `0`)
2057	.addMBB(MBB: TargetBB, TargetFlags: VEMCExpr::VK_VE_LO32);
2058	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2059	.addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2060	.addImm(Val: M0(Val: `32`));
2061	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: Result)
2062	.addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2063	.addImm(Val: `0`)
2064	.addMBB(MBB: TargetBB, TargetFlags: VEMCExpr::VK_VE_HI32);
2065	}
2066	return Result;
2067	}
2068
2069	Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
2070	MachineBasicBlock::iterator I,
2071	StringRef Symbol, const DebugLoc &DL,
2072	bool IsLocal = false,
2073	bool IsCall = false) const {
2074	MachineFunction *MF = MBB.getParent();
2075	MachineRegisterInfo &MRI = MF->getRegInfo();
2076	const VEInstrInfo *TII = Subtarget->getInstrInfo();
2077
2078	const TargetRegisterClass *RC = &VE::I64RegClass;
2079	Register Result = MRI.createVirtualRegister(RegClass: RC);
2080
2081	if (isPositionIndependent()) {
2082	if (IsCall && !IsLocal) {
2083	// Create following instructions for non-local linkage PIC code function
2084	// calls. These instructions uses IC and magic number -24, so we expand
2085	// them in VEAsmPrinter.cpp from GETFUNPLT pseudo instruction.
2086	// lea %Reg, Symbol@plt_lo(-24)
2087	// and %Reg, %Reg, (32)0
2088	// sic %s16
2089	// lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
2090	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::GETFUNPLT), DestReg: Result)
2091	.addExternalSymbol(FnName: "abort");
2092	} else if (IsLocal) {
2093	Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2094	Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2095	// Create following instructions for local linkage PIC code.
2096	// lea %Tmp1, Symbol@gotoff_lo
2097	// and %Tmp2, %Tmp1, (32)0
2098	// lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2099	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2100	.addImm(Val: `0`)
2101	.addImm(Val: `0`)
2102	.addExternalSymbol(FnName: Symbol.data(), TargetFlags: VEMCExpr::VK_VE_GOTOFF_LO32);
2103	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2104	.addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2105	.addImm(Val: M0(Val: `32`));
2106	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Result)
2107	.addReg(RegNo: VE::SX15)
2108	.addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2109	.addExternalSymbol(FnName: Symbol.data(), TargetFlags: VEMCExpr::VK_VE_GOTOFF_HI32);
2110	} else {
2111	Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2112	Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2113	// Create following instructions for not local linkage PIC code.
2114	// lea %Tmp1, Symbol@got_lo
2115	// and %Tmp2, %Tmp1, (32)0
2116	// lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2117	// ld %Result, 0(%Tmp3)
2118	Register Tmp3 = MRI.createVirtualRegister(RegClass: RC);
2119	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2120	.addImm(Val: `0`)
2121	.addImm(Val: `0`)
2122	.addExternalSymbol(FnName: Symbol.data(), TargetFlags: VEMCExpr::VK_VE_GOT_LO32);
2123	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2124	.addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2125	.addImm(Val: M0(Val: `32`));
2126	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: Tmp3)
2127	.addReg(RegNo: VE::SX15)
2128	.addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2129	.addExternalSymbol(FnName: Symbol.data(), TargetFlags: VEMCExpr::VK_VE_GOT_HI32);
2130	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: Result)
2131	.addReg(RegNo: Tmp3, flags: getKillRegState(B: true))
2132	.addImm(Val: `0`)
2133	.addImm(Val: `0`);
2134	}
2135	} else {
2136	Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2137	Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2138	// Create following instructions for non-PIC code.
2139	// lea %Tmp1, Symbol@lo
2140	// and %Tmp2, %Tmp1, (32)0
2141	// lea.sl %Result, Symbol@hi(%Tmp2)
2142	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2143	.addImm(Val: `0`)
2144	.addImm(Val: `0`)
2145	.addExternalSymbol(FnName: Symbol.data(), TargetFlags: VEMCExpr::VK_VE_LO32);
2146	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2147	.addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2148	.addImm(Val: M0(Val: `32`));
2149	BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: Result)
2150	.addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2151	.addImm(Val: `0`)
2152	.addExternalSymbol(FnName: Symbol.data(), TargetFlags: VEMCExpr::VK_VE_HI32);
2153	}
2154	return Result;
2155	}
2156
2157	void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
2158	MachineBasicBlock *MBB,
2159	MachineBasicBlock *DispatchBB,
2160	int FI, int Offset) const {
2161	DebugLoc DL = MI.getDebugLoc();
2162	const VEInstrInfo *TII = Subtarget->getInstrInfo();
2163
2164	Register LabelReg =
2165	prepareMBB(MBB&: *MBB, I: MachineBasicBlock::iterator (MI), TargetBB: DispatchBB, DL);
2166
2167	// Store an address of DispatchBB to a given jmpbuf[1] where has next IC
2168	// referenced by longjmp (throw) later.
2169	MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2170	addFrameReference(MIB, FI, Offset); // jmpbuf[1]
2171	MIB.addReg(RegNo: LabelReg, flags: getKillRegState(B: true));
2172	}
2173
2174	MachineBasicBlock *
2175	VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
2176	MachineBasicBlock MBB) const* {
2177	DebugLoc DL = MI.getDebugLoc();
2178	MachineFunction *MF = MBB->getParent();
2179	const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2180	const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
2181	MachineRegisterInfo &MRI = MF->getRegInfo();
2182
2183	const BasicBlock *BB = MBB->getBasicBlock();
2184	MachineFunction::iterator I = ++MBB->getIterator();
2185
2186	// Memory Reference.
2187	SmallVector<MachineMemOperand *, `2`> MMOs(MI.memoperands_begin(),
2188	MI.memoperands_end());
2189	Register BufReg = MI.getOperand(i: `1`).getReg();
2190
2191	Register DstReg;
2192
2193	DstReg = MI.getOperand(i: `0`).getReg();
2194	const TargetRegisterClass *RC = MRI.getRegClass(Reg: DstReg);
2195	assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
2196	(void)TRI;
2197	Register MainDestReg = MRI.createVirtualRegister(RegClass: RC);
2198	Register RestoreDestReg = MRI.createVirtualRegister(RegClass: RC);
2199
2200	// For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following
2201	// instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2202	//
2203	// ThisMBB:
2204	// buf[3] = %s17 iff %s17 is used as BP
2205	// buf[1] = RestoreMBB as IC after longjmp
2206	// # SjLjSetup RestoreMBB
2207	//
2208	// MainMBB:
2209	// v_main = 0
2210	//
2211	// SinkMBB:
2212	// v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2213	// ...
2214	//
2215	// RestoreMBB:
2216	// %s17 = buf[3] = iff %s17 is used as BP
2217	// v_restore = 1
2218	// goto SinkMBB
2219
2220	MachineBasicBlock *ThisMBB = MBB;
2221	MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2222	MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2223	MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2224	MF->insert(MBBI: I, MBB: MainMBB);
2225	MF->insert(MBBI: I, MBB: SinkMBB);
2226	MF->push_back(MBB: RestoreMBB);
2227	RestoreMBB->setMachineBlockAddressTaken();
2228
2229	// Transfer the remainder of BB and its successor edges to SinkMBB.
2230	SinkMBB->splice(Where: SinkMBB->begin(), Other: MBB,
2231	From: std::next(x: MachineBasicBlock::iterator (MI)), To: MBB->end());
2232	SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
2233
2234	// ThisMBB:
2235	Register LabelReg =
2236	prepareMBB(MBB&: *MBB, I: MachineBasicBlock::iterator (MI), TargetBB: RestoreMBB, DL);
2237
2238	// Store BP in buf[3] iff this function is using BP.
2239	const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2240	if (TFI->hasBP(MF: *MF)) {
2241	MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2242	MIB.addReg(RegNo: BufReg);
2243	MIB.addImm(Val: `0`);
2244	MIB.addImm(Val: `24`);
2245	MIB.addReg(RegNo: VE::SX17);
2246	MIB.setMemRefs(MMOs);
2247	}
2248
2249	// Store IP in buf[1].
2250	MachineInstrBuilder MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::STrii));
2251	MIB.add(MO: MI.getOperand(i: `1`)); // we can preserve the kill flags here.
2252	MIB.addImm(Val: `0`);
2253	MIB.addImm(Val: `8`);
2254	MIB.addReg(RegNo: LabelReg, flags: getKillRegState(B: true));
2255	MIB.setMemRefs(MMOs);
2256
2257	// SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2258
2259	// Insert setup.
2260	MIB =
2261	BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::EH_SjLj_Setup)).addMBB(MBB: RestoreMBB);
2262
2263	const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2264	MIB.addRegMask(Mask: RegInfo->getNoPreservedMask());
2265	ThisMBB->addSuccessor(Succ: MainMBB);
2266	ThisMBB->addSuccessor(Succ: RestoreMBB);
2267
2268	// MainMBB:
2269	BuildMI(BB: MainMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: MainDestReg)
2270	.addImm(Val: `0`)
2271	.addImm(Val: `0`)
2272	.addImm(Val: `0`);
2273	MainMBB->addSuccessor(Succ: SinkMBB);
2274
2275	// SinkMBB:
2276	BuildMI(BB&: *SinkMBB, I: SinkMBB->begin(), MIMD: DL, MCID: TII->get(Opcode: VE::PHI), DestReg: DstReg)
2277	.addReg(RegNo: MainDestReg)
2278	.addMBB(MBB: MainMBB)
2279	.addReg(RegNo: RestoreDestReg)
2280	.addMBB(MBB: RestoreMBB);
2281
2282	// RestoreMBB:
2283	// Restore BP from buf[3] iff this function is using BP. The address of
2284	// buf is in SX10.
2285	// FIXME: Better to not use SX10 here
2286	if (TFI->hasBP(MF: *MF)) {
2287	MachineInstrBuilder MIB =
2288	BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: VE::SX17);
2289	MIB.addReg(RegNo: VE::SX10);
2290	MIB.addImm(Val: `0`);
2291	MIB.addImm(Val: `24`);
2292	MIB.setMemRefs(MMOs);
2293	}
2294	BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: RestoreDestReg)
2295	.addImm(Val: `0`)
2296	.addImm(Val: `0`)
2297	.addImm(Val: `1`);
2298	BuildMI(BB: RestoreMBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLa_t)).addMBB(MBB: SinkMBB);
2299	RestoreMBB->addSuccessor(Succ: SinkMBB);
2300
2301	MI.eraseFromParent();
2302	return SinkMBB;
2303	}
2304
2305	MachineBasicBlock *
2306	VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2307	MachineBasicBlock MBB) const* {
2308	DebugLoc DL = MI.getDebugLoc();
2309	MachineFunction *MF = MBB->getParent();
2310	const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2311	MachineRegisterInfo &MRI = MF->getRegInfo();
2312
2313	// Memory Reference.
2314	SmallVector<MachineMemOperand *, `2`> MMOs(MI.memoperands_begin(),
2315	MI.memoperands_end());
2316	Register BufReg = MI.getOperand(i: `0`).getReg();
2317
2318	Register Tmp = MRI.createVirtualRegister(RegClass: &VE::I64RegClass);
2319	// Since FP is only updated here but NOT referenced, it's treated as GPR.
2320	Register FP = VE::SX9;
2321	Register SP = VE::SX11;
2322
2323	MachineInstrBuilder MIB;
2324
2325	MachineBasicBlock *ThisMBB = MBB;
2326
2327	// For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions.
2328	//
2329	// ThisMBB:
2330	// %fp = load buf[0]
2331	// %jmp = load buf[1]
2332	// %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2333	// %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2334	// jmp %jmp
2335
2336	// Reload FP.
2337	MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: FP);
2338	MIB.addReg(RegNo: BufReg);
2339	MIB.addImm(Val: `0`);
2340	MIB.addImm(Val: `0`);
2341	MIB.setMemRefs(MMOs);
2342
2343	// Reload IP.
2344	MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: Tmp);
2345	MIB.addReg(RegNo: BufReg);
2346	MIB.addImm(Val: `0`);
2347	MIB.addImm(Val: `8`);
2348	MIB.setMemRefs(MMOs);
2349
2350	// Copy BufReg to SX10 for later use in setjmp.
2351	// FIXME: Better to not use SX10 here
2352	BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::ORri), DestReg: VE::SX10)
2353	.addReg(RegNo: BufReg)
2354	.addImm(Val: `0`);
2355
2356	// Reload SP.
2357	MIB = BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::LDrii), DestReg: SP);
2358	MIB.add(MO: MI.getOperand(i: `0`)); // we can preserve the kill flags here.
2359	MIB.addImm(Val: `0`);
2360	MIB.addImm(Val: `16`);
2361	MIB.setMemRefs(MMOs);
2362
2363	// Jump.
2364	BuildMI(BB&: *ThisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2365	.addReg(RegNo: Tmp, flags: getKillRegState(B: true))
2366	.addImm(Val: `0`);
2367
2368	MI.eraseFromParent();
2369	return ThisMBB;
2370	}
2371
2372	MachineBasicBlock *
2373	VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2374	MachineBasicBlock BB) const* {
2375	DebugLoc DL = MI.getDebugLoc();
2376	MachineFunction *MF = BB->getParent();
2377	MachineFrameInfo &MFI = MF->getFrameInfo();
2378	MachineRegisterInfo &MRI = MF->getRegInfo();
2379	const VEInstrInfo *TII = Subtarget->getInstrInfo();
2380	int FI = MFI.getFunctionContextIndex();
2381
2382	// Get a mapping of the call site numbers to all of the landing pads they're
2383	// associated with.
2384	DenseMap<unsigned, SmallVector<MachineBasicBlock *, `2`>> CallSiteNumToLPad;
2385	unsigned MaxCSNum = `0`;
2386	for (auto &MBB : *MF) {
2387	if (!MBB.isEHPad())
2388	continue;
2389
2390	MCSymbol Sym = nullptr*;
2391	for (const auto &MI : MBB) {
2392	if (MI.isDebugInstr())
2393	continue;
2394
2395	assert(MI.isEHLabel() && "expected EH_LABEL");
2396	Sym = MI.getOperand(i: `0`).getMCSymbol();
2397	break;
2398	}
2399
2400	if (!MF->hasCallSiteLandingPad(Sym))
2401	continue;
2402
2403	for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2404	CallSiteNumToLPad [CSI].push_back(Elt: &MBB);
2405	MaxCSNum = std::max(a: MaxCSNum, b: CSI);
2406	}
2407	}
2408
2409	// Get an ordered list of the machine basic blocks for the jump table.
2410	std::vector<MachineBasicBlock *> LPadList;
2411	SmallPtrSet<MachineBasicBlock *, `32`> InvokeBBs;
2412	LPadList.reserve(n: CallSiteNumToLPad.size());
2413
2414	for (unsigned CSI = `1`; CSI <= MaxCSNum; ++CSI) {
2415	for (auto &LP : CallSiteNumToLPad [CSI]) {
2416	LPadList.push_back(x: LP);
2417	InvokeBBs.insert(I: LP->pred_begin(), E: LP->pred_end());
2418	}
2419	}
2420
2421	assert(!LPadList.empty() &&
2422	"No landing pad destinations for the dispatch jump table!");
2423
2424	// The %fn_context is allocated like below (from --print-after=sjljehprepare):
2425	// %fn_context = alloca { i8, i64, [4 x i64], i8, i8, [5 x i8] }
2426	//
2427	// This `[5 x i8]` is jmpbuf, so jmpbuf[1] is FI+72.*
2428	// First `i64` is callsite, so callsite is FI+8.
2429	static const int OffsetIC = `72`;
2430	static const int OffsetCS = `8`;
2431
2432	// Create the MBBs for the dispatch code like following:
2433	//
2434	// ThisMBB:
2435	// Prepare DispatchBB address and store it to buf[1].
2436	// ...
2437	//
2438	// DispatchBB:
2439	// %s15 = GETGOT iff isPositionIndependent
2440	// %callsite = load callsite
2441	// brgt.l.t #size of callsites, %callsite, DispContBB
2442	//
2443	// TrapBB:
2444	// Call abort.
2445	//
2446	// DispContBB:
2447	// %breg = address of jump table
2448	// %pc = load and calculate next pc from %breg and %callsite
2449	// jmp %pc
2450
2451	// Shove the dispatch's address into the return slot in the function context.
2452	MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2453	DispatchBB->setIsEHPad(true);
2454
2455	// Trap BB will causes trap like `assert(0)`.
2456	MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2457	DispatchBB->addSuccessor(Succ: TrapBB);
2458
2459	MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2460	DispatchBB->addSuccessor(Succ: DispContBB);
2461
2462	// Insert MBBs.
2463	MF->push_back(MBB: DispatchBB);
2464	MF->push_back(MBB: DispContBB);
2465	MF->push_back(MBB: TrapBB);
2466
2467	// Insert code to call abort in the TrapBB.
2468	Register Abort = prepareSymbol(MBB&: *TrapBB, I: TrapBB->end(), Symbol: "abort", DL,
2469	/ Local / IsLocal: false, / Call / IsCall: true);
2470	BuildMI(BB: TrapBB, MIMD: DL, MCID: TII->get(Opcode: VE::BSICrii), DestReg: VE::SX10)
2471	.addReg(RegNo: Abort, flags: getKillRegState(B: true))
2472	.addImm(Val: `0`)
2473	.addImm(Val: `0`);
2474
2475	// Insert code into the entry block that creates and registers the function
2476	// context.
2477	setupEntryBlockForSjLj(MI, MBB: BB, DispatchBB, FI, Offset: OffsetIC);
2478
2479	// Create the jump table and associated information
2480	unsigned JTE = getJumpTableEncoding();
2481	MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTEntryKind: JTE);
2482	unsigned MJTI = JTI->createJumpTableIndex(DestBBs: LPadList);
2483
2484	const VERegisterInfo &RI = TII->getRegisterInfo();
2485	// Add a register mask with no preserved registers. This results in all
2486	// registers being marked as clobbered.
2487	BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::NOP))
2488	.addRegMask(Mask: RI.getNoPreservedMask());
2489
2490	if (isPositionIndependent()) {
2491	// Force to generate GETGOT, since current implementation doesn't store GOT
2492	// register.
2493	BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::GETGOT), DestReg: VE::SX15);
2494	}
2495
2496	// IReg is used as an index in a memory operand and therefore can't be SP
2497	const TargetRegisterClass *RC = &VE::I64RegClass;
2498	Register IReg = MRI.createVirtualRegister(RegClass: RC);
2499	addFrameReference(MIB: BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDLZXrii), DestReg: IReg), FI,
2500	Offset: OffsetCS);
2501	if (LPadList.size() < `64`) {
2502	BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLir_t))
2503	.addImm(Val: VECC::CC_ILE)
2504	.addImm(Val: LPadList.size())
2505	.addReg(RegNo: IReg)
2506	.addMBB(MBB: TrapBB);
2507	} else {
2508	assert(LPadList.size() <= `0x7FFFFFFF` && "Too large Landing Pad!");
2509	Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
2510	BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: TmpReg)
2511	.addImm(Val: `0`)
2512	.addImm(Val: `0`)
2513	.addImm(Val: LPadList.size());
2514	BuildMI(BB: DispatchBB, MIMD: DL, MCID: TII->get(Opcode: VE::BRCFLrr_t))
2515	.addImm(Val: VECC::CC_ILE)
2516	.addReg(RegNo: TmpReg, flags: getKillRegState(B: true))
2517	.addReg(RegNo: IReg)
2518	.addMBB(MBB: TrapBB);
2519	}
2520
2521	Register BReg = MRI.createVirtualRegister(RegClass: RC);
2522	Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2523	Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2524
2525	if (isPositionIndependent()) {
2526	// Create following instructions for local linkage PIC code.
2527	// lea %Tmp1, .LJTI0_0@gotoff_lo
2528	// and %Tmp2, %Tmp1, (32)0
2529	// lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2530	BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2531	.addImm(Val: `0`)
2532	.addImm(Val: `0`)
2533	.addJumpTableIndex(Idx: MJTI, TargetFlags: VEMCExpr::VK_VE_GOTOFF_LO32);
2534	BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2535	.addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2536	.addImm(Val: M0(Val: `32`));
2537	BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrri), DestReg: BReg)
2538	.addReg(RegNo: VE::SX15)
2539	.addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2540	.addJumpTableIndex(Idx: MJTI, TargetFlags: VEMCExpr::VK_VE_GOTOFF_HI32);
2541	} else {
2542	// Create following instructions for non-PIC code.
2543	// lea %Tmp1, .LJTI0_0@lo
2544	// and %Tmp2, %Tmp1, (32)0
2545	// lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2546	BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEAzii), DestReg: Tmp1)
2547	.addImm(Val: `0`)
2548	.addImm(Val: `0`)
2549	.addJumpTableIndex(Idx: MJTI, TargetFlags: VEMCExpr::VK_VE_LO32);
2550	BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ANDrm), DestReg: Tmp2)
2551	.addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2552	.addImm(Val: M0(Val: `32`));
2553	BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LEASLrii), DestReg: BReg)
2554	.addReg(RegNo: Tmp2, flags: getKillRegState(B: true))
2555	.addImm(Val: `0`)
2556	.addJumpTableIndex(Idx: MJTI, TargetFlags: VEMCExpr::VK_VE_HI32);
2557	}
2558
2559	switch (JTE) {
2560	case MachineJumpTableInfo::EK_BlockAddress: {
2561	// Generate simple block address code for no-PIC model.
2562	// sll %Tmp1, %IReg, 3
2563	// lds %TReg, 0(%Tmp1, %BReg)
2564	// bcfla %TReg
2565
2566	Register TReg = MRI.createVirtualRegister(RegClass: RC);
2567	Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2568
2569	BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::SLLri), DestReg: Tmp1)
2570	.addReg(RegNo: IReg, flags: getKillRegState(B: true))
2571	.addImm(Val: `3`);
2572	BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDrri), DestReg: TReg)
2573	.addReg(RegNo: BReg, flags: getKillRegState(B: true))
2574	.addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2575	.addImm(Val: `0`);
2576	BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2577	.addReg(RegNo: TReg, flags: getKillRegState(B: true))
2578	.addImm(Val: `0`);
2579	break;
2580	}
2581	case MachineJumpTableInfo::EK_Custom32: {
2582	// Generate block address code using differences from the function pointer
2583	// for PIC model.
2584	// sll %Tmp1, %IReg, 2
2585	// ldl.zx %OReg, 0(%Tmp1, %BReg)
2586	// Prepare function address in BReg2.
2587	// adds.l %TReg, %BReg2, %OReg
2588	// bcfla %TReg
2589
2590	assert(isPositionIndependent());
2591	Register OReg = MRI.createVirtualRegister(RegClass: RC);
2592	Register TReg = MRI.createVirtualRegister(RegClass: RC);
2593	Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2594
2595	BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::SLLri), DestReg: Tmp1)
2596	.addReg(RegNo: IReg, flags: getKillRegState(B: true))
2597	.addImm(Val: `2`);
2598	BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::LDLZXrri), DestReg: OReg)
2599	.addReg(RegNo: BReg, flags: getKillRegState(B: true))
2600	.addReg(RegNo: Tmp1, flags: getKillRegState(B: true))
2601	.addImm(Val: `0`);
2602	Register BReg2 =
2603	prepareSymbol(MBB&: *DispContBB, I: DispContBB->end(),
2604	Symbol: DispContBB->getParent()->getName(), DL, / Local / IsLocal: true);
2605	BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::ADDSLrr), DestReg: TReg)
2606	.addReg(RegNo: OReg, flags: getKillRegState(B: true))
2607	.addReg(RegNo: BReg2, flags: getKillRegState(B: true));
2608	BuildMI(BB: DispContBB, MIMD: DL, MCID: TII->get(Opcode: VE::BCFLari_t))
2609	.addReg(RegNo: TReg, flags: getKillRegState(B: true))
2610	.addImm(Val: `0`);
2611	break;
2612	}
2613	default:
2614	llvm_unreachable("Unexpected jump table encoding");
2615	}
2616
2617	// Add the jump table entries as successors to the MBB.
2618	SmallPtrSet<MachineBasicBlock *, `8`> SeenMBBs;
2619	for (auto &LP : LPadList)
2620	if (SeenMBBs.insert(Ptr: LP).second)
2621	DispContBB->addSuccessor(Succ: LP);
2622
2623	// N.B. the order the invoke BBs are processed in doesn't matter here.
2624	SmallVector<MachineBasicBlock *, `64`> MBBLPads;
2625	const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2626	for (MachineBasicBlock *MBB : InvokeBBs) {
2627	// Remove the landing pad successor from the invoke block and replace it
2628	// with the new dispatch block.
2629	// Keep a copy of Successors since it's modified inside the loop.
2630	SmallVector<MachineBasicBlock *, `8`> Successors(MBB->succ_rbegin(),
2631	MBB->succ_rend());
2632	// FIXME: Avoid quadratic complexity.
2633	for (auto *MBBS : Successors) {
2634	if (MBBS->isEHPad()) {
2635	MBB->removeSuccessor(Succ: MBBS);
2636	MBBLPads.push_back(Elt: MBBS);
2637	}
2638	}
2639
2640	MBB->addSuccessor(Succ: DispatchBB);
2641
2642	// Find the invoke call and mark all of the callee-saved registers as
2643	// 'implicit defined' so that they're spilled. This prevents code from
2644	// moving instructions to before the EH block, where they will never be
2645	// executed.
2646	for (auto &II : reverse(C&: *MBB)) {
2647	if (!II.isCall())
2648	continue;
2649
2650	DenseMap<Register, bool> DefRegs;
2651	for (auto &MOp : II.operands())
2652	if (MOp.isReg())
2653	DefRegs [MOp.getReg()] = true;
2654
2655	MachineInstrBuilder MIB(*MF, &II);
2656	for (unsigned RI = `0`; SavedRegs[RI]; ++RI) {
2657	Register Reg = SavedRegs[RI];
2658	if (!DefRegs [Reg])
2659	MIB.addReg(RegNo: Reg, flags: RegState::ImplicitDefine \| RegState::Dead);
2660	}
2661
2662	break;
2663	}
2664	}
2665
2666	// Mark all former landing pads as non-landing pads. The dispatch is the only
2667	// landing pad now.
2668	for (auto &LP : MBBLPads)
2669	LP->setIsEHPad(false);
2670
2671	// The instruction is gone now.
2672	MI.eraseFromParent();
2673	return BB;
2674	}
2675
2676	MachineBasicBlock *
2677	VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2678	MachineBasicBlock BB) const* {
2679	switch (MI.getOpcode()) {
2680	default:
2681	llvm_unreachable("Unknown Custom Instruction!");
2682	case VE::EH_SjLj_LongJmp:
2683	return emitEHSjLjLongJmp(MI, MBB: BB);
2684	case VE::EH_SjLj_SetJmp:
2685	return emitEHSjLjSetJmp(MI, MBB: BB);
2686	case VE::EH_SjLj_Setup_Dispatch:
2687	return emitSjLjDispatchBlock(MI, BB);
2688	}
2689	}
2690
2691	static bool isSimm7(SDValue V) {
2692	EVT VT = V.getValueType();
2693	if (VT.isVector())
2694	return false;
2695
2696	if (VT.isInteger()) {
2697	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: V))
2698	return isInt<`7`>(x: C->getSExtValue());
2699	} else if (VT.isFloatingPoint()) {
2700	if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: V)) {
2701	if (VT == MVT::f32 \|\| VT == MVT::f64) {
2702	const APInt &Imm = C->getValueAPF().bitcastToAPInt();
2703	uint64_t Val = Imm.getSExtValue();
2704	if (Imm.getBitWidth() == `32`)
2705	Val <<= `32`; // Immediate value of float place at higher bits on VE.
2706	return isInt<`7`>(x: Val);
2707	}
2708	}
2709	}
2710	return false;
2711	}
2712
2713	static bool isMImm(SDValue V) {
2714	EVT VT = V.getValueType();
2715	if (VT.isVector())
2716	return false;
2717
2718	if (VT.isInteger()) {
2719	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: V))
2720	return isMImmVal(Val: getImmVal(N: C));
2721	} else if (VT.isFloatingPoint()) {
2722	if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: V)) {
2723	if (VT == MVT::f32) {
2724	// Float value places at higher bits, so ignore lower 32 bits.
2725	return isMImm32Val(Val: getFpImmVal(N: C) >> `32`);
2726	} else if (VT == MVT::f64) {
2727	return isMImmVal(Val: getFpImmVal(N: C));
2728	}
2729	}
2730	}
2731	return false;
2732	}
2733
2734	static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
2735	if (SrcVT.isFloatingPoint()) {
2736	if (SrcVT == MVT::f128)
2737	return VEISD::CMPQ;
2738	return VEISD::CMPF;
2739	}
2740	return isSignedIntSetCC(Code: CC) ? VEISD::CMPI : VEISD::CMPU;
2741	}
2742
2743	static EVT decideCompType(EVT SrcVT) {
2744	if (SrcVT == MVT::f128)
2745	return MVT::f64;
2746	return SrcVT;
2747	}
2748
2749	static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC,
2750	bool WithCMov) {
2751	if (SrcVT.isFloatingPoint()) {
2752	// For the case of floating point setcc, only unordered comparison
2753	// or general comparison with -enable-no-nans-fp-math option reach
2754	// here, so it is safe even if values are NaN. Only f128 doesn't
2755	// safe since VE uses f64 result of f128 comparison.
2756	return SrcVT != MVT::f128;
2757	}
2758	if (isIntEqualitySetCC(Code: CC)) {
2759	// For the case of equal or not equal, it is safe without comparison with 0.
2760	return true;
2761	}
2762	if (WithCMov) {
2763	// For the case of integer setcc with cmov, all signed comparison with 0
2764	// are safe.
2765	return isSignedIntSetCC(Code: CC);
2766	}
2767	// For the case of integer setcc, only signed 64 bits comparison is safe.
2768	// For unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it becomes
2769	// less than 0 witout CMPU. For 32 bits, other half of 32 bits are
2770	// uncoditional, so it is not safe too without CMPI..
2771	return isSignedIntSetCC(Code: CC) && SrcVT == MVT::i64;
2772	}
2773
2774	static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS,
2775	ISD::CondCode CC, bool WithCMov,
2776	const SDLoc &DL, SelectionDAG &DAG) {
2777	// Compare values. If RHS is 0 and it is safe to calculate without
2778	// comparison, we don't generate an instruction for comparison.
2779	EVT CompVT = decideCompType(SrcVT: VT);
2780	if (CompVT == VT && safeWithoutCompWithNull(SrcVT: VT, CC, WithCMov) &&
2781	(isNullConstant(V: RHS) \|\| isNullFPConstant(V: RHS))) {
2782	return LHS;
2783	}
2784	return DAG.getNode(Opcode: decideComp(SrcVT: VT, CC), DL, VT: CompVT, N1: LHS, N2: RHS);
2785	}
2786
2787	SDValue VETargetLowering::combineSelect(SDNode *N,
2788	DAGCombinerInfo &DCI) const {
2789	assert(N->getOpcode() == ISD::SELECT &&
2790	"Should be called with a SELECT node");
2791	ISD::CondCode CC = ISD::CondCode::SETNE;
2792	SDValue Cond = N->getOperand(Num: `0`);
2793	SDValue True = N->getOperand(Num: `1`);
2794	SDValue False = N->getOperand(Num: `2`);
2795
2796	// We handle only scalar SELECT.
2797	EVT VT = N->getValueType(ResNo: `0`);
2798	if (VT.isVector())
2799	return SDValue ();
2800
2801	// Peform combineSelect after leagalize DAG.
2802	if (!DCI.isAfterLegalizeDAG())
2803	return SDValue ();
2804
2805	EVT VT0 = Cond.getValueType();
2806	if (isMImm(V: True)) {
2807	// VE's condition move can handle MImm in True clause, so nothing to do.
2808	} else if (isMImm(V: False)) {
2809	// VE's condition move can handle MImm in True clause, so swap True and
2810	// False clauses if False has MImm value. And, update condition code.
2811	std::swap(a&: True, b&: False);
2812	CC = getSetCCInverse(Operation: CC, Type: VT0);
2813	}
2814
2815	SDLoc DL(N);
2816	SelectionDAG &DAG = DCI.DAG;
2817	VECC::CondCode VECCVal;
2818	if (VT0.isFloatingPoint()) {
2819	VECCVal = fpCondCode2Fcc(CC);
2820	} else {
2821	VECCVal = intCondCode2Icc(CC);
2822	}
2823	SDValue Ops[] = {Cond, True, False,
2824	DAG.getConstant(Val: VECCVal, DL, VT: MVT::i32)};
2825	return DAG.getNode(Opcode: VEISD::CMOV, DL, VT, Ops);
2826	}
2827
2828	SDValue VETargetLowering::combineSelectCC(SDNode *N,
2829	DAGCombinerInfo &DCI) const {
2830	assert(N->getOpcode() == ISD::SELECT_CC &&
2831	"Should be called with a SELECT_CC node");
2832	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: `4`))->get();
2833	SDValue LHS = N->getOperand(Num: `0`);
2834	SDValue RHS = N->getOperand(Num: `1`);
2835	SDValue True = N->getOperand(Num: `2`);
2836	SDValue False = N->getOperand(Num: `3`);
2837
2838	// We handle only scalar SELECT_CC.
2839	EVT VT = N->getValueType(ResNo: `0`);
2840	if (VT.isVector())
2841	return SDValue ();
2842
2843	// Peform combineSelectCC after leagalize DAG.
2844	if (!DCI.isAfterLegalizeDAG())
2845	return SDValue ();
2846
2847	// We handle only i32/i64/f32/f64/f128 comparisons.
2848	EVT LHSVT = LHS.getValueType();
2849	assert(LHSVT == RHS.getValueType());
2850	switch (LHSVT.getSimpleVT().SimpleTy) {
2851	case MVT::i32:
2852	case MVT::i64:
2853	case MVT::f32:
2854	case MVT::f64:
2855	case MVT::f128:
2856	break;
2857	default:
2858	// Return SDValue to let llvm handle other types.
2859	return SDValue ();
2860	}
2861
2862	if (isMImm(V: RHS)) {
2863	// VE's comparison can handle MImm in RHS, so nothing to do.
2864	} else if (isSimm7(V: RHS)) {
2865	// VE's comparison can handle Simm7 in LHS, so swap LHS and RHS, and
2866	// update condition code.
2867	std::swap(a&: LHS, b&: RHS);
2868	CC = getSetCCSwappedOperands(Operation: CC);
2869	}
2870	if (isMImm(V: True)) {
2871	// VE's condition move can handle MImm in True clause, so nothing to do.
2872	} else if (isMImm(V: False)) {
2873	// VE's condition move can handle MImm in True clause, so swap True and
2874	// False clauses if False has MImm value. And, update condition code.
2875	std::swap(a&: True, b&: False);
2876	CC = getSetCCInverse(Operation: CC, Type: LHSVT);
2877	}
2878
2879	SDLoc DL(N);
2880	SelectionDAG &DAG = DCI.DAG;
2881
2882	bool WithCMov = true;
2883	SDValue CompNode = generateComparison(VT: LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);
2884
2885	VECC::CondCode VECCVal;
2886	if (LHSVT.isFloatingPoint()) {
2887	VECCVal = fpCondCode2Fcc(CC);
2888	} else {
2889	VECCVal = intCondCode2Icc(CC);
2890	}
2891	SDValue Ops[] = {CompNode, True, False,
2892	DAG.getConstant(Val: VECCVal, DL, VT: MVT::i32)};
2893	return DAG.getNode(Opcode: VEISD::CMOV, DL, VT, Ops);
2894	}
2895
2896	static bool isI32InsnAllUses(const SDNode User, const* SDNode *N);
2897	static bool isI32Insn(const SDNode User, const* SDNode *N) {
2898	switch (User->getOpcode()) {
2899	default:
2900	return false;
2901	case ISD::ADD:
2902	case ISD::SUB:
2903	case ISD::MUL:
2904	case ISD::SDIV:
2905	case ISD::UDIV:
2906	case ISD::SETCC:
2907	case ISD::SMIN:
2908	case ISD::SMAX:
2909	case ISD::SHL:
2910	case ISD::SRA:
2911	case ISD::BSWAP:
2912	case ISD::SINT_TO_FP:
2913	case ISD::UINT_TO_FP:
2914	case ISD::BR_CC:
2915	case ISD::BITCAST:
2916	case ISD::ATOMIC_CMP_SWAP:
2917	case ISD::ATOMIC_SWAP:
2918	case VEISD::CMPU:
2919	case VEISD::CMPI:
2920	return true;
2921	case ISD::SRL:
2922	if (N->getOperand(Num: `0`).getOpcode() != ISD::SRL)
2923	return true;
2924	// (srl (trunc (srl ...))) may be optimized by combining srl, so
2925	// doesn't optimize trunc now.
2926	return false;
2927	case ISD::SELECT_CC:
2928	if (User->getOperand(Num: `2`).getNode() != N &&
2929	User->getOperand(Num: `3`).getNode() != N)
2930	return true;
2931	return isI32InsnAllUses(User, N);
2932	case VEISD::CMOV:
2933	// CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
2934	// However, trunc in true or false clauses is not safe.
2935	if (User->getOperand(Num: `1`).getNode() != N &&
2936	User->getOperand(Num: `2`).getNode() != N &&
2937	isa<ConstantSDNode>(Val: User->getOperand(Num: `3`))) {
2938	VECC::CondCode VECCVal =
2939	static_cast<VECC::CondCode>(User->getConstantOperandVal(Num: `3`));
2940	return isIntVECondCode(CC: VECCVal);
2941	}
2942	[[fallthrough]];
2943	case ISD::AND:
2944	case ISD::OR:
2945	case ISD::XOR:
2946	case ISD::SELECT:
2947	case ISD::CopyToReg:
2948	// Check all use of selections, bit operations, and copies. If all of them
2949	// are safe, optimize truncate to extract_subreg.
2950	return isI32InsnAllUses(User, N);
2951	}
2952	}
2953
2954	static bool isI32InsnAllUses(const SDNode User, const* SDNode *N) {
2955	// Check all use of User node. If all of them are safe, optimize
2956	// truncate to extract_subreg.
2957	for (const SDNode *U : User->uses()) {
2958	switch (U->getOpcode()) {
2959	default:
2960	// If the use is an instruction which treats the source operand as i32,
2961	// it is safe to avoid truncate here.
2962	if (isI32Insn(User: U, N))
2963	continue;
2964	break;
2965	case ISD::ANY_EXTEND:
2966	case ISD::SIGN_EXTEND:
2967	case ISD::ZERO_EXTEND: {
2968	// Special optimizations to the combination of ext and trunc.
2969	// (ext ... (select ... (trunc ...))) is safe to avoid truncate here
2970	// since this truncate instruction clears higher 32 bits which is filled
2971	// by one of ext instructions later.
2972	assert(N->getValueType(`0`) == MVT::i32 &&
2973	"find truncate to not i32 integer");
2974	if (User->getOpcode() == ISD::SELECT_CC \|\|
2975	User->getOpcode() == ISD::SELECT \|\| User->getOpcode() == VEISD::CMOV)
2976	continue;
2977	break;
2978	}
2979	}
2980	return false;
2981	}
2982	return true;
2983	}
2984
2985	// Optimize TRUNCATE in DAG combining. Optimizing it in CUSTOM lower is
2986	// sometime too early. Optimizing it in DAG pattern matching in VEInstrInfo.td
2987	// is sometime too late. So, doing it at here.
2988	SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2989	DAGCombinerInfo &DCI) const {
2990	assert(N->getOpcode() == ISD::TRUNCATE &&
2991	"Should be called with a TRUNCATE node");
2992
2993	SelectionDAG &DAG = DCI.DAG;
2994	SDLoc DL(N);
2995	EVT VT = N->getValueType(ResNo: `0`);
2996
2997	// We prefer to do this when all types are legal.
2998	if (!DCI.isAfterLegalizeDAG())
2999	return SDValue ();
3000
3001	// Skip combine TRUNCATE atm if the operand of TRUNCATE might be a constant.
3002	if (N->getOperand(Num: `0`)->getOpcode() == ISD::SELECT_CC &&
3003	isa<ConstantSDNode>(Val: N->getOperand(Num: `0`)->getOperand(Num: `0`)) &&
3004	isa<ConstantSDNode>(Val: N->getOperand(Num: `0`)->getOperand(Num: `1`)))
3005	return SDValue ();
3006
3007	// Check all use of this TRUNCATE.
3008	for (const SDNode *User : N->uses()) {
3009	// Make sure that we're not going to replace TRUNCATE for non i32
3010	// instructions.
3011	//
3012	// FIXME: Although we could sometimes handle this, and it does occur in
3013	// practice that one of the condition inputs to the select is also one of
3014	// the outputs, we currently can't deal with this.
3015	if (isI32Insn(User, N))
3016	continue;
3017
3018	return SDValue ();
3019	}
3020
3021	SDValue SubI32 = DAG.getTargetConstant(Val: VE::sub_i32, DL, VT: MVT::i32);
3022	return SDValue (DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT,
3023	Op1: N->getOperand(Num: `0`), Op2: SubI32),
3024	`0`);
3025	}
3026
3027	SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
3028	DAGCombinerInfo &DCI) const {
3029	switch (N->getOpcode()) {
3030	default:
3031	break;
3032	case ISD::SELECT:
3033	return combineSelect(N, DCI);
3034	case ISD::SELECT_CC:
3035	return combineSelectCC(N, DCI);
3036	case ISD::TRUNCATE:
3037	return combineTRUNCATE(N, DCI);
3038	}
3039
3040	return SDValue ();
3041	}
3042
3043	//===----------------------------------------------------------------------===//
3044	// VE Inline Assembly Support
3045	//===----------------------------------------------------------------------===//
3046
3047	VETargetLowering::ConstraintType
3048	VETargetLowering::getConstraintType(StringRef Constraint) const {
3049	if (Constraint.size() == `1`) {
3050	switch (Constraint [`0`]) {
3051	default:
3052	break;
3053	case `'v'`: // vector registers
3054	return C_RegisterClass;
3055	}
3056	}
3057	return TargetLowering::getConstraintType(Constraint);
3058	}
3059
3060	std::pair<unsigned, const TargetRegisterClass *>
3061	VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3062	StringRef Constraint,
3063	MVT VT) const {
3064	const TargetRegisterClass RC = nullptr*;
3065	if (Constraint.size() == `1`) {
3066	switch (Constraint [`0`]) {
3067	default:
3068	return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3069	case `'r'`:
3070	RC = &VE::I64RegClass;
3071	break;
3072	case `'v'`:
3073	RC = &VE::V64RegClass;
3074	break;
3075	}
3076	return std::make_pair(x: `0U`, y&: RC);
3077	}
3078
3079	return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3080	}
3081
3082	//===----------------------------------------------------------------------===//
3083	// VE Target Optimization Support
3084	//===----------------------------------------------------------------------===//
3085
3086	unsigned VETargetLowering::getMinimumJumpTableEntries() const {
3087	// Specify 8 for PIC model to relieve the impact of PIC load instructions.
3088	if (isJumpTableRelative())
3089	return `8`;
3090
3091	return TargetLowering::getMinimumJumpTableEntries();
3092	}
3093
3094	bool VETargetLowering::hasAndNot(SDValue Y) const {
3095	EVT VT = Y.getValueType();
3096
3097	// VE doesn't have vector and not instruction.
3098	if (VT.isVector())
3099	return false;
3100
3101	// VE allows different immediate values for X and Y where ~X & Y.
3102	// Only simm7 works for X, and only mimm works for Y on VE. However, this
3103	// function is used to check whether an immediate value is OK for and-not
3104	// instruction as both X and Y. Generating additional instruction to
3105	// retrieve an immediate value is no good since the purpose of this
3106	// function is to convert a series of 3 instructions to another series of
3107	// 3 instructions with better parallelism. Therefore, we return false
3108	// for all immediate values now.
3109	// FIXME: Change hasAndNot function to have two operands to make it work
3110	// correctly with Aurora VE.
3111	if (isa<ConstantSDNode>(Val: Y))
3112	return false;
3113
3114	// It's ok for generic registers.
3115	return true;
3116	}
3117
3118	SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3119	SelectionDAG &DAG) const {
3120	assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
3121	MVT VT = Op.getOperand(i: `0`).getSimpleValueType();
3122
3123	// Special treatment for packed V64 types.
3124	assert(VT == MVT::v512i32 \|\| VT == MVT::v512f32);
3125	(void)VT;
3126	// Example of codes:
3127	// %packed_v = extractelt %vr, %idx / 2
3128	// %v = %packed_v >> (%idx % 2 32)*
3129	// %res = %v & 0xffffffff
3130
3131	SDValue Vec = Op.getOperand(i: `0`);
3132	SDValue Idx = Op.getOperand(i: `1`);
3133	SDLoc DL(Op);
3134	SDValue Result = Op;
3135	if (false / Idx->isConstant() /) {
3136	// TODO: optimized implementation using constant values
3137	} else {
3138	SDValue Const1 = DAG.getConstant(Val: `1`, DL, VT: MVT::i64);
3139	SDValue HalfIdx = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {Idx, Const1});
3140	SDValue PackedElt =
3141	SDValue (DAG.getMachineNode(Opcode: VE::LVSvr, dl: DL, VT: MVT::i64, Ops: {Vec, HalfIdx}), `0`);
3142	SDValue AndIdx = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {Idx, Const1});
3143	SDValue Shift = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, Ops: {AndIdx, Const1});
3144	SDValue Const5 = DAG.getConstant(Val: `5`, DL, VT: MVT::i64);
3145	Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, Ops: {Shift, Const5});
3146	PackedElt = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {PackedElt, Shift});
3147	SDValue Mask = DAG.getConstant(Val: `0xFFFFFFFFL`, DL, VT: MVT::i64);
3148	PackedElt = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {PackedElt, Mask});
3149	SDValue SubI32 = DAG.getTargetConstant(Val: VE::sub_i32, DL, VT: MVT::i32);
3150	Result = SDValue (DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL,
3151	VT: MVT::i32, Op1: PackedElt, Op2: SubI32),
3152	`0`);
3153
3154	if (Op.getSimpleValueType() == MVT::f32) {
3155	Result = DAG.getBitcast(VT: MVT::f32, V: Result);
3156	} else {
3157	assert(Op.getSimpleValueType() == MVT::i32);
3158	}
3159	}
3160	return Result;
3161	}
3162
3163	SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3164	SelectionDAG &DAG) const {
3165	assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
3166	MVT VT = Op.getOperand(i: `0`).getSimpleValueType();
3167
3168	// Special treatment for packed V64 types.
3169	assert(VT == MVT::v512i32 \|\| VT == MVT::v512f32);
3170	(void)VT;
3171	// The v512i32 and v512f32 starts from upper bits (0..31). This "upper
3172	// bits" required `val << 32` from C implementation's point of view.
3173	//
3174	// Example of codes:
3175	// %packed_elt = extractelt %vr, (%idx >> 1)
3176	// %shift = ((%idx & 1) ^ 1) << 5
3177	// %packed_elt &= 0xffffffff00000000 >> shift
3178	// %packed_elt \|= (zext %val) << shift
3179	// %vr = insertelt %vr, %packed_elt, (%idx >> 1)
3180
3181	SDLoc DL(Op);
3182	SDValue Vec = Op.getOperand(i: `0`);
3183	SDValue Val = Op.getOperand(i: `1`);
3184	SDValue Idx = Op.getOperand(i: `2`);
3185	if (Idx.getSimpleValueType() == MVT::i32)
3186	Idx = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Idx);
3187	if (Val.getSimpleValueType() == MVT::f32)
3188	Val = DAG.getBitcast(VT: MVT::i32, V: Val);
3189	assert(Val.getSimpleValueType() == MVT::i32);
3190	Val = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Val);
3191
3192	SDValue Result = Op;
3193	if (false / Idx->isConstant()/) {
3194	// TODO: optimized implementation using constant values
3195	} else {
3196	SDValue Const1 = DAG.getConstant(Val: `1`, DL, VT: MVT::i64);
3197	SDValue HalfIdx = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {Idx, Const1});
3198	SDValue PackedElt =
3199	SDValue (DAG.getMachineNode(Opcode: VE::LVSvr, dl: DL, VT: MVT::i64, Ops: {Vec, HalfIdx}), `0`);
3200	SDValue AndIdx = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {Idx, Const1});
3201	SDValue Shift = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, Ops: {AndIdx, Const1});
3202	SDValue Const5 = DAG.getConstant(Val: `5`, DL, VT: MVT::i64);
3203	Shift = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, Ops: {Shift, Const5});
3204	SDValue Mask = DAG.getConstant(Val: `0xFFFFFFFF00000000L`, DL, VT: MVT::i64);
3205	Mask = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, Ops: {Mask, Shift});
3206	PackedElt = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i64, Ops: {PackedElt, Mask});
3207	Val = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i64, Ops: {Val, Shift});
3208	PackedElt = DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i64, Ops: {PackedElt, Val});
3209	Result =
3210	SDValue (DAG.getMachineNode(Opcode: VE::LSVrr_v, dl: DL, VT: Vec.getSimpleValueType(),
3211	Ops: {HalfIdx, PackedElt, Vec}),
3212	`0`);
3213	}
3214	return Result;
3215	}
3216

Browse the source code of llvm_projects/llvm/lib/Target/VE/VEISelLowering.cpp