R600ISelLowering.cpp source code [llvm_projects/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp]

1	//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file
10	/// Custom DAG lowering for R600
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "R600ISelLowering.h"
15	#include "AMDGPU.h"
16	#include "AMDGPUSelectionDAGInfo.h"
17	#include "MCTargetDesc/R600MCTargetDesc.h"
18	#include "R600Defines.h"
19	#include "R600MachineFunctionInfo.h"
20	#include "R600Subtarget.h"
21	#include "R600TargetMachine.h"
22	#include "llvm/CodeGen/MachineFunction.h"
23	#include "llvm/IR/IntrinsicsAMDGPU.h"
24	#include "llvm/IR/IntrinsicsR600.h"
25	#include "llvm/Passes/CodeGenPassBuilder.h"
26
27	using namespace llvm;
28
29	#include "R600GenCallingConv.inc"
30
31	R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
32	const R600Subtarget &STI)
33	: AMDGPUTargetLowering (TM, STI, STI), Subtarget(&STI),
34	Gen(STI.getGeneration()) {
35	addRegisterClass(VT: MVT::f32, RC: &R600::R600_Reg32RegClass);
36	addRegisterClass(VT: MVT::i32, RC: &R600::R600_Reg32RegClass);
37	addRegisterClass(VT: MVT::v2f32, RC: &R600::R600_Reg64RegClass);
38	addRegisterClass(VT: MVT::v2i32, RC: &R600::R600_Reg64RegClass);
39	addRegisterClass(VT: MVT::v4f32, RC: &R600::R600_Reg128RegClass);
40	addRegisterClass(VT: MVT::v4i32, RC: &R600::R600_Reg128RegClass);
41
42	setBooleanContents(ZeroOrNegativeOneBooleanContent);
43	setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
44
45	computeRegisterProperties(TRI: Subtarget->getRegisterInfo());
46
47	// Legalize loads and stores to the private address space.
48	setOperationAction(Ops: ISD::LOAD, VTs: {MVT::i32, MVT::v2i32, MVT::v4i32}, Action: Custom);
49
50	// EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
51	// spaces, so it is custom lowered to handle those where it isn't.
52	for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})
53	for (MVT VT : MVT::integer_valuetypes()) {
54	setLoadExtAction(ExtType: Op, ValVT: VT, MemVT: MVT::i1, Action: Promote);
55	setLoadExtAction(ExtType: Op, ValVT: VT, MemVT: MVT::i8, Action: Custom);
56	setLoadExtAction(ExtType: Op, ValVT: VT, MemVT: MVT::i16, Action: Custom);
57	}
58
59	// Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
60	setLoadExtAction(ExtTypes: {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT: MVT::v2i32,
61	MemVT: MVT::v2i1, Action: Expand);
62
63	setLoadExtAction(ExtTypes: {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT: MVT::v4i32,
64	MemVT: MVT::v4i1, Action: Expand);
65
66	setOperationAction(Ops: ISD::STORE, VTs: {MVT::i8, MVT::i32, MVT::v2i32, MVT::v4i32},
67	Action: Custom);
68
69	setTruncStoreAction(ValVT: MVT::i32, MemVT: MVT::i8, Action: Custom);
70	setTruncStoreAction(ValVT: MVT::i32, MemVT: MVT::i16, Action: Custom);
71	// We need to include these since trunc STORES to PRIVATE need
72	// special handling to accommodate RMW
73	setTruncStoreAction(ValVT: MVT::v2i32, MemVT: MVT::v2i16, Action: Custom);
74	setTruncStoreAction(ValVT: MVT::v4i32, MemVT: MVT::v4i16, Action: Custom);
75	setTruncStoreAction(ValVT: MVT::v8i32, MemVT: MVT::v8i16, Action: Custom);
76	setTruncStoreAction(ValVT: MVT::v16i32, MemVT: MVT::v16i16, Action: Custom);
77	setTruncStoreAction(ValVT: MVT::v32i32, MemVT: MVT::v32i16, Action: Custom);
78	setTruncStoreAction(ValVT: MVT::v2i32, MemVT: MVT::v2i8, Action: Custom);
79	setTruncStoreAction(ValVT: MVT::v4i32, MemVT: MVT::v4i8, Action: Custom);
80	setTruncStoreAction(ValVT: MVT::v8i32, MemVT: MVT::v8i8, Action: Custom);
81	setTruncStoreAction(ValVT: MVT::v16i32, MemVT: MVT::v16i8, Action: Custom);
82	setTruncStoreAction(ValVT: MVT::v32i32, MemVT: MVT::v32i8, Action: Custom);
83
84	// Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
85	setTruncStoreAction(ValVT: MVT::v2i32, MemVT: MVT::v2i1, Action: Expand);
86	setTruncStoreAction(ValVT: MVT::v4i32, MemVT: MVT::v4i1, Action: Expand);
87
88	// Set condition code actions
89	setCondCodeAction(CCs: {ISD::SETO, ISD::SETUO, ISD::SETLT, ISD::SETLE, ISD::SETOLT,
90	ISD::SETOLE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGE,
91	ISD::SETUGT, ISD::SETULT, ISD::SETULE},
92	VT: MVT::f32, Action: Expand);
93
94	setCondCodeAction(CCs: {ISD::SETLE, ISD::SETLT, ISD::SETULE, ISD::SETULT},
95	VT: MVT::i32, Action: Expand);
96
97	setOperationAction(Ops: {ISD::FCOS, ISD::FSIN}, VT: MVT::f32, Action: Custom);
98
99	setOperationAction(Ops: ISD::SETCC, VTs: {MVT::v4i32, MVT::v2i32}, Action: Expand);
100
101	setOperationAction(Ops: ISD::BR_CC, VTs: {MVT::i32, MVT::f32}, Action: Expand);
102	setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Custom);
103
104	setOperationAction(Op: ISD::FSUB, VT: MVT::f32, Action: Expand);
105
106	setOperationAction(Ops: ISD::IS_FPCLASS,
107	VTs: {MVT::f32, MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
108	MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32},
109	Action: Expand);
110
111	setOperationAction(Ops: {ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FFLOOR},
112	VT: MVT::f64, Action: Custom);
113
114	setOperationAction(Ops: ISD::SELECT_CC, VTs: {MVT::f32, MVT::i32}, Action: Custom);
115
116	setOperationAction(Ops: ISD::SETCC, VTs: {MVT::i32, MVT::f32}, Action: Expand);
117	setOperationAction(Ops: {ISD::FP_TO_UINT, ISD::FP_TO_SINT}, VTs: {MVT::i1, MVT::i64},
118	Action: Custom);
119
120	setOperationAction(Ops: ISD::SELECT, VTs: {MVT::i32, MVT::f32, MVT::v2i32, MVT::v4i32},
121	Action: Expand);
122
123	// ADD, SUB overflow.
124	// TODO: turn these into Legal?
125	if (Subtarget->hasCARRY())
126	setOperationAction(Op: ISD::UADDO, VT: MVT::i32, Action: Custom);
127
128	if (Subtarget->hasBORROW())
129	setOperationAction(Op: ISD::USUBO, VT: MVT::i32, Action: Custom);
130
131	// Expand sign extension of vectors
132	if (!Subtarget->hasBFE())
133	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
134
135	setOperationAction(Ops: ISD::SIGN_EXTEND_INREG, VTs: {MVT::v2i1, MVT::v4i1}, Action: Expand);
136
137	if (!Subtarget->hasBFE())
138	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i8, Action: Expand);
139	setOperationAction(Ops: ISD::SIGN_EXTEND_INREG, VTs: {MVT::v2i8, MVT::v4i8}, Action: Expand);
140
141	if (!Subtarget->hasBFE())
142	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i16, Action: Expand);
143	setOperationAction(Ops: ISD::SIGN_EXTEND_INREG, VTs: {MVT::v2i16, MVT::v4i16}, Action: Expand);
144
145	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i32, Action: Legal);
146	setOperationAction(Ops: ISD::SIGN_EXTEND_INREG, VTs: {MVT::v2i32, MVT::v4i32}, Action: Expand);
147
148	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::Other, Action: Expand);
149
150	setOperationAction(Op: ISD::FrameIndex, VT: MVT::i32, Action: Custom);
151
152	setOperationAction(Ops: ISD::EXTRACT_VECTOR_ELT,
153	VTs: {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Action: Custom);
154
155	setOperationAction(Ops: ISD::INSERT_VECTOR_ELT,
156	VTs: {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Action: Custom);
157
158	// We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
159	// to be Legal/Custom in order to avoid library calls.
160	setOperationAction(Ops: {ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, VT: MVT::i32,
161	Action: Custom);
162
163	if (!Subtarget->hasFMA())
164	setOperationAction(Ops: ISD::FMA, VTs: {MVT::f32, MVT::f64}, Action: Expand);
165
166	// FIXME: May need no denormals check
167	setOperationAction(Op: ISD::FMAD, VT: MVT::f32, Action: Legal);
168
169	if (!Subtarget->hasBFI())
170	// fcopysign can be done in a single instruction with BFI.
171	setOperationAction(Ops: ISD::FCOPYSIGN, VTs: {MVT::f32, MVT::f64}, Action: Expand);
172
173	if (!Subtarget->hasBCNT(Size: `32`))
174	setOperationAction(Op: ISD::CTPOP, VT: MVT::i32, Action: Expand);
175
176	if (!Subtarget->hasBCNT(Size: `64`))
177	setOperationAction(Op: ISD::CTPOP, VT: MVT::i64, Action: Expand);
178
179	if (Subtarget->hasFFBH())
180	setOperationAction(Op: ISD::CTLZ_ZERO_UNDEF, VT: MVT::i32, Action: Custom);
181
182	if (Subtarget->hasFFBL())
183	setOperationAction(Op: ISD::CTTZ_ZERO_UNDEF, VT: MVT::i32, Action: Custom);
184
185	// FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
186	// need it for R600.
187	if (Subtarget->hasBFE())
188	setHasExtractBitsInsn(true);
189
190	setOperationAction(Op: ISD::GlobalAddress, VT: MVT::i32, Action: Custom);
191	setOperationAction(Op: ISD::ADDRSPACECAST, VT: MVT::i32, Action: Custom);
192
193	const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
194	for (MVT VT : ScalarIntVTs)
195	setOperationAction(Ops: {ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT,
196	Action: Expand);
197
198	// LLVM will expand these to atomic_cmp_swap(0)
199	// and atomic_swap, respectively.
200	setOperationAction(Ops: {ISD::ATOMIC_LOAD, ISD::ATOMIC_STORE}, VT: MVT::i32, Action: Expand);
201
202	// We need to custom lower some of the intrinsics
203	setOperationAction(Ops: {ISD::INTRINSIC_VOID, ISD::INTRINSIC_WO_CHAIN}, VT: MVT::Other,
204	Action: Custom);
205
206	setSchedulingPreference(Sched::Source);
207
208	setTargetDAGCombine({ISD::FP_ROUND, ISD::FP_TO_SINT, ISD::EXTRACT_VECTOR_ELT,
209	ISD::SELECT_CC, ISD::INSERT_VECTOR_ELT, ISD::LOAD});
210	}
211
212	static inline bool isEOP(MachineBasicBlock::iterator I) {
213	if (std::next(x: I) == I ->getParent()->end())
214	return false;
215	return std::next(x: I)->getOpcode() == R600::RETURN;
216	}
217
218	MachineBasicBlock *
219	R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
220	MachineBasicBlock BB) const* {
221	MachineFunction *MF = BB->getParent();
222	MachineRegisterInfo &MRI = MF->getRegInfo();
223	MachineBasicBlock::iterator I = MI;
224	const R600InstrInfo *TII = Subtarget->getInstrInfo();
225
226	switch (MI.getOpcode()) {
227	default:
228	// Replace LDS__RET instruction that don't have any uses with the*
229	// equivalent LDS__NORET instruction.*
230	if (TII->isLDSRetInstr(Opcode: MI.getOpcode())) {
231	int DstIdx = TII->getOperandIdx(Opcode: MI.getOpcode(), Op: R600::OpName::dst);
232	assert(DstIdx != -`1`);
233	MachineInstrBuilder NewMI;
234	// FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
235	// LDS_1A2D support and remove this special case.
236	if (!MRI.use_empty(RegNo: MI.getOperand(i: DstIdx).getReg()) \|\|
237	MI.getOpcode() == R600::LDS_CMPST_RET)
238	return BB;
239
240	NewMI = BuildMI(BB&: *BB, I, MIMD: BB->findDebugLoc(MBBI: I),
241	MCID: TII->get(Opcode: R600::getLDSNoRetOp(Opcode: MI.getOpcode())));
242	for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands()))
243	NewMI.add(MO);
244	} else {
245	return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, MBB: BB);
246	}
247	break;
248
249	case R600::FABS_R600: {
250	MachineInstr *NewMI = TII->buildDefaultInstruction(
251	MBB&: *BB, I, Opcode: R600::MOV, DstReg: MI.getOperand(i: `0`).getReg(),
252	Src0Reg: MI.getOperand(i: `1`).getReg());
253	TII->addFlag(MI&: *NewMI, SrcIdx: `0`, MO_FLAG_ABS);
254	break;
255	}
256
257	case R600::FNEG_R600: {
258	MachineInstr *NewMI = TII->buildDefaultInstruction(
259	MBB&: *BB, I, Opcode: R600::MOV, DstReg: MI.getOperand(i: `0`).getReg(),
260	Src0Reg: MI.getOperand(i: `1`).getReg());
261	TII->addFlag(MI&: *NewMI, SrcIdx: `0`, MO_FLAG_NEG);
262	break;
263	}
264
265	case R600::MASK_WRITE: {
266	Register maskedRegister = MI.getOperand(i: `0`).getReg();
267	assert(maskedRegister.isVirtual());
268	MachineInstr * defInstr = MRI.getVRegDef(Reg: maskedRegister);
269	TII->addFlag(MI&: *defInstr, SrcIdx: `0`, MO_FLAG_MASK);
270	break;
271	}
272
273	case R600::MOV_IMM_F32:
274	TII->buildMovImm(BB&: *BB, I, DstReg: MI.getOperand(i: `0`).getReg(), Imm: MI.getOperand(i: `1`)
275	.getFPImm()
276	->getValueAPF()
277	.bitcastToAPInt()
278	.getZExtValue());
279	break;
280
281	case R600::MOV_IMM_I32:
282	TII->buildMovImm(BB&: *BB, I, DstReg: MI.getOperand(i: `0`).getReg(),
283	Imm: MI.getOperand(i: `1`).getImm());
284	break;
285
286	case R600::MOV_IMM_GLOBAL_ADDR: {
287	//TODO: Perhaps combine this instruction with the next if possible
288	auto MIB = TII->buildDefaultInstruction(
289	MBB&: *BB, I: MI, Opcode: R600::MOV, DstReg: MI.getOperand(i: `0`).getReg(), Src0Reg: R600::ALU_LITERAL_X);
290	int Idx = TII->getOperandIdx(MI: *MIB, Op: R600::OpName::literal);
291	//TODO: Ugh this is rather ugly
292	const MachineOperand &MO = MI.getOperand(i: `1`);
293	MIB ->getOperand(i: Idx).ChangeToGA(GV: MO.getGlobal(), Offset: MO.getOffset(),
294	TargetFlags: MO.getTargetFlags());
295	break;
296	}
297
298	case R600::CONST_COPY: {
299	MachineInstr *NewMI = TII->buildDefaultInstruction(
300	MBB&: *BB, I: MI, Opcode: R600::MOV, DstReg: MI.getOperand(i: `0`).getReg(), Src0Reg: R600::ALU_CONST);
301	TII->setImmOperand(MI&: *NewMI, Op: R600::OpName::src0_sel,
302	Imm: MI.getOperand(i: `1`).getImm());
303	break;
304	}
305
306	case R600::RAT_WRITE_CACHELESS_32_eg:
307	case R600::RAT_WRITE_CACHELESS_64_eg:
308	case R600::RAT_WRITE_CACHELESS_128_eg:
309	BuildMI(BB&: *BB, I, MIMD: BB->findDebugLoc(MBBI: I), MCID: TII->get(Opcode: MI.getOpcode()))
310	.add(MO: MI.getOperand(i: `0`))
311	.add(MO: MI.getOperand(i: `1`))
312	.addImm(Val: isEOP(I)); // Set End of program bit
313	break;
314
315	case R600::RAT_STORE_TYPED_eg:
316	BuildMI(BB&: *BB, I, MIMD: BB->findDebugLoc(MBBI: I), MCID: TII->get(Opcode: MI.getOpcode()))
317	.add(MO: MI.getOperand(i: `0`))
318	.add(MO: MI.getOperand(i: `1`))
319	.add(MO: MI.getOperand(i: `2`))
320	.addImm(Val: isEOP(I)); // Set End of program bit
321	break;
322
323	case R600::BRANCH:
324	BuildMI(BB&: *BB, I, MIMD: BB->findDebugLoc(MBBI: I), MCID: TII->get(Opcode: R600::JUMP))
325	.add(MO: MI.getOperand(i: `0`));
326	break;
327
328	case R600::BRANCH_COND_f32: {
329	MachineInstr *NewMI =
330	BuildMI(BB&: *BB, I, MIMD: BB->findDebugLoc(MBBI: I), MCID: TII->get(Opcode: R600::PRED_X),
331	DestReg: R600::PREDICATE_BIT)
332	.add(MO: MI.getOperand(i: `1`))
333	.addImm(Val: R600::PRED_SETNE)
334	.addImm(Val: `0`); // Flags
335	TII->addFlag(MI&: *NewMI, SrcIdx: `0`, MO_FLAG_PUSH);
336	BuildMI(BB&: *BB, I, MIMD: BB->findDebugLoc(MBBI: I), MCID: TII->get(Opcode: R600::JUMP_COND))
337	.add(MO: MI.getOperand(i: `0`))
338	.addReg(RegNo: R600::PREDICATE_BIT, Flags: RegState::Kill);
339	break;
340	}
341
342	case R600::BRANCH_COND_i32: {
343	MachineInstr *NewMI =
344	BuildMI(BB&: *BB, I, MIMD: BB->findDebugLoc(MBBI: I), MCID: TII->get(Opcode: R600::PRED_X),
345	DestReg: R600::PREDICATE_BIT)
346	.add(MO: MI.getOperand(i: `1`))
347	.addImm(Val: R600::PRED_SETNE_INT)
348	.addImm(Val: `0`); // Flags
349	TII->addFlag(MI&: *NewMI, SrcIdx: `0`, MO_FLAG_PUSH);
350	BuildMI(BB&: *BB, I, MIMD: BB->findDebugLoc(MBBI: I), MCID: TII->get(Opcode: R600::JUMP_COND))
351	.add(MO: MI.getOperand(i: `0`))
352	.addReg(RegNo: R600::PREDICATE_BIT, Flags: RegState::Kill);
353	break;
354	}
355
356	case R600::EG_ExportSwz:
357	case R600::R600_ExportSwz: {
358	// Instruction is left unmodified if its not the last one of its type
359	bool isLastInstructionOfItsType = true;
360	unsigned InstExportType = MI.getOperand(i: `1`).getImm();
361	for (MachineBasicBlock::iterator NextExportInst = std::next(x: I),
362	EndBlock = BB->end(); NextExportInst != EndBlock;
363	NextExportInst = std::next(x: NextExportInst)) {
364	if (NextExportInst ->getOpcode() == R600::EG_ExportSwz \|\|
365	NextExportInst ->getOpcode() == R600::R600_ExportSwz) {
366	unsigned CurrentInstExportType = NextExportInst ->getOperand(i: `1`)
367	.getImm();
368	if (CurrentInstExportType == InstExportType) {
369	isLastInstructionOfItsType = false;
370	break;
371	}
372	}
373	}
374	bool EOP = isEOP(I);
375	if (!EOP && !isLastInstructionOfItsType)
376	return BB;
377	unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? `84` : `40`;
378	BuildMI(BB&: *BB, I, MIMD: BB->findDebugLoc(MBBI: I), MCID: TII->get(Opcode: MI.getOpcode()))
379	.add(MO: MI.getOperand(i: `0`))
380	.add(MO: MI.getOperand(i: `1`))
381	.add(MO: MI.getOperand(i: `2`))
382	.add(MO: MI.getOperand(i: `3`))
383	.add(MO: MI.getOperand(i: `4`))
384	.add(MO: MI.getOperand(i: `5`))
385	.add(MO: MI.getOperand(i: `6`))
386	.addImm(Val: CfInst)
387	.addImm(Val: EOP);
388	break;
389	}
390	case R600::RETURN: {
391	return BB;
392	}
393	}
394
395	MI.eraseFromParent();
396	return BB;
397	}
398
399	//===----------------------------------------------------------------------===//
400	// Custom DAG Lowering Operations
401	//===----------------------------------------------------------------------===//
402
403	SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
404	MachineFunction &MF = DAG.getMachineFunction();
405	R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
406	switch (Op.getOpcode()) {
407	default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
408	case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
409	case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
410	case ISD::SHL_PARTS:
411	case ISD::SRA_PARTS:
412	case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
413	case ISD::UADDO: return LowerUADDSUBO(Op, DAG, mainop: ISD::ADD, ovf: AMDGPUISD::CARRY);
414	case ISD::USUBO: return LowerUADDSUBO(Op, DAG, mainop: ISD::SUB, ovf: AMDGPUISD::BORROW);
415	case ISD::FCOS:
416	case ISD::FSIN: return LowerTrig(Op, DAG);
417	case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
418	case ISD::STORE: return LowerSTORE(Op, DAG);
419	case ISD::LOAD: {
420	SDValue Result = LowerLOAD(Op, DAG);
421	assert((!Result.getNode() \|\|
422	Result.getNode()->getNumValues() == `2`) &&
423	"Load should return a value and a chain");
424	return Result;
425	}
426
427	case ISD::BRCOND: return LowerBRCOND(Op, DAG);
428	case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
429	case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
430	case ISD::ADDRSPACECAST:
431	return lowerADDRSPACECAST(Op, DAG);
432	case ISD::INTRINSIC_VOID: {
433	SDValue Chain = Op.getOperand(i: `0`);
434	unsigned IntrinsicID = Op.getConstantOperandVal(i: `1`);
435	switch (IntrinsicID) {
436	case Intrinsic::r600_store_swizzle: {
437	SDLoc DL(Op);
438	const SDValue Args[`8`] = {
439	Chain,
440	Op.getOperand(i: `2`), // Export Value
441	Op.getOperand(i: `3`), // ArrayBase
442	Op.getOperand(i: `4`), // Type
443	DAG.getConstant(Val: `0`, DL, VT: MVT::i32), // SWZ_X
444	DAG.getConstant(Val: `1`, DL, VT: MVT::i32), // SWZ_Y
445	DAG.getConstant(Val: `2`, DL, VT: MVT::i32), // SWZ_Z
446	DAG.getConstant(Val: `3`, DL, VT: MVT::i32) // SWZ_W
447	};
448	return DAG.getNode(Opcode: AMDGPUISD::R600_EXPORT, DL, VT: Op.getValueType(), Ops: Args);
449	}
450
451	// default for switch(IntrinsicID)
452	default: break;
453	}
454	// break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
455	break;
456	}
457	case ISD::INTRINSIC_WO_CHAIN: {
458	unsigned IntrinsicID = Op.getConstantOperandVal(i: `0`);
459	EVT VT = Op.getValueType();
460	SDLoc DL(Op);
461	switch (IntrinsicID) {
462	case Intrinsic::r600_tex:
463	case Intrinsic::r600_texc: {
464	unsigned TextureOp;
465	switch (IntrinsicID) {
466	case Intrinsic::r600_tex:
467	TextureOp = `0`;
468	break;
469	case Intrinsic::r600_texc:
470	TextureOp = `1`;
471	break;
472	default:
473	llvm_unreachable("unhandled texture operation");
474	}
475
476	SDValue TexArgs[`19`] = {
477	DAG.getConstant(Val: TextureOp, DL, VT: MVT::i32),
478	Op.getOperand(i: `1`),
479	DAG.getConstant(Val: `0`, DL, VT: MVT::i32),
480	DAG.getConstant(Val: `1`, DL, VT: MVT::i32),
481	DAG.getConstant(Val: `2`, DL, VT: MVT::i32),
482	DAG.getConstant(Val: `3`, DL, VT: MVT::i32),
483	Op.getOperand(i: `2`),
484	Op.getOperand(i: `3`),
485	Op.getOperand(i: `4`),
486	DAG.getConstant(Val: `0`, DL, VT: MVT::i32),
487	DAG.getConstant(Val: `1`, DL, VT: MVT::i32),
488	DAG.getConstant(Val: `2`, DL, VT: MVT::i32),
489	DAG.getConstant(Val: `3`, DL, VT: MVT::i32),
490	Op.getOperand(i: `5`),
491	Op.getOperand(i: `6`),
492	Op.getOperand(i: `7`),
493	Op.getOperand(i: `8`),
494	Op.getOperand(i: `9`),
495	Op.getOperand(i: `10`)
496	};
497	return DAG.getNode(Opcode: AMDGPUISD::TEXTURE_FETCH, DL, VT: MVT::v4f32, Ops: TexArgs);
498	}
499	case Intrinsic::r600_dot4: {
500	SDValue Args[`8`] = {
501	DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::f32, N1: Op.getOperand(i: `1`),
502	N2: DAG.getConstant(Val: `0`, DL, VT: MVT::i32)),
503	DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::f32, N1: Op.getOperand(i: `2`),
504	N2: DAG.getConstant(Val: `0`, DL, VT: MVT::i32)),
505	DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::f32, N1: Op.getOperand(i: `1`),
506	N2: DAG.getConstant(Val: `1`, DL, VT: MVT::i32)),
507	DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::f32, N1: Op.getOperand(i: `2`),
508	N2: DAG.getConstant(Val: `1`, DL, VT: MVT::i32)),
509	DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::f32, N1: Op.getOperand(i: `1`),
510	N2: DAG.getConstant(Val: `2`, DL, VT: MVT::i32)),
511	DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::f32, N1: Op.getOperand(i: `2`),
512	N2: DAG.getConstant(Val: `2`, DL, VT: MVT::i32)),
513	DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::f32, N1: Op.getOperand(i: `1`),
514	N2: DAG.getConstant(Val: `3`, DL, VT: MVT::i32)),
515	DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::f32, N1: Op.getOperand(i: `2`),
516	N2: DAG.getConstant(Val: `3`, DL, VT: MVT::i32))
517	};
518	return DAG.getNode(Opcode: AMDGPUISD::DOT4, DL, VT: MVT::f32, Ops: Args);
519	}
520
521	case Intrinsic::r600_implicitarg_ptr: {
522	MVT PtrVT = getPointerTy(DL: DAG.getDataLayout(), AS: AMDGPUAS::PARAM_I_ADDRESS);
523	uint32_t ByteOffset = getImplicitParameterOffset(MF, Param: FIRST_IMPLICIT);
524	return DAG.getConstant(Val: ByteOffset, DL, VT: PtrVT);
525	}
526	case Intrinsic::r600_read_ngroups_x:
527	return LowerImplicitParameter(DAG, VT, DL, DwordOffset: `0`);
528	case Intrinsic::r600_read_ngroups_y:
529	return LowerImplicitParameter(DAG, VT, DL, DwordOffset: `1`);
530	case Intrinsic::r600_read_ngroups_z:
531	return LowerImplicitParameter(DAG, VT, DL, DwordOffset: `2`);
532	case Intrinsic::r600_read_global_size_x:
533	return LowerImplicitParameter(DAG, VT, DL, DwordOffset: `3`);
534	case Intrinsic::r600_read_global_size_y:
535	return LowerImplicitParameter(DAG, VT, DL, DwordOffset: `4`);
536	case Intrinsic::r600_read_global_size_z:
537	return LowerImplicitParameter(DAG, VT, DL, DwordOffset: `5`);
538	case Intrinsic::r600_read_local_size_x:
539	return LowerImplicitParameter(DAG, VT, DL, DwordOffset: `6`);
540	case Intrinsic::r600_read_local_size_y:
541	return LowerImplicitParameter(DAG, VT, DL, DwordOffset: `7`);
542	case Intrinsic::r600_read_local_size_z:
543	return LowerImplicitParameter(DAG, VT, DL, DwordOffset: `8`);
544
545	case Intrinsic::r600_read_tgid_x:
546	case Intrinsic::amdgcn_workgroup_id_x:
547	return CreateLiveInRegisterRaw(DAG, RC: &R600::R600_TReg32RegClass,
548	Reg: R600::T1_X, VT);
549	case Intrinsic::r600_read_tgid_y:
550	case Intrinsic::amdgcn_workgroup_id_y:
551	return CreateLiveInRegisterRaw(DAG, RC: &R600::R600_TReg32RegClass,
552	Reg: R600::T1_Y, VT);
553	case Intrinsic::r600_read_tgid_z:
554	case Intrinsic::amdgcn_workgroup_id_z:
555	return CreateLiveInRegisterRaw(DAG, RC: &R600::R600_TReg32RegClass,
556	Reg: R600::T1_Z, VT);
557	case Intrinsic::r600_read_tidig_x:
558	case Intrinsic::amdgcn_workitem_id_x:
559	return CreateLiveInRegisterRaw(DAG, RC: &R600::R600_TReg32RegClass,
560	Reg: R600::T0_X, VT);
561	case Intrinsic::r600_read_tidig_y:
562	case Intrinsic::amdgcn_workitem_id_y:
563	return CreateLiveInRegisterRaw(DAG, RC: &R600::R600_TReg32RegClass,
564	Reg: R600::T0_Y, VT);
565	case Intrinsic::r600_read_tidig_z:
566	case Intrinsic::amdgcn_workitem_id_z:
567	return CreateLiveInRegisterRaw(DAG, RC: &R600::R600_TReg32RegClass,
568	Reg: R600::T0_Z, VT);
569
570	case Intrinsic::r600_recipsqrt_ieee:
571	return DAG.getNode(Opcode: AMDGPUISD::RSQ, DL, VT, Operand: Op.getOperand(i: `1`));
572
573	case Intrinsic::r600_recipsqrt_clamped:
574	return DAG.getNode(Opcode: AMDGPUISD::RSQ_CLAMP, DL, VT, Operand: Op.getOperand(i: `1`));
575	default:
576	return Op;
577	}
578
579	// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
580	break;
581	}
582	} // end switch(Op.getOpcode())
583	return SDValue ();
584	}
585
586	void R600TargetLowering::ReplaceNodeResults(SDNode *N,
587	SmallVectorImpl<SDValue> &Results,
588	SelectionDAG &DAG) const {
589	switch (N->getOpcode()) {
590	default:
591	AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
592	return;
593	case ISD::FP_TO_UINT:
594	if (N->getValueType(ResNo: `0`) == MVT::i1) {
595	Results.push_back(Elt: lowerFP_TO_UINT(Op: N->getOperand(Num: `0`), DAG));
596	return;
597	}
598	// Since we don't care about out of bounds values we can use FP_TO_SINT for
599	// uints too. The DAGLegalizer code for uint considers some extra cases
600	// which are not necessary here.
601	[[fallthrough]];
602	case ISD::FP_TO_SINT: {
603	if (N->getValueType(ResNo: `0`) == MVT::i1) {
604	Results.push_back(Elt: lowerFP_TO_SINT(Op: N->getOperand(Num: `0`), DAG));
605	return;
606	}
607
608	SDValue Result;
609	if (expandFP_TO_SINT(N, Result, DAG))
610	Results.push_back(Elt: Result);
611	return;
612	}
613	case ISD::SDIVREM: {
614	SDValue Op = SDValue (N, `1`);
615	SDValue RES = LowerSDIVREM(Op, DAG);
616	Results.push_back(Elt: RES);
617	Results.push_back(Elt: RES.getValue(R: `1`));
618	break;
619	}
620	case ISD::UDIVREM: {
621	SDValue Op = SDValue (N, `0`);
622	LowerUDIVREM64(Op, DAG, Results);
623	break;
624	}
625	}
626	}
627
628	SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
629	SDValue Vector) const {
630	SDLoc DL(Vector);
631	EVT VecVT = Vector.getValueType();
632	EVT EltVT = VecVT.getVectorElementType();
633	SmallVector<SDValue, `8`> Args;
634
635	for (unsigned i = `0`, e = VecVT.getVectorNumElements(); i != e; ++i) {
636	Args.push_back(Elt: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltVT, N1: Vector,
637	N2: DAG.getVectorIdxConstant(Val: i, DL)));
638	}
639
640	return DAG.getNode(Opcode: AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VT: VecVT, Ops: Args);
641	}
642
643	SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
644	SelectionDAG &DAG) const {
645	SDLoc DL(Op);
646	SDValue Vector = Op.getOperand(i: `0`);
647	SDValue Index = Op.getOperand(i: `1`);
648
649	if (isa<ConstantSDNode>(Val: Index) \|\|
650	Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
651	return Op;
652
653	Vector = vectorToVerticalVector(DAG, Vector);
654	return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: Op.getValueType(),
655	N1: Vector, N2: Index);
656	}
657
658	SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
659	SelectionDAG &DAG) const {
660	SDLoc DL(Op);
661	SDValue Vector = Op.getOperand(i: `0`);
662	SDValue Value = Op.getOperand(i: `1`);
663	SDValue Index = Op.getOperand(i: `2`);
664
665	if (isa<ConstantSDNode>(Val: Index) \|\|
666	Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
667	return Op;
668
669	Vector = vectorToVerticalVector(DAG, Vector);
670	SDValue Insert = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: Op.getValueType(),
671	N1: Vector, N2: Value, N3: Index);
672	return vectorToVerticalVector(DAG, Vector: Insert);
673	}
674
675	SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
676	SDValue Op,
677	SelectionDAG &DAG) const {
678	GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Val&: Op);
679	if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
680	return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
681
682	const DataLayout &DL = DAG.getDataLayout();
683	const GlobalValue *GV = GSD->getGlobal();
684	MVT ConstPtrVT = getPointerTy(DL, AS: AMDGPUAS::CONSTANT_ADDRESS);
685
686	SDValue GA = DAG.getTargetGlobalAddress(GV, DL: SDLoc (GSD), VT: ConstPtrVT);
687	return DAG.getNode(Opcode: AMDGPUISD::CONST_DATA_PTR, DL: SDLoc (GSD), VT: ConstPtrVT, Operand: GA);
688	}
689
690	SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
691	// On hw >= R700, COS/SIN input must be between -1. and 1.
692	// Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
693	EVT VT = Op.getValueType();
694	SDValue Arg = Op.getOperand(i: `0`);
695	SDLoc DL(Op);
696
697	// TODO: Should this propagate fast-math-flags?
698	SDValue FractPart = DAG.getNode(Opcode: AMDGPUISD::FRACT, DL, VT,
699	Operand: DAG.getNode(Opcode: ISD::FADD, DL, VT,
700	N1: DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: Arg,
701	N2: DAG.getConstantFP(Val: `0.15915494309`, DL, VT: MVT::f32)),
702	N2: DAG.getConstantFP(Val: `0.5`, DL, VT: MVT::f32)));
703	unsigned TrigNode;
704	switch (Op.getOpcode()) {
705	case ISD::FCOS:
706	TrigNode = AMDGPUISD::COS_HW;
707	break;
708	case ISD::FSIN:
709	TrigNode = AMDGPUISD::SIN_HW;
710	break;
711	default:
712	llvm_unreachable("Wrong trig opcode");
713	}
714	SDValue TrigVal = DAG.getNode(Opcode: TrigNode, DL, VT,
715	Operand: DAG.getNode(Opcode: ISD::FADD, DL, VT, N1: FractPart,
716	N2: DAG.getConstantFP(Val: -`0.5`, DL, VT: MVT::f32)));
717	if (Gen >= AMDGPUSubtarget::R700)
718	return TrigVal;
719	// On R600 hw, COS/SIN input must be between -Pi and Pi.
720	return DAG.getNode(Opcode: ISD::FMUL, DL, VT, N1: TrigVal,
721	N2: DAG.getConstantFP(Val: numbers::pif, DL, VT: MVT::f32));
722	}
723
724	SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
725	SelectionDAG &DAG) const {
726	SDValue Lo, Hi;
727	expandShiftParts(N: Op.getNode(), Lo, Hi, DAG);
728	return DAG.getMergeValues(Ops: {Lo, Hi}, dl: SDLoc (Op));
729	}
730
731	SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
732	unsigned mainop, unsigned ovf) const {
733	SDLoc DL(Op);
734	EVT VT = Op.getValueType();
735
736	SDValue Lo = Op.getOperand(i: `0`);
737	SDValue Hi = Op.getOperand(i: `1`);
738
739	SDValue OVF = DAG.getNode(Opcode: ovf, DL, VT, N1: Lo, N2: Hi);
740	// Extend sign.
741	OVF = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT, N1: OVF,
742	N2: DAG.getValueType(MVT::i1));
743
744	SDValue Res = DAG.getNode(Opcode: mainop, DL, VT, N1: Lo, N2: Hi);
745
746	return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: Res, N2: OVF);
747	}
748
749	SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
750	SDLoc DL(Op);
751	return DAG.getNode(
752	Opcode: ISD::SETCC,
753	DL,
754	VT: MVT::i1,
755	N1: Op, N2: DAG.getConstantFP(Val: `1.0f`, DL, VT: MVT::f32),
756	N3: DAG.getCondCode(Cond: ISD::SETEQ));
757	}
758
759	SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
760	SDLoc DL(Op);
761	return DAG.getNode(
762	Opcode: ISD::SETCC,
763	DL,
764	VT: MVT::i1,
765	N1: Op, N2: DAG.getConstantFP(Val: -`1.0f`, DL, VT: MVT::f32),
766	N3: DAG.getCondCode(Cond: ISD::SETEQ));
767	}
768
769	SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
770	const SDLoc &DL,
771	unsigned DwordOffset) const {
772	unsigned ByteOffset = DwordOffset * `4`;
773	PointerType *PtrType =
774	PointerType::get(C&: *DAG.getContext(), AddressSpace: AMDGPUAS::PARAM_I_ADDRESS);
775
776	// We shouldn't be using an offset wider than 16-bits for implicit parameters.
777	assert(isInt<`16`>(ByteOffset));
778
779	return DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(),
780	Ptr: DAG.getConstant(Val: ByteOffset, DL, VT: MVT::i32), // PTR
781	PtrInfo: MachinePointerInfo (ConstantPointerNull::get(T: PtrType)));
782	}
783
784	bool R600TargetLowering::isZero(SDValue Op) const {
785	if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Val&: Op))
786	return Cst->isZero();
787	if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Val&: Op))
788	return CstFP->isZero();
789	return false;
790	}
791
792	bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
793	if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Val&: Op)) {
794	return CFP->isExactlyValue(V: `1.0`);
795	}
796	return isAllOnesConstant(V: Op);
797	}
798
799	bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
800	if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Val&: Op)) {
801	return CFP->getValueAPF().isZero();
802	}
803	return isNullConstant(V: Op);
804	}
805
806	SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
807	SDLoc DL(Op);
808	EVT VT = Op.getValueType();
809
810	SDValue LHS = Op.getOperand(i: `0`);
811	SDValue RHS = Op.getOperand(i: `1`);
812	SDValue True = Op.getOperand(i: `2`);
813	SDValue False = Op.getOperand(i: `3`);
814	SDValue CC = Op.getOperand(i: `4`);
815	SDValue Temp;
816
817	if (VT == MVT::f32) {
818	DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
819	SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
820	if (MinMax)
821	return MinMax;
822	}
823
824	// LHS and RHS are guaranteed to be the same value type
825	EVT CompareVT = LHS.getValueType();
826
827	// Check if we can lower this to a native operation.
828
829	// Try to lower to a SET instruction:*
830	//
831	// SET can match the following patterns:*
832	//
833	// select_cc f32, f32, -1, 0, cc_supported
834	// select_cc f32, f32, 1.0f, 0.0f, cc_supported
835	// select_cc i32, i32, -1, 0, cc_supported
836	//
837
838	// Move hardware True/False values to the correct operand.
839	if (isHWTrueValue(Op: False) && isHWFalseValue(Op: True)) {
840	ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Val&: CC)->get();
841	ISD::CondCode InverseCC = ISD::getSetCCInverse(Operation: CCOpcode, Type: CompareVT);
842	if (isCondCodeLegal(CC: InverseCC, VT: CompareVT.getSimpleVT())) {
843	std::swap(a&: False, b&: True);
844	CC = DAG.getCondCode(Cond: InverseCC);
845	} else {
846	ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(Operation: InverseCC);
847	if (isCondCodeLegal(CC: SwapInvCC, VT: CompareVT.getSimpleVT())) {
848	std::swap(a&: False, b&: True);
849	std::swap(a&: LHS, b&: RHS);
850	CC = DAG.getCondCode(Cond: SwapInvCC);
851	}
852	}
853	}
854
855	if (isHWTrueValue(Op: True) && isHWFalseValue(Op: False) &&
856	(CompareVT == VT \|\| VT == MVT::i32)) {
857	// This can be matched by a SET instruction.*
858	return DAG.getNode(Opcode: ISD::SELECT_CC, DL, VT, N1: LHS, N2: RHS, N3: True, N4: False, N5: CC);
859	}
860
861	// Try to lower to a CND instruction:*
862	//
863	// CND can match the following patterns:*
864	//
865	// select_cc f32, 0.0, f32, f32, cc_supported
866	// select_cc f32, 0.0, i32, i32, cc_supported
867	// select_cc i32, 0, f32, f32, cc_supported
868	// select_cc i32, 0, i32, i32, cc_supported
869	//
870
871	// Try to move the zero value to the RHS
872	if (isZero(Op: LHS)) {
873	ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Val&: CC)->get();
874	// Try swapping the operands
875	ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(Operation: CCOpcode);
876	if (isCondCodeLegal(CC: CCSwapped, VT: CompareVT.getSimpleVT())) {
877	std::swap(a&: LHS, b&: RHS);
878	CC = DAG.getCondCode(Cond: CCSwapped);
879	} else {
880	// Try inverting the condition and then swapping the operands
881	ISD::CondCode CCInv = ISD::getSetCCInverse(Operation: CCOpcode, Type: CompareVT);
882	CCSwapped = ISD::getSetCCSwappedOperands(Operation: CCInv);
883	if (isCondCodeLegal(CC: CCSwapped, VT: CompareVT.getSimpleVT())) {
884	std::swap(a&: True, b&: False);
885	std::swap(a&: LHS, b&: RHS);
886	CC = DAG.getCondCode(Cond: CCSwapped);
887	}
888	}
889	}
890	if (isZero(Op: RHS)) {
891	SDValue Cond = LHS;
892	SDValue Zero = RHS;
893	ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Val&: CC)->get();
894	if (CompareVT != VT) {
895	// Bitcast True / False to the correct types. This will end up being
896	// a nop, but it allows us to define only a single pattern in the
897	// .TD files for each CND instruction rather than having to have*
898	// one pattern for integer True/False and one for fp True/False
899	True = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: CompareVT, Operand: True);
900	False = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: CompareVT, Operand: False);
901	}
902
903	switch (CCOpcode) {
904	case ISD::SETONE:
905	case ISD::SETUNE:
906	case ISD::SETNE:
907	CCOpcode = ISD::getSetCCInverse(Operation: CCOpcode, Type: CompareVT);
908	Temp = True;
909	True = False;
910	False = Temp;
911	break;
912	default:
913	break;
914	}
915	SDValue SelectNode = DAG.getNode(Opcode: ISD::SELECT_CC, DL, VT: CompareVT,
916	N1: Cond, N2: Zero,
917	N3: True, N4: False,
918	N5: DAG.getCondCode(Cond: CCOpcode));
919	return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: SelectNode);
920	}
921
922	// If we make it this for it means we have no native instructions to handle
923	// this SELECT_CC, so we must lower it.
924	SDValue HWTrue, HWFalse;
925
926	if (CompareVT == MVT::f32) {
927	HWTrue = DAG.getConstantFP(Val: `1.0f`, DL, VT: CompareVT);
928	HWFalse = DAG.getConstantFP(Val: `0.0f`, DL, VT: CompareVT);
929	} else if (CompareVT == MVT::i32) {
930	HWTrue = DAG.getAllOnesConstant(DL, VT: CompareVT);
931	HWFalse = DAG.getConstant(Val: `0`, DL, VT: CompareVT);
932	}
933	else {
934	llvm_unreachable("Unhandled value type in LowerSELECT_CC");
935	}
936
937	// Lower this unsupported SELECT_CC into a combination of two supported
938	// SELECT_CC operations.
939	SDValue Cond = DAG.getNode(Opcode: ISD::SELECT_CC, DL, VT: CompareVT, N1: LHS, N2: RHS, N3: HWTrue, N4: HWFalse, N5: CC);
940
941	return DAG.getNode(Opcode: ISD::SELECT_CC, DL, VT,
942	N1: Cond, N2: HWFalse,
943	N3: True, N4: False,
944	N5: DAG.getCondCode(Cond: ISD::SETNE));
945	}
946
947	SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
948	SelectionDAG &DAG) const {
949	SDLoc SL(Op);
950	EVT VT = Op.getValueType();
951
952	const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Val&: Op);
953	unsigned SrcAS = ASC->getSrcAddressSpace();
954	unsigned DestAS = ASC->getDestAddressSpace();
955
956	if (isNullConstant(V: Op.getOperand(i: `0`)) && SrcAS == AMDGPUAS::FLAT_ADDRESS)
957	return DAG.getSignedConstant(Val: AMDGPU::getNullPointerValue(AS: DestAS), DL: SL, VT);
958
959	return Op;
960	}
961
962	/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
963	/// convert these pointers to a register index. Each register holds
964	/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
965	/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
966	/// for indirect addressing.
967	SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
968	unsigned StackWidth,
969	SelectionDAG &DAG) const {
970	unsigned SRLPad;
971	switch(StackWidth) {
972	case `1`:
973	SRLPad = `2`;
974	break;
975	case `2`:
976	SRLPad = `3`;
977	break;
978	case `4`:
979	SRLPad = `4`;
980	break;
981	default: llvm_unreachable("Invalid stack width");
982	}
983
984	SDLoc DL(Ptr);
985	return DAG.getNode(Opcode: ISD::SRL, DL, VT: Ptr.getValueType(), N1: Ptr,
986	N2: DAG.getConstant(Val: SRLPad, DL, VT: MVT::i32));
987	}
988
989	void R600TargetLowering::getStackAddress(unsigned StackWidth,
990	unsigned ElemIdx,
991	unsigned &Channel,
992	unsigned &PtrIncr) const {
993	switch (StackWidth) {
994	default:
995	case `1`:
996	Channel = `0`;
997	if (ElemIdx > `0`) {
998	PtrIncr = `1`;
999	} else {
1000	PtrIncr = `0`;
1001	}
1002	break;
1003	case `2`:
1004	Channel = ElemIdx % `2`;
1005	if (ElemIdx == `2`) {
1006	PtrIncr = `1`;
1007	} else {
1008	PtrIncr = `0`;
1009	}
1010	break;
1011	case `4`:
1012	Channel = ElemIdx;
1013	PtrIncr = `0`;
1014	break;
1015	}
1016	}
1017
1018	SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1019	SelectionDAG &DAG) const {
1020	SDLoc DL(Store);
1021	//TODO: Who creates the i8 stores?
1022	assert(Store->isTruncatingStore()
1023	\|\| Store->getValue().getValueType() == MVT::i8);
1024	assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
1025
1026	SDValue Mask;
1027	if (Store->getMemoryVT() == MVT::i8) {
1028	assert(Store->getAlign() >= `1`);
1029	Mask = DAG.getConstant(Val: `0xff`, DL, VT: MVT::i32);
1030	} else if (Store->getMemoryVT() == MVT::i16) {
1031	assert(Store->getAlign() >= `2`);
1032	Mask = DAG.getConstant(Val: `0xffff`, DL, VT: MVT::i32);
1033	} else {
1034	llvm_unreachable("Unsupported private trunc store");
1035	}
1036
1037	SDValue OldChain = Store->getChain();
1038	bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1039	// Skip dummy
1040	SDValue Chain = VectorTrunc ? OldChain ->getOperand(Num: `0`) : OldChain;
1041	SDValue BasePtr = Store->getBasePtr();
1042	SDValue Offset = Store->getOffset();
1043	EVT MemVT = Store->getMemoryVT();
1044
1045	SDValue LoadPtr = BasePtr;
1046	if (!Offset.isUndef()) {
1047	LoadPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i32, N1: BasePtr, N2: Offset);
1048	}
1049
1050	// Get dword location
1051	// TODO: this should be eliminated by the future SHR ptr, 2
1052	SDValue Ptr = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i32, N1: LoadPtr,
1053	N2: DAG.getConstant(Val: `0xfffffffc`, DL, VT: MVT::i32));
1054
1055	// Load dword
1056	// TODO: can we be smarter about machine pointer info?
1057	MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1058	SDValue Dst = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr, PtrInfo);
1059
1060	Chain = Dst.getValue(R: `1`);
1061
1062	// Get offset in dword
1063	SDValue ByteIdx = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i32, N1: LoadPtr,
1064	N2: DAG.getConstant(Val: `0x3`, DL, VT: MVT::i32));
1065
1066	// Convert byte offset to bit shift
1067	SDValue ShiftAmt = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i32, N1: ByteIdx,
1068	N2: DAG.getConstant(Val: `3`, DL, VT: MVT::i32));
1069
1070	// TODO: Contrary to the name of the function,
1071	// it also handles sub i32 non-truncating stores (like i1)
1072	SDValue SExtValue = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: MVT::i32,
1073	Operand: Store->getValue());
1074
1075	// Mask the value to the right type
1076	SDValue MaskedValue = DAG.getZeroExtendInReg(Op: SExtValue, DL, VT: MemVT);
1077
1078	// Shift the value in place
1079	SDValue ShiftedValue = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i32,
1080	N1: MaskedValue, N2: ShiftAmt);
1081
1082	// Shift the mask in place
1083	SDValue DstMask = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i32, N1: Mask, N2: ShiftAmt);
1084
1085	// Invert the mask. NOTE: if we had native ROL instructions we could
1086	// use inverted mask
1087	DstMask = DAG.getNOT(DL, Val: DstMask, VT: MVT::i32);
1088
1089	// Cleanup the target bits
1090	Dst = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i32, N1: Dst, N2: DstMask);
1091
1092	// Add the new bits
1093	SDValue Value = DAG.getNode(Opcode: ISD::OR, DL, VT: MVT::i32, N1: Dst, N2: ShiftedValue);
1094
1095	// Store dword
1096	// TODO: Can we be smarter about MachinePointerInfo?
1097	SDValue NewStore = DAG.getStore(Chain, dl: DL, Val: Value, Ptr, PtrInfo);
1098
1099	// If we are part of expanded vector, make our neighbors depend on this store
1100	if (VectorTrunc) {
1101	// Make all other vector elements depend on this store
1102	Chain = DAG.getNode(Opcode: AMDGPUISD::DUMMY_CHAIN, DL, VT: MVT::Other, Operand: NewStore);
1103	DAG.ReplaceAllUsesOfValueWith(From: OldChain, To: Chain);
1104	}
1105	return NewStore;
1106	}
1107
1108	SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1109	StoreSDNode *StoreNode = cast<StoreSDNode>(Val&: Op);
1110	unsigned AS = StoreNode->getAddressSpace();
1111
1112	SDValue Chain = StoreNode->getChain();
1113	SDValue Ptr = StoreNode->getBasePtr();
1114	SDValue Value = StoreNode->getValue();
1115
1116	EVT VT = Value.getValueType();
1117	EVT MemVT = StoreNode->getMemoryVT();
1118	EVT PtrVT = Ptr.getValueType();
1119
1120	SDLoc DL(Op);
1121
1122	const bool TruncatingStore = StoreNode->isTruncatingStore();
1123
1124	// Neither LOCAL nor PRIVATE can do vectors at the moment
1125	if ((AS == AMDGPUAS::LOCAL_ADDRESS \|\| AS == AMDGPUAS::PRIVATE_ADDRESS \|\|
1126	TruncatingStore) &&
1127	VT.isVector()) {
1128	if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
1129	// Add an extra level of chain to isolate this vector
1130	SDValue NewChain = DAG.getNode(Opcode: AMDGPUISD::DUMMY_CHAIN, DL, VT: MVT::Other, Operand: Chain);
1131	SmallVector<SDValue, `4`> NewOps(StoreNode->ops());
1132	NewOps [`0`] = NewChain;
1133	StoreNode = cast<StoreSDNode>(Val: DAG.UpdateNodeOperands(N: StoreNode, Ops: NewOps));
1134	}
1135
1136	return scalarizeVectorStore(ST: StoreNode, DAG);
1137	}
1138
1139	Align Alignment = StoreNode->getAlign();
1140	if (Alignment < MemVT.getStoreSize() &&
1141	!allowsMisalignedMemoryAccesses(VT: MemVT, AS, Alignment,
1142	Flags: StoreNode->getMemOperand()->getFlags(),
1143	IsFast: nullptr)) {
1144	return expandUnalignedStore(ST: StoreNode, DAG);
1145	}
1146
1147	SDValue DWordAddr = DAG.getNode(Opcode: ISD::SRL, DL, VT: PtrVT, N1: Ptr,
1148	N2: DAG.getConstant(Val: `2`, DL, VT: PtrVT));
1149
1150	if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1151	// It is beneficial to create MSKOR here instead of combiner to avoid
1152	// artificial dependencies introduced by RMW
1153	if (TruncatingStore) {
1154	assert(VT.bitsLE(MVT::i32));
1155	SDValue MaskConstant;
1156	if (MemVT == MVT::i8) {
1157	MaskConstant = DAG.getConstant(Val: `0xFF`, DL, VT: MVT::i32);
1158	} else {
1159	assert(MemVT == MVT::i16);
1160	assert(StoreNode->getAlign() >= `2`);
1161	MaskConstant = DAG.getConstant(Val: `0xFFFF`, DL, VT: MVT::i32);
1162	}
1163
1164	SDValue ByteIndex = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, N1: Ptr,
1165	N2: DAG.getConstant(Val: `0x00000003`, DL, VT: PtrVT));
1166	SDValue BitShift = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ByteIndex,
1167	N2: DAG.getConstant(Val: `3`, DL, VT));
1168
1169	// Put the mask in correct place
1170	SDValue Mask = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: MaskConstant, N2: BitShift);
1171
1172	// Put the value bits in correct place
1173	SDValue TruncValue = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Value, N2: MaskConstant);
1174	SDValue ShiftedValue = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: TruncValue, N2: BitShift);
1175
1176	// XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1177	// vector instead.
1178	SDValue Src[`4`] = {
1179	ShiftedValue,
1180	DAG.getConstant(Val: `0`, DL, VT: MVT::i32),
1181	DAG.getConstant(Val: `0`, DL, VT: MVT::i32),
1182	Mask
1183	};
1184	SDValue Input = DAG.getBuildVector(VT: MVT::v4i32, DL, Ops: Src);
1185	SDValue Args[`3`] = { Chain, Input, DWordAddr };
1186	return DAG.getMemIntrinsicNode(Opcode: AMDGPUISD::STORE_MSKOR, dl: DL,
1187	VTList: Op ->getVTList(), Ops: Args, MemVT,
1188	MMO: StoreNode->getMemOperand());
1189	}
1190	if (Ptr ->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(VT: MVT::i32)) {
1191	// Convert pointer from byte address to dword address.
1192	Ptr = DAG.getNode(Opcode: AMDGPUISD::DWORDADDR, DL, VT: PtrVT, Operand: DWordAddr);
1193
1194	if (StoreNode->isIndexed()) {
1195	llvm_unreachable("Indexed stores not supported yet");
1196	} else {
1197	Chain = DAG.getStore(Chain, dl: DL, Val: Value, Ptr, MMO: StoreNode->getMemOperand());
1198	}
1199	return Chain;
1200	}
1201	}
1202
1203	// GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1204	if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1205	return SDValue ();
1206
1207	if (MemVT.bitsLT(VT: MVT::i32))
1208	return lowerPrivateTruncStore(Store: StoreNode, DAG);
1209
1210	// Standard i32+ store, tag it with DWORDADDR to note that the address
1211	// has been shifted
1212	if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1213	Ptr = DAG.getNode(Opcode: AMDGPUISD::DWORDADDR, DL, VT: PtrVT, Operand: DWordAddr);
1214	return DAG.getStore(Chain, dl: DL, Val: Value, Ptr, MMO: StoreNode->getMemOperand());
1215	}
1216
1217	// Tagged i32+ stores will be matched by patterns
1218	return SDValue ();
1219	}
1220
1221	// return (512 + (kc_bank << 12)
1222	static int
1223	ConstantAddressBlock(unsigned AddressSpace) {
1224	switch (AddressSpace) {
1225	case AMDGPUAS::CONSTANT_BUFFER_0:
1226	return `512`;
1227	case AMDGPUAS::CONSTANT_BUFFER_1:
1228	return `512` + `4096`;
1229	case AMDGPUAS::CONSTANT_BUFFER_2:
1230	return `512` + `4096` * `2`;
1231	case AMDGPUAS::CONSTANT_BUFFER_3:
1232	return `512` + `4096` * `3`;
1233	case AMDGPUAS::CONSTANT_BUFFER_4:
1234	return `512` + `4096` * `4`;
1235	case AMDGPUAS::CONSTANT_BUFFER_5:
1236	return `512` + `4096` * `5`;
1237	case AMDGPUAS::CONSTANT_BUFFER_6:
1238	return `512` + `4096` * `6`;
1239	case AMDGPUAS::CONSTANT_BUFFER_7:
1240	return `512` + `4096` * `7`;
1241	case AMDGPUAS::CONSTANT_BUFFER_8:
1242	return `512` + `4096` * `8`;
1243	case AMDGPUAS::CONSTANT_BUFFER_9:
1244	return `512` + `4096` * `9`;
1245	case AMDGPUAS::CONSTANT_BUFFER_10:
1246	return `512` + `4096` * `10`;
1247	case AMDGPUAS::CONSTANT_BUFFER_11:
1248	return `512` + `4096` * `11`;
1249	case AMDGPUAS::CONSTANT_BUFFER_12:
1250	return `512` + `4096` * `12`;
1251	case AMDGPUAS::CONSTANT_BUFFER_13:
1252	return `512` + `4096` * `13`;
1253	case AMDGPUAS::CONSTANT_BUFFER_14:
1254	return `512` + `4096` * `14`;
1255	case AMDGPUAS::CONSTANT_BUFFER_15:
1256	return `512` + `4096` * `15`;
1257	default:
1258	return -`1`;
1259	}
1260	}
1261
1262	SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1263	SelectionDAG &DAG) const {
1264	SDLoc DL(Op);
1265	LoadSDNode *Load = cast<LoadSDNode>(Val&: Op);
1266	ISD::LoadExtType ExtType = Load->getExtensionType();
1267	EVT MemVT = Load->getMemoryVT();
1268	assert(Load->getAlign() >= MemVT.getStoreSize());
1269
1270	SDValue BasePtr = Load->getBasePtr();
1271	SDValue Chain = Load->getChain();
1272	SDValue Offset = Load->getOffset();
1273
1274	SDValue LoadPtr = BasePtr;
1275	if (!Offset.isUndef()) {
1276	LoadPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i32, N1: BasePtr, N2: Offset);
1277	}
1278
1279	// Get dword location
1280	// NOTE: this should be eliminated by the future SHR ptr, 2
1281	SDValue Ptr = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i32, N1: LoadPtr,
1282	N2: DAG.getConstant(Val: `0xfffffffc`, DL, VT: MVT::i32));
1283
1284	// Load dword
1285	// TODO: can we be smarter about machine pointer info?
1286	MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1287	SDValue Read = DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr, PtrInfo);
1288
1289	// Get offset within the register.
1290	SDValue ByteIdx = DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i32,
1291	N1: LoadPtr, N2: DAG.getConstant(Val: `0x3`, DL, VT: MVT::i32));
1292
1293	// Bit offset of target byte (byteIdx 8).*
1294	SDValue ShiftAmt = DAG.getNode(Opcode: ISD::SHL, DL, VT: MVT::i32, N1: ByteIdx,
1295	N2: DAG.getConstant(Val: `3`, DL, VT: MVT::i32));
1296
1297	// Shift to the right.
1298	SDValue Ret = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i32, N1: Read, N2: ShiftAmt);
1299
1300	// Eliminate the upper bits by setting them to ...
1301	EVT MemEltVT = MemVT.getScalarType();
1302
1303	if (ExtType == ISD::SEXTLOAD) { // ... ones.
1304	SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1305	Ret = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i32, N1: Ret, N2: MemEltVTNode);
1306	} else { // ... or zeros.
1307	Ret = DAG.getZeroExtendInReg(Op: Ret, DL, VT: MemEltVT);
1308	}
1309
1310	SDValue Ops[] = {
1311	Ret,
1312	Read.getValue(R: `1`) // This should be our output chain
1313	};
1314
1315	return DAG.getMergeValues(Ops, dl: DL);
1316	}
1317
1318	SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1319	LoadSDNode *LoadNode = cast<LoadSDNode>(Val&: Op);
1320	unsigned AS = LoadNode->getAddressSpace();
1321	EVT MemVT = LoadNode->getMemoryVT();
1322	ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1323
1324	if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1325	ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(VT: MVT::i32)) {
1326	return lowerPrivateExtLoad(Op, DAG);
1327	}
1328
1329	SDLoc DL(Op);
1330	EVT VT = Op.getValueType();
1331	SDValue Chain = LoadNode->getChain();
1332	SDValue Ptr = LoadNode->getBasePtr();
1333
1334	if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS \|\|
1335	LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1336	VT.isVector()) {
1337	SDValue Ops[`2`];
1338	std::tie(args&: Ops[`0`], args&: Ops[`1`]) = scalarizeVectorLoad(LD: LoadNode, DAG);
1339	return DAG.getMergeValues(Ops, dl: DL);
1340	}
1341
1342	// This is still used for explicit load from addrspace(8)
1343	int ConstantBlock = ConstantAddressBlock(AddressSpace: LoadNode->getAddressSpace());
1344	if (ConstantBlock > -`1` &&
1345	((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) \|\|
1346	(LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1347	SDValue Result;
1348	if (isa<Constant>(Val: LoadNode->getMemOperand()->getValue()) \|\|
1349	isa<ConstantSDNode>(Val: Ptr)) {
1350	return constBufferLoad(LoadNode, Block: LoadNode->getAddressSpace(), DAG);
1351	}
1352	// TODO: Does this even work?
1353	// non-constant ptr can't be folded, keeps it as a v4f32 load
1354	Result = DAG.getNode(Opcode: AMDGPUISD::CONST_ADDRESS, DL, VT: MVT::v4i32,
1355	N1: DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i32, N1: Ptr,
1356	N2: DAG.getConstant(Val: `4`, DL, VT: MVT::i32)),
1357	N2: DAG.getConstant(Val: LoadNode->getAddressSpace() -
1358	AMDGPUAS::CONSTANT_BUFFER_0,
1359	DL, VT: MVT::i32));
1360
1361	if (!VT.isVector()) {
1362	Result = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::i32, N1: Result,
1363	N2: DAG.getConstant(Val: `0`, DL, VT: MVT::i32));
1364	}
1365
1366	SDValue MergedValues[`2`] = {
1367	Result,
1368	Chain
1369	};
1370	return DAG.getMergeValues(Ops: MergedValues, dl: DL);
1371	}
1372
1373	// For most operations returning SDValue() will result in the node being
1374	// expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1375	// need to manually expand loads that may be legal in some address spaces and
1376	// illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1377	// compute shaders, since the data is sign extended when it is uploaded to the
1378	// buffer. However SEXT loads from other address spaces are not supported, so
1379	// we need to expand them here.
1380	if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1381	assert(!MemVT.isVector() && (MemVT == MVT::i16 \|\| MemVT == MVT::i8));
1382	SDValue NewLoad = DAG.getExtLoad(
1383	ExtType: ISD::EXTLOAD, dl: DL, VT, Chain, Ptr, PtrInfo: LoadNode->getPointerInfo(), MemVT,
1384	Alignment: LoadNode->getAlign(), MMOFlags: LoadNode->getMemOperand()->getFlags());
1385	SDValue Res = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT, N1: NewLoad,
1386	N2: DAG.getValueType(MemVT));
1387
1388	SDValue MergedValues[`2`] = { Res, Chain };
1389	return DAG.getMergeValues(Ops: MergedValues, dl: DL);
1390	}
1391
1392	if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1393	return SDValue ();
1394	}
1395
1396	// DWORDADDR ISD marks already shifted address
1397	if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1398	assert(VT == MVT::i32);
1399	Ptr = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i32, N1: Ptr, N2: DAG.getConstant(Val: `2`, DL, VT: MVT::i32));
1400	Ptr = DAG.getNode(Opcode: AMDGPUISD::DWORDADDR, DL, VT: MVT::i32, Operand: Ptr);
1401	return DAG.getLoad(VT: MVT::i32, dl: DL, Chain, Ptr, MMO: LoadNode->getMemOperand());
1402	}
1403	return SDValue ();
1404	}
1405
1406	SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1407	SDValue Chain = Op.getOperand(i: `0`);
1408	SDValue Cond = Op.getOperand(i: `1`);
1409	SDValue Jump = Op.getOperand(i: `2`);
1410
1411	return DAG.getNode(Opcode: AMDGPUISD::BRANCH_COND, DL: SDLoc (Op), VT: Op.getValueType(),
1412	N1: Chain, N2: Jump, N3: Cond);
1413	}
1414
1415	SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1416	SelectionDAG &DAG) const {
1417	MachineFunction &MF = DAG.getMachineFunction();
1418	const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1419
1420	FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Val&: Op);
1421
1422	unsigned FrameIndex = FIN->getIndex();
1423	Register IgnoredFrameReg;
1424	StackOffset Offset =
1425	TFL->getFrameIndexReference(MF, FI: FrameIndex, FrameReg&: IgnoredFrameReg);
1426	return DAG.getConstant(Val: Offset.getFixed() * `4` * TFL->getStackWidth(MF),
1427	DL: SDLoc (Op), VT: Op.getValueType());
1428	}
1429
1430	CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1431	bool IsVarArg) const {
1432	switch (CC) {
1433	case CallingConv::AMDGPU_KERNEL:
1434	case CallingConv::SPIR_KERNEL:
1435	case CallingConv::C:
1436	case CallingConv::Fast:
1437	case CallingConv::Cold:
1438	llvm_unreachable("kernels should not be handled here");
1439	case CallingConv::AMDGPU_VS:
1440	case CallingConv::AMDGPU_GS:
1441	case CallingConv::AMDGPU_PS:
1442	case CallingConv::AMDGPU_CS:
1443	case CallingConv::AMDGPU_HS:
1444	case CallingConv::AMDGPU_ES:
1445	case CallingConv::AMDGPU_LS:
1446	return CC_R600;
1447	default:
1448	reportFatalUsageError(reason: "unsupported calling convention");
1449	}
1450	}
1451
1452	/// XXX Only kernel functions are supported, so we can assume for now that
1453	/// every function is a kernel function, but in the future we should use
1454	/// separate calling conventions for kernel and non-kernel functions.
1455	SDValue R600TargetLowering::LowerFormalArguments(
1456	SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1457	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1458	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1459	SmallVector<CCValAssign, `16`> ArgLocs;
1460	CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1461	*DAG.getContext());
1462	MachineFunction &MF = DAG.getMachineFunction();
1463
1464	if (AMDGPU::isShader(CC: CallConv)) {
1465	CCInfo.AnalyzeFormalArguments(Ins, Fn: CCAssignFnForCall(CC: CallConv, IsVarArg: isVarArg));
1466	} else {
1467	analyzeFormalArgumentsCompute(State&: CCInfo, Ins);
1468	}
1469
1470	for (unsigned i = `0`, e = Ins.size(); i < e; ++i) {
1471	CCValAssign &VA = ArgLocs [i];
1472	const ISD::InputArg &In = Ins [i];
1473	EVT VT = In.VT;
1474	EVT MemVT = VA.getLocVT();
1475	if (!VT.isVector() && MemVT.isVector()) {
1476	// Get load source type if scalarized.
1477	MemVT = MemVT.getVectorElementType();
1478	}
1479
1480	if (VT.isInteger() && !MemVT.isInteger())
1481	MemVT = MemVT.changeTypeToInteger();
1482
1483	if (AMDGPU::isShader(CC: CallConv)) {
1484	Register Reg = MF.addLiveIn(PReg: VA.getLocReg(), RC: &R600::R600_Reg128RegClass);
1485	SDValue Register = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT);
1486	InVals.push_back(Elt: Register);
1487	continue;
1488	}
1489
1490	// i64 isn't a legal type, so the register type used ends up as i32, which
1491	// isn't expected here. It attempts to create this sextload, but it ends up
1492	// being invalid. Somehow this seems to work with i64 arguments, but breaks
1493	// for <1 x i64>.
1494
1495	// The first 36 bytes of the input buffer contains information about
1496	// thread group and global sizes.
1497	ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1498	if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1499	if (VT.isFloatingPoint()) {
1500	Ext = ISD::EXTLOAD;
1501	} else {
1502	// FIXME: This should really check the extload type, but the handling of
1503	// extload vector parameters seems to be broken.
1504
1505	// Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1506	Ext = ISD::SEXTLOAD;
1507	}
1508	}
1509
1510	// Compute the offset from the value.
1511	// XXX - I think PartOffset should give you this, but it seems to give the
1512	// size of the register which isn't useful.
1513
1514	unsigned PartOffset = VA.getLocMemOffset();
1515	Align Alignment = commonAlignment(A: Align (VT.getStoreSize()), Offset: PartOffset);
1516
1517	MachinePointerInfo PtrInfo(AMDGPUAS::PARAM_I_ADDRESS);
1518	SDValue Arg = DAG.getLoad(
1519	AM: ISD::UNINDEXED, ExtType: Ext, VT, dl: DL, Chain,
1520	Ptr: DAG.getConstant(Val: PartOffset, DL, VT: MVT::i32), Offset: DAG.getUNDEF(VT: MVT::i32),
1521	PtrInfo,
1522	MemVT, Alignment, MMOFlags: MachineMemOperand::MONonTemporal \|
1523	MachineMemOperand::MODereferenceable \|
1524	MachineMemOperand::MOInvariant);
1525
1526	InVals.push_back(Elt: Arg);
1527	}
1528	return Chain;
1529	}
1530
1531	EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1532	EVT VT) const {
1533	if (!VT.isVector())
1534	return MVT::i32;
1535	return VT.changeVectorElementTypeToInteger();
1536	}
1537
1538	bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
1539	const MachineFunction &MF) const {
1540	// Local and Private addresses do not handle vectors. Limit to i32
1541	if ((AS == AMDGPUAS::LOCAL_ADDRESS \|\| AS == AMDGPUAS::PRIVATE_ADDRESS)) {
1542	return (MemVT.getSizeInBits() <= `32`);
1543	}
1544	return true;
1545	}
1546
1547	bool R600TargetLowering::allowsMisalignedMemoryAccesses(
1548	EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1549	unsigned IsFast) const* {
1550	if (IsFast)
1551	*IsFast = `0`;
1552
1553	if (!VT.isSimple() \|\| VT == MVT::Other)
1554	return false;
1555
1556	if (VT.bitsLT(VT: MVT::i32))
1557	return false;
1558
1559	// TODO: This is a rough estimate.
1560	if (IsFast)
1561	*IsFast = `1`;
1562
1563	return VT.bitsGT(VT: MVT::i32) && Alignment >= Align (`4`);
1564	}
1565
1566	static SDValue CompactSwizzlableVector(
1567	SelectionDAG &DAG, SDValue VectorEntry,
1568	DenseMap<unsigned, unsigned> &RemapSwizzle) {
1569	assert(RemapSwizzle.empty());
1570
1571	SDLoc DL(VectorEntry);
1572	EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1573
1574	SDValue NewBldVec[`4`];
1575	for (unsigned i = `0`; i < `4`; i++)
1576	NewBldVec[i] = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltTy, N1: VectorEntry,
1577	N2: DAG.getIntPtrConstant(Val: i, DL));
1578
1579	for (unsigned i = `0`; i < `4`; i++) {
1580	if (NewBldVec[i].isUndef())
1581	// We mask write here to teach later passes that the ith element of this
1582	// vector is undef. Thus we can use it to reduce 128 bits reg usage,
1583	// break false dependencies and additionally make assembly easier to read.
1584	RemapSwizzle [i] = `7`; // SEL_MASK_WRITE
1585	if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: NewBldVec[i])) {
1586	if (C->isZero()) {
1587	RemapSwizzle [i] = `4`; // SEL_0
1588	NewBldVec[i] = DAG.getUNDEF(VT: MVT::f32);
1589	} else if (C->isExactlyValue(V: `1.0`)) {
1590	RemapSwizzle [i] = `5`; // SEL_1
1591	NewBldVec[i] = DAG.getUNDEF(VT: MVT::f32);
1592	}
1593	}
1594
1595	if (NewBldVec[i].isUndef())
1596	continue;
1597
1598	for (unsigned j = `0`; j < i; j++) {
1599	if (NewBldVec[i] == NewBldVec[j]) {
1600	NewBldVec[i] = DAG.getUNDEF(VT: NewBldVec[i].getValueType());
1601	RemapSwizzle [i] = j;
1602	break;
1603	}
1604	}
1605	}
1606
1607	return DAG.getBuildVector(VT: VectorEntry.getValueType(), DL: SDLoc (VectorEntry),
1608	Ops: NewBldVec);
1609	}
1610
1611	static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1612	DenseMap<unsigned, unsigned> &RemapSwizzle) {
1613	assert(RemapSwizzle.empty());
1614
1615	SDLoc DL(VectorEntry);
1616	EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1617
1618	SDValue NewBldVec[`4`];
1619	bool isUnmovable[`4`] = {false, false, false, false};
1620	for (unsigned i = `0`; i < `4`; i++)
1621	NewBldVec[i] = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: EltTy, N1: VectorEntry,
1622	N2: DAG.getIntPtrConstant(Val: i, DL));
1623
1624	for (unsigned i = `0`; i < `4`; i++) {
1625	RemapSwizzle [i] = i;
1626	if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1627	unsigned Idx = NewBldVec[i].getConstantOperandVal(i: `1`);
1628	if (i == Idx)
1629	isUnmovable[Idx] = true;
1630	}
1631	}
1632
1633	for (unsigned i = `0`; i < `4`; i++) {
1634	if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1635	unsigned Idx = NewBldVec[i].getConstantOperandVal(i: `1`);
1636	if (isUnmovable[Idx])
1637	continue;
1638	// Swap i and Idx
1639	std::swap(a&: NewBldVec[Idx], b&: NewBldVec[i]);
1640	std::swap(a&: RemapSwizzle [i], b&: RemapSwizzle [Idx]);
1641	break;
1642	}
1643	}
1644
1645	return DAG.getBuildVector(VT: VectorEntry.getValueType(), DL: SDLoc (VectorEntry),
1646	Ops: NewBldVec);
1647	}
1648
1649	SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
1650	SelectionDAG &DAG,
1651	const SDLoc &DL) const {
1652	// Old -> New swizzle values
1653	DenseMap<unsigned, unsigned> SwizzleRemap;
1654
1655	BuildVector = CompactSwizzlableVector(DAG, VectorEntry: BuildVector, RemapSwizzle&: SwizzleRemap);
1656	for (unsigned i = `0`; i < `4`; i++) {
1657	unsigned Idx = Swz[i]->getAsZExtVal();
1658	auto It = SwizzleRemap.find(Val: Idx);
1659	if (It != SwizzleRemap.end())
1660	Swz[i] = DAG.getConstant(Val: It ->second, DL, VT: MVT::i32);
1661	}
1662
1663	SwizzleRemap.clear();
1664	BuildVector = ReorganizeVector(DAG, VectorEntry: BuildVector, RemapSwizzle&: SwizzleRemap);
1665	for (unsigned i = `0`; i < `4`; i++) {
1666	unsigned Idx = Swz[i]->getAsZExtVal();
1667	auto It = SwizzleRemap.find(Val: Idx);
1668	if (It != SwizzleRemap.end())
1669	Swz[i] = DAG.getConstant(Val: It ->second, DL, VT: MVT::i32);
1670	}
1671
1672	return BuildVector;
1673	}
1674
1675	SDValue R600TargetLowering::constBufferLoad(LoadSDNode LoadNode, int* Block,
1676	SelectionDAG &DAG) const {
1677	SDLoc DL(LoadNode);
1678	EVT VT = LoadNode->getValueType(ResNo: `0`);
1679	SDValue Chain = LoadNode->getChain();
1680	SDValue Ptr = LoadNode->getBasePtr();
1681	assert (isa<ConstantSDNode>(Ptr));
1682
1683	//TODO: Support smaller loads
1684	if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 \|\| !ISD::isNON_EXTLoad(N: LoadNode))
1685	return SDValue ();
1686
1687	if (LoadNode->getAlign() < Align (`4`))
1688	return SDValue ();
1689
1690	int ConstantBlock = ConstantAddressBlock(AddressSpace: Block);
1691
1692	SDValue Slots[`4`];
1693	for (unsigned i = `0`; i < `4`; i++) {
1694	// We want Const position encoded with the following formula :
1695	// (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1696	// const_index is Ptr computed by llvm using an alignment of 16.
1697	// Thus we add (((512 + (kc_bank << 12)) + chan ) 4 here and*
1698	// then div by 4 at the ISel step
1699	SDValue NewPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: Ptr.getValueType(), N1: Ptr,
1700	N2: DAG.getConstant(Val: `4` * i + ConstantBlock * `16`, DL, VT: MVT::i32));
1701	Slots[i] = DAG.getNode(Opcode: AMDGPUISD::CONST_ADDRESS, DL, VT: MVT::i32, Operand: NewPtr);
1702	}
1703	EVT NewVT = MVT::v4i32;
1704	unsigned NumElements = `4`;
1705	if (VT.isVector()) {
1706	NewVT = VT;
1707	NumElements = VT.getVectorNumElements();
1708	}
1709	SDValue Result = DAG.getBuildVector(VT: NewVT, DL, Ops: ArrayRef(Slots, NumElements));
1710	if (!VT.isVector()) {
1711	Result = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: MVT::i32, N1: Result,
1712	N2: DAG.getConstant(Val: `0`, DL, VT: MVT::i32));
1713	}
1714	SDValue MergedValues[`2`] = {
1715	Result,
1716	Chain
1717	};
1718	return DAG.getMergeValues(Ops: MergedValues, dl: DL);
1719	}
1720
1721	//===----------------------------------------------------------------------===//
1722	// Custom DAG Optimizations
1723	//===----------------------------------------------------------------------===//
1724
1725	SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1726	DAGCombinerInfo &DCI) const {
1727	SelectionDAG &DAG = DCI.DAG;
1728	SDLoc DL(N);
1729
1730	switch (N->getOpcode()) {
1731	// (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1732	case ISD::FP_ROUND: {
1733	SDValue Arg = N->getOperand(Num: `0`);
1734	if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1735	return DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: N->getValueType(ResNo: `0`),
1736	Operand: Arg.getOperand(i: `0`));
1737	}
1738	break;
1739	}
1740
1741	// (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1742	// (i32 select_cc f32, f32, -1, 0 cc)
1743	//
1744	// Mesa's GLSL frontend generates the above pattern a lot and we can lower
1745	// this to one of the SET_DX10 instructions.*
1746	case ISD::FP_TO_SINT: {
1747	SDValue FNeg = N->getOperand(Num: `0`);
1748	if (FNeg.getOpcode() != ISD::FNEG) {
1749	return SDValue ();
1750	}
1751	SDValue SelectCC = FNeg.getOperand(i: `0`);
1752	if (SelectCC.getOpcode() != ISD::SELECT_CC \|\|
1753	SelectCC.getOperand(i: `0`).getValueType() != MVT::f32 \|\| // LHS
1754	SelectCC.getOperand(i: `2`).getValueType() != MVT::f32 \|\| // True
1755	!isHWTrueValue(Op: SelectCC.getOperand(i: `2`)) \|\|
1756	!isHWFalseValue(Op: SelectCC.getOperand(i: `3`))) {
1757	return SDValue ();
1758	}
1759
1760	return DAG.getNode(Opcode: ISD::SELECT_CC, DL, VT: N->getValueType(ResNo: `0`),
1761	N1: SelectCC.getOperand(i: `0`), // LHS
1762	N2: SelectCC.getOperand(i: `1`), // RHS
1763	N3: DAG.getAllOnesConstant(DL, VT: MVT::i32), // True
1764	N4: DAG.getConstant(Val: `0`, DL, VT: MVT::i32), // False
1765	N5: SelectCC.getOperand(i: `4`)); // CC
1766	}
1767
1768	// insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1769	// => build_vector elt0, ... , NewEltIdx, ... , eltN
1770	case ISD::INSERT_VECTOR_ELT: {
1771	SDValue InVec = N->getOperand(Num: `0`);
1772	SDValue InVal = N->getOperand(Num: `1`);
1773	SDValue EltNo = N->getOperand(Num: `2`);
1774
1775	// If the inserted element is an UNDEF, just use the input vector.
1776	if (InVal.isUndef())
1777	return InVec;
1778
1779	EVT VT = InVec.getValueType();
1780
1781	// If we can't generate a legal BUILD_VECTOR, exit
1782	if (!isOperationLegal(Op: ISD::BUILD_VECTOR, VT))
1783	return SDValue ();
1784
1785	// Check that we know which element is being inserted
1786	if (!isa<ConstantSDNode>(Val: EltNo))
1787	return SDValue ();
1788	unsigned Elt = EltNo ->getAsZExtVal();
1789
1790	// Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1791	// be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1792	// vector elements.
1793	SmallVector<SDValue, `8`> Ops;
1794	if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1795	Ops.append(in_start: InVec.getNode()->op_begin(),
1796	in_end: InVec.getNode()->op_end());
1797	} else if (InVec.isUndef()) {
1798	unsigned NElts = VT.getVectorNumElements();
1799	Ops.append(NumInputs: NElts, Elt: DAG.getUNDEF(VT: InVal.getValueType()));
1800	} else {
1801	return SDValue ();
1802	}
1803
1804	// Insert the element
1805	if (Elt < Ops.size()) {
1806	// All the operands of BUILD_VECTOR must have the same type;
1807	// we enforce that here.
1808	EVT OpVT = Ops [`0`].getValueType();
1809	if (InVal.getValueType() != OpVT)
1810	InVal = OpVT.bitsGT(VT: InVal.getValueType()) ?
1811	DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: OpVT, Operand: InVal) :
1812	DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: OpVT, Operand: InVal);
1813	Ops [Elt] = InVal;
1814	}
1815
1816	// Return the new vector
1817	return DAG.getBuildVector(VT, DL, Ops);
1818	}
1819
1820	// Extract_vec (Build_vector) generated by custom lowering
1821	// also needs to be customly combined
1822	case ISD::EXTRACT_VECTOR_ELT: {
1823	SDValue Arg = N->getOperand(Num: `0`);
1824	if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1825	if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`))) {
1826	unsigned Element = Const->getZExtValue();
1827	return Arg ->getOperand(Num: Element);
1828	}
1829	}
1830	if (Arg.getOpcode() == ISD::BITCAST &&
1831	Arg.getOperand(i: `0`).getOpcode() == ISD::BUILD_VECTOR &&
1832	(Arg.getOperand(i: `0`).getValueType().getVectorNumElements() ==
1833	Arg.getValueType().getVectorNumElements())) {
1834	if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`))) {
1835	unsigned Element = Const->getZExtValue();
1836	return DAG.getNode(Opcode: ISD::BITCAST, DL, VTList: N->getVTList(),
1837	N: Arg ->getOperand(Num: `0`).getOperand(i: Element));
1838	}
1839	}
1840	break;
1841	}
1842
1843	case ISD::SELECT_CC: {
1844	// Try common optimizations
1845	if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1846	return Ret;
1847
1848	// fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1849	// selectcc x, y, a, b, inv(cc)
1850	//
1851	// fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1852	// selectcc x, y, a, b, cc
1853	SDValue LHS = N->getOperand(Num: `0`);
1854	if (LHS.getOpcode() != ISD::SELECT_CC) {
1855	return SDValue ();
1856	}
1857
1858	SDValue RHS = N->getOperand(Num: `1`);
1859	SDValue True = N->getOperand(Num: `2`);
1860	SDValue False = N->getOperand(Num: `3`);
1861	ISD::CondCode NCC = cast<CondCodeSDNode>(Val: N->getOperand(Num: `4`))->get();
1862
1863	if (LHS.getOperand(i: `2`).getNode() != True.getNode() \|\|
1864	LHS.getOperand(i: `3`).getNode() != False.getNode() \|\|
1865	RHS.getNode() != False.getNode()) {
1866	return SDValue ();
1867	}
1868
1869	switch (NCC) {
1870	default: return SDValue ();
1871	case ISD::SETNE: return LHS;
1872	case ISD::SETEQ: {
1873	ISD::CondCode LHSCC = cast<CondCodeSDNode>(Val: LHS.getOperand(i: `4`))->get();
1874	LHSCC = ISD::getSetCCInverse(Operation: LHSCC, Type: LHS.getOperand(i: `0`).getValueType());
1875	if (DCI.isBeforeLegalizeOps() \|\|
1876	isCondCodeLegal(CC: LHSCC, VT: LHS.getOperand(i: `0`).getSimpleValueType()))
1877	return DAG.getSelectCC(DL,
1878	LHS: LHS.getOperand(i: `0`),
1879	RHS: LHS.getOperand(i: `1`),
1880	True: LHS.getOperand(i: `2`),
1881	False: LHS.getOperand(i: `3`),
1882	Cond: LHSCC);
1883	break;
1884	}
1885	}
1886	return SDValue ();
1887	}
1888
1889	case AMDGPUISD::R600_EXPORT: {
1890	SDValue Arg = N->getOperand(Num: `1`);
1891	if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1892	break;
1893
1894	SDValue NewArgs[`8`] = {
1895	N->getOperand(Num: `0`), // Chain
1896	SDValue (),
1897	N->getOperand(Num: `2`), // ArrayBase
1898	N->getOperand(Num: `3`), // Type
1899	N->getOperand(Num: `4`), // SWZ_X
1900	N->getOperand(Num: `5`), // SWZ_Y
1901	N->getOperand(Num: `6`), // SWZ_Z
1902	N->getOperand(Num: `7`) // SWZ_W
1903	};
1904	NewArgs[`1`] = OptimizeSwizzle(BuildVector: N->getOperand(Num: `1`), Swz: &NewArgs[`4`], DAG, DL);
1905	return DAG.getNode(Opcode: AMDGPUISD::R600_EXPORT, DL, VTList: N->getVTList(), Ops: NewArgs);
1906	}
1907	case AMDGPUISD::TEXTURE_FETCH: {
1908	SDValue Arg = N->getOperand(Num: `1`);
1909	if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1910	break;
1911
1912	SDValue NewArgs[`19`] = {
1913	N->getOperand(Num: `0`),
1914	N->getOperand(Num: `1`),
1915	N->getOperand(Num: `2`),
1916	N->getOperand(Num: `3`),
1917	N->getOperand(Num: `4`),
1918	N->getOperand(Num: `5`),
1919	N->getOperand(Num: `6`),
1920	N->getOperand(Num: `7`),
1921	N->getOperand(Num: `8`),
1922	N->getOperand(Num: `9`),
1923	N->getOperand(Num: `10`),
1924	N->getOperand(Num: `11`),
1925	N->getOperand(Num: `12`),
1926	N->getOperand(Num: `13`),
1927	N->getOperand(Num: `14`),
1928	N->getOperand(Num: `15`),
1929	N->getOperand(Num: `16`),
1930	N->getOperand(Num: `17`),
1931	N->getOperand(Num: `18`),
1932	};
1933	NewArgs[`1`] = OptimizeSwizzle(BuildVector: N->getOperand(Num: `1`), Swz: &NewArgs[`2`], DAG, DL);
1934	return DAG.getNode(Opcode: AMDGPUISD::TEXTURE_FETCH, DL, VTList: N->getVTList(), Ops: NewArgs);
1935	}
1936
1937	case ISD::LOAD: {
1938	LoadSDNode *LoadNode = cast<LoadSDNode>(Val: N);
1939	SDValue Ptr = LoadNode->getBasePtr();
1940	if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1941	isa<ConstantSDNode>(Val: Ptr))
1942	return constBufferLoad(LoadNode, Block: AMDGPUAS::CONSTANT_BUFFER_0, DAG);
1943	break;
1944	}
1945
1946	default: break;
1947	}
1948
1949	return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1950	}
1951
1952	bool R600TargetLowering::FoldOperand(SDNode ParentNode, unsigned* SrcIdx,
1953	SDValue &Src, SDValue &Neg, SDValue &Abs,
1954	SDValue &Sel, SDValue &Imm,
1955	SelectionDAG &DAG) const {
1956	const R600InstrInfo *TII = Subtarget->getInstrInfo();
1957	if (!Src.isMachineOpcode())
1958	return false;
1959
1960	switch (Src.getMachineOpcode()) {
1961	case R600::FNEG_R600:
1962	if (!Neg.getNode())
1963	return false;
1964	Src = Src.getOperand(i: `0`);
1965	Neg = DAG.getTargetConstant(Val: `1`, DL: SDLoc (ParentNode), VT: MVT::i32);
1966	return true;
1967	case R600::FABS_R600:
1968	if (!Abs.getNode())
1969	return false;
1970	Src = Src.getOperand(i: `0`);
1971	Abs = DAG.getTargetConstant(Val: `1`, DL: SDLoc (ParentNode), VT: MVT::i32);
1972	return true;
1973	case R600::CONST_COPY: {
1974	unsigned Opcode = ParentNode->getMachineOpcode();
1975	bool HasDst = TII->getOperandIdx(Opcode, Op: R600::OpName::dst) > -`1`;
1976
1977	if (!Sel.getNode())
1978	return false;
1979
1980	SDValue CstOffset = Src.getOperand(i: `0`);
1981	if (ParentNode->getValueType(ResNo: `0`).isVector())
1982	return false;
1983
1984	// Gather constants values
1985	int SrcIndices[] = {
1986	TII->getOperandIdx(Opcode, Op: R600::OpName::src0),
1987	TII->getOperandIdx(Opcode, Op: R600::OpName::src1),
1988	TII->getOperandIdx(Opcode, Op: R600::OpName::src2),
1989	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_X),
1990	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_Y),
1991	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_Z),
1992	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_W),
1993	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_X),
1994	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_Y),
1995	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_Z),
1996	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_W)
1997	};
1998	std::vector<unsigned> Consts;
1999	for (int OtherSrcIdx : SrcIndices) {
2000	int OtherSelIdx = TII->getSelIdx(Opcode, SrcIdx: OtherSrcIdx);
2001	if (OtherSrcIdx < `0` \|\| OtherSelIdx < `0`)
2002	continue;
2003	if (HasDst) {
2004	OtherSrcIdx--;
2005	OtherSelIdx--;
2006	}
2007	if (RegisterSDNode *Reg =
2008	dyn_cast<RegisterSDNode>(Val: ParentNode->getOperand(Num: OtherSrcIdx))) {
2009	if (Reg->getReg() == R600::ALU_CONST) {
2010	Consts.push_back(x: ParentNode->getConstantOperandVal(Num: OtherSelIdx));
2011	}
2012	}
2013	}
2014
2015	ConstantSDNode *Cst = cast<ConstantSDNode>(Val&: CstOffset);
2016	Consts.push_back(x: Cst->getZExtValue());
2017	if (!TII->fitsConstReadLimitations(Consts)) {
2018	return false;
2019	}
2020
2021	Sel = CstOffset;
2022	Src = DAG.getRegister(Reg: R600::ALU_CONST, VT: MVT::f32);
2023	return true;
2024	}
2025	case R600::MOV_IMM_GLOBAL_ADDR:
2026	// Check if the Imm slot is used. Taken from below.
2027	if (Imm ->getAsZExtVal())
2028	return false;
2029	Imm = Src.getOperand(i: `0`);
2030	Src = DAG.getRegister(Reg: R600::ALU_LITERAL_X, VT: MVT::i32);
2031	return true;
2032	case R600::MOV_IMM_I32:
2033	case R600::MOV_IMM_F32: {
2034	unsigned ImmReg = R600::ALU_LITERAL_X;
2035	uint64_t ImmValue = `0`;
2036
2037	if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2038	ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Val: Src.getOperand(i: `0`));
2039	float FloatValue = FPC->getValueAPF().convertToFloat();
2040	if (FloatValue == `0.0`) {
2041	ImmReg = R600::ZERO;
2042	} else if (FloatValue == `0.5`) {
2043	ImmReg = R600::HALF;
2044	} else if (FloatValue == `1.0`) {
2045	ImmReg = R600::ONE;
2046	} else {
2047	ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2048	}
2049	} else {
2050	uint64_t Value = Src.getConstantOperandVal(i: `0`);
2051	if (Value == `0`) {
2052	ImmReg = R600::ZERO;
2053	} else if (Value == `1`) {
2054	ImmReg = R600::ONE_INT;
2055	} else {
2056	ImmValue = Value;
2057	}
2058	}
2059
2060	// Check that we aren't already using an immediate.
2061	// XXX: It's possible for an instruction to have more than one
2062	// immediate operand, but this is not supported yet.
2063	if (ImmReg == R600::ALU_LITERAL_X) {
2064	if (!Imm.getNode())
2065	return false;
2066	ConstantSDNode *C = cast<ConstantSDNode>(Val&: Imm);
2067	if (C->getZExtValue())
2068	return false;
2069	Imm = DAG.getTargetConstant(Val: ImmValue, DL: SDLoc (ParentNode), VT: MVT::i32);
2070	}
2071	Src = DAG.getRegister(Reg: ImmReg, VT: MVT::i32);
2072	return true;
2073	}
2074	default:
2075	return false;
2076	}
2077	}
2078
2079	/// Fold the instructions after selecting them
2080	SDNode R600TargetLowering::PostISelFolding(MachineSDNode Node,
2081	SelectionDAG &DAG) const {
2082	const R600InstrInfo *TII = Subtarget->getInstrInfo();
2083	if (!Node->isMachineOpcode())
2084	return Node;
2085
2086	unsigned Opcode = Node->getMachineOpcode();
2087	SDValue FakeOp;
2088
2089	std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2090
2091	if (Opcode == R600::DOT_4) {
2092	int OperandIdx[] = {
2093	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_X),
2094	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_Y),
2095	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_Z),
2096	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_W),
2097	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_X),
2098	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_Y),
2099	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_Z),
2100	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_W)
2101	};
2102	int NegIdx[] = {
2103	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_neg_X),
2104	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_neg_Y),
2105	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_neg_Z),
2106	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_neg_W),
2107	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_neg_X),
2108	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_neg_Y),
2109	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_neg_Z),
2110	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_neg_W)
2111	};
2112	int AbsIdx[] = {
2113	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_abs_X),
2114	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_abs_Y),
2115	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_abs_Z),
2116	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_abs_W),
2117	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_abs_X),
2118	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_abs_Y),
2119	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_abs_Z),
2120	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_abs_W)
2121	};
2122	for (unsigned i = `0`; i < `8`; i++) {
2123	if (OperandIdx[i] < `0`)
2124	return Node;
2125	SDValue &Src = Ops [OperandIdx[i] - `1`];
2126	SDValue &Neg = Ops [NegIdx[i] - `1`];
2127	SDValue &Abs = Ops [AbsIdx[i] - `1`];
2128	bool HasDst = TII->getOperandIdx(Opcode, Op: R600::OpName::dst) > -`1`;
2129	int SelIdx = TII->getSelIdx(Opcode, SrcIdx: OperandIdx[i]);
2130	if (HasDst)
2131	SelIdx--;
2132	SDValue &Sel = (SelIdx > -`1`) ? Ops [SelIdx] : FakeOp;
2133	if (FoldOperand(ParentNode: Node, SrcIdx: i, Src, Neg, Abs, Sel, Imm&: FakeOp, DAG))
2134	return DAG.getMachineNode(Opcode, dl: SDLoc (Node), VTs: Node->getVTList(), Ops);
2135	}
2136	} else if (Opcode == R600::REG_SEQUENCE) {
2137	for (unsigned i = `1`, e = Node->getNumOperands(); i < e; i += `2`) {
2138	SDValue &Src = Ops [i];
2139	if (FoldOperand(ParentNode: Node, SrcIdx: i, Src, Neg&: FakeOp, Abs&: FakeOp, Sel&: FakeOp, Imm&: FakeOp, DAG))
2140	return DAG.getMachineNode(Opcode, dl: SDLoc (Node), VTs: Node->getVTList(), Ops);
2141	}
2142	} else {
2143	if (!TII->hasInstrModifiers(Opcode))
2144	return Node;
2145	int OperandIdx[] = {
2146	TII->getOperandIdx(Opcode, Op: R600::OpName::src0),
2147	TII->getOperandIdx(Opcode, Op: R600::OpName::src1),
2148	TII->getOperandIdx(Opcode, Op: R600::OpName::src2)
2149	};
2150	int NegIdx[] = {
2151	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_neg),
2152	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_neg),
2153	TII->getOperandIdx(Opcode, Op: R600::OpName::src2_neg)
2154	};
2155	int AbsIdx[] = {
2156	TII->getOperandIdx(Opcode, Op: R600::OpName::src0_abs),
2157	TII->getOperandIdx(Opcode, Op: R600::OpName::src1_abs),
2158	-`1`
2159	};
2160	for (unsigned i = `0`; i < `3`; i++) {
2161	if (OperandIdx[i] < `0`)
2162	return Node;
2163	SDValue &Src = Ops [OperandIdx[i] - `1`];
2164	SDValue &Neg = Ops [NegIdx[i] - `1`];
2165	SDValue FakeAbs;
2166	SDValue &Abs = (AbsIdx[i] > -`1`) ? Ops [AbsIdx[i] - `1`] : FakeAbs;
2167	bool HasDst = TII->getOperandIdx(Opcode, Op: R600::OpName::dst) > -`1`;
2168	int SelIdx = TII->getSelIdx(Opcode, SrcIdx: OperandIdx[i]);
2169	int ImmIdx = TII->getOperandIdx(Opcode, Op: R600::OpName::literal);
2170	if (HasDst) {
2171	SelIdx--;
2172	ImmIdx--;
2173	}
2174	SDValue &Sel = (SelIdx > -`1`) ? Ops [SelIdx] : FakeOp;
2175	SDValue &Imm = Ops [ImmIdx];
2176	if (FoldOperand(ParentNode: Node, SrcIdx: i, Src, Neg, Abs, Sel, Imm, DAG))
2177	return DAG.getMachineNode(Opcode, dl: SDLoc (Node), VTs: Node->getVTList(), Ops);
2178	}
2179	}
2180
2181	return Node;
2182	}
2183
2184	TargetLowering::AtomicExpansionKind
2185	R600TargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst RMW) const* {
2186	switch (RMW->getOperation()) {
2187	case AtomicRMWInst::Nand:
2188	case AtomicRMWInst::FAdd:
2189	case AtomicRMWInst::FSub:
2190	case AtomicRMWInst::FMax:
2191	case AtomicRMWInst::FMin:
2192	case AtomicRMWInst::USubCond:
2193	case AtomicRMWInst::USubSat:
2194	return AtomicExpansionKind::CmpXChg;
2195	case AtomicRMWInst::UIncWrap:
2196	case AtomicRMWInst::UDecWrap:
2197	// FIXME: Cayman at least appears to have instructions for this, but the
2198	// instruction definitions appear to be missing.
2199	return AtomicExpansionKind::CmpXChg;
2200	case AtomicRMWInst::Xchg: {
2201	const DataLayout &DL = RMW->getFunction()->getDataLayout();
2202	unsigned ValSize = DL.getTypeSizeInBits(Ty: RMW->getType());
2203	if (ValSize == `32` \|\| ValSize == `64`)
2204	return AtomicExpansionKind::None;
2205	return AtomicExpansionKind::CmpXChg;
2206	}
2207	default:
2208	if (auto *IntTy = dyn_cast<IntegerType>(Val: RMW->getType())) {
2209	unsigned Size = IntTy->getBitWidth();
2210	if (Size == `32` \|\| Size == `64`)
2211	return AtomicExpansionKind::None;
2212	}
2213
2214	return AtomicExpansionKind::CmpXChg;
2215	}
2216
2217	llvm_unreachable("covered atomicrmw op switch");
2218	}
2219

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp