//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <optional>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisableP10StoreForward(
    "disable-p10-store-forward",
    cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
    cl::init(false));

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
    cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> DisableInnermostLoopAlign32(
    "disable-ppc-innermost-loop-align32",
    cl::desc("don't always align innermost loop to 32 bytes on ppc"),
    cl::Hidden);

static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
    cl::desc("use absolute jump tables on ppc"), cl::Hidden);

static cl::opt<bool>
    DisablePerfectShuffle("ppc-disable-perfect-shuffle",
                          cl::desc("disable vector permute decomposition"),
                          cl::init(true), cl::Hidden);

cl::opt<bool> DisableAutoPairedVecSt(
    "disable-auto-paired-vec-st",
    cl::desc("disable automatically generated 32byte paired vector stores"),
    cl::init(true), cl::Hidden);

static cl::opt<unsigned> PPCMinimumJumpTableEntries(
    "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
    cl::desc("Set minimum number of entries to use a jump table on PPC"));

static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
    "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
    cl::desc("max depth when checking alias info in GatherAllAliases()"));

static cl::opt<unsigned> PPCAIXTLSModelOptUseIEForLDLimit(
    "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
    cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
             "function to use initial-exec"));

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM,
          "Number of shuffles lowered to a VPERM or XXPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");

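// Forward declarations of static helpers defined later in this file.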
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";

// A faster local-[exec|dynamic] TLS access sequence (enabled with the
// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
// variables; consistent with the IBM XL compiler, we apply a max size of
// slightly under 32KB.
constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Initialize map that relates the PPC addressing modes to the computed flags
  // of a load/store instruction. The map is used to determine the optimal
  // addressing mode when selecting loads and stores.
  initializeAddrModeMap();
  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
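  // RegVT is the widest native integer register type: i64 when compiling for
  // a 64-bit subtarget, i32 otherwise.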
  const MVT RegVT = Subtarget.getScalarIntVT();

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
      // EFPU2 APU only supports f32
      if (!Subtarget.hasEFPU2())
        addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  setOperationAction(ISD::UADDO, RegVT, Custom);
  setOperationAction(ISD::USUBO, RegVT, Custom);

  // PowerPC uses addo_carry, subo_carry to propagate carry.
  setOperationAction(ISD::UADDO_CARRY, RegVT, Custom);
  setOperationAction(ISD::USUBO_CARRY, RegVT, Custom);

  // On P10, the default lowering generates better code using the
  // setbc instruction.
  if (!Subtarget.hasP10Vector()) {
    setOperationAction(ISD::SSUBO, MVT::i32, Custom);
    if (isPPC64)
      setOperationAction(ISD::SSUBO, MVT::i64, Custom);
  }

  // Match BITREVERSE to customized fast code sequence in the td file.
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

  // Custom lower inline assembly to check for special registers.
  setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
  setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f128, MVT::f16, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);

  if (Subtarget.isISA3_0()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
    setTruncStoreAction(MVT::f64, MVT::f16, Legal);
    setTruncStoreAction(MVT::f32, MVT::f16, Legal);
  } else {
    // No extending loads from f16 or HW conversions back and forth.
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  if (!Subtarget.hasSPE()) {
    setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
  }

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1, RegVT);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1, RegVT);

      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);

      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1, RegVT);
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1, RegVT);

      setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
      AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
      setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
      AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
    } else {
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

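    // With CR-bit tracking enabled, i1 values are carried in individual
    // condition-register bits.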
    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // When the result of both the remainder and the division is required it is
  // more efficient to compute the remainder from the result of the division
  // rather than use the remainder instruction. The instructions are legalized
  // directly because the DivRemPairsPass performs the transformation at the IR
  // level.
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::SREM, MVT::i32, Legal);
    setOperationAction(ISD::UREM, MVT::i32, Legal);
    setOperationAction(ISD::SREM, MVT::i64, Legal);
    setOperationAction(ISD::UREM, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::SREM, MVT::i32, Expand);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
    setOperationAction(ISD::SREM, MVT::i64, Expand);
    setOperationAction(ISD::UREM, MVT::i64, Expand);
  }

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // Handle constrained floating-point operations for scalar types.
  // TODO: Handle SPE specific operation.
  setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);

  setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);

  if (!Subtarget.hasSPE()) {
    setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
  }

  if (Subtarget.hasVSX()) {
    setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
  }

  if (Subtarget.hasFSQRT()) {
    setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);

    setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
  }

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  // MASS transformation for LLVM intrinsics with replicating fast-math flag,
  // to be consistent with the PPCGenScalarMASSEntries pass.
  if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
    setOperationAction(ISD::FSIN, MVT::f64, Custom);
    setOperationAction(ISD::FCOS, MVT::f64, Custom);
    setOperationAction(ISD::FPOW, MVT::f64, Custom);
    setOperationAction(ISD::FLOG, MVT::f64, Custom);
    setOperationAction(ISD::FLOG10, MVT::f64, Custom);
    setOperationAction(ISD::FEXP, MVT::f64, Custom);
    setOperationAction(ISD::FSIN, MVT::f32, Custom);
    setOperationAction(ISD::FCOS, MVT::f32, Custom);
    setOperationAction(ISD::FPOW, MVT::f32, Custom);
    setOperationAction(ISD::FLOG, MVT::f32, Custom);
    setOperationAction(ISD::FLOG10, MVT::f32, Custom);
    setOperationAction(ISD::FEXP, MVT::f32, Custom);
  }

  if (Subtarget.hasSPE()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  } else {
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
  }

  if (Subtarget.hasSPE())
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);

  // If we're enabling GP optimizations, use hardware square root
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
  // instruction xxbrd to speed up scalar BSWAP64.
  if (Subtarget.isISA3_1()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Legal);
    setOperationAction(ISD::BSWAP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);
    setOperationAction(ISD::BSWAP, MVT::i64,
                       (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
  }

  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ, MVT::i32, Legal);
    setOperationAction(ISD::CTTZ, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTTZ, MVT::i32, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  }

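  // Only use the hardware population-count instruction when the subtarget
  // reports popcntd as fast; otherwise expand CTPOP.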
  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  if (Subtarget.hasFPU()) {
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);

    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
  }

  // PowerPC does not have BRCOND which requires SetCC
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);

    // SPE supports signaling compare of f32/f64.
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

    // PowerPC does not have [U|S]INT_TO_FP
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
  }

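  // With direct moves between GPRs and vector registers (P8 and later, 64-bit
  // only), bitcasts between f32/f64 and i32/i64 can avoid a round trip
  // through memory.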
  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::LRINT, MVT::f64, Legal);
      setOperationAction(ISD::LRINT, MVT::f32, Legal);
      setOperationAction(ISD::LLRINT, MVT::f64, Legal);
      setOperationAction(ISD::LLRINT, MVT::f32, Legal);
      setOperationAction(ISD::LROUND, MVT::f64, Legal);
      setOperationAction(ISD::LROUND, MVT::f32, Legal);
      setOperationAction(ISD::LLROUND, MVT::f64, Legal);
      setOperationAction(ISD::LLROUND, MVT::f32, Legal);
    }
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented, and please don't build your
  // own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget.is64BitELFABI()) {
    // VAARG always uses double-word chunks, so promote anything smaller.
    setOperationAction(ISD::VAARG, MVT::i1, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i8, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i16, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i32, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::Other, Expand);
  } else if (Subtarget.is32BitELFABI()) {
    // VAARG is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
    setOperationAction(ISD::VAARG, MVT::i64, Custom);
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
  if (Subtarget.is32BitELFABI())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
    setCondCodeAction(ISD::SETO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
  }
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || isPPC64) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
    }
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE()) {
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
    } else {
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
    }
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  // PowerPC has better expansions for funnel shifts than the generic
  // TargetLowering::expandFunnelShift.
  if (Subtarget.has64BitSupport()) {
    setOperationAction(ISD::FSHL, MVT::i64, Custom);
    setOperationAction(ISD::FSHR, MVT::i64, Custom);
  }
  setOperationAction(ISD::FSHL, MVT::i32, Custom);
  setOperationAction(ISD::FSHR, MVT::i32, Custom);

  if (Subtarget.hasVSX()) {
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
  }

  if (Subtarget.hasAltivec()) {
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
      setOperationAction(ISD::SADDSAT, VT, Legal);
      setOperationAction(ISD::SSUBSAT, VT, Legal);
      setOperationAction(ISD::UADDSAT, VT, Legal);
      setOperationAction(ISD::USUBSAT, VT, Legal);
    }
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // For v2i64, these are only valid with P8Vector. This is corrected after
      // the loop.
      if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
        setOperationAction(ISD::SMAX, VT, Legal);
        setOperationAction(ISD::SMIN, VT, Legal);
        setOperationAction(ISD::UMAX, VT, Legal);
        setOperationAction(ISD::UMIN, VT, Legal);
      } else {
        setOperationAction(ISD::SMAX, VT, Expand);
        setOperationAction(ISD::SMIN, VT, Expand);
        setOperationAction(ISD::UMAX, VT, Expand);
        setOperationAction(ISD::UMIN, VT, Expand);
      }

      if (Subtarget.hasVSX()) {
        setOperationAction(ISD::FMAXNUM, VT, Legal);
        setOperationAction(ISD::FMINNUM, VT, Legal);
      }

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // Vector instructions introduced in P9
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
        setOperationAction(ISD::CTTZ, VT, Legal);
      else
        setOperationAction(ISD::CTTZ, VT, Expand);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType(ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType(ISD::AND, VT, MVT::v4i32);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType(ISD::OR, VT, MVT::v4i32);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType(ISD::XOR, VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType(ISD::LOAD, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType(ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType(ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL, VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FLDEXP, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
    setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
    if (!Subtarget.hasP8Vector()) {
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Vector truncates to sub-word integers that fit in an Altivec/VSX
    // register are cheap, so handle them before they get expanded to scalar.
    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);

    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
    setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
    // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
    if (Subtarget.hasAltivec())
      for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
        setOperationAction(ISD::ROTL, VT, Legal);
    // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::ROTL, MVT::v2i64, Legal);

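    // The 128-bit Altivec vector types all live in the VRRC register class.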
950 addRegisterClass(VT: MVT::v4f32, RC: &PPC::VRRCRegClass);
951 addRegisterClass(VT: MVT::v4i32, RC: &PPC::VRRCRegClass);
952 addRegisterClass(VT: MVT::v8i16, RC: &PPC::VRRCRegClass);
953 addRegisterClass(VT: MVT::v16i8, RC: &PPC::VRRCRegClass);
954
955 setOperationAction(Op: ISD::MUL, VT: MVT::v4f32, Action: Legal);
956 setOperationAction(Op: ISD::FMA, VT: MVT::v4f32, Action: Legal);
957
958 if (Subtarget.hasVSX()) {
959 setOperationAction(Op: ISD::FDIV, VT: MVT::v4f32, Action: Legal);
960 setOperationAction(Op: ISD::FSQRT, VT: MVT::v4f32, Action: Legal);
961 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v2f64, Action: Custom);
962 }
963
964 if (Subtarget.hasP8Altivec())
965 setOperationAction(Op: ISD::MUL, VT: MVT::v4i32, Action: Legal);
966 else
967 setOperationAction(Op: ISD::MUL, VT: MVT::v4i32, Action: Custom);
968
969 if (Subtarget.isISA3_1()) {
970 setOperationAction(Op: ISD::MUL, VT: MVT::v2i64, Action: Legal);
971 setOperationAction(Op: ISD::MULHS, VT: MVT::v2i64, Action: Legal);
972 setOperationAction(Op: ISD::MULHU, VT: MVT::v2i64, Action: Legal);
973 setOperationAction(Op: ISD::MULHS, VT: MVT::v4i32, Action: Legal);
974 setOperationAction(Op: ISD::MULHU, VT: MVT::v4i32, Action: Legal);
975 setOperationAction(Op: ISD::UDIV, VT: MVT::v2i64, Action: Legal);
976 setOperationAction(Op: ISD::SDIV, VT: MVT::v2i64, Action: Legal);
977 setOperationAction(Op: ISD::UDIV, VT: MVT::v4i32, Action: Legal);
978 setOperationAction(Op: ISD::SDIV, VT: MVT::v4i32, Action: Legal);
979 setOperationAction(Op: ISD::UREM, VT: MVT::v2i64, Action: Legal);
980 setOperationAction(Op: ISD::SREM, VT: MVT::v2i64, Action: Legal);
981 setOperationAction(Op: ISD::UREM, VT: MVT::v4i32, Action: Legal);
982 setOperationAction(Op: ISD::SREM, VT: MVT::v4i32, Action: Legal);
983 setOperationAction(Op: ISD::UREM, VT: MVT::v1i128, Action: Legal);
984 setOperationAction(Op: ISD::SREM, VT: MVT::v1i128, Action: Legal);
985 setOperationAction(Op: ISD::UDIV, VT: MVT::v1i128, Action: Legal);
986 setOperationAction(Op: ISD::SDIV, VT: MVT::v1i128, Action: Legal);
987 setOperationAction(Op: ISD::ROTL, VT: MVT::v1i128, Action: Legal);
988 }
989
990 setOperationAction(Op: ISD::MUL, VT: MVT::v8i16, Action: Legal);
991 setOperationAction(Op: ISD::MUL, VT: MVT::v16i8, Action: Custom);
992
993 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4f32, Action: Custom);
994 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4i32, Action: Custom);
995 // LE is P8+/64-bit so direct moves are supported and these operations
996 // are legal. The custom transformation requires 64-bit since we need a
997 // pair of stores that will cover a 128-bit load for P10.
998 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
999 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2i64, Action: Custom);
1000 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v8i16, Action: Custom);
1001 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v16i8, Action: Custom);
1002 }
1003
1004 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v16i8, Action: Custom);
1005 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v8i16, Action: Custom);
1006 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v4i32, Action: Custom);
1007 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v4f32, Action: Custom);
1008
1009 // Altivec does not contain unordered floating-point compare instructions
1010 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v4f32, Action: Expand);
1011 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::v4f32, Action: Expand);
1012 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v4f32, Action: Expand);
1013 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v4f32, Action: Expand);
1014
1015 if (Subtarget.hasVSX()) {
1016 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2f64, Action: Legal);
1017 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2f64, Action: Legal);
1018 if (Subtarget.hasP8Vector()) {
1019 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4f32, Action: Legal);
1020 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v4f32, Action: Legal);
1021 }
1022 if (Subtarget.hasDirectMove() && isPPC64) {
1023 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v16i8, Action: Legal);
1024 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v8i16, Action: Legal);
1025 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v4i32, Action: Legal);
1026 setOperationAction(Op: ISD::SCALAR_TO_VECTOR, VT: MVT::v2i64, Action: Legal);
1027 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v16i8, Action: Legal);
1028 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v8i16, Action: Legal);
1029 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v4i32, Action: Legal);
1030 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2i64, Action: Legal);
1031 }
1032 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: MVT::v2f64, Action: Legal);
1033
1034 // The nearbyint variants are not allowed to raise the inexact exception
1035 // so we can only code-gen them with unsafe math.
1036 if (TM.Options.UnsafeFPMath) {
1037 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::f64, Action: Legal);
1038 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::f32, Action: Legal);
1039 }
1040
1041 setOperationAction(Op: ISD::FFLOOR, VT: MVT::v2f64, Action: Legal);
1042 setOperationAction(Op: ISD::FCEIL, VT: MVT::v2f64, Action: Legal);
1043 setOperationAction(Op: ISD::FTRUNC, VT: MVT::v2f64, Action: Legal);
1044 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::v2f64, Action: Legal);
1045 setOperationAction(Op: ISD::FRINT, VT: MVT::v2f64, Action: Legal);
1046 setOperationAction(Op: ISD::FROUND, VT: MVT::v2f64, Action: Legal);
1047 setOperationAction(Op: ISD::FROUND, VT: MVT::f64, Action: Legal);
1048 setOperationAction(Op: ISD::FRINT, VT: MVT::f64, Action: Legal);
1049
1050 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::v4f32, Action: Legal);
1051 setOperationAction(Op: ISD::FRINT, VT: MVT::v4f32, Action: Legal);
1052 setOperationAction(Op: ISD::FROUND, VT: MVT::v4f32, Action: Legal);
1053 setOperationAction(Op: ISD::FROUND, VT: MVT::f32, Action: Legal);
1054 setOperationAction(Op: ISD::FRINT, VT: MVT::f32, Action: Legal);
1055
1056 setOperationAction(Op: ISD::MUL, VT: MVT::v2f64, Action: Legal);
1057 setOperationAction(Op: ISD::FMA, VT: MVT::v2f64, Action: Legal);
1058
1059 setOperationAction(Op: ISD::FDIV, VT: MVT::v2f64, Action: Legal);
1060 setOperationAction(Op: ISD::FSQRT, VT: MVT::v2f64, Action: Legal);
1061
1062 // Share the Altivec comparison restrictions.
1063 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v2f64, Action: Expand);
1064 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::v2f64, Action: Expand);
1065 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v2f64, Action: Expand);
1066 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v2f64, Action: Expand);
1067
1068 setOperationAction(Op: ISD::LOAD, VT: MVT::v2f64, Action: Legal);
1069 setOperationAction(Op: ISD::STORE, VT: MVT::v2f64, Action: Legal);
1070
1071 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v2f64, Action: Custom);
1072
1073 if (Subtarget.hasP8Vector())
1074 addRegisterClass(VT: MVT::f32, RC: &PPC::VSSRCRegClass);
1075
1076 addRegisterClass(VT: MVT::f64, RC: &PPC::VSFRCRegClass);
1077
1078 addRegisterClass(VT: MVT::v4i32, RC: &PPC::VSRCRegClass);
1079 addRegisterClass(VT: MVT::v4f32, RC: &PPC::VSRCRegClass);
1080 addRegisterClass(VT: MVT::v2f64, RC: &PPC::VSRCRegClass);
1081
1082 if (Subtarget.hasP8Altivec()) {
1083 setOperationAction(Op: ISD::SHL, VT: MVT::v2i64, Action: Legal);
1084 setOperationAction(Op: ISD::SRA, VT: MVT::v2i64, Action: Legal);
1085 setOperationAction(Op: ISD::SRL, VT: MVT::v2i64, Action: Legal);
1086
1087 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1088 // SRL, but not for SRA because of the instructions available:
1089 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
1090 // worth doing.
1091 setOperationAction(Op: ISD::SHL, VT: MVT::v1i128, Action: Expand);
1092 setOperationAction(Op: ISD::SRL, VT: MVT::v1i128, Action: Expand);
1093 setOperationAction(Op: ISD::SRA, VT: MVT::v1i128, Action: Expand);
1094
1095 setOperationAction(Op: ISD::SETCC, VT: MVT::v2i64, Action: Legal);
1096 }
1097 else {
1098 setOperationAction(Op: ISD::SHL, VT: MVT::v2i64, Action: Expand);
1099 setOperationAction(Op: ISD::SRA, VT: MVT::v2i64, Action: Expand);
1100 setOperationAction(Op: ISD::SRL, VT: MVT::v2i64, Action: Expand);
1101
1102 setOperationAction(Op: ISD::SETCC, VT: MVT::v2i64, Action: Custom);
1103
1104 // VSX v2i64 only supports non-arithmetic operations.
1105 setOperationAction(Op: ISD::ADD, VT: MVT::v2i64, Action: Expand);
1106 setOperationAction(Op: ISD::SUB, VT: MVT::v2i64, Action: Expand);
1107 }
1108
1109 if (Subtarget.isISA3_1())
1110 setOperationAction(Op: ISD::SETCC, VT: MVT::v1i128, Action: Legal);
1111 else
1112 setOperationAction(Op: ISD::SETCC, VT: MVT::v1i128, Action: Expand);
1113
1114 setOperationAction(Op: ISD::LOAD, VT: MVT::v2i64, Action: Promote);
1115 AddPromotedToType (Opc: ISD::LOAD, OrigVT: MVT::v2i64, DestVT: MVT::v2f64);
1116 setOperationAction(Op: ISD::STORE, VT: MVT::v2i64, Action: Promote);
1117 AddPromotedToType (Opc: ISD::STORE, OrigVT: MVT::v2i64, DestVT: MVT::v2f64);
1118
1119 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v2i64, Action: Custom);
1120
1121 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1122 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1123 setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT: MVT::v2i64, Action: Legal);
1124 setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT: MVT::v2i64, Action: Legal);
1125 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1126 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i64, Action: Legal);
1127 setOperationAction(Op: ISD::FP_TO_SINT, VT: MVT::v2i64, Action: Legal);
1128 setOperationAction(Op: ISD::FP_TO_UINT, VT: MVT::v2i64, Action: Legal);
1129
1130 // Custom handling for partial vectors of integers converted to
1131 // floating point. We already have optimal handling for v2i32 through
1132 // the DAG combine, so those aren't necessary.
1133 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1134 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1135 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1136 setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1137 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1138 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1139 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1140 setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1141 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1142 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1143 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1144 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1145 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i8, Action: Custom);
1146 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v4i8, Action: Custom);
1147 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v2i16, Action: Custom);
1148 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v4i16, Action: Custom);
1149
1150 setOperationAction(Op: ISD::FNEG, VT: MVT::v4f32, Action: Legal);
1151 setOperationAction(Op: ISD::FNEG, VT: MVT::v2f64, Action: Legal);
1152 setOperationAction(Op: ISD::FABS, VT: MVT::v4f32, Action: Legal);
1153 setOperationAction(Op: ISD::FABS, VT: MVT::v2f64, Action: Legal);
1154 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::v4f32, Action: Legal);
1155 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::v2f64, Action: Legal);
1156
1157 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v2i64, Action: Custom);
1158 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v2f64, Action: Custom);
1159
1160 // Handle constrained floating-point operations on vectors.
1161 // The predicate here is `hasVSX` because Altivec instructions do not
1162 // raise floating-point exceptions, while VSX vector instructions do.
1163 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::v4f32, Action: Legal);
1164 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::v4f32, Action: Legal);
1165 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::v4f32, Action: Legal);
1166 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::v4f32, Action: Legal);
1167 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::v4f32, Action: Legal);
1168 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::v4f32, Action: Legal);
1169 setOperationAction(Op: ISD::STRICT_FMAXNUM, VT: MVT::v4f32, Action: Legal);
1170 setOperationAction(Op: ISD::STRICT_FMINNUM, VT: MVT::v4f32, Action: Legal);
1171 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::v4f32, Action: Legal);
1172 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::v4f32, Action: Legal);
1173 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::v4f32, Action: Legal);
1174 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::v4f32, Action: Legal);
1175 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::v4f32, Action: Legal);
1176
1177 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::v2f64, Action: Legal);
1178 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::v2f64, Action: Legal);
1179 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::v2f64, Action: Legal);
1180 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::v2f64, Action: Legal);
1181 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::v2f64, Action: Legal);
1182 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::v2f64, Action: Legal);
1183 setOperationAction(Op: ISD::STRICT_FMAXNUM, VT: MVT::v2f64, Action: Legal);
1184 setOperationAction(Op: ISD::STRICT_FMINNUM, VT: MVT::v2f64, Action: Legal);
1185 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::v2f64, Action: Legal);
1186 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::v2f64, Action: Legal);
1187 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::v2f64, Action: Legal);
1188 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::v2f64, Action: Legal);
1189 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::v2f64, Action: Legal);
1190
1191 addRegisterClass(VT: MVT::v2i64, RC: &PPC::VSRCRegClass);
1192 addRegisterClass(VT: MVT::f128, RC: &PPC::VRRCRegClass);
1193
1194 for (MVT FPT : MVT::fp_valuetypes())
1195 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f128, MemVT: FPT, Action: Expand);
1196
1197 // Expand the SELECT to SELECT_CC
1198 setOperationAction(Op: ISD::SELECT, VT: MVT::f128, Action: Expand);
1199
1200 setTruncStoreAction(ValVT: MVT::f128, MemVT: MVT::f64, Action: Expand);
1201 setTruncStoreAction(ValVT: MVT::f128, MemVT: MVT::f32, Action: Expand);
1202
1203 // No implementation for these ops for PowerPC.
1204 setOperationAction(Op: ISD::FSINCOS, VT: MVT::f128, Action: Expand);
1205 setOperationAction(Op: ISD::FSIN, VT: MVT::f128, Action: Expand);
1206 setOperationAction(Op: ISD::FCOS, VT: MVT::f128, Action: Expand);
1207 setOperationAction(Op: ISD::FPOW, VT: MVT::f128, Action: Expand);
1208 setOperationAction(Op: ISD::FPOWI, VT: MVT::f128, Action: Expand);
1209 setOperationAction(Op: ISD::FREM, VT: MVT::f128, Action: Expand);
1210 }
1211
1212 if (Subtarget.hasP8Altivec()) {
1213 addRegisterClass(VT: MVT::v2i64, RC: &PPC::VRRCRegClass);
1214 addRegisterClass(VT: MVT::v1i128, RC: &PPC::VRRCRegClass);
1215 }
1216
1217 if (Subtarget.hasP9Vector()) {
1218 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4i32, Action: Custom);
1219 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4f32, Action: Custom);
1220
1221 // Test data class instructions store results in CR bits.
1222 if (Subtarget.useCRBits()) {
1223 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f32, Action: Custom);
1224 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f64, Action: Custom);
1225 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::f128, Action: Custom);
1226 setOperationAction(Op: ISD::IS_FPCLASS, VT: MVT::ppcf128, Action: Custom);
1227 }
1228
1229 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1230 // SRL, but not for SRA because of the instructions available:
1231 // VS{RL} and VS{RL}O.
1232 setOperationAction(Op: ISD::SHL, VT: MVT::v1i128, Action: Legal);
1233 setOperationAction(Op: ISD::SRL, VT: MVT::v1i128, Action: Legal);
1234 setOperationAction(Op: ISD::SRA, VT: MVT::v1i128, Action: Expand);
1235
1236 setOperationAction(Op: ISD::FADD, VT: MVT::f128, Action: Legal);
1237 setOperationAction(Op: ISD::FSUB, VT: MVT::f128, Action: Legal);
1238 setOperationAction(Op: ISD::FDIV, VT: MVT::f128, Action: Legal);
1239 setOperationAction(Op: ISD::FMUL, VT: MVT::f128, Action: Legal);
1240 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::f128, Action: Legal);
1241
1242 setOperationAction(Op: ISD::FMA, VT: MVT::f128, Action: Legal);
1243 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::f128, Action: Expand);
1244 setCondCodeAction(CCs: ISD::SETUGT, VT: MVT::f128, Action: Expand);
1245 setCondCodeAction(CCs: ISD::SETUEQ, VT: MVT::f128, Action: Expand);
1246 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::f128, Action: Expand);
1247 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::f128, Action: Expand);
1248 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::f128, Action: Expand);
1249
1250 setOperationAction(Op: ISD::FTRUNC, VT: MVT::f128, Action: Legal);
1251 setOperationAction(Op: ISD::FRINT, VT: MVT::f128, Action: Legal);
1252 setOperationAction(Op: ISD::FFLOOR, VT: MVT::f128, Action: Legal);
1253 setOperationAction(Op: ISD::FCEIL, VT: MVT::f128, Action: Legal);
1254 setOperationAction(Op: ISD::FNEARBYINT, VT: MVT::f128, Action: Legal);
1255 setOperationAction(Op: ISD::FROUND, VT: MVT::f128, Action: Legal);
1256
1257 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::f64, Action: Legal);
1258 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::f32, Action: Legal);
1259 setOperationAction(Op: ISD::BITCAST, VT: MVT::i128, Action: Custom);
1260
1261 // Handle constrained floating-point operations of fp128
1262 setOperationAction(Op: ISD::STRICT_FADD, VT: MVT::f128, Action: Legal);
1263 setOperationAction(Op: ISD::STRICT_FSUB, VT: MVT::f128, Action: Legal);
1264 setOperationAction(Op: ISD::STRICT_FMUL, VT: MVT::f128, Action: Legal);
1265 setOperationAction(Op: ISD::STRICT_FDIV, VT: MVT::f128, Action: Legal);
1266 setOperationAction(Op: ISD::STRICT_FMA, VT: MVT::f128, Action: Legal);
1267 setOperationAction(Op: ISD::STRICT_FSQRT, VT: MVT::f128, Action: Legal);
1268 setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f128, Action: Legal);
1269 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT: MVT::f64, Action: Legal);
1270 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT: MVT::f32, Action: Legal);
1271 setOperationAction(Op: ISD::STRICT_FRINT, VT: MVT::f128, Action: Legal);
1272 setOperationAction(Op: ISD::STRICT_FNEARBYINT, VT: MVT::f128, Action: Legal);
1273 setOperationAction(Op: ISD::STRICT_FFLOOR, VT: MVT::f128, Action: Legal);
1274 setOperationAction(Op: ISD::STRICT_FCEIL, VT: MVT::f128, Action: Legal);
1275 setOperationAction(Op: ISD::STRICT_FTRUNC, VT: MVT::f128, Action: Legal);
1276 setOperationAction(Op: ISD::STRICT_FROUND, VT: MVT::f128, Action: Legal);
1277 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v2f32, Action: Custom);
1278 setOperationAction(Op: ISD::BSWAP, VT: MVT::v8i16, Action: Legal);
1279 setOperationAction(Op: ISD::BSWAP, VT: MVT::v4i32, Action: Legal);
1280 setOperationAction(Op: ISD::BSWAP, VT: MVT::v2i64, Action: Legal);
1281 setOperationAction(Op: ISD::BSWAP, VT: MVT::v1i128, Action: Legal);
1282 } else if (Subtarget.hasVSX()) {
1283 setOperationAction(Op: ISD::LOAD, VT: MVT::f128, Action: Promote);
1284 setOperationAction(Op: ISD::STORE, VT: MVT::f128, Action: Promote);
1285
1286 AddPromotedToType(Opc: ISD::LOAD, OrigVT: MVT::f128, DestVT: MVT::v4i32);
1287 AddPromotedToType(Opc: ISD::STORE, OrigVT: MVT::f128, DestVT: MVT::v4i32);
1288
1289 // Set FADD/FSUB to LibCall so that the legalizer does not expand the
1290 // fp_to_uint and int_to_fp operations.
1291 setOperationAction(Op: ISD::FADD, VT: MVT::f128, Action: LibCall);
1292 setOperationAction(Op: ISD::FSUB, VT: MVT::f128, Action: LibCall);
1293
1294 setOperationAction(Op: ISD::FMUL, VT: MVT::f128, Action: Expand);
1295 setOperationAction(Op: ISD::FDIV, VT: MVT::f128, Action: Expand);
1296 setOperationAction(Op: ISD::FNEG, VT: MVT::f128, Action: Expand);
1297 setOperationAction(Op: ISD::FABS, VT: MVT::f128, Action: Expand);
1298 setOperationAction(Op: ISD::FSQRT, VT: MVT::f128, Action: Expand);
1299 setOperationAction(Op: ISD::FMA, VT: MVT::f128, Action: Expand);
1300 setOperationAction(Op: ISD::FCOPYSIGN, VT: MVT::f128, Action: Expand);
1301
1302 // Expand the fp_extend if the target type is fp128.
1303 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::f128, Action: Expand);
1304 setOperationAction(Op: ISD::STRICT_FP_EXTEND, VT: MVT::f128, Action: Expand);
1305
1306 // Expand the fp_round if the source type is fp128.
1307 for (MVT VT : {MVT::f32, MVT::f64}) {
1308 setOperationAction(Op: ISD::FP_ROUND, VT, Action: Custom);
1309 setOperationAction(Op: ISD::STRICT_FP_ROUND, VT, Action: Custom);
1310 }
1311
1312 setOperationAction(Op: ISD::SETCC, VT: MVT::f128, Action: Custom);
1313 setOperationAction(Op: ISD::STRICT_FSETCC, VT: MVT::f128, Action: Custom);
1314 setOperationAction(Op: ISD::STRICT_FSETCCS, VT: MVT::f128, Action: Custom);
1315 setOperationAction(Op: ISD::BR_CC, VT: MVT::f128, Action: Expand);
1316
1317 // Lower the following f128 select_cc pattern:
1318 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1319 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f128, Action: Custom);
1320
1321 // We need to handle f128 SELECT_CC with integer result type.
1322 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i32, Action: Custom);
1323 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::i64, Action: isPPC64 ? Custom : Expand);
1324 }
1325
1326 if (Subtarget.hasP9Altivec()) {
1327 if (Subtarget.isISA3_1()) {
1328 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v2i64, Action: Legal);
1329 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v8i16, Action: Legal);
1330 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v16i8, Action: Legal);
1331 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v4i32, Action: Legal);
1332 } else {
1333 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v8i16, Action: Custom);
1334 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::v16i8, Action: Custom);
1335 }
1336 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v4i8, Action: Legal);
1337 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v4i16, Action: Legal);
1338 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v4i32, Action: Legal);
1339 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i8, Action: Legal);
1340 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i16, Action: Legal);
1341 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i32, Action: Legal);
1342 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::v2i64, Action: Legal);
1343
1344 setOperationAction(Op: ISD::ABDU, VT: MVT::v16i8, Action: Legal);
1345 setOperationAction(Op: ISD::ABDU, VT: MVT::v8i16, Action: Legal);
1346 setOperationAction(Op: ISD::ABDU, VT: MVT::v4i32, Action: Legal);
1347 setOperationAction(Op: ISD::ABDS, VT: MVT::v4i32, Action: Legal);
1348 }
1349
1350 if (Subtarget.hasP10Vector()) {
1351 setOperationAction(Op: ISD::SELECT_CC, VT: MVT::f128, Action: Custom);
1352 }
1353 }
1354
1355 if (Subtarget.pairedVectorMemops()) {
1356 addRegisterClass(VT: MVT::v256i1, RC: &PPC::VSRpRCRegClass);
1357 setOperationAction(Op: ISD::LOAD, VT: MVT::v256i1, Action: Custom);
1358 setOperationAction(Op: ISD::STORE, VT: MVT::v256i1, Action: Custom);
1359 }
1360 if (Subtarget.hasMMA()) {
1361 if (Subtarget.isISAFuture()) {
1362 addRegisterClass(VT: MVT::v512i1, RC: &PPC::WACCRCRegClass);
1363 addRegisterClass(VT: MVT::v1024i1, RC: &PPC::DMRRCRegClass);
1364 addRegisterClass(VT: MVT::v2048i1, RC: &PPC::DMRpRCRegClass);
1365 setOperationAction(Op: ISD::LOAD, VT: MVT::v1024i1, Action: Custom);
1366 setOperationAction(Op: ISD::STORE, VT: MVT::v1024i1, Action: Custom);
1367 setOperationAction(Op: ISD::LOAD, VT: MVT::v2048i1, Action: Custom);
1368 setOperationAction(Op: ISD::STORE, VT: MVT::v2048i1, Action: Custom);
1369 } else {
1370 addRegisterClass(VT: MVT::v512i1, RC: &PPC::UACCRCRegClass);
1371 }
1372 setOperationAction(Op: ISD::LOAD, VT: MVT::v512i1, Action: Custom);
1373 setOperationAction(Op: ISD::STORE, VT: MVT::v512i1, Action: Custom);
1374 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v512i1, Action: Custom);
1375 }
1376
1377 if (Subtarget.has64BitSupport())
1378 setOperationAction(Op: ISD::PREFETCH, VT: MVT::Other, Action: Legal);
1379
1380 if (Subtarget.isISA3_1())
1381 setOperationAction(Op: ISD::SRA, VT: MVT::v1i128, Action: Legal);
1382
1383 setOperationAction(Op: ISD::READCYCLECOUNTER, VT: MVT::i64, Action: isPPC64 ? Legal : Custom);
1384
1385 if (!isPPC64) {
1386 setOperationAction(Op: ISD::ATOMIC_LOAD, VT: MVT::i64, Action: Expand);
1387 setOperationAction(Op: ISD::ATOMIC_STORE, VT: MVT::i64, Action: Expand);
1388 }
1389
1390 if (shouldInlineQuadwordAtomics()) {
1391 setOperationAction(Op: ISD::ATOMIC_LOAD, VT: MVT::i128, Action: Custom);
1392 setOperationAction(Op: ISD::ATOMIC_STORE, VT: MVT::i128, Action: Custom);
1393 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::i128, Action: Custom);
1394 }
1395
1396 setBooleanContents(ZeroOrOneBooleanContent);
1397
1398 if (Subtarget.hasAltivec()) {
1399 // Altivec instructions set fields to all zeros or all ones.
1400 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1401 }
1402
1403 if (shouldInlineQuadwordAtomics())
1404 setMaxAtomicSizeInBitsSupported(128);
1405 else if (isPPC64)
1406 setMaxAtomicSizeInBitsSupported(64);
1407 else
1408 setMaxAtomicSizeInBitsSupported(32);
1409
1410 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1411
1412 // We have target-specific dag combine patterns for the following nodes:
1413 setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
1414 ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
1415 if (Subtarget.hasFPCVT())
1416 setTargetDAGCombine(ISD::UINT_TO_FP);
1417 setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1418 if (Subtarget.useCRBits())
1419 setTargetDAGCombine(ISD::BRCOND);
1420 setTargetDAGCombine({ISD::BSWAP, ISD::INTRINSIC_WO_CHAIN,
1421 ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID});
1422
1423 setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, ISD::ANY_EXTEND});
1424
1425 setTargetDAGCombine({ISD::TRUNCATE, ISD::VECTOR_SHUFFLE});
1426
1427 if (Subtarget.useCRBits()) {
1428 setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
1429 }
1430
1431 // With 32 condition bits, we don't need to sink (and duplicate) compares
1432 // aggressively in CodeGenPrep.
1433 if (Subtarget.useCRBits()) {
1434 setHasMultipleConditionRegisters();
1435 setJumpIsExpensive();
1436 }
1437
1438 // TODO: The default number of entries is set to 64. This stops most jump
1439 // table generation on PPC. But it is good for current PPC hardware because
1440 // the indirect branch via mtctr into the jump table may lead to poor branch
1441 // prediction. Re-evaluate this value on future hardware that can do better with mtctr.
1442 setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
1443
1444 setMinFunctionAlignment(Align(4));
1445 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1446
1447 auto CPUDirective = Subtarget.getCPUDirective();
1448 switch (CPUDirective) {
1449 default: break;
1450 case PPC::DIR_970:
1451 case PPC::DIR_A2:
1452 case PPC::DIR_E500:
1453 case PPC::DIR_E500mc:
1454 case PPC::DIR_E5500:
1455 case PPC::DIR_PWR4:
1456 case PPC::DIR_PWR5:
1457 case PPC::DIR_PWR5X:
1458 case PPC::DIR_PWR6:
1459 case PPC::DIR_PWR6X:
1460 case PPC::DIR_PWR7:
1461 case PPC::DIR_PWR8:
1462 case PPC::DIR_PWR9:
1463 case PPC::DIR_PWR10:
1464 case PPC::DIR_PWR11:
1465 case PPC::DIR_PWR_FUTURE:
1466 setPrefLoopAlignment(Align(16));
1467 setPrefFunctionAlignment(Align(16));
1468 break;
1469 }
1470
1471 if (Subtarget.enableMachineScheduler())
1472 setSchedulingPreference(Sched::Source);
1473 else
1474 setSchedulingPreference(Sched::Hybrid);
1475
1476 computeRegisterProperties(TRI: STI.getRegisterInfo());
1477
1478 // The Freescale cores do better with aggressive inlining of memcpy and
1479 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1480 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1481 MaxStoresPerMemset = 32;
1482 MaxStoresPerMemsetOptSize = 16;
1483 MaxStoresPerMemcpy = 32;
1484 MaxStoresPerMemcpyOptSize = 8;
1485 MaxStoresPerMemmove = 32;
1486 MaxStoresPerMemmoveOptSize = 8;
1487 } else if (CPUDirective == PPC::DIR_A2) {
1488 // The A2 also benefits from (very) aggressive inlining of memcpy and
1489 // friends. The overhead of the function call, even when warm, can be
1490 // over one hundred cycles.
1491 MaxStoresPerMemset = 128;
1492 MaxStoresPerMemcpy = 128;
1493 MaxStoresPerMemmove = 128;
1494 MaxLoadsPerMemcmp = 128;
1495 } else {
1496 MaxLoadsPerMemcmp = 8;
1497 MaxLoadsPerMemcmpOptSize = 4;
1498 }
1499
1500 // Enable generation of STXVP instructions by default for mcpu=future.
1501 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1502 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1503 DisableAutoPairedVecSt = false;
1504
1505 IsStrictFPEnabled = true;
1506
1507 // Let the subtarget (CPU) decide if a predictable select is more expensive
1508 // than the corresponding branch. This information is used in CGP to decide
1509 // when to convert selects into branches.
1510 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1511
1512 GatherAllAliasesMaxDepth = PPCGatherAllAliasesMaxDepth;
1513}
1514
1515// *********************************** NOTE ************************************
1516// For selecting load and store instructions, the addressing modes are defined
1517// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1518 // patterns to match the load and store instructions.
1519//
1520// The TD definitions for the addressing modes correspond to their respective
1521// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1522// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1523// address mode flags of a particular node. Afterwards, the computed address
1524// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1525// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1526// accordingly, based on the preferred addressing mode.
1527//
1528// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1529// MemOpFlags contains all the possible flags that can be used to compute the
1530// optimal addressing mode for load and store instructions.
1531// AddrMode contains all the possible load and store addressing modes available
1532// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1533//
1534// When adding new load and store instructions, it is possible that new address
1535// flags may need to be added into MemOpFlags, and a new addressing mode will
1536 // need to be added to AddrMode. An entry for the new addressing mode (consisting
1537// of the minimal and main distinguishing address flags for the new load/store
1538// instructions) will need to be added into initializeAddrModeMap() below.
1539 // Finally, when adding new addressing modes, getAddrModeForFlags() will need
1540 // to be updated so that the optimal addressing mode is selected for them.
1541// *****************************************************************************
1542/// Initialize the map that relates the different addressing modes of the load
1543/// and store instructions to a set of flags. This ensures the load/store
1544/// instruction is correctly matched during instruction selection.
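/// For example (illustrative, derived from the table below): a zero-extending
/// word load such as LWZ whose address is a register plus a signed 16-bit
/// immediate carries the flags MOF_ZExt | MOF_RPlusSImm16 | MOF_WordInt and
/// is therefore matched with the D-Form addressing mode.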
1545void PPCTargetLowering::initializeAddrModeMap() {
1546 AddrModesMap[PPC::AM_DForm] = {
1547 // LWZ, STW
1548 PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt,
1549 PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt,
1550 PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
1551 PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
1552 // LBZ, LHZ, STB, STH
1553 PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
1554 PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
1555 PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
1556 PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
1557 // LHA
1558 PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
1559 PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
1560 PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
1561 PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
1562 // LFS, LFD, STFS, STFD
1563 PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1564 PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1565 PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1566 PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1567 };
1568 AddrModesMap[PPC::AM_DSForm] = {
1569 // LWA
1570 PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt,
1571 PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
1572 PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
1573 // LD, STD
1574 PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt,
1575 PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt,
1576 PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt,
1577 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1578 PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1579 PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1580 PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1581 };
1582 AddrModesMap[PPC::AM_DQForm] = {
1583 // LXV, STXV
1584 PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1585 PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1586 PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1587 };
1588 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1589 PPC::MOF_SubtargetP10};
1590 // TODO: Add mapping for quadword load/store.
1591}
1592
1593/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1594/// the desired ByVal argument alignment.
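/// For illustration (assuming Altivec is available, so MaxMaxAlign is 16): a
/// struct containing a 128-bit vector member is given a 16-byte alignment by
/// the logic below.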
1595static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1596 if (MaxAlign == MaxMaxAlign)
1597 return;
1598 if (VectorType *VTy = dyn_cast<VectorType>(Val: Ty)) {
1599 if (MaxMaxAlign >= 32 &&
1600 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1601 MaxAlign = Align(32);
1602 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1603 MaxAlign < 16)
1604 MaxAlign = Align(16);
1605 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Val: Ty)) {
1606 Align EltAlign;
1607 getMaxByValAlign(Ty: ATy->getElementType(), MaxAlign&: EltAlign, MaxMaxAlign);
1608 if (EltAlign > MaxAlign)
1609 MaxAlign = EltAlign;
1610 } else if (StructType *STy = dyn_cast<StructType>(Val: Ty)) {
1611 for (auto *EltTy : STy->elements()) {
1612 Align EltAlign;
1613 getMaxByValAlign(Ty: EltTy, MaxAlign&: EltAlign, MaxMaxAlign);
1614 if (EltAlign > MaxAlign)
1615 MaxAlign = EltAlign;
1616 if (MaxAlign == MaxMaxAlign)
1617 break;
1618 }
1619 }
1620}
1621
1622/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1623/// function arguments in the caller parameter area.
1624Align PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1625 const DataLayout &DL) const {
1626 // 16-byte and wider vectors are passed on a 16-byte boundary.
1627 // Everything else is passed on an 8-byte boundary on PPC64 and 4-byte on PPC32.
1628 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1629 if (Subtarget.hasAltivec())
1630 getMaxByValAlign(Ty, MaxAlign&: Alignment, MaxMaxAlign: Align(16));
1631 return Alignment;
1632}
1633
1634bool PPCTargetLowering::useSoftFloat() const {
1635 return Subtarget.useSoftFloat();
1636}
1637
1638bool PPCTargetLowering::hasSPE() const {
1639 return Subtarget.hasSPE();
1640}
1641
1642bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1643 return VT.isScalarInteger();
1644}
1645
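// Decide whether a constant-splat vector element being stored should be
// extracted directly from the vector, and report which element to extract.
// Derived from the checks below: this applies only to 32- and 64-bit integer
// elements on PPC64 with VSX, and the chosen Index depends on endianness.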
1646bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
1647 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1648 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1649 return false;
1650
1651 if (auto *VTy = dyn_cast<VectorType>(Val: VectorTy)) {
1652 if (VTy->getScalarType()->isIntegerTy()) {
1653 // ElemSizeInBits of 8/16 fits in the immediate field, so it is not handled here.
1654 if (ElemSizeInBits == 32) {
1655 Index = Subtarget.isLittleEndian() ? 2 : 1;
1656 return true;
1657 }
1658 if (ElemSizeInBits == 64) {
1659 Index = Subtarget.isLittleEndian() ? 1 : 0;
1660 return true;
1661 }
1662 }
1663 }
1664 return false;
1665}
1666
1667const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1668 switch ((PPCISD::NodeType)Opcode) {
1669 case PPCISD::FIRST_NUMBER: break;
1670 case PPCISD::FSEL: return "PPCISD::FSEL";
1671 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1672 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1673 case PPCISD::FCFID: return "PPCISD::FCFID";
1674 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1675 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1676 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1677 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1678 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1679 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1680 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1681 case PPCISD::FRE: return "PPCISD::FRE";
1682 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1683 case PPCISD::FTSQRT:
1684 return "PPCISD::FTSQRT";
1685 case PPCISD::FSQRT:
1686 return "PPCISD::FSQRT";
1687 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1688 case PPCISD::VPERM: return "PPCISD::VPERM";
1689 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1690 case PPCISD::XXSPLTI_SP_TO_DP:
1691 return "PPCISD::XXSPLTI_SP_TO_DP";
1692 case PPCISD::XXSPLTI32DX:
1693 return "PPCISD::XXSPLTI32DX";
1694 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1695 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1696 case PPCISD::XXPERM:
1697 return "PPCISD::XXPERM";
1698 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1699 case PPCISD::CMPB: return "PPCISD::CMPB";
1700 case PPCISD::Hi: return "PPCISD::Hi";
1701 case PPCISD::Lo: return "PPCISD::Lo";
1702 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1703 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1704 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1705 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1706 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1707 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1708 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1709 case PPCISD::SRL: return "PPCISD::SRL";
1710 case PPCISD::SRA: return "PPCISD::SRA";
1711 case PPCISD::SHL: return "PPCISD::SHL";
1712 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1713 case PPCISD::CALL: return "PPCISD::CALL";
1714 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1715 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1716 case PPCISD::CALL_RM:
1717 return "PPCISD::CALL_RM";
1718 case PPCISD::CALL_NOP_RM:
1719 return "PPCISD::CALL_NOP_RM";
1720 case PPCISD::CALL_NOTOC_RM:
1721 return "PPCISD::CALL_NOTOC_RM";
1722 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1723 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1724 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1725 case PPCISD::BCTRL_RM:
1726 return "PPCISD::BCTRL_RM";
1727 case PPCISD::BCTRL_LOAD_TOC_RM:
1728 return "PPCISD::BCTRL_LOAD_TOC_RM";
1729 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1730 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1731 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1732 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1733 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1734 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1735 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1736 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1737 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1738 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1739 case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1740 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1741 case PPCISD::ANDI_rec_1_EQ_BIT:
1742 return "PPCISD::ANDI_rec_1_EQ_BIT";
1743 case PPCISD::ANDI_rec_1_GT_BIT:
1744 return "PPCISD::ANDI_rec_1_GT_BIT";
1745 case PPCISD::VCMP: return "PPCISD::VCMP";
1746 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1747 case PPCISD::LBRX: return "PPCISD::LBRX";
1748 case PPCISD::STBRX: return "PPCISD::STBRX";
1749 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1750 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1751 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1752 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1753 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1754 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1755 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1756 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1757 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1758 case PPCISD::ST_VSR_SCAL_INT:
1759 return "PPCISD::ST_VSR_SCAL_INT";
1760 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1761 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1762 case PPCISD::BDZ: return "PPCISD::BDZ";
1763 case PPCISD::MFFS: return "PPCISD::MFFS";
1764 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1765 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1766 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1767 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1768 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1769 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1770 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1771 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1772 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1773 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1774 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1775 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1776 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1777 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1778 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1779 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1780 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1781 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1782 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1783 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1784 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1785 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1786 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1787 case PPCISD::PADDI_DTPREL:
1788 return "PPCISD::PADDI_DTPREL";
1789 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1790 case PPCISD::SC: return "PPCISD::SC";
1791 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1792 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1793 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1794 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1795 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1796 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1797 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1798 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1799 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1800 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1801 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1802 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1803 case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1804 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1805 case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1806 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1807 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1808 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1809 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1810 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1811 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1812 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1813 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1814 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1815 case PPCISD::STRICT_FADDRTZ:
1816 return "PPCISD::STRICT_FADDRTZ";
1817 case PPCISD::STRICT_FCTIDZ:
1818 return "PPCISD::STRICT_FCTIDZ";
1819 case PPCISD::STRICT_FCTIWZ:
1820 return "PPCISD::STRICT_FCTIWZ";
1821 case PPCISD::STRICT_FCTIDUZ:
1822 return "PPCISD::STRICT_FCTIDUZ";
1823 case PPCISD::STRICT_FCTIWUZ:
1824 return "PPCISD::STRICT_FCTIWUZ";
1825 case PPCISD::STRICT_FCFID:
1826 return "PPCISD::STRICT_FCFID";
1827 case PPCISD::STRICT_FCFIDU:
1828 return "PPCISD::STRICT_FCFIDU";
1829 case PPCISD::STRICT_FCFIDS:
1830 return "PPCISD::STRICT_FCFIDS";
1831 case PPCISD::STRICT_FCFIDUS:
1832 return "PPCISD::STRICT_FCFIDUS";
1833 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1834 case PPCISD::STORE_COND:
1835 return "PPCISD::STORE_COND";
1836 case PPCISD::SETBC:
1837 return "PPCISD::SETBC";
1838 case PPCISD::SETBCR:
1839 return "PPCISD::SETBCR";
1840 case PPCISD::ADDC:
1841 return "PPCISD::ADDC";
1842 case PPCISD::ADDE:
1843 return "PPCISD::ADDE";
1844 case PPCISD::SUBC:
1845 return "PPCISD::SUBC";
1846 case PPCISD::SUBE:
1847 return "PPCISD::SUBE";
1848 }
1849 return nullptr;
1850}
1851
1852EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1853 EVT VT) const {
1854 if (!VT.isVector())
1855 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1856
1857 return VT.changeVectorElementTypeToInteger();
1858}
1859
1860bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1861 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1862 return true;
1863}
1864
1865//===----------------------------------------------------------------------===//
1866// Node matching predicates, for use by the tblgen matching code.
1867//===----------------------------------------------------------------------===//
1868
1869/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1870static bool isFloatingPointZero(SDValue Op) {
1871 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Val&: Op))
1872 return CFP->getValueAPF().isZero();
1873 else if (ISD::isEXTLoad(N: Op.getNode()) || ISD::isNON_EXTLoad(N: Op.getNode())) {
1874 // Maybe this has already been legalized into the constant pool?
1875 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Val: Op.getOperand(i: 1)))
1876 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: CP->getConstVal()))
1877 return CFP->getValueAPF().isZero();
1878 }
1879 return false;
1880}
1881
1882/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1883/// true if Op is undef or if it matches the specified value.
1884static bool isConstantOrUndef(int Op, int Val) {
1885 return Op < 0 || Op == Val;
1886}
1887
1888/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1889/// VPKUHUM instruction.
1890/// The ShuffleKind distinguishes between big-endian operations with
1891/// two different inputs (0), either-endian operations with two identical
1892/// inputs (1), and little-endian operations with two different inputs (2).
1893/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
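/// For illustration (derived from the checks below): on a big-endian target
/// with two different inputs (ShuffleKind 0), the expected mask is
/// <1,3,5,...,31>, i.e. the low-order byte of each halfword; on a
/// little-endian target with swapped inputs (ShuffleKind 2) it is
/// <0,2,4,...,30>.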
1894bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1895 SelectionDAG &DAG) {
1896 bool IsLE = DAG.getDataLayout().isLittleEndian();
1897 if (ShuffleKind == 0) {
1898 if (IsLE)
1899 return false;
1900 for (unsigned i = 0; i != 16; ++i)
1901 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i), Val: i*2+1))
1902 return false;
1903 } else if (ShuffleKind == 2) {
1904 if (!IsLE)
1905 return false;
1906 for (unsigned i = 0; i != 16; ++i)
1907 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i), Val: i*2))
1908 return false;
1909 } else if (ShuffleKind == 1) {
1910 unsigned j = IsLE ? 0 : 1;
1911 for (unsigned i = 0; i != 8; ++i)
1912 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i), Val: i*2+j) ||
1913 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+8), Val: i*2+j))
1914 return false;
1915 }
1916 return true;
1917}
1918
1919/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1920/// VPKUWUM instruction.
1921/// The ShuffleKind distinguishes between big-endian operations with
1922/// two different inputs (0), either-endian operations with two identical
1923/// inputs (1), and little-endian operations with two different inputs (2).
1924/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
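/// For illustration (derived from the checks below): on a big-endian target
/// with two different inputs (ShuffleKind 0), the expected mask is
/// <2,3, 6,7, 10,11, 14,15, 18,19, 22,23, 26,27, 30,31>, i.e. the low-order
/// halfword of each word.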
1925bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1926 SelectionDAG &DAG) {
1927 bool IsLE = DAG.getDataLayout().isLittleEndian();
1928 if (ShuffleKind == 0) {
1929 if (IsLE)
1930 return false;
1931 for (unsigned i = 0; i != 16; i += 2)
1932 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2+2) ||
1933 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+3))
1934 return false;
1935 } else if (ShuffleKind == 2) {
1936 if (!IsLE)
1937 return false;
1938 for (unsigned i = 0; i != 16; i += 2)
1939 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2) ||
1940 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+1))
1941 return false;
1942 } else if (ShuffleKind == 1) {
1943 unsigned j = IsLE ? 0 : 2;
1944 for (unsigned i = 0; i != 8; i += 2)
1945 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2+j) ||
1946 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+j+1) ||
1947 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+8), Val: i*2+j) ||
1948 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+9), Val: i*2+j+1))
1949 return false;
1950 }
1951 return true;
1952}
1953
1954/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1955/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1956/// current subtarget.
1957///
1958/// The ShuffleKind distinguishes between big-endian operations with
1959/// two different inputs (0), either-endian operations with two identical
1960/// inputs (1), and little-endian operations with two different inputs (2).
1961/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
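/// For illustration (derived from the checks below): on a big-endian target
/// with two different inputs (ShuffleKind 0), the expected mask is
/// <4,5,6,7, 12,13,14,15, 20,21,22,23, 28,29,30,31>, i.e. the low-order word
/// of each doubleword.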
1962bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1963 SelectionDAG &DAG) {
1964 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1965 if (!Subtarget.hasP8Vector())
1966 return false;
1967
1968 bool IsLE = DAG.getDataLayout().isLittleEndian();
1969 if (ShuffleKind == 0) {
1970 if (IsLE)
1971 return false;
1972 for (unsigned i = 0; i != 16; i += 4)
1973 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2+4) ||
1974 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+5) ||
1975 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+2), Val: i*2+6) ||
1976 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+3), Val: i*2+7))
1977 return false;
1978 } else if (ShuffleKind == 2) {
1979 if (!IsLE)
1980 return false;
1981 for (unsigned i = 0; i != 16; i += 4)
1982 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2) ||
1983 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+1) ||
1984 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+2), Val: i*2+2) ||
1985 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+3), Val: i*2+3))
1986 return false;
1987 } else if (ShuffleKind == 1) {
1988 unsigned j = IsLE ? 0 : 4;
1989 for (unsigned i = 0; i != 8; i += 4)
1990 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i ), Val: i*2+j) ||
1991 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+1), Val: i*2+j+1) ||
1992 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+2), Val: i*2+j+2) ||
1993 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+3), Val: i*2+j+3) ||
1994 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+8), Val: i*2+j) ||
1995 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+9), Val: i*2+j+1) ||
1996 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+10), Val: i*2+j+2) ||
1997 !isConstantOrUndef(Op: N->getMaskElt(Idx: i+11), Val: i*2+j+3))
1998 return false;
1999 }
2000 return true;
2001}
2002
2003/// isVMerge - Common function, used to match vmrg* shuffles.
2004///
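/// For illustration (derived from the checks below): with UnitSize = 1,
/// LHSStart = 8 and RHSStart = 24 (the big-endian VMRGLB pattern used by
/// isVMRGLShuffleMask), the expected mask is <8,24, 9,25, 10,26, ..., 15,31>.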
2005static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2006 unsigned LHSStart, unsigned RHSStart) {
2007 if (N->getValueType(ResNo: 0) != MVT::v16i8)
2008 return false;
2009 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2010 "Unsupported merge size!");
2011
2012 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2013 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2014 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i*UnitSize*2+j),
2015 Val: LHSStart+j+i*UnitSize) ||
2016 !isConstantOrUndef(Op: N->getMaskElt(Idx: i*UnitSize*2+UnitSize+j),
2017 Val: RHSStart+j+i*UnitSize))
2018 return false;
2019 }
2020 return true;
2021}
2022
2023/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2024/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2025/// The ShuffleKind distinguishes between big-endian merges with two
2026/// different inputs (0), either-endian merges with two identical inputs (1),
2027/// and little-endian merges with two different inputs (2). For the latter,
2028/// the input operands are swapped (see PPCInstrAltivec.td).
2029bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2030 unsigned ShuffleKind, SelectionDAG &DAG) {
2031 if (DAG.getDataLayout().isLittleEndian()) {
2032 if (ShuffleKind == 1) // unary
2033 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 0);
2034 else if (ShuffleKind == 2) // swapped
2035 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 16);
2036 else
2037 return false;
2038 } else {
2039 if (ShuffleKind == 1) // unary
2040 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 8);
2041 else if (ShuffleKind == 0) // normal
2042 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 24);
2043 else
2044 return false;
2045 }
2046}
2047
2048/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2049/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2050/// The ShuffleKind distinguishes between big-endian merges with two
2051/// different inputs (0), either-endian merges with two identical inputs (1),
2052/// and little-endian merges with two different inputs (2). For the latter,
2053/// the input operands are swapped (see PPCInstrAltivec.td).
2054bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2055 unsigned ShuffleKind, SelectionDAG &DAG) {
2056 if (DAG.getDataLayout().isLittleEndian()) {
2057 if (ShuffleKind == 1) // unary
2058 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 8);
2059 else if (ShuffleKind == 2) // swapped
2060 return isVMerge(N, UnitSize, LHSStart: 8, RHSStart: 24);
2061 else
2062 return false;
2063 } else {
2064 if (ShuffleKind == 1) // unary
2065 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 0);
2066 else if (ShuffleKind == 0) // normal
2067 return isVMerge(N, UnitSize, LHSStart: 0, RHSStart: 16);
2068 else
2069 return false;
2070 }
2071}
2072
2073/**
2074 * Common function used to match vmrgew and vmrgow shuffles
2075 *
2076 * The indexOffset determines whether to look for even or odd words in
2077 * the shuffle mask. This is based on the endianness of the target
2078 * machine.
2079 * - Little Endian:
2080 * - Use offset of 0 to check for odd elements
2081 * - Use offset of 4 to check for even elements
2082 * - Big Endian:
2083 * - Use offset of 0 to check for even elements
2084 * - Use offset of 4 to check for odd elements
2085 * A detailed description of the vector element ordering for little endian and
2086 * big endian can be found at
2087 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2088 * Targeting your applications - what little endian and big endian IBM XL C/C++
2089 * compiler differences mean to you
2090 *
2091 * The mask to the shuffle vector instruction specifies the indices of the
2092 * elements from the two input vectors to place in the result. The elements are
2093 * numbered in array-access order, starting with the first vector. These vectors
2094 * are always of type v16i8, so each vector contains 16 byte-sized elements.
2095 * More info on the shuffle vector can be found in the
2096 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2097 * Language Reference.
2098 *
2099 * The RHSStartValue indicates whether the same input vectors are used (unary)
2100 * or two different input vectors are used, based on the following:
2101 * - If the instruction uses the same vector for both inputs, the range of the
2102 * indices will be 0 to 15. In this case, the RHSStart value passed should
2103 * be 0.
2104 * - If the instruction has two different vectors then the range of the
2105 * indices will be 0 to 31. In this case, the RHSStart value passed should
2106 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2107 * to 31 specify elements in the second vector).
2108 *
2109 * \param[in] N The shuffle vector SD Node to analyze
2110 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2111 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2112 * vector to the shuffle_vector instruction
2113 * \return true iff this shuffle vector represents an even or odd word merge
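 *
 * For illustration (derived from the checks below): with IndexOffset = 0 and
 * RHSStartValue = 16 (a big-endian even-word merge of two different inputs),
 * the expected byte mask is <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>.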
2114 */
2115static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2116 unsigned RHSStartValue) {
2117 if (N->getValueType(ResNo: 0) != MVT::v16i8)
2118 return false;
2119
2120 for (unsigned i = 0; i < 2; ++i)
2121 for (unsigned j = 0; j < 4; ++j)
2122 if (!isConstantOrUndef(Op: N->getMaskElt(Idx: i*4+j),
2123 Val: i*RHSStartValue+j+IndexOffset) ||
2124 !isConstantOrUndef(Op: N->getMaskElt(Idx: i*4+j+8),
2125 Val: i*RHSStartValue+j+IndexOffset+8))
2126 return false;
2127 return true;
2128}
2129
2130/**
2131 * Determine if the specified shuffle mask is suitable for the vmrgew or
2132 * vmrgow instructions.
2133 *
2134 * \param[in] N The shuffle vector SD Node to analyze
2135 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2136 * \param[in] ShuffleKind Identify the type of merge:
2137 * - 0 = big-endian merge with two different inputs;
2138 * - 1 = either-endian merge with two identical inputs;
2139 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2140 * little-endian merges).
2141 * \param[in] DAG The current SelectionDAG
2142 * \return true iff this shuffle mask is suitable for the vmrgew or vmrgow instruction
2143 */
2144bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
2145 unsigned ShuffleKind, SelectionDAG &DAG) {
2146 if (DAG.getDataLayout().isLittleEndian()) {
2147 unsigned indexOffset = CheckEven ? 4 : 0;
2148 if (ShuffleKind == 1) // Unary
2149 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 0);
2150 else if (ShuffleKind == 2) // swapped
2151 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 16);
2152 else
2153 return false;
2154 }
2155 else {
2156 unsigned indexOffset = CheckEven ? 0 : 4;
2157 if (ShuffleKind == 1) // Unary
2158 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 0);
2159 else if (ShuffleKind == 0) // Normal
2160 return isVMerge(N, IndexOffset: indexOffset, RHSStartValue: 16);
2161 else
2162 return false;
2163 }
2164 return false;
2165}
2166
2167/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2168/// amount, otherwise return -1.
2169/// The ShuffleKind distinguishes between big-endian operations with two
2170/// different inputs (0), either-endian operations with two identical inputs
2171/// (1), and little-endian operations with two different inputs (2). For the
2172/// latter, the input operands are swapped (see PPCInstrAltivec.td).
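/// For illustration (derived from the checks below): the mask <3,4,5,...,18>
/// is a shift by 3 bytes; with ShuffleKind 0 on a big-endian target this
/// returns 3, while with ShuffleKind 2 on a little-endian target it returns
/// 16 - 3 = 13.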
2173int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2174 SelectionDAG &DAG) {
2175 if (N->getValueType(ResNo: 0) != MVT::v16i8)
2176 return -1;
2177
2178 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val: N);
2179
2180 // Find the first non-undef value in the shuffle mask.
2181 unsigned i;
2182 for (i = 0; i != 16 && SVOp->getMaskElt(Idx: i) < 0; ++i)
2183 /*search*/;
2184
2185 if (i == 16) return -1; // all undef.
2186
2187 // Otherwise, check to see if the rest of the elements are consecutively
2188 // numbered from this value.
2189 unsigned ShiftAmt = SVOp->getMaskElt(Idx: i);
2190 if (ShiftAmt < i) return -1;
2191
2192 ShiftAmt -= i;
2193 bool isLE = DAG.getDataLayout().isLittleEndian();
2194
2195 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2196 // Check the rest of the elements to see if they are consecutive.
2197 for (++i; i != 16; ++i)
2198 if (!isConstantOrUndef(Op: SVOp->getMaskElt(Idx: i), Val: ShiftAmt+i))
2199 return -1;
2200 } else if (ShuffleKind == 1) {
2201 // Check the rest of the elements to see if they are consecutive.
2202 for (++i; i != 16; ++i)
2203 if (!isConstantOrUndef(Op: SVOp->getMaskElt(Idx: i), Val: (ShiftAmt+i) & 15))
2204 return -1;
2205 } else
2206 return -1;
2207
2208 if (isLE)
2209 ShiftAmt = 16 - ShiftAmt;
2210
2211 return ShiftAmt;
2212}
2213
2214/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2215/// specifies a splat of a single element that is suitable for input to
2216/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
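/// For illustration (derived from the checks below): with EltSize = 4, a splat
/// of word 2 of the first input corresponds to the byte mask
/// <8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11>.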
2217bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2218 EVT VT = N->getValueType(ResNo: 0);
2219 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2220 return EltSize == 8 && N->getMaskElt(Idx: 0) == N->getMaskElt(Idx: 1);
2221
2222 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2223 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2224
2225 // The consecutive indices need to specify an element, not part of two
2226 // different elements. So abandon ship early if this isn't the case.
2227 if (N->getMaskElt(Idx: 0) % EltSize != 0)
2228 return false;
2229
2230 // This is a splat operation if each element of the permute is the same, and
2231 // if the value doesn't reference the second vector.
2232 unsigned ElementBase = N->getMaskElt(Idx: 0);
2233
2234 // FIXME: Handle UNDEF elements too!
2235 if (ElementBase >= 16)
2236 return false;
2237
2238 // Check that the indices are consecutive, in the case of a multi-byte element
2239 // splatted with a v16i8 mask.
2240 for (unsigned i = 1; i != EltSize; ++i)
2241 if (N->getMaskElt(Idx: i) < 0 || N->getMaskElt(Idx: i) != (int)(i+ElementBase))
2242 return false;
2243
2244 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2245 // An UNDEF element is a sequence of UNDEF bytes.
2246 if (N->getMaskElt(Idx: i) < 0) {
2247 for (unsigned j = 1; j != EltSize; ++j)
2248 if (N->getMaskElt(Idx: i + j) >= 0)
2249 return false;
2250 } else
2251 for (unsigned j = 0; j != EltSize; ++j)
2252 if (N->getMaskElt(Idx: i + j) != N->getMaskElt(Idx: j))
2253 return false;
2254 }
2255 return true;
2256}
2257
2258/// Check that the mask is shuffling N byte elements. Within each N byte
2259/// element of the mask, the indices could be either in increasing or
2260/// decreasing order as long as they are consecutive.
2261/// \param[in] N the shuffle vector SD Node to analyze
2262/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2263/// Word/DoubleWord/QuadWord).
2264 /// \param[in] StepLen the expected delta between consecutive indices within
2265 /// each N-byte element: 1 if the mask is increasing, -1 if decreasing.
2266/// \return true iff the mask is shuffling N byte elements.
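/// For illustration (derived from the checks below): with Width = 4 and
/// StepLen = 1, the mask <4,5,6,7, 0,1,2,3, 12,13,14,15, 28,29,30,31> is
/// accepted, since each 4-byte group starts at a multiple of 4 and its indices
/// are consecutive and increasing.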
2267static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2268 int StepLen) {
2269 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2270 "Unexpected element width.");
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2272
2273 unsigned NumOfElem = 16 / Width;
2274 unsigned MaskVal[16]; // Width is never greater than 16
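  // For each Width-byte element, the leading byte index must fall on an
  // element boundary (StepLen == 1) or on the last byte of an element
  // (StepLen == -1), and the remaining indices must follow in that direction.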
2275 for (unsigned i = 0; i < NumOfElem; ++i) {
2276 MaskVal[0] = N->getMaskElt(Idx: i * Width);
2277 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2278 return false;
2279 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2280 return false;
2281 }
2282
2283 for (unsigned int j = 1; j < Width; ++j) {
2284 MaskVal[j] = N->getMaskElt(Idx: i * Width + j);
2285 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2286 return false;
2287 }
2288 }
2289 }
2290
2291 return true;
2292}
2293
2294bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2295 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2296 if (!isNByteElemShuffleMask(N, Width: 4, StepLen: 1))
2297 return false;
2298
2299 // Now we look at mask elements 0,4,8,12
2300 unsigned M0 = N->getMaskElt(Idx: 0) / 4;
2301 unsigned M1 = N->getMaskElt(Idx: 4) / 4;
2302 unsigned M2 = N->getMaskElt(Idx: 8) / 4;
2303 unsigned M3 = N->getMaskElt(Idx: 12) / 4;
2304 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2305 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
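  // These tables map the word index of the element being inserted to the
  // XXSLDWI rotation (in words) that moves it into the source word position
  // read by XXINSERTW, for little-endian and big-endian numbering respectively.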
2306
2307 // Below, let H and L be arbitrary elements of the shuffle mask
2308 // where H is in the range [4,7] and L is in the range [0,3].
2309 // H, 1, 2, 3 or L, 5, 6, 7
2310 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2311 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2312 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2313 InsertAtByte = IsLE ? 12 : 0;
2314 Swap = M0 < 4;
2315 return true;
2316 }
2317 // 0, H, 2, 3 or 4, L, 6, 7
2318 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2319 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2320 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2321 InsertAtByte = IsLE ? 8 : 4;
2322 Swap = M1 < 4;
2323 return true;
2324 }
2325 // 0, 1, H, 3 or 4, 5, L, 7
2326 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2327 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2328 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2329 InsertAtByte = IsLE ? 4 : 8;
2330 Swap = M2 < 4;
2331 return true;
2332 }
2333 // 0, 1, 2, H or 4, 5, 6, L
2334 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2335 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2336 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2337 InsertAtByte = IsLE ? 0 : 12;
2338 Swap = M3 < 4;
2339 return true;
2340 }
2341
2342 // If both vector operands for the shuffle are the same vector, the mask will
2343 // contain only elements from the first one and the second one will be undef.
2344 if (N->getOperand(Num: 1).isUndef()) {
2345 ShiftElts = 0;
2346 Swap = true;
2347 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2348 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2349 InsertAtByte = IsLE ? 12 : 0;
2350 return true;
2351 }
2352 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2353 InsertAtByte = IsLE ? 8 : 4;
2354 return true;
2355 }
2356 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2357 InsertAtByte = IsLE ? 4 : 8;
2358 return true;
2359 }
2360 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2361 InsertAtByte = IsLE ? 0 : 12;
2362 return true;
2363 }
2364 }
2365
2366 return false;
2367}
2368
2369bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2370 bool &Swap, bool IsLE) {
2371 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2372 // Ensure each byte index of the word is consecutive.
2373 if (!isNByteElemShuffleMask(N, Width: 4, StepLen: 1))
2374 return false;
2375
2376 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2377 unsigned M0 = N->getMaskElt(Idx: 0) / 4;
2378 unsigned M1 = N->getMaskElt(Idx: 4) / 4;
2379 unsigned M2 = N->getMaskElt(Idx: 8) / 4;
2380 unsigned M3 = N->getMaskElt(Idx: 12) / 4;
2381
2382 // If both vector operands for the shuffle are the same vector, the mask will
2383 // contain only elements from the first one and the second one will be undef.
2384 if (N->getOperand(Num: 1).isUndef()) {
2385 assert(M0 < 4 && "Indexing into an undef vector?");
2386 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2387 return false;
2388
2389 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2390 Swap = false;
2391 return true;
2392 }
2393
2394 // Ensure each word index of the ShuffleVector Mask is consecutive.
2395 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2396 return false;
2397
2398 if (IsLE) {
2399 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2400 // Input vectors don't need to be swapped if the leading element
2401 // of the result is one of the 3 left elements of the second vector
2402 // (or if there is no shift to be done at all).
2403 Swap = false;
2404 ShiftElts = (8 - M0) % 8;
2405 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2406 // Input vectors need to be swapped if the leading element
2407 // of the result is one of the 3 left elements of the first vector
2408 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2409 Swap = true;
2410 ShiftElts = (4 - M0) % 4;
2411 }
2412
2413 return true;
2414 } else { // BE
2415 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2416 // Input vectors don't need to be swapped if the leading element
2417 // of the result is one of the 4 elements of the first vector.
2418 Swap = false;
2419 ShiftElts = M0;
2420 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2421 // Input vectors need to be swapped if the leading element
2422 // of the result is one of the 4 elements of the right vector.
2423 Swap = true;
2424 ShiftElts = M0 - 4;
2425 }
2426
2427 return true;
2428 }
2429}
2430
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2432 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2433
2434 if (!isNByteElemShuffleMask(N, Width, StepLen: -1))
2435 return false;
2436
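  // Requiring the leading index of each element to be its own last byte,
  // together with the decreasing-order check above, means every Width-byte
  // element has its bytes exactly reversed, matching the XXBR[HWDQ] family.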
2437 for (int i = 0; i < 16; i += Width)
2438 if (N->getMaskElt(Idx: i) != i + Width - 1)
2439 return false;
2440
2441 return true;
2442}
2443
2444bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2445 return isXXBRShuffleMaskHelper(N, Width: 2);
2446}
2447
2448bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2449 return isXXBRShuffleMaskHelper(N, Width: 4);
2450}
2451
2452bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2453 return isXXBRShuffleMaskHelper(N, Width: 8);
2454}
2455
2456bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2457 return isXXBRShuffleMaskHelper(N, Width: 16);
2458}
2459
2460/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2461/// if the inputs to the instruction should be swapped and set \p DM to the
2462/// value for the immediate.
2463/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2464/// AND element 0 of the result comes from the first input (LE) or second input
2465/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is an XXPERMDI shuffle
2467/// mask.
2468bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2469 bool &Swap, bool IsLE) {
2470 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2471
2472 // Ensure each byte index of the double word is consecutive.
2473 if (!isNByteElemShuffleMask(N, Width: 8, StepLen: 1))
2474 return false;
2475
2476 unsigned M0 = N->getMaskElt(Idx: 0) / 8;
2477 unsigned M1 = N->getMaskElt(Idx: 8) / 8;
2478 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2479
2480 // If both vector operands for the shuffle are the same vector, the mask will
2481 // contain only elements from the first one and the second one will be undef.
2482 if (N->getOperand(Num: 1).isUndef()) {
2483 if ((M0 | M1) < 2) {
2484 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2485 Swap = false;
2486 return true;
2487 } else
2488 return false;
2489 }
2490
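  // DM is the 2-bit XXPERMDI immediate; each bit selects which doubleword of
  // an input forms the corresponding doubleword of the result. Little-endian
  // numbering is reversed relative to the register layout, so the bits are
  // complemented compared to the big-endian encoding.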
2491 if (IsLE) {
2492 if (M0 > 1 && M1 < 2) {
2493 Swap = false;
2494 } else if (M0 < 2 && M1 > 1) {
2495 M0 = (M0 + 2) % 4;
2496 M1 = (M1 + 2) % 4;
2497 Swap = true;
2498 } else
2499 return false;
2500
2501 // Note: if control flow comes here that means Swap is already set above
2502 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2503 return true;
2504 } else { // BE
2505 if (M0 < 2 && M1 > 1) {
2506 Swap = false;
2507 } else if (M0 > 1 && M1 < 2) {
2508 M0 = (M0 + 2) % 4;
2509 M1 = (M1 + 2) % 4;
2510 Swap = true;
2511 } else
2512 return false;
2513
2514 // Note: if control flow comes here that means Swap is already set above
2515 DM = (M0 << 1) + (M1 & 1);
2516 return true;
2517 }
2518}
2519
2520
2521/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2522/// appropriate for PPC mnemonics (which have a big endian bias - namely
2523/// elements are counted from the left of the vector register).
2524unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2525 SelectionDAG &DAG) {
2526 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val: N);
2527 assert(isSplatShuffleMask(SVOp, EltSize));
2528 EVT VT = SVOp->getValueType(ResNo: 0);
2529
2530 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2531 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(Idx: 0)
2532 : SVOp->getMaskElt(Idx: 0);
2533
2534 if (DAG.getDataLayout().isLittleEndian())
2535 return (16 / EltSize) - 1 - (SVOp->getMaskElt(Idx: 0) / EltSize);
2536 else
2537 return SVOp->getMaskElt(Idx: 0) / EltSize;
2538}
2539
2540/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2541/// by using a vspltis[bhw] instruction of the specified element size, return
2542/// the constant being splatted. The ByteSize field indicates the number of
2543/// bytes of each element [124] -> [bhw].
2544SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2545 SDValue OpVal;
2546
2547 // If ByteSize of the splat is bigger than the element size of the
2548 // build_vector, then we have a case where we are checking for a splat where
2549 // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2551 unsigned EltSize = 16/N->getNumOperands();
2552 if (EltSize < ByteSize) {
2553 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2554 SDValue UniquedVals[4];
2555 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2556
    // See if all of the elements in the buildvector agree across each chunk.
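    // UniquedVals[k] records the single constant (or stays null for undef)
    // seen at offset k of every Multiple-entry chunk of the build_vector.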
2558 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2559 if (N->getOperand(Num: i).isUndef()) continue;
2560 // If the element isn't a constant, bail fully out.
2561 if (!isa<ConstantSDNode>(Val: N->getOperand(Num: i))) return SDValue();
2562
2563 if (!UniquedVals[i&(Multiple-1)].getNode())
2564 UniquedVals[i&(Multiple-1)] = N->getOperand(Num: i);
2565 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(Num: i))
2566 return SDValue(); // no match.
2567 }
2568
2569 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2570 // either constant or undef values that are identical for each chunk. See
2571 // if these chunks can form into a larger vspltis*.
2572
2573 // Check to see if all of the leading entries are either 0 or -1. If
2574 // neither, then this won't fit into the immediate field.
2575 bool LeadingZero = true;
2576 bool LeadingOnes = true;
2577 for (unsigned i = 0; i != Multiple-1; ++i) {
2578 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2579
2580 LeadingZero &= isNullConstant(V: UniquedVals[i]);
2581 LeadingOnes &= isAllOnesConstant(V: UniquedVals[i]);
2582 }
2583 // Finally, check the least significant entry.
2584 if (LeadingZero) {
2585 if (!UniquedVals[Multiple-1].getNode())
2586 return DAG.getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32); // 0,0,0,undef
2587 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2588 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2589 return DAG.getTargetConstant(Val, DL: SDLoc(N), VT: MVT::i32);
2590 }
2591 if (LeadingOnes) {
2592 if (!UniquedVals[Multiple-1].getNode())
2593 return DAG.getTargetConstant(Val: ~0U, DL: SDLoc(N), VT: MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2595 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2596 return DAG.getTargetConstant(Val, DL: SDLoc(N), VT: MVT::i32);
2597 }
2598
2599 return SDValue();
2600 }
2601
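  // From here on EltSize >= ByteSize: the build_vector must splat a single
  // constant whose bit pattern repeats every ByteSize*8 bits and fits in the
  // 5-bit signed immediate of vspltis[bhw].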
2602 // Check to see if this buildvec has a single non-undef value in its elements.
2603 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2604 if (N->getOperand(Num: i).isUndef()) continue;
2605 if (!OpVal.getNode())
2606 OpVal = N->getOperand(Num: i);
2607 else if (OpVal != N->getOperand(Num: i))
2608 return SDValue();
2609 }
2610
2611 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2612
2613 unsigned ValSizeInBytes = EltSize;
2614 uint64_t Value = 0;
2615 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: OpVal)) {
2616 Value = CN->getZExtValue();
2617 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Val&: OpVal)) {
2618 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2619 Value = llvm::bit_cast<uint32_t>(from: CN->getValueAPF().convertToFloat());
2620 }
2621
2622 // If the splat value is larger than the element value, then we can never do
2623 // this splat. The only case that we could fit the replicated bits into our
2624 // immediate field for would be zero, and we prefer to use vxor for it.
2625 if (ValSizeInBytes < ByteSize) return SDValue();
2626
2627 // If the element value is larger than the splat value, check if it consists
2628 // of a repeated bit pattern of size ByteSize.
2629 if (!APInt(ValSizeInBytes * 8, Value).isSplat(SplatSizeInBits: ByteSize * 8))
2630 return SDValue();
2631
2632 // Properly sign extend the value.
2633 int MaskVal = SignExtend32(X: Value, B: ByteSize * 8);
2634
2635 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2636 if (MaskVal == 0) return SDValue();
2637
2638 // Finally, if this value fits in a 5 bit sext field, return it
2639 if (SignExtend32<5>(X: MaskVal) == MaskVal)
2640 return DAG.getSignedTargetConstant(Val: MaskVal, DL: SDLoc(N), VT: MVT::i32);
2641 return SDValue();
2642}
2643
2644//===----------------------------------------------------------------------===//
2645// Addressing Mode Selection
2646//===----------------------------------------------------------------------===//
2647
2648/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2649/// or 64-bit immediate, and if the value can be accurately represented as a
2650/// sign extension from a 16-bit value. If so, this returns true and the
2651/// immediate.
2652bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2653 if (!isa<ConstantSDNode>(Val: N))
2654 return false;
2655
2656 Imm = (int16_t)N->getAsZExtVal();
2657 if (N->getValueType(ResNo: 0) == MVT::i32)
2658 return Imm == (int32_t)N->getAsZExtVal();
2659 else
2660 return Imm == (int64_t)N->getAsZExtVal();
2661}
2662bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2663 return isIntS16Immediate(N: Op.getNode(), Imm);
2664}
2665
2666/// Used when computing address flags for selecting loads and stores.
2667/// If we have an OR, check if the LHS and RHS are provably disjoint.
2668/// An OR of two provably disjoint values is equivalent to an ADD.
2669/// Most PPC load/store instructions compute the effective address as a sum,
2670/// so doing this conversion is useful.
2671static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2672 if (N.getOpcode() != ISD::OR)
2673 return false;
2674 KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));
2675 if (!LHSKnown.Zero.getBoolValue())
2676 return false;
2677 KnownBits RHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 1));
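  // The OR behaves exactly like an ADD when every bit position is known to be
  // zero in at least one operand, since no carries can be generated.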
2678 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2679}
2680
2681/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2682/// be represented as an indexed [r+r] operation.
2683bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2684 SDValue &Index,
2685 SelectionDAG &DAG) const {
2686 for (SDNode *U : N->users()) {
2687 if (MemSDNode *Memop = dyn_cast<MemSDNode>(Val: U)) {
2688 if (Memop->getMemoryVT() == MVT::f64) {
2689 Base = N.getOperand(i: 0);
2690 Index = N.getOperand(i: 1);
2691 return true;
2692 }
2693 }
2694 }
2695 return false;
2696}
2697
/// isIntS34Immediate - This method tests whether the value of the given node
/// can be accurately represented as a sign extension from a 34-bit value. If
/// so, this returns true and the immediate.
2701bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2702 if (!isa<ConstantSDNode>(Val: N))
2703 return false;
2704
2705 Imm = (int64_t)cast<ConstantSDNode>(Val: N)->getSExtValue();
2706 return isInt<34>(x: Imm);
2707}
2708bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2709 return isIntS34Immediate(N: Op.getNode(), Imm);
2710}
2711
/// SelectAddressRegReg - Given the specified address, check to see if it
2713/// can be represented as an indexed [r+r] operation. Returns false if it
2714/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2715/// non-zero and N can be represented by a base register plus a signed 16-bit
2716/// displacement, make a more precise judgement by checking (displacement % \p
2717/// EncodingAlignment).
2718bool PPCTargetLowering::SelectAddressRegReg(
2719 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2720 MaybeAlign EncodingAlignment) const {
2721 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2722 // a [pc+imm].
2723 if (SelectAddressPCRel(N, Base))
2724 return false;
2725
2726 int16_t Imm = 0;
2727 if (N.getOpcode() == ISD::ADD) {
    // SPE f64 loads/stores can only handle 8-bit offsets, not the usual 16-bit
    // displacement, so check whether any user needs the [r+r] form.
2730 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2731 return true;
2732 if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm) &&
2733 (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm)))
2734 return false; // r+i
2735 if (N.getOperand(i: 1).getOpcode() == PPCISD::Lo)
2736 return false; // r+i
2737
2738 Base = N.getOperand(i: 0);
2739 Index = N.getOperand(i: 1);
2740 return true;
2741 } else if (N.getOpcode() == ISD::OR) {
2742 if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm) &&
2743 (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm)))
2744 return false; // r+i can fold it if we can.
2745
2746 // If this is an or of disjoint bitfields, we can codegen this as an add
2747 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2748 // disjoint.
2749 KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));
2750
2751 if (LHSKnown.Zero.getBoolValue()) {
2752 KnownBits RHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 1));
2753 // If all of the bits are known zero on the LHS or RHS, the add won't
2754 // carry.
2755 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2756 Base = N.getOperand(i: 0);
2757 Index = N.getOperand(i: 1);
2758 return true;
2759 }
2760 }
2761 }
2762
2763 return false;
2764}
2765
2766// If we happen to be doing an i64 load or store into a stack slot that has
2767// less than a 4-byte alignment, then the frame-index elimination may need to
2768// use an indexed load or store instruction (because the offset may not be a
2769// multiple of 4). The extra register needed to hold the offset comes from the
2770// register scavenger, and it is possible that the scavenger will need to use
2771// an emergency spill slot. As a result, we need to make sure that a spill slot
2772// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2773// stack slot.
2774static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2775 // FIXME: This does not handle the LWA case.
2776 if (VT != MVT::i64)
2777 return;
2778
2779 // NOTE: We'll exclude negative FIs here, which come from argument
2780 // lowering, because there are no known test cases triggering this problem
2781 // using packed structures (or similar). We can remove this exclusion if
2782 // we find such a test case. The reason why this is so test-case driven is
2783 // because this entire 'fixup' is only to prevent crashes (from the
2784 // register scavenger) on not-really-valid inputs. For example, if we have:
2785 // %a = alloca i1
2786 // %b = bitcast i1* %a to i64*
  //   store i64 0, i64* %b
2788 // then the store should really be marked as 'align 1', but is not. If it
2789 // were marked as 'align 1' then the indexed form would have been
2790 // instruction-selected initially, and the problem this 'fixup' is preventing
2791 // won't happen regardless.
2792 if (FrameIdx < 0)
2793 return;
2794
2795 MachineFunction &MF = DAG.getMachineFunction();
2796 MachineFrameInfo &MFI = MF.getFrameInfo();
2797
2798 if (MFI.getObjectAlign(ObjectIdx: FrameIdx) >= Align(4))
2799 return;
2800
2801 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2802 FuncInfo->setHasNonRISpills();
2803}
2804
2805/// Returns true if the address N can be represented by a base register plus
2806/// a signed 16-bit displacement [r+imm], and if it is not better
2807/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2808/// displacements that are multiples of that value.
2809bool PPCTargetLowering::SelectAddressRegImm(
2810 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2811 MaybeAlign EncodingAlignment) const {
2812 // FIXME dl should come from parent load or store, not from address
2813 SDLoc dl(N);
2814
2815 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2816 // a [pc+imm].
2817 if (SelectAddressPCRel(N, Base))
2818 return false;
2819
2820 // If this can be more profitably realized as r+r, fail.
2821 if (SelectAddressRegReg(N, Base&: Disp, Index&: Base, DAG, EncodingAlignment))
2822 return false;
2823
2824 if (N.getOpcode() == ISD::ADD) {
2825 int16_t imm = 0;
2826 if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: imm) &&
2827 (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: imm))) {
2828 Disp = DAG.getSignedTargetConstant(Val: imm, DL: dl, VT: N.getValueType());
2829 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0))) {
2830 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2831 fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
2832 } else {
2833 Base = N.getOperand(i: 0);
2834 }
2835 return true; // [r+i]
2836 } else if (N.getOperand(i: 1).getOpcode() == PPCISD::Lo) {
2837 // Match LOAD (ADD (X, Lo(G))).
2838 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2839 "Cannot handle constant offsets yet!");
2840 Disp = N.getOperand(i: 1).getOperand(i: 0); // The global address.
2841 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2842 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2843 Disp.getOpcode() == ISD::TargetConstantPool ||
2844 Disp.getOpcode() == ISD::TargetJumpTable);
2845 Base = N.getOperand(i: 0);
2846 return true; // [&g+r]
2847 }
2848 } else if (N.getOpcode() == ISD::OR) {
2849 int16_t imm = 0;
2850 if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: imm) &&
2851 (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: imm))) {
2852 // If this is an or of disjoint bitfields, we can codegen this as an add
2853 // (for better address arithmetic) if the LHS and RHS of the OR are
2854 // provably disjoint.
2855 KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));
2856
2857 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2858 // If all of the bits are known zero on the LHS or RHS, the add won't
2859 // carry.
2860 if (FrameIndexSDNode *FI =
2861 dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0))) {
2862 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2863 fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
2864 } else {
2865 Base = N.getOperand(i: 0);
2866 }
2867 Disp = DAG.getTargetConstant(Val: imm, DL: dl, VT: N.getValueType());
2868 return true;
2869 }
2870 }
2871 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
2872 // Loading from a constant address.
2873
2874 // If this address fits entirely in a 16-bit sext immediate field, codegen
2875 // this as "d, 0"
2876 int16_t Imm;
2877 if (isIntS16Immediate(N: CN, Imm) &&
2878 (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm))) {
2879 Disp = DAG.getTargetConstant(Val: Imm, DL: dl, VT: CN->getValueType(ResNo: 0));
2880 Base = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2881 VT: CN->getValueType(ResNo: 0));
2882 return true;
2883 }
2884
2885 // Handle 32-bit sext immediates with LIS + addr mode.
2886 if ((CN->getValueType(ResNo: 0) == MVT::i32 ||
2887 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2888 (!EncodingAlignment ||
2889 isAligned(Lhs: *EncodingAlignment, SizeInBytes: CN->getZExtValue()))) {
2890 int Addr = (int)CN->getZExtValue();
2891
2892 // Otherwise, break this down into an LIS + disp.
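      // The 16-bit displacement is sign-extended when added to the base, so
      // compute the LIS value as (Addr - (signed short)Addr) >> 16 to
      // compensate when the low 16 bits are negative.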
2893 Disp = DAG.getTargetConstant(Val: (short)Addr, DL: dl, VT: MVT::i32);
2894
2895 Base = DAG.getTargetConstant(Val: (Addr - (signed short)Addr) >> 16, DL: dl,
2896 VT: MVT::i32);
2897 unsigned Opc = CN->getValueType(ResNo: 0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2898 Base = SDValue(DAG.getMachineNode(Opcode: Opc, dl, VT: CN->getValueType(ResNo: 0), Op1: Base), 0);
2899 return true;
2900 }
2901 }
2902
2903 Disp = DAG.getTargetConstant(Val: 0, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()));
2904 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N)) {
2905 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2906 fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
2907 } else
2908 Base = N;
2909 return true; // [r+0]
2910}
2911
2912/// Similar to the 16-bit case but for instructions that take a 34-bit
2913/// displacement field (prefixed loads/stores).
2914bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2915 SDValue &Base,
2916 SelectionDAG &DAG) const {
2917 // Only on 64-bit targets.
2918 if (N.getValueType() != MVT::i64)
2919 return false;
2920
2921 SDLoc dl(N);
2922 int64_t Imm = 0;
2923
2924 if (N.getOpcode() == ISD::ADD) {
2925 if (!isIntS34Immediate(Op: N.getOperand(i: 1), Imm))
2926 return false;
2927 Disp = DAG.getSignedTargetConstant(Val: Imm, DL: dl, VT: N.getValueType());
2928 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0)))
2929 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2930 else
2931 Base = N.getOperand(i: 0);
2932 return true;
2933 }
2934
2935 if (N.getOpcode() == ISD::OR) {
2936 if (!isIntS34Immediate(Op: N.getOperand(i: 1), Imm))
2937 return false;
2938 // If this is an or of disjoint bitfields, we can codegen this as an add
2939 // (for better address arithmetic) if the LHS and RHS of the OR are
2940 // provably disjoint.
2941 KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0));
2942 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2943 return false;
2944 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0)))
2945 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
2946 else
2947 Base = N.getOperand(i: 0);
2948 Disp = DAG.getSignedTargetConstant(Val: Imm, DL: dl, VT: N.getValueType());
2949 return true;
2950 }
2951
2952 if (isIntS34Immediate(Op: N, Imm)) { // If the address is a 34-bit const.
2953 Disp = DAG.getSignedTargetConstant(Val: Imm, DL: dl, VT: N.getValueType());
2954 Base = DAG.getRegister(Reg: PPC::ZERO8, VT: N.getValueType());
2955 return true;
2956 }
2957
2958 return false;
2959}
2960
/// SelectAddressRegRegOnly - Given the specified address, force it to be
2962/// represented as an indexed [r+r] operation.
2963bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2964 SDValue &Index,
2965 SelectionDAG &DAG) const {
2966 // Check to see if we can easily represent this as an [r+r] address. This
2967 // will fail if it thinks that the address is more profitably represented as
2968 // reg+imm, e.g. where imm = 0.
2969 if (SelectAddressRegReg(N, Base, Index, DAG))
2970 return true;
2971
2972 // If the address is the result of an add, we will utilize the fact that the
2973 // address calculation includes an implicit add. However, we can reduce
2974 // register pressure if we do not materialize a constant just for use as the
2975 // index register. We only get rid of the add if it is not an add of a
2976 // value and a 16-bit signed constant and both have a single use.
2977 int16_t imm = 0;
2978 if (N.getOpcode() == ISD::ADD &&
2979 (!isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: imm) ||
2980 !N.getOperand(i: 1).hasOneUse() || !N.getOperand(i: 0).hasOneUse())) {
2981 Base = N.getOperand(i: 0);
2982 Index = N.getOperand(i: 1);
2983 return true;
2984 }
2985
2986 // Otherwise, do it the hard way, using R0 as the base register.
2987 Base = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2988 VT: N.getValueType());
2989 Index = N;
2990 return true;
2991}
2992
2993template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2994 Ty *PCRelCand = dyn_cast<Ty>(N);
2995 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(TF: PCRelCand->getTargetFlags()));
2996}
2997
2998/// Returns true if this address is a PC Relative address.
2999/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
3000/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
3001bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
3002 // This is a materialize PC Relative node. Always select this as PC Relative.
3003 Base = N;
3004 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
3005 return true;
3006 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
3007 isValidPCRelNode<GlobalAddressSDNode>(N) ||
3008 isValidPCRelNode<JumpTableSDNode>(N) ||
3009 isValidPCRelNode<BlockAddressSDNode>(N))
3010 return true;
3011 return false;
3012}
3013
/// Returns true if we should use a direct load into a vector register (using
/// an instruction such as lxsd or lfd) instead of a load into a GPR plus a
/// direct-move sequence.
3016static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3017
3018 // If there are any other uses other than scalar to vector, then we should
3019 // keep it as a scalar load -> direct move pattern to prevent multiple
3020 // loads.
3021 LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N);
3022 if (!LD)
3023 return false;
3024
3025 EVT MemVT = LD->getMemoryVT();
3026 if (!MemVT.isSimple())
3027 return false;
3028 switch(MemVT.getSimpleVT().SimpleTy) {
3029 case MVT::i64:
3030 break;
3031 case MVT::i32:
3032 if (!ST.hasP8Vector())
3033 return false;
3034 break;
3035 case MVT::i16:
3036 case MVT::i8:
3037 if (!ST.hasP9Vector())
3038 return false;
3039 break;
3040 default:
3041 return false;
3042 }
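  // Only element types that can be loaded directly into a vector register
  // qualify: i64 always, i32 requires Power8 vector support, and i8/i16
  // require Power9 vector support.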
3043
3044 SDValue LoadedVal(N, 0);
3045 if (!LoadedVal.hasOneUse())
3046 return false;
3047
3048 for (SDUse &Use : LD->uses())
3049 if (Use.getResNo() == 0 &&
3050 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3051 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3052 return false;
3053
3054 return true;
3055}
3056
/// getPreIndexedAddressParts - Returns true if the node's address can be
/// legally represented as a pre-indexed load/store address, and if so returns
/// the base pointer, offset pointer, and addressing mode by reference.
3060bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3061 SDValue &Offset,
3062 ISD::MemIndexedMode &AM,
3063 SelectionDAG &DAG) const {
3064 if (DisablePPCPreinc) return false;
3065
3066 bool isLoad = true;
3067 SDValue Ptr;
3068 EVT VT;
3069 Align Alignment;
3070 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
3071 Ptr = LD->getBasePtr();
3072 VT = LD->getMemoryVT();
3073 Alignment = LD->getAlign();
3074 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) {
3075 Ptr = ST->getBasePtr();
3076 VT = ST->getMemoryVT();
3077 Alignment = ST->getAlign();
3078 isLoad = false;
3079 } else
3080 return false;
3081
  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
  // instructions, because we can fold these into a more efficient instruction
  // instead (such as LXSD).
3085 if (isLoad && usePartialVectorLoads(N, ST: Subtarget)) {
3086 return false;
3087 }
3088
3089 // PowerPC doesn't have preinc load/store instructions for vectors
3090 if (VT.isVector())
3091 return false;
3092
3093 if (SelectAddressRegReg(N: Ptr, Base, Index&: Offset, DAG)) {
3094 // Common code will reject creating a pre-inc form if the base pointer
3095 // is a frame index, or if N is a store and the base pointer is either
3096 // the same as or a predecessor of the value being stored. Check for
3097 // those situations here, and try with swapped Base/Offset instead.
3098 bool Swap = false;
3099
3100 if (isa<FrameIndexSDNode>(Val: Base) || isa<RegisterSDNode>(Val: Base))
3101 Swap = true;
3102 else if (!isLoad) {
3103 SDValue Val = cast<StoreSDNode>(Val: N)->getValue();
3104 if (Val == Base || Base.getNode()->isPredecessorOf(N: Val.getNode()))
3105 Swap = true;
3106 }
3107
3108 if (Swap)
3109 std::swap(a&: Base, b&: Offset);
3110
3111 AM = ISD::PRE_INC;
3112 return true;
3113 }
3114
3115 // LDU/STU can only handle immediates that are a multiple of 4.
3116 if (VT != MVT::i64) {
3117 if (!SelectAddressRegImm(N: Ptr, Disp&: Offset, Base, DAG, EncodingAlignment: std::nullopt))
3118 return false;
3119 } else {
3120 // LDU/STU need an address with at least 4-byte alignment.
3121 if (Alignment < Align(4))
3122 return false;
3123
3124 if (!SelectAddressRegImm(N: Ptr, Disp&: Offset, Base, DAG, EncodingAlignment: Align(4)))
3125 return false;
3126 }
3127
3128 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
3129 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3130 // sext i32 to i64 when addr mode is r+i.
3131 if (LD->getValueType(ResNo: 0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3132 LD->getExtensionType() == ISD::SEXTLOAD &&
3133 isa<ConstantSDNode>(Val: Offset))
3134 return false;
3135 }
3136
3137 AM = ISD::PRE_INC;
3138 return true;
3139}
3140
3141//===----------------------------------------------------------------------===//
3142// LowerOperation implementation
3143//===----------------------------------------------------------------------===//
3144
/// Set HiOpFlags and LoOpFlags to the target MO flags used for label
/// references, selecting the PIC variants when generating position-independent
/// code.
3147static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3148 unsigned &HiOpFlags, unsigned &LoOpFlags,
3149 const GlobalValue *GV = nullptr) {
3150 HiOpFlags = PPCII::MO_HA;
3151 LoOpFlags = PPCII::MO_LO;
3152
3153 // Don't use the pic base if not in PIC relocation model.
3154 if (IsPIC) {
3155 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3156 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3157 }
3158}
3159
3160static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3161 SelectionDAG &DAG) {
3162 SDLoc DL(HiPart);
3163 EVT PtrVT = HiPart.getValueType();
3164 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: PtrVT);
3165
3166 SDValue Hi = DAG.getNode(Opcode: PPCISD::Hi, DL, VT: PtrVT, N1: HiPart, N2: Zero);
3167 SDValue Lo = DAG.getNode(Opcode: PPCISD::Lo, DL, VT: PtrVT, N1: LoPart, N2: Zero);
3168
3169 // With PIC, the first instruction is actually "GR+hi(&G)".
3170 if (isPIC)
3171 Hi = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT,
3172 N1: DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL, VT: PtrVT), N2: Hi);
3173
3174 // Generate non-pic code that has direct accesses to the constant pool.
3175 // The address of the global is just (hi(&g)+lo(&g)).
3176 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Hi, N2: Lo);
3177}
3178
3179static void setUsesTOCBasePtr(MachineFunction &MF) {
3180 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3181 FuncInfo->setUsesTOCBasePtr();
3182}
3183
3184static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3185 setUsesTOCBasePtr(DAG.getMachineFunction());
3186}
3187
3188SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3189 SDValue GA) const {
3190 EVT VT = Subtarget.getScalarIntVT();
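  // The TOC base is in X2 on 64-bit targets and in R2 on 32-bit AIX; 32-bit
  // ELF PIC instead materializes the GOT base with GlobalBaseReg.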
3191 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(Reg: PPC::X2, VT)
3192 : Subtarget.isAIXABI()
3193 ? DAG.getRegister(Reg: PPC::R2, VT)
3194 : DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT);
3195 SDValue Ops[] = { GA, Reg };
3196 return DAG.getMemIntrinsicNode(
3197 Opcode: PPCISD::TOC_ENTRY, dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Other), Ops, MemVT: VT,
3198 PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()), Alignment: std::nullopt,
3199 Flags: MachineMemOperand::MOLoad);
3200}
3201
3202SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3203 SelectionDAG &DAG) const {
3204 EVT PtrVT = Op.getValueType();
3205 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Val&: Op);
3206 const Constant *C = CP->getConstVal();
3207
3208 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3209 // The actual address of the GlobalValue is stored in the TOC.
3210 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3211 if (Subtarget.isUsingPCRelativeCalls()) {
3212 SDLoc DL(CP);
3213 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3214 SDValue ConstPool = DAG.getTargetConstantPool(
3215 C, VT: Ty, Align: CP->getAlign(), Offset: CP->getOffset(), TargetFlags: PPCII::MO_PCREL_FLAG);
3216 return DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: ConstPool);
3217 }
3218 setUsesTOCBasePtr(DAG);
3219 SDValue GA = DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: 0);
3220 return getTOCEntry(DAG, dl: SDLoc(CP), GA);
3221 }
3222
3223 unsigned MOHiFlag, MOLoFlag;
3224 bool IsPIC = isPositionIndependent();
3225 getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag);
3226
3227 if (IsPIC && Subtarget.isSVR4ABI()) {
3228 SDValue GA =
3229 DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: PPCII::MO_PIC_FLAG);
3230 return getTOCEntry(DAG, dl: SDLoc(CP), GA);
3231 }
3232
3233 SDValue CPIHi =
3234 DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: 0, TargetFlags: MOHiFlag);
3235 SDValue CPILo =
3236 DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: 0, TargetFlags: MOLoFlag);
3237 return LowerLabelRef(HiPart: CPIHi, LoPart: CPILo, isPIC: IsPIC, DAG);
3238}
3239
3240// For 64-bit PowerPC, prefer the more compact relative encodings.
3241// This trades 32 bits per jump table entry for one or two instructions
// at the jump site.
3243unsigned PPCTargetLowering::getJumpTableEncoding() const {
3244 if (isJumpTableRelative())
3245 return MachineJumpTableInfo::EK_LabelDifference32;
3246
3247 return TargetLowering::getJumpTableEncoding();
3248}
3249
3250bool PPCTargetLowering::isJumpTableRelative() const {
3251 if (UseAbsoluteJumpTables)
3252 return false;
3253 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3254 return true;
3255 return TargetLowering::isJumpTableRelative();
3256}
3257
3258SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3259 SelectionDAG &DAG) const {
3260 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3261 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3262
3263 switch (getTargetMachine().getCodeModel()) {
3264 case CodeModel::Small:
3265 case CodeModel::Medium:
3266 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3267 default:
3268 return DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: SDLoc(),
3269 VT: getPointerTy(DL: DAG.getDataLayout()));
3270 }
3271}
3272
3273const MCExpr *
3274PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3275 unsigned JTI,
3276 MCContext &Ctx) const {
3277 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3278 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3279
3280 switch (getTargetMachine().getCodeModel()) {
3281 case CodeModel::Small:
3282 case CodeModel::Medium:
3283 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3284 default:
3285 return MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx);
3286 }
3287}
3288
3289SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3290 EVT PtrVT = Op.getValueType();
3291 JumpTableSDNode *JT = cast<JumpTableSDNode>(Val&: Op);
3292
3293 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3294 if (Subtarget.isUsingPCRelativeCalls()) {
3295 SDLoc DL(JT);
3296 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3297 SDValue GA =
3298 DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: Ty, TargetFlags: PPCII::MO_PCREL_FLAG);
3299 SDValue MatAddr = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
3300 return MatAddr;
3301 }
3302
3303 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3304 // The actual address of the GlobalValue is stored in the TOC.
3305 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3306 setUsesTOCBasePtr(DAG);
3307 SDValue GA = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT);
3308 return getTOCEntry(DAG, dl: SDLoc(JT), GA);
3309 }
3310
3311 unsigned MOHiFlag, MOLoFlag;
3312 bool IsPIC = isPositionIndependent();
3313 getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag);
3314
3315 if (IsPIC && Subtarget.isSVR4ABI()) {
3316 SDValue GA = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT,
3317 TargetFlags: PPCII::MO_PIC_FLAG);
3318 return getTOCEntry(DAG, dl: SDLoc(GA), GA);
3319 }
3320
3321 SDValue JTIHi = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT, TargetFlags: MOHiFlag);
3322 SDValue JTILo = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT, TargetFlags: MOLoFlag);
3323 return LowerLabelRef(HiPart: JTIHi, LoPart: JTILo, isPIC: IsPIC, DAG);
3324}
3325
3326SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3327 SelectionDAG &DAG) const {
3328 EVT PtrVT = Op.getValueType();
3329 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Val&: Op);
3330 const BlockAddress *BA = BASDN->getBlockAddress();
3331
3332 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3333 if (Subtarget.isUsingPCRelativeCalls()) {
3334 SDLoc DL(BASDN);
3335 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3336 SDValue GA = DAG.getTargetBlockAddress(BA, VT: Ty, Offset: BASDN->getOffset(),
3337 TargetFlags: PPCII::MO_PCREL_FLAG);
3338 SDValue MatAddr = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
3339 return MatAddr;
3340 }
3341
3342 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3343 // The actual BlockAddress is stored in the TOC.
3344 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3345 setUsesTOCBasePtr(DAG);
3346 SDValue GA = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: BASDN->getOffset());
3347 return getTOCEntry(DAG, dl: SDLoc(BASDN), GA);
3348 }
3349
3350 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3351 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3352 return getTOCEntry(
3353 DAG, dl: SDLoc(BASDN),
3354 GA: DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: BASDN->getOffset()));
3355
3356 unsigned MOHiFlag, MOLoFlag;
3357 bool IsPIC = isPositionIndependent();
3358 getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag);
3359 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: 0, TargetFlags: MOHiFlag);
3360 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: 0, TargetFlags: MOLoFlag);
3361 return LowerLabelRef(HiPart: TgtBAHi, LoPart: TgtBALo, isPIC: IsPIC, DAG);
3362}
3363
3364SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3365 SelectionDAG &DAG) const {
3366 if (Subtarget.isAIXABI())
3367 return LowerGlobalTLSAddressAIX(Op, DAG);
3368
3369 return LowerGlobalTLSAddressLinux(Op, DAG);
3370}
3371
3372/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3373/// and then apply the update.
3374static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3375 SelectionDAG &DAG,
3376 const TargetMachine &TM) {
3377 // Initialize TLS model opt setting lazily:
3378 // (1) Use initial-exec for single TLS var references within current function.
3379 // (2) Use local-dynamic for multiple TLS var references within current
3380 // function.
3381 PPCFunctionInfo *FuncInfo =
3382 DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3383 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3384 SmallPtrSet<const GlobalValue *, 8> TLSGV;
3385 // Iterate over all instructions within current function, collect all TLS
3386 // global variables (global variables taken as the first parameter to
3387 // Intrinsic::threadlocal_address).
3388 const Function &Func = DAG.getMachineFunction().getFunction();
3389 for (const BasicBlock &BB : Func)
3390 for (const Instruction &I : BB)
3391 if (I.getOpcode() == Instruction::Call)
3392 if (const CallInst *CI = dyn_cast<const CallInst>(Val: &I))
3393 if (Function *CF = CI->getCalledFunction())
3394 if (CF->isDeclaration() &&
3395 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3396 if (const GlobalValue *GV =
3397 dyn_cast<GlobalValue>(Val: I.getOperand(i: 0))) {
3398 TLSModel::Model GVModel = TM.getTLSModel(GV);
3399 if (GVModel == TLSModel::LocalDynamic)
3400 TLSGV.insert(Ptr: GV);
3401 }
3402
3403 unsigned TLSGVCnt = TLSGV.size();
3404 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3405 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3406 FuncInfo->setAIXFuncUseTLSIEForLD();
3407 FuncInfo->setAIXFuncTLSModelOptInitDone();
3408 }
3409
3410 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3411 LLVM_DEBUG(
3412 dbgs() << DAG.getMachineFunction().getName()
3413 << " function is using the TLS-IE model for TLS-LD access.\n");
3414 Model = TLSModel::InitialExec;
3415 }
3416}
3417
3418SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3419 SelectionDAG &DAG) const {
3420 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Op);
3421
3422 if (DAG.getTarget().useEmulatedTLS())
3423 report_fatal_error(reason: "Emulated TLS is not yet supported on AIX");
3424
3425 SDLoc dl(GA);
3426 const GlobalValue *GV = GA->getGlobal();
3427 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3428 bool Is64Bit = Subtarget.isPPC64();
3429 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3430
3431 // Apply update to the TLS model.
3432 if (Subtarget.hasAIXShLibTLSModelOpt())
3433 updateForAIXShLibTLSModelOpt(Model, DAG, TM: getTargetMachine());
3434
3435 // TLS variables are accessed through TOC entries.
3436 // To support this, set the DAG to use the TOC base pointer.
3437 setUsesTOCBasePtr(DAG);
3438
3439 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3440
3441 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3442 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3443 bool HasAIXSmallTLSGlobalAttr = false;
3444 SDValue VariableOffsetTGA =
3445 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TPREL_FLAG);
3446 SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA);
3447 SDValue TLSReg;
3448
3449 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(Val: GV))
3450 if (GVar->hasAttribute(Kind: "aix-small-tls"))
3451 HasAIXSmallTLSGlobalAttr = true;
3452
3453 if (Is64Bit) {
3454 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3455 // involves a load of the variable offset (from the TOC), followed by an
3456 // add of the loaded variable offset to R13 (the thread pointer).
3457 // This code sequence looks like:
3458 // ld reg1,var[TC](2)
3459 // add reg2, reg1, r13 // r13 contains the thread pointer
3460 TLSReg = DAG.getRegister(Reg: PPC::X13, VT: MVT::i64);
3461
3462 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3463 // global variable attribute, produce a faster access sequence for
3464 // local-exec TLS variables where the offset from the TLS base is encoded
3465 // as an immediate operand.
3466 //
3467 // We only utilize the faster local-exec access sequence when the TLS
3468 // variable has a size within the policy limit. We treat types that are
3469 // not sized or are empty as being over the policy size limit.
3470 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3471 IsTLSLocalExecModel) {
3472 Type *GVType = GV->getValueType();
3473 if (GVType->isSized() && !GVType->isEmptyTy() &&
3474 GV->getDataLayout().getTypeAllocSize(Ty: GVType) <=
3475 AIXSmallTlsPolicySizeLimit)
3476 return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: VariableOffsetTGA, N2: TLSReg);
3477 }
3478 } else {
3479 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3480 // involves loading the variable offset from the TOC, generating a call to
3481 // .__get_tpointer to get the thread pointer (which will be in R3), and
3482 // adding the two together:
3483 // lwz reg1,var[TC](2)
3484 // bla .__get_tpointer
3485 // add reg2, reg1, r3
3486 TLSReg = DAG.getNode(Opcode: PPCISD::GET_TPOINTER, DL: dl, VT: PtrVT);
3487
3488 // We do not implement the 32-bit version of the faster access sequence
3489 // for local-exec that is controlled by the -maix-small-local-exec-tls
3490 // option, or the "aix-small-tls" global variable attribute.
3491 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3492 report_fatal_error(reason: "The small-local-exec TLS access sequence is "
3493 "currently only supported on AIX (64-bit mode).");
3494 }
3495 return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TLSReg, N2: VariableOffset);
3496 }
3497
3498 if (Model == TLSModel::LocalDynamic) {
3499 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3500
3501 // We do not implement the 32-bit version of the faster access sequence
3502 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3503 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3504 report_fatal_error(reason: "The small-local-dynamic TLS access sequence is "
3505 "currently only supported on AIX (64-bit mode).");
3506
3507 // For local-dynamic on AIX, we need to generate one TOC entry for each
3508 // variable offset, and a single module-handle TOC entry for the entire
3509 // file.
3510
3511 SDValue VariableOffsetTGA =
3512 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSLD_FLAG);
3513 SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA);
3514
3515 Module *M = DAG.getMachineFunction().getFunction().getParent();
3516 GlobalVariable *TLSGV =
3517 dyn_cast_or_null<GlobalVariable>(Val: M->getOrInsertGlobal(
3518 Name: StringRef("_$TLSML"), Ty: PointerType::getUnqual(C&: *DAG.getContext())));
    assert(TLSGV && "Not able to create GV for _$TLSML.");
    TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
3521 SDValue ModuleHandleTGA =
3522 DAG.getTargetGlobalAddress(GV: TLSGV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSLDM_FLAG);
3523 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, GA: ModuleHandleTGA);
3524 SDValue ModuleHandle =
3525 DAG.getNode(Opcode: PPCISD::TLSLD_AIX, DL: dl, VT: PtrVT, Operand: ModuleHandleTOC);
3526
3527 // With the -maix-small-local-dynamic-tls option, produce a faster access
3528 // sequence for local-dynamic TLS variables where the offset from the
3529 // module-handle is encoded as an immediate operand.
3530 //
3531 // We only utilize the faster local-dynamic access sequence when the TLS
3532 // variable has a size within the policy limit. We treat types that are
3533 // not sized or are empty as being over the policy size limit.
3534 if (HasAIXSmallLocalDynamicTLS) {
3535 Type *GVType = GV->getValueType();
3536 if (GVType->isSized() && !GVType->isEmptyTy() &&
3537 GV->getDataLayout().getTypeAllocSize(Ty: GVType) <=
3538 AIXSmallTlsPolicySizeLimit)
3539 return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: VariableOffsetTGA,
3540 N2: ModuleHandle);
3541 }
3542
3543 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: ModuleHandle, N2: VariableOffset);
3544 }
3545
3546 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3547 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3548 // need to generate two TOC entries, one for the variable offset, one for the
3549 // region handle. The global address for the TOC entry of the region handle is
3550 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3551 // entry of the variable offset is created with MO_TLSGD_FLAG.
3552 SDValue VariableOffsetTGA =
3553 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSGD_FLAG);
3554 SDValue RegionHandleTGA =
3555 DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSGDM_FLAG);
3556 SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA);
3557 SDValue RegionHandle = getTOCEntry(DAG, dl, GA: RegionHandleTGA);
3558 return DAG.getNode(Opcode: PPCISD::TLSGD_AIX, DL: dl, VT: PtrVT, N1: VariableOffset,
3559 N2: RegionHandle);
3560}
3561
3562SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3563 SelectionDAG &DAG) const {
3564 // FIXME: TLS addresses currently use medium model code sequences,
3565 // which is the most useful form. Eventually support for small and
3566 // large models could be added if users need it, at the cost of
3567 // additional complexity.
3568 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Op);
3569 if (DAG.getTarget().useEmulatedTLS())
3570 return LowerToTLSEmulatedModel(GA, DAG);
3571
3572 SDLoc dl(GA);
3573 const GlobalValue *GV = GA->getGlobal();
3574 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3575 bool is64bit = Subtarget.isPPC64();
3576 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3577 PICLevel::Level picLevel = M->getPICLevel();
3578
3579 const TargetMachine &TM = getTargetMachine();
3580 TLSModel::Model Model = TM.getTLSModel(GV);
3581
3582 if (Model == TLSModel::LocalExec) {
3583 if (Subtarget.isUsingPCRelativeCalls()) {
3584 SDValue TLSReg = DAG.getRegister(Reg: PPC::X13, VT: MVT::i64);
3585 SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
3586 TargetFlags: PPCII::MO_TPREL_PCREL_FLAG);
3587 SDValue MatAddr =
3588 DAG.getNode(Opcode: PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
3589 return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TLSReg, N2: MatAddr);
3590 }
3591
3592 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
3593 TargetFlags: PPCII::MO_TPREL_HA);
3594 SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
3595 TargetFlags: PPCII::MO_TPREL_LO);
3596 SDValue TLSReg = is64bit ? DAG.getRegister(Reg: PPC::X13, VT: MVT::i64)
3597 : DAG.getRegister(Reg: PPC::R2, VT: MVT::i32);
3598
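    // This yields the classic local-exec sequence, for example on 64-bit:
    //   addis reg, r13, sym@tprel@ha
    //   addi  reg, reg, sym@tprel@l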
3599 SDValue Hi = DAG.getNode(Opcode: PPCISD::Hi, DL: dl, VT: PtrVT, N1: TGAHi, N2: TLSReg);
3600 return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: TGALo, N2: Hi);
3601 }
3602
3603 if (Model == TLSModel::InitialExec) {
3604 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3605 SDValue TGA = DAG.getTargetGlobalAddress(
3606 GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3607 SDValue TGATLS = DAG.getTargetGlobalAddress(
3608 GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3609 SDValue TPOffset;
3610 if (IsPCRel) {
3611 SDValue MatPCRel = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
3612 TPOffset = DAG.getLoad(VT: MVT::i64, dl, Chain: DAG.getEntryNode(), Ptr: MatPCRel,
3613 PtrInfo: MachinePointerInfo());
3614 } else {
3615 SDValue GOTPtr;
3616 if (is64bit) {
3617 setUsesTOCBasePtr(DAG);
3618 SDValue GOTReg = DAG.getRegister(Reg: PPC::X2, VT: MVT::i64);
3619 GOTPtr =
3620 DAG.getNode(Opcode: PPCISD::ADDIS_GOT_TPREL_HA, DL: dl, VT: PtrVT, N1: GOTReg, N2: TGA);
3621 } else {
3622 if (!TM.isPositionIndependent())
3623 GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_GOT, DL: dl, VT: PtrVT);
3624 else if (picLevel == PICLevel::SmallPIC)
3625 GOTPtr = DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT: PtrVT);
3626 else
3627 GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_PICGOT, DL: dl, VT: PtrVT);
3628 }
3629 TPOffset = DAG.getNode(Opcode: PPCISD::LD_GOT_TPREL_L, DL: dl, VT: PtrVT, N1: TGA, N2: GOTPtr);
3630 }
3631 return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TPOffset, N2: TGATLS);
3632 }
3633
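  // For reference, the non-PC-relative 64-bit general-dynamic access built
  // below materializes the GOT entry address and calls __tls_get_addr (the
  // call is folded into the ADDI_TLSGD_L_ADDR node), roughly:
  //   addis r3, r2, x@got@tlsgd@ha
  //   addi  r3, r3, x@got@tlsgd@l
  //   bl    __tls_get_addr(x@tlsgd)
  //   nop
  // Illustrative sketch only; the exact emitted assembly may differ.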
3634 if (Model == TLSModel::GeneralDynamic) {
3635 if (Subtarget.isUsingPCRelativeCalls()) {
3636 SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
3637 TargetFlags: PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3638 return DAG.getNode(Opcode: PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
3639 }
3640
3641 SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: 0);
3642 SDValue GOTPtr;
3643 if (is64bit) {
3644 setUsesTOCBasePtr(DAG);
3645 SDValue GOTReg = DAG.getRegister(Reg: PPC::X2, VT: MVT::i64);
3646 GOTPtr = DAG.getNode(Opcode: PPCISD::ADDIS_TLSGD_HA, DL: dl, VT: PtrVT,
3647 N1: GOTReg, N2: TGA);
3648 } else {
3649 if (picLevel == PICLevel::SmallPIC)
3650 GOTPtr = DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT: PtrVT);
3651 else
3652 GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_PICGOT, DL: dl, VT: PtrVT);
3653 }
3654 return DAG.getNode(Opcode: PPCISD::ADDI_TLSGD_L_ADDR, DL: dl, VT: PtrVT,
3655 N1: GOTPtr, N2: TGA, N3: TGA);
3656 }
3657
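  // For reference, the non-PC-relative 64-bit local-dynamic access built
  // below makes one __tls_get_addr call for the module and then applies a
  // DTP-relative offset for the variable, roughly:
  //   addis r3, r2, x@got@tlsld@ha
  //   addi  r3, r3, x@got@tlsld@l
  //   bl    __tls_get_addr(x@tlsld)
  //   nop
  //   addis rX, r3, x@dtprel@ha
  //   addi  rY, rX, x@dtprel@l
  // Illustrative sketch only; the exact emitted assembly may differ.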
3658 if (Model == TLSModel::LocalDynamic) {
3659 if (Subtarget.isUsingPCRelativeCalls()) {
3660 SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0,
3661 TargetFlags: PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3662 SDValue MatPCRel =
3663 DAG.getNode(Opcode: PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, DL: dl, VT: PtrVT, Operand: TGA);
3664 return DAG.getNode(Opcode: PPCISD::PADDI_DTPREL, DL: dl, VT: PtrVT, N1: MatPCRel, N2: TGA);
3665 }
3666
3667 SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: 0);
3668 SDValue GOTPtr;
3669 if (is64bit) {
3670 setUsesTOCBasePtr(DAG);
3671 SDValue GOTReg = DAG.getRegister(Reg: PPC::X2, VT: MVT::i64);
3672 GOTPtr = DAG.getNode(Opcode: PPCISD::ADDIS_TLSLD_HA, DL: dl, VT: PtrVT,
3673 N1: GOTReg, N2: TGA);
3674 } else {
3675 if (picLevel == PICLevel::SmallPIC)
3676 GOTPtr = DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: dl, VT: PtrVT);
3677 else
3678 GOTPtr = DAG.getNode(Opcode: PPCISD::PPC32_PICGOT, DL: dl, VT: PtrVT);
3679 }
3680 SDValue TLSAddr = DAG.getNode(Opcode: PPCISD::ADDI_TLSLD_L_ADDR, DL: dl,
3681 VT: PtrVT, N1: GOTPtr, N2: TGA, N3: TGA);
3682 SDValue DtvOffsetHi = DAG.getNode(Opcode: PPCISD::ADDIS_DTPREL_HA, DL: dl,
3683 VT: PtrVT, N1: TLSAddr, N2: TGA);
3684 return DAG.getNode(Opcode: PPCISD::ADDI_DTPREL_L, DL: dl, VT: PtrVT, N1: DtvOffsetHi, N2: TGA);
3685 }
3686
3687 llvm_unreachable("Unknown TLS model!");
3688}
3689
3690SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3691 SelectionDAG &DAG) const {
3692 EVT PtrVT = Op.getValueType();
3693 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Val&: Op);
3694 SDLoc DL(GSDN);
3695 const GlobalValue *GV = GSDN->getGlobal();
3696
3697 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3698 // The actual address of the GlobalValue is stored in the TOC.
3699 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3700 if (Subtarget.isUsingPCRelativeCalls()) {
3701 EVT Ty = getPointerTy(DL: DAG.getDataLayout());
3702 if (isAccessedAsGotIndirect(N: Op)) {
3703 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: GSDN->getOffset(),
3704 TargetFlags: PPCII::MO_GOT_PCREL_FLAG);
3705 SDValue MatPCRel = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
3706 SDValue Load = DAG.getLoad(VT: MVT::i64, dl: DL, Chain: DAG.getEntryNode(), Ptr: MatPCRel,
3707 PtrInfo: MachinePointerInfo());
3708 return Load;
3709 } else {
3710 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: Ty, offset: GSDN->getOffset(),
3711 TargetFlags: PPCII::MO_PCREL_FLAG);
3712 return DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA);
3713 }
3714 }
3715 setUsesTOCBasePtr(DAG);
3716 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: GSDN->getOffset());
3717 return getTOCEntry(DAG, dl: DL, GA);
3718 }
3719
3720 unsigned MOHiFlag, MOLoFlag;
3721 bool IsPIC = isPositionIndependent();
3722 getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag, GV);
3723
3724 if (IsPIC && Subtarget.isSVR4ABI()) {
3725 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT,
3726 offset: GSDN->getOffset(),
3727 TargetFlags: PPCII::MO_PIC_FLAG);
3728 return getTOCEntry(DAG, dl: DL, GA);
3729 }
3730
3731 SDValue GAHi =
3732 DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: GSDN->getOffset(), TargetFlags: MOHiFlag);
3733 SDValue GALo =
3734 DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: GSDN->getOffset(), TargetFlags: MOLoFlag);
3735
3736 return LowerLabelRef(HiPart: GAHi, LoPart: GALo, isPIC: IsPIC, DAG);
3737}
3738
3739SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3740 bool IsStrict = Op->isStrictFPOpcode();
3741 ISD::CondCode CC =
3742 cast<CondCodeSDNode>(Val: Op.getOperand(i: IsStrict ? 3 : 2))->get();
3743 SDValue LHS = Op.getOperand(i: IsStrict ? 1 : 0);
3744 SDValue RHS = Op.getOperand(i: IsStrict ? 2 : 1);
3745 SDValue Chain = IsStrict ? Op.getOperand(i: 0) : SDValue();
3746 EVT LHSVT = LHS.getValueType();
3747 SDLoc dl(Op);
3748
3749 // Soften the setcc with a libcall if it is fp128.
3750 if (LHSVT == MVT::f128) {
3751 assert(!Subtarget.hasP9Vector() &&
3752 "SETCC for f128 is already legal under Power9!");
3753 softenSetCCOperands(DAG, VT: LHSVT, NewLHS&: LHS, NewRHS&: RHS, CCCode&: CC, DL: dl, OldLHS: LHS, OldRHS: RHS, Chain,
3754 IsSignaling: Op->getOpcode() == ISD::STRICT_FSETCCS);
3755 if (RHS.getNode())
3756 LHS = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: Op.getValueType(), N1: LHS, N2: RHS,
3757 N3: DAG.getCondCode(Cond: CC));
3758 if (IsStrict)
3759 return DAG.getMergeValues(Ops: {LHS, Chain}, dl);
3760 return LHS;
3761 }
3762
3763 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3764
3765 if (Op.getValueType() == MVT::v2i64) {
3766 // When the operands themselves are v2i64 values, we need to do something
3767 // special because VSX has no underlying comparison operations for these.
3768 if (LHS.getValueType() == MVT::v2i64) {
3769 // Equality can be handled by casting to the legal type for Altivec
3770 // comparisons; everything else needs to be expanded.
3771 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3772 return SDValue();
3773 SDValue SetCC32 = DAG.getSetCC(
3774 DL: dl, VT: MVT::v4i32, LHS: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: LHS),
3775 RHS: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: RHS), Cond: CC);
3776 int ShuffV[] = {1, 0, 3, 2};
3777 SDValue Shuff =
3778 DAG.getVectorShuffle(VT: MVT::v4i32, dl, N1: SetCC32, N2: SetCC32, Mask: ShuffV);
3779 return DAG.getBitcast(VT: MVT::v2i64,
3780 V: DAG.getNode(Opcode: CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3781 DL: dl, VT: MVT::v4i32, N1: Shuff, N2: SetCC32));
3782 }
3783
3784 // We handle most of these in the usual way.
3785 return Op;
3786 }
3787
3788 // If we're comparing for equality to zero, expose the fact that this is
3789 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3790 // fold the new nodes.
3791 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3792 return V;
3793
3794 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: RHS)) {
3795 // Leave comparisons against 0 and -1 alone for now, since they're usually
3796 // optimized. FIXME: revisit this when we can custom lower all setcc
3797 // optimizations.
3798 if (C->isAllOnes() || C->isZero())
3799 return SDValue();
3800 }
3801
3802 // If we have an integer seteq/setne, turn it into a compare against zero
3803 // by xor'ing the rhs with the lhs, which is faster than setting a
3804 // condition register, reading it back out, and masking the correct bit. The
3805 // normal approach here uses sub to do this instead of xor. Using xor exposes
3806 // the result to other bit-twiddling opportunities.
3807 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3808 EVT VT = Op.getValueType();
3809 SDValue Sub = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: LHSVT, N1: LHS, N2: RHS);
3810 return DAG.getSetCC(DL: dl, VT, LHS: Sub, RHS: DAG.getConstant(Val: 0, DL: dl, VT: LHSVT), Cond: CC);
3811 }
3812 return SDValue();
3813}
3814
3815SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3816 SDNode *Node = Op.getNode();
3817 EVT VT = Node->getValueType(ResNo: 0);
3818 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3819 SDValue InChain = Node->getOperand(Num: 0);
3820 SDValue VAListPtr = Node->getOperand(Num: 1);
3821 const Value *SV = cast<SrcValueSDNode>(Val: Node->getOperand(Num: 2))->getValue();
3822 SDLoc dl(Node);
3823
3824 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3825
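  // The lowering below implements, roughly, the following C-style sketch of
  // va_arg for the 32-bit SVR4 va_list (illustrative only):
  //   idx = is_int ? gpr : fpr;
  //   if (i64) idx = (idx + 1) & ~1u;            // i64 needs an even GPR pair
  //   addr = (idx < 8) ? reg_save_area + idx * (is_int ? 4 : 8)
  //                          + (is_fp ? 32 : 0)  // FPRs follow the 8 GPRs
  //                    : overflow_area;
  //   *index_byte = idx + (i64 ? 2 : 1);         // bump gpr/fpr index
  //   if (idx >= 8) overflow_area += is_int ? 4 : 8;
  //   return *(T *)addr;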
3826 // gpr_index
3827 SDValue GprIndex = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT: MVT::i32, Chain: InChain,
3828 Ptr: VAListPtr, PtrInfo: MachinePointerInfo(SV), MemVT: MVT::i8);
3829 InChain = GprIndex.getValue(R: 1);
3830
3831 if (VT == MVT::i64) {
3832 // Check if GprIndex is even
3833 SDValue GprAnd = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: GprIndex,
3834 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
3835 SDValue CC64 = DAG.getSetCC(DL: dl, VT: MVT::i32, LHS: GprAnd,
3836 RHS: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32), Cond: ISD::SETNE);
3837 SDValue GprIndexPlusOne = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: GprIndex,
3838 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
3839 // Align GprIndex to be even if it isn't
3840 GprIndex = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: CC64, N2: GprIndexPlusOne,
3841 N3: GprIndex);
3842 }
3843
3844 // fpr index is 1 byte after gpr
3845 SDValue FprPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: VAListPtr,
3846 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
3847
3848 // fpr
3849 SDValue FprIndex = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT: MVT::i32, Chain: InChain,
3850 Ptr: FprPtr, PtrInfo: MachinePointerInfo(SV), MemVT: MVT::i8);
3851 InChain = FprIndex.getValue(R: 1);
3852
3853 SDValue RegSaveAreaPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: VAListPtr,
3854 N2: DAG.getConstant(Val: 8, DL: dl, VT: MVT::i32));
3855
3856 SDValue OverflowAreaPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: VAListPtr,
3857 N2: DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32));
3858
3859 // areas
3860 SDValue OverflowArea =
3861 DAG.getLoad(VT: MVT::i32, dl, Chain: InChain, Ptr: OverflowAreaPtr, PtrInfo: MachinePointerInfo());
3862 InChain = OverflowArea.getValue(R: 1);
3863
3864 SDValue RegSaveArea =
3865 DAG.getLoad(VT: MVT::i32, dl, Chain: InChain, Ptr: RegSaveAreaPtr, PtrInfo: MachinePointerInfo());
3866 InChain = RegSaveArea.getValue(R: 1);
3867
3868 // select overflow_area if index >= 8
3869 SDValue CC = DAG.getSetCC(DL: dl, VT: MVT::i32, LHS: VT.isInteger() ? GprIndex : FprIndex,
3870 RHS: DAG.getConstant(Val: 8, DL: dl, VT: MVT::i32), Cond: ISD::SETLT);
3871
3872 // adjustment constant gpr_index * 4/8
3873 SDValue RegConstant = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32,
3874 N1: VT.isInteger() ? GprIndex : FprIndex,
3875 N2: DAG.getConstant(Val: VT.isInteger() ? 4 : 8, DL: dl,
3876 VT: MVT::i32));
3877
3878 // OurReg = RegSaveArea + RegConstant
3879 SDValue OurReg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: RegSaveArea,
3880 N2: RegConstant);
3881
3882 // Floating types are 32 bytes into RegSaveArea
3883 if (VT.isFloatingPoint())
3884 OurReg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: OurReg,
3885 N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i32));
3886
3887 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3888 SDValue IndexPlus1 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32,
3889 N1: VT.isInteger() ? GprIndex : FprIndex,
3890 N2: DAG.getConstant(Val: VT == MVT::i64 ? 2 : 1, DL: dl,
3891 VT: MVT::i32));
3892
3893 InChain = DAG.getTruncStore(Chain: InChain, dl, Val: IndexPlus1,
3894 Ptr: VT.isInteger() ? VAListPtr : FprPtr,
3895 PtrInfo: MachinePointerInfo(SV), SVT: MVT::i8);
3896
3897 // determine if we should load from reg_save_area or overflow_area
3898 SDValue Result = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: PtrVT, N1: CC, N2: OurReg, N3: OverflowArea);
3899
3900 // increase overflow_area by 4/8 if gpr/fpr index >= 8
3901 SDValue OverflowAreaPlusN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: OverflowArea,
3902 N2: DAG.getConstant(Val: VT.isInteger() ? 4 : 8,
3903 DL: dl, VT: MVT::i32));
3904
3905 OverflowArea = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: CC, N2: OverflowArea,
3906 N3: OverflowAreaPlusN);
3907
3908 InChain = DAG.getTruncStore(Chain: InChain, dl, Val: OverflowArea, Ptr: OverflowAreaPtr,
3909 PtrInfo: MachinePointerInfo(), SVT: MVT::i32);
3910
3911 return DAG.getLoad(VT, dl, Chain: InChain, Ptr: Result, PtrInfo: MachinePointerInfo());
3912}
3913
3914SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3915 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3916
3917 // We have to copy the entire va_list struct:
3918 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3919 return DAG.getMemcpy(Chain: Op.getOperand(i: 0), dl: Op, Dst: Op.getOperand(i: 1), Src: Op.getOperand(i: 2),
3920 Size: DAG.getConstant(Val: 12, DL: SDLoc(Op), VT: MVT::i32), Alignment: Align(8),
3921 isVol: false, AlwaysInline: true, /*CI=*/nullptr, OverrideTailCall: std::nullopt,
3922 DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
3923}
3924
3925SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3926 SelectionDAG &DAG) const {
3927 if (Subtarget.isAIXABI())
3928 report_fatal_error(reason: "ADJUST_TRAMPOLINE operation is not supported on AIX.");
3929
3930 return Op.getOperand(i: 0);
3931}
3932
3933SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3934 MachineFunction &MF = DAG.getMachineFunction();
3935 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3936
3937 assert((Op.getOpcode() == ISD::INLINEASM ||
3938 Op.getOpcode() == ISD::INLINEASM_BR) &&
3939 "Expecting Inline ASM node.");
3940
3941 // If an LR store is already known to be required, then there is no point
3942 // in checking this ASM as well.
3943 if (MFI.isLRStoreRequired())
3944 return Op;
3945
3946 // Inline ASM nodes have an optional last operand that is an incoming glue
3947 // value of type MVT::Glue. We want to ignore this last operand if it is present.
3948 unsigned NumOps = Op.getNumOperands();
3949 if (Op.getOperand(i: NumOps - 1).getValueType() == MVT::Glue)
3950 --NumOps;
3951
3952 // Check all operands that may contain the LR.
3953 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3954 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3955 unsigned NumVals = Flags.getNumOperandRegisters();
3956 ++i; // Skip the ID value.
3957
3958 switch (Flags.getKind()) {
3959 default:
3960 llvm_unreachable("Bad flags!");
3961 case InlineAsm::Kind::RegUse:
3962 case InlineAsm::Kind::Imm:
3963 case InlineAsm::Kind::Mem:
3964 i += NumVals;
3965 break;
3966 case InlineAsm::Kind::Clobber:
3967 case InlineAsm::Kind::RegDef:
3968 case InlineAsm::Kind::RegDefEarlyClobber: {
3969 for (; NumVals; --NumVals, ++i) {
3970 Register Reg = cast<RegisterSDNode>(Val: Op.getOperand(i))->getReg();
3971 if (Reg != PPC::LR && Reg != PPC::LR8)
3972 continue;
3973 MFI.setLRStoreRequired();
3974 return Op;
3975 }
3976 break;
3977 }
3978 }
3979 }
3980
3981 return Op;
3982}
3983
3984SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3985 SelectionDAG &DAG) const {
3986 if (Subtarget.isAIXABI())
3987 report_fatal_error(reason: "INIT_TRAMPOLINE operation is not supported on AIX.");
3988
3989 SDValue Chain = Op.getOperand(i: 0);
3990 SDValue Trmp = Op.getOperand(i: 1); // trampoline
3991 SDValue FPtr = Op.getOperand(i: 2); // nested function
3992 SDValue Nest = Op.getOperand(i: 3); // 'nest' parameter value
3993 SDLoc dl(Op);
3994
3995 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
3996 bool isPPC64 = (PtrVT == MVT::i64);
3997 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(C&: *DAG.getContext());
3998
3999 TargetLowering::ArgListTy Args;
4000 TargetLowering::ArgListEntry Entry;
4001
4002 Entry.Ty = IntPtrTy;
4003 Entry.Node = Trmp; Args.push_back(x: Entry);
4004
4005 // TrampSize == (isPPC64 ? 48 : 40);
4006 Entry.Node =
4007 DAG.getConstant(Val: isPPC64 ? 48 : 40, DL: dl, VT: Subtarget.getScalarIntVT());
4008 Args.push_back(x: Entry);
4009
4010 Entry.Node = FPtr; Args.push_back(x: Entry);
4011 Entry.Node = Nest; Args.push_back(x: Entry);
4012
4013 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4014 TargetLowering::CallLoweringInfo CLI(DAG);
4015 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4016 CC: CallingConv::C, ResultType: Type::getVoidTy(C&: *DAG.getContext()),
4017 Target: DAG.getExternalSymbol(Sym: "__trampoline_setup", VT: PtrVT), ArgsList: std::move(Args));
4018
4019 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4020 return CallResult.second;
4021}
4022
4023SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4024 MachineFunction &MF = DAG.getMachineFunction();
4025 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4026 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
4027
4028 SDLoc dl(Op);
4029
4030 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4031 // vastart just stores the address of the VarArgsFrameIndex slot into the
4032 // memory location argument.
4033 SDValue FR = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);
4034 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
4035 return DAG.getStore(Chain: Op.getOperand(i: 0), dl, Val: FR, Ptr: Op.getOperand(i: 1),
4036 PtrInfo: MachinePointerInfo(SV));
4037 }
4038
4039 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4040 // We suppose the given va_list is already allocated.
4041 //
4042 // typedef struct {
4043 // char gpr; /* index into the array of 8 GPRs
4044 // * stored in the register save area
4045 // * gpr=0 corresponds to r3,
4046 // * gpr=1 to r4, etc.
4047 // */
4048 // char fpr; /* index into the array of 8 FPRs
4049 // * stored in the register save area
4050 // * fpr=0 corresponds to f1,
4051 // * fpr=1 to f2, etc.
4052 // */
4053 // char *overflow_arg_area;
4054 // /* location on stack that holds
4055 // * the next overflow argument
4056 // */
4057 // char *reg_save_area;
4058 // /* where r3:r10 and f1:f8 (if saved)
4059 // * are stored
4060 // */
4061 // } va_list[1];
4062
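  // The stores below fill in that struct at the following byte offsets
  // (PtrVT is 32 bits here, so FrameOffset == 4 and StackOffset == 3):
  //   0: gpr count, 1: fpr count, 4: overflow_arg_area, 8: reg_save_area.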
4063 SDValue ArgGPR = DAG.getConstant(Val: FuncInfo->getVarArgsNumGPR(), DL: dl, VT: MVT::i32);
4064 SDValue ArgFPR = DAG.getConstant(Val: FuncInfo->getVarArgsNumFPR(), DL: dl, VT: MVT::i32);
4065 SDValue StackOffsetFI = DAG.getFrameIndex(FI: FuncInfo->getVarArgsStackOffset(),
4066 VT: PtrVT);
4067 SDValue FR = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(),
4068 VT: PtrVT);
4069
4070 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4071 SDValue ConstFrameOffset = DAG.getConstant(Val: FrameOffset, DL: dl, VT: PtrVT);
4072
4073 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4074 SDValue ConstStackOffset = DAG.getConstant(Val: StackOffset, DL: dl, VT: PtrVT);
4075
4076 uint64_t FPROffset = 1;
4077 SDValue ConstFPROffset = DAG.getConstant(Val: FPROffset, DL: dl, VT: PtrVT);
4078
4079 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
4080
4081 // Store first byte : number of int regs
4082 SDValue firstStore =
4083 DAG.getTruncStore(Chain: Op.getOperand(i: 0), dl, Val: ArgGPR, Ptr: Op.getOperand(i: 1),
4084 PtrInfo: MachinePointerInfo(SV), SVT: MVT::i8);
4085 uint64_t nextOffset = FPROffset;
4086 SDValue nextPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Op.getOperand(i: 1),
4087 N2: ConstFPROffset);
4088
4089 // Store second byte : number of float regs
4090 SDValue secondStore =
4091 DAG.getTruncStore(Chain: firstStore, dl, Val: ArgFPR, Ptr: nextPtr,
4092 PtrInfo: MachinePointerInfo(SV, nextOffset), SVT: MVT::i8);
4093 nextOffset += StackOffset;
4094 nextPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: nextPtr, N2: ConstStackOffset);
4095
4096 // Store second word : arguments given on stack
4097 SDValue thirdStore = DAG.getStore(Chain: secondStore, dl, Val: StackOffsetFI, Ptr: nextPtr,
4098 PtrInfo: MachinePointerInfo(SV, nextOffset));
4099 nextOffset += FrameOffset;
4100 nextPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: nextPtr, N2: ConstFrameOffset);
4101
4102 // Store third word : arguments given in registers
4103 return DAG.getStore(Chain: thirdStore, dl, Val: FR, Ptr: nextPtr,
4104 PtrInfo: MachinePointerInfo(SV, nextOffset));
4105}
4106
4107/// FPR - The set of FP registers that should be allocated for arguments
4108/// on Darwin and AIX.
4109static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4110 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4111 PPC::F11, PPC::F12, PPC::F13};
4112
4113/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4114/// the stack.
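/// For example, on 64-bit ELF (PtrByteSize == 8) a 12-byte byval argument
/// reserves 16 bytes, while a 4-byte f32 member of a homogeneous float array
/// (isInConsecutiveRegs) stays at 4 bytes; rounding for such arrays happens
/// only after the last element.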
4115static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4116 unsigned PtrByteSize) {
4117 unsigned ArgSize = ArgVT.getStoreSize();
4118 if (Flags.isByVal())
4119 ArgSize = Flags.getByValSize();
4120
4121 // Round up to multiples of the pointer size, except for array members,
4122 // which are always packed.
4123 if (!Flags.isInConsecutiveRegs())
4124 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4125
4126 return ArgSize;
4127}
4128
4129/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4130/// on the stack.
4131static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4132 ISD::ArgFlagsTy Flags,
4133 unsigned PtrByteSize) {
4134 Align Alignment(PtrByteSize);
4135
4136 // Altivec parameters are padded to a 16 byte boundary.
4137 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4138 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4139 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4140 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4141 Alignment = Align(16);
4142
4143 // ByVal parameters are aligned as requested.
4144 if (Flags.isByVal()) {
4145 auto BVAlign = Flags.getNonZeroByValAlign();
4146 if (BVAlign > PtrByteSize) {
4147 if (BVAlign.value() % PtrByteSize != 0)
4148 llvm_unreachable(
4149 "ByVal alignment is not a multiple of the pointer size");
4150
4151 Alignment = BVAlign;
4152 }
4153 }
4154
4155 // Array members are always packed to their original alignment.
4156 if (Flags.isInConsecutiveRegs()) {
4157 // If the array member was split into multiple registers, the first
4158 // needs to be aligned to the size of the full type. (Except for
4159 // ppcf128, which is only aligned as its f64 components.)
4160 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4161 Alignment = Align(OrigVT.getStoreSize());
4162 else
4163 Alignment = Align(ArgVT.getStoreSize());
4164 }
4165
4166 return Alignment;
4167}
4168
4169/// CalculateStackSlotUsed - Return whether this argument will use its
4170/// stack slot (instead of being passed in registers). ArgOffset,
4171/// AvailableFPRs, and AvailableVRs must hold the current argument
4172/// position, and will be updated to account for this argument.
4173static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4174 unsigned PtrByteSize, unsigned LinkageSize,
4175 unsigned ParamAreaSize, unsigned &ArgOffset,
4176 unsigned &AvailableFPRs,
4177 unsigned &AvailableVRs) {
4178 bool UseMemory = false;
4179
4180 // Respect alignment of argument on the stack.
4181 Align Alignment =
4182 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4183 ArgOffset = alignTo(Size: ArgOffset, A: Alignment);
4184 // If there's no space left in the argument save area, we must
4185 // use memory (this check also catches zero-sized arguments).
4186 if (ArgOffset >= LinkageSize + ParamAreaSize)
4187 UseMemory = true;
4188
4189 // Allocate argument on the stack.
4190 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4191 if (Flags.isInConsecutiveRegsLast())
4192 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4193 // If we overran the argument save area, we must use memory
4194 // (this check catches arguments passed partially in memory)
4195 if (ArgOffset > LinkageSize + ParamAreaSize)
4196 UseMemory = true;
4197
4198 // However, if the argument is actually passed in an FPR or a VR,
4199 // we don't use memory after all.
4200 if (!Flags.isByVal()) {
4201 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4202 if (AvailableFPRs > 0) {
4203 --AvailableFPRs;
4204 return false;
4205 }
4206 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4207 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4208 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4209 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4210 if (AvailableVRs > 0) {
4211 --AvailableVRs;
4212 return false;
4213 }
4214 }
4215
4216 return UseMemory;
4217}
4218
4219/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4220/// ensure minimum alignment required for target.
4221static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4222 unsigned NumBytes) {
4223 return alignTo(Size: NumBytes, A: Lowering->getStackAlign());
4224}
4225
4226SDValue PPCTargetLowering::LowerFormalArguments(
4227 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4228 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4229 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4230 if (Subtarget.isAIXABI())
4231 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4232 InVals);
4233 if (Subtarget.is64BitELFABI())
4234 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4235 InVals);
4236 assert(Subtarget.is32BitELFABI());
4237 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4238 InVals);
4239}
4240
4241SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4242 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4243 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4244 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4245
4246 // 32-bit SVR4 ABI Stack Frame Layout:
4247 // +-----------------------------------+
4248 // +--> | Back chain |
4249 // | +-----------------------------------+
4250 // | | Floating-point register save area |
4251 // | +-----------------------------------+
4252 // | | General register save area |
4253 // | +-----------------------------------+
4254 // | | CR save word |
4255 // | +-----------------------------------+
4256 // | | VRSAVE save word |
4257 // | +-----------------------------------+
4258 // | | Alignment padding |
4259 // | +-----------------------------------+
4260 // | | Vector register save area |
4261 // | +-----------------------------------+
4262 // | | Local variable space |
4263 // | +-----------------------------------+
4264 // | | Parameter list area |
4265 // | +-----------------------------------+
4266 // | | LR save word |
4267 // | +-----------------------------------+
4268 // SP--> +--- | Back chain |
4269 // +-----------------------------------+
4270 //
4271 // Specifications:
4272 // System V Application Binary Interface PowerPC Processor Supplement
4273 // AltiVec Technology Programming Interface Manual
4274
4275 MachineFunction &MF = DAG.getMachineFunction();
4276 MachineFrameInfo &MFI = MF.getFrameInfo();
4277 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4278
4279 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
4280 // Potential tail calls could cause overwriting of argument stack slots.
4281 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4282 (CallConv == CallingConv::Fast));
4283 const Align PtrAlign(4);
4284
4285 // Assign locations to all of the incoming arguments.
4286 SmallVector<CCValAssign, 16> ArgLocs;
4287 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4288 *DAG.getContext());
4289
4290 // Reserve space for the linkage area on the stack.
4291 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4292 CCInfo.AllocateStack(Size: LinkageSize, Alignment: PtrAlign);
4293 if (useSoftFloat())
4294 CCInfo.PreAnalyzeFormalArguments(Ins);
4295
4296 CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_PPC32_SVR4);
4297 CCInfo.clearWasPPCF128();
4298
4299 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4300 CCValAssign &VA = ArgLocs[i];
4301
4302 // Arguments stored in registers.
4303 if (VA.isRegLoc()) {
4304 const TargetRegisterClass *RC;
4305 EVT ValVT = VA.getValVT();
4306
4307 switch (ValVT.getSimpleVT().SimpleTy) {
4308 default:
4309 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4310 case MVT::i1:
4311 case MVT::i32:
4312 RC = &PPC::GPRCRegClass;
4313 break;
4314 case MVT::f32:
4315 if (Subtarget.hasP8Vector())
4316 RC = &PPC::VSSRCRegClass;
4317 else if (Subtarget.hasSPE())
4318 RC = &PPC::GPRCRegClass;
4319 else
4320 RC = &PPC::F4RCRegClass;
4321 break;
4322 case MVT::f64:
4323 if (Subtarget.hasVSX())
4324 RC = &PPC::VSFRCRegClass;
4325 else if (Subtarget.hasSPE())
4326 // SPE passes doubles in GPR pairs.
4327 RC = &PPC::GPRCRegClass;
4328 else
4329 RC = &PPC::F8RCRegClass;
4330 break;
4331 case MVT::v16i8:
4332 case MVT::v8i16:
4333 case MVT::v4i32:
4334 RC = &PPC::VRRCRegClass;
4335 break;
4336 case MVT::v4f32:
4337 RC = &PPC::VRRCRegClass;
4338 break;
4339 case MVT::v2f64:
4340 case MVT::v2i64:
4341 RC = &PPC::VRRCRegClass;
4342 break;
4343 }
4344
4345 SDValue ArgValue;
4346 // Transform the arguments stored in physical registers into
4347 // virtual ones.
4348 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4349 assert(i + 1 < e && "No second half of double precision argument");
4350 Register RegLo = MF.addLiveIn(PReg: VA.getLocReg(), RC);
4351 Register RegHi = MF.addLiveIn(PReg: ArgLocs[++i].getLocReg(), RC);
4352 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, Reg: RegLo, VT: MVT::i32);
4353 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, Reg: RegHi, VT: MVT::i32);
4354 if (!Subtarget.isLittleEndian())
4355 std::swap (a&: ArgValueLo, b&: ArgValueHi);
4356 ArgValue = DAG.getNode(Opcode: PPCISD::BUILD_SPE64, DL: dl, VT: MVT::f64, N1: ArgValueLo,
4357 N2: ArgValueHi);
4358 } else {
4359 Register Reg = MF.addLiveIn(PReg: VA.getLocReg(), RC);
4360 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4361 VT: ValVT == MVT::i1 ? MVT::i32 : ValVT);
4362 if (ValVT == MVT::i1)
4363 ArgValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: ArgValue);
4364 }
4365
4366 InVals.push_back(Elt: ArgValue);
4367 } else {
4368 // Argument stored in memory.
4369 assert(VA.isMemLoc());
4370
4371 // Get the extended size of the argument type on the stack
4372 unsigned ArgSize = VA.getLocVT().getStoreSize();
4373 // Get the actual size of the argument type
4374 unsigned ObjSize = VA.getValVT().getStoreSize();
4375 unsigned ArgOffset = VA.getLocMemOffset();
4376 // Stack objects in PPC32 are right justified.
4377 ArgOffset += ArgSize - ObjSize;
4378 int FI = MFI.CreateFixedObject(Size: ArgSize, SPOffset: ArgOffset, IsImmutable: isImmutable);
4379
4380 // Create load nodes to retrieve arguments from the stack.
4381 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
4382 InVals.push_back(
4383 Elt: DAG.getLoad(VT: VA.getValVT(), dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo()));
4384 }
4385 }
4386
4387 // Assign locations to all of the incoming aggregate by value arguments.
4388 // Aggregates passed by value are stored in the local variable space of the
4389 // caller's stack frame, right above the parameter list area.
4390 SmallVector<CCValAssign, 16> ByValArgLocs;
4391 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4392 ByValArgLocs, *DAG.getContext());
4393
4394 // Reserve stack space for the allocations in CCInfo.
4395 CCByValInfo.AllocateStack(Size: CCInfo.getStackSize(), Alignment: PtrAlign);
4396
4397 CCByValInfo.AnalyzeFormalArguments(Ins, Fn: CC_PPC32_SVR4_ByVal);
4398
4399 // Area that is at least reserved in the caller of this function.
4400 unsigned MinReservedArea = CCByValInfo.getStackSize();
4401 MinReservedArea = std::max(a: MinReservedArea, b: LinkageSize);
4402
4403 // Set the size that is at least reserved in the caller of this function. Tail
4404 // call optimized function's reserved stack space needs to be aligned so that
4405 // taking the difference between two stack areas will result in an aligned
4406 // stack.
4407 MinReservedArea =
4408 EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: MinReservedArea);
4409 FuncInfo->setMinReservedArea(MinReservedArea);
4410
4411 SmallVector<SDValue, 8> MemOps;
4412
4413 // If the function takes a variable number of arguments, make a frame index for
4414 // the start of the first vararg value... for expansion of llvm.va_start.
4415 if (isVarArg) {
4416 static const MCPhysReg GPArgRegs[] = {
4417 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4418 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4419 };
4420 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4421
4422 static const MCPhysReg FPArgRegs[] = {
4423 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4424 PPC::F8
4425 };
4426 unsigned NumFPArgRegs = std::size(FPArgRegs);
4427
4428 if (useSoftFloat() || hasSPE())
4429 NumFPArgRegs = 0;
4430
4431 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(Regs: GPArgRegs));
4432 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(Regs: FPArgRegs));
4433
4434 // Make room to save NumGPArgRegs GPRs and NumFPArgRegs FPRs.
4435 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4436 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4437
4438 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4439 Size: PtrVT.getSizeInBits() / 8, SPOffset: CCInfo.getStackSize(), IsImmutable: true));
4440
4441 FuncInfo->setVarArgsFrameIndex(
4442 MFI.CreateStackObject(Size: Depth, Alignment: Align(8), isSpillSlot: false));
4443 SDValue FIN = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);
4444
4445 // The fixed integer arguments of a variadic function are stored to the
4446 // VarArgsFrameIndex on the stack so that they may be loaded by
4447 // dereferencing the result of va_next.
4448 for (MCPhysReg GPArgReg : GPArgRegs) {
4449 // Get an existing live-in vreg, or add a new one.
4450 Register VReg = MF.getRegInfo().getLiveInVirtReg(PReg: GPArgReg);
4451 if (!VReg)
4452 VReg = MF.addLiveIn(PReg: GPArgReg, RC: &PPC::GPRCRegClass);
4453
4454 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
4455 SDValue Store =
4456 DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo());
4457 MemOps.push_back(Elt: Store);
4458 // Increment the address by four for the next argument to store
4459 SDValue PtrOff = DAG.getConstant(Val: PtrVT.getSizeInBits()/8, DL: dl, VT: PtrVT);
4460 FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
4461 }
4462
4463 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4464 // is set.
4465 // The double arguments are stored to the VarArgsFrameIndex
4466 // on the stack.
4467 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4468 // Get an existing live-in vreg, or add a new one.
4469 Register VReg = MF.getRegInfo().getLiveInVirtReg(PReg: FPArgRegs[FPRIndex]);
4470 if (!VReg)
4471 VReg = MF.addLiveIn(PReg: FPArgRegs[FPRIndex], RC: &PPC::F8RCRegClass);
4472
4473 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::f64);
4474 SDValue Store =
4475 DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo());
4476 MemOps.push_back(Elt: Store);
4477 // Increment the address by eight for the next argument to store
4478 SDValue PtrOff = DAG.getConstant(Val: MVT(MVT::f64).getSizeInBits()/8, DL: dl,
4479 VT: PtrVT);
4480 FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
4481 }
4482 }
4483
4484 if (!MemOps.empty())
4485 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOps);
4486
4487 return Chain;
4488}
4489
4490// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4491// value to MVT::i64 and then truncate to the correct register size.
4492SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4493 EVT ObjectVT, SelectionDAG &DAG,
4494 SDValue ArgVal,
4495 const SDLoc &dl) const {
4496 if (Flags.isSExt())
4497 ArgVal = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: MVT::i64, N1: ArgVal,
4498 N2: DAG.getValueType(ObjectVT));
4499 else if (Flags.isZExt())
4500 ArgVal = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: MVT::i64, N1: ArgVal,
4501 N2: DAG.getValueType(ObjectVT));
4502
4503 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: ObjectVT, Operand: ArgVal);
4504}
4505
4506SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4507 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4508 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4509 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4510 // TODO: add description of PPC stack frame format, or at least some docs.
4511 //
4512 bool isELFv2ABI = Subtarget.isELFv2ABI();
4513 bool isLittleEndian = Subtarget.isLittleEndian();
4514 MachineFunction &MF = DAG.getMachineFunction();
4515 MachineFrameInfo &MFI = MF.getFrameInfo();
4516 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4517
4518 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4519 "fastcc not supported on varargs functions");
4520
4521 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
4522 // Potential tail calls could cause overwriting of argument stack slots.
4523 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4524 (CallConv == CallingConv::Fast));
4525 unsigned PtrByteSize = 8;
4526 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4527
4528 static const MCPhysReg GPR[] = {
4529 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4530 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4531 };
4532 static const MCPhysReg VR[] = {
4533 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4534 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4535 };
4536
4537 const unsigned Num_GPR_Regs = std::size(GPR);
4538 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4539 const unsigned Num_VR_Regs = std::size(VR);
4540
4541 // Do a first pass over the arguments to determine whether the ABI
4542 // guarantees that our caller has allocated the parameter save area
4543 // on its stack frame. In the ELFv1 ABI, this is always the case;
4544 // in the ELFv2 ABI, it is true if this is a vararg function or if
4545 // any parameter is located in a stack slot.
4546
4547 bool HasParameterArea = !isELFv2ABI || isVarArg;
4548 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4549 unsigned NumBytes = LinkageSize;
4550 unsigned AvailableFPRs = Num_FPR_Regs;
4551 unsigned AvailableVRs = Num_VR_Regs;
4552 for (const ISD::InputArg &In : Ins) {
4553 if (In.Flags.isNest())
4554 continue;
4555
4556 if (CalculateStackSlotUsed(ArgVT: In.VT, OrigVT: In.ArgVT, Flags: In.Flags, PtrByteSize,
4557 LinkageSize, ParamAreaSize, ArgOffset&: NumBytes,
4558 AvailableFPRs, AvailableVRs))
4559 HasParameterArea = true;
4560 }
4561
4562 // Add DAG nodes to load the arguments or copy them out of registers. On
4563 // entry to a function on PPC, the arguments start after the linkage area,
4564 // although the first ones are often in registers.
4565
4566 unsigned ArgOffset = LinkageSize;
4567 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4568 SmallVector<SDValue, 8> MemOps;
4569 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4570 unsigned CurArgIdx = 0;
4571 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4572 SDValue ArgVal;
4573 bool needsLoad = false;
4574 EVT ObjectVT = Ins[ArgNo].VT;
4575 EVT OrigVT = Ins[ArgNo].ArgVT;
4576 unsigned ObjSize = ObjectVT.getStoreSize();
4577 unsigned ArgSize = ObjSize;
4578 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4579 if (Ins[ArgNo].isOrigArg()) {
4580 std::advance(i&: FuncArg, n: Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4581 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4582 }
4583 // We re-align the argument offset for each argument, except when using the
4584 // fast calling convention, in which case we do so only when the argument
4585 // will actually use a stack slot.
4586 unsigned CurArgOffset;
4587 Align Alignment;
4588 auto ComputeArgOffset = [&]() {
4589 /* Respect alignment of argument on the stack. */
4590 Alignment =
4591 CalculateStackSlotAlignment(ArgVT: ObjectVT, OrigVT, Flags, PtrByteSize);
4592 ArgOffset = alignTo(Size: ArgOffset, A: Alignment);
4593 CurArgOffset = ArgOffset;
4594 };
4595
4596 if (CallConv != CallingConv::Fast) {
4597 ComputeArgOffset();
4598
4599 /* Compute GPR index associated with argument offset. */
4600 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4601 GPR_idx = std::min(a: GPR_idx, b: Num_GPR_Regs);
4602 }
4603
4604 // FIXME the codegen can be much improved in some cases.
4605 // We do not have to keep everything in memory.
4606 if (Flags.isByVal()) {
4607 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4608
4609 if (CallConv == CallingConv::Fast)
4610 ComputeArgOffset();
4611
4612 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
4613 ObjSize = Flags.getByValSize();
4614 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4615 // Empty aggregate parameters do not take up registers. Examples:
4616 // struct { } a;
4617 // union { } b;
4618 // int c[0];
4619 // etc. However, we have to provide a place-holder in InVals, so
4620 // pretend we have an 8-byte item at the current address for that
4621 // purpose.
4622 if (!ObjSize) {
4623 int FI = MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: ArgOffset, IsImmutable: true);
4624 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
4625 InVals.push_back(Elt: FIN);
4626 continue;
4627 }
4628
4629 // Create a stack object covering all stack doublewords occupied
4630 // by the argument. If the argument is (fully or partially) on
4631 // the stack, or if the argument is fully in registers but the
4632 // caller has allocated the parameter save area anyway, we can refer
4633 // directly to the caller's stack frame. Otherwise, create a
4634 // local copy in our own frame.
4635 int FI;
4636 if (HasParameterArea ||
4637 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4638 FI = MFI.CreateFixedObject(Size: ArgSize, SPOffset: ArgOffset, IsImmutable: false, isAliased: true);
4639 else
4640 FI = MFI.CreateStackObject(Size: ArgSize, Alignment, isSpillSlot: false);
4641 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
4642
4643 // Handle aggregates smaller than 8 bytes.
4644 if (ObjSize < PtrByteSize) {
4645 // The value of the object is its address, which differs from the
4646 // address of the enclosing doubleword on big-endian systems.
4647 SDValue Arg = FIN;
4648 if (!isLittleEndian) {
4649 SDValue ArgOff = DAG.getConstant(Val: PtrByteSize - ObjSize, DL: dl, VT: PtrVT);
4650 Arg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ArgOff.getValueType(), N1: Arg, N2: ArgOff);
4651 }
4652 InVals.push_back(Elt: Arg);
4653
4654 if (GPR_idx != Num_GPR_Regs) {
4655 Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx++], RC: &PPC::G8RCRegClass);
4656 FuncInfo->addLiveInAttr(VReg, Flags);
4657 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
4658 EVT ObjType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ObjSize * 8);
4659 SDValue Store =
4660 DAG.getTruncStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: Arg,
4661 PtrInfo: MachinePointerInfo(&*FuncArg), SVT: ObjType);
4662 MemOps.push_back(Elt: Store);
4663 }
4664 // Whether we copied from a register or not, advance the offset
4665 // into the parameter save area by a full doubleword.
4666 ArgOffset += PtrByteSize;
4667 continue;
4668 }
4669
4670 // The value of the object is its address, which is the address of
4671 // its first stack doubleword.
4672 InVals.push_back(Elt: FIN);
4673
4674 // Store whatever pieces of the object are in registers to memory.
4675 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4676 if (GPR_idx == Num_GPR_Regs)
4677 break;
4678
4679 Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx], RC: &PPC::G8RCRegClass);
4680 FuncInfo->addLiveInAttr(VReg, Flags);
4681 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
4682 SDValue Addr = FIN;
4683 if (j) {
4684 SDValue Off = DAG.getConstant(Val: j, DL: dl, VT: PtrVT);
4685 Addr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: Off.getValueType(), N1: Addr, N2: Off);
4686 }
4687 unsigned StoreSizeInBits = std::min(a: PtrByteSize, b: (ObjSize - j)) * 8;
4688 EVT ObjType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreSizeInBits);
4689 SDValue Store =
4690 DAG.getTruncStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: Addr,
4691 PtrInfo: MachinePointerInfo(&*FuncArg, j), SVT: ObjType);
4692 MemOps.push_back(Elt: Store);
4693 ++GPR_idx;
4694 }
4695 ArgOffset += ArgSize;
4696 continue;
4697 }
4698
4699 switch (ObjectVT.getSimpleVT().SimpleTy) {
4700 default: llvm_unreachable("Unhandled argument type!");
4701 case MVT::i1:
4702 case MVT::i32:
4703 case MVT::i64:
4704 if (Flags.isNest()) {
4705 // The 'nest' parameter, if any, is passed in R11.
4706 Register VReg = MF.addLiveIn(PReg: PPC::X11, RC: &PPC::G8RCRegClass);
4707 ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::i64);
4708
4709 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4710 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4711
4712 break;
4713 }
4714
4715 // These can be scalar arguments or elements of an integer array type
4716 // passed directly. Clang may use those instead of "byval" aggregate
4717 // types to avoid forcing arguments to memory unnecessarily.
4718 if (GPR_idx != Num_GPR_Regs) {
4719 Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx++], RC: &PPC::G8RCRegClass);
4720 FuncInfo->addLiveInAttr(VReg, Flags);
4721 ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::i64);
4722
4723 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4724 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4725 // value to MVT::i64 and then truncate to the correct register size.
4726 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4727 } else {
4728 if (CallConv == CallingConv::Fast)
4729 ComputeArgOffset();
4730
4731 needsLoad = true;
4732 ArgSize = PtrByteSize;
4733 }
4734 if (CallConv != CallingConv::Fast || needsLoad)
4735 ArgOffset += 8;
4736 break;
4737
4738 case MVT::f32:
4739 case MVT::f64:
4740 // These can be scalar arguments or elements of a float array type
4741 // passed directly. The latter are used to implement ELFv2 homogeneous
4742 // float aggregates.
4743 if (FPR_idx != Num_FPR_Regs) {
4744 unsigned VReg;
4745
4746 if (ObjectVT == MVT::f32)
4747 VReg = MF.addLiveIn(PReg: FPR[FPR_idx],
4748 RC: Subtarget.hasP8Vector()
4749 ? &PPC::VSSRCRegClass
4750 : &PPC::F4RCRegClass);
4751 else
4752 VReg = MF.addLiveIn(PReg: FPR[FPR_idx], RC: Subtarget.hasVSX()
4753 ? &PPC::VSFRCRegClass
4754 : &PPC::F8RCRegClass);
4755
4756 ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: ObjectVT);
4757 ++FPR_idx;
4758 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4759 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4760 // once we support fp <-> gpr moves.
4761
4762 // This can only ever happen in the presence of f32 array types,
4763 // since otherwise we never run out of FPRs before running out
4764 // of GPRs.
4765 Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx++], RC: &PPC::G8RCRegClass);
4766 FuncInfo->addLiveInAttr(VReg, Flags);
4767 ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: MVT::i64);
4768
4769 if (ObjectVT == MVT::f32) {
4770 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4771 ArgVal = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i64, N1: ArgVal,
4772 N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i32));
4773 ArgVal = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: ArgVal);
4774 }
4775
4776 ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ObjectVT, Operand: ArgVal);
4777 } else {
4778 if (CallConv == CallingConv::Fast)
4779 ComputeArgOffset();
4780
4781 needsLoad = true;
4782 }
4783
4784 // When passing an array of floats, the array occupies consecutive
4785 // space in the argument area; only round up to the next doubleword
4786 // at the end of the array. Otherwise, each float takes 8 bytes.
4787 if (CallConv != CallingConv::Fast || needsLoad) {
4788 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4789 ArgOffset += ArgSize;
4790 if (Flags.isInConsecutiveRegsLast())
4791 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4792 }
4793 break;
4794 case MVT::v4f32:
4795 case MVT::v4i32:
4796 case MVT::v8i16:
4797 case MVT::v16i8:
4798 case MVT::v2f64:
4799 case MVT::v2i64:
4800 case MVT::v1i128:
4801 case MVT::f128:
4802 // These can be scalar arguments or elements of a vector array type
4803 // passed directly. The latter are used to implement ELFv2 homogeneous
4804 // vector aggregates.
4805 if (VR_idx != Num_VR_Regs) {
4806 Register VReg = MF.addLiveIn(PReg: VR[VR_idx], RC: &PPC::VRRCRegClass);
4807 ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: ObjectVT);
4808 ++VR_idx;
4809 } else {
4810 if (CallConv == CallingConv::Fast)
4811 ComputeArgOffset();
4812 needsLoad = true;
4813 }
4814 if (CallConv != CallingConv::Fast || needsLoad)
4815 ArgOffset += 16;
4816 break;
4817 }
4818
4819 // We need to load the argument to a virtual register if we determined
4820 // above that we ran out of physical registers of the appropriate type.
4821 if (needsLoad) {
4822 if (ObjSize < ArgSize && !isLittleEndian)
4823 CurArgOffset += ArgSize - ObjSize;
4824 int FI = MFI.CreateFixedObject(Size: ObjSize, SPOffset: CurArgOffset, IsImmutable: isImmutable);
4825 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
4826 ArgVal = DAG.getLoad(VT: ObjectVT, dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo());
4827 }
4828
4829 InVals.push_back(Elt: ArgVal);
4830 }
4831
4832 // Area that is at least reserved in the caller of this function.
4833 unsigned MinReservedArea;
4834 if (HasParameterArea)
4835 MinReservedArea = std::max(a: ArgOffset, b: LinkageSize + 8 * PtrByteSize);
4836 else
4837 MinReservedArea = LinkageSize;
4838
4839 // Set the size that is at least reserved in the caller of this function. Tail
4840 // call optimized functions' reserved stack space needs to be aligned so that
4841 // taking the difference between two stack areas will result in an aligned
4842 // stack.
4843 MinReservedArea =
4844 EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: MinReservedArea);
4845 FuncInfo->setMinReservedArea(MinReservedArea);
4846
4847 // If the function takes a variable number of arguments, make a frame index for
4848 // the start of the first vararg value... for expansion of llvm.va_start.
4849 // As the ELFv2 ABI spec notes:
4850 // C programs that are intended to be *portable* across different compilers
4851 // and architectures must use the header file <stdarg.h> to deal with variable
4852 // argument lists.
4853 if (isVarArg && MFI.hasVAStart()) {
4854 int Depth = ArgOffset;
4855
4856 FuncInfo->setVarArgsFrameIndex(
4857 MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: Depth, IsImmutable: true));
4858 SDValue FIN = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);
4859
4860 // If this function is vararg, store any remaining integer argument regs
4861 // to their spots on the stack so that they may be loaded by dereferencing
4862 // the result of va_next.
4863 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4864 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4865 Register VReg = MF.addLiveIn(PReg: GPR[GPR_idx], RC: &PPC::G8RCRegClass);
4866 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
4867 SDValue Store =
4868 DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo());
4869 MemOps.push_back(Elt: Store);
4870 // Increment the address by PtrByteSize for the next argument to store
4871 SDValue PtrOff = DAG.getConstant(Val: PtrByteSize, DL: dl, VT: PtrVT);
4872 FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
4873 }
4874 }
4875
4876 if (!MemOps.empty())
4877 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOps);
4878
4879 return Chain;
4880}
4881
4882/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4883/// adjusted to accommodate the arguments for the tailcall.
4884static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4885 unsigned ParamSize) {
4886
4887 if (!isTailCall) return 0;
4888
4889 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4890 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4891 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4892 // Remember only if the new adjustment is bigger.
4893 if (SPDiff < FI->getTailCallSPDelta())
4894 FI->setTailCallSPDelta(SPDiff);
4895
4896 return SPDiff;
4897}
4898
4899static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4900
4901static bool callsShareTOCBase(const Function *Caller,
4902 const GlobalValue *CalleeGV,
4903 const TargetMachine &TM) {
4904 // It does not make sense to call callsShareTOCBase() with a caller that
4905 // is PC Relative since PC Relative callers do not have a TOC.
4906#ifndef NDEBUG
4907 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4908 assert(!STICaller->isUsingPCRelativeCalls() &&
4909 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4910#endif
4911
4912 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4913 // don't have enough information to determine if the caller and callee share
4914 // the same TOC base, so we have to pessimistically assume they don't for
4915 // correctness.
4916 if (!CalleeGV)
4917 return false;
4918
4919 // If the callee is preemptable, then the static linker will use a plt-stub
4920 // which saves the toc to the stack, and needs a nop after the call
4921 // instruction to convert to a toc-restore.
4922 if (!TM.shouldAssumeDSOLocal(GV: CalleeGV))
4923 return false;
4924
4925 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4926 // We may need a TOC restore in the situation where the caller requires a
4927 // valid TOC but the callee is PC Relative and does not.
4928 const Function *F = dyn_cast<Function>(Val: CalleeGV);
4929 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(Val: CalleeGV);
4930
4931 // If we have an Alias we can try to get the function from there.
4932 if (Alias) {
4933 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4934 F = dyn_cast<Function>(Val: GlobalObj);
4935 }
4936
4937 // If we still have no valid function pointer we do not have enough
4938 // information to determine if the callee uses PC Relative calls so we must
4939 // assume that it does.
4940 if (!F)
4941 return false;
4942
4943 // If the callee uses PC Relative we cannot guarantee that the callee won't
4944 // clobber the TOC of the caller and so we must assume that the two
4945 // functions do not share a TOC base.
4946 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(F: *F);
4947 if (STICallee->isUsingPCRelativeCalls())
4948 return false;
4949
4950 // If the GV is not a strong definition then we need to assume it can be
4951 // replaced by another function at link time. The function that replaces
4952 // it may not share the same TOC as the caller since the callee may be
4953 // replaced by a PC Relative version of the same function.
4954 if (!CalleeGV->isStrongDefinitionForLinker())
4955 return false;
4956
4957 // The medium and large code models are expected to provide a sufficiently
4958 // large TOC to provide all data addressing needs of a module with a
4959 // single TOC.
4960 if (CodeModel::Medium == TM.getCodeModel() ||
4961 CodeModel::Large == TM.getCodeModel())
4962 return true;
4963
4964 // Any explicitly-specified sections and section prefixes must also match.
4965 // Also, if we're using -ffunction-sections, then each function is always in
4966 // a different section (the same is true for COMDAT functions).
4967 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4968 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4969 return false;
4970 if (const auto *F = dyn_cast<Function>(Val: CalleeGV)) {
4971 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4972 return false;
4973 }
4974
4975 return true;
4976}
4977
4978static bool
4979needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4980 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4981 assert(Subtarget.is64BitELFABI());
4982
4983 const unsigned PtrByteSize = 8;
4984 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4985
4986 static const MCPhysReg GPR[] = {
4987 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4988 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4989 };
4990 static const MCPhysReg VR[] = {
4991 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4992 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4993 };
4994
4995 const unsigned NumGPRs = std::size(GPR);
4996 const unsigned NumFPRs = 13;
4997 const unsigned NumVRs = std::size(VR);
4998 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4999
5000 unsigned NumBytes = LinkageSize;
5001 unsigned AvailableFPRs = NumFPRs;
5002 unsigned AvailableVRs = NumVRs;
5003
5004 for (const ISD::OutputArg& Param : Outs) {
5005 if (Param.Flags.isNest()) continue;
5006
5007 if (CalculateStackSlotUsed(ArgVT: Param.VT, OrigVT: Param.ArgVT, Flags: Param.Flags, PtrByteSize,
5008 LinkageSize, ParamAreaSize, ArgOffset&: NumBytes,
5009 AvailableFPRs, AvailableVRs))
5010 return true;
5011 }
5012 return false;
5013}
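// Illustrative example (not taken from the code above): for a callee taking
// nine i64 arguments, the first eight are assigned to X3-X10, but the ninth
// falls outside the 64-byte register parameter area, so CalculateStackSlotUsed
// reports a stack slot and needStackSlotPassParameters() returns true.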
5014
5015static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5016 if (CB.arg_size() != CallerFn->arg_size())
5017 return false;
5018
5019 auto CalleeArgIter = CB.arg_begin();
5020 auto CalleeArgEnd = CB.arg_end();
5021 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5022
5023 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5024 const Value* CalleeArg = *CalleeArgIter;
5025 const Value* CallerArg = &(*CallerArgIter);
5026 if (CalleeArg == CallerArg)
5027 continue;
5028
5029 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5030 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5031 // }
5032 // The 1st argument of the callee is undef and has the same type as the caller's.
5033 if (CalleeArg->getType() == CallerArg->getType() &&
5034 isa<UndefValue>(Val: CalleeArg))
5035 continue;
5036
5037 return false;
5038 }
5039
5040 return true;
5041}
5042
5043 // Returns true if TCO is possible between the caller's and callee's
5044 // calling conventions.
5045static bool
5046areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5047 CallingConv::ID CalleeCC) {
5048 // Tail calls are possible with fastcc and ccc.
5049 auto isTailCallableCC = [] (CallingConv::ID CC){
5050 return CC == CallingConv::C || CC == CallingConv::Fast;
5051 };
5052 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5053 return false;
5054
5055 // We can safely tail call both fastcc and ccc callees from a C calling
5056 // convention caller. If the caller is fastcc, we may have less stack space
5057 // than a non-fastcc caller with the same signature, so disable tail calls
5058 // in that case.
5059 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5060}
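// Summary of the decision above (illustrative):
//   caller C,    callee C    -> eligible
//   caller C,    callee Fast -> eligible
//   caller Fast, callee Fast -> eligible
//   caller Fast, callee C    -> not eligible (caller may have less stack)
//   any other calling convention on either side -> not eligible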
5061
5062bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5063 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5064 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5065 const SmallVectorImpl<ISD::OutputArg> &Outs,
5066 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5067 bool isCalleeExternalSymbol) const {
5068 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5069
5070 if (DisableSCO && !TailCallOpt) return false;
5071
5072 // Variadic argument functions are not supported.
5073 if (isVarArg) return false;
5074
5075 // Check that the calling conventions are compatible for tco.
5076 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5077 return false;
5078
5079 // A caller containing any byval parameter is not supported.
5080 if (any_of(Range: Ins, P: [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5081 return false;
5082
5083 // A callee containing any byval parameter is not supported either.
5084 // Note: This is a quick workaround, because in some cases, e.g. when the
5085 // caller's stack size > callee's stack size, we are still able to apply
5086 // sibling call optimization. For example, gcc is able to do SCO for caller1
5087 // in the following example, but not for caller2.
5088 // struct test {
5089 // long int a;
5090 // char ary[56];
5091 // } gTest;
5092 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5093 // b->a = v.a;
5094 // return 0;
5095 // }
5096 // void caller1(struct test a, struct test c, struct test *b) {
5097 // callee(gTest, b); }
5098 // void caller2(struct test *b) { callee(gTest, b); }
5099 if (any_of(Range: Outs, P: [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5100 return false;
5101
5102 // If callee and caller use different calling conventions, we cannot pass
5103 // parameters on stack since offsets for the parameter area may be different.
5104 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5105 return false;
5106
5107 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5108 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5109 // callee potentially have different TOC bases then we cannot tail call since
5110 // we need to restore the TOC pointer after the call.
5111 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5112 // We cannot guarantee this for indirect calls or calls to external functions.
5113 // When PC-Relative addressing is used, the concept of the TOC is no longer
5114 // applicable so this check is not required.
5115 // Check first for indirect calls.
5116 if (!Subtarget.isUsingPCRelativeCalls() &&
5117 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5118 return false;
5119
5120 // Check if we share the TOC base.
5121 if (!Subtarget.isUsingPCRelativeCalls() &&
5122 !callsShareTOCBase(Caller: CallerFunc, CalleeGV, TM: getTargetMachine()))
5123 return false;
5124
5125 // TCO allows altering callee ABI, so we don't have to check further.
5126 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5127 return true;
5128
5129 if (DisableSCO) return false;
5130
5131 // If the callee uses the same argument list as the caller, then we can
5132 // apply SCO in this case. If it does not, then we need to check whether the
5133 // callee needs stack slots for passing arguments.
5134 // PC Relative tail calls may not have a CallBase. If there is no CallBase,
5135 // we cannot verify that the argument lists match, so assume that they
5136 // don't.
5137 if (CB && !hasSameArgumentList(CallerFn: CallerFunc, CB: *CB) &&
5138 needStackSlotPassParameters(Subtarget, Outs))
5139 return false;
5140 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5141 return false;
5142
5143 return true;
5144}
5145
5146/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5147/// for tail call optimization. Targets which want to do tail call
5148/// optimization should implement this function.
5149bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5150 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5151 CallingConv::ID CallerCC, bool isVarArg,
5152 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5153 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5154 return false;
5155
5156 // Variable argument functions are not supported.
5157 if (isVarArg)
5158 return false;
5159
5160 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5161 // Functions containing by val parameters are not supported.
5162 if (any_of(Range: Ins, P: [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5163 return false;
5164
5165 // Non-PIC/GOT tail calls are supported.
5166 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5167 return true;
5168
5169 // At the moment we can only do local tail calls (in same module, hidden
5170 // or protected) if we are generating PIC.
5171 if (CalleeGV)
5172 return CalleeGV->hasHiddenVisibility() ||
5173 CalleeGV->hasProtectedVisibility();
5174 }
5175
5176 return false;
5177}
5178
5179 /// isBLACompatibleAddress - Return the immediate to use if the specified
5180 /// 32-bit value is representable in the immediate field of a BxA instruction.
5181static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5182 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: Op);
5183 if (!C) return nullptr;
5184
5185 int Addr = C->getZExtValue();
5186 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5187 SignExtend32<26>(X: Addr) != Addr)
5188 return nullptr; // Top 6 bits have to be sext of immediate.
5189
5190 return DAG
5191 .getSignedConstant(
5192 Val: (int)C->getZExtValue() >> 2, DL: SDLoc(Op),
5193 VT: DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout()))
5194 .getNode();
5195}
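// Worked example (illustrative): a call target constant of 0x1000 has its low
// two bits clear and fits in a signed 26-bit value, so the node returned above
// holds 0x1000 >> 2 == 0x400 for the immediate field of the BLA. A target of
// 64 MiB (0x4000000) fails the SignExtend32<26> check and nullptr is returned.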
5196
5197namespace {
5198
5199struct TailCallArgumentInfo {
5200 SDValue Arg;
5201 SDValue FrameIdxOp;
5202 int FrameIdx = 0;
5203
5204 TailCallArgumentInfo() = default;
5205};
5206
5207} // end anonymous namespace
5208
5209/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5210static void StoreTailCallArgumentsToStackSlot(
5211 SelectionDAG &DAG, SDValue Chain,
5212 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5213 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5214 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5215 SDValue Arg = TailCallArgs[i].Arg;
5216 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5217 int FI = TailCallArgs[i].FrameIdx;
5218 // Store relative to framepointer.
5219 MemOpChains.push_back(Elt: DAG.getStore(
5220 Chain, dl, Val: Arg, Ptr: FIN,
5221 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI)));
5222 }
5223}
5224
5225/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5226/// the appropriate stack slot for the tail call optimized function call.
5227static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5228 SDValue OldRetAddr, SDValue OldFP,
5229 int SPDiff, const SDLoc &dl) {
5230 if (SPDiff) {
5231 // Calculate the new stack slot for the return address.
5232 MachineFunction &MF = DAG.getMachineFunction();
5233 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5234 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5235 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5236 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5237 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(Size: SlotSize,
5238 SPOffset: NewRetAddrLoc, IsImmutable: true);
5239 SDValue NewRetAddrFrIdx =
5240 DAG.getFrameIndex(FI: NewRetAddr, VT: Subtarget.getScalarIntVT());
5241 Chain = DAG.getStore(Chain, dl, Val: OldRetAddr, Ptr: NewRetAddrFrIdx,
5242 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: NewRetAddr));
5243 }
5244 return Chain;
5245}
5246
5247 /// CalculateTailCallArgDest - Remember the argument for later processing and
5248 /// calculate the position of the argument.
5249static void CalculateTailCallArgDest(
5250 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5251 int SPDiff, unsigned ArgOffset,
5252 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5253 int Offset = ArgOffset + SPDiff;
5254 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5255 int FI = MF.getFrameInfo().CreateFixedObject(Size: OpSize, SPOffset: Offset, IsImmutable: true);
5256 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5257 SDValue FIN = DAG.getFrameIndex(FI, VT);
5258 TailCallArgumentInfo Info;
5259 Info.Arg = Arg;
5260 Info.FrameIdxOp = FIN;
5261 Info.FrameIdx = FI;
5262 TailCallArguments.push_back(Elt: Info);
5263}
5264
5265 /// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and
5266 /// return address stack slots. Returns the chain as result and the loaded
5267 /// values in LROpOut/FPOpOut. Used when tail calling.
5268SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5269 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5270 SDValue &FPOpOut, const SDLoc &dl) const {
5271 if (SPDiff) {
5272 // Load the LR and FP stack slot for later adjusting.
5273 LROpOut = getReturnAddrFrameIndex(DAG);
5274 LROpOut = DAG.getLoad(VT: Subtarget.getScalarIntVT(), dl, Chain, Ptr: LROpOut,
5275 PtrInfo: MachinePointerInfo());
5276 Chain = SDValue(LROpOut.getNode(), 1);
5277 }
5278 return Chain;
5279}
5280
5281/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5282/// by "Src" to address "Dst" of size "Size". Alignment information is
5283/// specified by the specific parameter attribute. The copy will be passed as
5284/// a byval function parameter.
5285/// Sometimes what we are copying is the end of a larger object, the part that
5286/// does not fit in registers.
5287static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5288 SDValue Chain, ISD::ArgFlagsTy Flags,
5289 SelectionDAG &DAG, const SDLoc &dl) {
5290 SDValue SizeNode = DAG.getConstant(Val: Flags.getByValSize(), DL: dl, VT: MVT::i32);
5291 return DAG.getMemcpy(
5292 Chain, dl, Dst, Src, Size: SizeNode, Alignment: Flags.getNonZeroByValAlign(), isVol: false, AlwaysInline: false,
5293 /*CI=*/nullptr, OverrideTailCall: std::nullopt, DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
5294}
5295
5296/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5297/// tail calls.
5298static void LowerMemOpCallTo(
5299 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5300 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5301 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5302 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5303 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout());
5304 if (!isTailCall) {
5305 if (isVector) {
5306 SDValue StackPtr;
5307 if (isPPC64)
5308 StackPtr = DAG.getRegister(Reg: PPC::X1, VT: MVT::i64);
5309 else
5310 StackPtr = DAG.getRegister(Reg: PPC::R1, VT: MVT::i32);
5311 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr,
5312 N2: DAG.getConstant(Val: ArgOffset, DL: dl, VT: PtrVT));
5313 }
5314 MemOpChains.push_back(
5315 Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo()));
5316 // Calculate and remember argument location.
5317 } else
5318 CalculateTailCallArgDest(DAG, MF, IsPPC64: isPPC64, Arg, SPDiff, ArgOffset,
5319 TailCallArguments);
5320}
5321
5322static void
5323PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5324 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5325 SDValue FPOp,
5326 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5327 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5328 // might overwrite each other in case of tail call optimization.
5329 SmallVector<SDValue, 8> MemOpChains2;
5330 // Do not flag preceding copytoreg stuff together with the following stuff.
5331 InGlue = SDValue();
5332 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArgs: TailCallArguments,
5333 MemOpChains&: MemOpChains2, dl);
5334 if (!MemOpChains2.empty())
5335 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains2);
5336
5337 // Store the return address to the appropriate stack slot.
5338 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, OldRetAddr: LROp, OldFP: FPOp, SPDiff, dl);
5339
5340 // Emit callseq_end just before tailcall node.
5341 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue: InGlue, DL: dl);
5342 InGlue = Chain.getValue(R: 1);
5343}
5344
5345 // Is this global address that of a function that can be called by name (as
5346 // opposed to something that must hold a descriptor for an indirect call)?
5347static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5348 if (GV) {
5349 if (GV->isThreadLocal())
5350 return false;
5351
5352 return GV->getValueType()->isFunctionTy();
5353 }
5354
5355 return false;
5356}
5357
5358SDValue PPCTargetLowering::LowerCallResult(
5359 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5360 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5361 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5362 SmallVector<CCValAssign, 16> RVLocs;
5363 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5364 *DAG.getContext());
5365
5366 CCRetInfo.AnalyzeCallResult(
5367 Ins, Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5368 ? RetCC_PPC_Cold
5369 : RetCC_PPC);
5370
5371 // Copy all of the result registers out of their specified physreg.
5372 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5373 CCValAssign &VA = RVLocs[i];
5374 assert(VA.isRegLoc() && "Can only return in registers!");
5375
5376 SDValue Val;
5377
5378 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5379 SDValue Lo = DAG.getCopyFromReg(Chain, dl, Reg: VA.getLocReg(), VT: MVT::i32,
5380 Glue: InGlue);
5381 Chain = Lo.getValue(R: 1);
5382 InGlue = Lo.getValue(R: 2);
5383 VA = RVLocs[++i]; // skip ahead to next loc
5384 SDValue Hi = DAG.getCopyFromReg(Chain, dl, Reg: VA.getLocReg(), VT: MVT::i32,
5385 Glue: InGlue);
5386 Chain = Hi.getValue(R: 1);
5387 InGlue = Hi.getValue(R: 2);
5388 if (!Subtarget.isLittleEndian())
5389 std::swap (a&: Lo, b&: Hi);
5390 Val = DAG.getNode(Opcode: PPCISD::BUILD_SPE64, DL: dl, VT: MVT::f64, N1: Lo, N2: Hi);
5391 } else {
5392 Val = DAG.getCopyFromReg(Chain, dl,
5393 Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue: InGlue);
5394 Chain = Val.getValue(R: 1);
5395 InGlue = Val.getValue(R: 2);
5396 }
5397
5398 switch (VA.getLocInfo()) {
5399 default: llvm_unreachable("Unknown loc info!");
5400 case CCValAssign::Full: break;
5401 case CCValAssign::AExt:
5402 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
5403 break;
5404 case CCValAssign::ZExt:
5405 Val = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: VA.getLocVT(), N1: Val,
5406 N2: DAG.getValueType(VA.getValVT()));
5407 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
5408 break;
5409 case CCValAssign::SExt:
5410 Val = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: VA.getLocVT(), N1: Val,
5411 N2: DAG.getValueType(VA.getValVT()));
5412 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
5413 break;
5414 }
5415
5416 InVals.push_back(Elt: Val);
5417 }
5418
5419 return Chain;
5420}
5421
5422static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5423 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5424 auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5425 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5426
5427 // PatchPoint calls are not indirect.
5428 if (isPatchPoint)
5429 return false;
5430
5431 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Val: Callee))
5432 return false;
5433
5434 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5435 // because the immediate function pointer points to a descriptor instead of
5436 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5437 // pointer immediate points to the global entry point, while the BLA would
5438 // need to jump to the local entry point (see rL211174).
5439 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5440 isBLACompatibleAddress(Op: Callee, DAG))
5441 return false;
5442
5443 return true;
5444}
5445
5446// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5447static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5448 return Subtarget.isAIXABI() ||
5449 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5450}
5451
5452static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5453 const Function &Caller, const SDValue &Callee,
5454 const PPCSubtarget &Subtarget,
5455 const TargetMachine &TM,
5456 bool IsStrictFPCall = false) {
5457 if (CFlags.IsTailCall)
5458 return PPCISD::TC_RETURN;
5459
5460 unsigned RetOpc = 0;
5461 // This is a call through a function pointer.
5462 if (CFlags.IsIndirect) {
5463 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5464 // indirect calls. The save of the caller's TOC pointer to the stack will be
5465 // inserted into the DAG as part of call lowering. The restore of the TOC
5466 // pointer is modeled by using a pseudo instruction for the call opcode that
5467 // represents the 2 instruction sequence of an indirect branch and link,
5468 // immediately followed by a load of the TOC pointer from the stack save
5469 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5470 // as it is not saved or used.
5471 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5472 : PPCISD::BCTRL;
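    // For illustration only, the BCTRL_LOAD_TOC pseudo expands to roughly:
    //   bctrl
    //   ld r2, <TOC save offset>(r1)
    // where the offset comes from the frame lowering (e.g. 24 on ELFv2,
    // 40 on ELFv1; these values are illustrative, not read from here).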
5473 } else if (Subtarget.isUsingPCRelativeCalls()) {
5474 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5475 RetOpc = PPCISD::CALL_NOTOC;
5476 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5477 // The ABIs that maintain a TOC pointer across calls need to have a nop
5478 // immediately following the call instruction if the caller and callee may
5479 // have different TOC bases. At link time, if the linker determines the call
5480 // may not share a TOC base, the call is redirected to a trampoline inserted
5481 // by the linker. The trampoline will (among other things) save the caller's
5482 // TOC pointer at an ABI-designated offset in the linkage area and the
5483 // linker will rewrite the nop to be a load of the TOC pointer from the
5484 // linkage area into gpr2.
5485 auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5486 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5487 RetOpc =
5488 callsShareTOCBase(Caller: &Caller, CalleeGV: GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5489 } else
5490 RetOpc = PPCISD::CALL;
5491 if (IsStrictFPCall) {
5492 switch (RetOpc) {
5493 default:
5494 llvm_unreachable("Unknown call opcode");
5495 case PPCISD::BCTRL_LOAD_TOC:
5496 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5497 break;
5498 case PPCISD::BCTRL:
5499 RetOpc = PPCISD::BCTRL_RM;
5500 break;
5501 case PPCISD::CALL_NOTOC:
5502 RetOpc = PPCISD::CALL_NOTOC_RM;
5503 break;
5504 case PPCISD::CALL:
5505 RetOpc = PPCISD::CALL_RM;
5506 break;
5507 case PPCISD::CALL_NOP:
5508 RetOpc = PPCISD::CALL_NOP_RM;
5509 break;
5510 }
5511 }
5512 return RetOpc;
5513}
5514
5515static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5516 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5517 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5518 if (SDNode *Dest = isBLACompatibleAddress(Op: Callee, DAG))
5519 return SDValue(Dest, 0);
5520
5521 // Returns true if the callee is local, and false otherwise.
5522 auto isLocalCallee = [&]() {
5523 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5524 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5525
5526 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5527 !isa_and_nonnull<GlobalIFunc>(Val: GV);
5528 };
5529
5530 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5531 // a static relocation model causes some versions of GNU LD (2.17.50, at
5532 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5533 // built with secure-PLT.
5534 bool UsePlt =
5535 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5536 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5537
5538 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5539 const TargetMachine &TM = Subtarget.getTargetMachine();
5540 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5541 MCSymbolXCOFF *S =
5542 cast<MCSymbolXCOFF>(Val: TLOF->getFunctionEntryPointSymbol(Func: GV, TM));
5543
5544 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout());
5545 return DAG.getMCSymbol(Sym: S, VT: PtrVT);
5546 };
5547
5548 auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee);
5549 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5550 if (isFunctionGlobalAddress(GV)) {
5551 const GlobalValue *GV = cast<GlobalAddressSDNode>(Val: Callee)->getGlobal();
5552
5553 if (Subtarget.isAIXABI()) {
5554 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5555 return getAIXFuncEntryPointSymbolSDNode(GV);
5556 }
5557 return DAG.getTargetGlobalAddress(GV, DL: dl, VT: Callee.getValueType(), offset: 0,
5558 TargetFlags: UsePlt ? PPCII::MO_PLT : 0);
5559 }
5560
5561 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val: Callee)) {
5562 const char *SymName = S->getSymbol();
5563 if (Subtarget.isAIXABI()) {
5564 // If there exists a user-declared function whose name is the same as the
5565 // ExternalSymbol's, then we pick up the user-declared version.
5566 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5567 if (const Function *F =
5568 dyn_cast_or_null<Function>(Val: Mod->getNamedValue(Name: SymName)))
5569 return getAIXFuncEntryPointSymbolSDNode(F);
5570
5571 // On AIX, direct function calls reference the symbol for the function's
5572 // entry point, which is named by prepending a "." before the function's
5573 // C-linkage name. A Qualname is returned here because an external
5574 // function entry point is a csect with XTY_ER property.
5575 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5576 auto &Context = DAG.getMachineFunction().getContext();
5577 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5578 Section: (Twine(".") + Twine(SymName)).str(), K: SectionKind::getMetadata(),
5579 CsectProp: XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5580 return Sec->getQualNameSymbol();
5581 };
5582
5583 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
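      // For example (illustrative): a call to an undeclared external function
      // "foo" on AIX references the entry-point csect ".foo[PR]" built above,
      // so the emitted assembly is roughly:
      //   bl .foo[PR]
      //   nop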
5584 }
5585 return DAG.getTargetExternalSymbol(Sym: SymName, VT: Callee.getValueType(),
5586 TargetFlags: UsePlt ? PPCII::MO_PLT : 0);
5587 }
5588
5589 // No transformation needed.
5590 assert(Callee.getNode() && "What no callee?");
5591 return Callee;
5592}
5593
5594static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5595 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5596 "Expected a CALLSEQ_START SDNode.");
5597
5598 // The last result value is the chain, except when the node has glue. If the
5599 // node has glue, then the last value is the glue, and the chain is the
5600 // second-to-last value.
5601 SDValue LastValue = CallSeqStart.getValue(R: CallSeqStart->getNumValues() - 1);
5602 if (LastValue.getValueType() != MVT::Glue)
5603 return LastValue;
5604
5605 return CallSeqStart.getValue(R: CallSeqStart->getNumValues() - 2);
5606}
5607
5608 // Creates the node that moves a function's address into the count register
5609 // to prepare for an indirect call instruction.
5610static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5611 SDValue &Glue, SDValue &Chain,
5612 const SDLoc &dl) {
5613 SDValue MTCTROps[] = {Chain, Callee, Glue};
5614 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5615 Chain = DAG.getNode(Opcode: PPCISD::MTCTR, DL: dl, ResultTys: ReturnTypes,
5616 Ops: ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5617 // The glue is the second value produced.
5618 Glue = Chain.getValue(R: 1);
5619}
5620
5621static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5622 SDValue &Glue, SDValue &Chain,
5623 SDValue CallSeqStart,
5624 const CallBase *CB, const SDLoc &dl,
5625 bool hasNest,
5626 const PPCSubtarget &Subtarget) {
5627 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5628 // entry point, but to the function descriptor (the function entry point
5629 // address is part of the function descriptor though).
5630 // The function descriptor is a three doubleword structure with the
5631 // following fields: function entry point, TOC base address and
5632 // environment pointer.
5633 // Thus for a call through a function pointer, the following actions need
5634 // to be performed:
5635 // 1. Save the TOC of the caller in the TOC save area of its stack
5636 // frame (this is done in LowerCall_64SVR4() or LowerCall_AIX()).
5637 // 2. Load the address of the function entry point from the function
5638 // descriptor.
5639 // 3. Load the TOC of the callee from the function descriptor into r2.
5640 // 4. Load the environment pointer from the function descriptor into
5641 // r11.
5642 // 5. Branch to the function entry point address.
5643 // 6. On return of the callee, the TOC of the caller needs to be
5644 // restored (this is done in FinishCall()).
5645 //
5646 // The loads are scheduled at the beginning of the call sequence, and the
5647 // register copies are flagged together to ensure that no other
5648 // operations can be scheduled in between. E.g. without flagging the
5649 // copies together, a TOC access in the caller could be scheduled between
5650 // the assignment of the callee TOC and the branch to the callee, which leads
5651 // to incorrect code.
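  // Illustrative sketch (64-bit ELFv1 layout; the actual offsets come from the
  // subtarget queries used below):
  //   fdesc: .quad entry_point   // offset 0
  //          .quad toc_base      // offset 8
  //          .quad environment   // offset 16
  // and the resulting call sequence is roughly
  //   ld rN,  0(fdesc)   ; entry point, moved to CTR below
  //   ld r2,  8(fdesc)   ; callee TOC
  //   ld r11, 16(fdesc)  ; environment pointer
  //   bctrl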
5652
5653 // Start by loading the function address from the descriptor.
5654 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5655 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5656 ? (MachineMemOperand::MODereferenceable |
5657 MachineMemOperand::MOInvariant)
5658 : MachineMemOperand::MONone;
5659
5660 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5661
5662 // Registers used in building the DAG.
5663 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5664 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5665
5666 // Offsets of descriptor members.
5667 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5668 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5669
5670 const MVT RegVT = Subtarget.getScalarIntVT();
5671 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5672
5673 // One load for the function's entry point address.
5674 SDValue LoadFuncPtr = DAG.getLoad(VT: RegVT, dl, Chain: LDChain, Ptr: Callee, PtrInfo: MPI,
5675 Alignment, MMOFlags);
5676
5677 // One for loading the TOC anchor for the module that contains the called
5678 // function.
5679 SDValue TOCOff = DAG.getIntPtrConstant(Val: TOCAnchorOffset, DL: dl);
5680 SDValue AddTOC = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: Callee, N2: TOCOff);
5681 SDValue TOCPtr =
5682 DAG.getLoad(VT: RegVT, dl, Chain: LDChain, Ptr: AddTOC,
5683 PtrInfo: MPI.getWithOffset(O: TOCAnchorOffset), Alignment, MMOFlags);
5684
5685 // One for loading the environment pointer.
5686 SDValue PtrOff = DAG.getIntPtrConstant(Val: EnvPtrOffset, DL: dl);
5687 SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: Callee, N2: PtrOff);
5688 SDValue LoadEnvPtr =
5689 DAG.getLoad(VT: RegVT, dl, Chain: LDChain, Ptr: AddPtr,
5690 PtrInfo: MPI.getWithOffset(O: EnvPtrOffset), Alignment, MMOFlags);
5691
5692
5693 // Then copy the newly loaded TOC anchor to the TOC pointer.
5694 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, Reg: TOCReg, N: TOCPtr, Glue);
5695 Chain = TOCVal.getValue(R: 0);
5696 Glue = TOCVal.getValue(R: 1);
5697
5698 // If the function call has an explicit 'nest' parameter, it takes the
5699 // place of the environment pointer.
5700 assert((!hasNest || !Subtarget.isAIXABI()) &&
5701 "Nest parameter is not supported on AIX.");
5702 if (!hasNest) {
5703 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, Reg: EnvPtrReg, N: LoadEnvPtr, Glue);
5704 Chain = EnvVal.getValue(R: 0);
5705 Glue = EnvVal.getValue(R: 1);
5706 }
5707
5708 // The rest of the indirect call sequence is the same as the non-descriptor
5709 // DAG.
5710 prepareIndirectCall(DAG, Callee&: LoadFuncPtr, Glue, Chain, dl);
5711}
5712
5713static void
5714buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5715 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5716 SelectionDAG &DAG,
5717 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5718 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5719 const PPCSubtarget &Subtarget) {
5720 const bool IsPPC64 = Subtarget.isPPC64();
5721 // MVT for a general purpose register.
5722 const MVT RegVT = Subtarget.getScalarIntVT();
5723
5724 // First operand is always the chain.
5725 Ops.push_back(Elt: Chain);
5726
5727 // If it's a direct call pass the callee as the second operand.
5728 if (!CFlags.IsIndirect)
5729 Ops.push_back(Elt: Callee);
5730 else {
5731 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5732
5733 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5734 // on the stack (this would have been done in `LowerCall_64SVR4` or
5735 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5736 // represents both the indirect branch and a load that restores the TOC
5737 // pointer from the linkage area. The operand for the TOC restore is an add
5738 // of the TOC save offset to the stack pointer. This must be the second
5739 // operand: after the chain input but before any other variadic arguments.
5740 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5741 // saved or used.
5742 if (isTOCSaveRestoreRequired(Subtarget)) {
5743 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5744
5745 SDValue StackPtr = DAG.getRegister(Reg: StackPtrReg, VT: RegVT);
5746 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5747 SDValue TOCOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl);
5748 SDValue AddTOC = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: StackPtr, N2: TOCOff);
5749 Ops.push_back(Elt: AddTOC);
5750 }
5751
5752 // Add the register used for the environment pointer.
5753 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5754 Ops.push_back(Elt: DAG.getRegister(Reg: Subtarget.getEnvironmentPointerRegister(),
5755 VT: RegVT));
5756
5757
5758 // Add CTR register as callee so a bctr can be emitted later.
5759 if (CFlags.IsTailCall)
5760 Ops.push_back(Elt: DAG.getRegister(Reg: IsPPC64 ? PPC::CTR8 : PPC::CTR, VT: RegVT));
5761 }
5762
5763 // If this is a tail call add stack pointer delta.
5764 if (CFlags.IsTailCall)
5765 Ops.push_back(Elt: DAG.getConstant(Val: SPDiff, DL: dl, VT: MVT::i32));
5766
5767 // Add argument registers to the end of the list so that they are known live
5768 // into the call.
5769 for (const auto &[Reg, N] : RegsToPass)
5770 Ops.push_back(Elt: DAG.getRegister(Reg, VT: N.getValueType()));
5771
5772 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5773 // no way to mark dependencies as implicit here.
5774 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5775 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5776 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5777 Ops.push_back(Elt: DAG.getRegister(Reg: Subtarget.getTOCPointerRegister(), VT: RegVT));
5778
5779 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5780 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5781 Ops.push_back(Elt: DAG.getRegister(Reg: PPC::CR1EQ, VT: MVT::i32));
5782
5783 // Add a register mask operand representing the call-preserved registers.
5784 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5785 const uint32_t *Mask =
5786 TRI->getCallPreservedMask(MF: DAG.getMachineFunction(), CFlags.CallConv);
5787 assert(Mask && "Missing call preserved mask for calling convention");
5788 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
5789
5790 // If the glue is valid, it is the last operand.
5791 if (Glue.getNode())
5792 Ops.push_back(Elt: Glue);
5793}
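// For orientation (illustrative, not exhaustive): an indirect, non-tail call
// on 64-bit ELFv1 ends up with an operand list shaped roughly like
//   { Chain, SP + TOCSaveOffset, X11 (environment), arg registers...,
//     X2 (TOC), register mask, Glue }
// while a direct call simply starts with { Chain, Callee, ... }.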
5794
5795SDValue PPCTargetLowering::FinishCall(
5796 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5797 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5798 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5799 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5800 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5801
5802 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5803 Subtarget.isAIXABI())
5804 setUsesTOCBasePtr(DAG);
5805
5806 unsigned CallOpc =
5807 getCallOpcode(CFlags, Caller: DAG.getMachineFunction().getFunction(), Callee,
5808 Subtarget, TM: DAG.getTarget(), IsStrictFPCall: CB ? CB->isStrictFP() : false);
5809
5810 if (!CFlags.IsIndirect)
5811 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5812 else if (Subtarget.usesFunctionDescriptors())
5813 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5814 dl, hasNest: CFlags.HasNest, Subtarget);
5815 else
5816 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5817
5818 // Build the operand list for the call instruction.
5819 SmallVector<SDValue, 8> Ops;
5820 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5821 SPDiff, Subtarget);
5822
5823 // Emit tail call.
5824 if (CFlags.IsTailCall) {
5825 // Indirect tail calls when using PC Relative calls do not have the same
5826 // constraints.
5827 assert(((Callee.getOpcode() == ISD::Register &&
5828 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5829 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5830 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5831 isa<ConstantSDNode>(Callee) ||
5832 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5833 "Expecting a global address, external symbol, absolute value, "
5834 "register or an indirect tail call when PC Relative calls are "
5835 "used.");
5836 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5837 assert(CallOpc == PPCISD::TC_RETURN &&
5838 "Unexpected call opcode for a tail call.");
5839 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5840 SDValue Ret = DAG.getNode(Opcode: CallOpc, DL: dl, VT: MVT::Other, Ops);
5841 DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CFlags.NoMerge);
5842 return Ret;
5843 }
5844
5845 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5846 Chain = DAG.getNode(Opcode: CallOpc, DL: dl, ResultTys: ReturnTypes, Ops);
5847 DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CFlags.NoMerge);
5848 Glue = Chain.getValue(R: 1);
5849
5850 // When performing tail call optimization the callee pops its arguments off
5851 // the stack. Account for this here so these bytes can be pushed back on in
5852 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5853 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5854 getTargetMachine().Options.GuaranteedTailCallOpt)
5855 ? NumBytes
5856 : 0;
5857
5858 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: BytesCalleePops, Glue, DL: dl);
5859 Glue = Chain.getValue(R: 1);
5860
5861 return LowerCallResult(Chain, InGlue: Glue, CallConv: CFlags.CallConv, isVarArg: CFlags.IsVarArg, Ins, dl,
5862 DAG, InVals);
5863}
5864
5865bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5866 CallingConv::ID CalleeCC = CB->getCallingConv();
5867 const Function *CallerFunc = CB->getCaller();
5868 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5869 const Function *CalleeFunc = CB->getCalledFunction();
5870 if (!CalleeFunc)
5871 return false;
5872 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(Val: CalleeFunc);
5873
5874 SmallVector<ISD::OutputArg, 2> Outs;
5875 SmallVector<ISD::InputArg, 2> Ins;
5876
5877 GetReturnInfo(CC: CalleeCC, ReturnType: CalleeFunc->getReturnType(),
5878 attr: CalleeFunc->getAttributes(), Outs, TLI: *this,
5879 DL: CalleeFunc->getDataLayout());
5880
5881 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5882 isVarArg: CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5883 isCalleeExternalSymbol: false /*isCalleeExternalSymbol*/);
5884}
5885
5886bool PPCTargetLowering::isEligibleForTCO(
5887 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5888 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5889 const SmallVectorImpl<ISD::OutputArg> &Outs,
5890 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5891 bool isCalleeExternalSymbol) const {
5892 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5893 return false;
5894
5895 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5896 return IsEligibleForTailCallOptimization_64SVR4(
5897 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5898 isCalleeExternalSymbol);
5899 else
5900 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5901 isVarArg, Ins);
5902}
5903
5904SDValue
5905PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5906 SmallVectorImpl<SDValue> &InVals) const {
5907 SelectionDAG &DAG = CLI.DAG;
5908 SDLoc &dl = CLI.DL;
5909 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5910 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5911 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5912 SDValue Chain = CLI.Chain;
5913 SDValue Callee = CLI.Callee;
5914 bool &isTailCall = CLI.IsTailCall;
5915 CallingConv::ID CallConv = CLI.CallConv;
5916 bool isVarArg = CLI.IsVarArg;
5917 bool isPatchPoint = CLI.IsPatchPoint;
5918 const CallBase *CB = CLI.CB;
5919
5920 if (isTailCall) {
5921 MachineFunction &MF = DAG.getMachineFunction();
5922 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5923 auto *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
5924 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5925 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Val: Callee);
5926
5927 isTailCall =
5928 isEligibleForTCO(CalleeGV: GV, CalleeCC: CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5929 CallerFunc: &(MF.getFunction()), isCalleeExternalSymbol: IsCalleeExternalSymbol);
5930 if (isTailCall) {
5931 ++NumTailCalls;
5932 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5933 ++NumSiblingCalls;
5934
5935 // PC Relative calls no longer guarantee that the callee is a Global
5936 // Address Node. The callee could be an indirect tail call in which
5937 // case the SDValue for the callee could be a load (to load the address
5938 // of a function pointer) or it may be a register copy (to move the
5939 // address of the callee from a function parameter into a virtual
5940 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5941 assert((Subtarget.isUsingPCRelativeCalls() ||
5942 isa<GlobalAddressSDNode>(Callee)) &&
5943 "Callee should be an llvm::Function object.");
5944
5945 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5946 << "\nTCO callee: ");
5947 LLVM_DEBUG(Callee.dump());
5948 }
5949 }
5950
5951 if (!isTailCall && CB && CB->isMustTailCall())
5952 report_fatal_error(reason: "failed to perform tail call elimination on a call "
5953 "site marked musttail");
5954
5955 // When long calls (i.e. indirect calls) are always used, calls are always
5956 // made via function pointer. If we have a function name, first translate it
5957 // into a pointer.
5958 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Val: Callee) &&
5959 !isTailCall)
5960 Callee = LowerGlobalAddress(Op: Callee, DAG);
5961
5962 CallFlags CFlags(
5963 CallConv, isTailCall, isVarArg, isPatchPoint,
5964 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5965 // hasNest
5966 Subtarget.is64BitELFABI() &&
5967 any_of(Range&: Outs, P: [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5968 CLI.NoMerge);
5969
5970 if (Subtarget.isAIXABI())
5971 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5972 InVals, CB);
5973
5974 assert(Subtarget.isSVR4ABI());
5975 if (Subtarget.isPPC64())
5976 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5977 InVals, CB);
5978 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5979 InVals, CB);
5980}
5981
5982SDValue PPCTargetLowering::LowerCall_32SVR4(
5983 SDValue Chain, SDValue Callee, CallFlags CFlags,
5984 const SmallVectorImpl<ISD::OutputArg> &Outs,
5985 const SmallVectorImpl<SDValue> &OutVals,
5986 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5987 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5988 const CallBase *CB) const {
5989 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5990 // of the 32-bit SVR4 ABI stack frame layout.
5991
5992 const CallingConv::ID CallConv = CFlags.CallConv;
5993 const bool IsVarArg = CFlags.IsVarArg;
5994 const bool IsTailCall = CFlags.IsTailCall;
5995
5996 assert((CallConv == CallingConv::C ||
5997 CallConv == CallingConv::Cold ||
5998 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5999
6000 const Align PtrAlign(4);
6001
6002 MachineFunction &MF = DAG.getMachineFunction();
6003
6004 // Mark this function as potentially containing a function that contains a
6005 // tail call. As a consequence the frame pointer will be used for dynamic
6006 // allocas and for restoring the caller's stack pointer in this function's
6007 // epilogue. This is done because, by tail calling, the called function might
6008 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
6009 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6010 CallConv == CallingConv::Fast)
6011 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6012
6013 // Count how many bytes are to be pushed on the stack, including the linkage
6014 // area, parameter list area and the part of the local variable space which
6015 // contains copies of aggregates which are passed by value.
6016
6017 // Assign locations to all of the outgoing arguments.
6018 SmallVector<CCValAssign, 16> ArgLocs;
6019 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6020
6021 // Reserve space for the linkage area on the stack.
6022 CCInfo.AllocateStack(Size: Subtarget.getFrameLowering()->getLinkageSize(),
6023 Alignment: PtrAlign);
6024 if (useSoftFloat())
6025 CCInfo.PreAnalyzeCallOperands(Outs);
6026
6027 if (IsVarArg) {
6028 // Handle fixed and variable vector arguments differently.
6029 // Fixed vector arguments go into registers as long as registers are
6030 // available. Variable vector arguments always go into memory.
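    // For example (illustrative): in a call to a variadic prototype such as
    //   void f(vector int, ...);  f(v0, v1);
    // the fixed argument v0 may still be assigned to v2..v13 by CC_PPC32_SVR4,
    // while the variadic argument v1 is assigned to memory by
    // CC_PPC32_SVR4_VarArg.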
6031 unsigned NumArgs = Outs.size();
6032
6033 for (unsigned i = 0; i != NumArgs; ++i) {
6034 MVT ArgVT = Outs[i].VT;
6035 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6036 bool Result;
6037
6038 if (Outs[i].IsFixed) {
6039 Result = CC_PPC32_SVR4(ValNo: i, ValVT: ArgVT, LocVT: ArgVT, LocInfo: CCValAssign::Full, ArgFlags,
6040 State&: CCInfo);
6041 } else {
6042 Result = CC_PPC32_SVR4_VarArg(ValNo: i, ValVT: ArgVT, LocVT: ArgVT, LocInfo: CCValAssign::Full,
6043 ArgFlags, State&: CCInfo);
6044 }
6045
6046 if (Result) {
6047#ifndef NDEBUG
6048 errs() << "Call operand #" << i << " has unhandled type "
6049 << ArgVT << "\n";
6050#endif
6051 llvm_unreachable(nullptr);
6052 }
6053 }
6054 } else {
6055 // All arguments are treated the same.
6056 CCInfo.AnalyzeCallOperands(Outs, Fn: CC_PPC32_SVR4);
6057 }
6058 CCInfo.clearWasPPCF128();
6059
6060 // Assign locations to all of the outgoing aggregate by value arguments.
6061 SmallVector<CCValAssign, 16> ByValArgLocs;
6062 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6063
6064 // Reserve stack space for the allocations in CCInfo.
6065 CCByValInfo.AllocateStack(Size: CCInfo.getStackSize(), Alignment: PtrAlign);
6066
6067 CCByValInfo.AnalyzeCallOperands(Outs, Fn: CC_PPC32_SVR4_ByVal);
6068
6069 // Size of the linkage area, parameter list area and the part of the local
6070 // space variable where copies of aggregates which are passed by value are
6071 // stored.
6072 unsigned NumBytes = CCByValInfo.getStackSize();
6073
6074 // Calculate by how many bytes the stack has to be adjusted in case of tail
6075 // call optimization.
6076 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall: IsTailCall, ParamSize: NumBytes);
6077
6078 // Adjust the stack pointer for the new arguments...
6079 // These operations are automatically eliminated by the prolog/epilog pass
6080 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: dl);
6081 SDValue CallSeqStart = Chain;
6082
6083 // Load the return address and frame pointer so it can be moved somewhere else
6084 // later.
6085 SDValue LROp, FPOp;
6086 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROpOut&: LROp, FPOpOut&: FPOp, dl);
6087
6088 // Set up a copy of the stack pointer for use loading and storing any
6089 // arguments that may not fit in the registers available for argument
6090 // passing.
6091 SDValue StackPtr = DAG.getRegister(Reg: PPC::R1, VT: MVT::i32);
6092
6093 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6094 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6095 SmallVector<SDValue, 8> MemOpChains;
6096
6097 bool seenFloatArg = false;
6098 // Walk the register/memloc assignments, inserting copies/loads.
6099 // i - Tracks the index into the list of registers allocated for the call
6100 // RealArgIdx - Tracks the index into the list of actual function arguments
6101 // j - Tracks the index into the list of byval arguments
6102 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6103 i != e;
6104 ++i, ++RealArgIdx) {
6105 CCValAssign &VA = ArgLocs[i];
6106 SDValue Arg = OutVals[RealArgIdx];
6107 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6108
6109 if (Flags.isByVal()) {
6110 // Argument is an aggregate which is passed by value, thus we need to
6111 // create a copy of it in the local variable space of the current stack
6112 // frame (which is the stack frame of the caller) and pass the address of
6113 // this copy to the callee.
6114 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6115 CCValAssign &ByValVA = ByValArgLocs[j++];
6116 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6117
6118 // Memory reserved in the local variable space of the caller's stack frame.
6119 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6120
6121 SDValue PtrOff = DAG.getIntPtrConstant(Val: LocMemOffset, DL: dl);
6122 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()),
6123 N1: StackPtr, N2: PtrOff);
6124
6125 // Create a copy of the argument in the local area of the current
6126 // stack frame.
6127 SDValue MemcpyCall =
6128 CreateCopyOfByValArgument(Src: Arg, Dst: PtrOff,
6129 Chain: CallSeqStart.getNode()->getOperand(Num: 0),
6130 Flags, DAG, dl);
6131
6132 // This must go outside the CALLSEQ_START..END.
6133 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(Chain: MemcpyCall, InSize: NumBytes, OutSize: 0,
6134 DL: SDLoc(MemcpyCall));
6135 DAG.ReplaceAllUsesWith(From: CallSeqStart.getNode(),
6136 To: NewCallSeqStart.getNode());
6137 Chain = CallSeqStart = NewCallSeqStart;
6138
6139 // Pass the address of the aggregate copy on the stack either in a
6140 // physical register or in the parameter list area of the current stack
6141 // frame to the callee.
6142 Arg = PtrOff;
6143 }
6144
6145 // When useCRBits() is true, there can be i1 arguments.
6146 // This is because getRegisterType(MVT::i1) => MVT::i1, while for other
6147 // integer types getRegisterType() => MVT::i32.
6148 // Extend i1 so the callee will get an i32.
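    // For example (illustrative): a 'signext i1 true' argument becomes the
    // i32 value -1, while a 'zeroext i1 true' argument becomes the i32 value 1.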
6149 if (Arg.getValueType() == MVT::i1)
6150 Arg = DAG.getNode(Opcode: Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6151 DL: dl, VT: MVT::i32, Operand: Arg);
6152
6153 if (VA.isRegLoc()) {
6154 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6155 // Put argument in a physical register.
6156 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6157 bool IsLE = Subtarget.isLittleEndian();
6158 SDValue SVal = DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
6159 N2: DAG.getIntPtrConstant(Val: IsLE ? 0 : 1, DL: dl));
6160 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y: SVal.getValue(R: 0)));
6161 SVal = DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
6162 N2: DAG.getIntPtrConstant(Val: IsLE ? 1 : 0, DL: dl));
6163 RegsToPass.push_back(Elt: std::make_pair(x: ArgLocs[++i].getLocReg(),
6164 y: SVal.getValue(R: 0)));
6165 } else
6166 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
6167 } else {
6168 // Put argument in the parameter list area of the current stack frame.
6169 assert(VA.isMemLoc());
6170 unsigned LocMemOffset = VA.getLocMemOffset();
6171
6172 if (!IsTailCall) {
6173 SDValue PtrOff = DAG.getIntPtrConstant(Val: LocMemOffset, DL: dl);
6174 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()),
6175 N1: StackPtr, N2: PtrOff);
6176
6177 MemOpChains.push_back(
6178 Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo()));
6179 } else {
6180 // Calculate and remember argument location.
6181 CalculateTailCallArgDest(DAG, MF, IsPPC64: false, Arg, SPDiff, ArgOffset: LocMemOffset,
6182 TailCallArguments);
6183 }
6184 }
6185 }
6186
6187 if (!MemOpChains.empty())
6188 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);
6189
6190 // Build a sequence of copy-to-reg nodes chained together with token chain
6191 // and flag operands which copy the outgoing args into the appropriate regs.
6192 SDValue InGlue;
6193 for (const auto &[Reg, N] : RegsToPass) {
6194 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, Glue: InGlue);
6195 InGlue = Chain.getValue(R: 1);
6196 }
6197
6198 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6199 // registers.
6200 if (IsVarArg) {
6201 SDVTList VTs = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
6202 SDValue Ops[] = { Chain, InGlue };
6203
6204 Chain = DAG.getNode(Opcode: seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, DL: dl,
6205 VTList: VTs, Ops: ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6206
6207 InGlue = Chain.getValue(R: 1);
6208 }
6209
6210 if (IsTailCall)
6211 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6212 TailCallArguments);
6213
6214 return FinishCall(CFlags, dl, DAG, RegsToPass, Glue: InGlue, Chain, CallSeqStart,
6215 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6216}
6217
6218// Copy an argument into memory, being careful to do this outside the
6219// call sequence for the call to which the argument belongs.
6220SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6221 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6222 SelectionDAG &DAG, const SDLoc &dl) const {
6223 SDValue MemcpyCall = CreateCopyOfByValArgument(Src: Arg, Dst: PtrOff,
6224 Chain: CallSeqStart.getNode()->getOperand(Num: 0),
6225 Flags, DAG, dl);
6226 // The MEMCPY must go outside the CALLSEQ_START..END.
6227 int64_t FrameSize = CallSeqStart.getConstantOperandVal(i: 1);
6228 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(Chain: MemcpyCall, InSize: FrameSize, OutSize: 0,
6229 DL: SDLoc(MemcpyCall));
6230 DAG.ReplaceAllUsesWith(From: CallSeqStart.getNode(),
6231 To: NewCallSeqStart.getNode());
6232 return NewCallSeqStart;
6233}
6234
6235SDValue PPCTargetLowering::LowerCall_64SVR4(
6236 SDValue Chain, SDValue Callee, CallFlags CFlags,
6237 const SmallVectorImpl<ISD::OutputArg> &Outs,
6238 const SmallVectorImpl<SDValue> &OutVals,
6239 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6240 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6241 const CallBase *CB) const {
6242 bool isELFv2ABI = Subtarget.isELFv2ABI();
6243 bool isLittleEndian = Subtarget.isLittleEndian();
6244 unsigned NumOps = Outs.size();
6245 bool IsSibCall = false;
6246 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6247
6248 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
6249 unsigned PtrByteSize = 8;
6250
6251 MachineFunction &MF = DAG.getMachineFunction();
6252
6253 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6254 IsSibCall = true;
6255
6256 // Mark this function as potentially containing a function that contains a
6257 // tail call. As a consequence the frame pointer will be used for dynamic
6258 // allocas and for restoring the caller's stack pointer in this function's
6259 // epilogue. This is done because, by tail calling, the called function might
6260 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
6261 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6262 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6263
6264 assert(!(IsFastCall && CFlags.IsVarArg) &&
6265 "fastcc not supported on varargs functions");
6266
6267 // Count how many bytes are to be pushed on the stack, including the linkage
6268 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6269 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6270 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
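  // Illustrative layouts (offsets from the stack pointer, as described above):
  //   ELFv1 (48 bytes): 0 back chain, 8 CR save, 16 LR save,
  //                     24/32 reserved doublewords, 40 TOC save
  //   ELFv2 (32 bytes): 0 back chain, 8 CR save, 16 LR save, 24 TOC save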
6271 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6272 unsigned NumBytes = LinkageSize;
6273 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6274
6275 static const MCPhysReg GPR[] = {
6276 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6277 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6278 };
6279 static const MCPhysReg VR[] = {
6280 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6281 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6282 };
6283
6284 const unsigned NumGPRs = std::size(GPR);
6285 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6286 const unsigned NumVRs = std::size(VR);
6287
6288 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6289 // can be passed to the callee in registers.
6290 // For the fast calling convention, there is another check below.
6291 // Note: We should keep this consistent with LowerFormalArguments_64SVR4().
6292 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6293 if (!HasParameterArea) {
6294 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6295 unsigned AvailableFPRs = NumFPRs;
6296 unsigned AvailableVRs = NumVRs;
6297 unsigned NumBytesTmp = NumBytes;
6298 for (unsigned i = 0; i != NumOps; ++i) {
6299 if (Outs[i].Flags.isNest()) continue;
6300 if (CalculateStackSlotUsed(ArgVT: Outs[i].VT, OrigVT: Outs[i].ArgVT, Flags: Outs[i].Flags,
6301 PtrByteSize, LinkageSize, ParamAreaSize,
6302 ArgOffset&: NumBytesTmp, AvailableFPRs, AvailableVRs))
6303 HasParameterArea = true;
6304 }
6305 }
6306
6307 // When using the fast calling convention, we don't provide backing for
6308 // arguments that will be in registers.
6309 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6310
6311 // Avoid allocating parameter area for fastcc functions if all the arguments
6312 // can be passed in the registers.
6313 if (IsFastCall)
6314 HasParameterArea = false;
6315
6316 // Add up all the space actually used.
6317 for (unsigned i = 0; i != NumOps; ++i) {
6318 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6319 EVT ArgVT = Outs[i].VT;
6320 EVT OrigVT = Outs[i].ArgVT;
6321
6322 if (Flags.isNest())
6323 continue;
6324
6325 if (IsFastCall) {
6326 if (Flags.isByVal()) {
6327 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6328 if (NumGPRsUsed > NumGPRs)
6329 HasParameterArea = true;
6330 } else {
6331 switch (ArgVT.getSimpleVT().SimpleTy) {
6332 default: llvm_unreachable("Unexpected ValueType for argument!");
6333 case MVT::i1:
6334 case MVT::i32:
6335 case MVT::i64:
6336 if (++NumGPRsUsed <= NumGPRs)
6337 continue;
6338 break;
6339 case MVT::v4i32:
6340 case MVT::v8i16:
6341 case MVT::v16i8:
6342 case MVT::v2f64:
6343 case MVT::v2i64:
6344 case MVT::v1i128:
6345 case MVT::f128:
6346 if (++NumVRsUsed <= NumVRs)
6347 continue;
6348 break;
6349 case MVT::v4f32:
6350 if (++NumVRsUsed <= NumVRs)
6351 continue;
6352 break;
6353 case MVT::f32:
6354 case MVT::f64:
6355 if (++NumFPRsUsed <= NumFPRs)
6356 continue;
6357 break;
6358 }
6359 HasParameterArea = true;
6360 }
6361 }
6362
6363 /* Respect alignment of argument on the stack. */
6364 auto Alignment =
6365 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6366 NumBytes = alignTo(Size: NumBytes, A: Alignment);
6367
6368 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6369 if (Flags.isInConsecutiveRegsLast())
6370 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6371 }
6372
6373 unsigned NumBytesActuallyUsed = NumBytes;
6374
6375 // In the old ELFv1 ABI,
6376 // the prolog code of the callee may store up to 8 GPR argument registers to
6377 // the stack, allowing va_start to index over them in memory if it is varargs.
6378 // Because we cannot tell if this is needed on the caller side, we have to
6379 // conservatively assume that it is needed. As such, make sure we have at
6380 // least enough stack space for the caller to store the 8 GPRs.
6381 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6382 // really requires memory operands, e.g. a vararg function.
6383 if (HasParameterArea)
6384 NumBytes = std::max(a: NumBytes, b: LinkageSize + 8 * PtrByteSize);
6385 else
6386 NumBytes = LinkageSize;
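  // With the linkage sizes above, this clamp means a frame with a parameter
  // area reserves at least 48 + 64 = 112 bytes on ELFv1 and 32 + 64 = 96
  // bytes on ELFv2, while a frame without one reserves only the linkage area.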
6387
6388 // Tail call needs the stack to be aligned.
6389 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6390 NumBytes = EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes);
6391
6392 int SPDiff = 0;
6393
6394 // Calculate by how many bytes the stack has to be adjusted in case of tail
6395 // call optimization.
6396 if (!IsSibCall)
6397 SPDiff = CalculateTailCallSPDiff(DAG, isTailCall: CFlags.IsTailCall, ParamSize: NumBytes);
6398
6399 // To protect arguments on the stack from being clobbered in a tail call,
6400 // force all the loads to happen before doing any other lowering.
6401 if (CFlags.IsTailCall)
6402 Chain = DAG.getStackArgumentTokenFactor(Chain);
6403
6404 // Adjust the stack pointer for the new arguments...
6405 // These operations are automatically eliminated by the prolog/epilog pass
6406 if (!IsSibCall)
6407 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: dl);
6408 SDValue CallSeqStart = Chain;
6409
6410 // Load the return address and frame pointer so they can be moved somewhere
6411 // else later.
6412 SDValue LROp, FPOp;
6413 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROpOut&: LROp, FPOpOut&: FPOp, dl);
6414
6415 // Set up a copy of the stack pointer for use in loading and storing any
6416 // arguments that may not fit in the registers available for argument
6417 // passing.
6418 SDValue StackPtr = DAG.getRegister(Reg: PPC::X1, VT: MVT::i64);
6419
6420 // Figure out which arguments are going to go in registers, and which in
6421 // memory. Also, if this is a vararg function, floating-point arguments
6422 // must be stored to our stack, and also loaded into integer regs, if
6423 // any integer regs are available for argument passing.
6424 unsigned ArgOffset = LinkageSize;
6425
6426 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6427 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6428
6429 SmallVector<SDValue, 8> MemOpChains;
6430 for (unsigned i = 0; i != NumOps; ++i) {
6431 SDValue Arg = OutVals[i];
6432 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6433 EVT ArgVT = Outs[i].VT;
6434 EVT OrigVT = Outs[i].ArgVT;
6435
6436 // PtrOff will be used to store the current argument to the stack if a
6437 // register cannot be found for it.
6438 SDValue PtrOff;
6439
6440 // We re-align the argument offset for each argument, except when using the
6441 // fast calling convention, where we must make sure to do so only when the
6442 // argument will actually use a stack slot.
6443 auto ComputePtrOff = [&]() {
6444 /* Respect alignment of argument on the stack. */
6445 auto Alignment =
6446 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6447 ArgOffset = alignTo(Size: ArgOffset, A: Alignment);
6448
6449 PtrOff = DAG.getConstant(Val: ArgOffset, DL: dl, VT: StackPtr.getValueType());
6450
6451 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
6452 };
6453
6454 if (!IsFastCall) {
6455 ComputePtrOff();
6456
6457 /* Compute GPR index associated with argument offset. */
6458 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6459 GPR_idx = std::min(a: GPR_idx, b: NumGPRs);
6460 }
6461
6462 // Promote integers to 64-bit values.
6463 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6464 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6465 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6466 Arg = DAG.getNode(Opcode: ExtOp, DL: dl, VT: MVT::i64, Operand: Arg);
6467 }
6468
6469 // FIXME memcpy is used way more than necessary. Correctness first.
6470 // Note: "by value" is code for passing a structure by value, not
6471 // basic types.
6472 if (Flags.isByVal()) {
6473 // Note: Size includes alignment padding, so
6474 // struct x { short a; char b; }
6475 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6476 // These are the proper values we need for right-justifying the
6477 // aggregate in a parameter register.
6478 unsigned Size = Flags.getByValSize();
6479
6480 // An empty aggregate parameter takes up no storage and no
6481 // registers.
6482 if (Size == 0)
6483 continue;
6484
6485 if (IsFastCall)
6486 ComputePtrOff();
6487
6488 // All aggregates smaller than 8 bytes must be passed right-justified.
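  // For example, a 4-byte aggregate handled below is fetched with a 32-bit
  // extending load into a 64-bit GPR, so its bytes end up in the low
  // (rightmost) half of the register, which is the right-justified form
  // expected here.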
6489 if (Size==1 || Size==2 || Size==4) {
6490 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6491 if (GPR_idx != NumGPRs) {
6492 SDValue Load = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: PtrVT, Chain, Ptr: Arg,
6493 PtrInfo: MachinePointerInfo(), MemVT: VT);
6494 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6495 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6496
6497 ArgOffset += PtrByteSize;
6498 continue;
6499 }
6500 }
6501
6502 if (GPR_idx == NumGPRs && Size < 8) {
6503 SDValue AddPtr = PtrOff;
6504 if (!isLittleEndian) {
6505 SDValue Const = DAG.getConstant(Val: PtrByteSize - Size, DL: dl,
6506 VT: PtrOff.getValueType());
6507 AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff, N2: Const);
6508 }
6509 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff: AddPtr,
6510 CallSeqStart,
6511 Flags, DAG, dl);
6512 ArgOffset += PtrByteSize;
6513 continue;
6514 }
6515 // Copy the object to the parameter save area if it cannot be passed
6516 // entirely in registers.
6517 // FIXME: we only need to copy the parts which need to be passed in the
6518 // parameter save area. For the parts passed in registers, we don't need
6519 // to copy them to the stack, although we do need to allocate space for
6520 // them in the parameter save area.
6521 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6522 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6523 CallSeqStart,
6524 Flags, DAG, dl);
6525
6526 // When a register is available, pass a small aggregate right-justified.
6527 if (Size < 8 && GPR_idx != NumGPRs) {
6528 // The easiest way to get this right-justified in a register
6529 // is to copy the structure into the rightmost portion of a
6530 // local variable slot, then load the whole slot into the
6531 // register.
6532 // FIXME: The memcpy seems to produce pretty awful code for
6533 // small aggregates, particularly for packed ones.
6534 // FIXME: It would be preferable to use the slot in the
6535 // parameter save area instead of a new local variable.
6536 SDValue AddPtr = PtrOff;
6537 if (!isLittleEndian) {
6538 SDValue Const = DAG.getConstant(Val: 8 - Size, DL: dl, VT: PtrOff.getValueType());
6539 AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff, N2: Const);
6540 }
6541 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff: AddPtr,
6542 CallSeqStart,
6543 Flags, DAG, dl);
6544
6545 // Load the slot into the register.
6546 SDValue Load =
6547 DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
6548 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6549 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6550
6551 // Done with this argument.
6552 ArgOffset += PtrByteSize;
6553 continue;
6554 }
6555
6556 // For aggregates larger than PtrByteSize, copy the pieces of the
6557 // object that fit into registers from the parameter save area.
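  // For example, a 20-byte aggregate with three GPRs still free is split
  // below into two 8-byte loads (offsets 0 and 8) and one 4-byte extending
  // load (offset 16), each passed in its own GPR; pieces left without a GPR
  // remain in the parameter save area copy made above.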
6558 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6559 SDValue Const = DAG.getConstant(Val: j, DL: dl, VT: PtrOff.getValueType());
6560 SDValue AddArg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: Arg, N2: Const);
6561 if (GPR_idx != NumGPRs) {
6562 unsigned LoadSizeInBits = std::min(a: PtrByteSize, b: (Size - j)) * 8;
6563 EVT ObjType = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadSizeInBits);
6564 SDValue Load = DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: PtrVT, Chain, Ptr: AddArg,
6565 PtrInfo: MachinePointerInfo(), MemVT: ObjType);
6566
6567 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6568 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6569 ArgOffset += PtrByteSize;
6570 } else {
6571 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6572 break;
6573 }
6574 }
6575 continue;
6576 }
6577
6578 switch (Arg.getSimpleValueType().SimpleTy) {
6579 default: llvm_unreachable("Unexpected ValueType for argument!");
6580 case MVT::i1:
6581 case MVT::i32:
6582 case MVT::i64:
6583 if (Flags.isNest()) {
6584 // The 'nest' parameter, if any, is passed in R11.
6585 RegsToPass.push_back(Elt: std::make_pair(x: PPC::X11, y&: Arg));
6586 break;
6587 }
6588
6589 // These can be scalar arguments or elements of an integer array type
6590 // passed directly. Clang may use those instead of "byval" aggregate
6591 // types to avoid forcing arguments to memory unnecessarily.
6592 if (GPR_idx != NumGPRs) {
6593 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Arg));
6594 } else {
6595 if (IsFastCall)
6596 ComputePtrOff();
6597
6598 assert(HasParameterArea &&
6599 "Parameter area must exist to pass an argument in memory.");
6600 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6601 isPPC64: true, isTailCall: CFlags.IsTailCall, isVector: false, MemOpChains,
6602 TailCallArguments, dl);
6603 if (IsFastCall)
6604 ArgOffset += PtrByteSize;
6605 }
6606 if (!IsFastCall)
6607 ArgOffset += PtrByteSize;
6608 break;
6609 case MVT::f32:
6610 case MVT::f64: {
6611 // These can be scalar arguments or elements of a float array type
6612 // passed directly. The latter are used to implement ELFv2 homogeneous
6613 // float aggregates.
6614
6615 // Named arguments go into FPRs first, and once they overflow, the
6616 // remaining arguments go into GPRs and then the parameter save area.
6617 // Unnamed arguments for vararg functions always go to GPRs and
6618 // then the parameter save area. For now, we always put arguments to vararg
6619 // routines in both locations (FPR *and* GPR or stack slot).
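  // For illustration, consider an ELFv2 homogeneous aggregate of f32 values
  // once the FPRs run out: an element at an odd 4-byte offset is paired with
  // its predecessor into a single i64 GPR (halves swapped on big-endian),
  // while a final even element occupies half a GPR on its own, shifted into
  // the most significant half on big-endian.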
6620 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6621 bool NeededLoad = false;
6622
6623 // First load the argument into the next available FPR.
6624 if (FPR_idx != NumFPRs)
6625 RegsToPass.push_back(Elt: std::make_pair(x: FPR[FPR_idx++], y&: Arg));
6626
6627 // Next, load the argument into GPR or stack slot if needed.
6628 if (!NeedGPROrStack)
6629 ;
6630 else if (GPR_idx != NumGPRs && !IsFastCall) {
6631 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6632 // once we support fp <-> gpr moves.
6633
6634 // In the non-vararg case, this can only ever happen in the
6635 // presence of f32 array types, since otherwise we never run
6636 // out of FPRs before running out of GPRs.
6637 SDValue ArgVal;
6638
6639 // Double values are always passed in a single GPR.
6640 if (Arg.getValueType() != MVT::f32) {
6641 ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i64, Operand: Arg);
6642
6643 // Non-array float values are extended and passed in a GPR.
6644 } else if (!Flags.isInConsecutiveRegs()) {
6645 ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Arg);
6646 ArgVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i64, Operand: ArgVal);
6647
6648 // If we have an array of floats, we collect every odd element
6649 // together with its predecessor into one GPR.
6650 } else if (ArgOffset % PtrByteSize != 0) {
6651 SDValue Lo, Hi;
6652 Lo = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: OutVals[i - 1]);
6653 Hi = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Arg);
6654 if (!isLittleEndian)
6655 std::swap(a&: Lo, b&: Hi);
6656 ArgVal = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: Lo, N2: Hi);
6657
6658 // The final element, if even, goes into the first half of a GPR.
6659 } else if (Flags.isInConsecutiveRegsLast()) {
6660 ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Arg);
6661 ArgVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i64, Operand: ArgVal);
6662 if (!isLittleEndian)
6663 ArgVal = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i64, N1: ArgVal,
6664 N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i32));
6665
6666 // Non-final even elements are skipped; they will be handled
6667 // together with the subsequent argument on the next go-around.
6668 } else
6669 ArgVal = SDValue();
6670
6671 if (ArgVal.getNode())
6672 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: ArgVal));
6673 } else {
6674 if (IsFastCall)
6675 ComputePtrOff();
6676
6677 // Single-precision floating-point values are mapped to the
6678 // second (rightmost) word of the stack doubleword.
6679 if (Arg.getValueType() == MVT::f32 &&
6680 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6681 SDValue ConstFour = DAG.getConstant(Val: 4, DL: dl, VT: PtrOff.getValueType());
6682 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff, N2: ConstFour);
6683 }
6684
6685 assert(HasParameterArea &&
6686 "Parameter area must exist to pass an argument in memory.");
6687 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6688 isPPC64: true, isTailCall: CFlags.IsTailCall, isVector: false, MemOpChains,
6689 TailCallArguments, dl);
6690
6691 NeededLoad = true;
6692 }
6693 // When passing an array of floats, the array occupies consecutive
6694 // space in the argument area; only round up to the next doubleword
6695 // at the end of the array. Otherwise, each float takes 8 bytes.
6696 if (!IsFastCall || NeededLoad) {
6697 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6698 Flags.isInConsecutiveRegs()) ? 4 : 8;
6699 if (Flags.isInConsecutiveRegsLast())
6700 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6701 }
6702 break;
6703 }
6704 case MVT::v4f32:
6705 case MVT::v4i32:
6706 case MVT::v8i16:
6707 case MVT::v16i8:
6708 case MVT::v2f64:
6709 case MVT::v2i64:
6710 case MVT::v1i128:
6711 case MVT::f128:
6712 // These can be scalar arguments or elements of a vector array type
6713 // passed directly. The latter are used to implement ELFv2 homogeneous
6714 // vector aggregates.
6715
6716 // For a varargs call, named arguments go into VRs or on the stack as
6717 // usual; unnamed arguments always go to the stack or the corresponding
6718 // GPRs when within range. For now, we always put the value in both
6719 // locations (or even all three).
6720 if (CFlags.IsVarArg) {
6721 assert(HasParameterArea &&
6722 "Parameter area must exist if we have a varargs call.");
6723 // We could elide this store in the case where the object fits
6724 // entirely in R registers. Maybe later.
6725 SDValue Store =
6726 DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
6727 MemOpChains.push_back(Elt: Store);
6728 if (VR_idx != NumVRs) {
6729 SDValue Load =
6730 DAG.getLoad(VT: MVT::v4f32, dl, Chain: Store, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
6731 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6732 RegsToPass.push_back(Elt: std::make_pair(x: VR[VR_idx++], y&: Load));
6733 }
6734 ArgOffset += 16;
6735 for (unsigned i=0; i<16; i+=PtrByteSize) {
6736 if (GPR_idx == NumGPRs)
6737 break;
6738 SDValue Ix = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff,
6739 N2: DAG.getConstant(Val: i, DL: dl, VT: PtrVT));
6740 SDValue Load =
6741 DAG.getLoad(VT: PtrVT, dl, Chain: Store, Ptr: Ix, PtrInfo: MachinePointerInfo());
6742 MemOpChains.push_back(Elt: Load.getValue(R: 1));
6743 RegsToPass.push_back(Elt: std::make_pair(x: GPR[GPR_idx++], y&: Load));
6744 }
6745 break;
6746 }
6747
6748 // Non-varargs Altivec params go into VRs or on the stack.
6749 if (VR_idx != NumVRs) {
6750 RegsToPass.push_back(Elt: std::make_pair(x: VR[VR_idx++], y&: Arg));
6751 } else {
6752 if (IsFastCall)
6753 ComputePtrOff();
6754
6755 assert(HasParameterArea &&
6756 "Parameter area must exist to pass an argument in memory.");
6757 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6758 isPPC64: true, isTailCall: CFlags.IsTailCall, isVector: true, MemOpChains,
6759 TailCallArguments, dl);
6760 if (IsFastCall)
6761 ArgOffset += 16;
6762 }
6763
6764 if (!IsFastCall)
6765 ArgOffset += 16;
6766 break;
6767 }
6768 }
6769
6770 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6771 "mismatch in size of parameter area");
6772 (void)NumBytesActuallyUsed;
6773
6774 if (!MemOpChains.empty())
6775 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);
6776
6777 // Check if this is an indirect call (MTCTR/BCTRL).
6778 // See prepareDescriptorIndirectCall and buildCallOperands for more
6779 // information about calls through function pointers in the 64-bit SVR4 ABI.
6780 if (CFlags.IsIndirect) {
6781 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6782 // caller in the TOC save area.
6783 if (isTOCSaveRestoreRequired(Subtarget)) {
6784 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6785 // Load r2 into a virtual register and store it to the TOC save area.
6786 setUsesTOCBasePtr(DAG);
6787 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: PPC::X2, VT: MVT::i64);
6788 // TOC save area offset.
6789 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6790 SDValue PtrOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl);
6791 SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
6792 Chain = DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: AddPtr,
6793 PtrInfo: MachinePointerInfo::getStack(
6794 MF&: DAG.getMachineFunction(), Offset: TOCSaveOffset));
6795 }
6796 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6797 // This does not mean the MTCTR instruction must use R12; it's easier
6798 // to model this as an extra parameter, so do that.
6799 if (isELFv2ABI && !CFlags.IsPatchPoint)
6800 RegsToPass.push_back(Elt: std::make_pair(x: (unsigned)PPC::X12, y&: Callee));
6801 }
6802
6803 // Build a sequence of copy-to-reg nodes chained together with token chain
6804 // and flag operands which copy the outgoing args into the appropriate regs.
6805 SDValue InGlue;
6806 for (const auto &[Reg, N] : RegsToPass) {
6807 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, Glue: InGlue);
6808 InGlue = Chain.getValue(R: 1);
6809 }
6810
6811 if (CFlags.IsTailCall && !IsSibCall)
6812 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6813 TailCallArguments);
6814
6815 return FinishCall(CFlags, dl, DAG, RegsToPass, Glue: InGlue, Chain, CallSeqStart,
6816 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6817}
6818
6819// Returns true when the shadow of a general purpose argument register
6820// in the parameter save area is aligned to at least 'RequiredAlign'.
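// For example, with the AIX linkage sizes (24 bytes on 32-bit, 48 bytes on
// 64-bit), R5's shadow sits at SP+32 and X3's at SP+48, both 16-byte aligned,
// whereas R3 (SP+24) and X4 (SP+56) are only 8-byte aligned and R4 (SP+28)
// only 4-byte aligned; the cases below encode exactly this pattern.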
6821static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6822 assert(RequiredAlign.value() <= 16 &&
6823 "Required alignment greater than stack alignment.");
6824 switch (Reg) {
6825 default:
6826 report_fatal_error(reason: "called on invalid register.");
6827 case PPC::R5:
6828 case PPC::R9:
6829 case PPC::X3:
6830 case PPC::X5:
6831 case PPC::X7:
6832 case PPC::X9:
6833 // These registers are 16-byte aligned, which is the strictest alignment
6834 // we can support.
6835 return true;
6836 case PPC::R3:
6837 case PPC::R7:
6838 case PPC::X4:
6839 case PPC::X6:
6840 case PPC::X8:
6841 case PPC::X10:
6842 // The shadow of these registers in the PSA is 8 byte aligned.
6843 return RequiredAlign <= 8;
6844 case PPC::R4:
6845 case PPC::R6:
6846 case PPC::R8:
6847 case PPC::R10:
6848 return RequiredAlign <= 4;
6849 }
6850}
6851
6852static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6853 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6854 CCState &S) {
6855 AIXCCState &State = static_cast<AIXCCState &>(S);
6856 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6857 State.getMachineFunction().getSubtarget());
6858 const bool IsPPC64 = Subtarget.isPPC64();
6859 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6860 const Align PtrAlign(PtrSize);
6861 const Align StackAlign(16);
6862 const MVT RegVT = Subtarget.getScalarIntVT();
6863
6864 if (ValVT == MVT::f128)
6865 report_fatal_error(reason: "f128 is unimplemented on AIX.");
6866
6867 if (ArgFlags.isNest())
6868 report_fatal_error(reason: "Nest arguments are unimplemented.");
6869
6870 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6871 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6872 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6873 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6874 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6875 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6876
6877 static const MCPhysReg VR[] = {// Vector registers.
6878 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6879 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6880 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6881
6882 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6883
6884 if (ArgFlags.isByVal()) {
6885 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6886 if (ByValAlign > StackAlign)
6887 report_fatal_error(reason: "Pass-by-value arguments with alignment greater than "
6888 "16 are not supported.");
6889
6890 const unsigned ByValSize = ArgFlags.getByValSize();
6891 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6892
6893 // An empty aggregate parameter takes up no storage and no registers,
6894 // but needs a MemLoc for a stack slot for the formal arguments side.
6895 if (ByValSize == 0) {
6896 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT: MVT::INVALID_SIMPLE_VALUE_TYPE,
6897 Offset: State.getStackSize(), LocVT: RegVT, HTP: LocInfo));
6898 return false;
6899 }
6900
6901 // Shadow allocate any registers that are not properly aligned.
6902 unsigned NextReg = State.getFirstUnallocated(Regs: GPRs);
6903 while (NextReg != GPRs.size() &&
6904 !isGPRShadowAligned(Reg: GPRs[NextReg], RequiredAlign: ObjAlign)) {
6905 // Shadow allocate the next register since its alignment is not strict enough.
6906 MCRegister Reg = State.AllocateReg(Regs: GPRs);
6907 // Allocate the stack space shadowed by said register.
6908 State.AllocateStack(Size: PtrSize, Alignment: PtrAlign);
6909 assert(Reg && "Allocating register unexpectedly failed.");
6910 (void)Reg;
6911 NextReg = State.getFirstUnallocated(Regs: GPRs);
6912 }
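    // For example, a 16-byte-aligned by-value argument arriving on 64-bit
    // when X4 is the next free GPR burns X4 (and the 8 bytes of parameter
    // save area it shadows) so that the object starts at X5's 16-byte
    // aligned shadow.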
6913
6914 const unsigned StackSize = alignTo(Size: ByValSize, A: ObjAlign);
6915 unsigned Offset = State.AllocateStack(Size: StackSize, Alignment: ObjAlign);
6916 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6917 if (MCRegister Reg = State.AllocateReg(Regs: GPRs))
6918 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
6919 else {
6920 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT: MVT::INVALID_SIMPLE_VALUE_TYPE,
6921 Offset, LocVT: MVT::INVALID_SIMPLE_VALUE_TYPE,
6922 HTP: LocInfo));
6923 break;
6924 }
6925 }
6926 return false;
6927 }
6928
6929 // Arguments always reserve space in the parameter save area.
6930 switch (ValVT.SimpleTy) {
6931 default:
6932 report_fatal_error(reason: "Unhandled value type for argument.");
6933 case MVT::i64:
6934 // i64 arguments should have been split to i32 for PPC32.
6935 assert(IsPPC64 && "PPC32 should have split i64 values.");
6936 [[fallthrough]];
6937 case MVT::i1:
6938 case MVT::i32: {
6939 const unsigned Offset = State.AllocateStack(Size: PtrSize, Alignment: PtrAlign);
6940 // AIX integer arguments are always passed in register width.
6941 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6942 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6943 : CCValAssign::LocInfo::ZExt;
6944 if (MCRegister Reg = State.AllocateReg(Regs: GPRs))
6945 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
6946 else
6947 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT: RegVT, HTP: LocInfo));
6948
6949 return false;
6950 }
6951 case MVT::f32:
6952 case MVT::f64: {
6953 // Parameter save area (PSA) is reserved even if the float is passed in an FPR.
6954 const unsigned StoreSize = LocVT.getStoreSize();
6955 // Floats are always 4-byte aligned in the PSA on AIX.
6956 // This includes f64 in 64-bit mode for ABI compatibility.
6957 const unsigned Offset =
6958 State.AllocateStack(Size: IsPPC64 ? 8 : StoreSize, Alignment: Align(4));
6959 MCRegister FReg = State.AllocateReg(Regs: FPR);
6960 if (FReg)
6961 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: FReg, LocVT, HTP: LocInfo));
6962
6963 // Reserve and initialize GPRs or initialize the PSA as required.
6964 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6965 if (MCRegister Reg = State.AllocateReg(Regs: GPRs)) {
6966 assert(FReg && "An FPR should be available when a GPR is reserved.");
6967 if (State.isVarArg()) {
6968 // Successfully reserved GPRs are only initialized for vararg calls.
6969 // Custom handling is required for:
6970 // f64 in PPC32 needs to be split into 2 GPRs.
6971 // f32 in PPC64 needs to occupy only the lower 32 bits of a 64-bit GPR.
6972 State.addLoc(
6973 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
6974 }
6975 } else {
6976 // If there are insufficient GPRs, the PSA needs to be initialized.
6977 // Initialization occurs even if an FPR was initialized for
6978 // compatibility with the AIX XL compiler. The full memory for the
6979 // argument will be initialized even if a prior word is saved in a GPR.
6980 // A custom memLoc is used when the argument is also passed in an FPR so
6981 // that the callee handling can skip over it easily.
6982 State.addLoc(
6983 V: FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6984 HTP: LocInfo)
6985 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
6986 break;
6987 }
6988 }
6989
6990 return false;
6991 }
6992 case MVT::v4f32:
6993 case MVT::v4i32:
6994 case MVT::v8i16:
6995 case MVT::v16i8:
6996 case MVT::v2i64:
6997 case MVT::v2f64:
6998 case MVT::v1i128: {
6999 const unsigned VecSize = 16;
7000 const Align VecAlign(VecSize);
7001
7002 if (!State.isVarArg()) {
7003 // If there are vector registers remaining we don't consume any stack
7004 // space.
7005 if (MCRegister VReg = State.AllocateReg(Regs: VR)) {
7006 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: VReg, LocVT, HTP: LocInfo));
7007 return false;
7008 }
7009 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7010 // might be allocated in the portion of the PSA that is shadowed by the
7011 // GPRs.
7012 const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
7013 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
7014 return false;
7015 }
7016
7017 unsigned NextRegIndex = State.getFirstUnallocated(Regs: GPRs);
7018 // Burn any underaligned registers and their shadowed stack space until
7019 // we reach the required alignment.
7020 while (NextRegIndex != GPRs.size() &&
7021 !isGPRShadowAligned(Reg: GPRs[NextRegIndex], RequiredAlign: VecAlign)) {
7022 // Shadow allocate register and its stack shadow.
7023 MCRegister Reg = State.AllocateReg(Regs: GPRs);
7024 State.AllocateStack(Size: PtrSize, Alignment: PtrAlign);
7025 assert(Reg && "Allocating register unexpectedly failed.");
7026 (void)Reg;
7027 NextRegIndex = State.getFirstUnallocated(Regs: GPRs);
7028 }
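    // For example, on 32-bit a variadic vector that finds R6 as the next free
    // GPR burns R6, R7 and R8 (and the 12 bytes of PSA they shadow) so the
    // value starts at R9's 16-byte aligned shadow, which is then handled by
    // the R9/R10 split case below.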
7029
7030 // Vectors that are passed as fixed arguments are handled differently.
7031 // They are passed in VRs if any are available (unlike arguments passed
7032 // through ellipses) and shadow GPRs (unlike arguments to non-vararg
7033 // functions).
7034 if (State.isFixed(ValNo)) {
7035 if (MCRegister VReg = State.AllocateReg(Regs: VR)) {
7036 State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, Reg: VReg, LocVT, HTP: LocInfo));
7037 // Shadow allocate GPRs and stack space even though we pass in a VR.
7038 for (unsigned I = 0; I != VecSize; I += PtrSize)
7039 State.AllocateReg(Regs: GPRs);
7040 State.AllocateStack(Size: VecSize, Alignment: VecAlign);
7041 return false;
7042 }
7043 // No vector registers remain so pass on the stack.
7044 const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
7045 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
7046 return false;
7047 }
7048
7049 // If all GPRS are consumed then we pass the argument fully on the stack.
7050 if (NextRegIndex == GPRs.size()) {
7051 const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
7052 State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
7053 return false;
7054 }
7055
7056 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7057 // half of the argument, and then need to pass the remaining half on the
7058 // stack.
7059 if (GPRs[NextRegIndex] == PPC::R9) {
7060 const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
7061 State.addLoc(
7062 V: CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
7063
7064 const MCRegister FirstReg = State.AllocateReg(Reg: PPC::R9);
7065 const MCRegister SecondReg = State.AllocateReg(Reg: PPC::R10);
7066 assert(FirstReg && SecondReg &&
7067 "Allocating R9 or R10 unexpectedly failed.");
7068 State.addLoc(
7069 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: FirstReg, LocVT: RegVT, HTP: LocInfo));
7070 State.addLoc(
7071 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg: SecondReg, LocVT: RegVT, HTP: LocInfo));
7072 return false;
7073 }
7074
7075 // We have enough GPRs to fully pass the vector argument, and we have
7076 // already consumed any underaligned registers. Start with the custom
7077 // MemLoc and then the custom RegLocs.
7078 const unsigned Offset = State.AllocateStack(Size: VecSize, Alignment: VecAlign);
7079 State.addLoc(
7080 V: CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo));
7081 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7082 const MCRegister Reg = State.AllocateReg(Regs: GPRs);
7083 assert(Reg && "Failed to allocate register for vararg vector argument");
7084 State.addLoc(
7085 V: CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT: RegVT, HTP: LocInfo));
7086 }
7087 return false;
7088 }
7089 }
7090 return true;
7091}
7092
7093// So far, this function is only used by LowerFormalArguments_AIX()
7094static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7095 bool IsPPC64,
7096 bool HasP8Vector,
7097 bool HasVSX) {
7098 assert((IsPPC64 || SVT != MVT::i64) &&
7099 "i64 should have been split for 32-bit codegen.");
7100
7101 switch (SVT) {
7102 default:
7103 report_fatal_error(reason: "Unexpected value type for formal argument");
7104 case MVT::i1:
7105 case MVT::i32:
7106 case MVT::i64:
7107 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7108 case MVT::f32:
7109 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7110 case MVT::f64:
7111 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7112 case MVT::v4f32:
7113 case MVT::v4i32:
7114 case MVT::v8i16:
7115 case MVT::v16i8:
7116 case MVT::v2i64:
7117 case MVT::v2f64:
7118 case MVT::v1i128:
7119 return &PPC::VRRCRegClass;
7120 }
7121}
7122
7123static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7124 SelectionDAG &DAG, SDValue ArgValue,
7125 MVT LocVT, const SDLoc &dl) {
7126 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7127 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7128
7129 if (Flags.isSExt())
7130 ArgValue = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: LocVT, N1: ArgValue,
7131 N2: DAG.getValueType(ValVT));
7132 else if (Flags.isZExt())
7133 ArgValue = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: LocVT, N1: ArgValue,
7134 N2: DAG.getValueType(ValVT));
7135
7136 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: ValVT, Operand: ArgValue);
7137}
7138
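// Maps a GPR argument register to the offset of its shadow slot in the AIX
// parameter save area; e.g. with the 48-byte 64-bit linkage area, X5 maps to
// 48 + 8 * 2 = 64 bytes from the stack pointer.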
7139static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7140 const unsigned LASize = FL->getLinkageSize();
7141
7142 if (PPC::GPRCRegClass.contains(Reg)) {
7143 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7144 "Reg must be a valid argument register!");
7145 return LASize + 4 * (Reg - PPC::R3);
7146 }
7147
7148 if (PPC::G8RCRegClass.contains(Reg)) {
7149 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7150 "Reg must be a valid argument register!");
7151 return LASize + 8 * (Reg - PPC::X3);
7152 }
7153
7154 llvm_unreachable("Only general purpose registers expected.");
7155}
7156
7157// AIX ABI Stack Frame Layout:
7158//
7159// Low Memory +--------------------------------------------+
7160// SP +---> | Back chain | ---+
7161// | +--------------------------------------------+ |
7162// | | Saved Condition Register | |
7163// | +--------------------------------------------+ |
7164// | | Saved Linkage Register | |
7165// | +--------------------------------------------+ | Linkage Area
7166// | | Reserved for compilers | |
7167// | +--------------------------------------------+ |
7168// | | Reserved for binders | |
7169// | +--------------------------------------------+ |
7170// | | Saved TOC pointer | ---+
7171// | +--------------------------------------------+
7172// | | Parameter save area |
7173// | +--------------------------------------------+
7174// | | Alloca space |
7175// | +--------------------------------------------+
7176// | | Local variable space |
7177// | +--------------------------------------------+
7178// | | Float/int conversion temporary |
7179// | +--------------------------------------------+
7180// | | Save area for AltiVec registers |
7181// | +--------------------------------------------+
7182// | | AltiVec alignment padding |
7183// | +--------------------------------------------+
7184// | | Save area for VRSAVE register |
7185// | +--------------------------------------------+
7186// | | Save area for General Purpose registers |
7187// | +--------------------------------------------+
7188// | | Save area for Floating Point registers |
7189// | +--------------------------------------------+
7190// +---- | Back chain |
7191// High Memory +--------------------------------------------+
7192//
7193// Specifications:
7194// AIX 7.2 Assembler Language Reference
7195// Subroutine linkage convention
7196
7197SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7198 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7199 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7200 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7201
7202 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7203 CallConv == CallingConv::Fast) &&
7204 "Unexpected calling convention!");
7205
7206 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7207 report_fatal_error(reason: "Tail call support is unimplemented on AIX.");
7208
7209 if (useSoftFloat())
7210 report_fatal_error(reason: "Soft float support is unimplemented on AIX.");
7211
7212 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7213
7214 const bool IsPPC64 = Subtarget.isPPC64();
7215 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7216
7217 // Assign locations to all of the incoming arguments.
7218 SmallVector<CCValAssign, 16> ArgLocs;
7219 MachineFunction &MF = DAG.getMachineFunction();
7220 MachineFrameInfo &MFI = MF.getFrameInfo();
7221 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7222 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7223
7224 const EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
7225 // Reserve space for the linkage area on the stack.
7226 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7227 CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align(PtrByteSize));
7228 uint64_t SaveStackPos = CCInfo.getStackSize();
7229 bool SaveParams = MF.getFunction().hasFnAttribute(Kind: "save-reg-params");
7230 CCInfo.AnalyzeFormalArguments(Ins, Fn: CC_AIX);
7231
7232 SmallVector<SDValue, 8> MemOps;
7233
7234 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7235 CCValAssign &VA = ArgLocs[I++];
7236 MVT LocVT = VA.getLocVT();
7237 MVT ValVT = VA.getValVT();
7238 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7239
7240 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7241 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7242 // For compatibility with the AIX XL compiler, the float args in the
7243 // parameter save area are initialized even if the argument is available
7244 // in a register. The caller is required to initialize both the register
7245 // and memory, however, the callee can choose to expect it in either.
7246 // The memloc is dismissed here because the argument is retrieved from
7247 // the register.
7248 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7249 continue;
7250
7251 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7252 const TargetRegisterClass *RegClass = getRegClassForSVT(
7253 SVT: LocVT.SimpleTy, IsPPC64, HasP8Vector: Subtarget.hasP8Vector(), HasVSX: Subtarget.hasVSX());
7254 // On PPC64, the debugger assumes extended 8-byte values are stored from a GPR.
7255 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7256 const Register VReg = MF.addLiveIn(PReg: VA.getLocReg(), RC: RegClass);
7257 SDValue Parm = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: SaveVT);
7258 int FI = MFI.CreateFixedObject(Size: SaveVT.getStoreSize(), SPOffset: SaveStackPos, IsImmutable: true);
7259 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7260 SDValue StoreReg = DAG.getStore(Chain, dl, Val: Parm, Ptr: FIN,
7261 PtrInfo: MachinePointerInfo(), Alignment: Align(PtrByteSize));
7262 SaveStackPos = alignTo(Value: SaveStackPos + SaveVT.getStoreSize(), Align: PtrByteSize);
7263 MemOps.push_back(Elt: StoreReg);
7264 }
7265
7266 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7267 unsigned StoreSize =
7268 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7269 SaveStackPos = alignTo(Value: SaveStackPos + StoreSize, Align: PtrByteSize);
7270 }
7271
7272 auto HandleMemLoc = [&]() {
7273 const unsigned LocSize = LocVT.getStoreSize();
7274 const unsigned ValSize = ValVT.getStoreSize();
7275 assert((ValSize <= LocSize) &&
7276 "Object size is larger than size of MemLoc");
7277 int CurArgOffset = VA.getLocMemOffset();
7278 // Objects are right-justified because AIX is big-endian.
7279 if (LocSize > ValSize)
7280 CurArgOffset += LocSize - ValSize;
7281 // Potential tail calls could cause overwriting of argument stack slots.
7282 const bool IsImmutable =
7283 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7284 (CallConv == CallingConv::Fast));
7285 int FI = MFI.CreateFixedObject(Size: ValSize, SPOffset: CurArgOffset, IsImmutable);
7286 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7287 SDValue ArgValue =
7288 DAG.getLoad(VT: ValVT, dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo());
7289
7290 // While the ABI specifies the argument type is (sign or zero) extended
7291 // out to register width, not all code is compliant. We truncate and
7292 // re-extend to be more forgiving of these callers when the argument type
7293 // is smaller than register width.
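      // For example, an i8 argument that should have arrived zero-extended to
      // register width may carry garbage in its upper bits if the caller was
      // not compliant; truncating the loaded value to i8 and re-extending it
      // gives this function a well-defined value.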
7294 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7295 ValVT.isInteger() &&
7296 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7297 SDValue ArgValueTrunc = DAG.getNode(
7298 Opcode: ISD::TRUNCATE, DL: dl, VT: ArgVT.getSimpleVT() == MVT::i1 ? MVT::i8 : ArgVT,
7299 Operand: ArgValue);
7300 SDValue ArgValueExt =
7301 ArgSignExt ? DAG.getSExtOrTrunc(Op: ArgValueTrunc, DL: dl, VT: ValVT)
7302 : DAG.getZExtOrTrunc(Op: ArgValueTrunc, DL: dl, VT: ValVT);
7303 InVals.push_back(Elt: ArgValueExt);
7304 } else {
7305 InVals.push_back(Elt: ArgValue);
7306 }
7307 };
7308
7309 // Vector arguments to VaArg functions are passed both on the stack, and
7310 // in any available GPRs. Load the value from the stack and add the GPRs
7311 // as live ins.
7312 if (VA.isMemLoc() && VA.needsCustom()) {
7313 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7314 assert(isVarArg && "Only use custom memloc for vararg.");
7315 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7316 // matching custom RegLocs.
7317 const unsigned OriginalValNo = VA.getValNo();
7318 (void)OriginalValNo;
7319
7320 auto HandleCustomVecRegLoc = [&]() {
7321 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7322 "Missing custom RegLoc.");
7323 VA = ArgLocs[I++];
7324 assert(VA.getValVT().isVector() &&
7325 "Unexpected Val type for custom RegLoc.");
7326 assert(VA.getValNo() == OriginalValNo &&
7327 "ValNo mismatch between custom MemLoc and RegLoc.");
7328 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7329 MF.addLiveIn(PReg: VA.getLocReg(),
7330 RC: getRegClassForSVT(SVT, IsPPC64, HasP8Vector: Subtarget.hasP8Vector(),
7331 HasVSX: Subtarget.hasVSX()));
7332 };
7333
7334 HandleMemLoc();
7335 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7336 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7337 // R10.
7338 HandleCustomVecRegLoc();
7339 HandleCustomVecRegLoc();
7340
7341 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7342 // we passed the vector in R5, R6, R7 and R8.
7343 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7344 assert(!IsPPC64 &&
7345 "Only 2 custom RegLocs expected for 64-bit codegen.");
7346 HandleCustomVecRegLoc();
7347 HandleCustomVecRegLoc();
7348 }
7349
7350 continue;
7351 }
7352
7353 if (VA.isRegLoc()) {
7354 if (VA.getValVT().isScalarInteger())
7355 FuncInfo->appendParameterType(Type: PPCFunctionInfo::FixedType);
7356 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7357 switch (VA.getValVT().SimpleTy) {
7358 default:
7359 report_fatal_error(reason: "Unhandled value type for argument.");
7360 case MVT::f32:
7361 FuncInfo->appendParameterType(Type: PPCFunctionInfo::ShortFloatingPoint);
7362 break;
7363 case MVT::f64:
7364 FuncInfo->appendParameterType(Type: PPCFunctionInfo::LongFloatingPoint);
7365 break;
7366 }
7367 } else if (VA.getValVT().isVector()) {
7368 switch (VA.getValVT().SimpleTy) {
7369 default:
7370 report_fatal_error(reason: "Unhandled value type for argument.");
7371 case MVT::v16i8:
7372 FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorChar);
7373 break;
7374 case MVT::v8i16:
7375 FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorShort);
7376 break;
7377 case MVT::v4i32:
7378 case MVT::v2i64:
7379 case MVT::v1i128:
7380 FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorInt);
7381 break;
7382 case MVT::v4f32:
7383 case MVT::v2f64:
7384 FuncInfo->appendParameterType(Type: PPCFunctionInfo::VectorFloat);
7385 break;
7386 }
7387 }
7388 }
7389
7390 if (Flags.isByVal() && VA.isMemLoc()) {
7391 const unsigned Size =
7392 alignTo(Value: Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7393 Align: PtrByteSize);
7394 const int FI = MF.getFrameInfo().CreateFixedObject(
7395 Size, SPOffset: VA.getLocMemOffset(), /* IsImmutable */ false,
7396 /* IsAliased */ isAliased: true);
7397 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7398 InVals.push_back(Elt: FIN);
7399
7400 continue;
7401 }
7402
7403 if (Flags.isByVal()) {
7404 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7405
7406 const MCPhysReg ArgReg = VA.getLocReg();
7407 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7408
7409 const unsigned StackSize = alignTo(Value: Flags.getByValSize(), Align: PtrByteSize);
7410 const int FI = MF.getFrameInfo().CreateFixedObject(
7411 Size: StackSize, SPOffset: mapArgRegToOffsetAIX(Reg: ArgReg, FL), /* IsImmutable */ false,
7412 /* IsAliased */ isAliased: true);
7413 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
7414 InVals.push_back(Elt: FIN);
7415
7416 // Add live ins for all the RegLocs for the same ByVal.
7417 const TargetRegisterClass *RegClass =
7418 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7419
7420 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7421 unsigned Offset) {
7422 const Register VReg = MF.addLiveIn(PReg: PhysReg, RC: RegClass);
7423 // Since the caller's side has left-justified the aggregate in the
7424 // register, we can simply store the entire register into the stack
7425 // slot.
7426 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: LocVT);
7427 // The store to the fixedstack object is needed because accessing a
7428 // field of the ByVal will use a gep and load. Ideally we would optimize
7429 // to extract the value from the register directly, and elide the
7430 // stores when the argument's address is not taken, but that will need
7431 // to be future work.
7432 SDValue Store = DAG.getStore(
7433 Chain: CopyFrom.getValue(R: 1), dl, Val: CopyFrom,
7434 Ptr: DAG.getObjectPtrOffset(SL: dl, Ptr: FIN, Offset: TypeSize::getFixed(ExactSize: Offset)),
7435 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset));
7436
7437 MemOps.push_back(Elt: Store);
7438 };
7439
7440 unsigned Offset = 0;
7441 HandleRegLoc(VA.getLocReg(), Offset);
7442 Offset += PtrByteSize;
7443 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7444 Offset += PtrByteSize) {
7445 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7446 "RegLocs should be for ByVal argument.");
7447
7448 const CCValAssign RL = ArgLocs[I++];
7449 HandleRegLoc(RL.getLocReg(), Offset);
7450 FuncInfo->appendParameterType(Type: PPCFunctionInfo::FixedType);
7451 }
7452
7453 if (Offset != StackSize) {
7454 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7455 "Expected MemLoc for remaining bytes.");
7456 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7457 // Consume the MemLoc. The InVal has already been emitted, so nothing
7458 // more needs to be done.
7459 ++I;
7460 }
7461
7462 continue;
7463 }
7464
7465 if (VA.isRegLoc() && !VA.needsCustom()) {
7466 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7467 Register VReg =
7468 MF.addLiveIn(PReg: VA.getLocReg(),
7469 RC: getRegClassForSVT(SVT, IsPPC64, HasP8Vector: Subtarget.hasP8Vector(),
7470 HasVSX: Subtarget.hasVSX()));
7471 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: LocVT);
7472 if (ValVT.isScalarInteger() &&
7473 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7474 ArgValue =
7475 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7476 }
7477 InVals.push_back(Elt: ArgValue);
7478 continue;
7479 }
7480 if (VA.isMemLoc()) {
7481 HandleMemLoc();
7482 continue;
7483 }
7484 }
7485
7486 // On AIX a minimum of 8 words is saved to the parameter save area.
7487 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7488 // Area that is at least reserved in the caller of this function.
7489 unsigned CallerReservedArea = std::max<unsigned>(
7490 a: CCInfo.getStackSize(), b: LinkageSize + MinParameterSaveArea);
7491
7492 // Set the size that is at least reserved in caller of this function. Tail
7493 // call optimized function's reserved stack space needs to be aligned so
7494 // that taking the difference between two stack areas will result in an
7495 // aligned stack.
7496 CallerReservedArea =
7497 EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: CallerReservedArea);
7498 FuncInfo->setMinReservedArea(CallerReservedArea);
7499
7500 if (isVarArg) {
7501 FuncInfo->setVarArgsFrameIndex(
7502 MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: CCInfo.getStackSize(), IsImmutable: true));
7503 SDValue FIN = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT);
7504
7505 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7506 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7507
7508 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7509 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7510 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7511
7512 // The fixed integer arguments of a variadic function are stored to the
7513 // VarArgsFrameIndex on the stack so that they may be loaded by
7514 // dereferencing the result of va_next.
7515 for (unsigned GPRIndex =
7516 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7517 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7518
7519 const Register VReg =
7520 IsPPC64 ? MF.addLiveIn(PReg: GPR_64[GPRIndex], RC: &PPC::G8RCRegClass)
7521 : MF.addLiveIn(PReg: GPR_32[GPRIndex], RC: &PPC::GPRCRegClass);
7522
7523 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT);
7524 SDValue Store =
7525 DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo());
7526 MemOps.push_back(Elt: Store);
7527 // Increment the address for the next argument to store.
7528 SDValue PtrOff = DAG.getConstant(Val: PtrByteSize, DL: dl, VT: PtrVT);
7529 FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff);
7530 }
7531 }
7532
7533 if (!MemOps.empty())
7534 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOps);
7535
7536 return Chain;
7537}
7538
7539SDValue PPCTargetLowering::LowerCall_AIX(
7540 SDValue Chain, SDValue Callee, CallFlags CFlags,
7541 const SmallVectorImpl<ISD::OutputArg> &Outs,
7542 const SmallVectorImpl<SDValue> &OutVals,
7543 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7544 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7545 const CallBase *CB) const {
7546 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7547 // AIX ABI stack frame layout.
7548
7549 assert((CFlags.CallConv == CallingConv::C ||
7550 CFlags.CallConv == CallingConv::Cold ||
7551 CFlags.CallConv == CallingConv::Fast) &&
7552 "Unexpected calling convention!");
7553
7554 if (CFlags.IsPatchPoint)
7555 report_fatal_error(reason: "This call type is unimplemented on AIX.");
7556
7557 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7558
7559 MachineFunction &MF = DAG.getMachineFunction();
7560 SmallVector<CCValAssign, 16> ArgLocs;
7561 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7562 *DAG.getContext());
7563
7564 // Reserve space for the linkage save area (LSA) on the stack.
7565 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7566 // [SP][CR][LR][2 x reserved][TOC].
7567 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7568 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7569 const bool IsPPC64 = Subtarget.isPPC64();
7570 const EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7571 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7572 CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align(PtrByteSize));
7573 CCInfo.AnalyzeCallOperands(Outs, Fn: CC_AIX);
7574
7575 // The prolog code of the callee may store up to 8 GPR argument registers to
7576 // the stack, allowing va_start to index over them in memory if the callee
7577 // is variadic.
7578 // Because we cannot tell if this is needed on the caller side, we have to
7579 // conservatively assume that it is needed. As such, make sure we have at
7580 // least enough stack space for the caller to store the 8 GPRs.
7581 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7582 const unsigned NumBytes = std::max<unsigned>(
7583 a: LinkageSize + MinParameterSaveAreaSize, b: CCInfo.getStackSize());
7584
7585 // Adjust the stack pointer for the new arguments...
7586 // These operations are automatically eliminated by the prolog/epilog pass.
7587 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: dl);
7588 SDValue CallSeqStart = Chain;
7589
7590 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7591 SmallVector<SDValue, 8> MemOpChains;
7592
7593 // Set up a copy of the stack pointer for loading and storing any
7594 // arguments that may not fit in the registers available for argument
7595 // passing.
7596 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(Reg: PPC::X1, VT: MVT::i64)
7597 : DAG.getRegister(Reg: PPC::R1, VT: MVT::i32);
7598
7599 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7600 const unsigned ValNo = ArgLocs[I].getValNo();
7601 SDValue Arg = OutVals[ValNo];
7602 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7603
7604 if (Flags.isByVal()) {
7605 const unsigned ByValSize = Flags.getByValSize();
7606
7607 // Nothing to do for zero-sized ByVals on the caller side.
7608 if (!ByValSize) {
7609 ++I;
7610 continue;
7611 }
7612
7613 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7614 return DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT: PtrVT, Chain,
7615 Ptr: (LoadOffset != 0)
7616 ? DAG.getObjectPtrOffset(
7617 SL: dl, Ptr: Arg, Offset: TypeSize::getFixed(ExactSize: LoadOffset))
7618 : Arg,
7619 PtrInfo: MachinePointerInfo(), MemVT: VT);
7620 };
7621
7622 unsigned LoadOffset = 0;
7623
7624 // Initialize the registers that are fully occupied by the by-val argument.
7625 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7626 SDValue Load = GetLoad(PtrVT, LoadOffset);
7627 MemOpChains.push_back(Elt: Load.getValue(R: 1));
7628 LoadOffset += PtrByteSize;
7629 const CCValAssign &ByValVA = ArgLocs[I++];
7630 assert(ByValVA.getValNo() == ValNo &&
7631 "Unexpected location for pass-by-value argument.");
7632 RegsToPass.push_back(Elt: std::make_pair(x: ByValVA.getLocReg(), y&: Load));
7633 }
7634
7635 if (LoadOffset == ByValSize)
7636 continue;
7637
7638 // There must be one more loc to handle the remainder.
7639 assert(ArgLocs[I].getValNo() == ValNo &&
7640 "Expected additional location for by-value argument.");
7641
7642 if (ArgLocs[I].isMemLoc()) {
7643 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7644 const CCValAssign &ByValVA = ArgLocs[I++];
7645 ISD::ArgFlagsTy MemcpyFlags = Flags;
7646 // Only memcpy the bytes that are not passed in registers.
7647 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7648 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7649 Arg: (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7650 SL: dl, Ptr: Arg, Offset: TypeSize::getFixed(ExactSize: LoadOffset))
7651 : Arg,
7652 PtrOff: DAG.getObjectPtrOffset(
7653 SL: dl, Ptr: StackPtr, Offset: TypeSize::getFixed(ExactSize: ByValVA.getLocMemOffset())),
7654 CallSeqStart, Flags: MemcpyFlags, DAG, dl);
7655 continue;
7656 }
7657
7658 // Initialize the final register residue.
7659 // Any residue that occupies the final by-val arg register must be
7660 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7661 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7662 // 2 and 1 byte loads.
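// Illustration: on 64-bit, those three loads are zero-extended to i64 and,
// after the shifts computed below, occupy bits 63-32, 31-16 and 15-8 of the
// register, leaving the low 8 bits zero.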
7663 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7664 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7665 "Unexpected register residue for by-value argument.");
7666 SDValue ResidueVal;
7667 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7668 const unsigned N = llvm::bit_floor(Value: ResidueBytes - Bytes);
7669 const MVT VT =
7670 N == 1 ? MVT::i8
7671 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7672 SDValue Load = GetLoad(VT, LoadOffset);
7673 MemOpChains.push_back(Elt: Load.getValue(R: 1));
7674 LoadOffset += N;
7675 Bytes += N;
7676
7677 // By-val arguments are passed left-justified in the register.
7678 // Every load here needs to be shifted, otherwise a full register load
7679 // should have been used.
7680 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7681 "Unexpected load emitted during handling of pass-by-value "
7682 "argument.");
7683 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7684 EVT ShiftAmountTy =
7685 getShiftAmountTy(LHSTy: Load->getValueType(ResNo: 0), DL: DAG.getDataLayout());
7686 SDValue SHLAmt = DAG.getConstant(Val: NumSHLBits, DL: dl, VT: ShiftAmountTy);
7687 SDValue ShiftedLoad =
7688 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: Load.getValueType(), N1: Load, N2: SHLAmt);
7689 ResidueVal = ResidueVal ? DAG.getNode(Opcode: ISD::OR, DL: dl, VT: PtrVT, N1: ResidueVal,
7690 N2: ShiftedLoad)
7691 : ShiftedLoad;
7692 }
7693
7694 const CCValAssign &ByValVA = ArgLocs[I++];
7695 RegsToPass.push_back(Elt: std::make_pair(x: ByValVA.getLocReg(), y&: ResidueVal));
7696 continue;
7697 }
7698
7699 CCValAssign &VA = ArgLocs[I++];
7700 const MVT LocVT = VA.getLocVT();
7701 const MVT ValVT = VA.getValVT();
7702
7703 switch (VA.getLocInfo()) {
7704 default:
7705 report_fatal_error(reason: "Unexpected argument extension type.");
7706 case CCValAssign::Full:
7707 break;
7708 case CCValAssign::ZExt:
7709 Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7710 break;
7711 case CCValAssign::SExt:
7712 Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7713 break;
7714 }
7715
7716 if (VA.isRegLoc() && !VA.needsCustom()) {
7717 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
7718 continue;
7719 }
7720
7721 // Vector arguments passed to VarArg functions need custom handling when
7722 // they are passed (at least partially) in GPRs.
7723 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7724 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7725 // Store value to its stack slot.
7726 SDValue PtrOff =
7727 DAG.getConstant(Val: VA.getLocMemOffset(), DL: dl, VT: StackPtr.getValueType());
7728 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
7729 SDValue Store =
7730 DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo());
7731 MemOpChains.push_back(Elt: Store);
7732 const unsigned OriginalValNo = VA.getValNo();
7733 // Then load the GPRs from the stack
7734 unsigned LoadOffset = 0;
7735 auto HandleCustomVecRegLoc = [&]() {
7736 assert(I != E && "Unexpected end of CCValAssigns.");
7737 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7738 "Expected custom RegLoc.");
7739 CCValAssign RegVA = ArgLocs[I++];
7740 assert(RegVA.getValNo() == OriginalValNo &&
7741 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7742 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: PtrOff,
7743 N2: DAG.getConstant(Val: LoadOffset, DL: dl, VT: PtrVT));
7744 SDValue Load = DAG.getLoad(VT: PtrVT, dl, Chain: Store, Ptr: Add, PtrInfo: MachinePointerInfo());
7745 MemOpChains.push_back(Elt: Load.getValue(R: 1));
7746 RegsToPass.push_back(Elt: std::make_pair(x: RegVA.getLocReg(), y&: Load));
7747 LoadOffset += PtrByteSize;
7748 };
7749
7750 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7751 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7752 // R10.
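// For example, a 16-byte vector needs two GPR-sized pieces in 64-bit mode
// and four in 32-bit mode, hence the possible second pair of RegLocs below.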
7753 HandleCustomVecRegLoc();
7754 HandleCustomVecRegLoc();
7755
7756 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7757 ArgLocs[I].getValNo() == OriginalValNo) {
7758 assert(!IsPPC64 &&
7759 "Only 2 custom RegLocs expected for 64-bit codegen.");
7760 HandleCustomVecRegLoc();
7761 HandleCustomVecRegLoc();
7762 }
7763
7764 continue;
7765 }
7766
7767 if (VA.isMemLoc()) {
7768 SDValue PtrOff =
7769 DAG.getConstant(Val: VA.getLocMemOffset(), DL: dl, VT: StackPtr.getValueType());
7770 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
7771 MemOpChains.push_back(
7772 Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff,
7773 PtrInfo: MachinePointerInfo::getStack(MF, Offset: VA.getLocMemOffset()),
7774 Alignment: Subtarget.getFrameLowering()->getStackAlign()));
7775
7776 continue;
7777 }
7778
7779 if (!ValVT.isFloatingPoint())
7780 report_fatal_error(
7781 reason: "Unexpected register handling for calling convention.");
7782
7783 // Custom handling is used for GPR initializations for vararg float
7784 // arguments.
7785 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7786 LocVT.isInteger() &&
7787 "Custom register handling only expected for VarArg.");
7788
7789 SDValue ArgAsInt =
7790 DAG.getBitcast(VT: MVT::getIntegerVT(BitWidth: ValVT.getSizeInBits()), V: Arg);
7791
7792 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7793 // f32 in 32-bit GPR
7794 // f64 in 64-bit GPR
7795 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgAsInt));
7796 else if (Arg.getValueType().getFixedSizeInBits() <
7797 LocVT.getFixedSizeInBits())
7798 // f32 in 64-bit GPR.
7799 RegsToPass.push_back(Elt: std::make_pair(
7800 x: VA.getLocReg(), y: DAG.getZExtOrTrunc(Op: ArgAsInt, DL: dl, VT: LocVT)));
7801 else {
7802 // f64 in two 32-bit GPRs
7803 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7804 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7805 "Unexpected custom register for argument!");
7806 CCValAssign &GPR1 = VA;
7807 SDValue MSWAsI64 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i64, N1: ArgAsInt,
7808 N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i8));
7809 RegsToPass.push_back(Elt: std::make_pair(
7810 x: GPR1.getLocReg(), y: DAG.getZExtOrTrunc(Op: MSWAsI64, DL: dl, VT: MVT::i32)));
7811
7812 if (I != E) {
7813 // If only one GPR was available, there will be exactly one custom GPR
7814 // and the argument will also be passed in memory.
7815 CCValAssign &PeekArg = ArgLocs[I];
7816 if (PeekArg.isRegLoc() && PeekArg.getValNo() == ValNo) {
7817 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7818 CCValAssign &GPR2 = ArgLocs[I++];
7819 RegsToPass.push_back(Elt: std::make_pair(
7820 x: GPR2.getLocReg(), y: DAG.getZExtOrTrunc(Op: ArgAsInt, DL: dl, VT: MVT::i32)));
7821 }
7822 }
7823 }
7824 }
7825
7826 if (!MemOpChains.empty())
7827 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);
7828
7829 // For indirect calls, we need to save the TOC base to the stack for
7830 // restoration after the call.
7831 if (CFlags.IsIndirect) {
7832 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7833 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7834 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7835 const MVT PtrVT = Subtarget.getScalarIntVT();
7836 const unsigned TOCSaveOffset =
7837 Subtarget.getFrameLowering()->getTOCSaveOffset();
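// With the AIX linkage area laid out as [SP][CR][LR][2 x reserved][TOC],
// this offset is the sixth slot: 20 bytes in 32-bit mode and 40 bytes in
// 64-bit mode.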
7838
7839 setUsesTOCBasePtr(DAG);
7840 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: TOCBaseReg, VT: PtrVT);
7841 SDValue PtrOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl);
7842 SDValue StackPtr = DAG.getRegister(Reg: StackPtrReg, VT: PtrVT);
7843 SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, N2: PtrOff);
7844 Chain = DAG.getStore(
7845 Chain: Val.getValue(R: 1), dl, Val, Ptr: AddPtr,
7846 PtrInfo: MachinePointerInfo::getStack(MF&: DAG.getMachineFunction(), Offset: TOCSaveOffset));
7847 }
7848
7849 // Build a sequence of copy-to-reg nodes chained together with token chain
7850 // and flag operands which copy the outgoing args into the appropriate regs.
7851 SDValue InGlue;
7852 for (auto Reg : RegsToPass) {
7853 Chain = DAG.getCopyToReg(Chain, dl, Reg: Reg.first, N: Reg.second, Glue: InGlue);
7854 InGlue = Chain.getValue(R: 1);
7855 }
7856
7857 const int SPDiff = 0;
7858 return FinishCall(CFlags, dl, DAG, RegsToPass, Glue: InGlue, Chain, CallSeqStart,
7859 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7860}
7861
7862bool
7863PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7864 MachineFunction &MF, bool isVarArg,
7865 const SmallVectorImpl<ISD::OutputArg> &Outs,
7866 LLVMContext &Context,
7867 const Type *RetTy) const {
7868 SmallVector<CCValAssign, 16> RVLocs;
7869 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7870 return CCInfo.CheckReturn(
7871 Outs, Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7872 ? RetCC_PPC_Cold
7873 : RetCC_PPC);
7874}
7875
7876SDValue
7877PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7878 bool isVarArg,
7879 const SmallVectorImpl<ISD::OutputArg> &Outs,
7880 const SmallVectorImpl<SDValue> &OutVals,
7881 const SDLoc &dl, SelectionDAG &DAG) const {
7882 SmallVector<CCValAssign, 16> RVLocs;
7883 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7884 *DAG.getContext());
7885 CCInfo.AnalyzeReturn(Outs,
7886 Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7887 ? RetCC_PPC_Cold
7888 : RetCC_PPC);
7889
7890 SDValue Glue;
7891 SmallVector<SDValue, 4> RetOps(1, Chain);
7892
7893 // Copy the result values into the output registers.
7894 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7895 CCValAssign &VA = RVLocs[i];
7896 assert(VA.isRegLoc() && "Can only return in registers!");
7897
7898 SDValue Arg = OutVals[RealResIdx];
7899
7900 switch (VA.getLocInfo()) {
7901 default: llvm_unreachable("Unknown loc info!");
7902 case CCValAssign::Full: break;
7903 case CCValAssign::AExt:
7904 Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7905 break;
7906 case CCValAssign::ZExt:
7907 Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7908 break;
7909 case CCValAssign::SExt:
7910 Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
7911 break;
7912 }
7913 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7914 bool isLittleEndian = Subtarget.isLittleEndian();
7915 // Legalize ret f64 -> ret 2 x i32.
7916 SDValue SVal =
7917 DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
7918 N2: DAG.getIntPtrConstant(Val: isLittleEndian ? 0 : 1, DL: dl));
7919 Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: SVal, Glue);
7920 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
7921 SVal = DAG.getNode(Opcode: PPCISD::EXTRACT_SPE, DL: dl, VT: MVT::i32, N1: Arg,
7922 N2: DAG.getIntPtrConstant(Val: isLittleEndian ? 1 : 0, DL: dl));
7923 Glue = Chain.getValue(R: 1);
7924 VA = RVLocs[++i]; // skip ahead to next loc
7925 Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: SVal, Glue);
7926 } else
7927 Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: Arg, Glue);
7928 Glue = Chain.getValue(R: 1);
7929 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
7930 }
7931
7932 RetOps[0] = Chain; // Update chain.
7933
7934 // Add the glue if we have it.
7935 if (Glue.getNode())
7936 RetOps.push_back(Elt: Glue);
7937
7938 return DAG.getNode(Opcode: PPCISD::RET_GLUE, DL: dl, VT: MVT::Other, Ops: RetOps);
7939}
7940
7941SDValue
7942PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7943 SelectionDAG &DAG) const {
7944 SDLoc dl(Op);
7945
7946 // Get the correct type for integers.
7947 EVT IntVT = Op.getValueType();
7948
7949 // Get the inputs.
7950 SDValue Chain = Op.getOperand(i: 0);
7951 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7952 // Build a DYNAREAOFFSET node.
7953 SDValue Ops[2] = {Chain, FPSIdx};
7954 SDVTList VTs = DAG.getVTList(VT: IntVT);
7955 return DAG.getNode(Opcode: PPCISD::DYNAREAOFFSET, DL: dl, VTList: VTs, Ops);
7956}
7957
7958SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7959 SelectionDAG &DAG) const {
7960 // When we pop the dynamic allocation we need to restore the SP link.
7961 SDLoc dl(Op);
7962
7963 // Get the correct type for pointers.
7964 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
7965
7966 // Construct the stack pointer operand.
7967 bool isPPC64 = Subtarget.isPPC64();
7968 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7969 SDValue StackPtr = DAG.getRegister(Reg: SP, VT: PtrVT);
7970
7971 // Get the operands for the STACKRESTORE.
7972 SDValue Chain = Op.getOperand(i: 0);
7973 SDValue SaveSP = Op.getOperand(i: 1);
7974
7975 // Load the old link SP.
7976 SDValue LoadLinkSP =
7977 DAG.getLoad(VT: PtrVT, dl, Chain, Ptr: StackPtr, PtrInfo: MachinePointerInfo());
7978
7979 // Restore the stack pointer.
7980 Chain = DAG.getCopyToReg(Chain: LoadLinkSP.getValue(R: 1), dl, Reg: SP, N: SaveSP);
7981
7982 // Store the old link SP.
7983 return DAG.getStore(Chain, dl, Val: LoadLinkSP, Ptr: StackPtr, PtrInfo: MachinePointerInfo());
7984}
7985
7986SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7987 MachineFunction &MF = DAG.getMachineFunction();
7988 bool isPPC64 = Subtarget.isPPC64();
7989 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
7990
7991 // Get the current return address save index.
7993 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7994 int RASI = FI->getReturnAddrSaveIndex();
7995
7996 // If the return address save index hasn't been defined yet.
7997 if (!RASI) {
7998 // Find out the fixed offset of the return address save area.
7999 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
8000 // Allocate the frame index for the return address save area.
8001 RASI = MF.getFrameInfo().CreateFixedObject(Size: isPPC64? 8 : 4, SPOffset: LROffset, IsImmutable: false);
8002 // Save the result.
8003 FI->setReturnAddrSaveIndex(RASI);
8004 }
8005 return DAG.getFrameIndex(FI: RASI, VT: PtrVT);
8006}
8007
8008SDValue
8009PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
8010 MachineFunction &MF = DAG.getMachineFunction();
8011 bool isPPC64 = Subtarget.isPPC64();
8012 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
8013
8014 // Get the current frame pointer save index. The users of this index will be
8015 // primarily DYNALLOC instructions.
8016 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8017 int FPSI = FI->getFramePointerSaveIndex();
8018
8019 // If the frame pointer save index hasn't been defined yet.
8020 if (!FPSI) {
8021 // Find out the fixed offset of the frame pointer save area.
8022 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
8023 // Allocate the frame index for the frame pointer save area.
8024 FPSI = MF.getFrameInfo().CreateFixedObject(Size: isPPC64? 8 : 4, SPOffset: FPOffset, IsImmutable: true);
8025 // Save the result.
8026 FI->setFramePointerSaveIndex(FPSI);
8027 }
8028 return DAG.getFrameIndex(FI: FPSI, VT: PtrVT);
8029}
8030
8031SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
8032 SelectionDAG &DAG) const {
8033 MachineFunction &MF = DAG.getMachineFunction();
8034 // Get the inputs.
8035 SDValue Chain = Op.getOperand(i: 0);
8036 SDValue Size = Op.getOperand(i: 1);
8037 SDLoc dl(Op);
8038
8039 // Get the correct type for pointers.
8040 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
8041 // Negate the size.
8042 SDValue NegSize = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: PtrVT,
8043 N1: DAG.getConstant(Val: 0, DL: dl, VT: PtrVT), N2: Size);
8044 // Construct a node for the frame pointer save index.
8045 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8046 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8047 SDVTList VTs = DAG.getVTList(VT1: PtrVT, VT2: MVT::Other);
8048 if (hasInlineStackProbe(MF))
8049 return DAG.getNode(Opcode: PPCISD::PROBED_ALLOCA, DL: dl, VTList: VTs, Ops);
8050 return DAG.getNode(Opcode: PPCISD::DYNALLOC, DL: dl, VTList: VTs, Ops);
8051}
8052
8053SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8054 SelectionDAG &DAG) const {
8055 MachineFunction &MF = DAG.getMachineFunction();
8056
8057 bool isPPC64 = Subtarget.isPPC64();
8058 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
8059
8060 int FI = MF.getFrameInfo().CreateFixedObject(Size: isPPC64 ? 8 : 4, SPOffset: 0, IsImmutable: false);
8061 return DAG.getFrameIndex(FI, VT: PtrVT);
8062}
8063
8064SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8065 SelectionDAG &DAG) const {
8066 SDLoc DL(Op);
8067 return DAG.getNode(Opcode: PPCISD::EH_SJLJ_SETJMP, DL,
8068 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other),
8069 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
8070}
8071
8072SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8073 SelectionDAG &DAG) const {
8074 SDLoc DL(Op);
8075 return DAG.getNode(Opcode: PPCISD::EH_SJLJ_LONGJMP, DL, VT: MVT::Other,
8076 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
8077}
8078
8079SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8080 if (Op.getValueType().isVector())
8081 return LowerVectorLoad(Op, DAG);
8082
8083 assert(Op.getValueType() == MVT::i1 &&
8084 "Custom lowering only for i1 loads");
8085
8086 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8087
8088 SDLoc dl(Op);
8089 LoadSDNode *LD = cast<LoadSDNode>(Val&: Op);
8090
8091 SDValue Chain = LD->getChain();
8092 SDValue BasePtr = LD->getBasePtr();
8093 MachineMemOperand *MMO = LD->getMemOperand();
8094
8095 SDValue NewLD =
8096 DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: getPointerTy(DL: DAG.getDataLayout()), Chain,
8097 Ptr: BasePtr, MemVT: MVT::i8, MMO);
8098 SDValue Result = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: NewLD);
8099
8100 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8101 return DAG.getMergeValues(Ops, dl);
8102}
8103
8104SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8105 if (Op.getOperand(i: 1).getValueType().isVector())
8106 return LowerVectorStore(Op, DAG);
8107
8108 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8109 "Custom lowering only for i1 stores");
8110
8111 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8112
8113 SDLoc dl(Op);
8114 StoreSDNode *ST = cast<StoreSDNode>(Val&: Op);
8115
8116 SDValue Chain = ST->getChain();
8117 SDValue BasePtr = ST->getBasePtr();
8118 SDValue Value = ST->getValue();
8119 MachineMemOperand *MMO = ST->getMemOperand();
8120
8121 Value = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
8122 Operand: Value);
8123 return DAG.getTruncStore(Chain, dl, Val: Value, Ptr: BasePtr, SVT: MVT::i8, MMO);
8124}
8125
8126// FIXME: Remove this once the ANDI glue bug is fixed:
8127SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8128 assert(Op.getValueType() == MVT::i1 &&
8129 "Custom lowering only for i1 results");
8130
8131 SDLoc DL(Op);
8132 return DAG.getNode(Opcode: PPCISD::ANDI_rec_1_GT_BIT, DL, VT: MVT::i1, Operand: Op.getOperand(i: 0));
8133}
8134
8135SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8136 SelectionDAG &DAG) const {
8137
8138 // Implements a vector truncate that fits in a vector register as a shuffle.
8139 // We want to legalize vector truncates down to where the source fits in
8140 // a vector register (and the target type is therefore smaller than the
8141 // vector register size). At that point legalization will try to custom
8142 // lower the sub-legal result and get here, where we can contain the
8143 // truncate as a single target operation.
8144
8145 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8146 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8147 //
8148 // We will implement it for big-endian ordering as this (where x denotes
8149 // undefined):
8150 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8151 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8152 //
8153 // The same operation in little-endian ordering will be:
8154 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8155 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8156
8157 EVT TrgVT = Op.getValueType();
8158 assert(TrgVT.isVector() && "Vector type expected.");
8159 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8160 EVT EltVT = TrgVT.getVectorElementType();
8161 if (!isOperationCustom(Op: Op.getOpcode(), VT: TrgVT) ||
8162 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(Value: TrgNumElts) ||
8163 !llvm::has_single_bit<uint32_t>(Value: EltVT.getSizeInBits()))
8164 return SDValue();
8165
8166 SDValue N1 = Op.getOperand(i: 0);
8167 EVT SrcVT = N1.getValueType();
8168 unsigned SrcSize = SrcVT.getSizeInBits();
8169 if (SrcSize > 256 || !isPowerOf2_32(Value: SrcVT.getVectorNumElements()) ||
8170 !llvm::has_single_bit<uint32_t>(
8171 Value: SrcVT.getVectorElementType().getSizeInBits()))
8172 return SDValue();
8173 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8174 return SDValue();
8175
8176 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8177 EVT WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EltVT, NumElements: WideNumElts);
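// For the <2 x i8> target in the example above, WideVT is v16i8.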
8178
8179 SDLoc DL(Op);
8180 SDValue Op1, Op2;
8181 if (SrcSize == 256) {
8182 EVT VecIdxTy = getVectorIdxTy(DL: DAG.getDataLayout());
8183 EVT SplitVT =
8184 N1.getValueType().getHalfNumVectorElementsVT(Context&: *DAG.getContext());
8185 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8186 Op1 = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SplitVT, N1,
8187 N2: DAG.getConstant(Val: 0, DL, VT: VecIdxTy));
8188 Op2 = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: SplitVT, N1,
8189 N2: DAG.getConstant(Val: SplitNumElts, DL, VT: VecIdxTy));
8190 }
8191 else {
8192 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, Vec: N1, dl: DL);
8193 Op2 = DAG.getUNDEF(VT: WideVT);
8194 }
8195
8196 // First list the elements we want to keep.
8197 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8198 SmallVector<int, 16> ShuffV;
8199 if (Subtarget.isLittleEndian())
8200 for (unsigned i = 0; i < TrgNumElts; ++i)
8201 ShuffV.push_back(Elt: i * SizeMult);
8202 else
8203 for (unsigned i = 1; i <= TrgNumElts; ++i)
8204 ShuffV.push_back(Elt: i * SizeMult - 1);
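// For the <2 x i16> -> <2 x i8> example above, this selects byte indices
// {0, 2} on little-endian and {1, 3} on big-endian, i.e. the LSB of each
// source element.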
8205
8206 // Populate the remaining elements with undefs.
8207 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8208 // ShuffV.push_back(i + WideNumElts);
8209 ShuffV.push_back(Elt: WideNumElts + 1);
8210
8211 Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: WideVT, Operand: Op1);
8212 Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: WideVT, Operand: Op2);
8213 return DAG.getVectorShuffle(VT: WideVT, dl: DL, N1: Op1, N2: Op2, Mask: ShuffV);
8214}
8215
8216 /// LowerSELECT_CC - Lower floating-point select_cc's into an fsel
8217 /// instruction when possible.
8218SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8219 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 4))->get();
8220 EVT ResVT = Op.getValueType();
8221 EVT CmpVT = Op.getOperand(i: 0).getValueType();
8222 SDValue LHS = Op.getOperand(i: 0), RHS = Op.getOperand(i: 1);
8223 SDValue TV = Op.getOperand(i: 2), FV = Op.getOperand(i: 3);
8224 SDLoc dl(Op);
8225
8226 // Without power9-vector, we don't have a native instruction for f128 comparison.
8227 // The following transformation to a setcc libcall is needed:
8228 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8229 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8230 SDValue Z = DAG.getSetCC(
8231 DL: dl, VT: getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: CmpVT),
8232 LHS, RHS, Cond: CC);
8233 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: Z.getValueType());
8234 return DAG.getSelectCC(DL: dl, LHS: Z, RHS: Zero, True: TV, False: FV, Cond: ISD::SETNE);
8235 }
8236
8237 // Not FP, or using SPE? Not a fsel.
8238 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8239 Subtarget.hasSPE())
8240 return Op;
8241
8242 SDNodeFlags Flags = Op.getNode()->getFlags();
8243
8244 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8245 // presence of infinities.
8246 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8247 switch (CC) {
8248 default:
8249 break;
8250 case ISD::SETOGT:
8251 case ISD::SETGT:
8252 return DAG.getNode(Opcode: PPCISD::XSMAXC, DL: dl, VT: Op.getValueType(), N1: LHS, N2: RHS);
8253 case ISD::SETOLT:
8254 case ISD::SETLT:
8255 return DAG.getNode(Opcode: PPCISD::XSMINC, DL: dl, VT: Op.getValueType(), N1: LHS, N2: RHS);
8256 }
8257 }
8258
8259 // We might be able to do better than this under some circumstances, but in
8260 // general, fsel-based lowering of select is a finite-math-only optimization.
8261 // For more information, see section F.3 of the 2.06 ISA specification.
8262 // With ISA 3.0
8263 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8264 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8265 ResVT == MVT::f128)
8266 return Op;
8267
8268 // If the RHS of the comparison is a 0.0, we don't need to do the
8269 // subtraction at all.
8270 SDValue Sel1;
8271 if (isFloatingPointZero(Op: RHS))
8272 switch (CC) {
8273 default: break; // SETUO etc aren't handled by fsel.
8274 case ISD::SETNE:
8275 std::swap(a&: TV, b&: FV);
8276 [[fallthrough]];
8277 case ISD::SETEQ:
8278 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8279 LHS = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: LHS);
8280 Sel1 = DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: LHS, N2: TV, N3: FV);
8281 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8282 Sel1 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Sel1);
8283 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT,
8284 N1: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: MVT::f64, Operand: LHS), N2: Sel1, N3: FV);
8285 case ISD::SETULT:
8286 case ISD::SETLT:
8287 std::swap(a&: TV, b&: FV); // fsel is natively setge, swap operands for setlt
8288 [[fallthrough]];
8289 case ISD::SETOGE:
8290 case ISD::SETGE:
8291 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8292 LHS = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: LHS);
8293 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: LHS, N2: TV, N3: FV);
8294 case ISD::SETUGT:
8295 case ISD::SETGT:
8296 std::swap(a&: TV, b&: FV); // fsel is natively setge, swap operands for setgt
8297 [[fallthrough]];
8298 case ISD::SETOLE:
8299 case ISD::SETLE:
8300 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8301 LHS = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: LHS);
8302 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT,
8303 N1: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: MVT::f64, Operand: LHS), N2: TV, N3: FV);
8304 }
8305
8306 SDValue Cmp;
8307 switch (CC) {
8308 default: break; // SETUO etc aren't handled by fsel.
8309 case ISD::SETNE:
8310 std::swap(a&: TV, b&: FV);
8311 [[fallthrough]];
8312 case ISD::SETEQ:
8313 Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: LHS, N2: RHS, Flags);
8314 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8315 Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
8316 Sel1 = DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: TV, N3: FV);
8317 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8318 Sel1 = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Sel1);
8319 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT,
8320 N1: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: MVT::f64, Operand: Cmp), N2: Sel1, N3: FV);
8321 case ISD::SETULT:
8322 case ISD::SETLT:
8323 Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: LHS, N2: RHS, Flags);
8324 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8325 Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
8326 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: FV, N3: TV);
8327 case ISD::SETOGE:
8328 case ISD::SETGE:
8329 Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: LHS, N2: RHS, Flags);
8330 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8331 Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
8332 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: TV, N3: FV);
8333 case ISD::SETUGT:
8334 case ISD::SETGT:
8335 Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: RHS, N2: LHS, Flags);
8336 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8337 Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
8338 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: FV, N3: TV);
8339 case ISD::SETOLE:
8340 case ISD::SETLE:
8341 Cmp = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: CmpVT, N1: RHS, N2: LHS, Flags);
8342 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8343 Cmp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Cmp);
8344 return DAG.getNode(Opcode: PPCISD::FSEL, DL: dl, VT: ResVT, N1: Cmp, N2: TV, N3: FV);
8345 }
8346 return Op;
8347}
8348
8349static unsigned getPPCStrictOpcode(unsigned Opc) {
8350 switch (Opc) {
8351 default:
8352 llvm_unreachable("No strict version of this opcode!");
8353 case PPCISD::FCTIDZ:
8354 return PPCISD::STRICT_FCTIDZ;
8355 case PPCISD::FCTIWZ:
8356 return PPCISD::STRICT_FCTIWZ;
8357 case PPCISD::FCTIDUZ:
8358 return PPCISD::STRICT_FCTIDUZ;
8359 case PPCISD::FCTIWUZ:
8360 return PPCISD::STRICT_FCTIWUZ;
8361 case PPCISD::FCFID:
8362 return PPCISD::STRICT_FCFID;
8363 case PPCISD::FCFIDU:
8364 return PPCISD::STRICT_FCFIDU;
8365 case PPCISD::FCFIDS:
8366 return PPCISD::STRICT_FCFIDS;
8367 case PPCISD::FCFIDUS:
8368 return PPCISD::STRICT_FCFIDUS;
8369 }
8370}
8371
8372static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8373 const PPCSubtarget &Subtarget) {
8374 SDLoc dl(Op);
8375 bool IsStrict = Op->isStrictFPOpcode();
8376 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8377 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8378
8379 // TODO: Any other flags to propagate?
8380 SDNodeFlags Flags;
8381 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8382
8383 // For strict nodes, source is the second operand.
8384 SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
8385 SDValue Chain = IsStrict ? Op.getOperand(i: 0) : SDValue();
8386 MVT DestTy = Op.getSimpleValueType();
8387 assert(Src.getValueType().isFloatingPoint() &&
8388 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8389 DestTy == MVT::i64) &&
8390 "Invalid FP_TO_INT types");
8391 if (Src.getValueType() == MVT::f32) {
8392 if (IsStrict) {
8393 Src =
8394 DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL: dl,
8395 VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other), Ops: {Chain, Src}, Flags);
8396 Chain = Src.getValue(R: 1);
8397 } else
8398 Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Src);
8399 }
8400 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8401 DestTy = Subtarget.getScalarIntVT();
8402 unsigned Opc = ISD::DELETED_NODE;
8403 switch (DestTy.SimpleTy) {
8404 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8405 case MVT::i32:
8406 Opc = IsSigned ? PPCISD::FCTIWZ
8407 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8408 break;
8409 case MVT::i64:
8410 assert((IsSigned || Subtarget.hasFPCVT()) &&
8411 "i64 FP_TO_UINT is supported only with FPCVT");
8412 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8413 }
8414 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8415 SDValue Conv;
8416 if (IsStrict) {
8417 Opc = getPPCStrictOpcode(Opc);
8418 Conv = DAG.getNode(Opcode: Opc, DL: dl, VTList: DAG.getVTList(VT1: ConvTy, VT2: MVT::Other), Ops: {Chain, Src},
8419 Flags);
8420 } else {
8421 Conv = DAG.getNode(Opcode: Opc, DL: dl, VT: ConvTy, Operand: Src);
8422 }
8423 return Conv;
8424}
8425
8426void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8427 SelectionDAG &DAG,
8428 const SDLoc &dl) const {
8429 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8430 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8431 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8432 bool IsStrict = Op->isStrictFPOpcode();
8433
8434 // Convert the FP value to an int value through memory.
8435 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8436 (IsSigned || Subtarget.hasFPCVT());
8437 SDValue FIPtr = DAG.CreateStackTemporary(VT: i32Stack ? MVT::i32 : MVT::f64);
8438 int FI = cast<FrameIndexSDNode>(Val&: FIPtr)->getIndex();
8439 MachinePointerInfo MPI =
8440 MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI);
8441
8442 // Emit a store to the stack slot.
8443 SDValue Chain = IsStrict ? Tmp.getValue(R: 1) : DAG.getEntryNode();
8444 Align Alignment(DAG.getEVTAlign(MemoryVT: Tmp.getValueType()));
8445 if (i32Stack) {
8446 MachineFunction &MF = DAG.getMachineFunction();
8447 Alignment = Align(4);
8448 MachineMemOperand *MMO =
8449 MF.getMachineMemOperand(PtrInfo: MPI, F: MachineMemOperand::MOStore, Size: 4, BaseAlignment: Alignment);
8450 SDValue Ops[] = { Chain, Tmp, FIPtr };
8451 Chain = DAG.getMemIntrinsicNode(Opcode: PPCISD::STFIWX, dl,
8452 VTList: DAG.getVTList(VT: MVT::Other), Ops, MemVT: MVT::i32, MMO);
8453 } else
8454 Chain = DAG.getStore(Chain, dl, Val: Tmp, Ptr: FIPtr, PtrInfo: MPI, Alignment);
8455
8456 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8457 // add in a bias on big endian.
8458 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8459 FIPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: FIPtr.getValueType(), N1: FIPtr,
8460 N2: DAG.getConstant(Val: 4, DL: dl, VT: FIPtr.getValueType()));
8461 MPI = MPI.getWithOffset(O: Subtarget.isLittleEndian() ? 0 : 4);
8462 }
8463
8464 RLI.Chain = Chain;
8465 RLI.Ptr = FIPtr;
8466 RLI.MPI = MPI;
8467 RLI.Alignment = Alignment;
8468}
8469
8470/// Custom lowers floating point to integer conversions to use
8471/// the direct move instructions available in ISA 2.07 to avoid the
8472/// need for load/store combinations.
8473SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8474 SelectionDAG &DAG,
8475 const SDLoc &dl) const {
8476 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8477 SDValue Mov = DAG.getNode(Opcode: PPCISD::MFVSR, DL: dl, VT: Op.getValueType(), Operand: Conv);
8478 if (Op->isStrictFPOpcode())
8479 return DAG.getMergeValues(Ops: {Mov, Conv.getValue(R: 1)}, dl);
8480 else
8481 return Mov;
8482}
8483
8484SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8485 const SDLoc &dl) const {
8486 bool IsStrict = Op->isStrictFPOpcode();
8487 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8488 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8489 SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
8490 EVT SrcVT = Src.getValueType();
8491 EVT DstVT = Op.getValueType();
8492
8493 // FP to INT conversions are legal for f128.
8494 if (SrcVT == MVT::f128)
8495 return Subtarget.hasP9Vector() ? Op : SDValue();
8496
8497 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8498 // PPC (the libcall is not available).
8499 if (SrcVT == MVT::ppcf128) {
8500 if (DstVT == MVT::i32) {
8501 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8502 // set other fast-math flags to FP operations in both strict and
8503 // non-strict cases. (FP_TO_SINT, FSUB)
8504 SDNodeFlags Flags;
8505 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8506
8507 if (IsSigned) {
8508 SDValue Lo, Hi;
8509 std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Src, DL: dl, LoVT: MVT::f64, HiVT: MVT::f64);
8510
8511 // Add the two halves of the long double in round-to-zero mode, and use
8512 // a smaller FP_TO_SINT.
8513 if (IsStrict) {
8514 SDValue Res = DAG.getNode(Opcode: PPCISD::STRICT_FADDRTZ, DL: dl,
8515 VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
8516 Ops: {Op.getOperand(i: 0), Lo, Hi}, Flags);
8517 return DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl,
8518 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other),
8519 Ops: {Res.getValue(R: 1), Res}, Flags);
8520 } else {
8521 SDValue Res = DAG.getNode(Opcode: PPCISD::FADDRTZ, DL: dl, VT: MVT::f64, N1: Lo, N2: Hi);
8522 return DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: MVT::i32, Operand: Res);
8523 }
8524 } else {
8525 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8526 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8527 SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
8528 SDValue SignMask = DAG.getConstant(Val: 0x80000000, DL: dl, VT: DstVT);
8529 if (IsStrict) {
8530 // Sel = Src < 0x80000000
8531 // FltOfs = select Sel, 0.0, 0x80000000
8532 // IntOfs = select Sel, 0, 0x80000000
8533 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8534 SDValue Chain = Op.getOperand(i: 0);
8535 EVT SetCCVT =
8536 getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: SrcVT);
8537 EVT DstSetCCVT =
8538 getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: DstVT);
8539 SDValue Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
8540 Chain, IsSignaling: true);
8541 Chain = Sel.getValue(R: 1);
8542
8543 SDValue FltOfs = DAG.getSelect(
8544 DL: dl, VT: SrcVT, Cond: Sel, LHS: DAG.getConstantFP(Val: 0.0, DL: dl, VT: SrcVT), RHS: Cst);
8545 Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8546
8547 SDValue Val = DAG.getNode(Opcode: ISD::STRICT_FSUB, DL: dl,
8548 VTList: DAG.getVTList(VT1: SrcVT, VT2: MVT::Other),
8549 Ops: {Chain, Src, FltOfs}, Flags);
8550 Chain = Val.getValue(R: 1);
8551 SDValue SInt = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl,
8552 VTList: DAG.getVTList(VT1: DstVT, VT2: MVT::Other),
8553 Ops: {Chain, Val}, Flags);
8554 Chain = SInt.getValue(R: 1);
8555 SDValue IntOfs = DAG.getSelect(
8556 DL: dl, VT: DstVT, Cond: Sel, LHS: DAG.getConstant(Val: 0, DL: dl, VT: DstVT), RHS: SignMask);
8557 SDValue Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
8558 return DAG.getMergeValues(Ops: {Result, Chain}, dl);
8559 } else {
8560 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8561 // FIXME: generated code sucks.
8562 SDValue True = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: MVT::ppcf128, N1: Src, N2: Cst);
8563 True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: MVT::i32, Operand: True);
8564 True = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i32, N1: True, N2: SignMask);
8565 SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: MVT::i32, Operand: Src);
8566 return DAG.getSelectCC(DL: dl, LHS: Src, RHS: Cst, True, False, Cond: ISD::SETGE);
8567 }
8568 }
8569 }
8570
8571 return SDValue();
8572 }
8573
8574 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8575 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8576
8577 ReuseLoadInfo RLI;
8578 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8579
8580 return DAG.getLoad(VT: Op.getValueType(), dl, Chain: RLI.Chain, Ptr: RLI.Ptr, PtrInfo: RLI.MPI,
8581 Alignment: RLI.Alignment, MMOFlags: RLI.MMOFlags(), AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8582}
8583
8584// We're trying to insert a regular store, S, and then a load, L. If the
8585// incoming value, O, is a load, we might just be able to have our load use the
8586// address used by O. However, we don't know if anything else will store to
8587// that address before we can load from it. To prevent this situation, we need
8588// to insert our load, L, into the chain as a peer of O. To do this, we give L
8589// the same chain operand as O, we create a token factor from the chain results
8590// of O and L, and we replace all uses of O's chain result with that token
8591// factor (this last part is handled by makeEquivalentMemoryOrdering).
8592bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8593 ReuseLoadInfo &RLI,
8594 SelectionDAG &DAG,
8595 ISD::LoadExtType ET) const {
8596 // Conservatively skip reusing for constrained FP nodes.
8597 if (Op->isStrictFPOpcode())
8598 return false;
8599
8600 SDLoc dl(Op);
8601 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8602 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8603 if (ET == ISD::NON_EXTLOAD &&
8604 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8605 isOperationLegalOrCustom(Op: Op.getOpcode(),
8606 VT: Op.getOperand(i: 0).getValueType())) {
8607
8608 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8609 return true;
8610 }
8611
8612 LoadSDNode *LD = dyn_cast<LoadSDNode>(Val&: Op);
8613 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8614 LD->isNonTemporal())
8615 return false;
8616 if (LD->getMemoryVT() != MemVT)
8617 return false;
8618
8619 // If the result of the load is an illegal type, then we can't build a
8620 // valid chain for reuse since the legalised loads and the token factor node
8621 // that ties them together use a different output chain than the
8622 // illegal load.
8623 if (!isTypeLegal(VT: LD->getValueType(ResNo: 0)))
8624 return false;
8625
8626 RLI.Ptr = LD->getBasePtr();
8627 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8628 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8629 "Non-pre-inc AM on PPC?");
8630 RLI.Ptr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RLI.Ptr.getValueType(), N1: RLI.Ptr,
8631 N2: LD->getOffset());
8632 }
8633
8634 RLI.Chain = LD->getChain();
8635 RLI.MPI = LD->getPointerInfo();
8636 RLI.IsDereferenceable = LD->isDereferenceable();
8637 RLI.IsInvariant = LD->isInvariant();
8638 RLI.Alignment = LD->getAlign();
8639 RLI.AAInfo = LD->getAAInfo();
8640 RLI.Ranges = LD->getRanges();
8641
8642 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8643 return true;
8644}
8645
8646 /// Analyze the profitability of a direct move: prefer a float load over an
8647 /// int load plus a direct move when the loaded integer value is used only by
8648 /// int-to-fp conversions.
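/// For example, if the loaded i32 is also used as an integer elsewhere, the
/// integer load is needed anyway and the direct move is profitable; if the
/// conversion is its only use, a floating-point load (e.g. LFIWAX/LFIWZX)
/// avoids the GPR-to-VSR move entirely.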
8649bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8650 SDNode *Origin = Op.getOperand(i: Op->isStrictFPOpcode() ? 1 : 0).getNode();
8651 if (Origin->getOpcode() != ISD::LOAD)
8652 return true;
8653
8654 // If there is no LXSIBZX/LXSIHZX (e.g. on Power8),
8655 // prefer a direct move if the memory size is 1 or 2 bytes.
8656 MachineMemOperand *MMO = cast<LoadSDNode>(Val: Origin)->getMemOperand();
8657 if (!Subtarget.hasP9Vector() &&
8658 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8659 return true;
8660
8661 for (SDUse &Use : Origin->uses()) {
8662
8663 // Only look at the users of the loaded value.
8664 if (Use.getResNo() != 0)
8665 continue;
8666
8667 SDNode *User = Use.getUser();
8668 if (User->getOpcode() != ISD::SINT_TO_FP &&
8669 User->getOpcode() != ISD::UINT_TO_FP &&
8670 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8671 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8672 return true;
8673 }
8674
8675 return false;
8676}
8677
8678static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8679 const PPCSubtarget &Subtarget,
8680 SDValue Chain = SDValue()) {
8681 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8682 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8683 SDLoc dl(Op);
8684
8685 // TODO: Any other flags to propagate?
8686 SDNodeFlags Flags;
8687 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8688
8689 // If we have FCFIDS, then use it when converting to single-precision.
8690 // Otherwise, convert to double-precision and then round.
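// For example, an i64 -> f32 conversion uses FCFIDS directly when FPCVT is
// available; otherwise FCFID produces an f64 and the caller emits the final
// FP_ROUND (see LowerINT_TO_FP).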
8691 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8692 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8693 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8694 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8695 if (Op->isStrictFPOpcode()) {
8696 if (!Chain)
8697 Chain = Op.getOperand(i: 0);
8698 return DAG.getNode(Opcode: getPPCStrictOpcode(Opc: ConvOpc), DL: dl,
8699 VTList: DAG.getVTList(VT1: ConvTy, VT2: MVT::Other), Ops: {Chain, Src}, Flags);
8700 } else
8701 return DAG.getNode(Opcode: ConvOpc, DL: dl, VT: ConvTy, Operand: Src);
8702}
8703
8704/// Custom lowers integer to floating point conversions to use
8705/// the direct move instructions available in ISA 2.07 to avoid the
8706/// need for load/store combinations.
8707SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8708 SelectionDAG &DAG,
8709 const SDLoc &dl) const {
8710 assert((Op.getValueType() == MVT::f32 ||
8711 Op.getValueType() == MVT::f64) &&
8712 "Invalid floating point type as target of conversion");
8713 assert(Subtarget.hasFPCVT() &&
8714 "Int to FP conversions with direct moves require FPCVT");
8715 SDValue Src = Op.getOperand(i: Op->isStrictFPOpcode() ? 1 : 0);
8716 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8717 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8718 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8719 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8720 SDValue Mov = DAG.getNode(Opcode: MovOpc, DL: dl, VT: MVT::f64, Operand: Src);
8721 return convertIntToFP(Op, Src: Mov, DAG, Subtarget);
8722}
8723
8724static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8725
8726 EVT VecVT = Vec.getValueType();
8727 assert(VecVT.isVector() && "Expected a vector type.");
8728 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8729
8730 EVT EltVT = VecVT.getVectorElementType();
8731 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8732 EVT WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EltVT, NumElements: WideNumElts);
8733
8734 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
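// For example, widening a 32-bit v4i8 concatenates it with three undef v4i8
// vectors to form a 128-bit v16i8.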
8735 SmallVector<SDValue, 16> Ops(NumConcat);
8736 Ops[0] = Vec;
8737 SDValue UndefVec = DAG.getUNDEF(VT: VecVT);
8738 for (unsigned i = 1; i < NumConcat; ++i)
8739 Ops[i] = UndefVec;
8740
8741 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: WideVT, Ops);
8742}
8743
8744SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8745 const SDLoc &dl) const {
8746 bool IsStrict = Op->isStrictFPOpcode();
8747 unsigned Opc = Op.getOpcode();
8748 SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
8749 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8750 Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8751 "Unexpected conversion type");
8752 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8753 "Supports conversions to v2f64/v4f32 only.");
8754
8755 // TODO: Any other flags to propagate?
8756 SDNodeFlags Flags;
8757 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8758
8759 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8760 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8761
8762 SDValue Wide = widenVec(DAG, Vec: Src, dl);
8763 EVT WideVT = Wide.getValueType();
8764 unsigned WideNumElts = WideVT.getVectorNumElements();
8765 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8766
8767 SmallVector<int, 16> ShuffV;
8768 for (unsigned i = 0; i < WideNumElts; ++i)
8769 ShuffV.push_back(Elt: i + WideNumElts);
8770
8771 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8772 int SaveElts = FourEltRes ? 4 : 2;
8773 if (Subtarget.isLittleEndian())
8774 for (int i = 0; i < SaveElts; i++)
8775 ShuffV[i * Stride] = i;
8776 else
8777 for (int i = 1; i <= SaveElts; i++)
8778 ShuffV[i * Stride - 1] = i - 1;
8779
8780 SDValue ShuffleSrc2 =
8781 SignedConv ? DAG.getUNDEF(VT: WideVT) : DAG.getConstant(Val: 0, DL: dl, VT: WideVT);
8782 SDValue Arrange = DAG.getVectorShuffle(VT: WideVT, dl, N1: Wide, N2: ShuffleSrc2, Mask: ShuffV);
8783
8784 SDValue Extend;
8785 if (SignedConv) {
8786 Arrange = DAG.getBitcast(VT: IntermediateVT, V: Arrange);
8787 EVT ExtVT = Src.getValueType();
8788 if (Subtarget.hasP9Altivec())
8789 ExtVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT.getVectorElementType(),
8790 NumElements: IntermediateVT.getVectorNumElements());
8791
8792 Extend = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT: IntermediateVT, N1: Arrange,
8793 N2: DAG.getValueType(ExtVT));
8794 } else
8795 Extend = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntermediateVT, Operand: Arrange);
8796
8797 if (IsStrict)
8798 return DAG.getNode(Opcode: Opc, DL: dl, VTList: DAG.getVTList(VT1: Op.getValueType(), VT2: MVT::Other),
8799 Ops: {Op.getOperand(i: 0), Extend}, Flags);
8800
8801 return DAG.getNode(Opcode: Opc, DL: dl, VT: Op.getValueType(), Operand: Extend);
8802}
8803
8804SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8805 SelectionDAG &DAG) const {
8806 SDLoc dl(Op);
8807 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8808 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8809 bool IsStrict = Op->isStrictFPOpcode();
8810 SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0);
8811 SDValue Chain = IsStrict ? Op.getOperand(i: 0) : DAG.getEntryNode();
8812
8813 // TODO: Any other flags to propagate?
8814 SDNodeFlags Flags;
8815 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8816
8817 EVT InVT = Src.getValueType();
8818 EVT OutVT = Op.getValueType();
8819 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8820 isOperationCustom(Op: Op.getOpcode(), VT: InVT))
8821 return LowerINT_TO_FPVector(Op, DAG, dl);
8822
8823 // Conversions to f128 are legal.
8824 if (Op.getValueType() == MVT::f128)
8825 return Subtarget.hasP9Vector() ? Op : SDValue();
8826
8827 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8828 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8829 return SDValue();
8830
8831 if (Src.getValueType() == MVT::i1) {
8832 SDValue Sel = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: Op.getValueType(), N1: Src,
8833 N2: DAG.getConstantFP(Val: 1.0, DL: dl, VT: Op.getValueType()),
8834 N3: DAG.getConstantFP(Val: 0.0, DL: dl, VT: Op.getValueType()));
8835 if (IsStrict)
8836 return DAG.getMergeValues(Ops: {Sel, Chain}, dl);
8837 else
8838 return Sel;
8839 }
8840
8841 // If we have direct moves, we can do the entire conversion and skip the
8842 // store/load; however, without FPCVT we can't do most conversions.
8843 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8844 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8845 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8846
8847 assert((IsSigned || Subtarget.hasFPCVT()) &&
8848 "UINT_TO_FP is supported only with FPCVT");
8849
8850 if (Src.getValueType() == MVT::i64) {
8851 SDValue SINT = Src;
8852 // When converting to single-precision, we actually need to convert
8853 // to double-precision first and then round to single-precision.
8854 // To avoid double-rounding effects during that operation, we have
8855 // to prepare the input operand. Bits that might be truncated when
8856 // converting to double-precision are replaced by a bit that won't
8857 // be lost at this stage, but is below the single-precision rounding
8858 // position.
8859 //
8860 // However, if -enable-unsafe-fp-math is in effect, accept double
8861 // rounding to avoid the extra overhead.
8862 if (Op.getValueType() == MVT::f32 &&
8863 !Subtarget.hasFPCVT() &&
8864 !DAG.getTarget().Options.UnsafeFPMath) {
8865
8866 // Twiddle input to make sure the low 11 bits are zero. (If this
8867 // is the case, we are guaranteed the value will fit into the 53 bit
8868 // mantissa of an IEEE double-precision value without rounding.)
8869 // If any of those low 11 bits were not zero originally, make sure
8870 // bit 12 (value 2048) is set instead, so that the final rounding
8871 // to single-precision gets the correct result.
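// Illustration: if the low 12 bits of the input are 0x001 they become 0x800
// after the sequence below, while 0x000 stays 0x000 and 0x800 stays 0x800.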
8872 SDValue Round = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i64,
8873 N1: SINT, N2: DAG.getConstant(Val: 2047, DL: dl, VT: MVT::i64));
8874 Round = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i64,
8875 N1: Round, N2: DAG.getConstant(Val: 2047, DL: dl, VT: MVT::i64));
8876 Round = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i64, N1: Round, N2: SINT);
8877 Round = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i64, N1: Round,
8878 N2: DAG.getSignedConstant(Val: -2048, DL: dl, VT: MVT::i64));
8879
8880 // However, we cannot use that value unconditionally: if the magnitude
8881 // of the input value is small, the bit-twiddling we did above might
8882 // end up visibly changing the output. Fortunately, in that case, we
8883 // don't need to twiddle bits since the original input will convert
8884 // exactly to double-precision floating-point already. Therefore,
8885 // construct a conditional to use the original value if the top 11
8886 // bits are all sign-bit copies, and use the rounded value computed
8887 // above otherwise.
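// Concretely, the arithmetic shift by 53 below yields 0 or -1 exactly when
// the top 11 bits are sign-bit copies; adding 1 then gives 0 or 1, so the
// unsigned "greater than 1" compare selects the rounded value only for
// large-magnitude inputs.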
8888 SDValue Cond = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: MVT::i64,
8889 N1: SINT, N2: DAG.getConstant(Val: 53, DL: dl, VT: MVT::i32));
8890 Cond = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::i64,
8891 N1: Cond, N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i64));
8892 Cond = DAG.getSetCC(
8893 DL: dl,
8894 VT: getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: MVT::i64),
8895 LHS: Cond, RHS: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i64), Cond: ISD::SETUGT);
8896
8897 SINT = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i64, N1: Cond, N2: Round, N3: SINT);
8898 }
8899
8900 ReuseLoadInfo RLI;
8901 SDValue Bits;
8902
8903 MachineFunction &MF = DAG.getMachineFunction();
8904 if (canReuseLoadAddress(Op: SINT, MemVT: MVT::i64, RLI, DAG)) {
8905 Bits = DAG.getLoad(VT: MVT::f64, dl, Chain: RLI.Chain, Ptr: RLI.Ptr, PtrInfo: RLI.MPI,
8906 Alignment: RLI.Alignment, MMOFlags: RLI.MMOFlags(), AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8907 if (RLI.ResChain)
8908 DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
8909 } else if (Subtarget.hasLFIWAX() &&
8910 canReuseLoadAddress(Op: SINT, MemVT: MVT::i32, RLI, DAG, ET: ISD::SEXTLOAD)) {
8911 MachineMemOperand *MMO =
8912 MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
8913 BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8914 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8915 Bits = DAG.getMemIntrinsicNode(Opcode: PPCISD::LFIWAX, dl,
8916 VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
8917 Ops, MemVT: MVT::i32, MMO);
8918 if (RLI.ResChain)
8919 DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
8920 } else if (Subtarget.hasFPCVT() &&
8921 canReuseLoadAddress(Op: SINT, MemVT: MVT::i32, RLI, DAG, ET: ISD::ZEXTLOAD)) {
8922 MachineMemOperand *MMO =
8923 MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
8924 BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8925 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8926 Bits = DAG.getMemIntrinsicNode(Opcode: PPCISD::LFIWZX, dl,
8927 VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
8928 Ops, MemVT: MVT::i32, MMO);
8929 if (RLI.ResChain)
8930 DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
8931 } else if (((Subtarget.hasLFIWAX() &&
8932 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8933 (Subtarget.hasFPCVT() &&
8934 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8935 SINT.getOperand(i: 0).getValueType() == MVT::i32) {
8936 MachineFrameInfo &MFI = MF.getFrameInfo();
8937 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
8938
8939 int FrameIdx = MFI.CreateStackObject(Size: 4, Alignment: Align(4), isSpillSlot: false);
8940 SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);
8941
8942 SDValue Store = DAG.getStore(Chain, dl, Val: SINT.getOperand(i: 0), Ptr: FIdx,
8943 PtrInfo: MachinePointerInfo::getFixedStack(
8944 MF&: DAG.getMachineFunction(), FI: FrameIdx));
8945 Chain = Store;
8946
8947 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8948 "Expected an i32 store");
8949
8950 RLI.Ptr = FIdx;
8951 RLI.Chain = Chain;
8952 RLI.MPI =
8953 MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx);
8954 RLI.Alignment = Align(4);
8955
8956 MachineMemOperand *MMO =
8957 MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
8958 BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
8959 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8960 Bits = DAG.getMemIntrinsicNode(Opcode: SINT.getOpcode() == ISD::ZERO_EXTEND ?
8961 PPCISD::LFIWZX : PPCISD::LFIWAX,
8962 dl, VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
8963 Ops, MemVT: MVT::i32, MMO);
8964 Chain = Bits.getValue(R: 1);
8965 } else
8966 Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::f64, Operand: SINT);
8967
8968 SDValue FP = convertIntToFP(Op, Src: Bits, DAG, Subtarget, Chain);
8969 if (IsStrict)
8970 Chain = FP.getValue(R: 1);
8971
8972 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8973 if (IsStrict)
8974 FP = DAG.getNode(
8975 Opcode: ISD::STRICT_FP_ROUND, DL: dl, VTList: DAG.getVTList(VT1: MVT::f32, VT2: MVT::Other),
8976 Ops: {Chain, FP, DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true)},
8977 Flags);
8978 else
8979 FP = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: FP,
8980 N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
8981 }
8982 return FP;
8983 }
8984
8985 assert(Src.getValueType() == MVT::i32 &&
8986 "Unhandled INT_TO_FP type in custom expander!");
8987 // Since we only generate this in 64-bit mode, we can take advantage of
8988 // 64-bit registers. In particular, sign extend the input value into a
8989 // 64-bit register with extsw, store the whole 64-bit value to the stack,
8990 // then lfd it and fcfid it.
8991 MachineFunction &MF = DAG.getMachineFunction();
8992 MachineFrameInfo &MFI = MF.getFrameInfo();
8993 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
8994
8995 SDValue Ld;
8996 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8997 ReuseLoadInfo RLI;
8998 bool ReusingLoad;
8999 if (!(ReusingLoad = canReuseLoadAddress(Op: Src, MemVT: MVT::i32, RLI, DAG))) {
9000 int FrameIdx = MFI.CreateStackObject(Size: 4, Alignment: Align(4), isSpillSlot: false);
9001 SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);
9002
9003 SDValue Store = DAG.getStore(Chain, dl, Val: Src, Ptr: FIdx,
9004 PtrInfo: MachinePointerInfo::getFixedStack(
9005 MF&: DAG.getMachineFunction(), FI: FrameIdx));
9006 Chain = Store;
9007
9008 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9009 "Expected an i32 store");
9010
9011 RLI.Ptr = FIdx;
9012 RLI.Chain = Chain;
9013 RLI.MPI =
9014 MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx);
9015 RLI.Alignment = Align(4);
9016 }
9017
9018 MachineMemOperand *MMO =
9019 MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
9020 BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
9021 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9022 Ld = DAG.getMemIntrinsicNode(Opcode: IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9023 VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other), Ops,
9024 MemVT: MVT::i32, MMO);
9025 Chain = Ld.getValue(R: 1);
9026 if (ReusingLoad && RLI.ResChain) {
9027 DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Ld.getValue(R: 1));
9028 }
9029 } else {
9030 assert(Subtarget.isPPC64() &&
9031 "i32->FP without LFIWAX supported only on PPC64");
9032
9033 int FrameIdx = MFI.CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
9034 SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);
9035
9036 SDValue Ext64 = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MVT::i64, Operand: Src);
9037
9038 // STD the extended value into the stack slot.
9039 SDValue Store = DAG.getStore(
9040 Chain, dl, Val: Ext64, Ptr: FIdx,
9041 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx));
9042 Chain = Store;
9043
9044 // Load the value as a double.
9045 Ld = DAG.getLoad(
9046 VT: MVT::f64, dl, Chain, Ptr: FIdx,
9047 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: FrameIdx));
9048 Chain = Ld.getValue(R: 1);
9049 }
9050
9051 // FCFID it and return it.
9052 SDValue FP = convertIntToFP(Op, Src: Ld, DAG, Subtarget, Chain);
9053 if (IsStrict)
9054 Chain = FP.getValue(R: 1);
9055 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9056 if (IsStrict)
9057 FP = DAG.getNode(
9058 Opcode: ISD::STRICT_FP_ROUND, DL: dl, VTList: DAG.getVTList(VT1: MVT::f32, VT2: MVT::Other),
9059 Ops: {Chain, FP, DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true)}, Flags);
9060 else
9061 FP = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: FP,
9062 N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
9063 }
9064 return FP;
9065}
9066
9067SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9068 SelectionDAG &DAG) const {
9069 SDLoc Dl(Op);
9070 MachineFunction &MF = DAG.getMachineFunction();
9071 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
9072 SDValue Chain = Op.getOperand(i: 0);
9073
9074 // If the requested mode is constant, just use the simpler mtfsb/mffscrni.
9075 if (auto *CVal = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))) {
9076 uint64_t Mode = CVal->getZExtValue();
9077 assert(Mode < 4 && "Unsupported rounding mode!");
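// Map the LLVM rounding mode to the FPSCR RN encoding (see LowerGET_ROUNDING):
//   0 (round to zero)    -> 0b01
//   1 (round to nearest) -> 0b00
//   2 (round to +inf)    -> 0b10
//   3 (round to -inf)    -> 0b11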
9078 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9079 if (Subtarget.isISA3_0())
9080 return SDValue(
9081 DAG.getMachineNode(
9082 Opcode: PPC::MFFSCRNI, dl: Dl, ResultTys: {MVT::f64, MVT::Other},
9083 Ops: {DAG.getConstant(Val: InternalRnd, DL: Dl, VT: MVT::i32, isTarget: true), Chain}),
9084 1);
9085 SDNode *SetHi = DAG.getMachineNode(
9086 Opcode: (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, dl: Dl, VT: MVT::Other,
9087 Ops: {DAG.getConstant(Val: 30, DL: Dl, VT: MVT::i32, isTarget: true), Chain});
9088 SDNode *SetLo = DAG.getMachineNode(
9089 Opcode: (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, dl: Dl, VT: MVT::Other,
9090 Ops: {DAG.getConstant(Val: 31, DL: Dl, VT: MVT::i32, isTarget: true), SDValue(SetHi, 0)});
9091 return SDValue(SetLo, 0);
9092 }
9093
9094 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
9095 SDValue One = DAG.getConstant(Val: 1, DL: Dl, VT: MVT::i32);
9096 SDValue SrcFlag = DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i32, N1: Op.getOperand(i: 1),
9097 N2: DAG.getConstant(Val: 3, DL: Dl, VT: MVT::i32));
9098 SDValue DstFlag = DAG.getNode(
9099 Opcode: ISD::XOR, DL: Dl, VT: MVT::i32, N1: SrcFlag,
9100 N2: DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i32,
9101 N1: DAG.getNOT(DL: Dl,
9102 Val: DAG.getNode(Opcode: ISD::SRL, DL: Dl, VT: MVT::i32, N1: SrcFlag, N2: One),
9103 VT: MVT::i32),
9104 N2: One));
9105 // For Power9, there is the faster mffscrn, so we don't need to read FPSCR.
9106 SDValue MFFS;
9107 if (!Subtarget.isISA3_0()) {
9108 MFFS = DAG.getNode(Opcode: PPCISD::MFFS, DL: Dl, ResultTys: {MVT::f64, MVT::Other}, Ops: Chain);
9109 Chain = MFFS.getValue(R: 1);
9110 }
9111 SDValue NewFPSCR;
9112 if (Subtarget.isPPC64()) {
9113 if (Subtarget.isISA3_0()) {
9114 NewFPSCR = DAG.getAnyExtOrTrunc(Op: DstFlag, DL: Dl, VT: MVT::i64);
9115 } else {
9116 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9117 SDNode *InsertRN = DAG.getMachineNode(
9118 Opcode: PPC::RLDIMI, dl: Dl, VT: MVT::i64,
9119 Ops: {DAG.getNode(Opcode: ISD::BITCAST, DL: Dl, VT: MVT::i64, Operand: MFFS),
9120 DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: Dl, VT: MVT::i64, Operand: DstFlag),
9121 DAG.getTargetConstant(Val: 0, DL: Dl, VT: MVT::i32),
9122 DAG.getTargetConstant(Val: 62, DL: Dl, VT: MVT::i32)});
9123 NewFPSCR = SDValue(InsertRN, 0);
9124 }
9125 NewFPSCR = DAG.getNode(Opcode: ISD::BITCAST, DL: Dl, VT: MVT::f64, Operand: NewFPSCR);
9126 } else {
9127 // In 32-bit mode, store the f64, then load and update its lower half.
9128 int SSFI = MF.getFrameInfo().CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
9129 SDValue StackSlot = DAG.getFrameIndex(FI: SSFI, VT: PtrVT);
9130 SDValue Addr = Subtarget.isLittleEndian()
9131 ? StackSlot
9132 : DAG.getNode(Opcode: ISD::ADD, DL: Dl, VT: PtrVT, N1: StackSlot,
9133 N2: DAG.getConstant(Val: 4, DL: Dl, VT: PtrVT));
9134 if (Subtarget.isISA3_0()) {
9135 Chain = DAG.getStore(Chain, dl: Dl, Val: DstFlag, Ptr: Addr, PtrInfo: MachinePointerInfo());
9136 } else {
9137 Chain = DAG.getStore(Chain, dl: Dl, Val: MFFS, Ptr: StackSlot, PtrInfo: MachinePointerInfo());
9138 SDValue Tmp =
9139 DAG.getLoad(VT: MVT::i32, dl: Dl, Chain, Ptr: Addr, PtrInfo: MachinePointerInfo());
9140 Chain = Tmp.getValue(R: 1);
9141 Tmp = SDValue(DAG.getMachineNode(
9142 Opcode: PPC::RLWIMI, dl: Dl, VT: MVT::i32,
9143 Ops: {Tmp, DstFlag, DAG.getTargetConstant(Val: 0, DL: Dl, VT: MVT::i32),
9144 DAG.getTargetConstant(Val: 30, DL: Dl, VT: MVT::i32),
9145 DAG.getTargetConstant(Val: 31, DL: Dl, VT: MVT::i32)}),
9146 0);
9147 Chain = DAG.getStore(Chain, dl: Dl, Val: Tmp, Ptr: Addr, PtrInfo: MachinePointerInfo());
9148 }
9149 NewFPSCR =
9150 DAG.getLoad(VT: MVT::f64, dl: Dl, Chain, Ptr: StackSlot, PtrInfo: MachinePointerInfo());
9151 Chain = NewFPSCR.getValue(R: 1);
9152 }
9153 if (Subtarget.isISA3_0())
9154 return SDValue(DAG.getMachineNode(Opcode: PPC::MFFSCRN, dl: Dl, ResultTys: {MVT::f64, MVT::Other},
9155 Ops: {NewFPSCR, Chain}),
9156 1);
9157 SDValue Zero = DAG.getConstant(Val: 0, DL: Dl, VT: MVT::i32, isTarget: true);
9158 SDNode *MTFSF = DAG.getMachineNode(
9159 Opcode: PPC::MTFSF, dl: Dl, VT: MVT::Other,
9160 Ops: {DAG.getConstant(Val: 255, DL: Dl, VT: MVT::i32, isTarget: true), NewFPSCR, Zero, Zero, Chain});
9161 return SDValue(MTFSF, 0);
9162}
9163
9164SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9165 SelectionDAG &DAG) const {
9166 SDLoc dl(Op);
9167 /*
9168 The rounding mode is in bits 30:31 of the FPSCR, and has the following
9169 settings:
9170 00 Round to nearest
9171 01 Round to 0
9172 10 Round to +inf
9173 11 Round to -inf
9174
9175 GET_ROUNDING, on the other hand, expects the following:
9176 -1 Undefined
9177 0 Round to 0
9178 1 Round to nearest
9179 2 Round to +inf
9180 3 Round to -inf
9181
9182 To perform the conversion, we do:
9183 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
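   For example, RN = 0b00 (round to nearest) gives
   (0 & 0x3) ^ ((~0 & 0x3) >> 1) = 0 ^ 1 = 1, the GET_ROUNDING value for
   round to nearest.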
9184 */
9185
9186 MachineFunction &MF = DAG.getMachineFunction();
9187 EVT VT = Op.getValueType();
9188 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
9189
9190 // Save FP Control Word to register
9191 SDValue Chain = Op.getOperand(i: 0);
9192 SDValue MFFS = DAG.getNode(Opcode: PPCISD::MFFS, DL: dl, ResultTys: {MVT::f64, MVT::Other}, Ops: Chain);
9193 Chain = MFFS.getValue(R: 1);
9194
9195 SDValue CWD;
9196 if (isTypeLegal(VT: MVT::i64)) {
9197 CWD = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32,
9198 Operand: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i64, Operand: MFFS));
9199 } else {
9200 // Save FP register to stack slot
9201 int SSFI = MF.getFrameInfo().CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
9202 SDValue StackSlot = DAG.getFrameIndex(FI: SSFI, VT: PtrVT);
9203 Chain = DAG.getStore(Chain, dl, Val: MFFS, Ptr: StackSlot, PtrInfo: MachinePointerInfo());
9204
9205 // Load FP Control Word from low 32 bits of stack slot.
9206 assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
9207 "Stack slot adjustment is valid only on big endian subtargets!");
9208 SDValue Four = DAG.getConstant(Val: 4, DL: dl, VT: PtrVT);
9209 SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackSlot, N2: Four);
9210 CWD = DAG.getLoad(VT: MVT::i32, dl, Chain, Ptr: Addr, PtrInfo: MachinePointerInfo());
9211 Chain = CWD.getValue(R: 1);
9212 }
9213
9214 // Transform as necessary
9215 SDValue CWD1 =
9216 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
9217 N1: CWD, N2: DAG.getConstant(Val: 3, DL: dl, VT: MVT::i32));
9218 SDValue CWD2 =
9219 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32,
9220 N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
9221 N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32,
9222 N1: CWD, N2: DAG.getConstant(Val: 3, DL: dl, VT: MVT::i32)),
9223 N2: DAG.getConstant(Val: 3, DL: dl, VT: MVT::i32)),
9224 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
9225
9226 SDValue RetVal =
9227 DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32, N1: CWD1, N2: CWD2);
9228
9229 RetVal =
9230 DAG.getNode(Opcode: (VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9231 DL: dl, VT, Operand: RetVal);
9232
9233 return DAG.getMergeValues(Ops: {RetVal, Chain}, dl);
9234}
9235
9236SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9237 EVT VT = Op.getValueType();
9238 uint64_t BitWidth = VT.getSizeInBits();
9239 SDLoc dl(Op);
9240 assert(Op.getNumOperands() == 3 &&
9241 VT == Op.getOperand(1).getValueType() &&
9242 "Unexpected SHL!");
9243
9244 // Expand into a bunch of logical ops. Note that these ops
9245 // depend on the PPC behavior for oversized shift amounts.
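// The computation below implements, for a shift amount Amt in [0, BitWidth):
//   OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth))
//   OutLo = Lo << Amt
// The unwanted Lo term is zero because PPC shifts produce 0 when the shift
// amount is BitWidth or larger.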
9246 SDValue Lo = Op.getOperand(i: 0);
9247 SDValue Hi = Op.getOperand(i: 1);
9248 SDValue Amt = Op.getOperand(i: 2);
9249 EVT AmtVT = Amt.getValueType();
9250
9251 SDValue Tmp1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT,
9252 N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Amt);
9253 SDValue Tmp2 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Hi, N2: Amt);
9254 SDValue Tmp3 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Lo, N2: Tmp1);
9255 SDValue Tmp4 = DAG.getNode(Opcode: ISD::OR , DL: dl, VT, N1: Tmp2, N2: Tmp3);
9256 SDValue Tmp5 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AmtVT, N1: Amt,
9257 N2: DAG.getSignedConstant(Val: -BitWidth, DL: dl, VT: AmtVT));
9258 SDValue Tmp6 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Lo, N2: Tmp5);
9259 SDValue OutHi = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp6);
9260 SDValue OutLo = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Lo, N2: Amt);
9261 SDValue OutOps[] = { OutLo, OutHi };
9262 return DAG.getMergeValues(Ops: OutOps, dl);
9263}
9264
9265SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9266 EVT VT = Op.getValueType();
9267 SDLoc dl(Op);
9268 uint64_t BitWidth = VT.getSizeInBits();
9269 assert(Op.getNumOperands() == 3 &&
9270 VT == Op.getOperand(1).getValueType() &&
9271 "Unexpected SRL!");
9272
9273 // Expand into a bunch of logical ops. Note that these ops
9274 // depend on the PPC behavior for oversized shift amounts.
9275 SDValue Lo = Op.getOperand(i: 0);
9276 SDValue Hi = Op.getOperand(i: 1);
9277 SDValue Amt = Op.getOperand(i: 2);
9278 EVT AmtVT = Amt.getValueType();
9279
9280 SDValue Tmp1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT,
9281 N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Amt);
9282 SDValue Tmp2 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Lo, N2: Amt);
9283 SDValue Tmp3 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Hi, N2: Tmp1);
9284 SDValue Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9285 SDValue Tmp5 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AmtVT, N1: Amt,
9286 N2: DAG.getSignedConstant(Val: -BitWidth, DL: dl, VT: AmtVT));
9287 SDValue Tmp6 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Hi, N2: Tmp5);
9288 SDValue OutLo = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp6);
9289 SDValue OutHi = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Hi, N2: Amt);
9290 SDValue OutOps[] = { OutLo, OutHi };
9291 return DAG.getMergeValues(Ops: OutOps, dl);
9292}
9293
9294SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9295 SDLoc dl(Op);
9296 EVT VT = Op.getValueType();
9297 uint64_t BitWidth = VT.getSizeInBits();
9298 assert(Op.getNumOperands() == 3 &&
9299 VT == Op.getOperand(1).getValueType() &&
9300 "Unexpected SRA!");
9301
9302 // Expand into a bunch of logical ops, followed by a select_cc.
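// Unlike SRL, SRA of an oversized amount does not produce zero (it produces
// copies of the sign bit), so the low result is selected between the
// "Amt <= BitWidth" combination (Tmp4) and the large-amount case (Tmp6).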
9303 SDValue Lo = Op.getOperand(i: 0);
9304 SDValue Hi = Op.getOperand(i: 1);
9305 SDValue Amt = Op.getOperand(i: 2);
9306 EVT AmtVT = Amt.getValueType();
9307
9308 SDValue Tmp1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT,
9309 N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Amt);
9310 SDValue Tmp2 = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Lo, N2: Amt);
9311 SDValue Tmp3 = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: Hi, N2: Tmp1);
9312 SDValue Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9313 SDValue Tmp5 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: AmtVT, N1: Amt,
9314 N2: DAG.getSignedConstant(Val: -BitWidth, DL: dl, VT: AmtVT));
9315 SDValue Tmp6 = DAG.getNode(Opcode: PPCISD::SRA, DL: dl, VT, N1: Hi, N2: Tmp5);
9316 SDValue OutHi = DAG.getNode(Opcode: PPCISD::SRA, DL: dl, VT, N1: Hi, N2: Amt);
9317 SDValue OutLo = DAG.getSelectCC(DL: dl, LHS: Tmp5, RHS: DAG.getConstant(Val: 0, DL: dl, VT: AmtVT),
9318 True: Tmp4, False: Tmp6, Cond: ISD::SETLE);
9319 SDValue OutOps[] = { OutLo, OutHi };
9320 return DAG.getMergeValues(Ops: OutOps, dl);
9321}
9322
9323SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9324 SelectionDAG &DAG) const {
9325 SDLoc dl(Op);
9326 EVT VT = Op.getValueType();
9327 unsigned BitWidth = VT.getSizeInBits();
9328
9329 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9330 SDValue X = Op.getOperand(i: 0);
9331 SDValue Y = Op.getOperand(i: 1);
9332 SDValue Z = Op.getOperand(i: 2);
9333 EVT AmtVT = Z.getValueType();
9334
9335 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9336 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9337 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9338 // on PowerPC shift by BW being well defined.
9339 Z = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: AmtVT, N1: Z,
9340 N2: DAG.getConstant(Val: BitWidth - 1, DL: dl, VT: AmtVT));
9341 SDValue SubZ =
9342 DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: AmtVT, N1: DAG.getConstant(Val: BitWidth, DL: dl, VT: AmtVT), N2: Z);
9343 X = DAG.getNode(Opcode: PPCISD::SHL, DL: dl, VT, N1: X, N2: IsFSHL ? Z : SubZ);
9344 Y = DAG.getNode(Opcode: PPCISD::SRL, DL: dl, VT, N1: Y, N2: IsFSHL ? SubZ : Z);
9345 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: X, N2: Y);
9346}
9347
9348//===----------------------------------------------------------------------===//
9349// Vector related lowering.
9350//
9351
9352/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9353/// element size of SplatSize. Cast the result to VT.
9354static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9355 SelectionDAG &DAG, const SDLoc &dl) {
9356 static const MVT VTys[] = { // canonical VT to use for each size.
9357 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9358 };
9359
9360 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9361
9362 // For a splat of all ones, turn it into a vspltisb 0xFF to canonicalize.
9363 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9364 SplatSize = 1;
9365 Val = 0xFF;
9366 }
9367
9368 EVT CanonicalVT = VTys[SplatSize-1];
9369
9370 // Build a canonical splat for this value.
9371 // Explicitly truncate APInt here, as this API is used with a mix of
9372 // signed and unsigned values.
9373 return DAG.getBitcast(
9374 VT: ReqVT,
9375 V: DAG.getConstant(Val: APInt(64, Val).trunc(width: SplatSize * 8), DL: dl, VT: CanonicalVT));
9376}
9377
9378/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9379/// specified intrinsic ID.
9380static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9381 const SDLoc &dl, EVT DestVT = MVT::Other) {
9382 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9383 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: DestVT,
9384 N1: DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32), N2: Op);
9385}
9386
9387/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9388/// specified intrinsic ID.
9389static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9390 SelectionDAG &DAG, const SDLoc &dl,
9391 EVT DestVT = MVT::Other) {
9392 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9393 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: DestVT,
9394 N1: DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32), N2: LHS, N3: RHS);
9395}
9396
9397/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9398/// specified intrinsic ID.
9399static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9400 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9401 EVT DestVT = MVT::Other) {
9402 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9403 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: DestVT,
9404 N1: DAG.getConstant(Val: IID, DL: dl, VT: MVT::i32), N2: Op0, N3: Op1, N4: Op2);
9405}
9406
9407/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9408/// amount. The result has the specified value type.
9409static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9410 SelectionDAG &DAG, const SDLoc &dl) {
9411 // Force LHS/RHS to be the right type.
9412 LHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: LHS);
9413 RHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: RHS);
9414
9415 int Ops[16];
9416 for (unsigned i = 0; i != 16; ++i)
9417 Ops[i] = i + Amt;
9418 SDValue T = DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: LHS, N2: RHS, Mask: Ops);
9419 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: T);
9420}
9421
9422/// Do we have an efficient pattern in a .td file for this node?
9423///
9424/// \param V - pointer to the BuildVectorSDNode being matched
9425/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9426///
9427/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9428/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9429/// the opposite is true (expansion is beneficial) are:
9430/// - The node builds a vector out of integers that are not 32 or 64-bits
9431/// - The node builds a vector out of constants
9432/// - The node is a "load-and-splat"
9433/// In all other cases, we will choose to keep the BUILD_VECTOR.
9434static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9435 bool HasDirectMove,
9436 bool HasP8Vector) {
9437 EVT VecVT = V->getValueType(ResNo: 0);
9438 bool RightType = VecVT == MVT::v2f64 ||
9439 (HasP8Vector && VecVT == MVT::v4f32) ||
9440 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9441 if (!RightType)
9442 return false;
9443
9444 bool IsSplat = true;
9445 bool IsLoad = false;
9446 SDValue Op0 = V->getOperand(Num: 0);
9447
9448 // This function is called in a block that confirms the node is not a constant
9449 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9450 // different constants.
9451 if (V->isConstant())
9452 return false;
9453 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9454 if (V->getOperand(Num: i).isUndef())
9455 return false;
9456 // We want to expand nodes that represent load-and-splat even if the
9457 // loaded value is a floating point truncation or conversion to int.
9458 if (V->getOperand(Num: i).getOpcode() == ISD::LOAD ||
9459 (V->getOperand(Num: i).getOpcode() == ISD::FP_ROUND &&
9460 V->getOperand(Num: i).getOperand(i: 0).getOpcode() == ISD::LOAD) ||
9461 (V->getOperand(Num: i).getOpcode() == ISD::FP_TO_SINT &&
9462 V->getOperand(Num: i).getOperand(i: 0).getOpcode() == ISD::LOAD) ||
9463 (V->getOperand(Num: i).getOpcode() == ISD::FP_TO_UINT &&
9464 V->getOperand(Num: i).getOperand(i: 0).getOpcode() == ISD::LOAD))
9465 IsLoad = true;
9466 // If the operands are different or the input is not a load and has more
9467 // uses than just this BV node, then it isn't a splat.
9468 if (V->getOperand(Num: i) != Op0 ||
9469 (!IsLoad && !V->isOnlyUserOf(N: V->getOperand(Num: i).getNode())))
9470 IsSplat = false;
9471 }
9472 return !(IsSplat && IsLoad);
9473}
9474
9475// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9476SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9477
9478 SDLoc dl(Op);
9479 SDValue Op0 = Op->getOperand(Num: 0);
9480
9481 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9482 (Op.getValueType() != MVT::f128))
9483 return SDValue();
9484
9485 SDValue Lo = Op0.getOperand(i: 0);
9486 SDValue Hi = Op0.getOperand(i: 1);
9487 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9488 return SDValue();
9489
9490 if (!Subtarget.isLittleEndian())
9491 std::swap(a&: Lo, b&: Hi);
9492
9493 return DAG.getNode(Opcode: PPCISD::BUILD_FP128, DL: dl, VT: MVT::f128, N1: Lo, N2: Hi);
9494}
9495
9496static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9497 const SDValue *InputLoad = &Op;
9498 while (InputLoad->getOpcode() == ISD::BITCAST)
9499 InputLoad = &InputLoad->getOperand(i: 0);
9500 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9501 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9502 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9503 InputLoad = &InputLoad->getOperand(i: 0);
9504 }
9505 if (InputLoad->getOpcode() != ISD::LOAD)
9506 return nullptr;
9507 LoadSDNode *LD = cast<LoadSDNode>(Val: *InputLoad);
9508 return ISD::isNormalLoad(N: LD) ? InputLoad : nullptr;
9509}
9510
9511 // Convert the argument APFloat to a single precision APFloat if there is no
9512 // loss of information during the conversion and the resulting number is not
9513 // a denormal. Return true if successful.
9514bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9515 APFloat APFloatToConvert = ArgAPFloat;
9516 bool LosesInfo = true;
9517 APFloatToConvert.convert(ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven,
9518 losesInfo: &LosesInfo);
9519 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9520 if (Success)
9521 ArgAPFloat = APFloatToConvert;
9522 return Success;
9523}
9524
9525 // Bitcast the argument APInt to a double and convert it to a single
9526 // precision APFloat. If there is no loss of information during the
9527 // conversion and the result is not a denormal, bitcast the APFloat back
9528 // to an APInt and assign it to the original argument. Return true if
9529 // successful.
9530bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9531 double DpValue = ArgAPInt.bitsToDouble();
9532 APFloat APFloatDp(DpValue);
9533 bool Success = convertToNonDenormSingle(ArgAPFloat&: APFloatDp);
9534 if (Success)
9535 ArgAPInt = APFloatDp.bitcastToAPInt();
9536 return Success;
9537}
9538
9539 // Nondestructive check for convertToNonDenormSingle.
9540bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9541 // Only convert if it loses info, since XXSPLTIDP should
9542 // handle the other case.
9543 APFloat APFloatToConvert = ArgAPFloat;
9544 bool LosesInfo = true;
9545 APFloatToConvert.convert(ToSemantics: APFloat::IEEEsingle(), RM: APFloat::rmNearestTiesToEven,
9546 losesInfo: &LosesInfo);
9547
9548 return (!LosesInfo && !APFloatToConvert.isDenormal());
9549}
9550
9551static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9552 unsigned &Opcode) {
9553 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Val: Op.getOperand(i: 0));
9554 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(N: InputNode))
9555 return false;
9556
9557 EVT Ty = Op->getValueType(ResNo: 0);
9558 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9559 // as we cannot handle extending loads for these types.
9560 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9561 ISD::isNON_EXTLoad(N: InputNode))
9562 return true;
9563
9564 EVT MemVT = InputNode->getMemoryVT();
9565 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9566 // memory VT is the same vector element VT type.
9567 // The loads feeding into the v8i16 and v16i8 types will be extending because
9568 // scalar i8/i16 are not legal types.
9569 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(N: InputNode) &&
9570 (MemVT == Ty.getVectorElementType()))
9571 return true;
9572
9573 if (Ty == MVT::v2i64) {
9574 // Check the extend type, when the input type is i32, and the output vector
9575 // type is v2i64.
9576 if (MemVT == MVT::i32) {
9577 if (ISD::isZEXTLoad(N: InputNode))
9578 Opcode = PPCISD::ZEXT_LD_SPLAT;
9579 if (ISD::isSEXTLoad(N: InputNode))
9580 Opcode = PPCISD::SEXT_LD_SPLAT;
9581 }
9582 return true;
9583 }
9584 return false;
9585}
9586
9587// If this is a case we can't handle, return null and let the default
9588// expansion code take care of it. If we CAN select this case, and if it
9589// selects to a single instruction, return Op. Otherwise, if we can codegen
9590// this case more efficiently than a constant pool load, lower it to the
9591// sequence of ops that should be used.
9592SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9593 SelectionDAG &DAG) const {
9594 SDLoc dl(Op);
9595 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Val: Op.getNode());
9596 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9597
9598 // Check if this is a splat of a constant value.
9599 APInt APSplatBits, APSplatUndef;
9600 unsigned SplatBitSize;
9601 bool HasAnyUndefs;
9602 bool BVNIsConstantSplat =
9603 BVN->isConstantSplat(SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize,
9604 HasAnyUndefs, MinSplatBits: 0, isBigEndian: !Subtarget.isLittleEndian());
9605
9606 // If it is a splat of a double, check if we can shrink it to a 32 bit
9607 // non-denormal float which when converted back to double gives us the same
9608 // double. This is to exploit the XXSPLTIDP instruction.
9609 // If we lose precision, we use XXSPLTI32DX.
9610 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9611 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9612 // Check the type first to short-circuit so we don't modify APSplatBits if
9613 // this block isn't executed.
9614 if ((Op->getValueType(ResNo: 0) == MVT::v2f64) &&
9615 convertToNonDenormSingle(ArgAPInt&: APSplatBits)) {
9616 SDValue SplatNode = DAG.getNode(
9617 Opcode: PPCISD::XXSPLTI_SP_TO_DP, DL: dl, VT: MVT::v2f64,
9618 Operand: DAG.getTargetConstant(Val: APSplatBits.getZExtValue(), DL: dl, VT: MVT::i32));
9619 return DAG.getBitcast(VT: Op.getValueType(), V: SplatNode);
9620 } else {
9621 // We may lose precision, so we have to use XXSPLTI32DX.
9622
9623 uint32_t Hi = Hi_32(Value: APSplatBits.getZExtValue());
9624 uint32_t Lo = Lo_32(Value: APSplatBits.getZExtValue());
9625 SDValue SplatNode = DAG.getUNDEF(VT: MVT::v2i64);
9626
9627 if (!Hi || !Lo)
9628 // If either half is 0, generate XXLXOR to set the register to 0.
9629 SplatNode = DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::v2i64);
9630
9631 if (Hi)
9632 SplatNode = DAG.getNode(
9633 Opcode: PPCISD::XXSPLTI32DX, DL: dl, VT: MVT::v2i64, N1: SplatNode,
9634 N2: DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32),
9635 N3: DAG.getTargetConstant(Val: Hi, DL: dl, VT: MVT::i32));
9636
9637 if (Lo)
9638 SplatNode =
9639 DAG.getNode(Opcode: PPCISD::XXSPLTI32DX, DL: dl, VT: MVT::v2i64, N1: SplatNode,
9640 N2: DAG.getTargetConstant(Val: 1, DL: dl, VT: MVT::i32),
9641 N3: DAG.getTargetConstant(Val: Lo, DL: dl, VT: MVT::i32));
9642
9643 return DAG.getBitcast(VT: Op.getValueType(), V: SplatNode);
9644 }
9645 }
9646
9647 bool IsSplat64 = false;
9648 uint64_t SplatBits = 0;
9649 int32_t SextVal = 0;
9650 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9651 SplatBits = APSplatBits.getZExtValue();
9652 if (SplatBitSize <= 32) {
9653 SextVal = SignExtend32(X: SplatBits, B: SplatBitSize);
9654 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9655 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9656 bool P9Vector = Subtarget.hasP9Vector();
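// With P9 vector instructions a 64-bit splat in [-128,127] can be formed
// from a byte splat plus a sign extend; without them only [-16,15] is
// reachable via vspltisw plus vupklsw (see the splat handling below).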
9657 int32_t Hi = P9Vector ? 127 : 15;
9658 int32_t Lo = P9Vector ? -128 : -16;
9659 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9660 SextVal = static_cast<int32_t>(SplatBits);
9661 }
9662 }
9663
9664 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9665 unsigned NewOpcode = PPCISD::LD_SPLAT;
9666
9667 // Handle load-and-splat patterns as we have instructions that will do this
9668 // in one go.
9669 if (DAG.isSplatValue(V: Op, AllowUndefs: true) &&
9670 isValidSplatLoad(Subtarget, Op, Opcode&: NewOpcode)) {
9671 const SDValue *InputLoad = &Op.getOperand(i: 0);
9672 LoadSDNode *LD = cast<LoadSDNode>(Val: *InputLoad);
9673
9674 // If the input load is an extending load, it will be an i32 -> i64
9675 // extending load and isValidSplatLoad() will update NewOpcode.
9676 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9677 unsigned ElementSize =
9678 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9679
9680 assert(((ElementSize == 2 * MemorySize)
9681 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9682 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9683 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9684 "Unmatched element size and opcode!\n");
9685
9686 // To check for a single use of this load, we have to check for vector
9687 // width (128 bits) / ElementSize uses (since each operand of the
9688 // BUILD_VECTOR is a separate use of the value).
9689 unsigned NumUsesOfInputLD = 128 / ElementSize;
9690 for (SDValue BVInOp : Op->ops())
9691 if (BVInOp.isUndef())
9692 NumUsesOfInputLD--;
9693
9694 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9695 // The cases below would also apply to "lfiwzx/lfiwax + LE target + index
9696 // 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9697 // 15", but isValidSplatLoad() currently only returns true when the
9698 // element at index 0 is the load, so we do not run into trouble for
9699 // these cases.
9700 //
9701 // case 1 - lfiwzx/lfiwax
9702 // 1.1: load result is i32 and is sign/zero extend to i64;
9703 // 1.2: build a v2i64 vector type with above loaded value;
9704 // 1.3: the vector has only one value at index 0, others are all undef;
9705 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9706 if (NumUsesOfInputLD == 1 &&
9707 (Op->getValueType(ResNo: 0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9708 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9709 Subtarget.hasLFIWAX()))
9710 return SDValue();
9711
9712 // case 2 - lxvr[hb]x
9713 // 2.1: load result is at most i16;
9714 // 2.2: build a vector with above loaded value;
9715 // 2.3: the vector has only one value at index 0, others are all undef;
9716 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9717 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9718 Subtarget.isISA3_1() && ElementSize <= 16)
9719 return SDValue();
9720
9721 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9722 if (InputLoad->getNode()->hasNUsesOfValue(NUses: NumUsesOfInputLD, Value: 0) &&
9723 Subtarget.hasVSX()) {
9724 SDValue Ops[] = {
9725 LD->getChain(), // Chain
9726 LD->getBasePtr(), // Ptr
9727 DAG.getValueType(Op.getValueType()) // VT
9728 };
9729 SDValue LdSplt = DAG.getMemIntrinsicNode(
9730 Opcode: NewOpcode, dl, VTList: DAG.getVTList(VT1: Op.getValueType(), VT2: MVT::Other), Ops,
9731 MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
9732 // Replace all uses of the output chain of the original load with the
9733 // output chain of the new load.
9734 DAG.ReplaceAllUsesOfValueWith(From: InputLoad->getValue(R: 1),
9735 To: LdSplt.getValue(R: 1));
9736 return LdSplt;
9737 }
9738 }
9739
9740 // In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up to
9741 // 32 bits can be lowered to VSX instructions under certain conditions.
9742 // Without VSX, there is no pattern more efficient than expanding the node.
9743 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9744 haveEfficientBuildVectorPattern(V: BVN, HasDirectMove: Subtarget.hasDirectMove(),
9745 HasP8Vector: Subtarget.hasP8Vector()))
9746 return Op;
9747 return SDValue();
9748 }
9749
9750 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9751 unsigned SplatSize = SplatBitSize / 8;
9752
9753 // First, handle single instruction cases.
9754
9755 // All zeros?
9756 if (SplatBits == 0) {
9757 // Canonicalize all zero vectors to be v4i32.
9758 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9759 SDValue Z = DAG.getConstant(Val: 0, DL: dl, VT: MVT::v4i32);
9760 Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Z);
9761 }
9762 return Op;
9763 }
9764
9765 // We have XXSPLTIW for constant splats four bytes wide.
9766 // Since the vector length is a multiple of 4 bytes, 2-byte splats can be
9767 // replaced with 4-byte splats. We replicate SplatBits in the 2-byte case to
9768 // make a 4-byte splat element. For example, a 2-byte splat of 0xABAB can be
9769 // turned into a 4-byte splat of 0xABABABAB.
9770 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9771 return getCanonicalConstSplat(Val: SplatBits | (SplatBits << 16), SplatSize: SplatSize * 2,
9772 VT: Op.getValueType(), DAG, dl);
9773
9774 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9775 return getCanonicalConstSplat(Val: SplatBits, SplatSize, VT: Op.getValueType(), DAG,
9776 dl);
9777
9778 // We have XXSPLTIB for constant splats one byte wide.
9779 if (Subtarget.hasP9Vector() && SplatSize == 1)
9780 return getCanonicalConstSplat(Val: SplatBits, SplatSize, VT: Op.getValueType(), DAG,
9781 dl);
9782
9783 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9784 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9785 if (SextVal >= -16 && SextVal <= 15) {
9786 // SplatSize may be 1, 2, 4, or 8. For size 8, splat as size 4 and then
9787 // sign-extend the splatted word with vupklsw.
9788 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9789 SDValue Res =
9790 getCanonicalConstSplat(Val: SextVal, SplatSize: UseSize, VT: Op.getValueType(), DAG, dl);
9791 if (SplatSize != 8)
9792 return Res;
9793 return BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vupklsw, Op: Res, DAG, dl);
9794 }
9795
9796 // Two instruction sequences.
9797
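// With P9 vector instructions, a splat of a value in [-128,127] can be done
// with a v16i8 splat of the byte followed by a sign extension of each
// element to the final width (vupklsb / vextsb2w / vextsb2d).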
9798 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9799 SDValue C = DAG.getConstant(Val: (unsigned char)SextVal, DL: dl, VT: MVT::i32);
9800 SmallVector<SDValue, 16> Ops(16, C);
9801 SDValue BV = DAG.getBuildVector(VT: MVT::v16i8, DL: dl, Ops);
9802 unsigned IID;
9803 switch (SplatSize) {
9804 default:
9805 llvm_unreachable("Unexpected type for vector constant.");
9806 case 2:
9807 IID = Intrinsic::ppc_altivec_vupklsb;
9808 break;
9809 case 4:
9810 IID = Intrinsic::ppc_altivec_vextsb2w;
9811 break;
9812 case 8:
9813 IID = Intrinsic::ppc_altivec_vextsb2d;
9814 break;
9815 }
9816 SDValue Extend = BuildIntrinsicOp(IID, Op: BV, DAG, dl);
9817 return DAG.getBitcast(VT: Op->getValueType(ResNo: 0), V: Extend);
9818 }
9819 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9820
9821 // If this value is in the range [-32,30] and is even, use:
9822 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9823 // If this value is in the range [17,31] and is odd, use:
9824 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9825 // If this value is in the range [-31,-17] and is odd, use:
9826 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9827 // Note the last two are three-instruction sequences.
9828 if (SextVal >= -32 && SextVal <= 31) {
9829 // To avoid having these optimizations undone by constant folding,
9830 // we convert to a pseudo that will be expanded later into one of
9831 // the above forms.
9832 SDValue Elt = DAG.getSignedConstant(Val: SextVal, DL: dl, VT: MVT::i32);
9833 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9834 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9835 SDValue EltSize = DAG.getConstant(Val: SplatSize, DL: dl, VT: MVT::i32);
9836 SDValue RetVal = DAG.getNode(Opcode: PPCISD::VADD_SPLAT, DL: dl, VT, N1: Elt, N2: EltSize);
9837 if (VT == Op.getValueType())
9838 return RetVal;
9839 else
9840 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: RetVal);
9841 }
9842
9843 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9844 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9845 // for fneg/fabs.
9846 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9847 // Make -1 and vspltisw -1:
9848 SDValue OnesV = getCanonicalConstSplat(Val: -1, SplatSize: 4, VT: MVT::v4i32, DAG, dl);
9849
9850 // Make the VSLW intrinsic, computing 0x8000_0000.
9851 SDValue Res = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vslw, LHS: OnesV,
9852 RHS: OnesV, DAG, dl);
9853
9854 // xor by OnesV to invert it.
9855 Res = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::v4i32, N1: Res, N2: OnesV);
9856 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
9857 }
9858
9859 // Check to see if this is a wide variety of vsplti*, binop self cases.
9860 static const signed char SplatCsts[] = {
9861 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9862 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9863 };
9864
9865 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9866 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9867 // ambiguous cases (e.g. formation of 0x8000_0000); -1 is tried first.
9868 int i = SplatCsts[idx];
9869
9870 // Figure out what shift amount will be used by altivec if shifted by i in
9871 // this splat size.
9872 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9873
9874 // vsplti + shl self.
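// For example, a splat of 24 can be built as vspltisw 3 followed by a vslw
// of the result by itself (each element 3 shifted left by 3 gives 24).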
9875 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9876 SDValue Res = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::Other, DAG, dl);
9877 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9878 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9879 Intrinsic::ppc_altivec_vslw
9880 };
9881 Res = BuildIntrinsicOp(IID: IIDs[SplatSize-1], LHS: Res, RHS: Res, DAG, dl);
9882 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
9883 }
9884
9885 // vsplti + srl self.
9886 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9887 SDValue Res = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::Other, DAG, dl);
9888 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9889 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9890 Intrinsic::ppc_altivec_vsrw
9891 };
9892 Res = BuildIntrinsicOp(IID: IIDs[SplatSize-1], LHS: Res, RHS: Res, DAG, dl);
9893 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
9894 }
9895
9896 // vsplti + rol self.
9897 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9898 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9899 SDValue Res = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::Other, DAG, dl);
9900 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9901 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9902 Intrinsic::ppc_altivec_vrlw
9903 };
9904 Res = BuildIntrinsicOp(IID: IIDs[SplatSize-1], LHS: Res, RHS: Res, DAG, dl);
9905 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Res);
9906 }
9907
9908 // t = vsplti c, result = vsldoi t, t, 1
9909 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9910 SDValue T = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::v16i8, DAG, dl);
9911 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9912 return BuildVSLDOI(LHS: T, RHS: T, Amt, VT: Op.getValueType(), DAG, dl);
9913 }
9914 // t = vsplti c, result = vsldoi t, t, 2
9915 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9916 SDValue T = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::v16i8, DAG, dl);
9917 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9918 return BuildVSLDOI(LHS: T, RHS: T, Amt, VT: Op.getValueType(), DAG, dl);
9919 }
9920 // t = vsplti c, result = vsldoi t, t, 3
9921 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9922 SDValue T = getCanonicalConstSplat(Val: i, SplatSize, VT: MVT::v16i8, DAG, dl);
9923 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9924 return BuildVSLDOI(LHS: T, RHS: T, Amt, VT: Op.getValueType(), DAG, dl);
9925 }
9926 }
9927
9928 return SDValue();
9929}
9930
9931/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9932/// the specified operations to build the shuffle.
9933static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9934 SDValue RHS, SelectionDAG &DAG,
9935 const SDLoc &dl) {
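// A perfect-shuffle table entry packs one operation and two operand entries:
// bits 26-29 hold the opcode, bits 13-25 the LHS table index and bits 0-12
// the RHS table index (each index encodes a 4-element mask in base 9).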
9936 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9937 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9938 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9939
9940 enum {
9941 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9942 OP_VMRGHW,
9943 OP_VMRGLW,
9944 OP_VSPLTISW0,
9945 OP_VSPLTISW1,
9946 OP_VSPLTISW2,
9947 OP_VSPLTISW3,
9948 OP_VSLDOI4,
9949 OP_VSLDOI8,
9950 OP_VSLDOI12
9951 };
9952
9953 if (OpNum == OP_COPY) {
9954 if (LHSID == (1*9+2)*9+3) return LHS;
9955 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9956 return RHS;
9957 }
9958
9959 SDValue OpLHS, OpRHS;
9960 OpLHS = GeneratePerfectShuffle(PFEntry: PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9961 OpRHS = GeneratePerfectShuffle(PFEntry: PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9962
9963 int ShufIdxs[16];
9964 switch (OpNum) {
9965 default: llvm_unreachable("Unknown i32 permute!");
9966 case OP_VMRGHW:
9967 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9968 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9969 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9970 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9971 break;
9972 case OP_VMRGLW:
9973 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9974 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9975 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9976 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9977 break;
9978 case OP_VSPLTISW0:
9979 for (unsigned i = 0; i != 16; ++i)
9980 ShufIdxs[i] = (i&3)+0;
9981 break;
9982 case OP_VSPLTISW1:
9983 for (unsigned i = 0; i != 16; ++i)
9984 ShufIdxs[i] = (i&3)+4;
9985 break;
9986 case OP_VSPLTISW2:
9987 for (unsigned i = 0; i != 16; ++i)
9988 ShufIdxs[i] = (i&3)+8;
9989 break;
9990 case OP_VSPLTISW3:
9991 for (unsigned i = 0; i != 16; ++i)
9992 ShufIdxs[i] = (i&3)+12;
9993 break;
9994 case OP_VSLDOI4:
9995 return BuildVSLDOI(LHS: OpLHS, RHS: OpRHS, Amt: 4, VT: OpLHS.getValueType(), DAG, dl);
9996 case OP_VSLDOI8:
9997 return BuildVSLDOI(LHS: OpLHS, RHS: OpRHS, Amt: 8, VT: OpLHS.getValueType(), DAG, dl);
9998 case OP_VSLDOI12:
9999 return BuildVSLDOI(LHS: OpLHS, RHS: OpRHS, Amt: 12, VT: OpLHS.getValueType(), DAG, dl);
10000 }
10001 EVT VT = OpLHS.getValueType();
10002 OpLHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: OpLHS);
10003 OpRHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: OpRHS);
10004 SDValue T = DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: OpLHS, N2: OpRHS, Mask: ShufIdxs);
10005 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: T);
10006}
10007
10008/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
10009/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
10010/// SDValue.
10011SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
10012 SelectionDAG &DAG) const {
10013 const unsigned BytesInVector = 16;
10014 bool IsLE = Subtarget.isLittleEndian();
10015 SDLoc dl(N);
10016 SDValue V1 = N->getOperand(Num: 0);
10017 SDValue V2 = N->getOperand(Num: 1);
10018 unsigned ShiftElts = 0, InsertAtByte = 0;
10019 bool Swap = false;
10020
10021 // Shifts required to get the byte we want at element 7.
10022 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
10023 0, 15, 14, 13, 12, 11, 10, 9};
10024 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
10025 1, 2, 3, 4, 5, 6, 7, 8};
10026
10027 ArrayRef<int> Mask = N->getMask();
10028 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
10029
10030 // For each mask element, find out if we're just inserting something
10031 // from V2 into V1 or vice versa.
10032 // Possible permutations inserting an element from V2 into V1:
10033 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10034 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10035 // ...
10036 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10037 // Inserting from V1 into V2 will be similar, except mask range will be
10038 // [16,31].
10039
10040 bool FoundCandidate = false;
10041 // If both vector operands for the shuffle are the same vector, the mask
10042 // will contain only elements from the first one and the second one will be
10043 // undef.
10044 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10045 // Go through the mask of bytes to find an element that's being moved
10046 // from one vector to the other.
10047 for (unsigned i = 0; i < BytesInVector; ++i) {
10048 unsigned CurrentElement = Mask[i];
10049 // If the 2nd operand is undefined, we should only look for the source
10050 // element (VINSERTBSrcElem) in the Mask.
10051 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10052 continue;
10053
10054 bool OtherElementsInOrder = true;
10055 // Examine the other elements in the Mask to see if they're in original
10056 // order.
10057 for (unsigned j = 0; j < BytesInVector; ++j) {
10058 if (j == i)
10059 continue;
10060 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
10061 // to be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
10062 // in which case we assume we're always picking from the 1st operand.
10063 int MaskOffset =
10064 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10065 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10066 OtherElementsInOrder = false;
10067 break;
10068 }
10069 }
10070 // If other elements are in original order, we record the number of shifts
10071 // we need to get the element we want into element 7. Also record which byte
10072 // in the vector we should insert into.
10073 if (OtherElementsInOrder) {
10074 // If 2nd operand is undefined, we assume no shifts and no swapping.
10075 if (V2.isUndef()) {
10076 ShiftElts = 0;
10077 Swap = false;
10078 } else {
10079 // Only need the low 4 bits for the shift; operands are swapped if CurrentElement >= 16.
10080 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10081 : BigEndianShifts[CurrentElement & 0xF];
10082 Swap = CurrentElement < BytesInVector;
10083 }
10084 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10085 FoundCandidate = true;
10086 break;
10087 }
10088 }
10089
10090 if (!FoundCandidate)
10091 return SDValue();
10092
10093 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10094 // optionally with VECSHL if shift is required.
10095 if (Swap)
10096 std::swap(a&: V1, b&: V2);
10097 if (V2.isUndef())
10098 V2 = V1;
10099 if (ShiftElts) {
10100 SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v16i8, N1: V2, N2: V2,
10101 N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
10102 return DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v16i8, N1: V1, N2: Shl,
10103 N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10104 }
10105 return DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v16i8, N1: V1, N2: V2,
10106 N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10107}
10108
10109/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10110/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10111/// SDValue.
10112SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10113 SelectionDAG &DAG) const {
10114 const unsigned NumHalfWords = 8;
10115 const unsigned BytesInVector = NumHalfWords * 2;
10116 // Check that the shuffle is on half-words.
10117 if (!isNByteElemShuffleMask(N, Width: 2, StepLen: 1))
10118 return SDValue();
10119
10120 bool IsLE = Subtarget.isLittleEndian();
10121 SDLoc dl(N);
10122 SDValue V1 = N->getOperand(Num: 0);
10123 SDValue V2 = N->getOperand(Num: 1);
10124 unsigned ShiftElts = 0, InsertAtByte = 0;
10125 bool Swap = false;
10126
10127 // Shifts required to get the half-word we want at element 3.
10128 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10129 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10130
10131 uint32_t Mask = 0;
10132 uint32_t OriginalOrderLow = 0x1234567;
10133 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10134 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10135 // 32-bit space, only need 4-bit nibbles per element.
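// For example, a mask picking half-words 0..7 in order packs to 0x01234567
// (OriginalOrderLow) and one picking 8..15 in order packs to 0x89ABCDEF
// (OriginalOrderHigh).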
10136 for (unsigned i = 0; i < NumHalfWords; ++i) {
10137 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10138 Mask |= ((uint32_t)(N->getMaskElt(Idx: i * 2) / 2) << MaskShift);
10139 }
10140
10141 // For each mask element, find out if we're just inserting something
10142 // from V2 into V1 or vice versa. Possible permutations inserting an element
10143 // from V2 into V1:
10144 // X, 1, 2, 3, 4, 5, 6, 7
10145 // 0, X, 2, 3, 4, 5, 6, 7
10146 // 0, 1, X, 3, 4, 5, 6, 7
10147 // 0, 1, 2, X, 4, 5, 6, 7
10148 // 0, 1, 2, 3, X, 5, 6, 7
10149 // 0, 1, 2, 3, 4, X, 6, 7
10150 // 0, 1, 2, 3, 4, 5, X, 7
10151 // 0, 1, 2, 3, 4, 5, 6, X
10152 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10153
10154 bool FoundCandidate = false;
10155 // Go through the mask of half-words to find an element that's being moved
10156 // from one vector to the other.
10157 for (unsigned i = 0; i < NumHalfWords; ++i) {
10158 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10159 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10160 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10161 uint32_t TargetOrder = 0x0;
10162
10163 // If both vector operands for the shuffle are the same vector, the mask
10164 // will contain only elements from the first one and the second one will be
10165 // undef.
10166 if (V2.isUndef()) {
10167 ShiftElts = 0;
10168 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10169 TargetOrder = OriginalOrderLow;
10170 Swap = false;
10171 // Skip if this is not the correct element or if the mask of the other
10172 // elements does not match our expected order.
10173 if (MaskOneElt == VINSERTHSrcElem &&
10174 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10175 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10176 FoundCandidate = true;
10177 break;
10178 }
10179 } else { // If both operands are defined.
10180 // Target order is [8,15] if the current mask is between [0,7].
10181 TargetOrder =
10182 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10183 // Skip if the mask of the other elements does not match our expected order.
10184 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10185 // We only need the last 3 bits for the number of shifts.
10186 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10187 : BigEndianShifts[MaskOneElt & 0x7];
10188 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10189 Swap = MaskOneElt < NumHalfWords;
10190 FoundCandidate = true;
10191 break;
10192 }
10193 }
10194 }
10195
10196 if (!FoundCandidate)
10197 return SDValue();
10198
10199 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10200 // optionally with VECSHL if shift is required.
10201 if (Swap)
10202 std::swap(a&: V1, b&: V2);
10203 if (V2.isUndef())
10204 V2 = V1;
10205 SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: V1);
10206 if (ShiftElts) {
10207 // Double ShiftElts because we're left shifting on v16i8 type.
10208 SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v16i8, N1: V2, N2: V2,
10209 N3: DAG.getConstant(Val: 2 * ShiftElts, DL: dl, VT: MVT::i32));
10210 SDValue Conv2 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: Shl);
10211 SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v8i16, N1: Conv1, N2: Conv2,
10212 N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10213 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
10214 }
10215 SDValue Conv2 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: V2);
10216 SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v8i16, N1: Conv1, N2: Conv2,
10217 N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10218 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
10219}
10220
10221/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10222/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10223/// return the default SDValue.
10224SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10225 SelectionDAG &DAG) const {
10226 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10227 // to v16i8. Peek through the bitcasts to get the actual operands.
10228 SDValue LHS = peekThroughBitcasts(V: SVN->getOperand(Num: 0));
10229 SDValue RHS = peekThroughBitcasts(V: SVN->getOperand(Num: 1));
10230
10231 auto ShuffleMask = SVN->getMask();
10232 SDValue VecShuffle(SVN, 0);
10233 SDLoc DL(SVN);
10234
10235 // Check that we have a four byte shuffle.
10236 if (!isNByteElemShuffleMask(N: SVN, Width: 4, StepLen: 1))
10237 return SDValue();
10238
10239 // Canonicalize so that the RHS is a BUILD_VECTOR when lowering to xxsplti32dx.
10240 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10241 std::swap(a&: LHS, b&: RHS);
10242 VecShuffle = peekThroughBitcasts(V: DAG.getCommutedVectorShuffle(SV: *SVN));
10243 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(Val&: VecShuffle);
10244 if (!CommutedSV)
10245 return SDValue();
10246 ShuffleMask = CommutedSV->getMask();
10247 }
10248
10249 // Ensure that the RHS is a vector of constants.
10250 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Val: RHS.getNode());
10251 if (!BVN)
10252 return SDValue();
10253
10254 // Check if RHS is a splat of 4-bytes (or smaller).
10255 APInt APSplatValue, APSplatUndef;
10256 unsigned SplatBitSize;
10257 bool HasAnyUndefs;
10258 if (!BVN->isConstantSplat(SplatValue&: APSplatValue, SplatUndef&: APSplatUndef, SplatBitSize,
10259 HasAnyUndefs, MinSplatBits: 0, isBigEndian: !Subtarget.isLittleEndian()) ||
10260 SplatBitSize > 32)
10261 return SDValue();
10262
10263 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10264 // The instruction splats a constant C into two words of the source vector
10265 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10266 // Thus we check that the shuffle mask is the equivalent of
10267 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10268 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10269 // within each word are consecutive, so we only need to check the first byte.
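// For example, the byte mask <0..3, 16..19, 8..11, 24..27> keeps words 0 and 2
// of the LHS and takes words 1 and 3 from the constant RHS, so it matches the
// first pattern below.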
10270 SDValue Index;
10271 bool IsLE = Subtarget.isLittleEndian();
10272 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10273 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10274 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10275 Index = DAG.getTargetConstant(Val: IsLE ? 0 : 1, DL, VT: MVT::i32);
10276 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10277 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10278 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10279 Index = DAG.getTargetConstant(Val: IsLE ? 1 : 0, DL, VT: MVT::i32);
10280 else
10281 return SDValue();
10282
10283 // If the splat is narrower than 32 bits, widen it to the 32-bit value
10284 // needed by XXSPLTI32DX.
10285 unsigned SplatVal = APSplatValue.getZExtValue();
10286 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10287 SplatVal |= (SplatVal << SplatBitSize);
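  // For example, an 8-bit splat of 0xAB becomes 0xABAB after one iteration and
  // 0xABABABAB after the second.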
10288
10289 SDValue SplatNode = DAG.getNode(
10290 Opcode: PPCISD::XXSPLTI32DX, DL, VT: MVT::v2i64, N1: DAG.getBitcast(VT: MVT::v2i64, V: LHS),
10291 N2: Index, N3: DAG.getTargetConstant(Val: SplatVal, DL, VT: MVT::i32));
10292 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v16i8, Operand: SplatNode);
10293}
10294
10295/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10296 /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if the shift amount is
10297 /// a multiple of 8. Otherwise we convert it to a scalar i128 rotation, i.e.
10298 /// (or (shl x, C1), (srl x, 128-C1)).
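/// For example, ROTL(v1i128 %x, 16) becomes a two-byte rotate expressed as a
/// v16i8 shuffle of the bitcast of %x.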
10299SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10300 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10301 assert(Op.getValueType() == MVT::v1i128 &&
10302 "Only set v1i128 as custom, other type shouldn't reach here!");
10303 SDLoc dl(Op);
10304 SDValue N0 = peekThroughBitcasts(V: Op.getOperand(i: 0));
10305 SDValue N1 = peekThroughBitcasts(V: Op.getOperand(i: 1));
10306 unsigned SHLAmt = N1.getConstantOperandVal(i: 0);
10307 if (SHLAmt % 8 == 0) {
10308 std::array<int, 16> Mask;
10309 std::iota(first: Mask.begin(), last: Mask.end(), value: 0);
10310 std::rotate(first: Mask.begin(), middle: Mask.begin() + SHLAmt / 8, last: Mask.end());
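    // For example, SHLAmt == 16 rotates the identity mask by two bytes,
    // producing <2, 3, ..., 15, 0, 1>.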
10311 if (SDValue Shuffle =
10312 DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: DAG.getBitcast(VT: MVT::v16i8, V: N0),
10313 N2: DAG.getUNDEF(VT: MVT::v16i8), Mask))
10314 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v1i128, Operand: Shuffle);
10315 }
10316 SDValue ArgVal = DAG.getBitcast(VT: MVT::i128, V: N0);
10317 SDValue SHLOp = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i128, N1: ArgVal,
10318 N2: DAG.getConstant(Val: SHLAmt, DL: dl, VT: MVT::i32));
10319 SDValue SRLOp = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i128, N1: ArgVal,
10320 N2: DAG.getConstant(Val: 128 - SHLAmt, DL: dl, VT: MVT::i32));
10321 SDValue OROp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i128, N1: SHLOp, N2: SRLOp);
10322 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v1i128, Operand: OROp);
10323}
10324
10325/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10326/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10327/// return the code it can be lowered into. Worst case, it can always be
10328/// lowered into a vperm.
10329SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10330 SelectionDAG &DAG) const {
10331 SDLoc dl(Op);
10332 SDValue V1 = Op.getOperand(i: 0);
10333 SDValue V2 = Op.getOperand(i: 1);
10334 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val&: Op);
10335
10336 // Any nodes that were combined in the target-independent combiner prior
10337 // to vector legalization will not be sent to the target combine. Try to
10338 // combine it here.
10339 if (SDValue NewShuffle = combineVectorShuffle(SVN: SVOp, DAG)) {
10340 if (!isa<ShuffleVectorSDNode>(Val: NewShuffle))
10341 return NewShuffle;
10342 Op = NewShuffle;
10343 SVOp = cast<ShuffleVectorSDNode>(Val&: Op);
10344 V1 = Op.getOperand(i: 0);
10345 V2 = Op.getOperand(i: 1);
10346 }
10347 EVT VT = Op.getValueType();
10348 bool isLittleEndian = Subtarget.isLittleEndian();
10349
10350 unsigned ShiftElts, InsertAtByte;
10351 bool Swap = false;
10352
10353 // If this is a load-and-splat, we can do that with a single instruction
10354 // in some cases. However if the load has multiple uses, we don't want to
10355 // combine it because that will just produce multiple loads.
10356 bool IsPermutedLoad = false;
10357 const SDValue *InputLoad = getNormalLoadInput(Op: V1, IsPermuted&: IsPermutedLoad);
10358 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10359 (PPC::isSplatShuffleMask(N: SVOp, EltSize: 4) || PPC::isSplatShuffleMask(N: SVOp, EltSize: 8)) &&
10360 InputLoad->hasOneUse()) {
10361 bool IsFourByte = PPC::isSplatShuffleMask(N: SVOp, EltSize: 4);
10362 int SplatIdx =
10363 PPC::getSplatIdxForPPCMnemonics(N: SVOp, EltSize: IsFourByte ? 4 : 8, DAG);
10364
10365 // The splat index for permuted loads will be in the left half of the vector
10366 // which is strictly wider than the loaded value by 8 bytes. So we need to
10367 // adjust the splat index to point to the correct address in memory.
10368 if (IsPermutedLoad) {
10369 assert((isLittleEndian || IsFourByte) &&
10370 "Unexpected size for permuted load on big endian target");
10371 SplatIdx += IsFourByte ? 2 : 1;
10372 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10373 "Splat of a value outside of the loaded memory");
10374 }
10375
10376 LoadSDNode *LD = cast<LoadSDNode>(Val: *InputLoad);
10377 // For 4-byte load-and-splat, we need Power9.
10378 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10379 uint64_t Offset = 0;
10380 if (IsFourByte)
10381 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10382 else
10383 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
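      // For example, on little endian a 4-byte splat of element 0 (in big-endian
      // mnemonic numbering) reads the highest-addressed word of the vector, so
      // Offset == (3 - 0) * 4 == 12.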
10384
10385 // If the width of the load is the same as the width of the splat,
10386 // loading with an offset would load the wrong memory.
10387 if (LD->getValueType(ResNo: 0).getSizeInBits() == (IsFourByte ? 32 : 64))
10388 Offset = 0;
10389
10390 SDValue BasePtr = LD->getBasePtr();
10391 if (Offset != 0)
10392 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
10393 N1: BasePtr, N2: DAG.getIntPtrConstant(Val: Offset, DL: dl));
10394 SDValue Ops[] = {
10395 LD->getChain(), // Chain
10396 BasePtr, // BasePtr
10397 DAG.getValueType(Op.getValueType()) // VT
10398 };
10399 SDVTList VTL =
10400 DAG.getVTList(VT1: IsFourByte ? MVT::v4i32 : MVT::v2i64, VT2: MVT::Other);
10401 SDValue LdSplt =
10402 DAG.getMemIntrinsicNode(Opcode: PPCISD::LD_SPLAT, dl, VTList: VTL,
10403 Ops, MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
10404 DAG.ReplaceAllUsesOfValueWith(From: InputLoad->getValue(R: 1), To: LdSplt.getValue(R: 1));
10405 if (LdSplt.getValueType() != SVOp->getValueType(ResNo: 0))
10406 LdSplt = DAG.getBitcast(VT: SVOp->getValueType(ResNo: 0), V: LdSplt);
10407 return LdSplt;
10408 }
10409 }
10410
10411 // All v2i64 and v2f64 shuffles are legal
10412 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10413 return Op;
10414
10415 if (Subtarget.hasP9Vector() &&
10416 PPC::isXXINSERTWMask(N: SVOp, ShiftElts, InsertAtByte, Swap,
10417 IsLE: isLittleEndian)) {
10418 if (V2.isUndef())
10419 V2 = V1;
10420 else if (Swap)
10421 std::swap(a&: V1, b&: V2);
10422 SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
10423 SDValue Conv2 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V2);
10424 if (ShiftElts) {
10425 SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v4i32, N1: Conv2, N2: Conv2,
10426 N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
10427 SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v4i32, N1: Conv1, N2: Shl,
10428 N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10429 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
10430 }
10431 SDValue Ins = DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT: MVT::v4i32, N1: Conv1, N2: Conv2,
10432 N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
10433 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Ins);
10434 }
10435
10436 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10437 SDValue SplatInsertNode;
10438 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVN: SVOp, DAG)))
10439 return SplatInsertNode;
10440 }
10441
10442 if (Subtarget.hasP9Altivec()) {
10443 SDValue NewISDNode;
10444 if ((NewISDNode = lowerToVINSERTH(N: SVOp, DAG)))
10445 return NewISDNode;
10446
10447 if ((NewISDNode = lowerToVINSERTB(N: SVOp, DAG)))
10448 return NewISDNode;
10449 }
10450
10451 if (Subtarget.hasVSX() &&
10452 PPC::isXXSLDWIShuffleMask(N: SVOp, ShiftElts, Swap, IsLE: isLittleEndian)) {
10453 if (Swap)
10454 std::swap(a&: V1, b&: V2);
10455 SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
10456 SDValue Conv2 =
10457 DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V2.isUndef() ? V1 : V2);
10458
10459 SDValue Shl = DAG.getNode(Opcode: PPCISD::VECSHL, DL: dl, VT: MVT::v4i32, N1: Conv1, N2: Conv2,
10460 N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
10461 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Shl);
10462 }
10463
10464 if (Subtarget.hasVSX() &&
10465 PPC::isXXPERMDIShuffleMask(N: SVOp, DM&: ShiftElts, Swap, IsLE: isLittleEndian)) {
10466 if (Swap)
10467 std::swap(a&: V1, b&: V2);
10468 SDValue Conv1 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2i64, Operand: V1);
10469 SDValue Conv2 =
10470 DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2i64, Operand: V2.isUndef() ? V1 : V2);
10471
10472 SDValue PermDI = DAG.getNode(Opcode: PPCISD::XXPERMDI, DL: dl, VT: MVT::v2i64, N1: Conv1, N2: Conv2,
10473 N3: DAG.getConstant(Val: ShiftElts, DL: dl, VT: MVT::i32));
10474 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: PermDI);
10475 }
10476
10477 if (Subtarget.hasP9Vector()) {
10478 if (PPC::isXXBRHShuffleMask(N: SVOp)) {
10479 SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: V1);
10480 SDValue ReveHWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v8i16, Operand: Conv);
10481 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveHWord);
10482 } else if (PPC::isXXBRWShuffleMask(N: SVOp)) {
10483 SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
10484 SDValue ReveWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v4i32, Operand: Conv);
10485 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveWord);
10486 } else if (PPC::isXXBRDShuffleMask(N: SVOp)) {
10487 SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2i64, Operand: V1);
10488 SDValue ReveDWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v2i64, Operand: Conv);
10489 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveDWord);
10490 } else if (PPC::isXXBRQShuffleMask(N: SVOp)) {
10491 SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v1i128, Operand: V1);
10492 SDValue ReveQWord = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v1i128, Operand: Conv);
10493 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: ReveQWord);
10494 }
10495 }
10496
10497 if (Subtarget.hasVSX()) {
10498 if (V2.isUndef() && PPC::isSplatShuffleMask(N: SVOp, EltSize: 4)) {
10499 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(N: SVOp, EltSize: 4, DAG);
10500
10501 SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v4i32, Operand: V1);
10502 SDValue Splat = DAG.getNode(Opcode: PPCISD::XXSPLT, DL: dl, VT: MVT::v4i32, N1: Conv,
10503 N2: DAG.getConstant(Val: SplatIdx, DL: dl, VT: MVT::i32));
10504 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Splat);
10505 }
10506
10507 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10508 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(N: SVOp, ShuffleKind: 1, DAG) == 8) {
10509 SDValue Conv = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2f64, Operand: V1);
10510 SDValue Swap = DAG.getNode(Opcode: PPCISD::SWAP_NO_CHAIN, DL: dl, VT: MVT::v2f64, Operand: Conv);
10511 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: Swap);
10512 }
10513 }
10514
10515 // Cases that are handled by instructions that take permute immediates
10516 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10517 // selected by the instruction selector.
10518 if (V2.isUndef()) {
10519 if (PPC::isSplatShuffleMask(N: SVOp, EltSize: 1) ||
10520 PPC::isSplatShuffleMask(N: SVOp, EltSize: 2) ||
10521 PPC::isSplatShuffleMask(N: SVOp, EltSize: 4) ||
10522 PPC::isVPKUWUMShuffleMask(N: SVOp, ShuffleKind: 1, DAG) ||
10523 PPC::isVPKUHUMShuffleMask(N: SVOp, ShuffleKind: 1, DAG) ||
10524 PPC::isVSLDOIShuffleMask(N: SVOp, ShuffleKind: 1, DAG) != -1 ||
10525 PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind: 1, DAG) ||
10526 PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind: 1, DAG) ||
10527 PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind: 1, DAG) ||
10528 PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind: 1, DAG) ||
10529 PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind: 1, DAG) ||
10530 PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind: 1, DAG) ||
10531 (Subtarget.hasP8Altivec() && (
10532 PPC::isVPKUDUMShuffleMask(N: SVOp, ShuffleKind: 1, DAG) ||
10533 PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: true, ShuffleKind: 1, DAG) ||
10534 PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: false, ShuffleKind: 1, DAG)))) {
10535 return Op;
10536 }
10537 }
10538
10539 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10540 // and produce a fixed permutation. If any of these match, do not lower to
10541 // VPERM.
10542 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
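  // ShuffleKind 0 matches big-endian shuffles with two distinct inputs, 1 (used
  // above for the single-input cases) matches either-endian shuffles with
  // identical inputs, and 2 matches little-endian shuffles with distinct inputs.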
10543 if (PPC::isVPKUWUMShuffleMask(N: SVOp, ShuffleKind, DAG) ||
10544 PPC::isVPKUHUMShuffleMask(N: SVOp, ShuffleKind, DAG) ||
10545 PPC::isVSLDOIShuffleMask(N: SVOp, ShuffleKind, DAG) != -1 ||
10546 PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind, DAG) ||
10547 PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind, DAG) ||
10548 PPC::isVMRGLShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind, DAG) ||
10549 PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 1, ShuffleKind, DAG) ||
10550 PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 2, ShuffleKind, DAG) ||
10551 PPC::isVMRGHShuffleMask(N: SVOp, UnitSize: 4, ShuffleKind, DAG) ||
10552 (Subtarget.hasP8Altivec() && (
10553 PPC::isVPKUDUMShuffleMask(N: SVOp, ShuffleKind, DAG) ||
10554 PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: true, ShuffleKind, DAG) ||
10555 PPC::isVMRGEOShuffleMask(N: SVOp, CheckEven: false, ShuffleKind, DAG))))
10556 return Op;
10557
10558 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10559 // perfect shuffle table to emit an optimal matching sequence.
10560 ArrayRef<int> PermMask = SVOp->getMask();
10561
10562 if (!DisablePerfectShuffle && !isLittleEndian) {
10563 unsigned PFIndexes[4];
10564 bool isFourElementShuffle = true;
10565 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10566 ++i) { // Element number
10567 unsigned EltNo = 8; // Start out undef.
10568 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10569 if (PermMask[i * 4 + j] < 0)
10570 continue; // Undef, ignore it.
10571
10572 unsigned ByteSource = PermMask[i * 4 + j];
10573 if ((ByteSource & 3) != j) {
10574 isFourElementShuffle = false;
10575 break;
10576 }
10577
10578 if (EltNo == 8) {
10579 EltNo = ByteSource / 4;
10580 } else if (EltNo != ByteSource / 4) {
10581 isFourElementShuffle = false;
10582 break;
10583 }
10584 }
10585 PFIndexes[i] = EltNo;
10586 }
10587
10588 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10589 // perfect shuffle vector to determine if it is cost effective to do this as
10590 // discrete instructions, or whether we should use a vperm.
10591 // For now, we skip this for little endian until such time as we have a
10592 // little-endian perfect shuffle table.
10593 if (isFourElementShuffle) {
10594 // Compute the index in the perfect shuffle table.
10595 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10596 PFIndexes[2] * 9 + PFIndexes[3];
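      // Each PFIndex is in [0, 8], with 8 meaning undef; e.g. element sources
      // (0, 1, 2, 3) give index 0*729 + 1*81 + 2*9 + 3 == 102.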
10597
10598 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10599 unsigned Cost = (PFEntry >> 30);
10600
10601 // Determining when to avoid vperm is tricky. Many things affect the cost
10602 // of vperm, particularly how many times the perm mask needs to be
10603 // computed. For example, if the perm mask can be hoisted out of a loop or
10604 // is already used (perhaps because there are multiple permutes with the
10605 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10606 // permute mask out of the loop requires an extra register.
10607 //
10608 // As a compromise, we only emit discrete instructions if the shuffle can
10609 // be generated in 3 or fewer operations. When we have loop information
10610 // available, if this block is within a loop, we should avoid using vperm
10611 // for 3-operation perms and use a constant pool load instead.
10612 if (Cost < 3)
10613 return GeneratePerfectShuffle(PFEntry, LHS: V1, RHS: V2, DAG, dl);
10614 }
10615 }
10616
10617 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10618 // vector that will get spilled to the constant pool.
10619 if (V2.isUndef()) V2 = V1;
10620
10621 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10622}
10623
10624SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10625 ArrayRef<int> PermMask, EVT VT,
10626 SDValue V1, SDValue V2) const {
10627 unsigned Opcode = PPCISD::VPERM;
10628 EVT ValType = V1.getValueType();
10629 SDLoc dl(Op);
10630 bool NeedSwap = false;
10631 bool isLittleEndian = Subtarget.isLittleEndian();
10632 bool isPPC64 = Subtarget.isPPC64();
10633
10634 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10635 (V1->hasOneUse() || V2->hasOneUse())) {
10636 LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - using "
10637 "XXPERM instead\n");
10638 Opcode = PPCISD::XXPERM;
10639
10640 // The second input to XXPERM is also an output, so if that input has
10641 // multiple uses a copy would be required. Prefer the single-use operand
10642 // as the second input to avoid the copy.
10643 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10644 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10645 std::swap(a&: V1, b&: V2);
10646 NeedSwap = !NeedSwap;
10647 }
10648 }
10649
10650 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10651 // that it is in input element units, not in bytes. Convert now.
10652
10653 // For little endian, the order of the input vectors is reversed, and
10654 // the permutation mask is complemented with respect to 31. This is
10655 // necessary to produce proper semantics with the big-endian-based vperm
10656 // instruction.
10657 EVT EltVT = V1.getValueType().getVectorElementType();
10658 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10659
10660 bool V1HasXXSWAPD = V1->getOperand(Num: 0)->getOpcode() == PPCISD::XXSWAPD;
10661 bool V2HasXXSWAPD = V2->getOperand(Num: 0)->getOpcode() == PPCISD::XXSWAPD;
10662
10663 /*
10664 Vectors will be appended like so: [ V1 | V2 ]
10665 XXSWAPD on V1:
10666 [ A | B | C | D ] -> [ C | D | A | B ]
10667 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10668 i.e. index of A, B += 8, and index of C, D -= 8.
10669 XXSWAPD on V2:
10670 [ E | F | G | H ] -> [ G | H | E | F ]
10671 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10672 i.e. index of E, F += 8, index of G, H -= 8
10673 Swap V1 and V2:
10674 [ V1 | V2 ] -> [ V2 | V1 ]
10675 0-15 16-31 0-15 16-31
10676 i.e. index of V1 += 16, index of V2 -= 16
10677 */
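  // For example, in a v16i8 shuffle an element taken from source byte 5 becomes
  // control byte 31 - 5 == 26 on little endian, but stays 5 on big endian.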
10678
10679 SmallVector<SDValue, 16> ResultMask;
10680 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10681 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10682
10683 if (V1HasXXSWAPD) {
10684 if (SrcElt < 8)
10685 SrcElt += 8;
10686 else if (SrcElt < 16)
10687 SrcElt -= 8;
10688 }
10689 if (V2HasXXSWAPD) {
10690 if (SrcElt > 23)
10691 SrcElt -= 8;
10692 else if (SrcElt > 15)
10693 SrcElt += 8;
10694 }
10695 if (NeedSwap) {
10696 if (SrcElt < 16)
10697 SrcElt += 16;
10698 else
10699 SrcElt -= 16;
10700 }
10701 for (unsigned j = 0; j != BytesPerElement; ++j)
10702 if (isLittleEndian)
10703 ResultMask.push_back(
10704 Elt: DAG.getConstant(Val: 31 - (SrcElt * BytesPerElement + j), DL: dl, VT: MVT::i32));
10705 else
10706 ResultMask.push_back(
10707 Elt: DAG.getConstant(Val: SrcElt * BytesPerElement + j, DL: dl, VT: MVT::i32));
10708 }
10709
10710 if (V1HasXXSWAPD) {
10711 dl = SDLoc(V1->getOperand(Num: 0));
10712 V1 = V1->getOperand(Num: 0)->getOperand(Num: 1);
10713 }
10714 if (V2HasXXSWAPD) {
10715 dl = SDLoc(V2->getOperand(Num: 0));
10716 V2 = V2->getOperand(Num: 0)->getOperand(Num: 1);
10717 }
10718
10719 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10720 if (ValType != MVT::v2f64)
10721 V1 = DAG.getBitcast(VT: MVT::v2f64, V: V1);
10722 if (V2.getValueType() != MVT::v2f64)
10723 V2 = DAG.getBitcast(VT: MVT::v2f64, V: V2);
10724 }
10725
10726 ShufflesHandledWithVPERM++;
10727 SDValue VPermMask = DAG.getBuildVector(VT: MVT::v16i8, DL: dl, Ops: ResultMask);
10728 LLVM_DEBUG({
10729 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10730 if (Opcode == PPCISD::XXPERM) {
10731 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10732 } else {
10733 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10734 }
10735 SVOp->dump();
10736 dbgs() << "With the following permute control vector:\n";
10737 VPermMask.dump();
10738 });
10739
10740 if (Opcode == PPCISD::XXPERM)
10741 VPermMask = DAG.getBitcast(VT: MVT::v4i32, V: VPermMask);
10742
10743 // For little endian we only need to swap the operand order here; the
10744 // permute mask above was already computed for the swapped inputs.
10745 if (isLittleEndian)
10746 std::swap(a&: V1, b&: V2);
10747
10748 SDValue VPERMNode =
10749 DAG.getNode(Opcode, DL: dl, VT: V1.getValueType(), N1: V1, N2: V2, N3: VPermMask);
10750
10751 VPERMNode = DAG.getBitcast(VT: ValType, V: VPERMNode);
10752 return VPERMNode;
10753}
10754
10755 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10756 /// vector comparison. If it is, return true and fill in CompareOpc/isDot with
10757 /// information about the intrinsic.
10758static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10759 bool &isDot, const PPCSubtarget &Subtarget) {
10760 unsigned IntrinsicID = Intrin.getConstantOperandVal(i: 0);
10761 CompareOpc = -1;
10762 isDot = false;
10763 switch (IntrinsicID) {
10764 default:
10765 return false;
10766 // Comparison predicates.
10767 case Intrinsic::ppc_altivec_vcmpbfp_p:
10768 CompareOpc = 966;
10769 isDot = true;
10770 break;
10771 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10772 CompareOpc = 198;
10773 isDot = true;
10774 break;
10775 case Intrinsic::ppc_altivec_vcmpequb_p:
10776 CompareOpc = 6;
10777 isDot = true;
10778 break;
10779 case Intrinsic::ppc_altivec_vcmpequh_p:
10780 CompareOpc = 70;
10781 isDot = true;
10782 break;
10783 case Intrinsic::ppc_altivec_vcmpequw_p:
10784 CompareOpc = 134;
10785 isDot = true;
10786 break;
10787 case Intrinsic::ppc_altivec_vcmpequd_p:
10788 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10789 CompareOpc = 199;
10790 isDot = true;
10791 } else
10792 return false;
10793 break;
10794 case Intrinsic::ppc_altivec_vcmpneb_p:
10795 case Intrinsic::ppc_altivec_vcmpneh_p:
10796 case Intrinsic::ppc_altivec_vcmpnew_p:
10797 case Intrinsic::ppc_altivec_vcmpnezb_p:
10798 case Intrinsic::ppc_altivec_vcmpnezh_p:
10799 case Intrinsic::ppc_altivec_vcmpnezw_p:
10800 if (Subtarget.hasP9Altivec()) {
10801 switch (IntrinsicID) {
10802 default:
10803 llvm_unreachable("Unknown comparison intrinsic.");
10804 case Intrinsic::ppc_altivec_vcmpneb_p:
10805 CompareOpc = 7;
10806 break;
10807 case Intrinsic::ppc_altivec_vcmpneh_p:
10808 CompareOpc = 71;
10809 break;
10810 case Intrinsic::ppc_altivec_vcmpnew_p:
10811 CompareOpc = 135;
10812 break;
10813 case Intrinsic::ppc_altivec_vcmpnezb_p:
10814 CompareOpc = 263;
10815 break;
10816 case Intrinsic::ppc_altivec_vcmpnezh_p:
10817 CompareOpc = 327;
10818 break;
10819 case Intrinsic::ppc_altivec_vcmpnezw_p:
10820 CompareOpc = 391;
10821 break;
10822 }
10823 isDot = true;
10824 } else
10825 return false;
10826 break;
10827 case Intrinsic::ppc_altivec_vcmpgefp_p:
10828 CompareOpc = 454;
10829 isDot = true;
10830 break;
10831 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10832 CompareOpc = 710;
10833 isDot = true;
10834 break;
10835 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10836 CompareOpc = 774;
10837 isDot = true;
10838 break;
10839 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10840 CompareOpc = 838;
10841 isDot = true;
10842 break;
10843 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10844 CompareOpc = 902;
10845 isDot = true;
10846 break;
10847 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10848 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10849 CompareOpc = 967;
10850 isDot = true;
10851 } else
10852 return false;
10853 break;
10854 case Intrinsic::ppc_altivec_vcmpgtub_p:
10855 CompareOpc = 518;
10856 isDot = true;
10857 break;
10858 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10859 CompareOpc = 582;
10860 isDot = true;
10861 break;
10862 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10863 CompareOpc = 646;
10864 isDot = true;
10865 break;
10866 case Intrinsic::ppc_altivec_vcmpgtud_p:
10867 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10868 CompareOpc = 711;
10869 isDot = true;
10870 } else
10871 return false;
10872 break;
10873
10874 case Intrinsic::ppc_altivec_vcmpequq:
10875 case Intrinsic::ppc_altivec_vcmpgtsq:
10876 case Intrinsic::ppc_altivec_vcmpgtuq:
10877 if (!Subtarget.isISA3_1())
10878 return false;
10879 switch (IntrinsicID) {
10880 default:
10881 llvm_unreachable("Unknown comparison intrinsic.");
10882 case Intrinsic::ppc_altivec_vcmpequq:
10883 CompareOpc = 455;
10884 break;
10885 case Intrinsic::ppc_altivec_vcmpgtsq:
10886 CompareOpc = 903;
10887 break;
10888 case Intrinsic::ppc_altivec_vcmpgtuq:
10889 CompareOpc = 647;
10890 break;
10891 }
10892 break;
10893
10894 // VSX predicate comparisons use the same infrastructure
10895 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10896 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10897 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10898 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10899 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10900 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10901 if (Subtarget.hasVSX()) {
10902 switch (IntrinsicID) {
10903 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10904 CompareOpc = 99;
10905 break;
10906 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10907 CompareOpc = 115;
10908 break;
10909 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10910 CompareOpc = 107;
10911 break;
10912 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10913 CompareOpc = 67;
10914 break;
10915 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10916 CompareOpc = 83;
10917 break;
10918 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10919 CompareOpc = 75;
10920 break;
10921 }
10922 isDot = true;
10923 } else
10924 return false;
10925 break;
10926
10927 // Normal Comparisons.
10928 case Intrinsic::ppc_altivec_vcmpbfp:
10929 CompareOpc = 966;
10930 break;
10931 case Intrinsic::ppc_altivec_vcmpeqfp:
10932 CompareOpc = 198;
10933 break;
10934 case Intrinsic::ppc_altivec_vcmpequb:
10935 CompareOpc = 6;
10936 break;
10937 case Intrinsic::ppc_altivec_vcmpequh:
10938 CompareOpc = 70;
10939 break;
10940 case Intrinsic::ppc_altivec_vcmpequw:
10941 CompareOpc = 134;
10942 break;
10943 case Intrinsic::ppc_altivec_vcmpequd:
10944 if (Subtarget.hasP8Altivec())
10945 CompareOpc = 199;
10946 else
10947 return false;
10948 break;
10949 case Intrinsic::ppc_altivec_vcmpneb:
10950 case Intrinsic::ppc_altivec_vcmpneh:
10951 case Intrinsic::ppc_altivec_vcmpnew:
10952 case Intrinsic::ppc_altivec_vcmpnezb:
10953 case Intrinsic::ppc_altivec_vcmpnezh:
10954 case Intrinsic::ppc_altivec_vcmpnezw:
10955 if (Subtarget.hasP9Altivec())
10956 switch (IntrinsicID) {
10957 default:
10958 llvm_unreachable("Unknown comparison intrinsic.");
10959 case Intrinsic::ppc_altivec_vcmpneb:
10960 CompareOpc = 7;
10961 break;
10962 case Intrinsic::ppc_altivec_vcmpneh:
10963 CompareOpc = 71;
10964 break;
10965 case Intrinsic::ppc_altivec_vcmpnew:
10966 CompareOpc = 135;
10967 break;
10968 case Intrinsic::ppc_altivec_vcmpnezb:
10969 CompareOpc = 263;
10970 break;
10971 case Intrinsic::ppc_altivec_vcmpnezh:
10972 CompareOpc = 327;
10973 break;
10974 case Intrinsic::ppc_altivec_vcmpnezw:
10975 CompareOpc = 391;
10976 break;
10977 }
10978 else
10979 return false;
10980 break;
10981 case Intrinsic::ppc_altivec_vcmpgefp:
10982 CompareOpc = 454;
10983 break;
10984 case Intrinsic::ppc_altivec_vcmpgtfp:
10985 CompareOpc = 710;
10986 break;
10987 case Intrinsic::ppc_altivec_vcmpgtsb:
10988 CompareOpc = 774;
10989 break;
10990 case Intrinsic::ppc_altivec_vcmpgtsh:
10991 CompareOpc = 838;
10992 break;
10993 case Intrinsic::ppc_altivec_vcmpgtsw:
10994 CompareOpc = 902;
10995 break;
10996 case Intrinsic::ppc_altivec_vcmpgtsd:
10997 if (Subtarget.hasP8Altivec())
10998 CompareOpc = 967;
10999 else
11000 return false;
11001 break;
11002 case Intrinsic::ppc_altivec_vcmpgtub:
11003 CompareOpc = 518;
11004 break;
11005 case Intrinsic::ppc_altivec_vcmpgtuh:
11006 CompareOpc = 582;
11007 break;
11008 case Intrinsic::ppc_altivec_vcmpgtuw:
11009 CompareOpc = 646;
11010 break;
11011 case Intrinsic::ppc_altivec_vcmpgtud:
11012 if (Subtarget.hasP8Altivec())
11013 CompareOpc = 711;
11014 else
11015 return false;
11016 break;
11017 case Intrinsic::ppc_altivec_vcmpequq_p:
11018 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11019 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11020 if (!Subtarget.isISA3_1())
11021 return false;
11022 switch (IntrinsicID) {
11023 default:
11024 llvm_unreachable("Unknown comparison intrinsic.");
11025 case Intrinsic::ppc_altivec_vcmpequq_p:
11026 CompareOpc = 455;
11027 break;
11028 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11029 CompareOpc = 903;
11030 break;
11031 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11032 CompareOpc = 647;
11033 break;
11034 }
11035 isDot = true;
11036 break;
11037 }
11038 return true;
11039}
11040
11041/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11042/// lower, do it, otherwise return null.
11043SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11044 SelectionDAG &DAG) const {
11045 unsigned IntrinsicID = Op.getConstantOperandVal(i: 0);
11046
11047 SDLoc dl(Op);
11048
11049 switch (IntrinsicID) {
11050 case Intrinsic::thread_pointer:
11051 // Reads the thread pointer register, used for __builtin_thread_pointer.
11052 if (Subtarget.isPPC64())
11053 return DAG.getRegister(Reg: PPC::X13, VT: MVT::i64);
11054 return DAG.getRegister(Reg: PPC::R2, VT: MVT::i32);
11055
11056 case Intrinsic::ppc_rldimi: {
11057 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11058 SDValue Src = Op.getOperand(i: 1);
11059 APInt Mask = Op.getConstantOperandAPInt(i: 4);
11060 if (Mask.isZero())
11061 return Op.getOperand(i: 2);
11062 if (Mask.isAllOnes())
11063 return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i64, N1: Src, N2: Op.getOperand(i: 3));
11064 uint64_t SH = Op.getConstantOperandVal(i: 3);
11065 unsigned MB = 0, ME = 0;
11066 if (!isRunOfOnes64(Val: Mask.getZExtValue(), MB, ME))
11067 report_fatal_error(reason: "invalid rldimi mask!");
11068 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11069 if (ME < 63 - SH) {
11070 Src = DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i64, N1: Src,
11071 N2: DAG.getConstant(Val: ME + SH + 1, DL: dl, VT: MVT::i32));
11072 } else if (ME > 63 - SH) {
11073 Src = DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i64, N1: Src,
11074 N2: DAG.getConstant(Val: ME + SH - 63, DL: dl, VT: MVT::i32));
11075 }
11076 return SDValue(
11077 DAG.getMachineNode(Opcode: PPC::RLDIMI, dl, VT: MVT::i64,
11078 Ops: {Op.getOperand(i: 2), Src,
11079 DAG.getTargetConstant(Val: 63 - ME, DL: dl, VT: MVT::i32),
11080 DAG.getTargetConstant(Val: MB, DL: dl, VT: MVT::i32)}),
11081 0);
11082 }
11083
11084 case Intrinsic::ppc_rlwimi: {
11085 APInt Mask = Op.getConstantOperandAPInt(i: 4);
11086 if (Mask.isZero())
11087 return Op.getOperand(i: 2);
11088 if (Mask.isAllOnes())
11089 return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT: MVT::i32, N1: Op.getOperand(i: 1),
11090 N2: Op.getOperand(i: 3));
11091 unsigned MB = 0, ME = 0;
11092 if (!isRunOfOnes(Val: Mask.getZExtValue(), MB, ME))
11093 report_fatal_error(reason: "invalid rlwimi mask!");
11094 return SDValue(DAG.getMachineNode(
11095 Opcode: PPC::RLWIMI, dl, VT: MVT::i32,
11096 Ops: {Op.getOperand(i: 2), Op.getOperand(i: 1), Op.getOperand(i: 3),
11097 DAG.getTargetConstant(Val: MB, DL: dl, VT: MVT::i32),
11098 DAG.getTargetConstant(Val: ME, DL: dl, VT: MVT::i32)}),
11099 0);
11100 }
11101
11102 case Intrinsic::ppc_rlwnm: {
11103 if (Op.getConstantOperandVal(i: 3) == 0)
11104 return DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32);
11105 unsigned MB = 0, ME = 0;
11106 if (!isRunOfOnes(Val: Op.getConstantOperandVal(i: 3), MB, ME))
11107 report_fatal_error(reason: "invalid rlwnm mask!");
11108 return SDValue(
11109 DAG.getMachineNode(Opcode: PPC::RLWNM, dl, VT: MVT::i32,
11110 Ops: {Op.getOperand(i: 1), Op.getOperand(i: 2),
11111 DAG.getTargetConstant(Val: MB, DL: dl, VT: MVT::i32),
11112 DAG.getTargetConstant(Val: ME, DL: dl, VT: MVT::i32)}),
11113 0);
11114 }
11115
11116 case Intrinsic::ppc_mma_disassemble_acc: {
11117 if (Subtarget.isISAFuture()) {
11118 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11119 SDValue WideVec =
11120 SDValue(DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes,
11121 Ops: Op.getOperand(i: 1)),
11122 0);
11123 SmallVector<SDValue, 4> RetOps;
11124 SDValue Value = SDValue(WideVec.getNode(), 0);
11125 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11126
11127 SDValue Extract;
11128 Extract = DAG.getNode(
11129 Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11130 N1: Subtarget.isLittleEndian() ? Value2 : Value,
11131 N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 1 : 0,
11132 DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11133 RetOps.push_back(Elt: Extract);
11134 Extract = DAG.getNode(
11135 Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11136 N1: Subtarget.isLittleEndian() ? Value2 : Value,
11137 N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 0 : 1,
11138 DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11139 RetOps.push_back(Elt: Extract);
11140 Extract = DAG.getNode(
11141 Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11142 N1: Subtarget.isLittleEndian() ? Value : Value2,
11143 N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 1 : 0,
11144 DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11145 RetOps.push_back(Elt: Extract);
11146 Extract = DAG.getNode(
11147 Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
11148 N1: Subtarget.isLittleEndian() ? Value : Value2,
11149 N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? 0 : 1,
11150 DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11151 RetOps.push_back(Elt: Extract);
11152 return DAG.getMergeValues(Ops: RetOps, dl);
11153 }
11154 [[fallthrough]];
11155 }
11156 case Intrinsic::ppc_vsx_disassemble_pair: {
11157 int NumVecs = 2;
11158 SDValue WideVec = Op.getOperand(i: 1);
11159 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11160 NumVecs = 4;
11161 WideVec = DAG.getNode(Opcode: PPCISD::XXMFACC, DL: dl, VT: MVT::v512i1, Operand: WideVec);
11162 }
11163 SmallVector<SDValue, 4> RetOps;
11164 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11165 SDValue Extract = DAG.getNode(
11166 Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8, N1: WideVec,
11167 N2: DAG.getConstant(Val: Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11168 : VecNo,
11169 DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
11170 RetOps.push_back(Elt: Extract);
11171 }
11172 return DAG.getMergeValues(Ops: RetOps, dl);
11173 }
11174
11175 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11176 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11177 auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
11178 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11179 "Specify P of 0 or 1 for lower or upper 512 bits");
11180 unsigned HiLo = Idx->getSExtValue();
11181 unsigned Opcode;
11182 unsigned Subx;
11183 if (HiLo == 0) {
11184 Opcode = PPC::DMXXEXTFDMR512;
11185 Subx = PPC::sub_wacc_lo;
11186 } else {
11187 Opcode = PPC::DMXXEXTFDMR512_HI;
11188 Subx = PPC::sub_wacc_hi;
11189 }
11190 SDValue Subreg(
11191 DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1,
11192 Op1: Op.getOperand(i: 1),
11193 Op2: DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32)),
11194 0);
11195 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11196 return SDValue(DAG.getMachineNode(Opcode, dl, ResultTys: ReturnTypes, Ops: Subreg), 0);
11197 }
11198
11199 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11200 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11201 auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
11202 assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11203 "Specify a dmr row pair 0-3");
11204 unsigned IdxVal = Idx->getSExtValue();
11205 unsigned Subx;
11206 switch (IdxVal) {
11207 case 0:
11208 Subx = PPC::sub_dmrrowp0;
11209 break;
11210 case 1:
11211 Subx = PPC::sub_dmrrowp1;
11212 break;
11213 case 2:
11214 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11215 break;
11216 case 3:
11217 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11218 break;
11219 }
11220 SDValue Subreg(
11221 DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v256i1,
11222 Op1: Op.getOperand(i: 1),
11223 Op2: DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32)),
11224 0);
11225 SDValue P = DAG.getTargetConstant(Val: IdxVal, DL: dl, VT: MVT::i32);
11226 return SDValue(
11227 DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR256, dl, VT: MVT::v256i1, Ops: {Subreg, P}),
11228 0);
11229 }
11230
11231 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11232 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11233 auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 4));
11234 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11235 "Specify P of 0 or 1 for lower or upper 512 bits");
11236 unsigned HiLo = Idx->getSExtValue();
11237 unsigned Opcode;
11238 unsigned Subx;
11239 if (HiLo == 0) {
11240 Opcode = PPC::DMXXINSTDMR512;
11241 Subx = PPC::sub_wacc_lo;
11242 } else {
11243 Opcode = PPC::DMXXINSTDMR512_HI;
11244 Subx = PPC::sub_wacc_hi;
11245 }
11246 SDValue Ops[] = {Op.getOperand(i: 2), Op.getOperand(i: 3)};
11247 SDValue Wacc = SDValue(DAG.getMachineNode(Opcode, dl, VT: MVT::v512i1, Ops), 0);
11248 SDValue SubReg = DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32);
11249 return SDValue(DAG.getMachineNode(Opcode: PPC::INSERT_SUBREG, dl, VT: MVT::v1024i1,
11250 Op1: Op.getOperand(i: 1), Op2: Wacc, Op3: SubReg),
11251 0);
11252 }
11253
11254 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11255 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11256 auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 3));
11257 assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
11258 "Specify a dmr row pair 0-3");
11259 unsigned IdxVal = Idx->getSExtValue();
11260 unsigned Subx;
11261 switch (IdxVal) {
11262 case 0:
11263 Subx = PPC::sub_dmrrowp0;
11264 break;
11265 case 1:
11266 Subx = PPC::sub_dmrrowp1;
11267 break;
11268 case 2:
11269 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11270 break;
11271 case 3:
11272 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11273 break;
11274 }
11275 SDValue SubReg = DAG.getTargetConstant(Val: Subx, DL: dl, VT: MVT::i32);
11276 SDValue P = DAG.getTargetConstant(Val: IdxVal, DL: dl, VT: MVT::i32);
11277 SDValue Ops[] = {Op.getOperand(i: 2), P};
11278 SDValue DMRRowp = SDValue(
11279 DAG.getMachineNode(Opcode: PPC::DMXXINSTDMR256, dl, VT: MVT::v256i1, Ops), 0);
11280 return SDValue(DAG.getMachineNode(Opcode: PPC::INSERT_SUBREG, dl, VT: MVT::v1024i1,
11281 Op1: Op.getOperand(i: 1), Op2: DMRRowp, Op3: SubReg),
11282 0);
11283 }
11284
11285 case Intrinsic::ppc_mma_xxmfacc:
11286 case Intrinsic::ppc_mma_xxmtacc: {
11287 // Allow pre-isa-future subtargets to lower as normal.
11288 if (!Subtarget.isISAFuture())
11289 return SDValue();
11290 // The intrinsics for xxmtacc and xxmfacc take one argument of type v512i1.
11291 // For ISA Future CPUs the corresponding wacc instruction
11292 // dmxx[inst|extf]dmr512 is always generated for type v512i1, which removes
11293 // the need to produce the xxm[t|f]acc.
11294 SDValue WideVec = Op.getOperand(i: 1);
11295 DAG.ReplaceAllUsesWith(From: Op, To: WideVec);
11296 return SDValue();
11297 }
11298
11299 case Intrinsic::ppc_unpack_longdouble: {
11300 auto *Idx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
11301 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11302 "Argument of long double unpack must be 0 or 1!");
11303 return DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: dl, VT: MVT::f64, N1: Op.getOperand(i: 1),
11304 N2: DAG.getConstant(Val: !!(Idx->getSExtValue()), DL: dl,
11305 VT: Idx->getValueType(ResNo: 0)));
11306 }
11307
11308 case Intrinsic::ppc_compare_exp_lt:
11309 case Intrinsic::ppc_compare_exp_gt:
11310 case Intrinsic::ppc_compare_exp_eq:
11311 case Intrinsic::ppc_compare_exp_uo: {
11312 unsigned Pred;
11313 switch (IntrinsicID) {
11314 case Intrinsic::ppc_compare_exp_lt:
11315 Pred = PPC::PRED_LT;
11316 break;
11317 case Intrinsic::ppc_compare_exp_gt:
11318 Pred = PPC::PRED_GT;
11319 break;
11320 case Intrinsic::ppc_compare_exp_eq:
11321 Pred = PPC::PRED_EQ;
11322 break;
11323 case Intrinsic::ppc_compare_exp_uo:
11324 Pred = PPC::PRED_UN;
11325 break;
11326 }
11327 return SDValue(
11328 DAG.getMachineNode(
11329 Opcode: PPC::SELECT_CC_I4, dl, VT: MVT::i32,
11330 Ops: {SDValue(DAG.getMachineNode(Opcode: PPC::XSCMPEXPDP, dl, VT: MVT::i32,
11331 Op1: Op.getOperand(i: 1), Op2: Op.getOperand(i: 2)),
11332 0),
11333 DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32), DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32),
11334 DAG.getTargetConstant(Val: Pred, DL: dl, VT: MVT::i32)}),
11335 0);
11336 }
11337 case Intrinsic::ppc_test_data_class: {
11338 EVT OpVT = Op.getOperand(i: 1).getValueType();
11339 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11340 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11341 : PPC::XSTSTDCSP);
11342 return SDValue(
11343 DAG.getMachineNode(
11344 Opcode: PPC::SELECT_CC_I4, dl, VT: MVT::i32,
11345 Ops: {SDValue(DAG.getMachineNode(Opcode: CmprOpc, dl, VT: MVT::i32, Op1: Op.getOperand(i: 2),
11346 Op2: Op.getOperand(i: 1)),
11347 0),
11348 DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32), DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32),
11349 DAG.getTargetConstant(Val: PPC::PRED_EQ, DL: dl, VT: MVT::i32)}),
11350 0);
11351 }
11352 case Intrinsic::ppc_fnmsub: {
11353 EVT VT = Op.getOperand(i: 1).getValueType();
11354 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11355 return DAG.getNode(
11356 Opcode: ISD::FNEG, DL: dl, VT,
11357 Operand: DAG.getNode(Opcode: ISD::FMA, DL: dl, VT, N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2),
11358 N3: DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT, Operand: Op.getOperand(i: 3))));
11359 return DAG.getNode(Opcode: PPCISD::FNMSUB, DL: dl, VT, N1: Op.getOperand(i: 1),
11360 N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
11361 }
11362 case Intrinsic::ppc_convert_f128_to_ppcf128:
11363 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11364 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11365 ? RTLIB::CONVERT_PPCF128_F128
11366 : RTLIB::CONVERT_F128_PPCF128;
11367 MakeLibCallOptions CallOptions;
11368 std::pair<SDValue, SDValue> Result =
11369 makeLibCall(DAG, LC, RetVT: Op.getValueType(), Ops: Op.getOperand(i: 1), CallOptions,
11370 dl, Chain: SDValue());
11371 return Result.first;
11372 }
11373 case Intrinsic::ppc_maxfe:
11374 case Intrinsic::ppc_maxfl:
11375 case Intrinsic::ppc_maxfs:
11376 case Intrinsic::ppc_minfe:
11377 case Intrinsic::ppc_minfl:
11378 case Intrinsic::ppc_minfs: {
11379 EVT VT = Op.getValueType();
11380 assert(
11381 all_of(Op->ops().drop_front(4),
11382 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11383 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11384 (void)VT;
11385 ISD::CondCode CC = ISD::SETGT;
11386 if (IntrinsicID == Intrinsic::ppc_minfe ||
11387 IntrinsicID == Intrinsic::ppc_minfl ||
11388 IntrinsicID == Intrinsic::ppc_minfs)
11389 CC = ISD::SETLT;
11390 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11391 SDValue Res = Op.getOperand(i: I);
11392 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11393 Res =
11394 DAG.getSelectCC(DL: dl, LHS: Res, RHS: Op.getOperand(i: I), True: Res, False: Op.getOperand(i: I), Cond: CC);
11395 }
11396 return Res;
11397 }
11398 }
11399
11400 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11401 // opcode number of the comparison.
11402 int CompareOpc;
11403 bool isDot;
11404 if (!getVectorCompareInfo(Intrin: Op, CompareOpc, isDot, Subtarget))
11405 return SDValue(); // Don't custom lower most intrinsics.
11406
11407 // If this is a non-dot comparison, make the VCMP node and we are done.
11408 if (!isDot) {
11409 SDValue Tmp = DAG.getNode(Opcode: PPCISD::VCMP, DL: dl, VT: Op.getOperand(i: 2).getValueType(),
11410 N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2),
11411 N3: DAG.getConstant(Val: CompareOpc, DL: dl, VT: MVT::i32));
11412 return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: Op.getValueType(), Operand: Tmp);
11413 }
11414
11415 // Create the PPCISD altivec 'dot' comparison node.
11416 SDValue Ops[] = {
11417 Op.getOperand(i: 2), // LHS
11418 Op.getOperand(i: 3), // RHS
11419 DAG.getConstant(Val: CompareOpc, DL: dl, VT: MVT::i32)
11420 };
11421 EVT VTs[] = { Op.getOperand(i: 2).getValueType(), MVT::Glue };
11422 SDValue CompNode = DAG.getNode(Opcode: PPCISD::VCMP_rec, DL: dl, ResultTys: VTs, Ops);
11423
11424 // Unpack the result based on how the target uses it.
11425 unsigned BitNo; // Bit # of CR6.
11426 bool InvertBit; // Invert result?
11427 unsigned Bitx;
11428 unsigned SetOp;
11429 switch (Op.getConstantOperandVal(i: 1)) {
11430 default: // Can't happen, don't crash on invalid number though.
11431 case 0: // Return the value of the EQ bit of CR6.
11432 BitNo = 0;
11433 InvertBit = false;
11434 Bitx = PPC::sub_eq;
11435 SetOp = PPCISD::SETBC;
11436 break;
11437 case 1: // Return the inverted value of the EQ bit of CR6.
11438 BitNo = 0;
11439 InvertBit = true;
11440 Bitx = PPC::sub_eq;
11441 SetOp = PPCISD::SETBCR;
11442 break;
11443 case 2: // Return the value of the LT bit of CR6.
11444 BitNo = 2;
11445 InvertBit = false;
11446 Bitx = PPC::sub_lt;
11447 SetOp = PPCISD::SETBC;
11448 break;
11449 case 3: // Return the inverted value of the LT bit of CR6.
11450 BitNo = 2;
11451 InvertBit = true;
11452 Bitx = PPC::sub_lt;
11453 SetOp = PPCISD::SETBCR;
11454 break;
11455 }
11456
11457 SDValue GlueOp = CompNode.getValue(R: 1);
11458 if (Subtarget.isISA3_1()) {
11459 SDValue SubRegIdx = DAG.getTargetConstant(Val: Bitx, DL: dl, VT: MVT::i32);
11460 SDValue CR6Reg = DAG.getRegister(Reg: PPC::CR6, VT: MVT::i32);
11461 SDValue CRBit =
11462 SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::i1,
11463 Op1: CR6Reg, Op2: SubRegIdx, Op3: GlueOp),
11464 0);
11465 return DAG.getNode(Opcode: SetOp, DL: dl, VT: MVT::i32, Operand: CRBit);
11466 }
11467
11468 // Now that we have the comparison, emit a copy from the CR to a GPR.
11469 // This is flagged to the above dot comparison.
11470 SDValue Flags = DAG.getNode(Opcode: PPCISD::MFOCRF, DL: dl, VT: MVT::i32,
11471 N1: DAG.getRegister(Reg: PPC::CR6, VT: MVT::i32), N2: GlueOp);
11472
11473 // Shift the bit into the low position.
11474 Flags = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32, N1: Flags,
11475 N2: DAG.getConstant(Val: 8 - (3 - BitNo), DL: dl, VT: MVT::i32));
11476 // Isolate the bit.
11477 Flags = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: Flags,
11478 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
11479
11480 // If we are supposed to, toggle the bit.
11481 if (InvertBit)
11482 Flags = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i32, N1: Flags,
11483 N2: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
11484 return Flags;
11485}
11486
11487SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11488 SelectionDAG &DAG) const {
11489 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain at
11490 // the beginning of the argument list.
11491 int ArgStart = isa<ConstantSDNode>(Val: Op.getOperand(i: 0)) ? 0 : 1;
11492 SDLoc DL(Op);
11493 switch (Op.getConstantOperandVal(i: ArgStart)) {
11494 case Intrinsic::ppc_cfence: {
11495 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11496 SDValue Val = Op.getOperand(i: ArgStart + 1);
11497 EVT Ty = Val.getValueType();
11498 if (Ty == MVT::i128) {
11499 // FIXME: Testing one of two paired registers is sufficient to guarantee
11500 // ordering?
11501 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i64, Operand: Val);
11502 }
11503 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11504 return SDValue(
11505 DAG.getMachineNode(
11506 Opcode, dl: DL, VT: MVT::Other,
11507 Op1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: Subtarget.getScalarIntVT(), Operand: Val),
11508 Op2: Op.getOperand(i: 0)),
11509 0);
11510 }
11511 default:
11512 break;
11513 }
11514 return SDValue();
11515}
11516
11517// Lower scalar BSWAP64 to xxbrd.
11518SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11519 SDLoc dl(Op);
11520 if (!Subtarget.isPPC64())
11521 return Op;
11522 // MTVSRDD
11523 Op = DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: dl, VT: MVT::v2i64, N1: Op.getOperand(i: 0),
11524 N2: Op.getOperand(i: 0));
11525 // XXBRD
11526 Op = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::v2i64, Operand: Op);
11527 // MFVSRD
11528 int VectorIndex = 0;
11529 if (Subtarget.isLittleEndian())
11530 VectorIndex = 1;
11531 Op = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i64, N1: Op,
11532 N2: DAG.getTargetConstant(Val: VectorIndex, DL: dl, VT: MVT::i32));
11533 return Op;
11534}
11535
11536// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11537// compared to a value that is atomically loaded (atomic loads zero-extend).
11538SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11539 SelectionDAG &DAG) const {
11540 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11541 "Expecting an atomic compare-and-swap here.");
11542 SDLoc dl(Op);
11543 auto *AtomicNode = cast<AtomicSDNode>(Val: Op.getNode());
11544 EVT MemVT = AtomicNode->getMemoryVT();
11545 if (MemVT.getSizeInBits() >= 32)
11546 return Op;
11547
11548 SDValue CmpOp = Op.getOperand(i: 2);
11549 // If this is already correctly zero-extended, leave it alone.
11550 auto HighBits = APInt::getHighBitsSet(numBits: 32, hiBitsSet: 32 - MemVT.getSizeInBits());
11551 if (DAG.MaskedValueIsZero(Op: CmpOp, Mask: HighBits))
11552 return Op;
11553
11554 // Clear the high bits of the compare operand.
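  // For an i8 exchange the mask is 0xFF; for an i16 exchange it is 0xFFFF.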
11555 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11556 SDValue NewCmpOp =
11557 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: CmpOp,
11558 N2: DAG.getConstant(Val: MaskVal, DL: dl, VT: MVT::i32));
11559
11560 // Replace the existing compare operand with the properly zero-extended one.
11561 SmallVector<SDValue, 4> Ops;
11562 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11563 Ops.push_back(Elt: AtomicNode->getOperand(Num: i));
11564 Ops[2] = NewCmpOp;
11565 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11566 SDVTList Tys = DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other);
11567 auto NodeTy =
11568 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11569 return DAG.getMemIntrinsicNode(Opcode: NodeTy, dl, VTList: Tys, Ops, MemVT, MMO);
11570}
11571
11572SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11573 SelectionDAG &DAG) const {
11574 AtomicSDNode *N = cast<AtomicSDNode>(Val: Op.getNode());
11575 EVT MemVT = N->getMemoryVT();
11576 assert(MemVT.getSimpleVT() == MVT::i128 &&
11577 "Expect quadword atomic operations");
11578 SDLoc dl(N);
11579 unsigned Opc = N->getOpcode();
11580 switch (Opc) {
11581 case ISD::ATOMIC_LOAD: {
11582 // Lower the quadword atomic load to int_ppc_atomic_load_i128, which is then
11583 // lowered to PPC instructions by the pattern-matching instruction selector.
11584 SDVTList Tys = DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64, VT3: MVT::Other);
11585 SmallVector<SDValue, 4> Ops{
11586 N->getOperand(Num: 0),
11587 DAG.getConstant(Val: Intrinsic::ppc_atomic_load_i128, DL: dl, VT: MVT::i32)};
11588 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11589 Ops.push_back(Elt: N->getOperand(Num: I));
11590 SDValue LoadedVal = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl, VTList: Tys,
11591 Ops, MemVT, MMO: N->getMemOperand());
11592 SDValue ValLo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MVT::i128, Operand: LoadedVal);
11593 SDValue ValHi =
11594 DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MVT::i128, Operand: LoadedVal.getValue(R: 1));
11595 ValHi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i128, N1: ValHi,
11596 N2: DAG.getConstant(Val: 64, DL: dl, VT: MVT::i32));
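    // Reassemble the 128-bit result as ValLo | (ValHi << 64).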
11597 SDValue Val =
11598 DAG.getNode(Opcode: ISD::OR, DL: dl, ResultTys: {MVT::i128, MVT::Other}, Ops: {ValLo, ValHi});
11599 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl, ResultTys: {MVT::i128, MVT::Other},
11600 Ops: {Val, LoadedVal.getValue(R: 2)});
11601 }
11602 case ISD::ATOMIC_STORE: {
    // Lower a quadword atomic store to int_ppc_atomic_store_i128, which will be
    // lowered to PPC instructions by the pattern-matching instruction selector.
11605 SDVTList Tys = DAG.getVTList(VT: MVT::Other);
11606 SmallVector<SDValue, 4> Ops{
11607 N->getOperand(Num: 0),
11608 DAG.getConstant(Val: Intrinsic::ppc_atomic_store_i128, DL: dl, VT: MVT::i32)};
11609 SDValue Val = N->getOperand(Num: 1);
11610 SDValue ValLo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i64, Operand: Val);
11611 SDValue ValHi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i128, N1: Val,
11612 N2: DAG.getConstant(Val: 64, DL: dl, VT: MVT::i32));
11613 ValHi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i64, Operand: ValHi);
11614 Ops.push_back(Elt: ValLo);
11615 Ops.push_back(Elt: ValHi);
11616 Ops.push_back(Elt: N->getOperand(Num: 2));
11617 return DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl, VTList: Tys, Ops, MemVT,
11618 MMO: N->getMemOperand());
11619 }
11620 default:
11621 llvm_unreachable("Unexpected atomic opcode");
11622 }
11623}
11624
11625static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11626 SelectionDAG &DAG,
11627 const PPCSubtarget &Subtarget) {
11628 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11629
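  // Bits of the DCMX (data-class mask) operand understood by the
  // xststdc[sp|dp|qp] test-data-class instructions selected below.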
11630 enum DataClassMask {
11631 DC_NAN = 1 << 6,
11632 DC_NEG_INF = 1 << 4,
11633 DC_POS_INF = 1 << 5,
11634 DC_NEG_ZERO = 1 << 2,
11635 DC_POS_ZERO = 1 << 3,
11636 DC_NEG_SUBNORM = 1,
11637 DC_POS_SUBNORM = 1 << 1,
11638 };
11639
11640 EVT VT = Op.getValueType();
11641
11642 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11643 : VT == MVT::f64 ? PPC::XSTSTDCDP
11644 : PPC::XSTSTDCSP;
11645
11646 if (Mask == fcAllFlags)
11647 return DAG.getBoolConstant(V: true, DL: Dl, VT: MVT::i1, OpVT: VT);
11648 if (Mask == 0)
11649 return DAG.getBoolConstant(V: false, DL: Dl, VT: MVT::i1, OpVT: VT);
11650
  // In some cases it is cheaper (or necessary) to test the reverse set of
  // flags and negate the result.
11652 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11653 SDValue Rev = getDataClassTest(Op, Mask: ~Mask, Dl, DAG, Subtarget);
11654 return DAG.getNOT(DL: Dl, Val: Rev, VT: MVT::i1);
11655 }
11656
  // Power doesn't support testing whether a value is 'normal'. Test the rest
  // first, then check that it is 'not not-normal' with the expected sign.
11659 if (Mask & fcNormal) {
11660 SDValue Rev(DAG.getMachineNode(
11661 Opcode: TestOp, dl: Dl, VT: MVT::i32,
11662 Op1: DAG.getTargetConstant(Val: DC_NAN | DC_NEG_INF | DC_POS_INF |
11663 DC_NEG_ZERO | DC_POS_ZERO |
11664 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11665 DL: Dl, VT: MVT::i32),
11666 Op2: Op),
11667 0);
    // The sign is stored in CR bit 0, the result in CR bit 2.
11669 SDValue Sign(
11670 DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: Dl, VT: MVT::i1, Op1: Rev,
11671 Op2: DAG.getTargetConstant(Val: PPC::sub_lt, DL: Dl, VT: MVT::i32)),
11672 0);
11673 SDValue Normal(DAG.getNOT(
11674 DL: Dl,
11675 Val: SDValue(DAG.getMachineNode(
11676 Opcode: TargetOpcode::EXTRACT_SUBREG, dl: Dl, VT: MVT::i1, Op1: Rev,
11677 Op2: DAG.getTargetConstant(Val: PPC::sub_eq, DL: Dl, VT: MVT::i32)),
11678 0),
11679 VT: MVT::i1));
11680 if (Mask & fcPosNormal)
11681 Sign = DAG.getNOT(DL: Dl, Val: Sign, VT: MVT::i1);
11682 SDValue Result = DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i1, N1: Sign, N2: Normal);
11683 if (Mask == fcPosNormal || Mask == fcNegNormal)
11684 return Result;
11685
11686 return DAG.getNode(
11687 Opcode: ISD::OR, DL: Dl, VT: MVT::i1,
11688 N1: getDataClassTest(Op, Mask: Mask & ~fcNormal, Dl, DAG, Subtarget), N2: Result);
11689 }
11690
  // The instruction doesn't differentiate between signaling and quiet NaNs.
  // Test the rest first, then test whether it 'is NaN and is signaling/quiet'.
11693 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11694 bool IsQuiet = Mask & fcQNan;
11695 SDValue NanCheck = getDataClassTest(Op, Mask: fcNan, Dl, DAG, Subtarget);
11696
    // Quietness is determined by the first bit of the fraction field.
11698 uint64_t QuietMask = 0;
11699 SDValue HighWord;
11700 if (VT == MVT::f128) {
11701 HighWord = DAG.getNode(
11702 Opcode: ISD::EXTRACT_VECTOR_ELT, DL: Dl, VT: MVT::i32, N1: DAG.getBitcast(VT: MVT::v4i32, V: Op),
11703 N2: DAG.getVectorIdxConstant(Val: Subtarget.isLittleEndian() ? 3 : 0, DL: Dl));
11704 QuietMask = 0x8000;
11705 } else if (VT == MVT::f64) {
11706 if (Subtarget.isPPC64()) {
11707 HighWord = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: Dl, VT: MVT::i32,
11708 N1: DAG.getBitcast(VT: MVT::i64, V: Op),
11709 N2: DAG.getConstant(Val: 1, DL: Dl, VT: MVT::i32));
11710 } else {
11711 SDValue Vec = DAG.getBitcast(
11712 VT: MVT::v4i32, V: DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: Dl, VT: MVT::v2f64, Operand: Op));
11713 HighWord = DAG.getNode(
11714 Opcode: ISD::EXTRACT_VECTOR_ELT, DL: Dl, VT: MVT::i32, N1: Vec,
11715 N2: DAG.getVectorIdxConstant(Val: Subtarget.isLittleEndian() ? 1 : 0, DL: Dl));
11716 }
11717 QuietMask = 0x80000;
11718 } else if (VT == MVT::f32) {
11719 HighWord = DAG.getBitcast(VT: MVT::i32, V: Op);
11720 QuietMask = 0x400000;
11721 }
11722 SDValue NanRes = DAG.getSetCC(
11723 DL: Dl, VT: MVT::i1,
11724 LHS: DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i32, N1: HighWord,
11725 N2: DAG.getConstant(Val: QuietMask, DL: Dl, VT: MVT::i32)),
11726 RHS: DAG.getConstant(Val: 0, DL: Dl, VT: MVT::i32), Cond: IsQuiet ? ISD::SETNE : ISD::SETEQ);
11727 NanRes = DAG.getNode(Opcode: ISD::AND, DL: Dl, VT: MVT::i1, N1: NanCheck, N2: NanRes);
11728 if (Mask == fcQNan || Mask == fcSNan)
11729 return NanRes;
11730
11731 return DAG.getNode(Opcode: ISD::OR, DL: Dl, VT: MVT::i1,
11732 N1: getDataClassTest(Op, Mask: Mask & ~fcNan, Dl, DAG, Subtarget),
11733 N2: NanRes);
11734 }
11735
11736 unsigned NativeMask = 0;
11737 if ((Mask & fcNan) == fcNan)
11738 NativeMask |= DC_NAN;
11739 if (Mask & fcNegInf)
11740 NativeMask |= DC_NEG_INF;
11741 if (Mask & fcPosInf)
11742 NativeMask |= DC_POS_INF;
11743 if (Mask & fcNegZero)
11744 NativeMask |= DC_NEG_ZERO;
11745 if (Mask & fcPosZero)
11746 NativeMask |= DC_POS_ZERO;
11747 if (Mask & fcNegSubnormal)
11748 NativeMask |= DC_NEG_SUBNORM;
11749 if (Mask & fcPosSubnormal)
11750 NativeMask |= DC_POS_SUBNORM;
11751 return SDValue(
11752 DAG.getMachineNode(
11753 Opcode: TargetOpcode::EXTRACT_SUBREG, dl: Dl, VT: MVT::i1,
11754 Op1: SDValue(DAG.getMachineNode(
11755 Opcode: TestOp, dl: Dl, VT: MVT::i32,
11756 Op1: DAG.getTargetConstant(Val: NativeMask, DL: Dl, VT: MVT::i32), Op2: Op),
11757 0),
11758 Op2: DAG.getTargetConstant(Val: PPC::sub_eq, DL: Dl, VT: MVT::i32)),
11759 0);
11760}
11761
11762SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11763 SelectionDAG &DAG) const {
11764 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11765 SDValue LHS = Op.getOperand(i: 0);
11766 uint64_t RHSC = Op.getConstantOperandVal(i: 1);
11767 SDLoc Dl(Op);
11768 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11769 if (LHS.getValueType() == MVT::ppcf128) {
11770 // The higher part determines the value class.
11771 LHS = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL: Dl, VT: MVT::f64, N1: LHS,
11772 N2: DAG.getConstant(Val: 1, DL: Dl, VT: MVT::i32));
11773 }
11774
11775 return getDataClassTest(Op: LHS, Mask: Category, Dl, DAG, Subtarget);
11776}
11777
11778SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11779 SelectionDAG &DAG) const {
11780 SDLoc dl(Op);
11781
11782 MachineFunction &MF = DAG.getMachineFunction();
11783 SDValue Op0 = Op.getOperand(i: 0);
11784 EVT ValVT = Op0.getValueType();
11785 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11786 if (isa<ConstantSDNode>(Val: Op0) && EltSize <= 32) {
11787 int64_t IntVal = Op.getConstantOperandVal(i: 0);
11788 if (IntVal >= -16 && IntVal <= 15)
11789 return getCanonicalConstSplat(Val: IntVal, SplatSize: EltSize / 8, VT: Op.getValueType(), DAG,
11790 dl);
11791 }
11792
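  // If the operand is a 32-bit integer load, reuse the load's address and emit
  // a load-and-splat (PPCISD::LD_SPLAT) instead of going through a stack slot.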
11793 ReuseLoadInfo RLI;
11794 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11795 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11796 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11797 canReuseLoadAddress(Op: Op0, MemVT: MVT::i32, RLI, DAG, ET: ISD::NON_EXTLOAD)) {
11798
11799 MachineMemOperand *MMO =
11800 MF.getMachineMemOperand(PtrInfo: RLI.MPI, F: MachineMemOperand::MOLoad, Size: 4,
11801 BaseAlignment: RLI.Alignment, AAInfo: RLI.AAInfo, Ranges: RLI.Ranges);
11802 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11803 SDValue Bits = DAG.getMemIntrinsicNode(
11804 Opcode: PPCISD::LD_SPLAT, dl, VTList: DAG.getVTList(VT1: MVT::v4i32, VT2: MVT::Other), Ops,
11805 MemVT: MVT::i32, MMO);
11806 if (RLI.ResChain)
11807 DAG.makeEquivalentMemoryOrdering(OldChain: RLI.ResChain, NewMemOpChain: Bits.getValue(R: 1));
11808 return Bits.getValue(R: 0);
11809 }
11810
11811 // Create a stack slot that is 16-byte aligned.
11812 MachineFrameInfo &MFI = MF.getFrameInfo();
11813 int FrameIdx = MFI.CreateStackObject(Size: 16, Alignment: Align(16), isSpillSlot: false);
11814 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
11815 SDValue FIdx = DAG.getFrameIndex(FI: FrameIdx, VT: PtrVT);
11816
11817 SDValue Val = Op0;
  // P10 hardware store forwarding requires that a single store contains all
  // the data for the load, but P10 is able to merge a pair of adjacent stores.
  // Try to avoid load-hit-store hazards on P10 when running binaries compiled
  // for older processors by generating two mergeable scalar stores that can be
  // forwarded to the vector load.
11823 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11824 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11825 ValVT.getSizeInBits() <= 64) {
11826 Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i64, Operand: Val);
11827 EVT ShiftAmountTy = getShiftAmountTy(LHSTy: MVT::i64, DL: DAG.getDataLayout());
11828 SDValue ShiftBy = DAG.getConstant(
11829 Val: 64 - Op.getValueType().getScalarSizeInBits(), DL: dl, VT: ShiftAmountTy);
11830 Val = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i64, N1: Val, N2: ShiftBy);
11831 SDValue Plus8 =
11832 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: FIdx, N2: DAG.getConstant(Val: 8, DL: dl, VT: PtrVT));
11833 SDValue Store2 =
11834 DAG.getStore(Chain: DAG.getEntryNode(), dl, Val, Ptr: Plus8, PtrInfo: MachinePointerInfo());
11835 SDValue Store = DAG.getStore(Chain: Store2, dl, Val, Ptr: FIdx, PtrInfo: MachinePointerInfo());
11836 return DAG.getLoad(VT: Op.getValueType(), dl, Chain: Store, Ptr: FIdx,
11837 PtrInfo: MachinePointerInfo());
11838 }
11839
  // Store the input value into element 0 of the stack slot.
11841 SDValue Store =
11842 DAG.getStore(Chain: DAG.getEntryNode(), dl, Val, Ptr: FIdx, PtrInfo: MachinePointerInfo());
11843 // Load it out.
11844 return DAG.getLoad(VT: Op.getValueType(), dl, Chain: Store, Ptr: FIdx, PtrInfo: MachinePointerInfo());
11845}
11846
11847SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11848 SelectionDAG &DAG) const {
11849 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11850 "Should only be called for ISD::INSERT_VECTOR_ELT");
11851
11852 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
11853
11854 EVT VT = Op.getValueType();
11855 SDLoc dl(Op);
11856 SDValue V1 = Op.getOperand(i: 0);
11857 SDValue V2 = Op.getOperand(i: 1);
11858
11859 if (VT == MVT::v2f64 && C)
11860 return Op;
11861
11862 if (Subtarget.hasP9Vector()) {
    // An f32 load feeding into a v4f32 insert_vector_elt is handled this way
    // because on P10 it allows this specific insert_vector_elt load pattern to
    // utilize the refactored load and store infrastructure in order to exploit
    // prefixed loads.
    // On targets with inexpensive direct moves (Power9 and up), an
    // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
    // load, since a single-precision load would involve conversion to double
    // precision followed by another conversion back to single precision.
11871 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11872 (isa<LoadSDNode>(Val: V2))) {
11873 SDValue BitcastVector = DAG.getBitcast(VT: MVT::v4i32, V: V1);
11874 SDValue BitcastLoad = DAG.getBitcast(VT: MVT::i32, V: V2);
11875 SDValue InsVecElt =
11876 DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: dl, VT: MVT::v4i32, N1: BitcastVector,
11877 N2: BitcastLoad, N3: Op.getOperand(i: 2));
11878 return DAG.getBitcast(VT: MVT::v4f32, V: InsVecElt);
11879 }
11880 }
11881
11882 if (Subtarget.isISA3_1()) {
11883 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11884 return SDValue();
11885 // On P10, we have legal lowering for constant and variable indices for
11886 // all vectors.
11887 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11888 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11889 return Op;
11890 }
11891
11892 // Before P10, we have legal lowering for constant indices but not for
11893 // variable ones.
11894 if (!C)
11895 return SDValue();
11896
11897 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11898 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11899 SDValue Mtvsrz = DAG.getNode(Opcode: PPCISD::MTVSRZ, DL: dl, VT, Operand: V2);
11900 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11901 unsigned InsertAtElement = C->getZExtValue();
11902 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11903 if (Subtarget.isLittleEndian()) {
11904 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11905 }
11906 return DAG.getNode(Opcode: PPCISD::VECINSERT, DL: dl, VT, N1: V1, N2: Mtvsrz,
11907 N3: DAG.getConstant(Val: InsertAtByte, DL: dl, VT: MVT::i32));
11908 }
11909 return Op;
11910}
11911
11912SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
11913 SelectionDAG &DAG) const {
11914 SDLoc dl(Op);
11915 LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
11916 SDValue LoadChain = LN->getChain();
11917 SDValue BasePtr = LN->getBasePtr();
11918 EVT VT = Op.getValueType();
11919 bool IsV1024i1 = VT == MVT::v1024i1;
11920 bool IsV2048i1 = VT == MVT::v2048i1;
11921
11922 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
11923 // Dense Math dmr pair registers, respectively.
11924 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
11925 (void)IsV2048i1;
11926 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
11927 "Dense Math support required.");
11928 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
11929
11930 SmallVector<SDValue, 8> Loads;
11931 SmallVector<SDValue, 8> LoadChains;
11932
11933 SDValue IntrinID = DAG.getConstant(Val: Intrinsic::ppc_vsx_lxvp, DL: dl, VT: MVT::i32);
11934 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
11935 MachineMemOperand *MMO = LN->getMemOperand();
11936 unsigned NumVecs = VT.getSizeInBits() / 256;
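  // Load the dmr (or dmr pair) value as a sequence of 256-bit vector pairs
  // using the lxvp intrinsic, 32 bytes at a time.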
11937 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11938 MachineMemOperand *NewMMO =
11939 DAG.getMachineFunction().getMachineMemOperand(MMO, Offset: Idx * 32, Size: 32);
11940 if (Idx > 0) {
11941 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
11942 N2: DAG.getConstant(Val: 32, DL: dl, VT: BasePtr.getValueType()));
11943 LoadOps[2] = BasePtr;
11944 }
11945 SDValue Ld = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl,
11946 VTList: DAG.getVTList(VT1: MVT::v256i1, VT2: MVT::Other),
11947 Ops: LoadOps, MemVT: MVT::v256i1, MMO: NewMMO);
11948 LoadChains.push_back(Elt: Ld.getValue(R: 1));
11949 Loads.push_back(Elt: Ld);
11950 }
11951
11952 if (Subtarget.isLittleEndian()) {
11953 std::reverse(first: Loads.begin(), last: Loads.end());
11954 std::reverse(first: LoadChains.begin(), last: LoadChains.end());
11955 }
11956
11957 SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: LoadChains);
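  // DMXXINSTDMR512 and DMXXINSTDMR512_HI each assemble two 256-bit pairs into
  // a 512-bit half; REG_SEQUENCE then combines the halves into a v1024i1 dmr.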
11958 SDValue Lo(DAG.getMachineNode(Opcode: PPC::DMXXINSTDMR512, dl, VT: MVT::v512i1, Op1: Loads[0],
11959 Op2: Loads[1]),
11960 0);
11961 SDValue LoSub = DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32);
11962 SDValue Hi(DAG.getMachineNode(Opcode: PPC::DMXXINSTDMR512_HI, dl, VT: MVT::v512i1,
11963 Op1: Loads[2], Op2: Loads[3]),
11964 0);
11965 SDValue HiSub = DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32);
11966 SDValue RC = DAG.getTargetConstant(Val: PPC::DMRRCRegClassID, DL: dl, VT: MVT::i32);
11967 const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
11968
11969 SDValue Value =
11970 SDValue(DAG.getMachineNode(Opcode: PPC::REG_SEQUENCE, dl, VT: MVT::v1024i1, Ops), 0);
11971
11972 if (IsV1024i1) {
11973 return DAG.getMergeValues(Ops: {Value, TF}, dl);
11974 }
11975
11976 // Handle Loads for V2048i1 which represents a dmr pair.
11977 SDValue DmrPValue;
11978 SDValue Dmr1Lo(DAG.getMachineNode(Opcode: PPC::DMXXINSTDMR512, dl, VT: MVT::v512i1,
11979 Op1: Loads[4], Op2: Loads[5]),
11980 0);
11981 SDValue Dmr1Hi(DAG.getMachineNode(Opcode: PPC::DMXXINSTDMR512_HI, dl, VT: MVT::v512i1,
11982 Op1: Loads[6], Op2: Loads[7]),
11983 0);
11984 const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
11985 SDValue Dmr1Value = SDValue(
11986 DAG.getMachineNode(Opcode: PPC::REG_SEQUENCE, dl, VT: MVT::v1024i1, Ops: Dmr1Ops), 0);
11987
11988 SDValue Dmr0Sub = DAG.getTargetConstant(Val: PPC::sub_dmr0, DL: dl, VT: MVT::i32);
11989 SDValue Dmr1Sub = DAG.getTargetConstant(Val: PPC::sub_dmr1, DL: dl, VT: MVT::i32);
11990
11991 SDValue DmrPRC = DAG.getTargetConstant(Val: PPC::DMRpRCRegClassID, DL: dl, VT: MVT::i32);
11992 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
11993
11994 DmrPValue = SDValue(
11995 DAG.getMachineNode(Opcode: PPC::REG_SEQUENCE, dl, VT: MVT::v2048i1, Ops: DmrPOps), 0);
11996
11997 return DAG.getMergeValues(Ops: {DmrPValue, TF}, dl);
11998}
11999
12000SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12001 SelectionDAG &DAG) const {
12002 SDLoc dl(Op);
12003 LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
12004 SDValue LoadChain = LN->getChain();
12005 SDValue BasePtr = LN->getBasePtr();
12006 EVT VT = Op.getValueType();
12007
12008 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12009 return LowerDMFVectorLoad(Op, DAG);
12010
12011 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12012 return Op;
12013
  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value
  // into 2 or 4 VSX registers.
12017 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12018 "Type unsupported without MMA");
12019 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12020 "Type unsupported without paired vector support");
12021 Align Alignment = LN->getAlign();
12022 SmallVector<SDValue, 4> Loads;
12023 SmallVector<SDValue, 4> LoadChains;
12024 unsigned NumVecs = VT.getSizeInBits() / 128;
12025 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12026 SDValue Load =
12027 DAG.getLoad(VT: MVT::v16i8, dl, Chain: LoadChain, Ptr: BasePtr,
12028 PtrInfo: LN->getPointerInfo().getWithOffset(O: Idx * 16),
12029 Alignment: commonAlignment(A: Alignment, Offset: Idx * 16),
12030 MMOFlags: LN->getMemOperand()->getFlags(), AAInfo: LN->getAAInfo());
12031 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
12032 N2: DAG.getConstant(Val: 16, DL: dl, VT: BasePtr.getValueType()));
12033 Loads.push_back(Elt: Load);
12034 LoadChains.push_back(Elt: Load.getValue(R: 1));
12035 }
12036 if (Subtarget.isLittleEndian()) {
12037 std::reverse(first: Loads.begin(), last: Loads.end());
12038 std::reverse(first: LoadChains.begin(), last: LoadChains.end());
12039 }
12040 SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: LoadChains);
12041 SDValue Value =
12042 DAG.getNode(Opcode: VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12043 DL: dl, VT, Ops: Loads);
12044 SDValue RetOps[] = {Value, TF};
12045 return DAG.getMergeValues(Ops: RetOps, dl);
12046}
12047
12048SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12049 SelectionDAG &DAG) const {
12050
12051 SDLoc dl(Op);
12052 StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
12053 SDValue StoreChain = SN->getChain();
12054 SDValue BasePtr = SN->getBasePtr();
12055 SmallVector<SDValue, 8> Values;
12056 SmallVector<SDValue, 8> Stores;
12057 EVT VT = SN->getValue().getValueType();
12058 bool IsV1024i1 = VT == MVT::v1024i1;
12059 bool IsV2048i1 = VT == MVT::v2048i1;
12060
12061 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12062 // Dense Math dmr pair registers, respectively.
12063 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12064 (void)IsV2048i1;
12065 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12066 "Dense Math support required.");
12067 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12068
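  // Split the dmr (or dmr pair) value back into 256-bit vector pairs with
  // DMXXEXTFDMR512[_HI]; each pair is stored with the stxvp intrinsic below.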
12069 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12070 if (IsV1024i1) {
12071 SDValue Lo(DAG.getMachineNode(
12072 Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1,
12073 Op1: Op.getOperand(i: 1),
12074 Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32)),
12075 0);
12076 SDValue Hi(DAG.getMachineNode(
12077 Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1,
12078 Op1: Op.getOperand(i: 1),
12079 Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32)),
12080 0);
12081 MachineSDNode *ExtNode =
12082 DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Lo);
12083 Values.push_back(Elt: SDValue(ExtNode, 0));
12084 Values.push_back(Elt: SDValue(ExtNode, 1));
12085 ExtNode = DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512_HI, dl, ResultTys: ReturnTypes, Ops: Hi);
12086 Values.push_back(Elt: SDValue(ExtNode, 0));
12087 Values.push_back(Elt: SDValue(ExtNode, 1));
12088 } else {
12089 // This corresponds to v2048i1 which represents a dmr pair.
12090 SDValue Dmr0(
12091 DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v1024i1,
12092 Op1: Op.getOperand(i: 1),
12093 Op2: DAG.getTargetConstant(Val: PPC::sub_dmr0, DL: dl, VT: MVT::i32)),
12094 0);
12095
12096 SDValue Dmr1(
12097 DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v1024i1,
12098 Op1: Op.getOperand(i: 1),
12099 Op2: DAG.getTargetConstant(Val: PPC::sub_dmr1, DL: dl, VT: MVT::i32)),
12100 0);
12101
12102 SDValue Dmr0Lo(DAG.getMachineNode(
12103 Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr0,
12104 Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32)),
12105 0);
12106
12107 SDValue Dmr0Hi(DAG.getMachineNode(
12108 Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr0,
12109 Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32)),
12110 0);
12111
12112 SDValue Dmr1Lo(DAG.getMachineNode(
12113 Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr1,
12114 Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_lo, DL: dl, VT: MVT::i32)),
12115 0);
12116
12117 SDValue Dmr1Hi(DAG.getMachineNode(
12118 Opcode: TargetOpcode::EXTRACT_SUBREG, dl, VT: MVT::v512i1, Op1: Dmr1,
12119 Op2: DAG.getTargetConstant(Val: PPC::sub_wacc_hi, DL: dl, VT: MVT::i32)),
12120 0);
12121
12122 MachineSDNode *ExtNode =
12123 DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Dmr0Lo);
12124 Values.push_back(Elt: SDValue(ExtNode, 0));
12125 Values.push_back(Elt: SDValue(ExtNode, 1));
12126 ExtNode =
12127 DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512_HI, dl, ResultTys: ReturnTypes, Ops: Dmr0Hi);
12128 Values.push_back(Elt: SDValue(ExtNode, 0));
12129 Values.push_back(Elt: SDValue(ExtNode, 1));
12130 ExtNode = DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Dmr1Lo);
12131 Values.push_back(Elt: SDValue(ExtNode, 0));
12132 Values.push_back(Elt: SDValue(ExtNode, 1));
12133 ExtNode =
12134 DAG.getMachineNode(Opcode: PPC::DMXXEXTFDMR512_HI, dl, ResultTys: ReturnTypes, Ops: Dmr1Hi);
12135 Values.push_back(Elt: SDValue(ExtNode, 0));
12136 Values.push_back(Elt: SDValue(ExtNode, 1));
12137 }
12138
12139 if (Subtarget.isLittleEndian())
12140 std::reverse(first: Values.begin(), last: Values.end());
12141
12142 SDVTList Tys = DAG.getVTList(VT: MVT::Other);
12143 SmallVector<SDValue, 4> Ops{
12144 StoreChain, DAG.getConstant(Val: Intrinsic::ppc_vsx_stxvp, DL: dl, VT: MVT::i32),
12145 Values[0], BasePtr};
12146 MachineMemOperand *MMO = SN->getMemOperand();
12147 unsigned NumVecs = VT.getSizeInBits() / 256;
12148 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12149 MachineMemOperand *NewMMO =
12150 DAG.getMachineFunction().getMachineMemOperand(MMO, Offset: Idx * 32, Size: 32);
12151 if (Idx > 0) {
12152 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
12153 N2: DAG.getConstant(Val: 32, DL: dl, VT: BasePtr.getValueType()));
12154 Ops[3] = BasePtr;
12155 }
12156 Ops[2] = Values[Idx];
12157 SDValue St = DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_VOID, dl, VTList: Tys, Ops,
12158 MemVT: MVT::v256i1, MMO: NewMMO);
12159 Stores.push_back(Elt: St);
12160 }
12161
12162 SDValue TF = DAG.getTokenFactor(DL: dl, Vals&: Stores);
12163 return TF;
12164}
12165
12166SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12167 SelectionDAG &DAG) const {
12168 SDLoc dl(Op);
12169 StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
12170 SDValue StoreChain = SN->getChain();
12171 SDValue BasePtr = SN->getBasePtr();
12172 SDValue Value = SN->getValue();
12173 SDValue Value2 = SN->getValue();
12174 EVT StoreVT = Value.getValueType();
12175
12176 if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
12177 return LowerDMFVectorStore(Op, DAG);
12178
12179 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12180 return Op;
12181
  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  // Here we create 2 or 4 v16i8 stores to store the pair's or accumulator's
  // underlying registers individually.
12185 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
12186 "Type unsupported without MMA");
12187 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12188 "Type unsupported without paired vector support");
12189 Align Alignment = SN->getAlign();
12190 SmallVector<SDValue, 4> Stores;
12191 unsigned NumVecs = 2;
12192 if (StoreVT == MVT::v512i1) {
12193 if (Subtarget.isISAFuture()) {
12194 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12195 MachineSDNode *ExtNode = DAG.getMachineNode(
12196 Opcode: PPC::DMXXEXTFDMR512, dl, ResultTys: ReturnTypes, Ops: Op.getOperand(i: 1));
12197
12198 Value = SDValue(ExtNode, 0);
12199 Value2 = SDValue(ExtNode, 1);
12200 } else
12201 Value = DAG.getNode(Opcode: PPCISD::XXMFACC, DL: dl, VT: MVT::v512i1, Operand: Value);
12202 NumVecs = 4;
12203 }
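  // Extract and store the underlying 128-bit VSX registers one at a time. On
  // ISA-Future accumulators, Idx picks one of the two extracted 256-bit pairs
  // (Value/Value2) and VecNum the register within it; the order is reversed on
  // little-endian targets.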
12204 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12205 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
12206 SDValue Elt;
12207 if (Subtarget.isISAFuture()) {
12208 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
12209 Elt = DAG.getNode(Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8,
12210 N1: Idx > 1 ? Value2 : Value,
12211 N2: DAG.getConstant(Val: VecNum, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
12212 } else
12213 Elt = DAG.getNode(Opcode: PPCISD::EXTRACT_VSX_REG, DL: dl, VT: MVT::v16i8, N1: Value,
12214 N2: DAG.getConstant(Val: VecNum, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
12215
12216 SDValue Store =
12217 DAG.getStore(Chain: StoreChain, dl, Val: Elt, Ptr: BasePtr,
12218 PtrInfo: SN->getPointerInfo().getWithOffset(O: Idx * 16),
12219 Alignment: commonAlignment(A: Alignment, Offset: Idx * 16),
12220 MMOFlags: SN->getMemOperand()->getFlags(), AAInfo: SN->getAAInfo());
12221 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
12222 N2: DAG.getConstant(Val: 16, DL: dl, VT: BasePtr.getValueType()));
12223 Stores.push_back(Elt: Store);
12224 }
12225 SDValue TF = DAG.getTokenFactor(DL: dl, Vals&: Stores);
12226 return TF;
12227}
12228
12229SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12230 SDLoc dl(Op);
12231 if (Op.getValueType() == MVT::v4i32) {
12232 SDValue LHS = Op.getOperand(i: 0), RHS = Op.getOperand(i: 1);
12233
12234 SDValue Zero = getCanonicalConstSplat(Val: 0, SplatSize: 1, VT: MVT::v4i32, DAG, dl);
    // Splat of -16; vrlw/vslw only use the low 5 bits, so this acts as a +16
    // rotate/shift amount.
12236 SDValue Neg16 = getCanonicalConstSplat(Val: -16, SplatSize: 4, VT: MVT::v4i32, DAG, dl);
12237 SDValue RHSSwap = // = vrlw RHS, 16
12238 BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vrlw, LHS: RHS, RHS: Neg16, DAG, dl);
12239
12240 // Shrinkify inputs to v8i16.
12241 LHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: LHS);
12242 RHS = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: RHS);
12243 RHSSwap = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v8i16, Operand: RHSSwap);
12244
12245 // Low parts multiplied together, generating 32-bit results (we ignore the
12246 // top parts).
12247 SDValue LoProd = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmulouh,
12248 LHS, RHS, DAG, dl, DestVT: MVT::v4i32);
12249
12250 SDValue HiProd = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmsumuhm,
12251 Op0: LHS, Op1: RHSSwap, Op2: Zero, DAG, dl, DestVT: MVT::v4i32);
12252 // Shift the high parts up 16 bits.
12253 HiProd = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vslw, LHS: HiProd,
12254 RHS: Neg16, DAG, dl);
12255 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: MVT::v4i32, N1: LoProd, N2: HiProd);
12256 } else if (Op.getValueType() == MVT::v16i8) {
12257 SDValue LHS = Op.getOperand(i: 0), RHS = Op.getOperand(i: 1);
12258 bool isLittleEndian = Subtarget.isLittleEndian();
12259
    // Multiply the even 8-bit parts, producing 16-bit products.
12261 SDValue EvenParts = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmuleub,
12262 LHS, RHS, DAG, dl, DestVT: MVT::v8i16);
12263 EvenParts = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: EvenParts);
12264
    // Multiply the odd 8-bit parts, producing 16-bit products.
12266 SDValue OddParts = BuildIntrinsicOp(IID: Intrinsic::ppc_altivec_vmuloub,
12267 LHS, RHS, DAG, dl, DestVT: MVT::v8i16);
12268 OddParts = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v16i8, Operand: OddParts);
12269
12270 // Merge the results together. Because vmuleub and vmuloub are
12271 // instructions with a big-endian bias, we must reverse the
12272 // element numbering and reverse the meaning of "odd" and "even"
12273 // when generating little endian code.
12274 int Ops[16];
12275 for (unsigned i = 0; i != 8; ++i) {
12276 if (isLittleEndian) {
12277 Ops[i*2 ] = 2*i;
12278 Ops[i*2+1] = 2*i+16;
12279 } else {
12280 Ops[i*2 ] = 2*i+1;
12281 Ops[i*2+1] = 2*i+1+16;
12282 }
12283 }
12284 if (isLittleEndian)
12285 return DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: OddParts, N2: EvenParts, Mask: Ops);
12286 else
12287 return DAG.getVectorShuffle(VT: MVT::v16i8, dl, N1: EvenParts, N2: OddParts, Mask: Ops);
12288 } else {
12289 llvm_unreachable("Unknown mul to lower!");
12290 }
12291}
12292
12293SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12294 bool IsStrict = Op->isStrictFPOpcode();
12295 if (Op.getOperand(i: IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12296 !Subtarget.hasP9Vector())
12297 return SDValue();
12298
12299 return Op;
12300}
12301
// Custom lowering for fpext v2f32 to v2f64
12303SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12304
12305 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12306 "Should only be called for ISD::FP_EXTEND");
12307
12308 // FIXME: handle extends from half precision float vectors on P9.
12309 // We only want to custom lower an extend from v2f32 to v2f64.
12310 if (Op.getValueType() != MVT::v2f64 ||
12311 Op.getOperand(i: 0).getValueType() != MVT::v2f32)
12312 return SDValue();
12313
12314 SDLoc dl(Op);
12315 SDValue Op0 = Op.getOperand(i: 0);
12316
12317 switch (Op0.getOpcode()) {
12318 default:
12319 return SDValue();
12320 case ISD::EXTRACT_SUBVECTOR: {
12321 assert(Op0.getNumOperands() == 2 &&
12322 isa<ConstantSDNode>(Op0->getOperand(1)) &&
12323 "Node should have 2 operands with second one being a constant!");
12324
12325 if (Op0.getOperand(i: 0).getValueType() != MVT::v4f32)
12326 return SDValue();
12327
    // Custom lowering is only done for the high or low doubleword.
12329 int Idx = Op0.getConstantOperandVal(i: 1);
12330 if (Idx % 2 != 0)
12331 return SDValue();
12332
12333 // Since input is v4f32, at this point Idx is either 0 or 2.
12334 // Shift to get the doubleword position we want.
12335 int DWord = Idx >> 1;
12336
12337 // High and low word positions are different on little endian.
12338 if (Subtarget.isLittleEndian())
12339 DWord ^= 0x1;
12340
12341 return DAG.getNode(Opcode: PPCISD::FP_EXTEND_HALF, DL: dl, VT: MVT::v2f64,
12342 N1: Op0.getOperand(i: 0), N2: DAG.getConstant(Val: DWord, DL: dl, VT: MVT::i32));
12343 }
12344 case ISD::FADD:
12345 case ISD::FMUL:
12346 case ISD::FSUB: {
12347 SDValue NewLoad[2];
12348 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
      // Ensure both inputs are loads.
12350 SDValue LdOp = Op0.getOperand(i);
12351 if (LdOp.getOpcode() != ISD::LOAD)
12352 return SDValue();
12353 // Generate new load node.
12354 LoadSDNode *LD = cast<LoadSDNode>(Val&: LdOp);
12355 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12356 NewLoad[i] = DAG.getMemIntrinsicNode(
12357 Opcode: PPCISD::LD_VSX_LH, dl, VTList: DAG.getVTList(VT1: MVT::v4f32, VT2: MVT::Other), Ops: LoadOps,
12358 MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
12359 }
12360 SDValue NewOp =
12361 DAG.getNode(Opcode: Op0.getOpcode(), DL: SDLoc(Op0), VT: MVT::v4f32, N1: NewLoad[0],
12362 N2: NewLoad[1], Flags: Op0.getNode()->getFlags());
12363 return DAG.getNode(Opcode: PPCISD::FP_EXTEND_HALF, DL: dl, VT: MVT::v2f64, N1: NewOp,
12364 N2: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32));
12365 }
12366 case ISD::LOAD: {
12367 LoadSDNode *LD = cast<LoadSDNode>(Val&: Op0);
12368 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12369 SDValue NewLd = DAG.getMemIntrinsicNode(
12370 Opcode: PPCISD::LD_VSX_LH, dl, VTList: DAG.getVTList(VT1: MVT::v4f32, VT2: MVT::Other), Ops: LoadOps,
12371 MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
12372 return DAG.getNode(Opcode: PPCISD::FP_EXTEND_HALF, DL: dl, VT: MVT::v2f64, N1: NewLd,
12373 N2: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32));
12374 }
12375 }
  llvm_unreachable("ERROR: Should return for all cases within switch.");
12377}
12378
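// Materialize an incoming 0/1 carry value into the carry flag consumed by
// ADDE/SUBE: adding the value to all-ones carries out exactly when it is 1.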
12379static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value,
12380 SelectionDAG &DAG,
12381 const PPCSubtarget &STI) {
12382 SDLoc DL(Value);
12383 if (STI.useCRBits())
12384 Value = DAG.getNode(Opcode: ISD::SELECT, DL, VT: SumType, N1: Value,
12385 N2: DAG.getConstant(Val: 1, DL, VT: SumType),
12386 N3: DAG.getConstant(Val: 0, DL, VT: SumType));
12387 else
12388 Value = DAG.getZExtOrTrunc(Op: Value, DL, VT: SumType);
12389 SDValue Sum = DAG.getNode(Opcode: PPCISD::ADDC, DL, VTList: DAG.getVTList(VT1: SumType, VT2: MVT::i32),
12390 N1: Value, N2: DAG.getAllOnesConstant(DL, VT: SumType));
12391 return Sum.getValue(R: 1);
12392}
12393
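// Recover a 0/1 carry value from the carry flag by computing 0 + 0 + carry
// with ADDE and comparing (or truncating) the result.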
12394static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag,
12395 EVT CarryType, SelectionDAG &DAG,
12396 const PPCSubtarget &STI) {
12397 SDLoc DL(Flag);
12398 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SumType);
12399 SDValue Carry = DAG.getNode(
12400 Opcode: PPCISD::ADDE, DL, VTList: DAG.getVTList(VT1: SumType, VT2: MVT::i32), N1: Zero, N2: Zero, N3: Flag);
12401 if (STI.useCRBits())
12402 return DAG.getSetCC(DL, VT: CarryType, LHS: Carry, RHS: Zero, Cond: ISD::SETNE);
12403 return DAG.getZExtOrTrunc(Op: Carry, DL, VT: CarryType);
12404}
12405
12406SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12407
12408 SDLoc DL(Op);
12409 SDNode *N = Op.getNode();
12410 EVT VT = N->getValueType(ResNo: 0);
12411 EVT CarryType = N->getValueType(ResNo: 1);
12412 unsigned Opc = N->getOpcode();
12413 bool IsAdd = Opc == ISD::UADDO;
12414 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12415 SDValue Sum = DAG.getNode(Opcode: Opc, DL, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i32),
12416 N1: N->getOperand(Num: 0), N2: N->getOperand(Num: 1));
12417 SDValue Carry = ConvertCarryFlagToCarryValue(SumType: VT, Flag: Sum.getValue(R: 1), CarryType,
12418 DAG, STI: Subtarget);
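  // PPC's carry bit for subtraction is set when *no* borrow occurs, whereas
  // USUBO expects its flag to indicate a borrow, so invert it for subtracts.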
12419 if (!IsAdd)
12420 Carry = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryType, N1: Carry,
12421 N2: DAG.getConstant(Val: 1UL, DL, VT: CarryType));
12422 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Sum, N2: Carry);
12423}
12424
12425SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12426 SelectionDAG &DAG) const {
12427 SDLoc DL(Op);
12428 SDNode *N = Op.getNode();
12429 unsigned Opc = N->getOpcode();
12430 EVT VT = N->getValueType(ResNo: 0);
12431 EVT CarryType = N->getValueType(ResNo: 1);
12432 SDValue CarryOp = N->getOperand(Num: 2);
12433 bool IsAdd = Opc == ISD::UADDO_CARRY;
12434 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12435 if (!IsAdd)
12436 CarryOp = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryOp.getValueType(), N1: CarryOp,
12437 N2: DAG.getConstant(Val: 1UL, DL, VT: CarryOp.getValueType()));
12438 CarryOp = ConvertCarryValueToCarryFlag(SumType: VT, Value: CarryOp, DAG, STI: Subtarget);
12439 SDValue Sum = DAG.getNode(Opcode: Opc, DL, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i32),
12440 N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1), N3: CarryOp);
12441 CarryOp = ConvertCarryFlagToCarryValue(SumType: VT, Flag: Sum.getValue(R: 1), CarryType, DAG,
12442 STI: Subtarget);
12443 if (!IsAdd)
12444 CarryOp = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryOp.getValueType(), N1: CarryOp,
12445 N2: DAG.getConstant(Val: 1UL, DL, VT: CarryOp.getValueType()));
12446 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Sum, N2: CarryOp);
12447}
12448
12449SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12450
12451 SDLoc dl(Op);
12452 SDValue LHS = Op.getOperand(i: 0);
12453 SDValue RHS = Op.getOperand(i: 1);
12454 EVT VT = Op.getNode()->getValueType(ResNo: 0);
12455
12456 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS);
12457
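  // Signed overflow on subtraction occurs when the operands have different
  // signs and the sign of the result differs from the sign of LHS, i.e. when
  // the sign bit of ((RHS ^ LHS) & (Sub ^ LHS)) is set.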
12458 SDValue Xor1 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: RHS, N2: LHS);
12459 SDValue Xor2 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Sub, N2: LHS);
12460
12461 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Xor1, N2: Xor2);
12462
12463 SDValue Overflow =
12464 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: And,
12465 N2: DAG.getConstant(Val: VT.getSizeInBits() - 1, DL: dl, VT: MVT::i32));
12466
12467 SDValue OverflowTrunc =
12468 DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: Op.getNode()->getValueType(ResNo: 1), Operand: Overflow);
12469
12470 return DAG.getMergeValues(Ops: {Sub, OverflowTrunc}, dl);
12471}
12472
12473/// LowerOperation - Provide custom lowering hooks for some operations.
12474///
12475SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
12476 switch (Op.getOpcode()) {
12477 default:
12478 llvm_unreachable("Wasn't expecting to be able to lower this!");
12479 case ISD::FPOW: return lowerPow(Op, DAG);
12480 case ISD::FSIN: return lowerSin(Op, DAG);
12481 case ISD::FCOS: return lowerCos(Op, DAG);
12482 case ISD::FLOG: return lowerLog(Op, DAG);
12483 case ISD::FLOG10: return lowerLog10(Op, DAG);
12484 case ISD::FEXP: return lowerExp(Op, DAG);
12485 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12486 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12487 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12488 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12489 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12490 case ISD::STRICT_FSETCC:
12491 case ISD::STRICT_FSETCCS:
12492 case ISD::SETCC: return LowerSETCC(Op, DAG);
12493 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12494 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12495 case ISD::SSUBO:
12496 return LowerSSUBO(Op, DAG);
12497
12498 case ISD::INLINEASM:
12499 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12500 // Variable argument lowering.
12501 case ISD::VASTART: return LowerVASTART(Op, DAG);
12502 case ISD::VAARG: return LowerVAARG(Op, DAG);
12503 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12504
12505 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12506 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12507 case ISD::GET_DYNAMIC_AREA_OFFSET:
12508 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12509
12510 // Exception handling lowering.
12511 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12512 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12513 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12514
12515 case ISD::LOAD: return LowerLOAD(Op, DAG);
12516 case ISD::STORE: return LowerSTORE(Op, DAG);
12517 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12518 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12519 case ISD::STRICT_FP_TO_UINT:
12520 case ISD::STRICT_FP_TO_SINT:
12521 case ISD::FP_TO_UINT:
12522 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, dl: SDLoc(Op));
12523 case ISD::STRICT_UINT_TO_FP:
12524 case ISD::STRICT_SINT_TO_FP:
12525 case ISD::UINT_TO_FP:
12526 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12527 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12528 case ISD::SET_ROUNDING:
12529 return LowerSET_ROUNDING(Op, DAG);
12530
12531 // Lower 64-bit shifts.
12532 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12533 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12534 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12535
12536 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12537 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12538
12539 // Vector-related lowering.
12540 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12541 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12542 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12543 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12544 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12545 case ISD::MUL: return LowerMUL(Op, DAG);
12546 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12547 case ISD::STRICT_FP_ROUND:
12548 case ISD::FP_ROUND:
12549 return LowerFP_ROUND(Op, DAG);
12550 case ISD::ROTL: return LowerROTL(Op, DAG);
12551
12552 // For counter-based loop handling.
12553 case ISD::INTRINSIC_W_CHAIN: return SDValue();
12554
12555 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12556
12557 // Frame & Return address.
12558 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12559 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12560
12561 case ISD::INTRINSIC_VOID:
12562 return LowerINTRINSIC_VOID(Op, DAG);
12563 case ISD::BSWAP:
12564 return LowerBSWAP(Op, DAG);
12565 case ISD::ATOMIC_CMP_SWAP:
12566 return LowerATOMIC_CMP_SWAP(Op, DAG);
12567 case ISD::ATOMIC_STORE:
12568 return LowerATOMIC_LOAD_STORE(Op, DAG);
12569 case ISD::IS_FPCLASS:
12570 return LowerIS_FPCLASS(Op, DAG);
12571 case ISD::UADDO:
12572 case ISD::USUBO:
12573 return LowerADDSUBO(Op, DAG);
12574 case ISD::UADDO_CARRY:
12575 case ISD::USUBO_CARRY:
12576 return LowerADDSUBO_CARRY(Op, DAG);
12577 }
12578}
12579
12580void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12581 SmallVectorImpl<SDValue>&Results,
12582 SelectionDAG &DAG) const {
12583 SDLoc dl(N);
12584 switch (N->getOpcode()) {
12585 default:
12586 llvm_unreachable("Do not know how to custom type legalize this operation!");
12587 case ISD::ATOMIC_LOAD: {
12588 SDValue Res = LowerATOMIC_LOAD_STORE(Op: SDValue(N, 0), DAG);
12589 Results.push_back(Elt: Res);
12590 Results.push_back(Elt: Res.getValue(R: 1));
12591 break;
12592 }
12593 case ISD::READCYCLECOUNTER: {
12594 SDVTList VTs = DAG.getVTList(VT1: MVT::i32, VT2: MVT::i32, VT3: MVT::Other);
12595 SDValue RTB = DAG.getNode(Opcode: PPCISD::READ_TIME_BASE, DL: dl, VTList: VTs, N: N->getOperand(Num: 0));
12596
12597 Results.push_back(
12598 Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: RTB, N2: RTB.getValue(R: 1)));
12599 Results.push_back(Elt: RTB.getValue(R: 2));
12600 break;
12601 }
12602 case ISD::INTRINSIC_W_CHAIN: {
12603 if (N->getConstantOperandVal(Num: 1) != Intrinsic::loop_decrement)
12604 break;
12605
12606 assert(N->getValueType(0) == MVT::i1 &&
12607 "Unexpected result type for CTR decrement intrinsic");
12608 EVT SVT = getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(),
12609 VT: N->getValueType(ResNo: 0));
12610 SDVTList VTs = DAG.getVTList(VT1: SVT, VT2: MVT::Other);
12611 SDValue NewInt = DAG.getNode(Opcode: N->getOpcode(), DL: dl, VTList: VTs, N1: N->getOperand(Num: 0),
12612 N2: N->getOperand(Num: 1));
12613
12614 Results.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: NewInt));
12615 Results.push_back(Elt: NewInt.getValue(R: 1));
12616 break;
12617 }
12618 case ISD::INTRINSIC_WO_CHAIN: {
12619 switch (N->getConstantOperandVal(Num: 0)) {
12620 case Intrinsic::ppc_pack_longdouble:
12621 Results.push_back(Elt: DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::ppcf128,
12622 N1: N->getOperand(Num: 2), N2: N->getOperand(Num: 1)));
12623 break;
12624 case Intrinsic::ppc_maxfe:
12625 case Intrinsic::ppc_minfe:
12626 case Intrinsic::ppc_fnmsub:
12627 case Intrinsic::ppc_convert_f128_to_ppcf128:
12628 Results.push_back(Elt: LowerINTRINSIC_WO_CHAIN(Op: SDValue(N, 0), DAG));
12629 break;
12630 }
12631 break;
12632 }
12633 case ISD::VAARG: {
12634 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12635 return;
12636
12637 EVT VT = N->getValueType(ResNo: 0);
12638
12639 if (VT == MVT::i64) {
12640 SDValue NewNode = LowerVAARG(Op: SDValue(N, 1), DAG);
12641
12642 Results.push_back(Elt: NewNode);
12643 Results.push_back(Elt: NewNode.getValue(R: 1));
12644 }
12645 return;
12646 }
12647 case ISD::STRICT_FP_TO_SINT:
12648 case ISD::STRICT_FP_TO_UINT:
12649 case ISD::FP_TO_SINT:
12650 case ISD::FP_TO_UINT: {
12651 // LowerFP_TO_INT() can only handle f32 and f64.
12652 if (N->getOperand(Num: N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12653 MVT::ppcf128)
12654 return;
12655 SDValue LoweredValue = LowerFP_TO_INT(Op: SDValue(N, 0), DAG, dl);
12656 Results.push_back(Elt: LoweredValue);
12657 if (N->isStrictFPOpcode())
12658 Results.push_back(Elt: LoweredValue.getValue(R: 1));
12659 return;
12660 }
12661 case ISD::TRUNCATE: {
12662 if (!N->getValueType(ResNo: 0).isVector())
12663 return;
12664 SDValue Lowered = LowerTRUNCATEVector(Op: SDValue(N, 0), DAG);
12665 if (Lowered)
12666 Results.push_back(Elt: Lowered);
12667 return;
12668 }
12669 case ISD::SCALAR_TO_VECTOR: {
12670 SDValue Lowered = LowerSCALAR_TO_VECTOR(Op: SDValue(N, 0), DAG);
12671 if (Lowered)
12672 Results.push_back(Elt: Lowered);
12673 return;
12674 }
12675 case ISD::FSHL:
12676 case ISD::FSHR:
12677 // Don't handle funnel shifts here.
12678 return;
12679 case ISD::BITCAST:
12680 // Don't handle bitcast here.
12681 return;
12682 case ISD::FP_EXTEND:
12683 SDValue Lowered = LowerFP_EXTEND(Op: SDValue(N, 0), DAG);
12684 if (Lowered)
12685 Results.push_back(Elt: Lowered);
12686 return;
12687 }
12688}
12689
12690//===----------------------------------------------------------------------===//
12691// Other Lowering Code
12692//===----------------------------------------------------------------------===//
12693
12694static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12695 return Builder.CreateIntrinsic(ID: Id, Args: {});
12696}
12697
12698Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
12699 Value *Addr,
12700 AtomicOrdering Ord) const {
12701 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12702
12703 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12704 "Only 8/16/32/64-bit atomic loads supported");
12705 Intrinsic::ID IntID;
12706 switch (SZ) {
12707 default:
12708 llvm_unreachable("Unexpected PrimitiveSize");
12709 case 8:
12710 IntID = Intrinsic::ppc_lbarx;
    assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12712 break;
12713 case 16:
12714 IntID = Intrinsic::ppc_lharx;
    assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12716 break;
12717 case 32:
12718 IntID = Intrinsic::ppc_lwarx;
12719 break;
12720 case 64:
12721 IntID = Intrinsic::ppc_ldarx;
12722 break;
12723 }
12724 Value *Call =
12725 Builder.CreateIntrinsic(ID: IntID, Args: Addr, /*FMFSource=*/nullptr, Name: "larx");
12726
12727 return Builder.CreateTruncOrBitCast(V: Call, DestTy: ValueTy);
12728}
12729
12730// Perform a store-conditional operation to Addr. Return the status of the
12731// store. This should be 0 if the store succeeded, non-zero otherwise.
12732Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
12733 Value *Val, Value *Addr,
12734 AtomicOrdering Ord) const {
12735 Type *Ty = Val->getType();
12736 unsigned SZ = Ty->getPrimitiveSizeInBits();
12737
12738 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
         "Only 8/16/32/64-bit atomic stores supported");
12740 Intrinsic::ID IntID;
12741 switch (SZ) {
12742 default:
12743 llvm_unreachable("Unexpected PrimitiveSize");
12744 case 8:
12745 IntID = Intrinsic::ppc_stbcx;
    assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12747 break;
12748 case 16:
12749 IntID = Intrinsic::ppc_sthcx;
    assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12751 break;
12752 case 32:
12753 IntID = Intrinsic::ppc_stwcx;
12754 break;
12755 case 64:
12756 IntID = Intrinsic::ppc_stdcx;
12757 break;
12758 }
12759
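  // The sub-word store-conditional intrinsics take the value as an i32.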
12760 if (SZ == 8 || SZ == 16)
12761 Val = Builder.CreateZExt(V: Val, DestTy: Builder.getInt32Ty());
12762
12763 Value *Call = Builder.CreateIntrinsic(ID: IntID, Args: {Addr, Val},
12764 /*FMFSource=*/nullptr, Name: "stcx");
12765 return Builder.CreateXor(LHS: Call, RHS: Builder.getInt32(C: 1));
12766}
12767
// The mappings for emitLeading/TrailingFence are taken from
12769// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12770Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12771 Instruction *Inst,
12772 AtomicOrdering Ord) const {
12773 if (Ord == AtomicOrdering::SequentiallyConsistent)
12774 return callIntrinsic(Builder, Id: Intrinsic::ppc_sync);
12775 if (isReleaseOrStronger(AO: Ord))
12776 return callIntrinsic(Builder, Id: Intrinsic::ppc_lwsync);
12777 return nullptr;
12778}
12779
12780Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12781 Instruction *Inst,
12782 AtomicOrdering Ord) const {
12783 if (Inst->hasAtomicLoad() && isAcquireOrStronger(AO: Ord)) {
12784 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12785 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12786 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12787 if (isa<LoadInst>(Val: Inst))
12788 return Builder.CreateIntrinsic(ID: Intrinsic::ppc_cfence, Types: {Inst->getType()},
12789 Args: {Inst});
12790 // FIXME: Can use isync for rmw operation.
12791 return callIntrinsic(Builder, Id: Intrinsic::ppc_lwsync);
12792 }
12793 return nullptr;
12794}
12795
12796MachineBasicBlock *
12797PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
12798 unsigned AtomicSize,
12799 unsigned BinOpcode,
12800 unsigned CmpOpcode,
12801 unsigned CmpPred) const {
12802 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12803 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12804
12805 auto LoadMnemonic = PPC::LDARX;
12806 auto StoreMnemonic = PPC::STDCX;
12807 switch (AtomicSize) {
12808 default:
12809 llvm_unreachable("Unexpected size of atomic entity");
12810 case 1:
12811 LoadMnemonic = PPC::LBARX;
12812 StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() &&
           "Partword atomics required for sizes smaller than 4 bytes");
12814 break;
12815 case 2:
12816 LoadMnemonic = PPC::LHARX;
12817 StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() &&
           "Partword atomics required for sizes smaller than 4 bytes");
12819 break;
12820 case 4:
12821 LoadMnemonic = PPC::LWARX;
12822 StoreMnemonic = PPC::STWCX;
12823 break;
12824 case 8:
12825 LoadMnemonic = PPC::LDARX;
12826 StoreMnemonic = PPC::STDCX;
12827 break;
12828 }
12829
12830 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12831 MachineFunction *F = BB->getParent();
12832 MachineFunction::iterator It = ++BB->getIterator();
12833
12834 Register dest = MI.getOperand(i: 0).getReg();
12835 Register ptrA = MI.getOperand(i: 1).getReg();
12836 Register ptrB = MI.getOperand(i: 2).getReg();
12837 Register incr = MI.getOperand(i: 3).getReg();
12838 DebugLoc dl = MI.getDebugLoc();
12839
12840 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
12841 MachineBasicBlock *loop2MBB =
12842 CmpOpcode ? F->CreateMachineBasicBlock(BB: LLVM_BB) : nullptr;
12843 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
12844 F->insert(MBBI: It, MBB: loopMBB);
12845 if (CmpOpcode)
12846 F->insert(MBBI: It, MBB: loop2MBB);
12847 F->insert(MBBI: It, MBB: exitMBB);
12848 exitMBB->splice(Where: exitMBB->begin(), Other: BB,
12849 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
12850 exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
12851
12852 MachineRegisterInfo &RegInfo = F->getRegInfo();
12853 Register TmpReg = (!BinOpcode) ? incr :
12854 RegInfo.createVirtualRegister( RegClass: AtomicSize == 8 ? &PPC::G8RCRegClass
12855 : &PPC::GPRCRegClass);
12856
12857 // thisMBB:
12858 // ...
12859 // fallthrough --> loopMBB
12860 BB->addSuccessor(Succ: loopMBB);
12861
12862 // loopMBB:
12863 // l[wd]arx dest, ptr
12864 // add r0, dest, incr
12865 // st[wd]cx. r0, ptr
12866 // bne- loopMBB
12867 // fallthrough --> exitMBB
12868
12869 // For max/min...
12870 // loopMBB:
12871 // l[wd]arx dest, ptr
12872 // cmpl?[wd] dest, incr
12873 // bgt exitMBB
12874 // loop2MBB:
12875 // st[wd]cx. dest, ptr
12876 // bne- loopMBB
12877 // fallthrough --> exitMBB
12878
12879 BB = loopMBB;
12880 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: LoadMnemonic), DestReg: dest)
12881 .addReg(RegNo: ptrA).addReg(RegNo: ptrB);
12882 if (BinOpcode)
12883 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: BinOpcode), DestReg: TmpReg).addReg(RegNo: incr).addReg(RegNo: dest);
12884 if (CmpOpcode) {
12885 Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
12886 // Signed comparisons of byte or halfword values must be sign-extended.
12887 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12888 Register ExtReg = RegInfo.createVirtualRegister(RegClass: &PPC::GPRCRegClass);
12889 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12890 DestReg: ExtReg).addReg(RegNo: dest);
12891 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: CmpOpcode), DestReg: CrReg).addReg(RegNo: ExtReg).addReg(RegNo: incr);
12892 } else
12893 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: CmpOpcode), DestReg: CrReg).addReg(RegNo: dest).addReg(RegNo: incr);
12894
12895 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
12896 .addImm(Val: CmpPred)
12897 .addReg(RegNo: CrReg)
12898 .addMBB(MBB: exitMBB);
12899 BB->addSuccessor(Succ: loop2MBB);
12900 BB->addSuccessor(Succ: exitMBB);
12901 BB = loop2MBB;
12902 }
12903 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: StoreMnemonic))
12904 .addReg(RegNo: TmpReg).addReg(RegNo: ptrA).addReg(RegNo: ptrB);
12905 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
12906 .addImm(Val: PPC::PRED_NE).addReg(RegNo: PPC::CR0).addMBB(MBB: loopMBB);
12907 BB->addSuccessor(Succ: loopMBB);
12908 BB->addSuccessor(Succ: exitMBB);
12909
12910 // exitMBB:
12911 // ...
12912 BB = exitMBB;
12913 return BB;
12914}
12915
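// Return true if the instruction defining a value is known to produce a
// sign-extended result: sign-extending loads, exts[bhw] and friends, algebraic
// shifts, or a copy from a register already known to be sign-extended.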
12916static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
12917 switch(MI.getOpcode()) {
12918 default:
12919 return false;
12920 case PPC::COPY:
12921 return TII->isSignExtended(Reg: MI.getOperand(i: 1).getReg(),
12922 MRI: &MI.getMF()->getRegInfo());
12923 case PPC::LHA:
12924 case PPC::LHA8:
12925 case PPC::LHAU:
12926 case PPC::LHAU8:
12927 case PPC::LHAUX:
12928 case PPC::LHAUX8:
12929 case PPC::LHAX:
12930 case PPC::LHAX8:
12931 case PPC::LWA:
12932 case PPC::LWAUX:
12933 case PPC::LWAX:
12934 case PPC::LWAX_32:
12935 case PPC::LWA_32:
12936 case PPC::PLHA:
12937 case PPC::PLHA8:
12938 case PPC::PLHA8pc:
12939 case PPC::PLHApc:
12940 case PPC::PLWA:
12941 case PPC::PLWA8:
12942 case PPC::PLWA8pc:
12943 case PPC::PLWApc:
12944 case PPC::EXTSB:
12945 case PPC::EXTSB8:
12946 case PPC::EXTSB8_32_64:
12947 case PPC::EXTSB8_rec:
12948 case PPC::EXTSB_rec:
12949 case PPC::EXTSH:
12950 case PPC::EXTSH8:
12951 case PPC::EXTSH8_32_64:
12952 case PPC::EXTSH8_rec:
12953 case PPC::EXTSH_rec:
12954 case PPC::EXTSW:
12955 case PPC::EXTSWSLI:
12956 case PPC::EXTSWSLI_32_64:
12957 case PPC::EXTSWSLI_32_64_rec:
12958 case PPC::EXTSWSLI_rec:
12959 case PPC::EXTSW_32:
12960 case PPC::EXTSW_32_64:
12961 case PPC::EXTSW_32_64_rec:
12962 case PPC::EXTSW_rec:
12963 case PPC::SRAW:
12964 case PPC::SRAWI:
12965 case PPC::SRAWI_rec:
12966 case PPC::SRAW_rec:
12967 return true;
12968 }
12969 return false;
12970}
12971
12972MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
12973 MachineInstr &MI, MachineBasicBlock *BB,
12974 bool is8bit, // operation
12975 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12976 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12977 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12978
12979 // If this is a signed comparison and the value being compared is not known
12980 // to be sign extended, sign extend it here.
12981 DebugLoc dl = MI.getDebugLoc();
12982 MachineFunction *F = BB->getParent();
12983 MachineRegisterInfo &RegInfo = F->getRegInfo();
12984 Register incr = MI.getOperand(i: 3).getReg();
12985 bool IsSignExtended =
12986 incr.isVirtual() && isSignExtended(MI&: *RegInfo.getVRegDef(Reg: incr), TII);
12987
12988 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12989 Register ValueReg = RegInfo.createVirtualRegister(RegClass: &PPC::GPRCRegClass);
12990 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: is8bit ? PPC::EXTSB : PPC::EXTSH), DestReg: ValueReg)
12991 .addReg(RegNo: MI.getOperand(i: 3).getReg());
12992 MI.getOperand(i: 3).setReg(ValueReg);
12993 incr = ValueReg;
12994 }
12995 // If we support part-word atomic mnemonics, just use them
12996 if (Subtarget.hasPartwordAtomics())
12997 return EmitAtomicBinary(MI, BB, AtomicSize: is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12998 CmpPred);
12999
  // In 64-bit mode we have to use 64-bit registers for addresses, even though
  // lwarx/stwcx. operate on 32-bit data. With the 32-bit (word-sized) atomics
  // we can use the address registers without caring whether they are 32- or
  // 64-bit, but here we are doing actual arithmetic on the addresses.
13004 bool is64bit = Subtarget.isPPC64();
13005 bool isLittleEndian = Subtarget.isLittleEndian();
13006 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13007
13008 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13009 MachineFunction::iterator It = ++BB->getIterator();
13010
13011 Register dest = MI.getOperand(i: 0).getReg();
13012 Register ptrA = MI.getOperand(i: 1).getReg();
13013 Register ptrB = MI.getOperand(i: 2).getReg();
13014
13015 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13016 MachineBasicBlock *loop2MBB =
13017 CmpOpcode ? F->CreateMachineBasicBlock(BB: LLVM_BB) : nullptr;
13018 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13019 F->insert(MBBI: It, MBB: loopMBB);
13020 if (CmpOpcode)
13021 F->insert(MBBI: It, MBB: loop2MBB);
13022 F->insert(MBBI: It, MBB: exitMBB);
13023 exitMBB->splice(Where: exitMBB->begin(), Other: BB,
13024 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
13025 exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
13026
13027 const TargetRegisterClass *RC =
13028 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13029 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13030
13031 Register PtrReg = RegInfo.createVirtualRegister(RegClass: RC);
13032 Register Shift1Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13033 Register ShiftReg =
13034 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RegClass: GPRC);
13035 Register Incr2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13036 Register MaskReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13037 Register Mask2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13038 Register Mask3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13039 Register Tmp2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13040 Register Tmp3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13041 Register Tmp4Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
13042 Register TmpDestReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13043 Register SrwDestReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13044 Register Ptr1Reg;
13045 Register TmpReg =
13046 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RegClass: GPRC);
13047
13048 // thisMBB:
13049 // ...
13050 // fallthrough --> loopMBB
13051 BB->addSuccessor(Succ: loopMBB);
13052
13053 // The 4-byte load must be aligned, while a char or short may be
13054 // anywhere in the word. Hence all this nasty bookkeeping code.
13055 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13056 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13057 // xori shift, shift1, 24 [16]
13058 // rlwinm ptr, ptr1, 0, 0, 29
13059 // slw incr2, incr, shift
13060 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13061 // slw mask, mask2, shift
13062 // loopMBB:
13063 // lwarx tmpDest, ptr
13064 // add tmp, tmpDest, incr2
13065 // andc tmp2, tmpDest, mask
13066 // and tmp3, tmp, mask
13067 // or tmp4, tmp3, tmp2
13068 // stwcx. tmp4, ptr
13069 // bne- loopMBB
13070 // fallthrough --> exitMBB
13071 // srw SrwDest, tmpDest, shift
13072 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
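  // Worked example (little-endian, is8bit, ptr1 & 3 == 2):
  //   shift1 = (ptr1 & 3) << 3 = 16, shift = shift1 = 16,
  //   incr2  = incr << 16, mask = 0xFF << 16, ptr = ptr1 & ~3,
  // so the loop below updates only bits 16..23 of the aligned word.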
13073 if (ptrA != ZeroReg) {
13074 Ptr1Reg = RegInfo.createVirtualRegister(RegClass: RC);
13075 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is64bit ? PPC::ADD8 : PPC::ADD4), DestReg: Ptr1Reg)
13076 .addReg(RegNo: ptrA)
13077 .addReg(RegNo: ptrB);
13078 } else {
13079 Ptr1Reg = ptrB;
13080 }
  // We need to use the 32-bit subregister to avoid a register-class mismatch
  // in 64-bit mode.
13083 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: Shift1Reg)
13084 .addReg(RegNo: Ptr1Reg, flags: 0, SubReg: is64bit ? PPC::sub_32 : 0)
13085 .addImm(Val: 3)
13086 .addImm(Val: 27)
13087 .addImm(Val: is8bit ? 28 : 27);
13088 if (!isLittleEndian)
13089 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::XORI), DestReg: ShiftReg)
13090 .addReg(RegNo: Shift1Reg)
13091 .addImm(Val: is8bit ? 24 : 16);
13092 if (is64bit)
13093 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLDICR), DestReg: PtrReg)
13094 .addReg(RegNo: Ptr1Reg)
13095 .addImm(Val: 0)
13096 .addImm(Val: 61);
13097 else
13098 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: PtrReg)
13099 .addReg(RegNo: Ptr1Reg)
13100 .addImm(Val: 0)
13101 .addImm(Val: 0)
13102 .addImm(Val: 29);
13103 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: Incr2Reg).addReg(RegNo: incr).addReg(RegNo: ShiftReg);
13104 if (is8bit)
13105 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask2Reg).addImm(Val: 255);
13106 else {
13107 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask3Reg).addImm(Val: 0);
13108 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ORI), DestReg: Mask2Reg)
13109 .addReg(RegNo: Mask3Reg)
13110 .addImm(Val: 65535);
13111 }
13112 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: MaskReg)
13113 .addReg(RegNo: Mask2Reg)
13114 .addReg(RegNo: ShiftReg);
13115
13116 BB = loopMBB;
13117 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LWARX), DestReg: TmpDestReg)
13118 .addReg(RegNo: ZeroReg)
13119 .addReg(RegNo: PtrReg);
13120 if (BinOpcode)
13121 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: BinOpcode), DestReg: TmpReg)
13122 .addReg(RegNo: Incr2Reg)
13123 .addReg(RegNo: TmpDestReg);
13124 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ANDC), DestReg: Tmp2Reg)
13125 .addReg(RegNo: TmpDestReg)
13126 .addReg(RegNo: MaskReg);
13127 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: Tmp3Reg).addReg(RegNo: TmpReg).addReg(RegNo: MaskReg);
13128 if (CmpOpcode) {
13129 // For unsigned comparisons, we can directly compare the shifted values.
13130 // For signed comparisons we shift and sign extend.
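    // Unsigned compares work on the shifted values because both operands
    // occupy the same bit lane with zeroes elsewhere, so their full-word
    // ordering matches the ordering of the original narrow values.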
13131 Register SReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13132 Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
13133 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: SReg)
13134 .addReg(RegNo: TmpDestReg)
13135 .addReg(RegNo: MaskReg);
13136 unsigned ValueReg = SReg;
13137 unsigned CmpReg = Incr2Reg;
13138 if (CmpOpcode == PPC::CMPW) {
13139 ValueReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13140 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SRW), DestReg: ValueReg)
13141 .addReg(RegNo: SReg)
13142 .addReg(RegNo: ShiftReg);
13143 Register ValueSReg = RegInfo.createVirtualRegister(RegClass: GPRC);
13144 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is8bit ? PPC::EXTSB : PPC::EXTSH), DestReg: ValueSReg)
13145 .addReg(RegNo: ValueReg);
13146 ValueReg = ValueSReg;
13147 CmpReg = incr;
13148 }
13149 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: CmpOpcode), DestReg: CrReg).addReg(RegNo: ValueReg).addReg(RegNo: CmpReg);
13150 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13151 .addImm(Val: CmpPred)
13152 .addReg(RegNo: CrReg)
13153 .addMBB(MBB: exitMBB);
13154 BB->addSuccessor(Succ: loop2MBB);
13155 BB->addSuccessor(Succ: exitMBB);
13156 BB = loop2MBB;
13157 }
13158 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::OR), DestReg: Tmp4Reg).addReg(RegNo: Tmp3Reg).addReg(RegNo: Tmp2Reg);
13159 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::STWCX))
13160 .addReg(RegNo: Tmp4Reg)
13161 .addReg(RegNo: ZeroReg)
13162 .addReg(RegNo: PtrReg);
13163 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13164 .addImm(Val: PPC::PRED_NE)
13165 .addReg(RegNo: PPC::CR0)
13166 .addMBB(MBB: loopMBB);
13167 BB->addSuccessor(Succ: loopMBB);
13168 BB->addSuccessor(Succ: exitMBB);
13169
13170 // exitMBB:
13171 // ...
13172 BB = exitMBB;
13173 // Since the shift amount is not a constant, we need to clear
13174 // the upper bits with a separate RLWINM.
13175 BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: dest)
13176 .addReg(RegNo: SrwDestReg)
13177 .addImm(Val: 0)
13178 .addImm(Val: is8bit ? 24 : 16)
13179 .addImm(Val: 31);
13180 BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::SRW), DestReg: SrwDestReg)
13181 .addReg(RegNo: TmpDestReg)
13182 .addReg(RegNo: ShiftReg);
13183 return BB;
13184}
13185
13186llvm::MachineBasicBlock *
13187PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
13188 MachineBasicBlock *MBB) const {
13189 DebugLoc DL = MI.getDebugLoc();
13190 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13191 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13192
13193 MachineFunction *MF = MBB->getParent();
13194 MachineRegisterInfo &MRI = MF->getRegInfo();
13195
13196 const BasicBlock *BB = MBB->getBasicBlock();
13197 MachineFunction::iterator I = ++MBB->getIterator();
13198
13199 Register DstReg = MI.getOperand(i: 0).getReg();
13200 const TargetRegisterClass *RC = MRI.getRegClass(Reg: DstReg);
13201 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13202 Register mainDstReg = MRI.createVirtualRegister(RegClass: RC);
13203 Register restoreDstReg = MRI.createVirtualRegister(RegClass: RC);
13204
13205 MVT PVT = getPointerTy(DL: MF->getDataLayout());
13206 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13207 "Invalid Pointer Size!");
13208 // For v = setjmp(buf), we generate
13209 //
13210 // thisMBB:
13211 // SjLjSetup mainMBB
13212 // bl mainMBB
13213 // v_restore = 1
13214 // b sinkMBB
13215 //
13216 // mainMBB:
13217 // buf[LabelOffset] = LR
13218 // v_main = 0
13219 //
13220 // sinkMBB:
13221 // v = phi(main, restore)
13222 //
13223
13224 MachineBasicBlock *thisMBB = MBB;
13225 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13226 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13227 MF->insert(MBBI: I, MBB: mainMBB);
13228 MF->insert(MBBI: I, MBB: sinkMBB);
13229
13230 MachineInstrBuilder MIB;
13231
13232 // Transfer the remainder of BB and its successor edges to sinkMBB.
13233 sinkMBB->splice(Where: sinkMBB->begin(), Other: MBB,
13234 From: std::next(x: MachineBasicBlock::iterator(MI)), To: MBB->end());
13235 sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
13236
13237 // Note that the structure of the jmp_buf used here is not compatible
13238 // with that used by libc, and is not designed to be. Specifically, it
13239 // stores only those 'reserved' registers that LLVM does not otherwise
13240 // understand how to spill. Also, by convention, by the time this
13241 // intrinsic is called, Clang has already stored the frame address in the
13242 // first slot of the buffer and stack address in the third. Following the
13243 // X86 target code, we'll store the jump address in the second slot. We also
13244 // need to save the TOC pointer (R2) to handle jumps between shared
13245 // libraries, and that will be stored in the fourth slot. The thread
13246 // identifier (R13) is not affected.
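  // In pointer-sized slots, the resulting layout is:
  //   buf[0] = frame address (stored by Clang)
  //   buf[1] = IP (LR)              -- LabelOffset
  //   buf[2] = SP (stored by Clang) -- matches SPOffset in emitEHSjLjLongJmp
  //   buf[3] = TOC pointer (R2)     -- TOCOffset
  //   buf[4] = base pointer         -- BPOffset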
13247
13248 // thisMBB:
13249 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13250 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13251 const int64_t BPOffset = 4 * PVT.getStoreSize();
13252
  // Prepare the IP in a register.
13254 const TargetRegisterClass *PtrRC = getRegClassFor(VT: PVT);
13255 Register LabelReg = MRI.createVirtualRegister(RegClass: PtrRC);
13256 Register BufReg = MI.getOperand(i: 1).getReg();
13257
13258 if (Subtarget.is64BitELFABI()) {
13259 setUsesTOCBasePtr(*MBB->getParent());
13260 MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::STD))
13261 .addReg(RegNo: PPC::X2)
13262 .addImm(Val: TOCOffset)
13263 .addReg(RegNo: BufReg)
13264 .cloneMemRefs(OtherMI: MI);
13265 }
13266
13267 // Naked functions never have a base pointer, and so we use r1. For all
13268 // other functions, this decision must be delayed until during PEI.
13269 unsigned BaseReg;
13270 if (MF->getFunction().hasFnAttribute(Kind: Attribute::Naked))
13271 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13272 else
13273 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13274
13275 MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL,
13276 MCID: TII->get(Opcode: Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13277 .addReg(RegNo: BaseReg)
13278 .addImm(Val: BPOffset)
13279 .addReg(RegNo: BufReg)
13280 .cloneMemRefs(OtherMI: MI);
13281
13282 // Setup
13283 MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::BCLalways)).addMBB(MBB: mainMBB);
13284 MIB.addRegMask(Mask: TRI->getNoPreservedMask());
13285
13286 BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LI), DestReg: restoreDstReg).addImm(Val: 1);
13287
13288 MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::EH_SjLj_Setup))
13289 .addMBB(MBB: mainMBB);
13290 MIB = BuildMI(BB&: *thisMBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: sinkMBB);
13291
13292 thisMBB->addSuccessor(Succ: mainMBB, Prob: BranchProbability::getZero());
13293 thisMBB->addSuccessor(Succ: sinkMBB, Prob: BranchProbability::getOne());
13294
13295 // mainMBB:
13296 // mainDstReg = 0
13297 MIB =
13298 BuildMI(BB: mainMBB, MIMD: DL,
13299 MCID: TII->get(Opcode: Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), DestReg: LabelReg);
13300
13301 // Store IP
13302 if (Subtarget.isPPC64()) {
13303 MIB = BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::STD))
13304 .addReg(RegNo: LabelReg)
13305 .addImm(Val: LabelOffset)
13306 .addReg(RegNo: BufReg);
13307 } else {
13308 MIB = BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::STW))
13309 .addReg(RegNo: LabelReg)
13310 .addImm(Val: LabelOffset)
13311 .addReg(RegNo: BufReg);
13312 }
13313 MIB.cloneMemRefs(OtherMI: MI);
13314
13315 BuildMI(BB: mainMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::LI), DestReg: mainDstReg).addImm(Val: 0);
13316 mainMBB->addSuccessor(Succ: sinkMBB);
13317
13318 // sinkMBB:
13319 BuildMI(BB&: *sinkMBB, I: sinkMBB->begin(), MIMD: DL,
13320 MCID: TII->get(Opcode: PPC::PHI), DestReg: DstReg)
13321 .addReg(RegNo: mainDstReg).addMBB(MBB: mainMBB)
13322 .addReg(RegNo: restoreDstReg).addMBB(MBB: thisMBB);
13323
13324 MI.eraseFromParent();
13325 return sinkMBB;
13326}
13327
13328MachineBasicBlock *
13329PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
13330 MachineBasicBlock *MBB) const {
13331 DebugLoc DL = MI.getDebugLoc();
13332 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13333
13334 MachineFunction *MF = MBB->getParent();
13335 MachineRegisterInfo &MRI = MF->getRegInfo();
13336
13337 MVT PVT = getPointerTy(DL: MF->getDataLayout());
13338 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13339 "Invalid Pointer Size!");
13340
13341 const TargetRegisterClass *RC =
13342 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13343 Register Tmp = MRI.createVirtualRegister(RegClass: RC);
13344 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13345 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13346 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13347 unsigned BP =
13348 (PVT == MVT::i64)
13349 ? PPC::X30
13350 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13351 : PPC::R30);
13352
13353 MachineInstrBuilder MIB;
13354
13355 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13356 const int64_t SPOffset = 2 * PVT.getStoreSize();
13357 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13358 const int64_t BPOffset = 4 * PVT.getStoreSize();
13359
13360 Register BufReg = MI.getOperand(i: 0).getReg();
13361
13362 // Reload FP (the jumped-to function may not have had a
13363 // frame pointer, and if so, then its r31 will be restored
13364 // as necessary).
13365 if (PVT == MVT::i64) {
13366 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: FP)
13367 .addImm(Val: 0)
13368 .addReg(RegNo: BufReg);
13369 } else {
13370 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: FP)
13371 .addImm(Val: 0)
13372 .addReg(RegNo: BufReg);
13373 }
13374 MIB.cloneMemRefs(OtherMI: MI);
13375
13376 // Reload IP
13377 if (PVT == MVT::i64) {
13378 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: Tmp)
13379 .addImm(Val: LabelOffset)
13380 .addReg(RegNo: BufReg);
13381 } else {
13382 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: Tmp)
13383 .addImm(Val: LabelOffset)
13384 .addReg(RegNo: BufReg);
13385 }
13386 MIB.cloneMemRefs(OtherMI: MI);
13387
13388 // Reload SP
13389 if (PVT == MVT::i64) {
13390 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: SP)
13391 .addImm(Val: SPOffset)
13392 .addReg(RegNo: BufReg);
13393 } else {
13394 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: SP)
13395 .addImm(Val: SPOffset)
13396 .addReg(RegNo: BufReg);
13397 }
13398 MIB.cloneMemRefs(OtherMI: MI);
13399
13400 // Reload BP
13401 if (PVT == MVT::i64) {
13402 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: BP)
13403 .addImm(Val: BPOffset)
13404 .addReg(RegNo: BufReg);
13405 } else {
13406 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LWZ), DestReg: BP)
13407 .addImm(Val: BPOffset)
13408 .addReg(RegNo: BufReg);
13409 }
13410 MIB.cloneMemRefs(OtherMI: MI);
13411
13412 // Reload TOC
13413 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13414 setUsesTOCBasePtr(*MBB->getParent());
13415 MIB = BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::LD), DestReg: PPC::X2)
13416 .addImm(Val: TOCOffset)
13417 .addReg(RegNo: BufReg)
13418 .cloneMemRefs(OtherMI: MI);
13419 }
13420
13421 // Jump
13422 BuildMI(BB&: *MBB, I&: MI, MIMD: DL,
13423 MCID: TII->get(Opcode: PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(RegNo: Tmp);
13424 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13425
13426 MI.eraseFromParent();
13427 return MBB;
13428}
13429
13430bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
13431 // If the function specifically requests inline stack probes, emit them.
13432 if (MF.getFunction().hasFnAttribute(Kind: "probe-stack"))
13433 return MF.getFunction().getFnAttribute(Kind: "probe-stack").getValueAsString() ==
13434 "inline-asm";
13435 return false;
13436}
13437
13438unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
13439 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13440 unsigned StackAlign = TFI->getStackAlignment();
13441 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13442 "Unexpected stack alignment");
13443 // The default stack probe size is 4096 if the function has no
13444 // stack-probe-size attribute.
13445 const Function &Fn = MF.getFunction();
13446 unsigned StackProbeSize =
13447 Fn.getFnAttributeAsParsedInteger(Kind: "stack-probe-size", Default: 4096);
13448 // Round down to the stack alignment.
13449 StackProbeSize &= ~(StackAlign - 1);
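  // e.g. StackAlign = 16 and "stack-probe-size"=1000 yields 1000 & ~15 = 992.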
13450 return StackProbeSize ? StackProbeSize : StackAlign;
13451}
13452
// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
// into three phases. In the first phase, it uses the pseudo instruction
// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
// and FinalStackPtr. In the second phase, it generates a loop for probing
// blocks. Finally, it uses the pseudo instruction DYNAREAOFFSET to get the
// future result of MaxCallFrameSize so that it can calculate the correct data
// area pointer.
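// For example, with ProbeSize = 4096 and ActualNegSize = -10000, the emitted
// code first probes the 1808-byte residual (10000 mod 4096) and then loops
// twice, probing one 4096-byte block per iteration until SP reaches
// FinalStackPtr.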
13459MachineBasicBlock *
13460PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13461 MachineBasicBlock *MBB) const {
13462 const bool isPPC64 = Subtarget.isPPC64();
13463 MachineFunction *MF = MBB->getParent();
13464 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13465 DebugLoc DL = MI.getDebugLoc();
13466 const unsigned ProbeSize = getStackProbeSize(MF: *MF);
13467 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13468 MachineRegisterInfo &MRI = MF->getRegInfo();
  // The CFG of the stack-probing code looks like:
13470 // +-----+
13471 // | MBB |
13472 // +--+--+
13473 // |
13474 // +----v----+
13475 // +--->+ TestMBB +---+
13476 // | +----+----+ |
13477 // | | |
13478 // | +-----v----+ |
13479 // +---+ BlockMBB | |
13480 // +----------+ |
13481 // |
13482 // +---------+ |
13483 // | TailMBB +<--+
13484 // +---------+
  // In MBB, calculate the previous frame pointer and the final stack pointer.
  // In TestMBB, test whether SP equals the final stack pointer; if so, jump to
  // TailMBB. In BlockMBB, update SP with a probing store and jump back to
  // TestMBB. The instructions following \p MI are spliced into TailMBB.
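  // The probing store in BlockMBB uses the update form (st[dw]ux), so SP is
  // advanced and the newly exposed block is touched by a single instruction.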
13489 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(BB: ProbedBB);
13490 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(BB: ProbedBB);
13491 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(BB: ProbedBB);
13492
13493 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13494 MF->insert(MBBI: MBBIter, MBB: TestMBB);
13495 MF->insert(MBBI: MBBIter, MBB: BlockMBB);
13496 MF->insert(MBBI: MBBIter, MBB: TailMBB);
13497
13498 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13499 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13500
13501 Register DstReg = MI.getOperand(i: 0).getReg();
13502 Register NegSizeReg = MI.getOperand(i: 1).getReg();
13503 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13504 Register FinalStackPtr = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13505 Register FramePointer = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13506 Register ActualNegSizeReg = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13507
  // Since the value of NegSizeReg might be realigned during prologue/epilogue
  // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
  // actual FramePointer and NegSize.
13511 unsigned ProbeOpc;
13512 if (!MRI.hasOneNonDBGUse(RegNo: NegSizeReg))
13513 ProbeOpc =
13514 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13515 else
    // When NegSizeReg has only one use (this MI, which will be replaced by
    // PREPARE_PROBED_ALLOCA), use the NEGSIZE_SAME_REG variant so that
    // ActualNegSizeReg and NegSizeReg are allocated to the same physical
    // register, avoiding a redundant copy.
13520 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13521 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13522 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: ProbeOpc), DestReg: FramePointer)
13523 .addDef(RegNo: ActualNegSizeReg)
13524 .addReg(RegNo: NegSizeReg)
13525 .add(MO: MI.getOperand(i: 2))
13526 .add(MO: MI.getOperand(i: 3));
13527
  // Calculate the final stack pointer, which equals SP + ActualNegSize.
13529 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::ADD8 : PPC::ADD4),
13530 DestReg: FinalStackPtr)
13531 .addReg(RegNo: SPReg)
13532 .addReg(RegNo: ActualNegSizeReg);
13533
13534 // Materialize a scratch register for update.
13535 int64_t NegProbeSize = -(int64_t)ProbeSize;
13536 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13537 Register ScratchReg = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13538 if (!isInt<16>(x: NegProbeSize)) {
13539 Register TempReg = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13540 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::LIS8 : PPC::LIS), DestReg: TempReg)
13541 .addImm(Val: NegProbeSize >> 16);
13542 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::ORI8 : PPC::ORI),
13543 DestReg: ScratchReg)
13544 .addReg(RegNo: TempReg)
13545 .addImm(Val: NegProbeSize & 0xFFFF);
13546 } else
13547 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::LI8 : PPC::LI), DestReg: ScratchReg)
13548 .addImm(Val: NegProbeSize);
13549
13550 {
    // Probe the leading residual part.
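    // NegMod = ActualNegSize - (ActualNegSize / -ProbeSize) * -ProbeSize,
    // i.e. the non-positive remainder. E.g. ActualNegSize = -10000,
    // ProbeSize = 4096: Div = 2, Mul = -8192, NegMod = -1808, so the st[dw]ux
    // below probes the residual 1808 bytes before the main loop runs.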
13552 Register Div = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13553 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::DIVD : PPC::DIVW), DestReg: Div)
13554 .addReg(RegNo: ActualNegSizeReg)
13555 .addReg(RegNo: ScratchReg);
13556 Register Mul = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13557 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::MULLD : PPC::MULLW), DestReg: Mul)
13558 .addReg(RegNo: Div)
13559 .addReg(RegNo: ScratchReg);
13560 Register NegMod = MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13561 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::SUBF8 : PPC::SUBF), DestReg: NegMod)
13562 .addReg(RegNo: Mul)
13563 .addReg(RegNo: ActualNegSizeReg);
13564 BuildMI(BB&: *MBB, I&: {MI}, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::STDUX : PPC::STWUX), DestReg: SPReg)
13565 .addReg(RegNo: FramePointer)
13566 .addReg(RegNo: SPReg)
13567 .addReg(RegNo: NegMod);
13568 }
13569
13570 {
    // The remaining part is a multiple of ProbeSize.
13572 Register CmpResult = MRI.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
13573 BuildMI(BB: TestMBB, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::CMPD : PPC::CMPW), DestReg: CmpResult)
13574 .addReg(RegNo: SPReg)
13575 .addReg(RegNo: FinalStackPtr);
13576 BuildMI(BB: TestMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::BCC))
13577 .addImm(Val: PPC::PRED_EQ)
13578 .addReg(RegNo: CmpResult)
13579 .addMBB(MBB: TailMBB);
13580 TestMBB->addSuccessor(Succ: BlockMBB);
13581 TestMBB->addSuccessor(Succ: TailMBB);
13582 }
13583
13584 {
13585 // Touch the block.
13586 // |P...|P...|P...
13587 BuildMI(BB: BlockMBB, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::STDUX : PPC::STWUX), DestReg: SPReg)
13588 .addReg(RegNo: FramePointer)
13589 .addReg(RegNo: SPReg)
13590 .addReg(RegNo: ScratchReg);
13591 BuildMI(BB: BlockMBB, MIMD: DL, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: TestMBB);
13592 BlockMBB->addSuccessor(Succ: TestMBB);
13593 }
13594
  // The calculation of MaxCallFrameSize is deferred to prologue/epilogue
  // insertion, so use the DYNAREAOFFSET pseudo instruction to get its future
  // result.
13597 Register MaxCallFrameSizeReg =
13598 MRI.createVirtualRegister(RegClass: isPPC64 ? G8RC : GPRC);
13599 BuildMI(BB: TailMBB, MIMD: DL,
13600 MCID: TII->get(Opcode: isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13601 DestReg: MaxCallFrameSizeReg)
13602 .add(MO: MI.getOperand(i: 2))
13603 .add(MO: MI.getOperand(i: 3));
13604 BuildMI(BB: TailMBB, MIMD: DL, MCID: TII->get(Opcode: isPPC64 ? PPC::ADD8 : PPC::ADD4), DestReg: DstReg)
13605 .addReg(RegNo: SPReg)
13606 .addReg(RegNo: MaxCallFrameSizeReg);
13607
13608 // Splice instructions after MI to TailMBB.
13609 TailMBB->splice(Where: TailMBB->end(), Other: MBB,
13610 From: std::next(x: MachineBasicBlock::iterator(MI)), To: MBB->end());
13611 TailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
13612 MBB->addSuccessor(Succ: TestMBB);
13613
13614 // Delete the pseudo instruction.
13615 MI.eraseFromParent();
13616
13617 ++NumDynamicAllocaProbed;
13618 return TailMBB;
13619}
13620
13621static bool IsSelectCC(MachineInstr &MI) {
13622 switch (MI.getOpcode()) {
13623 case PPC::SELECT_CC_I4:
13624 case PPC::SELECT_CC_I8:
13625 case PPC::SELECT_CC_F4:
13626 case PPC::SELECT_CC_F8:
13627 case PPC::SELECT_CC_F16:
13628 case PPC::SELECT_CC_VRRC:
13629 case PPC::SELECT_CC_VSFRC:
13630 case PPC::SELECT_CC_VSSRC:
13631 case PPC::SELECT_CC_VSRC:
13632 case PPC::SELECT_CC_SPE4:
13633 case PPC::SELECT_CC_SPE:
13634 return true;
13635 default:
13636 return false;
13637 }
13638}
13639
13640static bool IsSelect(MachineInstr &MI) {
13641 switch (MI.getOpcode()) {
13642 case PPC::SELECT_I4:
13643 case PPC::SELECT_I8:
13644 case PPC::SELECT_F4:
13645 case PPC::SELECT_F8:
13646 case PPC::SELECT_F16:
13647 case PPC::SELECT_SPE:
13648 case PPC::SELECT_SPE4:
13649 case PPC::SELECT_VRRC:
13650 case PPC::SELECT_VSFRC:
13651 case PPC::SELECT_VSSRC:
13652 case PPC::SELECT_VSRC:
13653 return true;
13654 default:
13655 return false;
13656 }
13657}
13658
13659MachineBasicBlock *
13660PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13661 MachineBasicBlock *BB) const {
13662 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13663 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13664 if (Subtarget.is64BitELFABI() &&
13665 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13666 !Subtarget.isUsingPCRelativeCalls()) {
13667 // Call lowering should have added an r2 operand to indicate a dependence
      // on the TOC base pointer value. It can't, however, because there is no
13669 // way to mark the dependence as implicit there, and so the stackmap code
13670 // will confuse it with a regular operand. Instead, add the dependence
13671 // here.
13672 MI.addOperand(Op: MachineOperand::CreateReg(Reg: PPC::X2, isDef: false, isImp: true));
13673 }
13674
13675 return emitPatchPoint(MI, MBB: BB);
13676 }
13677
13678 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13679 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13680 return emitEHSjLjSetJmp(MI, MBB: BB);
13681 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13682 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13683 return emitEHSjLjLongJmp(MI, MBB: BB);
13684 }
13685
13686 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13687
13688 // To "insert" these instructions we actually have to insert their
13689 // control-flow patterns.
13690 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13691 MachineFunction::iterator It = ++BB->getIterator();
13692
13693 MachineFunction *F = BB->getParent();
13694 MachineRegisterInfo &MRI = F->getRegInfo();
13695
13696 if (Subtarget.hasISEL() &&
13697 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13698 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13699 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13700 SmallVector<MachineOperand, 2> Cond;
13701 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13702 MI.getOpcode() == PPC::SELECT_CC_I8)
13703 Cond.push_back(Elt: MI.getOperand(i: 4));
13704 else
13705 Cond.push_back(Elt: MachineOperand::CreateImm(Val: PPC::PRED_BIT_SET));
13706 Cond.push_back(Elt: MI.getOperand(i: 1));
13707
13708 DebugLoc dl = MI.getDebugLoc();
13709 TII->insertSelect(MBB&: *BB, I: MI, DL: dl, DstReg: MI.getOperand(i: 0).getReg(), Cond,
13710 TrueReg: MI.getOperand(i: 2).getReg(), FalseReg: MI.getOperand(i: 3).getReg());
13711 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13712 // The incoming instruction knows the destination vreg to set, the
13713 // condition code register to branch on, the true/false values to
13714 // select between, and a branch opcode to use.
13715
13716 // thisMBB:
13717 // ...
13718 // TrueVal = ...
13719 // cmpTY ccX, r1, r2
13720 // bCC sinkMBB
13721 // fallthrough --> copy0MBB
13722 MachineBasicBlock *thisMBB = BB;
13723 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13724 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13725 DebugLoc dl = MI.getDebugLoc();
13726 F->insert(MBBI: It, MBB: copy0MBB);
13727 F->insert(MBBI: It, MBB: sinkMBB);
13728
13729 if (isPhysRegUsedAfter(Reg: PPC::CARRY, MBI: MI.getIterator())) {
13730 copy0MBB->addLiveIn(PhysReg: PPC::CARRY);
13731 sinkMBB->addLiveIn(PhysReg: PPC::CARRY);
13732 }
13733
13734 // Set the call frame size on entry to the new basic blocks.
13735 // See https://reviews.llvm.org/D156113.
13736 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13737 copy0MBB->setCallFrameSize(CallFrameSize);
13738 sinkMBB->setCallFrameSize(CallFrameSize);
13739
13740 // Transfer the remainder of BB and its successor edges to sinkMBB.
13741 sinkMBB->splice(Where: sinkMBB->begin(), Other: BB,
13742 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
13743 sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
13744
13745 // Next, add the true and fallthrough blocks as its successors.
13746 BB->addSuccessor(Succ: copy0MBB);
13747 BB->addSuccessor(Succ: sinkMBB);
13748
13749 if (IsSelect(MI)) {
13750 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BC))
13751 .addReg(RegNo: MI.getOperand(i: 1).getReg())
13752 .addMBB(MBB: sinkMBB);
13753 } else {
13754 unsigned SelectPred = MI.getOperand(i: 4).getImm();
13755 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13756 .addImm(Val: SelectPred)
13757 .addReg(RegNo: MI.getOperand(i: 1).getReg())
13758 .addMBB(MBB: sinkMBB);
13759 }
13760
13761 // copy0MBB:
13762 // %FalseValue = ...
13763 // # fallthrough to sinkMBB
13764 BB = copy0MBB;
13765
13766 // Update machine-CFG edges
13767 BB->addSuccessor(Succ: sinkMBB);
13768
13769 // sinkMBB:
13770 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13771 // ...
13772 BB = sinkMBB;
13773 BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::PHI), DestReg: MI.getOperand(i: 0).getReg())
13774 .addReg(RegNo: MI.getOperand(i: 3).getReg())
13775 .addMBB(MBB: copy0MBB)
13776 .addReg(RegNo: MI.getOperand(i: 2).getReg())
13777 .addMBB(MBB: thisMBB);
13778 } else if (MI.getOpcode() == PPC::ReadTB) {
13779 // To read the 64-bit time-base register on a 32-bit target, we read the
13780 // two halves. Should the counter have wrapped while it was being read, we
13781 // need to try again.
13782 // ...
13783 // readLoop:
13784 // mfspr Rx,TBU # load from TBU
13785 // mfspr Ry,TB # load from TB
13786 // mfspr Rz,TBU # load from TBU
13787 // cmpw crX,Rx,Rz # check if 'old'='new'
13788 // bne readLoop # branch if they're not equal
13789 // ...
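    // SPR 269 is TBU and SPR 268 is TB (the low word); if the two TBU reads
    // differ, the low word may have wrapped between them, so read again.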
13790
13791 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13792 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13793 DebugLoc dl = MI.getDebugLoc();
13794 F->insert(MBBI: It, MBB: readMBB);
13795 F->insert(MBBI: It, MBB: sinkMBB);
13796
13797 // Transfer the remainder of BB and its successor edges to sinkMBB.
13798 sinkMBB->splice(Where: sinkMBB->begin(), Other: BB,
13799 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
13800 sinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
13801
13802 BB->addSuccessor(Succ: readMBB);
13803 BB = readMBB;
13804
13805 MachineRegisterInfo &RegInfo = F->getRegInfo();
13806 Register ReadAgainReg = RegInfo.createVirtualRegister(RegClass: &PPC::GPRCRegClass);
13807 Register LoReg = MI.getOperand(i: 0).getReg();
13808 Register HiReg = MI.getOperand(i: 1).getReg();
13809
13810 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::MFSPR), DestReg: HiReg).addImm(Val: 269);
13811 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::MFSPR), DestReg: LoReg).addImm(Val: 268);
13812 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::MFSPR), DestReg: ReadAgainReg).addImm(Val: 269);
13813
13814 Register CmpReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
13815
13816 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::CMPW), DestReg: CmpReg)
13817 .addReg(RegNo: HiReg)
13818 .addReg(RegNo: ReadAgainReg);
13819 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13820 .addImm(Val: PPC::PRED_NE)
13821 .addReg(RegNo: CmpReg)
13822 .addMBB(MBB: readMBB);
13823
13824 BB->addSuccessor(Succ: readMBB);
13825 BB->addSuccessor(Succ: sinkMBB);
13826 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13827 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::ADD4);
13828 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13829 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::ADD4);
13830 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13831 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::ADD4);
13832 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13833 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::ADD8);
13834
13835 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13836 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::AND);
13837 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13838 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::AND);
13839 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13840 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::AND);
13841 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13842 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::AND8);
13843
13844 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13845 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::OR);
13846 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13847 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::OR);
13848 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13849 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::OR);
13850 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13851 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::OR8);
13852
13853 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13854 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::XOR);
13855 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13856 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::XOR);
13857 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13858 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::XOR);
13859 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13860 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::XOR8);
13861
13862 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13863 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::NAND);
13864 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13865 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::NAND);
13866 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13867 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::NAND);
13868 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13869 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::NAND8);
13870
13871 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13872 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: PPC::SUBF);
13873 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13874 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: PPC::SUBF);
13875 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13876 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: PPC::SUBF);
13877 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13878 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: PPC::SUBF8);
13879
13880 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13881 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_LT);
13882 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13883 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_LT);
13884 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13885 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_LT);
13886 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13887 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPD, CmpPred: PPC::PRED_LT);
13888
13889 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13890 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_GT);
13891 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13892 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_GT);
13893 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13894 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPW, CmpPred: PPC::PRED_GT);
13895 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13896 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPD, CmpPred: PPC::PRED_GT);
13897
13898 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13899 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_LT);
13900 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13901 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_LT);
13902 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13903 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_LT);
13904 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13905 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPLD, CmpPred: PPC::PRED_LT);
13906
13907 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13908 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_GT);
13909 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13910 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_GT);
13911 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13912 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0, CmpOpcode: PPC::CMPLW, CmpPred: PPC::PRED_GT);
13913 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13914 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0, CmpOpcode: PPC::CMPLD, CmpPred: PPC::PRED_GT);
13915
13916 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13917 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: true, BinOpcode: 0);
13918 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13919 BB = EmitPartwordAtomicBinary(MI, BB, is8bit: false, BinOpcode: 0);
13920 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13921 BB = EmitAtomicBinary(MI, BB, AtomicSize: 4, BinOpcode: 0);
13922 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13923 BB = EmitAtomicBinary(MI, BB, AtomicSize: 8, BinOpcode: 0);
13924 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13925 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13926 (Subtarget.hasPartwordAtomics() &&
13927 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13928 (Subtarget.hasPartwordAtomics() &&
13929 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13930 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13931
13932 auto LoadMnemonic = PPC::LDARX;
13933 auto StoreMnemonic = PPC::STDCX;
13934 switch (MI.getOpcode()) {
13935 default:
13936 llvm_unreachable("Compare and swap of unknown size");
13937 case PPC::ATOMIC_CMP_SWAP_I8:
13938 LoadMnemonic = PPC::LBARX;
13939 StoreMnemonic = PPC::STBCX;
      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13941 break;
13942 case PPC::ATOMIC_CMP_SWAP_I16:
13943 LoadMnemonic = PPC::LHARX;
13944 StoreMnemonic = PPC::STHCX;
      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13946 break;
13947 case PPC::ATOMIC_CMP_SWAP_I32:
13948 LoadMnemonic = PPC::LWARX;
13949 StoreMnemonic = PPC::STWCX;
13950 break;
13951 case PPC::ATOMIC_CMP_SWAP_I64:
13952 LoadMnemonic = PPC::LDARX;
13953 StoreMnemonic = PPC::STDCX;
13954 break;
13955 }
13956 MachineRegisterInfo &RegInfo = F->getRegInfo();
13957 Register dest = MI.getOperand(i: 0).getReg();
13958 Register ptrA = MI.getOperand(i: 1).getReg();
13959 Register ptrB = MI.getOperand(i: 2).getReg();
13960 Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
13961 Register oldval = MI.getOperand(i: 3).getReg();
13962 Register newval = MI.getOperand(i: 4).getReg();
13963 DebugLoc dl = MI.getDebugLoc();
13964
13965 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13966 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13967 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
13968 F->insert(MBBI: It, MBB: loop1MBB);
13969 F->insert(MBBI: It, MBB: loop2MBB);
13970 F->insert(MBBI: It, MBB: exitMBB);
13971 exitMBB->splice(Where: exitMBB->begin(), Other: BB,
13972 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
13973 exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
13974
13975 // thisMBB:
13976 // ...
13977 // fallthrough --> loopMBB
13978 BB->addSuccessor(Succ: loop1MBB);
13979
13980 // loop1MBB:
13981 // l[bhwd]arx dest, ptr
13982 // cmp[wd] dest, oldval
13983 // bne- exitBB
13984 // loop2MBB:
13985 // st[bhwd]cx. newval, ptr
13986 // bne- loopMBB
13987 // b exitBB
13988 // exitBB:
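    // If the st[bhwd]cx. fails (the reservation was lost), the bne- branches
    // back to loop1MBB to reload the value and recompare it against oldval.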
13989 BB = loop1MBB;
13990 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: LoadMnemonic), DestReg: dest).addReg(RegNo: ptrA).addReg(RegNo: ptrB);
13991 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is64bit ? PPC::CMPD : PPC::CMPW), DestReg: CrReg)
13992 .addReg(RegNo: dest)
13993 .addReg(RegNo: oldval);
13994 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
13995 .addImm(Val: PPC::PRED_NE)
13996 .addReg(RegNo: CrReg)
13997 .addMBB(MBB: exitMBB);
13998 BB->addSuccessor(Succ: loop2MBB);
13999 BB->addSuccessor(Succ: exitMBB);
14000
14001 BB = loop2MBB;
14002 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: StoreMnemonic))
14003 .addReg(RegNo: newval)
14004 .addReg(RegNo: ptrA)
14005 .addReg(RegNo: ptrB);
14006 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14007 .addImm(Val: PPC::PRED_NE)
14008 .addReg(RegNo: PPC::CR0)
14009 .addMBB(MBB: loop1MBB);
14010 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: exitMBB);
14011 BB->addSuccessor(Succ: loop1MBB);
14012 BB->addSuccessor(Succ: exitMBB);
14013
14014 // exitMBB:
14015 // ...
14016 BB = exitMBB;
14017 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14018 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14019 // We must use 64-bit registers for addresses when targeting 64-bit,
14020 // since we're actually doing arithmetic on them. Other registers
14021 // can be 32-bit.
14022 bool is64bit = Subtarget.isPPC64();
14023 bool isLittleEndian = Subtarget.isLittleEndian();
14024 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14025
14026 Register dest = MI.getOperand(i: 0).getReg();
14027 Register ptrA = MI.getOperand(i: 1).getReg();
14028 Register ptrB = MI.getOperand(i: 2).getReg();
14029 Register oldval = MI.getOperand(i: 3).getReg();
14030 Register newval = MI.getOperand(i: 4).getReg();
14031 DebugLoc dl = MI.getDebugLoc();
14032
14033 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14034 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14035 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
14036 F->insert(MBBI: It, MBB: loop1MBB);
14037 F->insert(MBBI: It, MBB: loop2MBB);
14038 F->insert(MBBI: It, MBB: exitMBB);
14039 exitMBB->splice(Where: exitMBB->begin(), Other: BB,
14040 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
14041 exitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
14042
14043 MachineRegisterInfo &RegInfo = F->getRegInfo();
14044 const TargetRegisterClass *RC =
14045 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14046 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14047
14048 Register PtrReg = RegInfo.createVirtualRegister(RegClass: RC);
14049 Register Shift1Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14050 Register ShiftReg =
14051 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RegClass: GPRC);
14052 Register NewVal2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14053 Register NewVal3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14054 Register OldVal2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14055 Register OldVal3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14056 Register MaskReg = RegInfo.createVirtualRegister(RegClass: GPRC);
14057 Register Mask2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14058 Register Mask3Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14059 Register Tmp2Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14060 Register Tmp4Reg = RegInfo.createVirtualRegister(RegClass: GPRC);
14061 Register TmpDestReg = RegInfo.createVirtualRegister(RegClass: GPRC);
14062 Register Ptr1Reg;
14063 Register TmpReg = RegInfo.createVirtualRegister(RegClass: GPRC);
14064 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14065 Register CrReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14066 // thisMBB:
14067 // ...
14068 // fallthrough --> loopMBB
14069 BB->addSuccessor(Succ: loop1MBB);
14070
14071 // The 4-byte load must be aligned, while a char or short may be
14072 // anywhere in the word. Hence all this nasty bookkeeping code.
14073 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14074 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14075 // xori shift, shift1, 24 [16]
14076 // rlwinm ptr, ptr1, 0, 0, 29
14077 // slw newval2, newval, shift
    //   slw oldval2, oldval, shift
14079 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14080 // slw mask, mask2, shift
14081 // and newval3, newval2, mask
14082 // and oldval3, oldval2, mask
14083 // loop1MBB:
14084 // lwarx tmpDest, ptr
14085 // and tmp, tmpDest, mask
14086 // cmpw tmp, oldval3
14087 // bne- exitBB
14088 // loop2MBB:
14089 // andc tmp2, tmpDest, mask
14090 // or tmp4, tmp2, newval3
14091 // stwcx. tmp4, ptr
14092 // bne- loop1MBB
14093 // b exitBB
14094 // exitBB:
14095 // srw dest, tmpDest, shift
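    // Only the selected byte/halfword takes part in the compare: tmp is
    // tmpDest & mask and oldval3 is (oldval << shift) & mask, so the other
    // bytes of the aligned word can never cause a spurious mismatch.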
14096 if (ptrA != ZeroReg) {
14097 Ptr1Reg = RegInfo.createVirtualRegister(RegClass: RC);
14098 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: is64bit ? PPC::ADD8 : PPC::ADD4), DestReg: Ptr1Reg)
14099 .addReg(RegNo: ptrA)
14100 .addReg(RegNo: ptrB);
14101 } else {
14102 Ptr1Reg = ptrB;
14103 }
14104
    // We need to use the 32-bit subregister to avoid a register-class mismatch
    // in 64-bit mode.
14107 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: Shift1Reg)
14108 .addReg(RegNo: Ptr1Reg, flags: 0, SubReg: is64bit ? PPC::sub_32 : 0)
14109 .addImm(Val: 3)
14110 .addImm(Val: 27)
14111 .addImm(Val: is8bit ? 28 : 27);
14112 if (!isLittleEndian)
14113 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::XORI), DestReg: ShiftReg)
14114 .addReg(RegNo: Shift1Reg)
14115 .addImm(Val: is8bit ? 24 : 16);
14116 if (is64bit)
14117 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLDICR), DestReg: PtrReg)
14118 .addReg(RegNo: Ptr1Reg)
14119 .addImm(Val: 0)
14120 .addImm(Val: 61);
14121 else
14122 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::RLWINM), DestReg: PtrReg)
14123 .addReg(RegNo: Ptr1Reg)
14124 .addImm(Val: 0)
14125 .addImm(Val: 0)
14126 .addImm(Val: 29);
14127 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: NewVal2Reg)
14128 .addReg(RegNo: newval)
14129 .addReg(RegNo: ShiftReg);
14130 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: OldVal2Reg)
14131 .addReg(RegNo: oldval)
14132 .addReg(RegNo: ShiftReg);
14133 if (is8bit)
14134 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask2Reg).addImm(Val: 255);
14135 else {
14136 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LI), DestReg: Mask3Reg).addImm(Val: 0);
14137 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ORI), DestReg: Mask2Reg)
14138 .addReg(RegNo: Mask3Reg)
14139 .addImm(Val: 65535);
14140 }
14141 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::SLW), DestReg: MaskReg)
14142 .addReg(RegNo: Mask2Reg)
14143 .addReg(RegNo: ShiftReg);
14144 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: NewVal3Reg)
14145 .addReg(RegNo: NewVal2Reg)
14146 .addReg(RegNo: MaskReg);
14147 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: OldVal3Reg)
14148 .addReg(RegNo: OldVal2Reg)
14149 .addReg(RegNo: MaskReg);
14150
14151 BB = loop1MBB;
14152 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::LWARX), DestReg: TmpDestReg)
14153 .addReg(RegNo: ZeroReg)
14154 .addReg(RegNo: PtrReg);
14155 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::AND), DestReg: TmpReg)
14156 .addReg(RegNo: TmpDestReg)
14157 .addReg(RegNo: MaskReg);
14158 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::CMPW), DestReg: CrReg)
14159 .addReg(RegNo: TmpReg)
14160 .addReg(RegNo: OldVal3Reg);
14161 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14162 .addImm(Val: PPC::PRED_NE)
14163 .addReg(RegNo: CrReg)
14164 .addMBB(MBB: exitMBB);
14165 BB->addSuccessor(Succ: loop2MBB);
14166 BB->addSuccessor(Succ: exitMBB);
14167
14168 BB = loop2MBB;
14169 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::ANDC), DestReg: Tmp2Reg)
14170 .addReg(RegNo: TmpDestReg)
14171 .addReg(RegNo: MaskReg);
14172 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::OR), DestReg: Tmp4Reg)
14173 .addReg(RegNo: Tmp2Reg)
14174 .addReg(RegNo: NewVal3Reg);
14175 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::STWCX))
14176 .addReg(RegNo: Tmp4Reg)
14177 .addReg(RegNo: ZeroReg)
14178 .addReg(RegNo: PtrReg);
14179 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::BCC))
14180 .addImm(Val: PPC::PRED_NE)
14181 .addReg(RegNo: PPC::CR0)
14182 .addMBB(MBB: loop1MBB);
14183 BuildMI(BB, MIMD: dl, MCID: TII->get(Opcode: PPC::B)).addMBB(MBB: exitMBB);
14184 BB->addSuccessor(Succ: loop1MBB);
14185 BB->addSuccessor(Succ: exitMBB);
14186
14187 // exitMBB:
14188 // ...
14189 BB = exitMBB;
14190 BuildMI(BB&: *BB, I: BB->begin(), MIMD: dl, MCID: TII->get(Opcode: PPC::SRW), DestReg: dest)
14191 .addReg(RegNo: TmpReg)
14192 .addReg(RegNo: ShiftReg);
14193 } else if (MI.getOpcode() == PPC::FADDrtz) {
14194 // This pseudo performs an FADD with rounding mode temporarily forced
14195 // to round-to-zero. We emit this via custom inserter since the FPSCR
14196 // is not modeled at the SelectionDAG level.
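    // MTFSB1 31 / MTFSB0 30 below set FPSCR[62:63] (RN) to 0b01, i.e.
    // round-toward-zero, and the saved FPSCR value is restored afterwards.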
14197 Register Dest = MI.getOperand(i: 0).getReg();
14198 Register Src1 = MI.getOperand(i: 1).getReg();
14199 Register Src2 = MI.getOperand(i: 2).getReg();
14200 DebugLoc dl = MI.getDebugLoc();
14201
14202 MachineRegisterInfo &RegInfo = F->getRegInfo();
14203 Register MFFSReg = RegInfo.createVirtualRegister(RegClass: &PPC::F8RCRegClass);
14204
14205 // Save FPSCR value.
14206 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: MFFSReg);
14207
14208 // Set rounding mode to round-to-zero.
14209 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSB1))
14210 .addImm(Val: 31)
14211 .addReg(RegNo: PPC::RM, flags: RegState::ImplicitDefine);
14212
14213 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSB0))
14214 .addImm(Val: 30)
14215 .addReg(RegNo: PPC::RM, flags: RegState::ImplicitDefine);
14216
14217 // Perform addition.
14218 auto MIB = BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::FADD), DestReg: Dest)
14219 .addReg(RegNo: Src1)
14220 .addReg(RegNo: Src2);
14221 if (MI.getFlag(Flag: MachineInstr::NoFPExcept))
14222 MIB.setMIFlag(MachineInstr::NoFPExcept);
14223
14224 // Restore FPSCR value.
14225 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSFb)).addImm(Val: 1).addReg(RegNo: MFFSReg);
14226 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14227 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14228 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14229 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14230 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14231 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14232 ? PPC::ANDI8_rec
14233 : PPC::ANDI_rec;
14234 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14235 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14236
14237 MachineRegisterInfo &RegInfo = F->getRegInfo();
14238 Register Dest = RegInfo.createVirtualRegister(
14239 RegClass: Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14240
14241 DebugLoc Dl = MI.getDebugLoc();
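    // AND the input with 1 using the record form so that CR0 is set, then
    // copy the requested CR0 bit (EQ or GT) into the result register.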
14242 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode), DestReg: Dest)
14243 .addReg(RegNo: MI.getOperand(i: 1).getReg())
14244 .addImm(Val: 1);
14245 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::COPY),
14246 DestReg: MI.getOperand(i: 0).getReg())
14247 .addReg(RegNo: IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14248 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14249 DebugLoc Dl = MI.getDebugLoc();
14250 MachineRegisterInfo &RegInfo = F->getRegInfo();
14251 Register CRReg = RegInfo.createVirtualRegister(RegClass: &PPC::CRRCRegClass);
14252 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::TCHECK), DestReg: CRReg);
14253 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::COPY),
14254 DestReg: MI.getOperand(i: 0).getReg())
14255 .addReg(RegNo: CRReg);
14256 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14257 DebugLoc Dl = MI.getDebugLoc();
14258 unsigned Imm = MI.getOperand(i: 1).getImm();
14259 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::TBEGIN)).addImm(Val: Imm);
14260 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::COPY),
14261 DestReg: MI.getOperand(i: 0).getReg())
14262 .addReg(RegNo: PPC::CR0EQ);
14263 } else if (MI.getOpcode() == PPC::SETRNDi) {
14264 DebugLoc dl = MI.getDebugLoc();
14265 Register OldFPSCRReg = MI.getOperand(i: 0).getReg();
14266
14267 // Save FPSCR value.
14268 if (MRI.use_empty(RegNo: OldFPSCRReg))
14269 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: OldFPSCRReg);
14270 else
14271 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: OldFPSCRReg);
14272
14273  // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
14274 // the following settings:
14275 // 00 Round to nearest
14276 // 01 Round to 0
14277 // 10 Round to +inf
14278 // 11 Round to -inf
14279
14280  // When the operand is an immediate, use its two least significant bits to
14281  // set bits 62:63 of the FPSCR.
14282 unsigned Mode = MI.getOperand(i: 1).getImm();
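    // The low bit of the immediate maps to FPSCR bit 63 (mtfsb field 31) and
    // the next bit to FPSCR bit 62 (field 30).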
14283 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: (Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14284 .addImm(Val: 31)
14285 .addReg(RegNo: PPC::RM, flags: RegState::ImplicitDefine);
14286
14287 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: (Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14288 .addImm(Val: 30)
14289 .addReg(RegNo: PPC::RM, flags: RegState::ImplicitDefine);
14290 } else if (MI.getOpcode() == PPC::SETRND) {
14291 DebugLoc dl = MI.getDebugLoc();
14292
14293 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14294 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14295    // If the target doesn't have DirectMove, we go through the stack instead,
14296    // because the target lacks instructions like mtvsrd or mfvsrd that could
14297    // do this conversion directly.
14298 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14299 if (Subtarget.hasDirectMove()) {
14300 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg)
14301 .addReg(RegNo: SrcReg);
14302 } else {
14303 // Use stack to do the register copy.
14304 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14305 MachineRegisterInfo &RegInfo = F->getRegInfo();
14306 const TargetRegisterClass *RC = RegInfo.getRegClass(Reg: SrcReg);
14307 if (RC == &PPC::F8RCRegClass) {
14308        // Copy register from F8RCRegClass to G8RCRegClass.
14309 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14310 "Unsupported RegClass.");
14311
14312 StoreOp = PPC::STFD;
14313 LoadOp = PPC::LD;
14314 } else {
14315        // Copy register from G8RCRegClass to F8RCRegClass.
14316 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14317 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14318 "Unsupported RegClass.");
14319 }
14320
14321 MachineFrameInfo &MFI = F->getFrameInfo();
14322 int FrameIdx = MFI.CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);
14323
14324 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14325 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *F, FI: FrameIdx, Offset: 0),
14326 F: MachineMemOperand::MOStore, Size: MFI.getObjectSize(ObjectIdx: FrameIdx),
14327 BaseAlignment: MFI.getObjectAlign(ObjectIdx: FrameIdx));
14328
14329 // Store the SrcReg into the stack.
14330 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: StoreOp))
14331 .addReg(RegNo: SrcReg)
14332 .addImm(Val: 0)
14333 .addFrameIndex(Idx: FrameIdx)
14334 .addMemOperand(MMO: MMOStore);
14335
14336 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14337 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *F, FI: FrameIdx, Offset: 0),
14338 F: MachineMemOperand::MOLoad, Size: MFI.getObjectSize(ObjectIdx: FrameIdx),
14339 BaseAlignment: MFI.getObjectAlign(ObjectIdx: FrameIdx));
14340
14341      // Load the value back from that slot into DestReg, which completes the
14342      // register-class conversion from SrcReg's register class to DestReg's
14343      // register class.
14344 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: LoadOp), DestReg)
14345 .addImm(Val: 0)
14346 .addFrameIndex(Idx: FrameIdx)
14347 .addMemOperand(MMO: MMOLoad);
14348 }
14349 };
14350
14351 Register OldFPSCRReg = MI.getOperand(i: 0).getReg();
14352
14353 // Save FPSCR value.
14354 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: OldFPSCRReg);
14355
14356    // When the operand is a GPRC register, use its two least significant bits
14357    // and the mtfsf instruction to set bits 62:63 of the FPSCR.
14358 //
14359 // copy OldFPSCRTmpReg, OldFPSCRReg
14360 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14361 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14362 // copy NewFPSCRReg, NewFPSCRTmpReg
14363 // mtfsf 255, NewFPSCRReg
14364 MachineOperand SrcOp = MI.getOperand(i: 1);
14365 MachineRegisterInfo &RegInfo = F->getRegInfo();
14366 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14367
14368 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14369
14370 Register ImDefReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14371 Register ExtSrcReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
14372
14373    // The first operand of INSERT_SUBREG must be a register that has
14374    // subregisters; since we only care about its register class, an
14375    // IMPLICIT_DEF register is sufficient.
14376 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: ImDefReg);
14377 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::INSERT_SUBREG), DestReg: ExtSrcReg)
14378 .addReg(RegNo: ImDefReg)
14379 .add(MO: SrcOp)
14380 .addImm(Val: 1);
14381
14382 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(RegClass: &PPC::G8RCRegClass);
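    // rldimi with SH=0 and MB=62 inserts the low two bits of ExtSrcReg into
    // bits 62:63 of the old FPSCR value.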
14383 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::RLDIMI), DestReg: NewFPSCRTmpReg)
14384 .addReg(RegNo: OldFPSCRTmpReg)
14385 .addReg(RegNo: ExtSrcReg)
14386 .addImm(Val: 0)
14387 .addImm(Val: 62);
14388
14389 Register NewFPSCRReg = RegInfo.createVirtualRegister(RegClass: &PPC::F8RCRegClass);
14390 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14391
14392    // The mask 255 means that bits 32:63 of NewFPSCRReg are copied into bits
14393    // 32:63 of the FPSCR.
14394 BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII->get(Opcode: PPC::MTFSF))
14395 .addImm(Val: 255)
14396 .addReg(RegNo: NewFPSCRReg)
14397 .addImm(Val: 0)
14398 .addImm(Val: 0);
14399 } else if (MI.getOpcode() == PPC::SETFLM) {
14400 DebugLoc Dl = MI.getDebugLoc();
14401
14402    // The result of setflm is the previous FPSCR content, so save it first.
14403 Register OldFPSCRReg = MI.getOperand(i: 0).getReg();
14404 if (MRI.use_empty(RegNo: OldFPSCRReg))
14405 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: TargetOpcode::IMPLICIT_DEF), DestReg: OldFPSCRReg);
14406 else
14407 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::MFFS), DestReg: OldFPSCRReg);
14408
14409    // Put bits 32:63 of the new value into the FPSCR.
14410 Register NewFPSCRReg = MI.getOperand(i: 1).getReg();
14411 BuildMI(BB&: *BB, I&: MI, MIMD: Dl, MCID: TII->get(Opcode: PPC::MTFSF))
14412 .addImm(Val: 255)
14413 .addReg(RegNo: NewFPSCRReg)
14414 .addImm(Val: 0)
14415 .addImm(Val: 0);
14416 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14417 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14418 return emitProbedAlloca(MI, MBB: BB);
14419 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14420 DebugLoc DL = MI.getDebugLoc();
14421 Register Src = MI.getOperand(i: 2).getReg();
14422 Register Lo = MI.getOperand(i: 0).getReg();
14423 Register Hi = MI.getOperand(i: 1).getReg();
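    // Extract the two 64-bit halves of the quadword register pair with
    // subregister copies.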
14424 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY))
14425 .addDef(RegNo: Lo)
14426 .addUse(RegNo: Src, Flags: 0, SubReg: PPC::sub_gp8_x1);
14427 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY))
14428 .addDef(RegNo: Hi)
14429 .addUse(RegNo: Src, Flags: 0, SubReg: PPC::sub_gp8_x0);
14430 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14431 MI.getOpcode() == PPC::STQX_PSEUDO) {
14432 DebugLoc DL = MI.getDebugLoc();
14433    // Ptr holds the sum of RA and RB and is used as the ptr_rc_no_r0 part of
14434    // LQ/STQ's memory operand, so it has to be in the g8rc_and_g8rc_nox0
14435    // register class.
14436 Register Ptr =
14437 F->getRegInfo().createVirtualRegister(RegClass: &PPC::G8RC_and_G8RC_NOX0RegClass);
14438 Register Val = MI.getOperand(i: 0).getReg();
14439 Register RA = MI.getOperand(i: 1).getReg();
14440 Register RB = MI.getOperand(i: 2).getReg();
14441 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::ADD8), DestReg: Ptr).addReg(RegNo: RA).addReg(RegNo: RB);
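    // lq/stq only have displacement forms, so the indexed address is
    // materialized into Ptr and used with a zero displacement.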
14442 BuildMI(BB&: *BB, I&: MI, MIMD: DL,
14443 MCID: MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(Opcode: PPC::LQ)
14444 : TII->get(Opcode: PPC::STQ))
14445 .addReg(RegNo: Val, flags: MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
14446 .addImm(Val: 0)
14447 .addReg(RegNo: Ptr);
14448 } else {
14449 llvm_unreachable("Unexpected instr type to insert");
14450 }
14451
14452 MI.eraseFromParent(); // The pseudo instruction is gone now.
14453 return BB;
14454}
14455
14456//===----------------------------------------------------------------------===//
14457// Target Optimization Hooks
14458//===----------------------------------------------------------------------===//
14459
14460static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14461 // For the estimates, convergence is quadratic, so we essentially double the
14462 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14463 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14464 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
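  // For example, starting from 2^-14 accuracy, one refinement step gives
  // roughly 28 correct bits (enough for f32) and two steps roughly 56 bits
  // (enough for f64).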
14465 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14466 if (VT.getScalarType() == MVT::f64)
14467 RefinementSteps++;
14468 return RefinementSteps;
14469}
14470
14471SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14472 const DenormalMode &Mode) const {
14473 // We only have VSX Vector Test for software Square Root.
14474 EVT VT = Op.getValueType();
14475 if (!isTypeLegal(VT: MVT::i1) ||
14476 (VT != MVT::f64 &&
14477 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14478 return TargetLowering::getSqrtInputTest(Operand: Op, DAG, Mode);
14479
14480 SDLoc DL(Op);
14481 // The output register of FTSQRT is CR field.
14482 SDValue FTSQRT = DAG.getNode(Opcode: PPCISD::FTSQRT, DL, VT: MVT::i32, Operand: Op);
14483 // ftsqrt BF,FRB
14484 // Let e_b be the unbiased exponent of the double-precision
14485 // floating-point operand in register FRB.
14486 // fe_flag is set to 1 if either of the following conditions occurs.
14487 // - The double-precision floating-point operand in register FRB is a zero,
14488  //  a NaN, an infinity, or a negative value.
14489 // - e_b is less than or equal to -970.
14490 // Otherwise fe_flag is set to 0.
14491 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14492 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14493 // exponent is less than -970)
14494 SDValue SRIdxVal = DAG.getTargetConstant(Val: PPC::sub_eq, DL, VT: MVT::i32);
14495 return SDValue(DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT: MVT::i1,
14496 Op1: FTSQRT, Op2: SRIdxVal),
14497 0);
14498}
14499
14500SDValue
14501PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14502 SelectionDAG &DAG) const {
14503 // We only have VSX Vector Square Root.
14504 EVT VT = Op.getValueType();
14505 if (VT != MVT::f64 &&
14506 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14507 return TargetLowering::getSqrtResultForDenormInput(Operand: Op, DAG);
14508
14509 return DAG.getNode(Opcode: PPCISD::FSQRT, DL: SDLoc(Op), VT, Operand: Op);
14510}
14511
14512SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14513 int Enabled, int &RefinementSteps,
14514 bool &UseOneConstNR,
14515 bool Reciprocal) const {
14516 EVT VT = Operand.getValueType();
14517 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14518 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14519 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14520 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14521 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14522 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14523
14524 // The Newton-Raphson computation with a single constant does not provide
14525 // enough accuracy on some CPUs.
14526 UseOneConstNR = !Subtarget.needsTwoConstNR();
14527 return DAG.getNode(Opcode: PPCISD::FRSQRTE, DL: SDLoc(Operand), VT, Operand);
14528 }
14529 return SDValue();
14530}
14531
14532SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14533 int Enabled,
14534 int &RefinementSteps) const {
14535 EVT VT = Operand.getValueType();
14536 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14537 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14538 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14539 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14540 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14541 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14542 return DAG.getNode(Opcode: PPCISD::FRE, DL: SDLoc(Operand), VT, Operand);
14543 }
14544 return SDValue();
14545}
14546
14547unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14548 // Note: This functionality is used only when unsafe-fp-math is enabled, and
14549 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
14550 // enabled for division), this functionality is redundant with the default
14551 // combiner logic (once the division -> reciprocal/multiply transformation
14552 // has taken place). As a result, this matters more for older cores than for
14553 // newer ones.
14554
14555 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14556 // reciprocal if there are two or more FDIVs (for embedded cores with only
14557  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
14558 switch (Subtarget.getCPUDirective()) {
14559 default:
14560 return 3;
14561 case PPC::DIR_440:
14562 case PPC::DIR_A2:
14563 case PPC::DIR_E500:
14564 case PPC::DIR_E500mc:
14565 case PPC::DIR_E5500:
14566 return 2;
14567 }
14568}
14569
14570// isConsecutiveLSLoc needs to work even if all adds have not yet been
14571// collapsed, and so we need to look through chains of them.
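// For example, (add (add X, 8), 16) yields Base = X and adds 24 to Offset.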
14572static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14573 int64_t& Offset, SelectionDAG &DAG) {
14574 if (DAG.isBaseWithConstantOffset(Op: Loc)) {
14575 Base = Loc.getOperand(i: 0);
14576 Offset += cast<ConstantSDNode>(Val: Loc.getOperand(i: 1))->getSExtValue();
14577
14578 // The base might itself be a base plus an offset, and if so, accumulate
14579 // that as well.
14580 getBaseWithConstantOffset(Loc: Loc.getOperand(i: 0), Base, Offset, DAG);
14581 }
14582}
14583
14584static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14585 unsigned Bytes, int Dist,
14586 SelectionDAG &DAG) {
14587 if (VT.getSizeInBits() / 8 != Bytes)
14588 return false;
14589
14590 SDValue BaseLoc = Base->getBasePtr();
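  // If both addresses are frame indices, compare the frame objects' sizes and
  // offsets directly.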
14591 if (Loc.getOpcode() == ISD::FrameIndex) {
14592 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14593 return false;
14594 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14595 int FI = cast<FrameIndexSDNode>(Val&: Loc)->getIndex();
14596 int BFI = cast<FrameIndexSDNode>(Val&: BaseLoc)->getIndex();
14597 int FS = MFI.getObjectSize(ObjectIdx: FI);
14598 int BFS = MFI.getObjectSize(ObjectIdx: BFI);
14599 if (FS != BFS || FS != (int)Bytes) return false;
14600 return MFI.getObjectOffset(ObjectIdx: FI) == (MFI.getObjectOffset(ObjectIdx: BFI) + Dist*Bytes);
14601 }
14602
14603 SDValue Base1 = Loc, Base2 = BaseLoc;
14604 int64_t Offset1 = 0, Offset2 = 0;
14605 getBaseWithConstantOffset(Loc, Base&: Base1, Offset&: Offset1, DAG);
14606 getBaseWithConstantOffset(Loc: BaseLoc, Base&: Base2, Offset&: Offset2, DAG);
14607 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14608 return true;
14609
14610 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14611 const GlobalValue *GV1 = nullptr;
14612 const GlobalValue *GV2 = nullptr;
14613 Offset1 = 0;
14614 Offset2 = 0;
14615 bool isGA1 = TLI.isGAPlusOffset(N: Loc.getNode(), GA&: GV1, Offset&: Offset1);
14616 bool isGA2 = TLI.isGAPlusOffset(N: BaseLoc.getNode(), GA&: GV2, Offset&: Offset2);
14617 if (isGA1 && isGA2 && GV1 == GV2)
14618 return Offset1 == (Offset2 + Dist*Bytes);
14619 return false;
14620}
14621
14622// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14623// not enforce equality of the chain operands.
14624static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
14625 unsigned Bytes, int Dist,
14626 SelectionDAG &DAG) {
14627 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Val: N)) {
14628 EVT VT = LS->getMemoryVT();
14629 SDValue Loc = LS->getBasePtr();
14630 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14631 }
14632
14633 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14634 EVT VT;
14635 switch (N->getConstantOperandVal(Num: 1)) {
14636 default: return false;
14637 case Intrinsic::ppc_altivec_lvx:
14638 case Intrinsic::ppc_altivec_lvxl:
14639 case Intrinsic::ppc_vsx_lxvw4x:
14640 case Intrinsic::ppc_vsx_lxvw4x_be:
14641 VT = MVT::v4i32;
14642 break;
14643 case Intrinsic::ppc_vsx_lxvd2x:
14644 case Intrinsic::ppc_vsx_lxvd2x_be:
14645 VT = MVT::v2f64;
14646 break;
14647 case Intrinsic::ppc_altivec_lvebx:
14648 VT = MVT::i8;
14649 break;
14650 case Intrinsic::ppc_altivec_lvehx:
14651 VT = MVT::i16;
14652 break;
14653 case Intrinsic::ppc_altivec_lvewx:
14654 VT = MVT::i32;
14655 break;
14656 }
14657
14658 return isConsecutiveLSLoc(Loc: N->getOperand(Num: 2), VT, Base, Bytes, Dist, DAG);
14659 }
14660
14661 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14662 EVT VT;
14663 switch (N->getConstantOperandVal(Num: 1)) {
14664 default: return false;
14665 case Intrinsic::ppc_altivec_stvx:
14666 case Intrinsic::ppc_altivec_stvxl:
14667 case Intrinsic::ppc_vsx_stxvw4x:
14668 VT = MVT::v4i32;
14669 break;
14670 case Intrinsic::ppc_vsx_stxvd2x:
14671 VT = MVT::v2f64;
14672 break;
14673 case Intrinsic::ppc_vsx_stxvw4x_be:
14674 VT = MVT::v4i32;
14675 break;
14676 case Intrinsic::ppc_vsx_stxvd2x_be:
14677 VT = MVT::v2f64;
14678 break;
14679 case Intrinsic::ppc_altivec_stvebx:
14680 VT = MVT::i8;
14681 break;
14682 case Intrinsic::ppc_altivec_stvehx:
14683 VT = MVT::i16;
14684 break;
14685 case Intrinsic::ppc_altivec_stvewx:
14686 VT = MVT::i32;
14687 break;
14688 }
14689
14690 return isConsecutiveLSLoc(Loc: N->getOperand(Num: 3), VT, Base, Bytes, Dist, DAG);
14691 }
14692
14693 return false;
14694}
14695
14696// Return true if there is a nearby consecutive load to the one provided
14697// (regardless of alignment). We search up and down the chain, looking through
14698// token factors and other loads (but nothing else). A true result therefore
14699// indicates that it is safe to create a new consecutive load adjacent to the
14700// load provided.
14701static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
14702 SDValue Chain = LD->getChain();
14703 EVT VT = LD->getMemoryVT();
14704
14705 SmallSet<SDNode *, 16> LoadRoots;
14706 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
14707 SmallSet<SDNode *, 16> Visited;
14708
14709 // First, search up the chain, branching to follow all token-factor operands.
14710 // If we find a consecutive load, then we're done, otherwise, record all
14711 // nodes just above the top-level loads and token factors.
14712 while (!Queue.empty()) {
14713 SDNode *ChainNext = Queue.pop_back_val();
14714 if (!Visited.insert(Ptr: ChainNext).second)
14715 continue;
14716
14717 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(Val: ChainNext)) {
14718 if (isConsecutiveLS(N: ChainLD, Base: LD, Bytes: VT.getStoreSize(), Dist: 1, DAG))
14719 return true;
14720
14721 if (!Visited.count(Ptr: ChainLD->getChain().getNode()))
14722 Queue.push_back(Elt: ChainLD->getChain().getNode());
14723 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14724 for (const SDUse &O : ChainNext->ops())
14725 if (!Visited.count(Ptr: O.getNode()))
14726 Queue.push_back(Elt: O.getNode());
14727 } else
14728 LoadRoots.insert(Ptr: ChainNext);
14729 }
14730
14731 // Second, search down the chain, starting from the top-level nodes recorded
14732 // in the first phase. These top-level nodes are the nodes just above all
14733// loads and token factors. Starting with their uses, recursively look through
14734 // all loads (just the chain uses) and token factors to find a consecutive
14735 // load.
14736 Visited.clear();
14737 Queue.clear();
14738
14739 for (SDNode *I : LoadRoots) {
14740 Queue.push_back(Elt: I);
14741
14742 while (!Queue.empty()) {
14743 SDNode *LoadRoot = Queue.pop_back_val();
14744 if (!Visited.insert(Ptr: LoadRoot).second)
14745 continue;
14746
14747 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(Val: LoadRoot))
14748 if (isConsecutiveLS(N: ChainLD, Base: LD, Bytes: VT.getStoreSize(), Dist: 1, DAG))
14749 return true;
14750
14751 for (SDNode *U : LoadRoot->users())
14752 if (((isa<MemSDNode>(Val: U) &&
14753 cast<MemSDNode>(Val: U)->getChain().getNode() == LoadRoot) ||
14754 U->getOpcode() == ISD::TokenFactor) &&
14755 !Visited.count(Ptr: U))
14756 Queue.push_back(Elt: U);
14757 }
14758 }
14759
14760 return false;
14761}
14762
14763/// This function is called when we have proved that a SETCC node can be replaced
14764/// by subtraction (and other supporting instructions) so that the result of
14765/// the comparison is kept in a GPR instead of a CR field. This is purely for
14766/// codegen purposes and has some flags to guide the codegen process.
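/// For example, on a 64-bit target (setult x, y) with 32-bit operands becomes
/// truncate(((zext x) - (zext y)) >> 63) to i1; the other unsigned predicates
/// swap the operands and/or complement the extracted bit as needed.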
14767static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14768 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14769 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14770
14771 // Zero extend the operands to the largest legal integer. Originally, they
14772 // must be of a strictly smaller size.
14773 auto Op0 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, N1: N->getOperand(Num: 0),
14774 N2: DAG.getConstant(Val: Size, DL, VT: MVT::i32));
14775 auto Op1 = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, N1: N->getOperand(Num: 1),
14776 N2: DAG.getConstant(Val: Size, DL, VT: MVT::i32));
14777
14778 // Swap if needed. Depends on the condition code.
14779 if (Swap)
14780 std::swap(a&: Op0, b&: Op1);
14781
14782 // Subtract extended integers.
14783 auto SubNode = DAG.getNode(Opcode: ISD::SUB, DL, VT: MVT::i64, N1: Op0, N2: Op1);
14784
14785 // Move the sign bit to the least significant position and zero out the rest.
14786  // Now the least significant bit carries the result of the original comparison.
14787 auto Shifted = DAG.getNode(Opcode: ISD::SRL, DL, VT: MVT::i64, N1: SubNode,
14788 N2: DAG.getConstant(Val: Size - 1, DL, VT: MVT::i32));
14789 auto Final = Shifted;
14790
14791  // Complement the result if needed, based on the condition code.
14792 if (Complement)
14793 Final = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i64, N1: Shifted,
14794 N2: DAG.getConstant(Val: 1, DL, VT: MVT::i64));
14795
14796 return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: Final);
14797}
14798
14799SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14800 DAGCombinerInfo &DCI) const {
14801 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14802
14803 SelectionDAG &DAG = DCI.DAG;
14804 SDLoc DL(N);
14805
14806  // The size of the integers being compared plays a critical role in the
14807  // following analysis, so we prefer to do this when all types are legal.
14808 if (!DCI.isAfterLegalizeDAG())
14809 return SDValue();
14810
14811  // If all users of the SETCC extend its value to a legal integer type,
14812  // then we replace the SETCC with a subtraction.
14813 for (const SDNode *U : N->users())
14814 if (U->getOpcode() != ISD::ZERO_EXTEND)
14815 return SDValue();
14816
14817 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
14818 auto OpSize = N->getOperand(Num: 0).getValueSizeInBits();
14819
14820 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
14821
14822 if (OpSize < Size) {
14823 switch (CC) {
14824 default: break;
14825 case ISD::SETULT:
14826 return generateEquivalentSub(N, Size, Complement: false, Swap: false, DL, DAG);
14827 case ISD::SETULE:
14828 return generateEquivalentSub(N, Size, Complement: true, Swap: true, DL, DAG);
14829 case ISD::SETUGT:
14830 return generateEquivalentSub(N, Size, Complement: false, Swap: true, DL, DAG);
14831 case ISD::SETUGE:
14832 return generateEquivalentSub(N, Size, Complement: true, Swap: false, DL, DAG);
14833 }
14834 }
14835
14836 return SDValue();
14837}
14838
14839SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14840 DAGCombinerInfo &DCI) const {
14841 SelectionDAG &DAG = DCI.DAG;
14842 SDLoc dl(N);
14843
14844 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14845 // If we're tracking CR bits, we need to be careful that we don't have:
14846 // trunc(binary-ops(zext(x), zext(y)))
14847 // or
14848 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
14849 // such that we're unnecessarily moving things into GPRs when it would be
14850 // better to keep them in CR bits.
14851
14852 // Note that trunc here can be an actual i1 trunc, or can be the effective
14853 // truncation that comes from a setcc or select_cc.
14854 if (N->getOpcode() == ISD::TRUNCATE &&
14855 N->getValueType(ResNo: 0) != MVT::i1)
14856 return SDValue();
14857
14858 if (N->getOperand(Num: 0).getValueType() != MVT::i32 &&
14859 N->getOperand(Num: 0).getValueType() != MVT::i64)
14860 return SDValue();
14861
14862 if (N->getOpcode() == ISD::SETCC ||
14863 N->getOpcode() == ISD::SELECT_CC) {
14864 // If we're looking at a comparison, then we need to make sure that the
14865    // high bits (all except for the first) don't affect the result.
14866 ISD::CondCode CC =
14867 cast<CondCodeSDNode>(Val: N->getOperand(
14868 Num: N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14869 unsigned OpBits = N->getOperand(Num: 0).getValueSizeInBits();
14870
14871 if (ISD::isSignedIntSetCC(Code: CC)) {
14872 if (DAG.ComputeNumSignBits(Op: N->getOperand(Num: 0)) != OpBits ||
14873 DAG.ComputeNumSignBits(Op: N->getOperand(Num: 1)) != OpBits)
14874 return SDValue();
14875 } else if (ISD::isUnsignedIntSetCC(Code: CC)) {
14876 if (!DAG.MaskedValueIsZero(Op: N->getOperand(Num: 0),
14877 Mask: APInt::getHighBitsSet(numBits: OpBits, hiBitsSet: OpBits-1)) ||
14878 !DAG.MaskedValueIsZero(Op: N->getOperand(Num: 1),
14879 Mask: APInt::getHighBitsSet(numBits: OpBits, hiBitsSet: OpBits-1)))
14880 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14881 : SDValue());
14882 } else {
14883 // This is neither a signed nor an unsigned comparison, just make sure
14884 // that the high bits are equal.
14885 KnownBits Op1Known = DAG.computeKnownBits(Op: N->getOperand(Num: 0));
14886 KnownBits Op2Known = DAG.computeKnownBits(Op: N->getOperand(Num: 1));
14887
14888 // We don't really care about what is known about the first bit (if
14889 // anything), so pretend that it is known zero for both to ensure they can
14890 // be compared as constants.
14891 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(BitPosition: 0);
14892 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(BitPosition: 0);
14893
14894 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14895 Op1Known.getConstant() != Op2Known.getConstant())
14896 return SDValue();
14897 }
14898 }
14899
14900 // We now know that the higher-order bits are irrelevant, we just need to
14901 // make sure that all of the intermediate operations are bit operations, and
14902 // all inputs are extensions.
14903 if (N->getOperand(Num: 0).getOpcode() != ISD::AND &&
14904 N->getOperand(Num: 0).getOpcode() != ISD::OR &&
14905 N->getOperand(Num: 0).getOpcode() != ISD::XOR &&
14906 N->getOperand(Num: 0).getOpcode() != ISD::SELECT &&
14907 N->getOperand(Num: 0).getOpcode() != ISD::SELECT_CC &&
14908 N->getOperand(Num: 0).getOpcode() != ISD::TRUNCATE &&
14909 N->getOperand(Num: 0).getOpcode() != ISD::SIGN_EXTEND &&
14910 N->getOperand(Num: 0).getOpcode() != ISD::ZERO_EXTEND &&
14911 N->getOperand(Num: 0).getOpcode() != ISD::ANY_EXTEND)
14912 return SDValue();
14913
14914 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14915 N->getOperand(Num: 1).getOpcode() != ISD::AND &&
14916 N->getOperand(Num: 1).getOpcode() != ISD::OR &&
14917 N->getOperand(Num: 1).getOpcode() != ISD::XOR &&
14918 N->getOperand(Num: 1).getOpcode() != ISD::SELECT &&
14919 N->getOperand(Num: 1).getOpcode() != ISD::SELECT_CC &&
14920 N->getOperand(Num: 1).getOpcode() != ISD::TRUNCATE &&
14921 N->getOperand(Num: 1).getOpcode() != ISD::SIGN_EXTEND &&
14922 N->getOperand(Num: 1).getOpcode() != ISD::ZERO_EXTEND &&
14923 N->getOperand(Num: 1).getOpcode() != ISD::ANY_EXTEND)
14924 return SDValue();
14925
14926 SmallVector<SDValue, 4> Inputs;
14927 SmallVector<SDValue, 8> BinOps, PromOps;
14928 SmallPtrSet<SDNode *, 16> Visited;
14929
14930 for (unsigned i = 0; i < 2; ++i) {
14931 if (((N->getOperand(Num: i).getOpcode() == ISD::SIGN_EXTEND ||
14932 N->getOperand(Num: i).getOpcode() == ISD::ZERO_EXTEND ||
14933 N->getOperand(Num: i).getOpcode() == ISD::ANY_EXTEND) &&
14934 N->getOperand(Num: i).getOperand(i: 0).getValueType() == MVT::i1) ||
14935 isa<ConstantSDNode>(Val: N->getOperand(Num: i)))
14936 Inputs.push_back(Elt: N->getOperand(Num: i));
14937 else
14938 BinOps.push_back(Elt: N->getOperand(Num: i));
14939
14940 if (N->getOpcode() == ISD::TRUNCATE)
14941 break;
14942 }
14943
14944 // Visit all inputs, collect all binary operations (and, or, xor and
14945 // select) that are all fed by extensions.
14946 while (!BinOps.empty()) {
14947 SDValue BinOp = BinOps.pop_back_val();
14948
14949 if (!Visited.insert(Ptr: BinOp.getNode()).second)
14950 continue;
14951
14952 PromOps.push_back(Elt: BinOp);
14953
14954 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14955 // The condition of the select is not promoted.
14956 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14957 continue;
14958 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14959 continue;
14960
14961 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14962 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14963 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14964 BinOp.getOperand(i).getOperand(i: 0).getValueType() == MVT::i1) ||
14965 isa<ConstantSDNode>(Val: BinOp.getOperand(i))) {
14966 Inputs.push_back(Elt: BinOp.getOperand(i));
14967 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14968 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14969 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14970 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14971 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14972 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14973 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14974 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14975 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14976 BinOps.push_back(Elt: BinOp.getOperand(i));
14977 } else {
14978 // We have an input that is not an extension or another binary
14979 // operation; we'll abort this transformation.
14980 return SDValue();
14981 }
14982 }
14983 }
14984
14985 // Make sure that this is a self-contained cluster of operations (which
14986 // is not quite the same thing as saying that everything has only one
14987 // use).
14988 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14989 if (isa<ConstantSDNode>(Val: Inputs[i]))
14990 continue;
14991
14992 for (const SDNode *User : Inputs[i].getNode()->users()) {
14993 if (User != N && !Visited.count(Ptr: User))
14994 return SDValue();
14995
14996 // Make sure that we're not going to promote the non-output-value
14997 // operand(s) or SELECT or SELECT_CC.
14998 // FIXME: Although we could sometimes handle this, and it does occur in
14999 // practice that one of the condition inputs to the select is also one of
15000 // the outputs, we currently can't deal with this.
15001 if (User->getOpcode() == ISD::SELECT) {
15002 if (User->getOperand(Num: 0) == Inputs[i])
15003 return SDValue();
15004 } else if (User->getOpcode() == ISD::SELECT_CC) {
15005 if (User->getOperand(Num: 0) == Inputs[i] ||
15006 User->getOperand(Num: 1) == Inputs[i])
15007 return SDValue();
15008 }
15009 }
15010 }
15011
15012 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15013 for (const SDNode *User : PromOps[i].getNode()->users()) {
15014 if (User != N && !Visited.count(Ptr: User))
15015 return SDValue();
15016
15017 // Make sure that we're not going to promote the non-output-value
15018 // operand(s) or SELECT or SELECT_CC.
15019 // FIXME: Although we could sometimes handle this, and it does occur in
15020 // practice that one of the condition inputs to the select is also one of
15021 // the outputs, we currently can't deal with this.
15022 if (User->getOpcode() == ISD::SELECT) {
15023 if (User->getOperand(Num: 0) == PromOps[i])
15024 return SDValue();
15025 } else if (User->getOpcode() == ISD::SELECT_CC) {
15026 if (User->getOperand(Num: 0) == PromOps[i] ||
15027 User->getOperand(Num: 1) == PromOps[i])
15028 return SDValue();
15029 }
15030 }
15031 }
15032
15033 // Replace all inputs with the extension operand.
15034 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15035 // Constants may have users outside the cluster of to-be-promoted nodes,
15036 // and so we need to replace those as we do the promotions.
15037 if (isa<ConstantSDNode>(Val: Inputs[i]))
15038 continue;
15039 else
15040 DAG.ReplaceAllUsesOfValueWith(From: Inputs[i], To: Inputs[i].getOperand(i: 0));
15041 }
15042
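  // Wrap the to-be-promoted nodes in HandleSDNodes so the worklist entries
  // remain valid across the ReplaceAllUsesOfValueWith calls below.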
15043 std::list<HandleSDNode> PromOpHandles;
15044 for (auto &PromOp : PromOps)
15045 PromOpHandles.emplace_back(args&: PromOp);
15046
15047 // Replace all operations (these are all the same, but have a different
15048 // (i1) return type). DAG.getNode will validate that the types of
15049 // a binary operator match, so go through the list in reverse so that
15050 // we've likely promoted both operands first. Any intermediate truncations or
15051 // extensions disappear.
15052 while (!PromOpHandles.empty()) {
15053 SDValue PromOp = PromOpHandles.back().getValue();
15054 PromOpHandles.pop_back();
15055
15056 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15057 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15058 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15059 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15060 if (!isa<ConstantSDNode>(Val: PromOp.getOperand(i: 0)) &&
15061 PromOp.getOperand(i: 0).getValueType() != MVT::i1) {
15062 // The operand is not yet ready (see comment below).
15063 PromOpHandles.emplace_front(args&: PromOp);
15064 continue;
15065 }
15066
15067 SDValue RepValue = PromOp.getOperand(i: 0);
15068 if (isa<ConstantSDNode>(Val: RepValue))
15069 RepValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: RepValue);
15070
15071 DAG.ReplaceAllUsesOfValueWith(From: PromOp, To: RepValue);
15072 continue;
15073 }
15074
15075 unsigned C;
15076 switch (PromOp.getOpcode()) {
15077 default: C = 0; break;
15078 case ISD::SELECT: C = 1; break;
15079 case ISD::SELECT_CC: C = 2; break;
15080 }
15081
15082 if ((!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C)) &&
15083 PromOp.getOperand(i: C).getValueType() != MVT::i1) ||
15084 (!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C+1)) &&
15085 PromOp.getOperand(i: C+1).getValueType() != MVT::i1)) {
15086 // The to-be-promoted operands of this node have not yet been
15087 // promoted (this should be rare because we're going through the
15088 // list backward, but if one of the operands has several users in
15089 // this cluster of to-be-promoted nodes, it is possible).
15090 PromOpHandles.emplace_front(args&: PromOp);
15091 continue;
15092 }
15093
15094 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15095
15096 // If there are any constant inputs, make sure they're replaced now.
15097 for (unsigned i = 0; i < 2; ++i)
15098 if (isa<ConstantSDNode>(Val: Ops[C+i]))
15099 Ops[C+i] = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i1, Operand: Ops[C+i]);
15100
15101 DAG.ReplaceAllUsesOfValueWith(From: PromOp,
15102 To: DAG.getNode(Opcode: PromOp.getOpcode(), DL: dl, VT: MVT::i1, Ops));
15103 }
15104
15105 // Now we're left with the initial truncation itself.
15106 if (N->getOpcode() == ISD::TRUNCATE)
15107 return N->getOperand(Num: 0);
15108
15109 // Otherwise, this is a comparison. The operands to be compared have just
15110 // changed type (to i1), but everything else is the same.
15111 return SDValue(N, 0);
15112}
15113
15114SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15115 DAGCombinerInfo &DCI) const {
15116 SelectionDAG &DAG = DCI.DAG;
15117 SDLoc dl(N);
15118
15119 // If we're tracking CR bits, we need to be careful that we don't have:
15120 // zext(binary-ops(trunc(x), trunc(y)))
15121 // or
15122 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
15123 // such that we're unnecessarily moving things into CR bits that can more
15124 // efficiently stay in GPRs. Note that if we're not certain that the high
15125 // bits are set as required by the final extension, we still may need to do
15126 // some masking to get the proper behavior.
15127
15128 // This same functionality is important on PPC64 when dealing with
15129 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
15130 // the return values of functions. Because it is so similar, it is handled
15131 // here as well.
15132
15133 if (N->getValueType(ResNo: 0) != MVT::i32 &&
15134 N->getValueType(ResNo: 0) != MVT::i64)
15135 return SDValue();
15136
15137 if (!((N->getOperand(Num: 0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
15138 (N->getOperand(Num: 0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15139 return SDValue();
15140
15141 if (N->getOperand(Num: 0).getOpcode() != ISD::AND &&
15142 N->getOperand(Num: 0).getOpcode() != ISD::OR &&
15143 N->getOperand(Num: 0).getOpcode() != ISD::XOR &&
15144 N->getOperand(Num: 0).getOpcode() != ISD::SELECT &&
15145 N->getOperand(Num: 0).getOpcode() != ISD::SELECT_CC)
15146 return SDValue();
15147
15148 SmallVector<SDValue, 4> Inputs;
15149 SmallVector<SDValue, 8> BinOps(1, N->getOperand(Num: 0)), PromOps;
15150 SmallPtrSet<SDNode *, 16> Visited;
15151
15152 // Visit all inputs, collect all binary operations (and, or, xor and
15153 // select) that are all fed by truncations.
15154 while (!BinOps.empty()) {
15155 SDValue BinOp = BinOps.pop_back_val();
15156
15157 if (!Visited.insert(Ptr: BinOp.getNode()).second)
15158 continue;
15159
15160 PromOps.push_back(Elt: BinOp);
15161
15162 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15163 // The condition of the select is not promoted.
15164 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15165 continue;
15166 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15167 continue;
15168
15169 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15170 isa<ConstantSDNode>(Val: BinOp.getOperand(i))) {
15171 Inputs.push_back(Elt: BinOp.getOperand(i));
15172 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15173 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15174 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15175 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15176 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15177 BinOps.push_back(Elt: BinOp.getOperand(i));
15178 } else {
15179 // We have an input that is not a truncation or another binary
15180 // operation; we'll abort this transformation.
15181 return SDValue();
15182 }
15183 }
15184 }
15185
15186  // Records the operands of a select that must be truncated when the select is
15187  // promoted, because the operand is actually part of the to-be-promoted set.
15188 DenseMap<SDNode *, EVT> SelectTruncOp[2];
15189
15190 // Make sure that this is a self-contained cluster of operations (which
15191 // is not quite the same thing as saying that everything has only one
15192 // use).
15193 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15194 if (isa<ConstantSDNode>(Val: Inputs[i]))
15195 continue;
15196
15197 for (SDNode *User : Inputs[i].getNode()->users()) {
15198 if (User != N && !Visited.count(Ptr: User))
15199 return SDValue();
15200
15201 // If we're going to promote the non-output-value operand(s) or SELECT or
15202 // SELECT_CC, record them for truncation.
15203 if (User->getOpcode() == ISD::SELECT) {
15204 if (User->getOperand(Num: 0) == Inputs[i])
15205 SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
15206 y: User->getOperand(Num: 0).getValueType()));
15207 } else if (User->getOpcode() == ISD::SELECT_CC) {
15208 if (User->getOperand(Num: 0) == Inputs[i])
15209 SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
15210 y: User->getOperand(Num: 0).getValueType()));
15211 if (User->getOperand(Num: 1) == Inputs[i])
15212 SelectTruncOp[1].insert(KV: std::make_pair(x&: User,
15213 y: User->getOperand(Num: 1).getValueType()));
15214 }
15215 }
15216 }
15217
15218 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15219 for (SDNode *User : PromOps[i].getNode()->users()) {
15220 if (User != N && !Visited.count(Ptr: User))
15221 return SDValue();
15222
15223 // If we're going to promote the non-output-value operand(s) or SELECT or
15224 // SELECT_CC, record them for truncation.
15225 if (User->getOpcode() == ISD::SELECT) {
15226 if (User->getOperand(Num: 0) == PromOps[i])
15227 SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
15228 y: User->getOperand(Num: 0).getValueType()));
15229 } else if (User->getOpcode() == ISD::SELECT_CC) {
15230 if (User->getOperand(Num: 0) == PromOps[i])
15231 SelectTruncOp[0].insert(KV: std::make_pair(x&: User,
15232 y: User->getOperand(Num: 0).getValueType()));
15233 if (User->getOperand(Num: 1) == PromOps[i])
15234 SelectTruncOp[1].insert(KV: std::make_pair(x&: User,
15235 y: User->getOperand(Num: 1).getValueType()));
15236 }
15237 }
15238 }
15239
15240 unsigned PromBits = N->getOperand(Num: 0).getValueSizeInBits();
15241 bool ReallyNeedsExt = false;
15242 if (N->getOpcode() != ISD::ANY_EXTEND) {
15243    // If not all of the inputs are already sign/zero extended, then
15244    // we'll still need to do that at the end.
15245 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15246 if (isa<ConstantSDNode>(Val: Inputs[i]))
15247 continue;
15248
15249 unsigned OpBits =
15250 Inputs[i].getOperand(i: 0).getValueSizeInBits();
15251 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15252
15253 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15254 !DAG.MaskedValueIsZero(Op: Inputs[i].getOperand(i: 0),
15255 Mask: APInt::getHighBitsSet(numBits: OpBits,
15256 hiBitsSet: OpBits-PromBits))) ||
15257 (N->getOpcode() == ISD::SIGN_EXTEND &&
15258 DAG.ComputeNumSignBits(Op: Inputs[i].getOperand(i: 0)) <
15259 (OpBits-(PromBits-1)))) {
15260 ReallyNeedsExt = true;
15261 break;
15262 }
15263 }
15264 }
15265
15266 // Replace all inputs, either with the truncation operand, or a
15267 // truncation or extension to the final output type.
15268 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15269    // Constant inputs are replaced within the to-be-promoted nodes that use
15270    // them, rather than globally, because they might have users outside of
15271    // the cluster of promoted nodes.
15272 if (isa<ConstantSDNode>(Val: Inputs[i]))
15273 continue;
15274
15275 SDValue InSrc = Inputs[i].getOperand(i: 0);
15276 if (Inputs[i].getValueType() == N->getValueType(ResNo: 0))
15277 DAG.ReplaceAllUsesOfValueWith(From: Inputs[i], To: InSrc);
15278 else if (N->getOpcode() == ISD::SIGN_EXTEND)
15279 DAG.ReplaceAllUsesOfValueWith(From: Inputs[i],
15280 To: DAG.getSExtOrTrunc(Op: InSrc, DL: dl, VT: N->getValueType(ResNo: 0)));
15281 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15282 DAG.ReplaceAllUsesOfValueWith(From: Inputs[i],
15283 To: DAG.getZExtOrTrunc(Op: InSrc, DL: dl, VT: N->getValueType(ResNo: 0)));
15284 else
15285 DAG.ReplaceAllUsesOfValueWith(From: Inputs[i],
15286 To: DAG.getAnyExtOrTrunc(Op: InSrc, DL: dl, VT: N->getValueType(ResNo: 0)));
15287 }
15288
15289 std::list<HandleSDNode> PromOpHandles;
15290 for (auto &PromOp : PromOps)
15291 PromOpHandles.emplace_back(args&: PromOp);
15292
15293 // Replace all operations (these are all the same, but have a different
15294 // (promoted) return type). DAG.getNode will validate that the types of
15295 // a binary operator match, so go through the list in reverse so that
15296 // we've likely promoted both operands first.
15297 while (!PromOpHandles.empty()) {
15298 SDValue PromOp = PromOpHandles.back().getValue();
15299 PromOpHandles.pop_back();
15300
15301 unsigned C;
15302 switch (PromOp.getOpcode()) {
15303 default: C = 0; break;
15304 case ISD::SELECT: C = 1; break;
15305 case ISD::SELECT_CC: C = 2; break;
15306 }
15307
15308 if ((!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C)) &&
15309 PromOp.getOperand(i: C).getValueType() != N->getValueType(ResNo: 0)) ||
15310 (!isa<ConstantSDNode>(Val: PromOp.getOperand(i: C+1)) &&
15311 PromOp.getOperand(i: C+1).getValueType() != N->getValueType(ResNo: 0))) {
15312 // The to-be-promoted operands of this node have not yet been
15313 // promoted (this should be rare because we're going through the
15314 // list backward, but if one of the operands has several users in
15315 // this cluster of to-be-promoted nodes, it is possible).
15316 PromOpHandles.emplace_front(args&: PromOp);
15317 continue;
15318 }
15319
15320 // For SELECT and SELECT_CC nodes, we do a similar check for any
15321 // to-be-promoted comparison inputs.
15322 if (PromOp.getOpcode() == ISD::SELECT ||
15323 PromOp.getOpcode() == ISD::SELECT_CC) {
15324 if ((SelectTruncOp[0].count(Val: PromOp.getNode()) &&
15325 PromOp.getOperand(i: 0).getValueType() != N->getValueType(ResNo: 0)) ||
15326 (SelectTruncOp[1].count(Val: PromOp.getNode()) &&
15327 PromOp.getOperand(i: 1).getValueType() != N->getValueType(ResNo: 0))) {
15328 PromOpHandles.emplace_front(args&: PromOp);
15329 continue;
15330 }
15331 }
15332
15333 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15334
15335 // If this node has constant inputs, then they'll need to be promoted here.
15336 for (unsigned i = 0; i < 2; ++i) {
15337 if (!isa<ConstantSDNode>(Val: Ops[C+i]))
15338 continue;
15339 if (Ops[C+i].getValueType() == N->getValueType(ResNo: 0))
15340 continue;
15341
15342 if (N->getOpcode() == ISD::SIGN_EXTEND)
15343 Ops[C+i] = DAG.getSExtOrTrunc(Op: Ops[C+i], DL: dl, VT: N->getValueType(ResNo: 0));
15344 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15345 Ops[C+i] = DAG.getZExtOrTrunc(Op: Ops[C+i], DL: dl, VT: N->getValueType(ResNo: 0));
15346 else
15347 Ops[C+i] = DAG.getAnyExtOrTrunc(Op: Ops[C+i], DL: dl, VT: N->getValueType(ResNo: 0));
15348 }
15349
15350 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15351 // truncate them again to the original value type.
15352 if (PromOp.getOpcode() == ISD::SELECT ||
15353 PromOp.getOpcode() == ISD::SELECT_CC) {
15354 auto SI0 = SelectTruncOp[0].find(Val: PromOp.getNode());
15355 if (SI0 != SelectTruncOp[0].end())
15356 Ops[0] = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SI0->second, Operand: Ops[0]);
15357 auto SI1 = SelectTruncOp[1].find(Val: PromOp.getNode());
15358 if (SI1 != SelectTruncOp[1].end())
15359 Ops[1] = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SI1->second, Operand: Ops[1]);
15360 }
15361
15362 DAG.ReplaceAllUsesOfValueWith(From: PromOp,
15363 To: DAG.getNode(Opcode: PromOp.getOpcode(), DL: dl, VT: N->getValueType(ResNo: 0), Ops));
15364 }
15365
15366 // Now we're left with the initial extension itself.
15367 if (!ReallyNeedsExt)
15368 return N->getOperand(Num: 0);
15369
15370 // To zero extend, just mask off everything except for the first bit (in the
15371 // i1 case).
15372 if (N->getOpcode() == ISD::ZERO_EXTEND)
15373 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 0),
15374 N2: DAG.getConstant(Val: APInt::getLowBitsSet(
15375 numBits: N->getValueSizeInBits(ResNo: 0), loBitsSet: PromBits),
15376 DL: dl, VT: N->getValueType(ResNo: 0)));
15377
15378 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15379 "Invalid extension type");
15380 EVT ShiftAmountTy = getShiftAmountTy(LHSTy: N->getValueType(ResNo: 0), DL: DAG.getDataLayout());
15381 SDValue ShiftCst =
15382 DAG.getConstant(Val: N->getValueSizeInBits(ResNo: 0) - PromBits, DL: dl, VT: ShiftAmountTy);
15383 return DAG.getNode(
15384 Opcode: ISD::SRA, DL: dl, VT: N->getValueType(ResNo: 0),
15385 N1: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: N->getValueType(ResNo: 0), N1: N->getOperand(Num: 0), N2: ShiftCst),
15386 N2: ShiftCst);
15387}
15388
15389SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15390 DAGCombinerInfo &DCI) const {
15391 assert(N->getOpcode() == ISD::SETCC &&
15392 "Should be called with a SETCC node");
15393
15394 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
15395 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
15396 SDValue LHS = N->getOperand(Num: 0);
15397 SDValue RHS = N->getOperand(Num: 1);
15398
15399 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15400 if (LHS.getOpcode() == ISD::SUB && isNullConstant(V: LHS.getOperand(i: 0)) &&
15401 LHS.hasOneUse())
15402 std::swap(a&: LHS, b&: RHS);
15403
15404 // x == 0-y --> x+y == 0
15405 // x != 0-y --> x+y != 0
15406 if (RHS.getOpcode() == ISD::SUB && isNullConstant(V: RHS.getOperand(i: 0)) &&
15407 RHS.hasOneUse()) {
15408 SDLoc DL(N);
15409 SelectionDAG &DAG = DCI.DAG;
15410 EVT VT = N->getValueType(ResNo: 0);
15411 EVT OpVT = LHS.getValueType();
15412 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: OpVT, N1: LHS, N2: RHS.getOperand(i: 1));
15413 return DAG.getSetCC(DL, VT, LHS: Add, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond: CC);
15414 }
15415 }
15416
15417 return DAGCombineTruncBoolExt(N, DCI);
15418}
15419
15420// Is this an extending load from an f32 to an f64?
15421static bool isFPExtLoad(SDValue Op) {
15422 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: Op.getNode()))
15423 return LD->getExtensionType() == ISD::EXTLOAD &&
15424 Op.getValueType() == MVT::f64;
15425 return false;
15426}
15427
15428/// Reduces the number of fp-to-int conversions when building a vector.
15429///
15430/// If this vector is built out of floating to integer conversions,
15431/// transform it to a vector built out of floating point values followed by a
15432/// single floating to integer conversion of the vector.
15433/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15434/// becomes (fptosi (build_vector ($A, $B, ...)))
15435SDValue PPCTargetLowering::
15436combineElementTruncationToVectorTruncation(SDNode *N,
15437 DAGCombinerInfo &DCI) const {
15438 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15439 "Should be called with a BUILD_VECTOR node");
15440
15441 SelectionDAG &DAG = DCI.DAG;
15442 SDLoc dl(N);
15443
15444 SDValue FirstInput = N->getOperand(Num: 0);
15445 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
15446 "The input operand must be an fp-to-int conversion.");
15447
15448 // This combine happens after legalization so the fp_to_[su]i nodes are
15449  // already converted to PPCISD nodes.
15450 unsigned FirstConversion = FirstInput.getOperand(i: 0).getOpcode();
15451 if (FirstConversion == PPCISD::FCTIDZ ||
15452 FirstConversion == PPCISD::FCTIDUZ ||
15453 FirstConversion == PPCISD::FCTIWZ ||
15454 FirstConversion == PPCISD::FCTIWUZ) {
15455 bool IsSplat = true;
15456 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
15457 FirstConversion == PPCISD::FCTIWUZ;
15458 EVT SrcVT = FirstInput.getOperand(i: 0).getValueType();
15459 SmallVector<SDValue, 4> Ops;
15460 EVT TargetVT = N->getValueType(ResNo: 0);
15461 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15462 SDValue NextOp = N->getOperand(Num: i);
15463 if (NextOp.getOpcode() != PPCISD::MFVSR)
15464 return SDValue();
15465 unsigned NextConversion = NextOp.getOperand(i: 0).getOpcode();
15466 if (NextConversion != FirstConversion)
15467 return SDValue();
15468 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
15469 // This is not valid if the input was originally double precision. It is
15470      // also not profitable to do unless this is an extending load, in which
15471      // case doing this combine will allow us to combine consecutive loads.
15472 if (Is32Bit && !isFPExtLoad(Op: NextOp.getOperand(i: 0).getOperand(i: 0)))
15473 return SDValue();
15474 if (N->getOperand(Num: i) != FirstInput)
15475 IsSplat = false;
15476 }
15477
15478 // If this is a splat, we leave it as-is since there will be only a single
15479 // fp-to-int conversion followed by a splat of the integer. This is better
15480 // for 32-bit and smaller ints and neutral for 64-bit ints.
15481 if (IsSplat)
15482 return SDValue();
15483
15484 // Now that we know we have the right type of node, get its operands
15485 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15486 SDValue In = N->getOperand(Num: i).getOperand(i: 0);
15487 if (Is32Bit) {
15488 // For 32-bit values, we need to add an FP_ROUND node (if we made it
15489 // here, we know that all inputs are extending loads so this is safe).
15490 if (In.isUndef())
15491 Ops.push_back(Elt: DAG.getUNDEF(VT: SrcVT));
15492 else {
15493 SDValue Trunc =
15494 DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: In.getOperand(i: 0),
15495 N2: DAG.getIntPtrConstant(Val: 1, DL: dl, /*isTarget=*/true));
15496 Ops.push_back(Elt: Trunc);
15497 }
15498 } else
15499 Ops.push_back(Elt: In.isUndef() ? DAG.getUNDEF(VT: SrcVT) : In.getOperand(i: 0));
15500 }
15501
15502 unsigned Opcode;
15503 if (FirstConversion == PPCISD::FCTIDZ ||
15504 FirstConversion == PPCISD::FCTIWZ)
15505 Opcode = ISD::FP_TO_SINT;
15506 else
15507 Opcode = ISD::FP_TO_UINT;
15508
15509 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15510 SDValue BV = DAG.getBuildVector(VT: NewVT, DL: dl, Ops);
15511 return DAG.getNode(Opcode, DL: dl, VT: TargetVT, Operand: BV);
15512 }
15513 return SDValue();
15514}
15515
15516/// Reduce the number of loads when building a vector.
15517///
15518/// Building a vector out of multiple loads can be converted to a load
15519/// of the vector type if the loads are consecutive. If the loads are
15520/// consecutive but in descending order, a shuffle is added at the end
15521/// to reorder the vector.
15522static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
15523 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15524 "Should be called with a BUILD_VECTOR node");
15525
15526 SDLoc dl(N);
15527
15528  // Return early for non-byte-sized types, as they can't be consecutive.
15529 if (!N->getValueType(ResNo: 0).getVectorElementType().isByteSized())
15530 return SDValue();
15531
15532 bool InputsAreConsecutiveLoads = true;
15533 bool InputsAreReverseConsecutive = true;
15534 unsigned ElemSize = N->getValueType(ResNo: 0).getScalarType().getStoreSize();
15535 SDValue FirstInput = N->getOperand(Num: 0);
15536 bool IsRoundOfExtLoad = false;
15537 LoadSDNode *FirstLoad = nullptr;
15538
15539 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
15540 FirstInput.getOperand(i: 0).getOpcode() == ISD::LOAD) {
15541 FirstLoad = cast<LoadSDNode>(Val: FirstInput.getOperand(i: 0));
15542 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
15543 }
15544 // Not a build vector of (possibly fp_rounded) loads.
15545 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
15546 N->getNumOperands() == 1)
15547 return SDValue();
15548
15549 if (!IsRoundOfExtLoad)
15550 FirstLoad = cast<LoadSDNode>(Val&: FirstInput);
15551
15552 SmallVector<LoadSDNode *, 4> InputLoads;
15553 InputLoads.push_back(Elt: FirstLoad);
15554 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
15555 // If any inputs are fp_round(extload), they all must be.
15556 if (IsRoundOfExtLoad && N->getOperand(Num: i).getOpcode() != ISD::FP_ROUND)
15557 return SDValue();
15558
15559 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(Num: i).getOperand(i: 0) :
15560 N->getOperand(Num: i);
15561 if (NextInput.getOpcode() != ISD::LOAD)
15562 return SDValue();
15563
15564 SDValue PreviousInput =
15565 IsRoundOfExtLoad ? N->getOperand(Num: i-1).getOperand(i: 0) : N->getOperand(Num: i-1);
15566 LoadSDNode *LD1 = cast<LoadSDNode>(Val&: PreviousInput);
15567 LoadSDNode *LD2 = cast<LoadSDNode>(Val&: NextInput);
15568
15569 // If any inputs are fp_round(extload), they all must be.
15570 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
15571 return SDValue();
15572
15573 // We only care about regular loads. The PPC-specific load intrinsics
15574 // will not lead to a merge opportunity.
15575 if (!DAG.areNonVolatileConsecutiveLoads(LD: LD2, Base: LD1, Bytes: ElemSize, Dist: 1))
15576 InputsAreConsecutiveLoads = false;
15577 if (!DAG.areNonVolatileConsecutiveLoads(LD: LD1, Base: LD2, Bytes: ElemSize, Dist: 1))
15578 InputsAreReverseConsecutive = false;
15579
15580 // Exit early if the loads are neither consecutive nor reverse consecutive.
15581 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
15582 return SDValue();
15583 InputLoads.push_back(Elt: LD2);
15584 }
15585
15586 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
15587 "The loads cannot be both consecutive and reverse consecutive.");
15588
15589 SDValue WideLoad;
15590 SDValue ReturnSDVal;
15591 if (InputsAreConsecutiveLoads) {
15592 assert(FirstLoad && "Input needs to be a LoadSDNode.");
15593 WideLoad = DAG.getLoad(VT: N->getValueType(ResNo: 0), dl, Chain: FirstLoad->getChain(),
15594 Ptr: FirstLoad->getBasePtr(), PtrInfo: FirstLoad->getPointerInfo(),
15595 Alignment: FirstLoad->getAlign());
15596 ReturnSDVal = WideLoad;
15597 } else if (InputsAreReverseConsecutive) {
15598 LoadSDNode *LastLoad = InputLoads.back();
15599 assert(LastLoad && "Input needs to be a LoadSDNode.");
15600 WideLoad = DAG.getLoad(VT: N->getValueType(ResNo: 0), dl, Chain: LastLoad->getChain(),
15601 Ptr: LastLoad->getBasePtr(), PtrInfo: LastLoad->getPointerInfo(),
15602 Alignment: LastLoad->getAlign());
15603 SmallVector<int, 16> Ops;
15604 for (int i = N->getNumOperands() - 1; i >= 0; i--)
15605 Ops.push_back(Elt: i);
15606
15607 ReturnSDVal = DAG.getVectorShuffle(VT: N->getValueType(ResNo: 0), dl, N1: WideLoad,
15608 N2: DAG.getUNDEF(VT: N->getValueType(ResNo: 0)), Mask: Ops);
15609 } else
15610 return SDValue();
15611
15612 for (auto *LD : InputLoads)
15613 DAG.makeEquivalentMemoryOrdering(OldLoad: LD, NewMemOp: WideLoad);
15614 return ReturnSDVal;
15615}
15616
15617 // This function adds the vector_shuffle needed to get the elements of the
15618 // vector extracts into the positions specified by the CorrectElems
15619 // encoding.
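// For example (illustrative): when extending bytes to words on little endian,
// the vector sign-extend instruction reads bytes 0, 4, 8 and 12, so extracts
// of any other byte indices are first shuffled into those lanes.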
15620static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
15621 SDValue Input, uint64_t Elems,
15622 uint64_t CorrectElems) {
15623 SDLoc dl(N);
15624
15625 unsigned NumElems = Input.getValueType().getVectorNumElements();
15626 SmallVector<int, 16> ShuffleMask(NumElems, -1);
15627
15628 // Knowing the element indices being extracted from the original
15629 // vector and the order in which they're being inserted, just put
15630 // them at element indices required for the instruction.
15631 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15632 if (DAG.getDataLayout().isLittleEndian())
15633 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
15634 else
15635 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
15636 CorrectElems = CorrectElems >> 8;
15637 Elems = Elems >> 8;
15638 }
15639
15640 SDValue Shuffle =
15641 DAG.getVectorShuffle(VT: Input.getValueType(), dl, N1: Input,
15642 N2: DAG.getUNDEF(VT: Input.getValueType()), Mask: ShuffleMask);
15643
15644 EVT VT = N->getValueType(ResNo: 0);
15645 SDValue Conv = DAG.getBitcast(VT, V: Shuffle);
15646
15647 EVT ExtVT = EVT::getVectorVT(Context&: *DAG.getContext(),
15648 VT: Input.getValueType().getVectorElementType(),
15649 NumElements: VT.getVectorNumElements());
15650 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT, N1: Conv,
15651 N2: DAG.getValueType(ExtVT));
15652}
15653
15654 // Look for build vector patterns where the input operands come from sign-
15655 // extended vector_extract elements of specific indices. If the correct indices
15656 // aren't used, add a vector shuffle to fix up the indices and create a
15657 // SIGN_EXTEND_INREG node which selects the vector sign-extend instructions
15658 // during instruction selection.
15659static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
15660 // This array encodes the indices that the vector sign extend instructions
15661 // extract from when extending from one type to another for both BE and LE.
15662 // The right nibble of each byte corresponds to the LE indices,
15663 // and the left nibble of each byte corresponds to the BE indices.
15664 // For example: 0x3074B8FC byte->word
15665 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
15666 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
15667 // For example: 0x000070F8 byte->double word
15668 // For LE: the allowed indices are: 0x0,0x8
15669 // For BE: the allowed indices are: 0x7,0xF
15670 uint64_t TargetElems[] = {
15671 0x3074B8FC, // b->w
15672 0x000070F8, // b->d
15673 0x10325476, // h->w
15674 0x00003074, // h->d
15675 0x00001032, // w->d
15676 };
15677
15678 uint64_t Elems = 0;
15679 int Index;
15680 SDValue Input;
15681
15682 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
15683 if (!Op)
15684 return false;
15685 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
15686 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
15687 return false;
15688
15689 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
15690 // of the right width.
15691 SDValue Extract = Op.getOperand(i: 0);
15692 if (Extract.getOpcode() == ISD::ANY_EXTEND)
15693 Extract = Extract.getOperand(i: 0);
15694 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15695 return false;
15696
15697 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Val: Extract.getOperand(i: 1));
15698 if (!ExtOp)
15699 return false;
15700
15701 Index = ExtOp->getZExtValue();
15702 if (Input && Input != Extract.getOperand(i: 0))
15703 return false;
15704
15705 if (!Input)
15706 Input = Extract.getOperand(i: 0);
15707
15708 Elems = Elems << 8;
15709 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
15710 Elems |= Index;
15711
15712 return true;
15713 };
15714
15715 // If the build vector operands aren't sign-extended vector extracts
15716 // of the same input vector, then return.
15717 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15718 if (!isSExtOfVecExtract(N->getOperand(Num: i))) {
15719 return SDValue();
15720 }
15721 }
15722
15723 // If the vector extract indices are not correct, add the appropriate
15724 // vector_shuffle.
15725 int TgtElemArrayIdx;
15726 int InputSize = Input.getValueType().getScalarSizeInBits();
15727 int OutputSize = N->getValueType(ResNo: 0).getScalarSizeInBits();
15728 if (InputSize + OutputSize == 40)
15729 TgtElemArrayIdx = 0;
15730 else if (InputSize + OutputSize == 72)
15731 TgtElemArrayIdx = 1;
15732 else if (InputSize + OutputSize == 48)
15733 TgtElemArrayIdx = 2;
15734 else if (InputSize + OutputSize == 80)
15735 TgtElemArrayIdx = 3;
15736 else if (InputSize + OutputSize == 96)
15737 TgtElemArrayIdx = 4;
15738 else
15739 return SDValue();
15740
15741 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
15742 CorrectElems = DAG.getDataLayout().isLittleEndian()
15743 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
15744 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
15745 if (Elems != CorrectElems) {
15746 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
15747 }
15748
15749 // Regular lowering will catch cases where a shuffle is not needed.
15750 return SDValue();
15751}
15752
15753// Look for the pattern of a load from a narrow width to i128, feeding
15754// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
15755// (LXVRZX). This node represents a zero extending load that will be matched
15756// to the Load VSX Vector Rightmost instructions.
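// For example (illustrative, %p is a placeholder pointer):
//   (v1i128 (build_vector (i128 (zextload i32 %p))))
//     -> (v1i128 (LXVRZX %p, 32))
// where the constant operand is the width of the memory access in bits.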
15757static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
15758 SDLoc DL(N);
15759
15760 // This combine is only eligible for a BUILD_VECTOR of v1i128.
15761 if (N->getValueType(ResNo: 0) != MVT::v1i128)
15762 return SDValue();
15763
15764 SDValue Operand = N->getOperand(Num: 0);
15765 // Proceed with the transformation if the operand to the BUILD_VECTOR
15766 // is a load instruction.
15767 if (Operand.getOpcode() != ISD::LOAD)
15768 return SDValue();
15769
15770 auto *LD = cast<LoadSDNode>(Val&: Operand);
15771 EVT MemoryType = LD->getMemoryVT();
15772
15773 // This transformation is only valid if we are loading either a byte,
15774 // halfword, word, or doubleword.
15775 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15776 MemoryType == MVT::i32 || MemoryType == MVT::i64;
15777
15778 // Ensure that the load from the narrow width is being zero extended to i128.
15779 if (!ValidLDType ||
15780 (LD->getExtensionType() != ISD::ZEXTLOAD &&
15781 LD->getExtensionType() != ISD::EXTLOAD))
15782 return SDValue();
15783
15784 SDValue LoadOps[] = {
15785 LD->getChain(), LD->getBasePtr(),
15786 DAG.getIntPtrConstant(Val: MemoryType.getScalarSizeInBits(), DL)};
15787
15788 return DAG.getMemIntrinsicNode(Opcode: PPCISD::LXVRZX, dl: DL,
15789 VTList: DAG.getVTList(VT1: MVT::v1i128, VT2: MVT::Other),
15790 Ops: LoadOps, MemVT: MemoryType, MMO: LD->getMemOperand());
15791}
15792
15793SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15794 DAGCombinerInfo &DCI) const {
15795 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15796 "Should be called with a BUILD_VECTOR node");
15797
15798 SelectionDAG &DAG = DCI.DAG;
15799 SDLoc dl(N);
15800
15801 if (!Subtarget.hasVSX())
15802 return SDValue();
15803
15804 // The target independent DAG combiner will leave a build_vector of
15805 // float-to-int conversions intact. We can generate MUCH better code for
15806 // a float-to-int conversion of a vector of floats.
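// For example (illustrative, %a..%d are placeholder FP values):
//   (v4i32 (build_vector (MFVSR (FCTIWZ %a)), ..., (MFVSR (FCTIWZ %d))))
//     -> (v4i32 (fp_to_sint (v4f32 (build_vector %a', ..., %d'))))
// where %a'..%d' are the original FP inputs, FP_ROUNDed to f32 if needed.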
15807 SDValue FirstInput = N->getOperand(Num: 0);
15808 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15809 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15810 if (Reduced)
15811 return Reduced;
15812 }
15813
15814 // If we're building a vector out of consecutive loads, just load that
15815 // vector type.
15816 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15817 if (Reduced)
15818 return Reduced;
15819
15820 // If we're building a vector out of extended elements from another vector,
15821 // we can use the P9 vector integer extend instructions. The code assumes
15822 // legal input types (i.e. it can't handle things like v4i16), so do not run
15823 // before legalization.
15824 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15825 Reduced = combineBVOfVecSExt(N, DAG);
15826 if (Reduced)
15827 return Reduced;
15828 }
15829
15830 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15831 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15832 // is a load from <valid narrow width> to i128.
15833 if (Subtarget.isISA3_1()) {
15834 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15835 if (BVOfZLoad)
15836 return BVOfZLoad;
15837 }
15838
15839 if (N->getValueType(ResNo: 0) != MVT::v2f64)
15840 return SDValue();
15841
15842 // Looking for:
15843 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
15844 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15845 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15846 return SDValue();
15847 if (N->getOperand(Num: 1).getOpcode() != ISD::SINT_TO_FP &&
15848 N->getOperand(Num: 1).getOpcode() != ISD::UINT_TO_FP)
15849 return SDValue();
15850 if (FirstInput.getOpcode() != N->getOperand(Num: 1).getOpcode())
15851 return SDValue();
15852
15853 SDValue Ext1 = FirstInput.getOperand(i: 0);
15854 SDValue Ext2 = N->getOperand(Num: 1).getOperand(i: 0);
15855 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15856 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15857 return SDValue();
15858
15859 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Val: Ext1.getOperand(i: 1));
15860 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Val: Ext2.getOperand(i: 1));
15861 if (!Ext1Op || !Ext2Op)
15862 return SDValue();
15863 if (Ext1.getOperand(i: 0).getValueType() != MVT::v4i32 ||
15864 Ext1.getOperand(i: 0) != Ext2.getOperand(i: 0))
15865 return SDValue();
15866
15867 int FirstElem = Ext1Op->getZExtValue();
15868 int SecondElem = Ext2Op->getZExtValue();
15869 int SubvecIdx;
15870 if (FirstElem == 0 && SecondElem == 1)
15871 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
15872 else if (FirstElem == 2 && SecondElem == 3)
15873 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15874 else
15875 return SDValue();
15876
15877 SDValue SrcVec = Ext1.getOperand(i: 0);
15878 auto NodeType = (N->getOperand(Num: 1).getOpcode() == ISD::SINT_TO_FP) ?
15879 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
15880 return DAG.getNode(Opcode: NodeType, DL: dl, VT: MVT::v2f64,
15881 N1: SrcVec, N2: DAG.getIntPtrConstant(Val: SubvecIdx, DL: dl));
15882}
15883
15884SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15885 DAGCombinerInfo &DCI) const {
15886 assert((N->getOpcode() == ISD::SINT_TO_FP ||
15887 N->getOpcode() == ISD::UINT_TO_FP) &&
15888 "Need an int -> FP conversion node here");
15889
15890 if (useSoftFloat() || !Subtarget.has64BitSupport())
15891 return SDValue();
15892
15893 SelectionDAG &DAG = DCI.DAG;
15894 SDLoc dl(N);
15895 SDValue Op(N, 0);
15896
15897 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
15898 // from the hardware.
15899 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15900 return SDValue();
15901 if (!Op.getOperand(i: 0).getValueType().isSimple())
15902 return SDValue();
15903 if (Op.getOperand(i: 0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15904 Op.getOperand(i: 0).getValueType().getSimpleVT() > MVT(MVT::i64))
15905 return SDValue();
15906
15907 SDValue FirstOperand(Op.getOperand(i: 0));
15908 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15909 (FirstOperand.getValueType() == MVT::i8 ||
15910 FirstOperand.getValueType() == MVT::i16);
15911 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15912 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15913 bool DstDouble = Op.getValueType() == MVT::f64;
15914 unsigned ConvOp = Signed ?
15915 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
15916 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
15917 SDValue WidthConst =
15918 DAG.getIntPtrConstant(Val: FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15919 DL: dl, isTarget: false);
15920 LoadSDNode *LDN = cast<LoadSDNode>(Val: FirstOperand.getNode());
15921 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15922 SDValue Ld = DAG.getMemIntrinsicNode(Opcode: PPCISD::LXSIZX, dl,
15923 VTList: DAG.getVTList(VT1: MVT::f64, VT2: MVT::Other),
15924 Ops, MemVT: MVT::i8, MMO: LDN->getMemOperand());
15925 DAG.makeEquivalentMemoryOrdering(OldLoad: LDN, NewMemOp: Ld);
15926
15927 // For signed conversion, we need to sign-extend the value in the VSR
15928 if (Signed) {
15929 SDValue ExtOps[] = { Ld, WidthConst };
15930 SDValue Ext = DAG.getNode(Opcode: PPCISD::VEXTS, DL: dl, VT: MVT::f64, Ops: ExtOps);
15931 return DAG.getNode(Opcode: ConvOp, DL: dl, VT: DstDouble ? MVT::f64 : MVT::f32, Operand: Ext);
15932 } else
15933 return DAG.getNode(Opcode: ConvOp, DL: dl, VT: DstDouble ? MVT::f64 : MVT::f32, Operand: Ld);
15934 }
15935
15936
15937 // For i32 intermediate values, unfortunately, the conversion functions
15938 // leave the upper 32 bits of the value undefined. Within the set of
15939 // scalar instructions, we have no method for zero- or sign-extending the
15940 // value. Thus, we cannot handle i32 intermediate values here.
15941 if (Op.getOperand(i: 0).getValueType() == MVT::i32)
15942 return SDValue();
15943
15944 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15945 "UINT_TO_FP is supported only with FPCVT");
15946
15947 // If we have FCFIDS, then use it when converting to single-precision.
15948 // Otherwise, convert to double-precision and then round.
15949 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15950 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15951 : PPCISD::FCFIDS)
15952 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15953 : PPCISD::FCFID);
15954 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15955 ? MVT::f32
15956 : MVT::f64;
15957
15958 // If we're converting from a float to an int and back to a float again,
15959 // then we don't need the store/load pair at all.
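// For example (illustrative, %x is a placeholder f64 value):
//   (f64 (sint_to_fp (i64 (fp_to_sint %x)))) -> (f64 (FCFID (FCTIDZ %x)))
// with an FP_ROUND appended if the result is f32 and FPCVT is unavailable.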
15960 if ((Op.getOperand(i: 0).getOpcode() == ISD::FP_TO_UINT &&
15961 Subtarget.hasFPCVT()) ||
15962 (Op.getOperand(i: 0).getOpcode() == ISD::FP_TO_SINT)) {
15963 SDValue Src = Op.getOperand(i: 0).getOperand(i: 0);
15964 if (Src.getValueType() == MVT::f32) {
15965 Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f64, Operand: Src);
15966 DCI.AddToWorklist(N: Src.getNode());
15967 } else if (Src.getValueType() != MVT::f64) {
15968 // Make sure that we don't pick up a ppc_fp128 source value.
15969 return SDValue();
15970 }
15971
15972 unsigned FCTOp =
15973 Op.getOperand(i: 0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15974 PPCISD::FCTIDUZ;
15975
15976 SDValue Tmp = DAG.getNode(Opcode: FCTOp, DL: dl, VT: MVT::f64, Operand: Src);
15977 SDValue FP = DAG.getNode(Opcode: FCFOp, DL: dl, VT: FCFTy, Operand: Tmp);
15978
15979 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15980 FP = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: MVT::f32, N1: FP,
15981 N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
15982 DCI.AddToWorklist(N: FP.getNode());
15983 }
15984
15985 return FP;
15986 }
15987
15988 return SDValue();
15989}
15990
15991// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15992// builtins) into loads with swaps.
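// For example (illustrative, %p is a placeholder pointer):
//   (v4i32 (load %p)) -> (v4i32 (bitcast (XXSWAPD (LXVD2X %p))))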
15993SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15994 DAGCombinerInfo &DCI) const {
15995 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15996 // load combines.
15997 if (DCI.isBeforeLegalizeOps())
15998 return SDValue();
15999
16000 SelectionDAG &DAG = DCI.DAG;
16001 SDLoc dl(N);
16002 SDValue Chain;
16003 SDValue Base;
16004 MachineMemOperand *MMO;
16005
16006 switch (N->getOpcode()) {
16007 default:
16008 llvm_unreachable("Unexpected opcode for little endian VSX load");
16009 case ISD::LOAD: {
16010 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
16011 Chain = LD->getChain();
16012 Base = LD->getBasePtr();
16013 MMO = LD->getMemOperand();
16014 // If the MMO suggests this isn't a load of a full vector, leave
16015 // things alone. For a built-in, we have to make the change for
16016 // correctness, so if there is a size problem that will be a bug.
16017 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16018 return SDValue();
16019 break;
16020 }
16021 case ISD::INTRINSIC_W_CHAIN: {
16022 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(Val: N);
16023 Chain = Intrin->getChain();
16024 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16025 // us what we want. Get operand 2 instead.
16026 Base = Intrin->getOperand(Num: 2);
16027 MMO = Intrin->getMemOperand();
16028 break;
16029 }
16030 }
16031
16032 MVT VecTy = N->getValueType(ResNo: 0).getSimpleVT();
16033
16034 SDValue LoadOps[] = { Chain, Base };
16035 SDValue Load = DAG.getMemIntrinsicNode(Opcode: PPCISD::LXVD2X, dl,
16036 VTList: DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other),
16037 Ops: LoadOps, MemVT: MVT::v2f64, MMO);
16038
16039 DCI.AddToWorklist(N: Load.getNode());
16040 Chain = Load.getValue(R: 1);
16041 SDValue Swap = DAG.getNode(
16042 Opcode: PPCISD::XXSWAPD, DL: dl, VTList: DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other), N1: Chain, N2: Load);
16043 DCI.AddToWorklist(N: Swap.getNode());
16044
16045 // Add a bitcast if the resulting load type doesn't match v2f64.
16046 if (VecTy != MVT::v2f64) {
16047 SDValue N = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: VecTy, Operand: Swap);
16048 DCI.AddToWorklist(N: N.getNode());
16049 // Package {bitcast value, swap's chain} to match Load's shape.
16050 return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: dl, VTList: DAG.getVTList(VT1: VecTy, VT2: MVT::Other),
16051 N1: N, N2: Swap.getValue(R: 1));
16052 }
16053
16054 return Swap;
16055}
16056
16057// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16058// builtins) into stores with swaps.
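// For example (illustrative, %v and %p are placeholders):
//   (store v4i32:%v, %p) -> (STXVD2X (XXSWAPD (v2f64 (bitcast %v))), %p)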
16059SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
16060 DAGCombinerInfo &DCI) const {
16061 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16062 // store combines.
16063 if (DCI.isBeforeLegalizeOps())
16064 return SDValue();
16065
16066 SelectionDAG &DAG = DCI.DAG;
16067 SDLoc dl(N);
16068 SDValue Chain;
16069 SDValue Base;
16070 unsigned SrcOpnd;
16071 MachineMemOperand *MMO;
16072
16073 switch (N->getOpcode()) {
16074 default:
16075 llvm_unreachable("Unexpected opcode for little endian VSX store");
16076 case ISD::STORE: {
16077 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
16078 Chain = ST->getChain();
16079 Base = ST->getBasePtr();
16080 MMO = ST->getMemOperand();
16081 SrcOpnd = 1;
16082 // If the MMO suggests this isn't a store of a full vector, leave
16083 // things alone. For a built-in, we have to make the change for
16084 // correctness, so if there is a size problem that will be a bug.
16085 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16086 return SDValue();
16087 break;
16088 }
16089 case ISD::INTRINSIC_VOID: {
16090 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(Val: N);
16091 Chain = Intrin->getChain();
16092 // Intrin->getBasePtr() oddly does not get what we want.
16093 Base = Intrin->getOperand(Num: 3);
16094 MMO = Intrin->getMemOperand();
16095 SrcOpnd = 2;
16096 break;
16097 }
16098 }
16099
16100 SDValue Src = N->getOperand(Num: SrcOpnd);
16101 MVT VecTy = Src.getValueType().getSimpleVT();
16102
16103 // All stores are done as v2f64 with a possible bitcast.
16104 if (VecTy != MVT::v2f64) {
16105 Src = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::v2f64, Operand: Src);
16106 DCI.AddToWorklist(N: Src.getNode());
16107 }
16108
16109 SDValue Swap = DAG.getNode(Opcode: PPCISD::XXSWAPD, DL: dl,
16110 VTList: DAG.getVTList(VT1: MVT::v2f64, VT2: MVT::Other), N1: Chain, N2: Src);
16111 DCI.AddToWorklist(N: Swap.getNode());
16112 Chain = Swap.getValue(R: 1);
16113 SDValue StoreOps[] = { Chain, Swap, Base };
16114 SDValue Store = DAG.getMemIntrinsicNode(Opcode: PPCISD::STXVD2X, dl,
16115 VTList: DAG.getVTList(VT: MVT::Other),
16116 Ops: StoreOps, MemVT: VecTy, MMO);
16117 DCI.AddToWorklist(N: Store.getNode());
16118 return Store;
16119}
16120
16121// Handle DAG combine for STORE (FP_TO_INT F).
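// For example (illustrative, %f and %p are placeholders):
//   (store (i32 (fp_to_sint f64:%f)), %p)
//     -> (ST_VSR_SCAL_INT chain, (converted %f), %p, 4, i32)
// so the converted value is stored directly from a VSR rather than being
// moved through a GPR first.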
16122SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16123 DAGCombinerInfo &DCI) const {
16124 SelectionDAG &DAG = DCI.DAG;
16125 SDLoc dl(N);
16126 unsigned Opcode = N->getOperand(Num: 1).getOpcode();
16127 (void)Opcode;
16128 bool Strict = N->getOperand(Num: 1)->isStrictFPOpcode();
16129
16130 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16131 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
16132 && "Not a FP_TO_INT Instruction!");
16133
16134 SDValue Val = N->getOperand(Num: 1).getOperand(i: Strict ? 1 : 0);
16135 EVT Op1VT = N->getOperand(Num: 1).getValueType();
16136 EVT ResVT = Val.getValueType();
16137
16138 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(VT: ResVT))
16139 return SDValue();
16140
16141 // Only perform the combine for conversions to i64/i32, or to i16/i8 on Power9.
16142 bool ValidTypeForStoreFltAsInt =
16143 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
16144 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
16145
16146 // TODO: Lower conversion from f128 on all VSX targets
16147 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
16148 return SDValue();
16149
16150 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
16151 cast<StoreSDNode>(Val: N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
16152 return SDValue();
16153
16154 Val = convertFPToInt(Op: N->getOperand(Num: 1), DAG, Subtarget);
16155
16156 // Set number of bytes being converted.
16157 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
16158 SDValue Ops[] = {N->getOperand(Num: 0), Val, N->getOperand(Num: 2),
16159 DAG.getIntPtrConstant(Val: ByteSize, DL: dl, isTarget: false),
16160 DAG.getValueType(Op1VT)};
16161
16162 Val = DAG.getMemIntrinsicNode(Opcode: PPCISD::ST_VSR_SCAL_INT, dl,
16163 VTList: DAG.getVTList(VT: MVT::Other), Ops,
16164 MemVT: cast<StoreSDNode>(Val: N)->getMemoryVT(),
16165 MMO: cast<StoreSDNode>(Val: N)->getMemOperand());
16166
16167 return Val;
16168}
16169
16170static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16171 // Check that the source of the element keeps flipping
16172 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
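// For example (illustrative), with NumElts = 4 the mask <0, 5, 2, 7>
// alternates between the two source vectors, while <0, 1, 6, 7> does not.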
16173 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16174 for (int i = 1, e = Mask.size(); i < e; i++) {
16175 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16176 return false;
16177 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16178 return false;
16179 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16180 }
16181 return true;
16182}
16183
16184static bool isSplatBV(SDValue Op) {
16185 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16186 return false;
16187 SDValue FirstOp;
16188
16189 // Find first non-undef input.
16190 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16191 FirstOp = Op.getOperand(i);
16192 if (!FirstOp.isUndef())
16193 break;
16194 }
16195
16196 // All inputs are undef or the same as the first non-undef input.
16197 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16198 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16199 return false;
16200 return true;
16201}
16202
16203static SDValue isScalarToVec(SDValue Op) {
16204 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16205 return Op;
16206 if (Op.getOpcode() != ISD::BITCAST)
16207 return SDValue();
16208 Op = Op.getOperand(i: 0);
16209 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16210 return Op;
16211 return SDValue();
16212}
16213
16214// Fix up the shuffle mask to account for the fact that the result of
16215// scalar_to_vector is not in lane zero. This just takes all values in
16216// the ranges specified by the min/max indices and adds the number of
16217// elements required to ensure each element comes from the respective
16218// position in the valid lane.
16219// On little endian, that's just the corresponding element in the other
16220// half of the vector. On big endian, it is in the same half but right
16221// justified rather than left justified in that half.
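// For example (illustrative): for a v4i32 shuffle on little endian where the
// LHS is a permuted scalar_to_vector, a mask entry of 0 becomes 2 (HalfVec),
// because the scalar now lives in the other half of the LHS vector.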
16222static void fixupShuffleMaskForPermutedSToV(
16223 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16224 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16225 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16226 int LHSEltFixup =
16227 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16228 int RHSEltFixup =
16229 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16230 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
16231 int Idx = ShuffV[I];
16232 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16233 ShuffV[I] += LHSEltFixup;
16234 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16235 ShuffV[I] += RHSEltFixup;
16236 }
16237}
16238
16239// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
16240// the original is:
16241// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
16242// In such a case, just change the shuffle mask to extract the element
16243// from the permuted index.
16244static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
16245 const PPCSubtarget &Subtarget) {
16246 SDLoc dl(OrigSToV);
16247 EVT VT = OrigSToV.getValueType();
16248 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16249 "Expecting a SCALAR_TO_VECTOR here");
16250 SDValue Input = OrigSToV.getOperand(i: 0);
16251
16252 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16253 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Val: Input.getOperand(i: 1));
16254 SDValue OrigVector = Input.getOperand(i: 0);
16255
16256 // Can't handle non-const element indices or different vector types
16257 // for the input to the extract and the output of the scalar_to_vector.
16258 if (Idx && VT == OrigVector.getValueType()) {
16259 unsigned NumElts = VT.getVectorNumElements();
16260 assert(
16261 NumElts > 1 &&
16262 "Cannot produce a permuted scalar_to_vector for one element vector");
16263 SmallVector<int, 16> NewMask(NumElts, -1);
16264 unsigned ResultInElt = NumElts / 2;
16265 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
16266 NewMask[ResultInElt] = Idx->getZExtValue();
16267 return DAG.getVectorShuffle(VT, dl, N1: OrigVector, N2: OrigVector, Mask: NewMask);
16268 }
16269 }
16270 return DAG.getNode(Opcode: PPCISD::SCALAR_TO_VECTOR_PERMUTED, DL: dl, VT,
16271 Operand: OrigSToV.getOperand(i: 0));
16272}
16273
16274static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
16275 int HalfVec, int LHSLastElementDefined,
16276 int RHSLastElementDefined) {
16277 for (int Index : ShuffV) {
16278 if (Index < 0) // Skip explicitly undefined mask indices.
16279 continue;
16280 // Handle first input vector of the vector_shuffle.
16281 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
16282 (Index > LHSLastElementDefined))
16283 return false;
16284 // Handle second input vector of the vector_shuffle.
16285 if ((RHSLastElementDefined >= 0) &&
16286 (Index > HalfVec + RHSLastElementDefined))
16287 return false;
16288 }
16289 return true;
16290}
16291
16292static SDValue generateSToVPermutedForVecShuffle(
16293 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
16294 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
16295 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
16296 EVT VecShuffOperandType = VecShuffOperand.getValueType();
16297 // Set up the values for the shuffle vector fixup.
16298 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
16299 // The last element depends on if the input comes from the LHS or RHS.
16300 //
16301 // For example:
16302 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
16303 //
16304 // For the LHS: The last element that comes from the LHS is actually 0, not 3
16305 // because elements 1 and higher of a scalar_to_vector are undefined.
16306 // For the RHS: The last element that comes from the RHS is actually 5, not 7
16307 // because elements 1 and higher of a scalar_to_vector are undefined.
16308 // It is also not 4 because the original scalar_to_vector is wider and
16309 // actually contains two i32 elements.
16310 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
16311 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
16312 : FirstElt;
16313 SDValue SToVPermuted = getSToVPermuted(OrigSToV: SToVNode, DAG, Subtarget);
16314 if (SToVPermuted.getValueType() != VecShuffOperandType)
16315 SToVPermuted = DAG.getBitcast(VT: VecShuffOperandType, V: SToVPermuted);
16316 return SToVPermuted;
16317}
16318
16319// On little endian subtargets, combine shuffles such as:
16320// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
16321// into:
16322// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
16323// because the latter can be matched to a single instruction merge.
16324// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
16325// to put the value into element zero. Adjust the shuffle mask so that the
16326// vector can remain in permuted form (to prevent a swap prior to a shuffle).
16327// On big endian targets, this is still useful for SCALAR_TO_VECTOR
16328// nodes with elements smaller than doubleword because all the ways
16329// of getting scalar data into a vector register put the value in the
16330// rightmost element of the left half of the vector.
16331SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
16332 SelectionDAG &DAG) const {
16333 SDValue LHS = SVN->getOperand(Num: 0);
16334 SDValue RHS = SVN->getOperand(Num: 1);
16335 auto Mask = SVN->getMask();
16336 int NumElts = LHS.getValueType().getVectorNumElements();
16337 SDValue Res(SVN, 0);
16338 SDLoc dl(SVN);
16339 bool IsLittleEndian = Subtarget.isLittleEndian();
16340
16341 // On big endian targets this is only useful for subtargets with direct moves.
16342 // On little endian targets it would be useful for all subtargets with VSX.
16343 // However adding special handling for LE subtargets without direct moves
16344 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
16345 // which includes direct moves.
16346 if (!Subtarget.hasDirectMove())
16347 return Res;
16348
16349 // If this is not a shuffle of a shuffle and the first element comes from
16350 // the second vector, canonicalize to the commuted form. This will make it
16351 // more likely to match one of the single instruction patterns.
16352 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
16353 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
16354 std::swap(a&: LHS, b&: RHS);
16355 Res = DAG.getCommutedVectorShuffle(SV: *SVN);
16356 Mask = cast<ShuffleVectorSDNode>(Val&: Res)->getMask();
16357 }
16358
16359 // Adjust the shuffle mask if either input vector comes from a
16360 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
16361 // form (to prevent the need for a swap).
16362 SmallVector<int, 16> ShuffV(Mask);
16363 SDValue SToVLHS = isScalarToVec(Op: LHS);
16364 SDValue SToVRHS = isScalarToVec(Op: RHS);
16365 if (SToVLHS || SToVRHS) {
16366 EVT VT = SVN->getValueType(ResNo: 0);
16367 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
16368 int ShuffleNumElts = ShuffV.size();
16369 int HalfVec = ShuffleNumElts / 2;
16370 // The width of the "valid lane" (i.e. the lane that contains the value that
16371 // is vectorized) needs to be expressed in terms of the number of elements
16372 // of the shuffle. It is thereby the ratio of the values before and after
16373 // any bitcast, which will be set later on if the LHS or RHS are
16374 // SCALAR_TO_VECTOR nodes.
16375 unsigned LHSNumValidElts = HalfVec;
16376 unsigned RHSNumValidElts = HalfVec;
16377
16378 // Initially assume that neither input is permuted. These will be adjusted
16379 // accordingly if either input is. Note, that -1 means that all elements
16380 // are undefined.
16381 int LHSFirstElt = 0;
16382 int RHSFirstElt = ShuffleNumElts;
16383 int LHSLastElt = -1;
16384 int RHSLastElt = -1;
16385
16386 // Get the permuted scalar to vector nodes for the source(s) that come from
16387 // ISD::SCALAR_TO_VECTOR.
16388 // On big endian systems, this only makes sense for element sizes smaller
16389 // than 64 bits since for 64-bit elements, all instructions already put
16390 // the value into element zero. Since the scalar sizes of the LHS and RHS may
16391 // differ after isScalarToVec, this should be checked using their own sizes.
16392 int LHSScalarSize = 0;
16393 int RHSScalarSize = 0;
16394 if (SToVLHS) {
16395 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
16396 if (!IsLittleEndian && LHSScalarSize >= 64)
16397 return Res;
16398 }
16399 if (SToVRHS) {
16400 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
16401 if (!IsLittleEndian && RHSScalarSize >= 64)
16402 return Res;
16403 }
16404 if (LHSScalarSize != 0)
16405 LHS = generateSToVPermutedForVecShuffle(
16406 ScalarSize: LHSScalarSize, ShuffleEltWidth, NumValidElts&: LHSNumValidElts, FirstElt: LHSFirstElt,
16407 LastElt&: LHSLastElt, VecShuffOperand: LHS, SToVNode: SToVLHS, DAG, Subtarget);
16408 if (RHSScalarSize != 0)
16409 RHS = generateSToVPermutedForVecShuffle(
16410 ScalarSize: RHSScalarSize, ShuffleEltWidth, NumValidElts&: RHSNumValidElts, FirstElt: RHSFirstElt,
16411 LastElt&: RHSLastElt, VecShuffOperand: RHS, SToVNode: SToVRHS, DAG, Subtarget);
16412
16413 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElementDefined: LHSLastElt, RHSLastElementDefined: RHSLastElt))
16414 return Res;
16415
16416 // Fix up the shuffle mask to reflect where the desired element actually is.
16417 // The minimum and maximum indices that correspond to element zero for both
16418 // the LHS and RHS are computed and will control which shuffle mask entries
16419 // are to be changed. For example, if the RHS is permuted, any shuffle mask
16420 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
16421 fixupShuffleMaskForPermutedSToV(
16422 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
16423 LHSNumValidElts, RHSNumValidElts, Subtarget);
16424 Res = DAG.getVectorShuffle(VT: SVN->getValueType(ResNo: 0), dl, N1: LHS, N2: RHS, Mask: ShuffV);
16425
16426 // We may have simplified away the shuffle. We won't be able to do anything
16427 // further with it here.
16428 if (!isa<ShuffleVectorSDNode>(Val: Res))
16429 return Res;
16430 Mask = cast<ShuffleVectorSDNode>(Val&: Res)->getMask();
16431 }
16432
16433 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
16434 // The common case after we commuted the shuffle is that the RHS is a splat
16435 // and we have elements coming in from the splat at indices that are not
16436 // conducive to using a merge.
16437 // Example:
16438 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
16439 if (!isSplatBV(Op: TheSplat))
16440 return Res;
16441
16442 // We are looking for a mask such that all even elements are from
16443 // one vector and all odd elements from the other.
16444 if (!isAlternatingShuffMask(Mask, NumElts))
16445 return Res;
16446
16447 // Adjust the mask so we are pulling in the same index from the splat
16448 // as the index from the interesting vector in consecutive elements.
16449 if (IsLittleEndian) {
16450 // Example (even elements from first vector):
16451 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
16452 if (Mask[0] < NumElts)
16453 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16454 if (ShuffV[i] < 0)
16455 continue;
16456 // If element from non-splat is undef, pick first element from splat.
16457 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
16458 }
16459 // Example (odd elements from first vector):
16460 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
16461 else
16462 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16463 if (ShuffV[i] < 0)
16464 continue;
16465 // If element from non-splat is undef, pick first element from splat.
16466 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
16467 }
16468 } else {
16469 // Example (even elements from first vector):
16470 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
16471 if (Mask[0] < NumElts)
16472 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16473 if (ShuffV[i] < 0)
16474 continue;
16475 // If element from non-splat is undef, pick first element from splat.
16476 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
16477 }
16478 // Example (odd elements from first vector):
16479 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
16480 else
16481 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16482 if (ShuffV[i] < 0)
16483 continue;
16484 // If element from non-splat is undef, pick first element from splat.
16485 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
16486 }
16487 }
16488
16489 // If the RHS has undefs, we need to remove them since we may have created
16490 // a shuffle that adds those instead of the splat value.
16491 SDValue SplatVal =
16492 cast<BuildVectorSDNode>(Val: TheSplat.getNode())->getSplatValue();
16493 TheSplat = DAG.getSplatBuildVector(VT: TheSplat.getValueType(), DL: dl, Op: SplatVal);
16494
16495 if (IsLittleEndian)
16496 RHS = TheSplat;
16497 else
16498 LHS = TheSplat;
16499 return DAG.getVectorShuffle(VT: SVN->getValueType(ResNo: 0), dl, N1: LHS, N2: RHS, Mask: ShuffV);
16500}
16501
16502SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
16503 LSBaseSDNode *LSBase,
16504 DAGCombinerInfo &DCI) const {
16505 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
16506 "Not a reverse memop pattern!");
16507
16508 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
16509 auto Mask = SVN->getMask();
16510 int i = 0;
16511 auto I = Mask.rbegin();
16512 auto E = Mask.rend();
16513
16514 for (; I != E; ++I) {
16515 if (*I != i)
16516 return false;
16517 i++;
16518 }
16519 return true;
16520 };
16521
16522 SelectionDAG &DAG = DCI.DAG;
16523 EVT VT = SVN->getValueType(ResNo: 0);
16524
16525 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
16526 return SDValue();
16527
16528 // Before P9, we have the PPCVSXSwapRemoval pass to hack the element order.
16529 // See the comment in PPCVSXSwapRemoval.cpp.
16530 // This combine conflicts with that optimization, so we don't do it on pre-P9.
16531 if (!Subtarget.hasP9Vector())
16532 return SDValue();
16533
16534 if (!IsElementReverse(SVN))
16535 return SDValue();
16536
16537 if (LSBase->getOpcode() == ISD::LOAD) {
16538 // If result 0 of the load has any user other than the shufflevector
16539 // instruction, it is not profitable to replace the shufflevector with a
16540 // reverse load.
16541 for (SDUse &Use : LSBase->uses())
16542 if (Use.getResNo() == 0 &&
16543 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
16544 return SDValue();
16545
16546 SDLoc dl(LSBase);
16547 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
16548 return DAG.getMemIntrinsicNode(
16549 Opcode: PPCISD::LOAD_VEC_BE, dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Other), Ops: LoadOps,
16550 MemVT: LSBase->getMemoryVT(), MMO: LSBase->getMemOperand());
16551 }
16552
16553 if (LSBase->getOpcode() == ISD::STORE) {
16554 // If there are other uses of the shuffle, the swap cannot be avoided.
16555 // Forcing the use of an X-Form (since swapped stores only have
16556 // X-Forms) without removing the swap is unprofitable.
16557 if (!SVN->hasOneUse())
16558 return SDValue();
16559
16560 SDLoc dl(LSBase);
16561 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(Num: 0),
16562 LSBase->getBasePtr()};
16563 return DAG.getMemIntrinsicNode(
16564 Opcode: PPCISD::STORE_VEC_BE, dl, VTList: DAG.getVTList(VT: MVT::Other), Ops: StoreOps,
16565 MemVT: LSBase->getMemoryVT(), MMO: LSBase->getMemOperand());
16566 }
16567
16568 llvm_unreachable("Expected a load or store node here");
16569}
16570
16571static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
16572 unsigned IntrinsicID = Intrin.getConstantOperandVal(i: 1);
16573 if (IntrinsicID == Intrinsic::ppc_stdcx)
16574 StoreWidth = 8;
16575 else if (IntrinsicID == Intrinsic::ppc_stwcx)
16576 StoreWidth = 4;
16577 else if (IntrinsicID == Intrinsic::ppc_sthcx)
16578 StoreWidth = 2;
16579 else if (IntrinsicID == Intrinsic::ppc_stbcx)
16580 StoreWidth = 1;
16581 else
16582 return false;
16583 return true;
16584}
16585
16586static SDValue DAGCombineAddc(SDNode *N,
16587 llvm::PPCTargetLowering::DAGCombinerInfo &DCI) {
16588 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(Value: 1)) {
16589 // (ADDC (ADDE 0, 0, C), -1) -> C
16590 SDValue LHS = N->getOperand(Num: 0);
16591 SDValue RHS = N->getOperand(Num: 1);
16592 if (LHS->getOpcode() == PPCISD::ADDE &&
16593 isNullConstant(V: LHS->getOperand(Num: 0)) &&
16594 isNullConstant(V: LHS->getOperand(Num: 1)) && isAllOnesConstant(V: RHS)) {
16595 return DCI.CombineTo(N, Res0: SDValue(N, 0), Res1: LHS->getOperand(Num: 2));
16596 }
16597 }
16598 return SDValue();
16599}
16600
16601SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
16602 DAGCombinerInfo &DCI) const {
16603 SelectionDAG &DAG = DCI.DAG;
16604 SDLoc dl(N);
16605 switch (N->getOpcode()) {
16606 default: break;
16607 case ISD::ADD:
16608 return combineADD(N, DCI);
16609 case ISD::AND: {
16610 // We don't want (and (zext (shift...)), C) if C fits in the width of the
16611 // original input as that will prevent us from selecting optimal rotates.
16612 // This only matters if the input to the extend is i32 widened to i64.
16613 SDValue Op1 = N->getOperand(Num: 0);
16614 SDValue Op2 = N->getOperand(Num: 1);
16615 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
16616 Op1.getOpcode() != ISD::ANY_EXTEND) ||
16617 !isa<ConstantSDNode>(Val: Op2) || N->getValueType(ResNo: 0) != MVT::i64 ||
16618 Op1.getOperand(i: 0).getValueType() != MVT::i32)
16619 break;
16620 SDValue NarrowOp = Op1.getOperand(i: 0);
16621 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
16622 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
16623 break;
16624
16625 uint64_t Imm = Op2->getAsZExtVal();
16626 // Make sure that the constant is narrow enough to fit in the narrow type.
16627 if (!isUInt<32>(x: Imm))
16628 break;
16629 SDValue ConstOp = DAG.getConstant(Val: Imm, DL: dl, VT: MVT::i32);
16630 SDValue NarrowAnd = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, N1: NarrowOp, N2: ConstOp);
16631 return DAG.getZExtOrTrunc(Op: NarrowAnd, DL: dl, VT: N->getValueType(ResNo: 0));
16632 }
16633 case ISD::SHL:
16634 return combineSHL(N, DCI);
16635 case ISD::SRA:
16636 return combineSRA(N, DCI);
16637 case ISD::SRL:
16638 return combineSRL(N, DCI);
16639 case ISD::MUL:
16640 return combineMUL(N, DCI);
16641 case ISD::FMA:
16642 case PPCISD::FNMSUB:
16643 return combineFMALike(N, DCI);
16644 case PPCISD::SHL:
16645 if (isNullConstant(V: N->getOperand(Num: 0))) // 0 << V -> 0.
16646 return N->getOperand(Num: 0);
16647 break;
16648 case PPCISD::SRL:
16649 if (isNullConstant(V: N->getOperand(Num: 0))) // 0 >>u V -> 0.
16650 return N->getOperand(Num: 0);
16651 break;
16652 case PPCISD::SRA:
16653 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 0))) {
16654 if (C->isZero() || // 0 >>s V -> 0.
16655 C->isAllOnes()) // -1 >>s V -> -1.
16656 return N->getOperand(Num: 0);
16657 }
16658 break;
16659 case ISD::SIGN_EXTEND:
16660 case ISD::ZERO_EXTEND:
16661 case ISD::ANY_EXTEND:
16662 return DAGCombineExtBoolTrunc(N, DCI);
16663 case ISD::TRUNCATE:
16664 return combineTRUNCATE(N, DCI);
16665 case ISD::SETCC:
16666 if (SDValue CSCC = combineSetCC(N, DCI))
16667 return CSCC;
16668 [[fallthrough]];
16669 case ISD::SELECT_CC:
16670 return DAGCombineTruncBoolExt(N, DCI);
16671 case ISD::SINT_TO_FP:
16672 case ISD::UINT_TO_FP:
16673 return combineFPToIntToFP(N, DCI);
16674 case ISD::VECTOR_SHUFFLE:
16675 if (ISD::isNormalLoad(N: N->getOperand(Num: 0).getNode())) {
16676 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(Val: N->getOperand(Num: 0));
16677 return combineVReverseMemOP(SVN: cast<ShuffleVectorSDNode>(Val: N), LSBase, DCI);
16678 }
16679 return combineVectorShuffle(SVN: cast<ShuffleVectorSDNode>(Val: N), DAG&: DCI.DAG);
16680 case ISD::STORE: {
16681
16682 EVT Op1VT = N->getOperand(Num: 1).getValueType();
16683 unsigned Opcode = N->getOperand(Num: 1).getOpcode();
16684
16685 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16686 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
16687 SDValue Val = combineStoreFPToInt(N, DCI);
16688 if (Val)
16689 return Val;
16690 }
16691
16692 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
16693 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: N->getOperand(Num: 1));
16694 SDValue Val = combineVReverseMemOP(SVN, LSBase: cast<LSBaseSDNode>(Val: N), DCI);
16695 if (Val)
16696 return Val;
16697 }
16698
16699 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
16700 if (cast<StoreSDNode>(Val: N)->isUnindexed() && Opcode == ISD::BSWAP &&
16701 N->getOperand(Num: 1).getNode()->hasOneUse() &&
16702 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
16703 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
16704
16705 // STBRX can only handle simple types and it makes no sense to store less
16706 // than two bytes in byte-reversed order.
16707 EVT mVT = cast<StoreSDNode>(Val: N)->getMemoryVT();
16708 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
16709 break;
16710
16711 SDValue BSwapOp = N->getOperand(Num: 1).getOperand(i: 0);
16712 // Do an any-extend to 32-bits if this is a half-word input.
16713 if (BSwapOp.getValueType() == MVT::i16)
16714 BSwapOp = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i32, Operand: BSwapOp);
16715
16716 // If the type of the BSWAP operand is wider than the stored memory width,
16717 // it needs to be shifted to the right before STBRX.
16718 if (Op1VT.bitsGT(VT: mVT)) {
16719 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
16720 BSwapOp = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: Op1VT, N1: BSwapOp,
16721 N2: DAG.getConstant(Val: Shift, DL: dl, VT: MVT::i32));
16722 // Need to truncate if this is a bswap of i64 stored as i32/i16.
16723 if (Op1VT == MVT::i64)
16724 BSwapOp = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: BSwapOp);
16725 }
16726
16727 SDValue Ops[] = {
16728 N->getOperand(Num: 0), BSwapOp, N->getOperand(Num: 2), DAG.getValueType(mVT)
16729 };
16730 return
16731 DAG.getMemIntrinsicNode(Opcode: PPCISD::STBRX, dl, VTList: DAG.getVTList(VT: MVT::Other),
16732 Ops, MemVT: cast<StoreSDNode>(Val: N)->getMemoryVT(),
16733 MMO: cast<StoreSDNode>(Val: N)->getMemOperand());
16734 }
16735
16736 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
16737 // This increases the chance of CSE for constant construction.
16738 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
16739 isa<ConstantSDNode>(Val: N->getOperand(Num: 1)) && Op1VT == MVT::i32) {
16740 // Need to sign-extend to 64 bits to handle negative values.
16741 EVT MemVT = cast<StoreSDNode>(Val: N)->getMemoryVT();
16742 uint64_t Val64 = SignExtend64(X: N->getConstantOperandVal(Num: 1),
16743 B: MemVT.getSizeInBits());
16744 SDValue Const64 = DAG.getConstant(Val: Val64, DL: dl, VT: MVT::i64);
16745
16746 auto *ST = cast<StoreSDNode>(Val: N);
16747 SDValue NewST = DAG.getStore(Chain: ST->getChain(), dl, Val: Const64,
16748 Ptr: ST->getBasePtr(), Offset: ST->getOffset(), SVT: MemVT,
16749 MMO: ST->getMemOperand(), AM: ST->getAddressingMode(),
16750 /*IsTruncating=*/true);
16751 // Note we use CombineTo here to prevent DAGCombiner from visiting the
16752 // new store which will change the constant by removing non-demanded bits.
16753 return ST->isUnindexed()
16754 ? DCI.CombineTo(N, Res: NewST, /*AddTo=*/false)
16755 : DCI.CombineTo(N, Res0: NewST, Res1: NewST.getValue(R: 1), /*AddTo=*/false);
16756 }
16757
16758 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16759 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16760 if (Op1VT.isSimple()) {
16761 MVT StoreVT = Op1VT.getSimpleVT();
16762 if (Subtarget.needsSwapsForVSXMemOps() &&
16763 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
16764 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
16765 return expandVSXStoreForLE(N, DCI);
16766 }
16767 break;
16768 }
16769 case ISD::LOAD: {
16770 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
16771 EVT VT = LD->getValueType(ResNo: 0);
16772
16773 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16774 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16775 if (VT.isSimple()) {
16776 MVT LoadVT = VT.getSimpleVT();
16777 if (Subtarget.needsSwapsForVSXMemOps() &&
16778 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
16779 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
16780 return expandVSXLoadForLE(N, DCI);
16781 }
16782
16783 // We sometimes end up with a 64-bit integer load, from which we extract
16784 // two single-precision floating-point numbers. This happens with
16785 // std::complex<float>, and other similar structures, because of the way we
16786 // canonicalize structure copies. However, if we lack direct moves,
16787 // then the final bitcasts from the extracted integer values to the
16788 // floating-point numbers turn into store/load pairs. Even with direct moves,
16789 // just loading the two floating-point numbers is likely better.
16790 auto ReplaceTwoFloatLoad = [&]() {
16791 if (VT != MVT::i64)
16792 return false;
16793
16794 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
16795 LD->isVolatile())
16796 return false;
16797
16798 // We're looking for a sequence like this:
16799 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
16800 // t16: i64 = srl t13, Constant:i32<32>
16801 // t17: i32 = truncate t16
16802 // t18: f32 = bitcast t17
16803 // t19: i32 = truncate t13
16804 // t20: f32 = bitcast t19
16805
16806 if (!LD->hasNUsesOfValue(NUses: 2, Value: 0))
16807 return false;
16808
16809 auto UI = LD->user_begin();
16810 while (UI.getUse().getResNo() != 0) ++UI;
16811 SDNode *Trunc = *UI++;
16812 while (UI.getUse().getResNo() != 0) ++UI;
16813 SDNode *RightShift = *UI;
16814 if (Trunc->getOpcode() != ISD::TRUNCATE)
16815 std::swap(a&: Trunc, b&: RightShift);
16816
16817 if (Trunc->getOpcode() != ISD::TRUNCATE ||
16818 Trunc->getValueType(ResNo: 0) != MVT::i32 ||
16819 !Trunc->hasOneUse())
16820 return false;
16821 if (RightShift->getOpcode() != ISD::SRL ||
16822 !isa<ConstantSDNode>(Val: RightShift->getOperand(Num: 1)) ||
16823 RightShift->getConstantOperandVal(Num: 1) != 32 ||
16824 !RightShift->hasOneUse())
16825 return false;
16826
16827 SDNode *Trunc2 = *RightShift->user_begin();
16828 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16829 Trunc2->getValueType(ResNo: 0) != MVT::i32 ||
16830 !Trunc2->hasOneUse())
16831 return false;
16832
16833 SDNode *Bitcast = *Trunc->user_begin();
16834 SDNode *Bitcast2 = *Trunc2->user_begin();
16835
16836 if (Bitcast->getOpcode() != ISD::BITCAST ||
16837 Bitcast->getValueType(ResNo: 0) != MVT::f32)
16838 return false;
16839 if (Bitcast2->getOpcode() != ISD::BITCAST ||
16840 Bitcast2->getValueType(ResNo: 0) != MVT::f32)
16841 return false;
16842
16843 if (Subtarget.isLittleEndian())
16844 std::swap(a&: Bitcast, b&: Bitcast2);
16845
16846 // Bitcast has the second float (in memory-layout order) and Bitcast2
16847 // has the first one.
16848
16849 SDValue BasePtr = LD->getBasePtr();
16850 if (LD->isIndexed()) {
16851 assert(LD->getAddressingMode() == ISD::PRE_INC &&
16852 "Non-pre-inc AM on PPC?");
16853 BasePtr =
16854 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
16855 N2: LD->getOffset());
16856 }
16857
16858 auto MMOFlags =
16859 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
16860 SDValue FloatLoad = DAG.getLoad(VT: MVT::f32, dl, Chain: LD->getChain(), Ptr: BasePtr,
16861 PtrInfo: LD->getPointerInfo(), Alignment: LD->getAlign(),
16862 MMOFlags, AAInfo: LD->getAAInfo());
16863 SDValue AddPtr =
16864 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(),
16865 N1: BasePtr, N2: DAG.getIntPtrConstant(Val: 4, DL: dl));
16866 SDValue FloatLoad2 = DAG.getLoad(
16867 VT: MVT::f32, dl, Chain: SDValue(FloatLoad.getNode(), 1), Ptr: AddPtr,
16868 PtrInfo: LD->getPointerInfo().getWithOffset(O: 4),
16869 Alignment: commonAlignment(A: LD->getAlign(), Offset: 4), MMOFlags, AAInfo: LD->getAAInfo());
16870
16871 if (LD->isIndexed()) {
16872 // Note that DAGCombine should re-form any pre-increment load(s) from
16873 // what is produced here if that makes sense.
16874 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LD, 1), To: BasePtr);
16875 }
16876
16877 DCI.CombineTo(N: Bitcast2, Res: FloatLoad);
16878 DCI.CombineTo(N: Bitcast, Res: FloatLoad2);
16879
16880 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LD, LD->isIndexed() ? 2 : 1),
16881 To: SDValue(FloatLoad2.getNode(), 1));
16882 return true;
16883 };
16884
16885 if (ReplaceTwoFloatLoad())
16886 return SDValue(N, 0);
16887
16888 EVT MemVT = LD->getMemoryVT();
16889 Type *Ty = MemVT.getTypeForEVT(Context&: *DAG.getContext());
16890 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
16891 if (LD->isUnindexed() && VT.isVector() &&
16892 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16893 // P8 and later hardware should just use LOAD.
16894 !Subtarget.hasP8Vector() &&
16895 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16896 VT == MVT::v4f32))) &&
16897 LD->getAlign() < ABIAlignment) {
16898 // This is a type-legal unaligned Altivec load.
16899 SDValue Chain = LD->getChain();
16900 SDValue Ptr = LD->getBasePtr();
16901 bool isLittleEndian = Subtarget.isLittleEndian();
16902
16903 // This implements the loading of unaligned vectors as described in
16904 // the venerable Apple Velocity Engine overview. Specifically:
16905 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16906 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16907 //
16908 // The general idea is to expand a sequence of one or more unaligned
16909 // loads into an alignment-based permutation-control instruction (lvsl
16910 // or lvsr), a series of regular vector loads (which always truncate
16911 // their input address to an aligned address), and a series of
16912 // permutations. The results of these permutations are the requested
16913 // loaded values. The trick is that the last "extra" load is not taken
16914 // from the address you might suspect (sizeof(vector) bytes after the
16915 // last requested load), but rather sizeof(vector) - 1 bytes after the
16916 // last requested vector. The point of this is to avoid a page fault if
16917 // the base address happened to be aligned. This works because if the
16918 // base address is aligned, then adding less than a full vector length
16919 // will cause the last vector in the sequence to be (re)loaded.
16920 // Otherwise, the next vector will be fetched as you might suspect was
16921 // necessary.
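      //
      // For example, with 16-byte vectors, an unaligned load from 0x1003
      // becomes lvsl/lvsr on 0x1003, lvx loads at 0x1003 and at 0x1003 + 15
      // (which lvx truncates to 0x1000 and 0x1010 respectively), and a vperm
      // of the two results. Had the base been 0x1000, the "extra" address
      // 0x100f would truncate back to 0x1000, so the same aligned block is
      // simply reloaded and nothing past the requested data is touched.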
16922
16923 // We might be able to reuse the permutation generation from
16924 // a different base address offset from this one by an aligned amount.
16925 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16926 // optimization later.
16927 Intrinsic::ID Intr, IntrLD, IntrPerm;
16928 MVT PermCntlTy, PermTy, LDTy;
16929 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16930 : Intrinsic::ppc_altivec_lvsl;
16931 IntrLD = Intrinsic::ppc_altivec_lvx;
16932 IntrPerm = Intrinsic::ppc_altivec_vperm;
16933 PermCntlTy = MVT::v16i8;
16934 PermTy = MVT::v4i32;
16935 LDTy = MVT::v4i32;
16936
16937 SDValue PermCntl = BuildIntrinsicOp(IID: Intr, Op: Ptr, DAG, dl, DestVT: PermCntlTy);
16938
16939 // Create the new MMO for the new base load. It is like the original MMO,
16940 // but represents an area in memory almost twice the vector size centered
16941 // on the original address. If the address is unaligned, we might start
16942 // reading up to (sizeof(vector)-1) bytes below the address of the
16943 // original unaligned load.
16944 MachineFunction &MF = DAG.getMachineFunction();
16945 MachineMemOperand *BaseMMO =
16946 MF.getMachineMemOperand(MMO: LD->getMemOperand(),
16947 Offset: -(int64_t)MemVT.getStoreSize()+1,
16948 Size: 2*MemVT.getStoreSize()-1);
16949
16950 // Create the new base load.
16951 SDValue LDXIntID =
16952 DAG.getTargetConstant(Val: IntrLD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()));
16953 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16954 SDValue BaseLoad =
16955 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl,
16956 VTList: DAG.getVTList(VT1: PermTy, VT2: MVT::Other),
16957 Ops: BaseLoadOps, MemVT: LDTy, MMO: BaseMMO);
16958
16959 // Note that the value of IncOffset (which is provided to the next
16960 // load's pointer info offset value, and thus used to calculate the
16961 // alignment), and the value of IncValue (which is actually used to
16962 // increment the pointer value) are different! This is because we
16963 // require the next load to appear to be aligned, even though it
16964 // is actually offset from the base pointer by a lesser amount.
16965 int IncOffset = VT.getSizeInBits() / 8;
16966 int IncValue = IncOffset;
16967
16968 // Walk (both up and down) the chain looking for another load at the real
16969 // (aligned) offset (the alignment of the other load does not matter in
16970 // this case). If found, then do not use the offset reduction trick, as
16971 // that will prevent the loads from being later combined (as they would
16972 // otherwise be duplicates).
16973 if (!findConsecutiveLoad(LD, DAG))
16974 --IncValue;
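      // For the common 16-byte vector case this leaves IncOffset at 16 while
      // IncValue drops to 15 (the sizeof(vector) - 1 trick described above);
      // if a consecutive load was found, both remain 16.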
16975
16976 SDValue Increment =
16977 DAG.getConstant(Val: IncValue, DL: dl, VT: getPointerTy(DL: MF.getDataLayout()));
16978 Ptr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: Ptr.getValueType(), N1: Ptr, N2: Increment);
16979
16980 MachineMemOperand *ExtraMMO =
16981 MF.getMachineMemOperand(MMO: LD->getMemOperand(),
16982 Offset: 1, Size: 2*MemVT.getStoreSize()-1);
16983 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16984 SDValue ExtraLoad =
16985 DAG.getMemIntrinsicNode(Opcode: ISD::INTRINSIC_W_CHAIN, dl,
16986 VTList: DAG.getVTList(VT1: PermTy, VT2: MVT::Other),
16987 Ops: ExtraLoadOps, MemVT: LDTy, MMO: ExtraMMO);
16988
16989 SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
16990 N1: BaseLoad.getValue(R: 1), N2: ExtraLoad.getValue(R: 1));
16991
16992 // Because vperm has a big-endian bias, we must reverse the order
16993 // of the input vectors and complement the permute control vector
16994 // when generating little endian code. We have already handled the
16995 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16996 // and ExtraLoad here.
16997 SDValue Perm;
16998 if (isLittleEndian)
16999 Perm = BuildIntrinsicOp(IID: IntrPerm,
17000 Op0: ExtraLoad, Op1: BaseLoad, Op2: PermCntl, DAG, dl);
17001 else
17002 Perm = BuildIntrinsicOp(IID: IntrPerm,
17003 Op0: BaseLoad, Op1: ExtraLoad, Op2: PermCntl, DAG, dl);
17004
17005 if (VT != PermTy)
17006 Perm = Subtarget.hasAltivec()
17007 ? DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Perm)
17008 : DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT, N1: Perm,
17009 N2: DAG.getTargetConstant(Val: 1, DL: dl, VT: MVT::i64));
17010 // second argument is 1 because this rounding
17011 // is always exact.
17012
17013 // The output of the permutation is our loaded result, the TokenFactor is
17014 // our new chain.
17015 DCI.CombineTo(N, Res0: Perm, Res1: TF);
17016 return SDValue(N, 0);
17017 }
17018 }
17019 break;
17020 case ISD::INTRINSIC_WO_CHAIN: {
17021 bool isLittleEndian = Subtarget.isLittleEndian();
17022 unsigned IID = N->getConstantOperandVal(Num: 0);
17023 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17024 : Intrinsic::ppc_altivec_lvsl);
17025 if (IID == Intr && N->getOperand(Num: 1)->getOpcode() == ISD::ADD) {
17026 SDValue Add = N->getOperand(Num: 1);
17027
17028 int Bits = 4 /* 16 byte alignment */;
17029
17030 if (DAG.MaskedValueIsZero(Op: Add->getOperand(Num: 1),
17031 Mask: APInt::getAllOnes(numBits: Bits /* alignment */)
17032 .zext(width: Add.getScalarValueSizeInBits()))) {
17033 SDNode *BasePtr = Add->getOperand(Num: 0).getNode();
17034 for (SDNode *U : BasePtr->users()) {
17035 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17036 U->getConstantOperandVal(Num: 0) == IID) {
            // We've found another LVSL/LVSR, and this address differs from
            // that one by an aligned (multiple-of-16) amount. The results will
            // be the same, so use the one we've just found instead.
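            // (lvsl/lvsr derive the permute control solely from the low four
            // bits of the effective address, so adding a multiple of 16 to the
            // base cannot change the result.)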
17040
17041 return SDValue(U, 0);
17042 }
17043 }
17044 }
17045
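      // Similarly, if this address and another lvsl/lvsr's address are both
      // constant offsets from the same base and those offsets differ by a
      // multiple of 16, the permute control is identical, so reuse that node.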
17046 if (isa<ConstantSDNode>(Val: Add->getOperand(Num: 1))) {
17047 SDNode *BasePtr = Add->getOperand(Num: 0).getNode();
17048 for (SDNode *U : BasePtr->users()) {
17049 if (U->getOpcode() == ISD::ADD &&
17050 isa<ConstantSDNode>(Val: U->getOperand(Num: 1)) &&
17051 (Add->getConstantOperandVal(Num: 1) - U->getConstantOperandVal(Num: 1)) %
17052 (1ULL << Bits) ==
17053 0) {
17054 SDNode *OtherAdd = U;
17055 for (SDNode *V : OtherAdd->users()) {
17056 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17057 V->getConstantOperandVal(Num: 0) == IID) {
17058 return SDValue(V, 0);
17059 }
17060 }
17061 }
17062 }
17063 }
17064 }
17065
    // Combine vmaxsw/h/b(a, a's negation) into abs(a).
    // This exposes the vabsduw/h/b opportunity for downstream combines.
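    // This relies on smax(a, 0 - a) == |a| and smax(x - y, y - x) == |x - y|,
    // so each matched form below can be rewritten as a single ISD::ABS.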
17068 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17069 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17070 IID == Intrinsic::ppc_altivec_vmaxsh ||
17071 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17072 SDValue V1 = N->getOperand(Num: 1);
17073 SDValue V2 = N->getOperand(Num: 2);
17074 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17075 V1.getSimpleValueType() == MVT::v8i16 ||
17076 V1.getSimpleValueType() == MVT::v16i8) &&
17077 V1.getSimpleValueType() == V2.getSimpleValueType()) {
17078 // (0-a, a)
17079 if (V1.getOpcode() == ISD::SUB &&
17080 ISD::isBuildVectorAllZeros(N: V1.getOperand(i: 0).getNode()) &&
17081 V1.getOperand(i: 1) == V2) {
17082 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V2.getValueType(), Operand: V2);
17083 }
17084 // (a, 0-a)
17085 if (V2.getOpcode() == ISD::SUB &&
17086 ISD::isBuildVectorAllZeros(N: V2.getOperand(i: 0).getNode()) &&
17087 V2.getOperand(i: 1) == V1) {
17088 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V1.getValueType(), Operand: V1);
17089 }
17090 // (x-y, y-x)
17091 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17092 V1.getOperand(i: 0) == V2.getOperand(i: 1) &&
17093 V1.getOperand(i: 1) == V2.getOperand(i: 0)) {
17094 return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V1.getValueType(), Operand: V1);
17095 }
17096 }
17097 }
17098 }
17099
17100 break;
17101 case ISD::INTRINSIC_W_CHAIN:
17102 switch (N->getConstantOperandVal(Num: 1)) {
17103 default:
17104 break;
17105 case Intrinsic::ppc_altivec_vsum4sbs:
17106 case Intrinsic::ppc_altivec_vsum4shs:
17107 case Intrinsic::ppc_altivec_vsum4ubs: {
17108 // These sum-across intrinsics only have a chain due to the side effect
17109 // that they may set the SAT bit. If we know the SAT bit will not be set
17110 // for some inputs, we can replace any uses of their chain with the
17111 // input chain.
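      // In particular, with a zero accumulator the partial sums of the
      // byte/halfword elements cannot overflow a 32-bit word, so the SAT bit
      // cannot be set.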
17112 if (BuildVectorSDNode *BVN =
17113 dyn_cast<BuildVectorSDNode>(Val: N->getOperand(Num: 3))) {
17114 APInt APSplatBits, APSplatUndef;
17115 unsigned SplatBitSize;
17116 bool HasAnyUndefs;
17117 bool BVNIsConstantSplat = BVN->isConstantSplat(
17118 SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize, HasAnyUndefs, MinSplatBits: 0,
17119 isBigEndian: !Subtarget.isLittleEndian());
17120 // If the constant splat vector is 0, the SAT bit will not be set.
17121 if (BVNIsConstantSplat && APSplatBits == 0)
17122 DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: N->getOperand(Num: 0));
17123 }
17124 return SDValue();
17125 }
17126 case Intrinsic::ppc_vsx_lxvw4x:
17127 case Intrinsic::ppc_vsx_lxvd2x:
17128 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17129 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17130 if (Subtarget.needsSwapsForVSXMemOps())
17131 return expandVSXLoadForLE(N, DCI);
17132 break;
17133 }
17134 break;
17135 case ISD::INTRINSIC_VOID:
17136 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17137 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17138 if (Subtarget.needsSwapsForVSXMemOps()) {
17139 switch (N->getConstantOperandVal(Num: 1)) {
17140 default:
17141 break;
17142 case Intrinsic::ppc_vsx_stxvw4x:
17143 case Intrinsic::ppc_vsx_stxvd2x:
17144 return expandVSXStoreForLE(N, DCI);
17145 }
17146 }
17147 break;
17148 case ISD::BSWAP: {
17149 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17150 // For subtargets without LDBRX, we can still do better than the default
17151 // expansion even for 64-bit BSWAP (LOAD).
17152 bool Is64BitBswapOn64BitTgt =
17153 Subtarget.isPPC64() && N->getValueType(ResNo: 0) == MVT::i64;
17154 bool IsSingleUseNormalLd = ISD::isNormalLoad(N: N->getOperand(Num: 0).getNode()) &&
17155 N->getOperand(Num: 0).hasOneUse();
17156 if (IsSingleUseNormalLd &&
17157 (N->getValueType(ResNo: 0) == MVT::i32 || N->getValueType(ResNo: 0) == MVT::i16 ||
17158 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17159 SDValue Load = N->getOperand(Num: 0);
17160 LoadSDNode *LD = cast<LoadSDNode>(Val&: Load);
17161 // Create the byte-swapping load.
17162 SDValue Ops[] = {
17163 LD->getChain(), // Chain
17164 LD->getBasePtr(), // Ptr
17165 DAG.getValueType(N->getValueType(ResNo: 0)) // VT
17166 };
17167 SDValue BSLoad =
17168 DAG.getMemIntrinsicNode(Opcode: PPCISD::LBRX, dl,
17169 VTList: DAG.getVTList(VT1: N->getValueType(ResNo: 0) == MVT::i64 ?
17170 MVT::i64 : MVT::i32, VT2: MVT::Other),
17171 Ops, MemVT: LD->getMemoryVT(), MMO: LD->getMemOperand());
17172
17173 // If this is an i16 load, insert the truncate.
17174 SDValue ResVal = BSLoad;
17175 if (N->getValueType(ResNo: 0) == MVT::i16)
17176 ResVal = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i16, Operand: BSLoad);
17177
17178 // First, combine the bswap away. This makes the value produced by the
17179 // load dead.
17180 DCI.CombineTo(N, Res: ResVal);
17181
      // Next, combine the load away; we give it a bogus result value but a
      // real chain result. The result value is dead because the bswap is dead.
17184 DCI.CombineTo(N: Load.getNode(), Res0: ResVal, Res1: BSLoad.getValue(R: 1));
17185
17186 // Return N so it doesn't get rechecked!
17187 return SDValue(N, 0);
17188 }
17189 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
17190 // before legalization so that the BUILD_PAIR is handled correctly.
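    // That is, split the i64 bswap(load p) into two i32 loads (at p and p+4),
    // byte-swap each half, and reassemble them with BUILD_PAIR in whichever
    // order the target endianness requires.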
17191 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
17192 !IsSingleUseNormalLd)
17193 return SDValue();
17194 LoadSDNode *LD = cast<LoadSDNode>(Val: N->getOperand(Num: 0));
17195
17196 // Can't split volatile or atomic loads.
17197 if (!LD->isSimple())
17198 return SDValue();
17199 SDValue BasePtr = LD->getBasePtr();
17200 SDValue Lo = DAG.getLoad(VT: MVT::i32, dl, Chain: LD->getChain(), Ptr: BasePtr,
17201 PtrInfo: LD->getPointerInfo(), Alignment: LD->getAlign());
17202 Lo = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::i32, Operand: Lo);
17203 BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr,
17204 N2: DAG.getIntPtrConstant(Val: 4, DL: dl));
17205 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
17206 MMO: LD->getMemOperand(), Offset: 4, Size: 4);
17207 SDValue Hi = DAG.getLoad(VT: MVT::i32, dl, Chain: LD->getChain(), Ptr: BasePtr, MMO: NewMMO);
17208 Hi = DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT: MVT::i32, Operand: Hi);
17209 SDValue Res;
17210 if (Subtarget.isLittleEndian())
17211 Res = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: Hi, N2: Lo);
17212 else
17213 Res = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: MVT::i64, N1: Lo, N2: Hi);
17214 SDValue TF =
17215 DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
17216 N1: Hi.getOperand(i: 0).getValue(R: 1), N2: Lo.getOperand(i: 0).getValue(R: 1));
17217 DAG.ReplaceAllUsesOfValueWith(From: SDValue(LD, 1), To: TF);
17218 return Res;
17219 }
17220 case PPCISD::VCMP:
17221 // If a VCMP_rec node already exists with exactly the same operands as this
17222 // node, use its result instead of this node (VCMP_rec computes both a CR6
17223 // and a normal output).
17224 //
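    // A matching VCMP_rec would use all three of this node's operands, so each
    // of them must have more than one use for the scan below to be worthwhile.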
17225 if (!N->getOperand(Num: 0).hasOneUse() &&
17226 !N->getOperand(Num: 1).hasOneUse() &&
17227 !N->getOperand(Num: 2).hasOneUse()) {
17228
17229 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
17230 SDNode *VCMPrecNode = nullptr;
17231
17232 SDNode *LHSN = N->getOperand(Num: 0).getNode();
17233 for (SDNode *User : LHSN->users())
17234 if (User->getOpcode() == PPCISD::VCMP_rec &&
17235 User->getOperand(Num: 1) == N->getOperand(Num: 1) &&
17236 User->getOperand(Num: 2) == N->getOperand(Num: 2) &&
17237 User->getOperand(Num: 0) == N->getOperand(Num: 0)) {
17238 VCMPrecNode = User;
17239 break;
17240 }
17241
      // If there is no VCMP_rec node, or if its flag result is unused,
      // don't transform this.
17244 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(NUses: 0, Value: 1))
17245 break;
17246
17247 // Look at the (necessarily single) use of the flag value. If it has a
17248 // chain, this transformation is more complex. Note that multiple things
17249 // could use the value result, which we should ignore.
17250 SDNode *FlagUser = nullptr;
17251 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
17252 FlagUser == nullptr; ++UI) {
17253 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
17254 SDNode *User = UI->getUser();
17255 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
17256 if (User->getOperand(Num: i) == SDValue(VCMPrecNode, 1)) {
17257 FlagUser = User;
17258 break;
17259 }
17260 }
17261 }
17262
17263 // If the user is a MFOCRF instruction, we know this is safe.
17264 // Otherwise we give up for right now.
17265 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
17266 return SDValue(VCMPrecNode, 0);
17267 }
17268 break;
17269 case ISD::BR_CC: {
17270 // If this is a branch on an altivec predicate comparison, lower this so
17271 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
17272 // lowering is done pre-legalize, because the legalizer lowers the predicate
17273 // compare down to code that is difficult to reassemble.
17274 // This code also handles branches that depend on the result of a store
17275 // conditional.
17276 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 1))->get();
17277 SDValue LHS = N->getOperand(Num: 2), RHS = N->getOperand(Num: 3);
17278
17279 int CompareOpc;
17280 bool isDot;
17281
17282 if (!isa<ConstantSDNode>(Val: RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
17283 break;
17284
17285 // Since we are doing this pre-legalize, the RHS can be a constant of
17286 // arbitrary bitwidth which may cause issues when trying to get the value
17287 // from the underlying APInt.
17288 auto RHSAPInt = RHS->getAsAPIntVal();
17289 if (!RHSAPInt.isIntN(N: 64))
17290 break;
17291
17292 unsigned Val = RHSAPInt.getZExtValue();
17293 auto isImpossibleCompare = [&]() {
17294 // If this is a comparison against something other than 0/1, then we know
17295 // that the condition is never/always true.
17296 if (Val != 0 && Val != 1) {
17297 if (CC == ISD::SETEQ) // Cond never true, remove branch.
17298 return N->getOperand(Num: 0);
17299 // Always !=, turn it into an unconditional branch.
17300 return DAG.getNode(Opcode: ISD::BR, DL: dl, VT: MVT::Other,
17301 N1: N->getOperand(Num: 0), N2: N->getOperand(Num: 4));
17302 }
17303 return SDValue();
17304 };
17305 // Combine branches fed by store conditional instructions (st[bhwd]cx).
17306 unsigned StoreWidth = 0;
17307 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
17308 isStoreConditional(Intrin: LHS, StoreWidth)) {
17309 if (SDValue Impossible = isImpossibleCompare())
17310 return Impossible;
17311 PPC::Predicate CompOpc;
17312 // eq 0 => ne
17313 // ne 0 => eq
17314 // eq 1 => eq
17315 // ne 1 => ne
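      // The st[bhwd]cx. intrinsics effectively return CR0[EQ] (nonzero when
      // the store-conditional succeeded), which is what the mapping above
      // encodes.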
17316 if (Val == 0)
17317 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
17318 else
17319 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
17320
17321 SDValue Ops[] = {LHS.getOperand(i: 0), LHS.getOperand(i: 2), LHS.getOperand(i: 3),
17322 DAG.getConstant(Val: StoreWidth, DL: dl, VT: MVT::i32)};
17323 auto *MemNode = cast<MemSDNode>(Val&: LHS);
17324 SDValue ConstSt = DAG.getMemIntrinsicNode(
17325 Opcode: PPCISD::STORE_COND, dl,
17326 VTList: DAG.getVTList(VT1: MVT::i32, VT2: MVT::Other, VT3: MVT::Glue), Ops,
17327 MemVT: MemNode->getMemoryVT(), MMO: MemNode->getMemOperand());
17328
17329 SDValue InChain;
17330 // Unchain the branch from the original store conditional.
17331 if (N->getOperand(Num: 0) == LHS.getValue(R: 1))
17332 InChain = LHS.getOperand(i: 0);
17333 else if (N->getOperand(Num: 0).getOpcode() == ISD::TokenFactor) {
17334 SmallVector<SDValue, 4> InChains;
17335 SDValue InTF = N->getOperand(Num: 0);
17336 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
17337 if (InTF.getOperand(i) != LHS.getValue(R: 1))
17338 InChains.push_back(Elt: InTF.getOperand(i));
17339 InChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: InChains);
17340 }
17341
17342 return DAG.getNode(Opcode: PPCISD::COND_BRANCH, DL: dl, VT: MVT::Other, N1: InChain,
17343 N2: DAG.getConstant(Val: CompOpc, DL: dl, VT: MVT::i32),
17344 N3: DAG.getRegister(Reg: PPC::CR0, VT: MVT::i32), N4: N->getOperand(Num: 4),
17345 N5: ConstSt.getValue(R: 2));
17346 }
17347
17348 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17349 getVectorCompareInfo(Intrin: LHS, CompareOpc, isDot, Subtarget)) {
17350 assert(isDot && "Can't compare against a vector result!");
17351
17352 if (SDValue Impossible = isImpossibleCompare())
17353 return Impossible;
17354
17355 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
17356 // Create the PPCISD altivec 'dot' comparison node.
17357 SDValue Ops[] = {
17358 LHS.getOperand(i: 2), // LHS of compare
17359 LHS.getOperand(i: 3), // RHS of compare
17360 DAG.getConstant(Val: CompareOpc, DL: dl, VT: MVT::i32)
17361 };
17362 EVT VTs[] = { LHS.getOperand(i: 2).getValueType(), MVT::Glue };
17363 SDValue CompNode = DAG.getNode(Opcode: PPCISD::VCMP_rec, DL: dl, ResultTys: VTs, Ops);
17364
17365 // Unpack the result based on how the target uses it.
17366 PPC::Predicate CompOpc;
17367 switch (LHS.getConstantOperandVal(i: 1)) {
17368 default: // Can't happen, don't crash on invalid number though.
17369 case 0: // Branch on the value of the EQ bit of CR6.
17370 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
17371 break;
17372 case 1: // Branch on the inverted value of the EQ bit of CR6.
17373 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
17374 break;
17375 case 2: // Branch on the value of the LT bit of CR6.
17376 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
17377 break;
17378 case 3: // Branch on the inverted value of the LT bit of CR6.
17379 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
17380 break;
17381 }
17382
17383 return DAG.getNode(Opcode: PPCISD::COND_BRANCH, DL: dl, VT: MVT::Other, N1: N->getOperand(Num: 0),
17384 N2: DAG.getConstant(Val: CompOpc, DL: dl, VT: MVT::i32),
17385 N3: DAG.getRegister(Reg: PPC::CR6, VT: MVT::i32),
17386 N4: N->getOperand(Num: 4), N5: CompNode.getValue(R: 1));
17387 }
17388 break;
17389 }
17390 case ISD::BUILD_VECTOR:
17391 return DAGCombineBuildVector(N, DCI);
17392 case PPCISD::ADDC:
17393 return DAGCombineAddc(N, DCI);
17394 }
17395
17396 return SDValue();
17397}
17398
17399SDValue
17400PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
17401 SelectionDAG &DAG,
17402 SmallVectorImpl<SDNode *> &Created) const {
17403 // fold (sdiv X, pow2)
17404 EVT VT = N->getValueType(ResNo: 0);
17405 if (VT == MVT::i64 && !Subtarget.isPPC64())
17406 return SDValue();
17407 if ((VT != MVT::i32 && VT != MVT::i64) ||
17408 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
17409 return SDValue();
17410
17411 SDLoc DL(N);
17412 SDValue N0 = N->getOperand(Num: 0);
17413
17414 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
17415 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
17416 SDValue ShiftAmt = DAG.getConstant(Val: Lg2, DL, VT);
17417
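  // PPCISD::SRA_ADDZE is the classic PowerPC sdiv-by-power-of-two idiom:
  // srawi/sradi set CA when a negative dividend has one-bits shifted out, and
  // addze adds that carry back in, rounding the quotient toward zero.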
17418 SDValue Op = DAG.getNode(Opcode: PPCISD::SRA_ADDZE, DL, VT, N1: N0, N2: ShiftAmt);
17419 Created.push_back(Elt: Op.getNode());
17420
17421 if (IsNegPow2) {
17422 Op = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Op);
17423 Created.push_back(Elt: Op.getNode());
17424 }
17425
17426 return Op;
17427}
17428
17429//===----------------------------------------------------------------------===//
17430// Inline Assembly Support
17431//===----------------------------------------------------------------------===//
17432
17433void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17434 KnownBits &Known,
17435 const APInt &DemandedElts,
17436 const SelectionDAG &DAG,
17437 unsigned Depth) const {
17438 Known.resetAll();
17439 switch (Op.getOpcode()) {
17440 default: break;
17441 case PPCISD::LBRX: {
17442 // lhbrx is known to have the top bits cleared out.
17443 if (cast<VTSDNode>(Val: Op.getOperand(i: 2))->getVT() == MVT::i16)
17444 Known.Zero = 0xFFFF0000;
17445 break;
17446 }
17447 case PPCISD::ADDE: {
17448 if (Op.getResNo() == 0) {
17449 // (0|1), _ = ADDE 0, 0, CARRY
17450 SDValue LHS = Op.getOperand(i: 0);
17451 SDValue RHS = Op.getOperand(i: 1);
17452 if (isNullConstant(V: LHS) && isNullConstant(V: RHS))
17453 Known.Zero = ~1ULL;
17454 }
17455 break;
17456 }
17457 case ISD::INTRINSIC_WO_CHAIN: {
17458 switch (Op.getConstantOperandVal(i: 0)) {
17459 default: break;
17460 case Intrinsic::ppc_altivec_vcmpbfp_p:
17461 case Intrinsic::ppc_altivec_vcmpeqfp_p:
17462 case Intrinsic::ppc_altivec_vcmpequb_p:
17463 case Intrinsic::ppc_altivec_vcmpequh_p:
17464 case Intrinsic::ppc_altivec_vcmpequw_p:
17465 case Intrinsic::ppc_altivec_vcmpequd_p:
17466 case Intrinsic::ppc_altivec_vcmpequq_p:
17467 case Intrinsic::ppc_altivec_vcmpgefp_p:
17468 case Intrinsic::ppc_altivec_vcmpgtfp_p:
17469 case Intrinsic::ppc_altivec_vcmpgtsb_p:
17470 case Intrinsic::ppc_altivec_vcmpgtsh_p:
17471 case Intrinsic::ppc_altivec_vcmpgtsw_p:
17472 case Intrinsic::ppc_altivec_vcmpgtsd_p:
17473 case Intrinsic::ppc_altivec_vcmpgtsq_p:
17474 case Intrinsic::ppc_altivec_vcmpgtub_p:
17475 case Intrinsic::ppc_altivec_vcmpgtuh_p:
17476 case Intrinsic::ppc_altivec_vcmpgtuw_p:
17477 case Intrinsic::ppc_altivec_vcmpgtud_p:
17478 case Intrinsic::ppc_altivec_vcmpgtuq_p:
17479 Known.Zero = ~1U; // All bits but the low one are known to be zero.
17480 break;
17481 }
17482 break;
17483 }
17484 case ISD::INTRINSIC_W_CHAIN: {
17485 switch (Op.getConstantOperandVal(i: 1)) {
17486 default:
17487 break;
17488 case Intrinsic::ppc_load2r:
17489 // Top bits are cleared for load2r (which is the same as lhbrx).
17490 Known.Zero = 0xFFFF0000;
17491 break;
17492 }
17493 break;
17494 }
17495 }
17496}
17497
17498Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
17499 switch (Subtarget.getCPUDirective()) {
17500 default: break;
17501 case PPC::DIR_970:
17502 case PPC::DIR_PWR4:
17503 case PPC::DIR_PWR5:
17504 case PPC::DIR_PWR5X:
17505 case PPC::DIR_PWR6:
17506 case PPC::DIR_PWR6X:
17507 case PPC::DIR_PWR7:
17508 case PPC::DIR_PWR8:
17509 case PPC::DIR_PWR9:
17510 case PPC::DIR_PWR10:
17511 case PPC::DIR_PWR11:
17512 case PPC::DIR_PWR_FUTURE: {
17513 if (!ML)
17514 break;
17515
17516 if (!DisableInnermostLoopAlign32) {
      // If the nested loop is an innermost loop, prefer a 32-byte alignment,
17518 // so that we can decrease cache misses and branch-prediction misses.
17519 // Actual alignment of the loop will depend on the hotness check and other
17520 // logic in alignBlocks.
17521 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
17522 return Align(32);
17523 }
17524
17525 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
17526
17527 // For small loops (between 5 and 8 instructions), align to a 32-byte
17528 // boundary so that the entire loop fits in one instruction-cache line.
17529 uint64_t LoopSize = 0;
17530 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
17531 for (const MachineInstr &J : **I) {
17532 LoopSize += TII->getInstSizeInBytes(MI: J);
17533 if (LoopSize > 32)
17534 break;
17535 }
17536
17537 if (LoopSize > 16 && LoopSize <= 32)
17538 return Align(32);
17539
17540 break;
17541 }
17542 }
17543
17544 return TargetLowering::getPrefLoopAlignment(ML);
17545}
17546
17547/// getConstraintType - Given a constraint, return the type of
17548/// constraint it is for this target.
17549PPCTargetLowering::ConstraintType
17550PPCTargetLowering::getConstraintType(StringRef Constraint) const {
17551 if (Constraint.size() == 1) {
17552 switch (Constraint[0]) {
17553 default: break;
17554 case 'b':
17555 case 'r':
17556 case 'f':
17557 case 'd':
17558 case 'v':
17559 case 'y':
17560 return C_RegisterClass;
17561 case 'Z':
17562 // FIXME: While Z does indicate a memory constraint, it specifically
17563 // indicates an r+r address (used in conjunction with the 'y' modifier
17564 // in the replacement string). Currently, we're forcing the base
17565 // register to be r0 in the asm printer (which is interpreted as zero)
17566 // and forming the complete address in the second register. This is
17567 // suboptimal.
17568 return C_Memory;
17569 }
17570 } else if (Constraint == "wc") { // individual CR bits.
17571 return C_RegisterClass;
17572 } else if (Constraint == "wa" || Constraint == "wd" ||
17573 Constraint == "wf" || Constraint == "ws" ||
17574 Constraint == "wi" || Constraint == "ww") {
17575 return C_RegisterClass; // VSX registers.
17576 }
17577 return TargetLowering::getConstraintType(Constraint);
17578}
17579
17580/// Examine constraint type and operand type and determine a weight value.
17581/// This object must already have been set up with the operand type
17582/// and the current alternative constraint selected.
17583TargetLowering::ConstraintWeight
17584PPCTargetLowering::getSingleConstraintMatchWeight(
17585 AsmOperandInfo &info, const char *constraint) const {
17586 ConstraintWeight weight = CW_Invalid;
17587 Value *CallOperandVal = info.CallOperandVal;
17588 // If we don't have a value, we can't do a match,
17589 // but allow it at the lowest weight.
17590 if (!CallOperandVal)
17591 return CW_Default;
17592 Type *type = CallOperandVal->getType();
17593
17594 // Look at the constraint type.
17595 if (StringRef(constraint) == "wc" && type->isIntegerTy(Bitwidth: 1))
17596 return CW_Register; // an individual CR bit.
17597 else if ((StringRef(constraint) == "wa" ||
17598 StringRef(constraint) == "wd" ||
17599 StringRef(constraint) == "wf") &&
17600 type->isVectorTy())
17601 return CW_Register;
17602 else if (StringRef(constraint) == "wi" && type->isIntegerTy(Bitwidth: 64))
    return CW_Register; // holds 64-bit integer data only.
17604 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
17605 return CW_Register;
17606 else if (StringRef(constraint) == "ww" && type->isFloatTy())
17607 return CW_Register;
17608
17609 switch (*constraint) {
17610 default:
17611 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
17612 break;
17613 case 'b':
17614 if (type->isIntegerTy())
17615 weight = CW_Register;
17616 break;
17617 case 'f':
17618 if (type->isFloatTy())
17619 weight = CW_Register;
17620 break;
17621 case 'd':
17622 if (type->isDoubleTy())
17623 weight = CW_Register;
17624 break;
17625 case 'v':
17626 if (type->isVectorTy())
17627 weight = CW_Register;
17628 break;
17629 case 'y':
17630 weight = CW_Register;
17631 break;
17632 case 'Z':
17633 weight = CW_Memory;
17634 break;
17635 }
17636 return weight;
17637}
17638
17639std::pair<unsigned, const TargetRegisterClass *>
17640PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
17641 StringRef Constraint,
17642 MVT VT) const {
17643 if (Constraint.size() == 1) {
17644 // GCC RS6000 Constraint Letters
17645 switch (Constraint[0]) {
17646 case 'b': // R1-R31
17647 if (VT == MVT::i64 && Subtarget.isPPC64())
17648 return std::make_pair(x: 0U, y: &PPC::G8RC_NOX0RegClass);
17649 return std::make_pair(x: 0U, y: &PPC::GPRC_NOR0RegClass);
17650 case 'r': // R0-R31
17651 if (VT == MVT::i64 && Subtarget.isPPC64())
17652 return std::make_pair(x: 0U, y: &PPC::G8RCRegClass);
17653 return std::make_pair(x: 0U, y: &PPC::GPRCRegClass);
17654 // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. The
    // distinction does not matter much here, so give both the same reg classes.
17657 case 'd':
17658 case 'f':
17659 if (Subtarget.hasSPE()) {
17660 if (VT == MVT::f32 || VT == MVT::i32)
17661 return std::make_pair(x: 0U, y: &PPC::GPRCRegClass);
17662 if (VT == MVT::f64 || VT == MVT::i64)
17663 return std::make_pair(x: 0U, y: &PPC::SPERCRegClass);
17664 } else {
17665 if (VT == MVT::f32 || VT == MVT::i32)
17666 return std::make_pair(x: 0U, y: &PPC::F4RCRegClass);
17667 if (VT == MVT::f64 || VT == MVT::i64)
17668 return std::make_pair(x: 0U, y: &PPC::F8RCRegClass);
17669 }
17670 break;
17671 case 'v':
17672 if (Subtarget.hasAltivec() && VT.isVector())
17673 return std::make_pair(x: 0U, y: &PPC::VRRCRegClass);
17674 else if (Subtarget.hasVSX())
17675 // Scalars in Altivec registers only make sense with VSX.
17676 return std::make_pair(x: 0U, y: &PPC::VFRCRegClass);
17677 break;
17678 case 'y': // crrc
17679 return std::make_pair(x: 0U, y: &PPC::CRRCRegClass);
17680 }
17681 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
17682 // An individual CR bit.
17683 return std::make_pair(x: 0U, y: &PPC::CRBITRCRegClass);
17684 } else if ((Constraint == "wa" || Constraint == "wd" ||
17685 Constraint == "wf" || Constraint == "wi") &&
17686 Subtarget.hasVSX()) {
17687 // A VSX register for either a scalar (FP) or vector. There is no
17688 // support for single precision scalars on subtargets prior to Power8.
17689 if (VT.isVector())
17690 return std::make_pair(x: 0U, y: &PPC::VSRCRegClass);
17691 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17692 return std::make_pair(x: 0U, y: &PPC::VSSRCRegClass);
17693 return std::make_pair(x: 0U, y: &PPC::VSFRCRegClass);
17694 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
17695 if (VT == MVT::f32 && Subtarget.hasP8Vector())
17696 return std::make_pair(x: 0U, y: &PPC::VSSRCRegClass);
17697 else
17698 return std::make_pair(x: 0U, y: &PPC::VSFRCRegClass);
17699 } else if (Constraint == "lr") {
17700 if (VT == MVT::i64)
17701 return std::make_pair(x: 0U, y: &PPC::LR8RCRegClass);
17702 else
17703 return std::make_pair(x: 0U, y: &PPC::LRRCRegClass);
17704 }
17705
17706 // Handle special cases of physical registers that are not properly handled
17707 // by the base class.
17708 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
17709 // If we name a VSX register, we can't defer to the base class because it
17710 // will not recognize the correct register (their names will be VSL{0-31}
17711 // and V{0-31} so they won't match). So we match them here.
17712 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
17713 int VSNum = atoi(nptr: Constraint.data() + 3);
17714 assert(VSNum >= 0 && VSNum <= 63 &&
17715 "Attempted to access a vsr out of range");
17716 if (VSNum < 32)
17717 return std::make_pair(x: PPC::VSL0 + VSNum, y: &PPC::VSRCRegClass);
17718 return std::make_pair(x: PPC::V0 + VSNum - 32, y: &PPC::VSRCRegClass);
17719 }
17720
17721 // For float registers, we can't defer to the base class as it will match
17722 // the SPILLTOVSRRC class.
17723 if (Constraint.size() > 3 && Constraint[1] == 'f') {
17724 int RegNum = atoi(nptr: Constraint.data() + 2);
17725 if (RegNum > 31 || RegNum < 0)
17726 report_fatal_error(reason: "Invalid floating point register number");
17727 if (VT == MVT::f32 || VT == MVT::i32)
17728 return Subtarget.hasSPE()
17729 ? std::make_pair(x: PPC::R0 + RegNum, y: &PPC::GPRCRegClass)
17730 : std::make_pair(x: PPC::F0 + RegNum, y: &PPC::F4RCRegClass);
17731 if (VT == MVT::f64 || VT == MVT::i64)
17732 return Subtarget.hasSPE()
17733 ? std::make_pair(x: PPC::S0 + RegNum, y: &PPC::SPERCRegClass)
17734 : std::make_pair(x: PPC::F0 + RegNum, y: &PPC::F8RCRegClass);
17735 }
17736 }
17737
17738 std::pair<unsigned, const TargetRegisterClass *> R =
17739 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
17740
17741 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
17742 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
17743 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
17744 // register.
17745 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
17746 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
17747 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
17748 PPC::GPRCRegClass.contains(Reg: R.first))
17749 return std::make_pair(x: TRI->getMatchingSuperReg(Reg: R.first,
17750 SubIdx: PPC::sub_32, RC: &PPC::G8RCRegClass),
17751 y: &PPC::G8RCRegClass);
17752
17753 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
17754 if (!R.second && StringRef("{cc}").equals_insensitive(RHS: Constraint)) {
17755 R.first = PPC::CR0;
17756 R.second = &PPC::CRRCRegClass;
17757 }
17758 // FIXME: This warning should ideally be emitted in the front end.
17759 const auto &TM = getTargetMachine();
17760 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
17761 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
17762 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
17763 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
      errs() << "warning: vector registers 20 to 31 are reserved in the "
                "default AIX AltiVec ABI and cannot be used\n";
17766 }
17767
17768 return R;
17769}
17770
17771/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
17772/// vector. If it is invalid, don't add anything to Ops.
17773void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
17774 StringRef Constraint,
17775 std::vector<SDValue> &Ops,
17776 SelectionDAG &DAG) const {
17777 SDValue Result;
17778
17779 // Only support length 1 constraints.
17780 if (Constraint.size() > 1)
17781 return;
17782
17783 char Letter = Constraint[0];
17784 switch (Letter) {
17785 default: break;
17786 case 'I':
17787 case 'J':
17788 case 'K':
17789 case 'L':
17790 case 'M':
17791 case 'N':
17792 case 'O':
17793 case 'P': {
17794 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Val&: Op);
17795 if (!CST) return; // Must be an immediate to match.
17796 SDLoc dl(Op);
17797 int64_t Value = CST->getSExtValue();
17798 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
17799 // numbers are printed as such.
17800 switch (Letter) {
17801 default: llvm_unreachable("Unknown constraint letter!");
17802 case 'I': // "I" is a signed 16-bit constant.
17803 if (isInt<16>(x: Value))
17804 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17805 break;
17806 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
17807 if (isShiftedUInt<16, 16>(x: Value))
17808 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17809 break;
17810 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
17811 if (isShiftedInt<16, 16>(x: Value))
17812 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17813 break;
17814 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
17815 if (isUInt<16>(x: Value))
17816 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17817 break;
17818 case 'M': // "M" is a constant that is greater than 31.
17819 if (Value > 31)
17820 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17821 break;
17822 case 'N': // "N" is a positive constant that is an exact power of two.
17823 if (Value > 0 && isPowerOf2_64(Value))
17824 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17825 break;
17826 case 'O': // "O" is the constant zero.
17827 if (Value == 0)
17828 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17829 break;
17830 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
17831 if (isInt<16>(x: -Value))
17832 Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT);
17833 break;
17834 }
17835 break;
17836 }
17837 }
17838
17839 if (Result.getNode()) {
17840 Ops.push_back(x: Result);
17841 return;
17842 }
17843
17844 // Handle standard constraint letters.
17845 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17846}
17847
17848void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17849 SmallVectorImpl<SDValue> &Ops,
17850 SelectionDAG &DAG) const {
17851 if (I.getNumOperands() <= 1)
17852 return;
17853 if (!isa<ConstantSDNode>(Val: Ops[1].getNode()))
17854 return;
17855 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17856 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17857 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
17858 return;
17859
17860 if (MDNode *MDN = I.getMetadata(KindID: LLVMContext::MD_annotation))
17861 Ops.push_back(Elt: DAG.getMDNode(MD: MDN));
17862}
17863
17864// isLegalAddressingMode - Return true if the addressing mode represented
17865// by AM is legal for this target, for a load/store of the specified type.
17866bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
17867 const AddrMode &AM, Type *Ty,
17868 unsigned AS,
17869 Instruction *I) const {
  // The vector-type r+i form has been supported as the DQ form since Power9.
  // We don't check that the offset satisfies the DQ-form requirement
  // (off % 16 == 0), because on PowerPC the immediate form is preferred and
  // the offset can be adjusted to fit it later, in the PPCLoopInstrFormPrep
  // pass. Also, LSR uses only the min and max offsets of an LSRUse to check
  // addressing-mode legality, so we should be a little aggressive here to
  // accommodate the other offsets of that LSRUse.
17876 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
17877 return false;
17878
17879 // PPC allows a sign-extended 16-bit immediate field.
17880 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
17881 return false;
17882
17883 // No global is ever allowed as a base.
17884 if (AM.BaseGV)
17885 return false;
17886
  // PPC only supports r+r,
17888 switch (AM.Scale) {
17889 case 0: // "r+i" or just "i", depending on HasBaseReg.
17890 break;
17891 case 1:
17892 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
17893 return false;
17894 // Otherwise we have r+r or r+i.
17895 break;
17896 case 2:
17897 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
17898 return false;
17899 // Allow 2*r as r+r.
17900 break;
17901 default:
17902 // No other scales are supported.
17903 return false;
17904 }
17905
17906 return true;
17907}
17908
17909SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
17910 SelectionDAG &DAG) const {
17911 MachineFunction &MF = DAG.getMachineFunction();
17912 MachineFrameInfo &MFI = MF.getFrameInfo();
17913 MFI.setReturnAddressIsTaken(true);
17914
17915 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
17916 return SDValue();
17917
17918 SDLoc dl(Op);
17919 unsigned Depth = Op.getConstantOperandVal(i: 0);
17920
17921 // Make sure the function does not optimize away the store of the RA to
17922 // the stack.
17923 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
17924 FuncInfo->setLRStoreRequired();
17925 auto PtrVT = getPointerTy(DL: MF.getDataLayout());
17926
17927 if (Depth > 0) {
17928 // The link register (return address) is saved in the caller's frame
17929 // not the callee's stack frame. So we must get the caller's frame
17930 // address and load the return address at the LR offset from there.
17931 SDValue FrameAddr =
17932 DAG.getLoad(VT: Op.getValueType(), dl, Chain: DAG.getEntryNode(),
17933 Ptr: LowerFRAMEADDR(Op, DAG), PtrInfo: MachinePointerInfo());
17934 SDValue Offset =
17935 DAG.getConstant(Val: Subtarget.getFrameLowering()->getReturnSaveOffset(), DL: dl,
17936 VT: Subtarget.getScalarIntVT());
17937 return DAG.getLoad(VT: PtrVT, dl, Chain: DAG.getEntryNode(),
17938 Ptr: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: FrameAddr, N2: Offset),
17939 PtrInfo: MachinePointerInfo());
17940 }
17941
17942 // Just load the return address off the stack.
17943 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
17944 return DAG.getLoad(VT: PtrVT, dl, Chain: DAG.getEntryNode(), Ptr: RetAddrFI,
17945 PtrInfo: MachinePointerInfo());
17946}
17947
17948SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17949 SelectionDAG &DAG) const {
17950 SDLoc dl(Op);
17951 unsigned Depth = Op.getConstantOperandVal(i: 0);
17952
17953 MachineFunction &MF = DAG.getMachineFunction();
17954 MachineFrameInfo &MFI = MF.getFrameInfo();
17955 MFI.setFrameAddressIsTaken(true);
17956
17957 EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
17958 bool isPPC64 = PtrVT == MVT::i64;
17959
17960 // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be deferred until PEI.
17962 unsigned FrameReg;
17963 if (MF.getFunction().hasFnAttribute(Kind: Attribute::Naked))
17964 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17965 else
17966 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17967
17968 SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl, Reg: FrameReg,
17969 VT: PtrVT);
17970 while (Depth--)
17971 FrameAddr = DAG.getLoad(VT: Op.getValueType(), dl, Chain: DAG.getEntryNode(),
17972 Ptr: FrameAddr, PtrInfo: MachinePointerInfo());
17973 return FrameAddr;
17974}
17975
17976#define GET_REGISTER_MATCHER
17977#include "PPCGenAsmMatcher.inc"
17978
17979Register PPCTargetLowering::getRegisterByName(const char *RegName, LLT VT,
17980 const MachineFunction &MF) const {
17981 bool IsPPC64 = Subtarget.isPPC64();
17982
17983 bool Is64Bit = IsPPC64 && VT == LLT::scalar(SizeInBits: 64);
17984 if (!Is64Bit && VT != LLT::scalar(SizeInBits: 32))
17985 report_fatal_error(reason: "Invalid register global variable type");
17986
17987 Register Reg = MatchRegisterName(Name: RegName);
17988 if (!Reg)
17989 return Reg;
17990
17991 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
17992 // Need followup investigation as to why.
17993 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
17994 report_fatal_error(reason: Twine("Trying to reserve an invalid register \"" +
17995 StringRef(RegName) + "\"."));
17996
  // For 64-bit, convert the R register to the corresponding X (G8RC) register.
17998 if (Is64Bit && StringRef(RegName).starts_with_insensitive(Prefix: "r"))
17999 Reg = Reg.id() - PPC::R0 + PPC::X0;
18000
18001 return Reg;
18002}
18003
18004bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
  // The 32-bit SVR4 ABI accesses everything as got-indirect.
18006 if (Subtarget.is32BitELFABI())
18007 return true;
18008
18009 // AIX accesses everything indirectly through the TOC, which is similar to
18010 // the GOT.
18011 if (Subtarget.isAIXABI())
18012 return true;
18013
18014 CodeModel::Model CModel = getTargetMachine().getCodeModel();
18015 // If it is small or large code model, module locals are accessed
18016 // indirectly by loading their address from .toc/.got.
18017 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
18018 return true;
18019
18020 // JumpTable and BlockAddress are accessed as got-indirect.
18021 if (isa<JumpTableSDNode>(Val: GA) || isa<BlockAddressSDNode>(Val: GA))
18022 return true;
18023
18024 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: GA))
18025 return Subtarget.isGVIndirectSymbol(GV: G->getGlobal());
18026
18027 return false;
18028}
18029
18030bool
18031PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
18032 // The PowerPC target isn't yet aware of offsets.
18033 return false;
18034}
18035
18036bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
18037 const CallInst &I,
18038 MachineFunction &MF,
18039 unsigned Intrinsic) const {
18040 switch (Intrinsic) {
18041 case Intrinsic::ppc_atomicrmw_xchg_i128:
18042 case Intrinsic::ppc_atomicrmw_add_i128:
18043 case Intrinsic::ppc_atomicrmw_sub_i128:
18044 case Intrinsic::ppc_atomicrmw_nand_i128:
18045 case Intrinsic::ppc_atomicrmw_and_i128:
18046 case Intrinsic::ppc_atomicrmw_or_i128:
18047 case Intrinsic::ppc_atomicrmw_xor_i128:
18048 case Intrinsic::ppc_cmpxchg_i128:
18049 Info.opc = ISD::INTRINSIC_W_CHAIN;
18050 Info.memVT = MVT::i128;
18051 Info.ptrVal = I.getArgOperand(i: 0);
18052 Info.offset = 0;
18053 Info.align = Align(16);
18054 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
18055 MachineMemOperand::MOVolatile;
18056 return true;
18057 case Intrinsic::ppc_atomic_load_i128:
18058 Info.opc = ISD::INTRINSIC_W_CHAIN;
18059 Info.memVT = MVT::i128;
18060 Info.ptrVal = I.getArgOperand(i: 0);
18061 Info.offset = 0;
18062 Info.align = Align(16);
18063 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
18064 return true;
18065 case Intrinsic::ppc_atomic_store_i128:
18066 Info.opc = ISD::INTRINSIC_VOID;
18067 Info.memVT = MVT::i128;
18068 Info.ptrVal = I.getArgOperand(i: 2);
18069 Info.offset = 0;
18070 Info.align = Align(16);
18071 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18072 return true;
18073 case Intrinsic::ppc_altivec_lvx:
18074 case Intrinsic::ppc_altivec_lvxl:
18075 case Intrinsic::ppc_altivec_lvebx:
18076 case Intrinsic::ppc_altivec_lvehx:
18077 case Intrinsic::ppc_altivec_lvewx:
18078 case Intrinsic::ppc_vsx_lxvd2x:
18079 case Intrinsic::ppc_vsx_lxvw4x:
18080 case Intrinsic::ppc_vsx_lxvd2x_be:
18081 case Intrinsic::ppc_vsx_lxvw4x_be:
18082 case Intrinsic::ppc_vsx_lxvl:
18083 case Intrinsic::ppc_vsx_lxvll: {
18084 EVT VT;
18085 switch (Intrinsic) {
18086 case Intrinsic::ppc_altivec_lvebx:
18087 VT = MVT::i8;
18088 break;
18089 case Intrinsic::ppc_altivec_lvehx:
18090 VT = MVT::i16;
18091 break;
18092 case Intrinsic::ppc_altivec_lvewx:
18093 VT = MVT::i32;
18094 break;
18095 case Intrinsic::ppc_vsx_lxvd2x:
18096 case Intrinsic::ppc_vsx_lxvd2x_be:
18097 VT = MVT::v2f64;
18098 break;
18099 default:
18100 VT = MVT::v4i32;
18101 break;
18102 }
18103
18104 Info.opc = ISD::INTRINSIC_W_CHAIN;
18105 Info.memVT = VT;
18106 Info.ptrVal = I.getArgOperand(i: 0);
18107 Info.offset = -VT.getStoreSize()+1;
18108 Info.size = 2*VT.getStoreSize()-1;
18109 Info.align = Align(1);
18110 Info.flags = MachineMemOperand::MOLoad;
18111 return true;
18112 }
18113 case Intrinsic::ppc_altivec_stvx:
18114 case Intrinsic::ppc_altivec_stvxl:
18115 case Intrinsic::ppc_altivec_stvebx:
18116 case Intrinsic::ppc_altivec_stvehx:
18117 case Intrinsic::ppc_altivec_stvewx:
18118 case Intrinsic::ppc_vsx_stxvd2x:
18119 case Intrinsic::ppc_vsx_stxvw4x:
18120 case Intrinsic::ppc_vsx_stxvd2x_be:
18121 case Intrinsic::ppc_vsx_stxvw4x_be:
18122 case Intrinsic::ppc_vsx_stxvl:
18123 case Intrinsic::ppc_vsx_stxvll: {
18124 EVT VT;
18125 switch (Intrinsic) {
18126 case Intrinsic::ppc_altivec_stvebx:
18127 VT = MVT::i8;
18128 break;
18129 case Intrinsic::ppc_altivec_stvehx:
18130 VT = MVT::i16;
18131 break;
18132 case Intrinsic::ppc_altivec_stvewx:
18133 VT = MVT::i32;
18134 break;
18135 case Intrinsic::ppc_vsx_stxvd2x:
18136 case Intrinsic::ppc_vsx_stxvd2x_be:
18137 VT = MVT::v2f64;
18138 break;
18139 default:
18140 VT = MVT::v4i32;
18141 break;
18142 }
18143
18144 Info.opc = ISD::INTRINSIC_VOID;
18145 Info.memVT = VT;
18146 Info.ptrVal = I.getArgOperand(i: 1);
18147 Info.offset = -VT.getStoreSize()+1;
18148 Info.size = 2*VT.getStoreSize()-1;
18149 Info.align = Align(1);
18150 Info.flags = MachineMemOperand::MOStore;
18151 return true;
18152 }
18153 case Intrinsic::ppc_stdcx:
18154 case Intrinsic::ppc_stwcx:
18155 case Intrinsic::ppc_sthcx:
18156 case Intrinsic::ppc_stbcx: {
18157 EVT VT;
18158 auto Alignment = Align(8);
18159 switch (Intrinsic) {
18160 case Intrinsic::ppc_stdcx:
18161 VT = MVT::i64;
18162 break;
18163 case Intrinsic::ppc_stwcx:
18164 VT = MVT::i32;
18165 Alignment = Align(4);
18166 break;
18167 case Intrinsic::ppc_sthcx:
18168 VT = MVT::i16;
18169 Alignment = Align(2);
18170 break;
18171 case Intrinsic::ppc_stbcx:
18172 VT = MVT::i8;
18173 Alignment = Align(1);
18174 break;
18175 }
18176 Info.opc = ISD::INTRINSIC_W_CHAIN;
18177 Info.memVT = VT;
18178 Info.ptrVal = I.getArgOperand(i: 0);
18179 Info.offset = 0;
18180 Info.align = Alignment;
18181 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18182 return true;
18183 }
18184 default:
18185 break;
18186 }
18187
18188 return false;
18189}
18190
18191/// It returns EVT::Other if the type should be determined using generic
18192/// target-independent logic.
18193EVT PPCTargetLowering::getOptimalMemOpType(
18194 const MemOp &Op, const AttributeList &FuncAttributes) const {
18195 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
18196 // We should use Altivec/VSX loads and stores when available. For unaligned
18197 // addresses, unaligned VSX loads are only fast starting with the P8.
18198 if (Subtarget.hasAltivec() && Op.size() >= 16) {
18199 if (Op.isMemset() && Subtarget.hasVSX()) {
18200 uint64_t TailSize = Op.size() % 16;
        // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
        // element if the vector element type matches the tail store. For a
        // tail size of 3 or 4 bytes the tail store is i32, so v4i32 cannot be
        // used; pick another legal type instead.
18204 if (TailSize > 2 && TailSize <= 4) {
18205 return MVT::v8i16;
18206 }
18207 return MVT::v4i32;
18208 }
18209 if (Op.isAligned(AlignCheck: Align(16)) || Subtarget.hasP8Vector())
18210 return MVT::v4i32;
18211 }
18212 }
18213
18214 if (Subtarget.isPPC64()) {
18215 return MVT::i64;
18216 }
18217
18218 return MVT::i32;
18219}
18220
18221/// Returns true if it is beneficial to convert a load of a constant
18222/// to just the constant itself.
18223bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
18224 Type *Ty) const {
18225 assert(Ty->isIntegerTy());
18226
18227 unsigned BitSize = Ty->getPrimitiveSizeInBits();
18228 return !(BitSize == 0 || BitSize > 64);
18229}
18230
18231bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
18232 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
18233 return false;
18234 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
18235 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
18236 return NumBits1 == 64 && NumBits2 == 32;
18237}
18238
18239bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
18240 if (!VT1.isInteger() || !VT2.isInteger())
18241 return false;
18242 unsigned NumBits1 = VT1.getSizeInBits();
18243 unsigned NumBits2 = VT2.getSizeInBits();
18244 return NumBits1 == 64 && NumBits2 == 32;
18245}
18246
18247bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
18248 // Generally speaking, zexts are not free, but they are free when they can be
18249 // folded with other operations.
18250 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
18251 EVT MemVT = LD->getMemoryVT();
18252 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
18253 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
18254 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
18255 LD->getExtensionType() == ISD::ZEXTLOAD))
18256 return true;
18257 }
18258
18259 // FIXME: Add other cases...
18260 // - 32-bit shifts with a zext to i64
18261 // - zext after ctlz, bswap, etc.
18262 // - zext after and by a constant mask
18263
18264 return TargetLowering::isZExtFree(Val, VT2);
18265}
18266
18267bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
18268 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
18269 "invalid fpext types");
18270 // Extending to float128 is not free.
18271 if (DestVT == MVT::f128)
18272 return false;
18273 return true;
18274}
18275
18276bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
18277 return isInt<16>(x: Imm) || isUInt<16>(x: Imm);
18278}
18279
18280bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
18281 return isInt<16>(x: Imm) || isUInt<16>(x: Imm);
18282}
18283
18284bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
18285 MachineMemOperand::Flags,
18286 unsigned *Fast) const {
18287 if (DisablePPCUnaligned)
18288 return false;
18289
18290 // PowerPC supports unaligned memory access for simple non-vector types.
18291 // Although accessing unaligned addresses is not as efficient as accessing
18292 // aligned addresses, it is generally more efficient than manual expansion,
  // and generally traps (falling back to software emulation) only when
  // crossing page boundaries.
18295
18296 if (!VT.isSimple())
18297 return false;
18298
18299 if (VT.isFloatingPoint() && !VT.isVector() &&
18300 !Subtarget.allowsUnalignedFPAccess())
18301 return false;
18302
18303 if (VT.getSimpleVT().isVector()) {
18304 if (Subtarget.hasVSX()) {
18305 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
18306 VT != MVT::v4f32 && VT != MVT::v4i32)
18307 return false;
18308 } else {
18309 return false;
18310 }
18311 }
18312
18313 if (VT == MVT::ppcf128)
18314 return false;
18315
18316 if (Fast)
18317 *Fast = 1;
18318
18319 return true;
18320}
18321
18322bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
18323 SDValue C) const {
18324 // Check integral scalar types.
18325 if (!VT.isScalarInteger())
18326 return false;
18327 if (auto *ConstNode = dyn_cast<ConstantSDNode>(Val: C.getNode())) {
18328 if (!ConstNode->getAPIntValue().isSignedIntN(N: 64))
18329 return false;
    // This transformation will generate >= 2 operations. But the following
    // cases will generate <= 2 instructions during ISEL, so exclude them:
    // 1. If the constant multiplier fits in 16 bits, it can be handled by a
    //    single HW instruction, i.e. MULLI.
    // 2. If the multiplier fits in 16 bits after shifting out trailing zeros,
    //    only one extra instruction is needed compared to case 1, i.e. MULLI
    //    followed by RLDICR.
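    // For illustration: a multiplier of 3 << 20 strips to 3 after removing
    // trailing zeros, which fits in 16 bits, so we return false (case 2:
    // MULLI + RLDICR). A multiplier of 0x1FFFF (2^17 - 1) does not fit even
    // after stripping, but 0x1FFFF + 1 is a power of two, so we return true
    // and let the shift/sub decomposition handle it.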
18336 int64_t Imm = ConstNode->getSExtValue();
18337 unsigned Shift = llvm::countr_zero<uint64_t>(Val: Imm);
18338 Imm >>= Shift;
18339 if (isInt<16>(x: Imm))
18340 return false;
18341 uint64_t UImm = static_cast<uint64_t>(Imm);
18342 if (isPowerOf2_64(Value: UImm + 1) || isPowerOf2_64(Value: UImm - 1) ||
18343 isPowerOf2_64(Value: 1 - UImm) || isPowerOf2_64(Value: -1 - UImm))
18344 return true;
18345 }
18346 return false;
18347}
18348
18349bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
18350 EVT VT) const {
18351 return isFMAFasterThanFMulAndFAdd(
18352 F: MF.getFunction(), Ty: VT.getTypeForEVT(Context&: MF.getFunction().getContext()));
18353}
18354
18355bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
18356 Type *Ty) const {
18357 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
18358 return false;
18359 switch (Ty->getScalarType()->getTypeID()) {
18360 case Type::FloatTyID:
18361 case Type::DoubleTyID:
18362 return true;
18363 case Type::FP128TyID:
18364 return Subtarget.hasP9Vector();
18365 default:
18366 return false;
18367 }
18368}
18369
18370// FIXME: add more patterns which are not profitable to hoist.
18371bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
18372 if (!I->hasOneUse())
18373 return true;
18374
18375 Instruction *User = I->user_back();
18376 assert(User && "A single use instruction with no uses.");
18377
18378 switch (I->getOpcode()) {
18379 case Instruction::FMul: {
    // Don't break FMA; PowerPC prefers FMA.
18381 if (User->getOpcode() != Instruction::FSub &&
18382 User->getOpcode() != Instruction::FAdd)
18383 return true;
18384
18385 const TargetOptions &Options = getTargetMachine().Options;
18386 const Function *F = I->getFunction();
18387 const DataLayout &DL = F->getDataLayout();
18388 Type *Ty = User->getOperand(i: 0)->getType();
18389
18390 return !(
18391 isFMAFasterThanFMulAndFAdd(F: *F, Ty) &&
18392 isOperationLegalOrCustom(Op: ISD::FMA, VT: getValueType(DL, Ty)) &&
18393 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
18394 }
18395 case Instruction::Load: {
    // Don't break the "store (load float*)" pattern; it will be combined
    // into "store (load int32)" by a later InstCombine pass. See function
    // combineLoadToOperationType. On PowerPC, loading a floating-point value
    // takes more cycles than loading a 32-bit integer.
18400 LoadInst *LI = cast<LoadInst>(Val: I);
    // For loads that combineLoadToOperationType leaves untouched, such as
    // ordered loads, it should be profitable to hoist them.
    // A swifterror load can only be used with pointer-to-pointer types, so
    // the later type check gets rid of this case.
18405 if (!LI->isUnordered())
18406 return true;
18407
18408 if (User->getOpcode() != Instruction::Store)
18409 return true;
18410
18411 if (I->getType()->getTypeID() != Type::FloatTyID)
18412 return true;
18413
18414 return false;
18415 }
18416 default:
18417 return true;
18418 }
18419 return true;
18420}
18421
18422const MCPhysReg *
18423PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
18424 // LR is a callee-save register, but we must treat it as clobbered by any call
18425 // site. Hence we include LR in the scratch registers, which are in turn added
18426 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
18427 // to CTR, which is used by any indirect call.
18428 static const MCPhysReg ScratchRegs[] = {
18429 PPC::X12, PPC::LR8, PPC::CTR8, 0
18430 };
18431
18432 return ScratchRegs;
18433}
18434
18435Register PPCTargetLowering::getExceptionPointerRegister(
18436 const Constant *PersonalityFn) const {
18437 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
18438}
18439
18440Register PPCTargetLowering::getExceptionSelectorRegister(
18441 const Constant *PersonalityFn) const {
18442 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
18443}
18444
18445bool
18446PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
18447 EVT VT , unsigned DefinedValues) const {
18448 if (VT == MVT::v2i64)
18449 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
18450
18451 if (Subtarget.hasVSX())
18452 return true;
18453
18454 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
18455}
18456
18457Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
18458 if (DisableILPPref || Subtarget.enableMachineScheduler())
18459 return TargetLowering::getSchedulingPreference(N);
18460
18461 return Sched::ILP;
18462}
18463
18464// Create a fast isel object.
18465FastISel *
18466PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
18467 const TargetLibraryInfo *LibInfo) const {
18468 return PPC::createFastISel(FuncInfo, LibInfo);
18469}
18470
18471// 'Inverted' means the FMA opcode after negating one multiplicand.
18472// For example, (fma -a b c) = (fnmsub a b c)
18473static unsigned invertFMAOpcode(unsigned Opc) {
18474 switch (Opc) {
18475 default:
18476 llvm_unreachable("Invalid FMA opcode for PowerPC!");
18477 case ISD::FMA:
18478 return PPCISD::FNMSUB;
18479 case PPCISD::FNMSUB:
18480 return ISD::FMA;
18481 }
18482}
18483
18484SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
18485 bool LegalOps, bool OptForSize,
18486 NegatibleCost &Cost,
18487 unsigned Depth) const {
18488 if (Depth > SelectionDAG::MaxRecursionDepth)
18489 return SDValue();
18490
18491 unsigned Opc = Op.getOpcode();
18492 EVT VT = Op.getValueType();
18493 SDNodeFlags Flags = Op.getNode()->getFlags();
18494
18495 switch (Opc) {
18496 case PPCISD::FNMSUB:
18497 if (!Op.hasOneUse() || !isTypeLegal(VT))
18498 break;
18499
18500 const TargetOptions &Options = getTargetMachine().Options;
18501 SDValue N0 = Op.getOperand(i: 0);
18502 SDValue N1 = Op.getOperand(i: 1);
18503 SDValue N2 = Op.getOperand(i: 2);
18504 SDLoc Loc(Op);
18505
18506 NegatibleCost N2Cost = NegatibleCost::Expensive;
18507 SDValue NegN2 =
18508 getNegatedExpression(Op: N2, DAG, LegalOps, OptForSize, Cost&: N2Cost, Depth: Depth + 1);
18509
18510 if (!NegN2)
18511 return SDValue();
18512
18513 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
18514 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
18515 // These transformations may change sign of zeroes. For example,
18516 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
18517 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
18518 // Try and choose the cheaper one to negate.
18519 NegatibleCost N0Cost = NegatibleCost::Expensive;
18520 SDValue NegN0 = getNegatedExpression(Op: N0, DAG, LegalOps, OptForSize,
18521 Cost&: N0Cost, Depth: Depth + 1);
18522
18523 NegatibleCost N1Cost = NegatibleCost::Expensive;
18524 SDValue NegN1 = getNegatedExpression(Op: N1, DAG, LegalOps, OptForSize,
18525 Cost&: N1Cost, Depth: Depth + 1);
18526
18527 if (NegN0 && N0Cost <= N1Cost) {
18528 Cost = std::min(a: N0Cost, b: N2Cost);
18529 return DAG.getNode(Opcode: Opc, DL: Loc, VT, N1: NegN0, N2: N1, N3: NegN2, Flags);
18530 } else if (NegN1) {
18531 Cost = std::min(a: N1Cost, b: N2Cost);
18532 return DAG.getNode(Opcode: Opc, DL: Loc, VT, N1: N0, N2: NegN1, N3: NegN2, Flags);
18533 }
18534 }
18535
18536 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
18537 if (isOperationLegal(Op: ISD::FMA, VT)) {
18538 Cost = N2Cost;
18539 return DAG.getNode(Opcode: ISD::FMA, DL: Loc, VT, N1: N0, N2: N1, N3: NegN2, Flags);
18540 }
18541
18542 break;
18543 }
18544
18545 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
18546 Cost, Depth);
18547}
18548
18549// Override to enable LOAD_STACK_GUARD lowering on Linux.
18550bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
18551 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
18552 return true;
18553 return TargetLowering::useLoadStackGuardNode(M);
18554}
18555
18556// Override to disable global variable loading on Linux and insert AIX canary
18557// word declaration.
18558void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
18559 if (Subtarget.isAIXABI()) {
18560 M.getOrInsertGlobal(Name: AIXSSPCanaryWordName,
18561 Ty: PointerType::getUnqual(C&: M.getContext()));
18562 return;
18563 }
18564 if (!Subtarget.isTargetLinux())
18565 return TargetLowering::insertSSPDeclarations(M);
18566}
18567
18568Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
18569 if (Subtarget.isAIXABI())
18570 return M.getGlobalVariable(Name: AIXSSPCanaryWordName);
18571 return TargetLowering::getSDagStackGuard(M);
18572}
18573
18574bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
18575 bool ForCodeSize) const {
18576 if (!VT.isSimple() || !Subtarget.hasVSX())
18577 return false;
18578
18579 switch(VT.getSimpleVT().SimpleTy) {
18580 default:
    // For FP types that are currently not supported by the PPC backend, return
    // false. Examples: f16, f80.
18583 return false;
18584 case MVT::f32:
18585 case MVT::f64: {
18586 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
      // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
18588 return true;
18589 }
18590 bool IsExact;
18591 APSInt IntResult(16, false);
18592 // The rounding mode doesn't really matter because we only care about floats
18593 // that can be converted to integers exactly.
18594 Imm.convertToInteger(Result&: IntResult, RM: APFloat::rmTowardZero, IsExact: &IsExact);
18595 // For exact values in the range [-16, 15] we can materialize the float.
18596 if (IsExact && IntResult <= 15 && IntResult >= -16)
18597 return true;
18598 return Imm.isZero();
18599 }
18600 case MVT::ppcf128:
18601 return Imm.isPosZero();
18602 }
18603}
18604
18605// For vector shift operation op, fold
18606// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
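// For illustration, with v4i32 elements (numbits(x) == 32):
//   (srl x, (and y, <31,31,31,31>))  -->  (PPCISD::SRL x, y)
// because the hardware shift already uses only the low log2(32) bits of each
// shift amount.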
18607static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
18608 SelectionDAG &DAG) {
18609 SDValue N0 = N->getOperand(Num: 0);
18610 SDValue N1 = N->getOperand(Num: 1);
18611 EVT VT = N0.getValueType();
18612 unsigned OpSizeInBits = VT.getScalarSizeInBits();
18613 unsigned Opcode = N->getOpcode();
18614 unsigned TargetOpcode;
18615
18616 switch (Opcode) {
18617 default:
18618 llvm_unreachable("Unexpected shift operation");
18619 case ISD::SHL:
18620 TargetOpcode = PPCISD::SHL;
18621 break;
18622 case ISD::SRL:
18623 TargetOpcode = PPCISD::SRL;
18624 break;
18625 case ISD::SRA:
18626 TargetOpcode = PPCISD::SRA;
18627 break;
18628 }
18629
18630 if (VT.isVector() && TLI.isOperationLegal(Op: Opcode, VT) &&
18631 N1->getOpcode() == ISD::AND)
18632 if (ConstantSDNode *Mask = isConstOrConstSplat(N: N1->getOperand(Num: 1)))
18633 if (Mask->getZExtValue() == OpSizeInBits - 1)
18634 return DAG.getNode(Opcode: TargetOpcode, DL: SDLoc(N), VT, N1: N0, N2: N1->getOperand(Num: 0));
18635
18636 return SDValue();
18637}
18638
18639SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
18640 DAGCombinerInfo &DCI) const {
18641 EVT VT = N->getValueType(ResNo: 0);
18642 assert(VT.isVector() && "Vector type expected.");
18643
18644 unsigned Opc = N->getOpcode();
18645 assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
18646 "Unexpected opcode.");
18647
18648 if (!isOperationLegal(Op: Opc, VT))
18649 return SDValue();
18650
18651 EVT EltTy = VT.getScalarType();
18652 unsigned EltBits = EltTy.getSizeInBits();
18653 if (EltTy != MVT::i64 && EltTy != MVT::i32)
18654 return SDValue();
18655
18656 SDValue N1 = N->getOperand(Num: 1);
18657 uint64_t SplatBits = 0;
18658 bool AddSplatCase = false;
18659 unsigned OpcN1 = N1.getOpcode();
18660 if (OpcN1 == PPCISD::VADD_SPLAT &&
18661 N1.getConstantOperandVal(i: 1) == VT.getVectorNumElements()) {
18662 AddSplatCase = true;
18663 SplatBits = N1.getConstantOperandVal(i: 0);
18664 }
18665
18666 if (!AddSplatCase) {
18667 if (OpcN1 != ISD::BUILD_VECTOR)
18668 return SDValue();
18669
18670 unsigned SplatBitSize;
18671 bool HasAnyUndefs;
18672 APInt APSplatBits, APSplatUndef;
18673 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Val&: N1);
18674 bool BVNIsConstantSplat =
18675 BVN->isConstantSplat(SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize,
18676 HasAnyUndefs, MinSplatBits: 0, isBigEndian: !Subtarget.isLittleEndian());
18677 if (!BVNIsConstantSplat || SplatBitSize != EltBits)
18678 return SDValue();
18679 SplatBits = APSplatBits.getZExtValue();
18680 }
18681
18682 SDLoc DL(N);
18683 SDValue N0 = N->getOperand(Num: 0);
  // PPC vector shifts by word/doubleword look at only the low 5/6 bits of the
  // shift vector, which means the max shift amount is 31/63. A shift vector of
  // all ones will be truncated to 31/63, which is useful because vspltiw is
  // limited to the -16 to 15 range.
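  // For illustration: to shift every element of a v4i32 by 31 (or a v2i64 by
  // 63) we can splat the byte 0xFF (e.g. via vspltisb -1) to build an all-ones
  // vector with a single splat-immediate; the shift hardware then reads only
  // its low 5/6 bits.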
18688 if (SplatBits == (EltBits - 1)) {
18689 unsigned NewOpc;
18690 switch (Opc) {
18691 case ISD::SHL:
18692 NewOpc = PPCISD::SHL;
18693 break;
18694 case ISD::SRL:
18695 NewOpc = PPCISD::SRL;
18696 break;
18697 case ISD::SRA:
18698 NewOpc = PPCISD::SRA;
18699 break;
18700 }
18701 SDValue SplatOnes = getCanonicalConstSplat(Val: 255, SplatSize: 1, VT, DAG&: DCI.DAG, dl: DL);
18702 return DCI.DAG.getNode(Opcode: NewOpc, DL, VT, N1: N0, N2: SplatOnes);
18703 }
18704
18705 if (Opc != ISD::SHL || !isOperationLegal(Op: ISD::ADD, VT))
18706 return SDValue();
18707
  // For 64-bit elements there is no splat immediate, so we want to catch a
  // shift by 1 here before the BUILD_VECTOR is replaced by a load.
18710 if (EltTy != MVT::i64 || SplatBits != 1)
18711 return SDValue();
18712
18713 return DCI.DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(N), VT, N1: N0, N2: N0);
18714}
18715
18716SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
18717 if (auto Value = stripModuloOnShift(TLI: *this, N, DAG&: DCI.DAG))
18718 return Value;
18719
18720 if (N->getValueType(ResNo: 0).isVector())
18721 return combineVectorShift(N, DCI);
18722
18723 SDValue N0 = N->getOperand(Num: 0);
18724 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
18725 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
18726 N0.getOpcode() != ISD::SIGN_EXTEND ||
18727 N0.getOperand(i: 0).getValueType() != MVT::i32 || CN1 == nullptr ||
18728 N->getValueType(ResNo: 0) != MVT::i64)
18729 return SDValue();
18730
18731 // We can't save an operation here if the value is already extended, and
18732 // the existing shift is easier to combine.
18733 SDValue ExtsSrc = N0.getOperand(i: 0);
18734 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
18735 ExtsSrc.getOperand(i: 0).getOpcode() == ISD::AssertSext)
18736 return SDValue();
18737
18738 SDLoc DL(N0);
18739 SDValue ShiftBy = SDValue(CN1, 0);
  // We want the shift amount to be i32 on the extswsli, but the incoming shift
  // amount could be i64.
18742 if (ShiftBy.getValueType() == MVT::i64)
18743 ShiftBy = DCI.DAG.getConstant(Val: CN1->getZExtValue(), DL, VT: MVT::i32);
18744
18745 return DCI.DAG.getNode(Opcode: PPCISD::EXTSWSLI, DL, VT: MVT::i64, N1: N0->getOperand(Num: 0),
18746 N2: ShiftBy);
18747}
18748
18749SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
18750 if (auto Value = stripModuloOnShift(TLI: *this, N, DAG&: DCI.DAG))
18751 return Value;
18752
18753 if (N->getValueType(ResNo: 0).isVector())
18754 return combineVectorShift(N, DCI);
18755
18756 return SDValue();
18757}
18758
18759SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
18760 if (auto Value = stripModuloOnShift(TLI: *this, N, DAG&: DCI.DAG))
18761 return Value;
18762
18763 if (N->getValueType(ResNo: 0).isVector())
18764 return combineVectorShift(N, DCI);
18765
18766 return SDValue();
18767}
18768
18769// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
18770// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the expression (addi Z, -C) simplifies to just Z.
// Requirement: -C is in [-32768, 32767], and X and Z are MVT::i64 types.
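// For illustration, with C == 0:
//   (add X, (zext (setne Z, 0)))  -->  (addze X, (addic Z, -1).carry)
// addic Z, -1 produces a carry of 1 exactly when Z != 0, and addze folds that
// carry back into X, so no compare/select sequence is needed.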
18773static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
18774 const PPCSubtarget &Subtarget) {
18775 if (!Subtarget.isPPC64())
18776 return SDValue();
18777
18778 SDValue LHS = N->getOperand(Num: 0);
18779 SDValue RHS = N->getOperand(Num: 1);
18780
18781 auto isZextOfCompareWithConstant = [](SDValue Op) {
18782 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
18783 Op.getValueType() != MVT::i64)
18784 return false;
18785
18786 SDValue Cmp = Op.getOperand(i: 0);
18787 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
18788 Cmp.getOperand(i: 0).getValueType() != MVT::i64)
18789 return false;
18790
18791 if (auto *Constant = dyn_cast<ConstantSDNode>(Val: Cmp.getOperand(i: 1))) {
18792 int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction,
      // -C is required to be in [-32768, 32767].
18795 return isInt<16>(x: NegConstant);
18796 }
18797
18798 return false;
18799 };
18800
18801 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
18802 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
18803
18804 // If there is a pattern, canonicalize a zext operand to the RHS.
18805 if (LHSHasPattern && !RHSHasPattern)
18806 std::swap(a&: LHS, b&: RHS);
18807 else if (!LHSHasPattern && !RHSHasPattern)
18808 return SDValue();
18809
18810 SDLoc DL(N);
18811 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
18812 SDVTList VTs = DAG.getVTList(VT1: MVT::i64, VT2: CarryType);
18813 SDValue Cmp = RHS.getOperand(i: 0);
18814 SDValue Z = Cmp.getOperand(i: 0);
18815 auto *Constant = cast<ConstantSDNode>(Val: Cmp.getOperand(i: 1));
18816 int64_t NegConstant = 0 - Constant->getSExtValue();
18817
18818 switch(cast<CondCodeSDNode>(Val: Cmp.getOperand(i: 2))->get()) {
18819 default: break;
18820 case ISD::SETNE: {
18821 // when C == 0
18822 // --> addze X, (addic Z, -1).carry
18823 // /
18824 // add X, (zext(setne Z, C))--
18825 // \ when -32768 <= -C <= 32767 && C != 0
18826 // --> addze X, (addic (addi Z, -C), -1).carry
18827 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: Z,
18828 N2: DAG.getConstant(Val: NegConstant, DL, VT: MVT::i64));
18829 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18830 SDValue Addc =
18831 DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: CarryType),
18832 N1: AddOrZ, N2: DAG.getAllOnesConstant(DL, VT: MVT::i64),
18833 N3: DAG.getConstant(Val: 0, DL, VT: CarryType));
18834 return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: VTs, N1: LHS,
18835 N2: DAG.getConstant(Val: 0, DL, VT: MVT::i64),
18836 N3: SDValue(Addc.getNode(), 1));
18837 }
18838 case ISD::SETEQ: {
18839 // when C == 0
18840 // --> addze X, (subfic Z, 0).carry
18841 // /
18842 // add X, (zext(sete Z, C))--
18843 // \ when -32768 <= -C <= 32767 && C != 0
18844 // --> addze X, (subfic (addi Z, -C), 0).carry
18845 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: Z,
18846 N2: DAG.getConstant(Val: NegConstant, DL, VT: MVT::i64));
18847 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18848 SDValue Subc =
18849 DAG.getNode(Opcode: ISD::USUBO_CARRY, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: CarryType),
18850 N1: DAG.getConstant(Val: 0, DL, VT: MVT::i64), N2: AddOrZ,
18851 N3: DAG.getConstant(Val: 0, DL, VT: CarryType));
18852 SDValue Invert = DAG.getNode(Opcode: ISD::XOR, DL, VT: CarryType, N1: Subc.getValue(R: 1),
18853 N2: DAG.getConstant(Val: 1UL, DL, VT: CarryType));
18854 return DAG.getNode(Opcode: ISD::UADDO_CARRY, DL, VTList: VTs, N1: LHS,
18855 N2: DAG.getConstant(Val: 0, DL, VT: MVT::i64), N3: Invert);
18856 }
18857 }
18858
18859 return SDValue();
18860}
18861
18862// Transform
18863// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
18864// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
18865// In this case both C1 and C2 must be known constants.
// C1+C2 must fit into a 34-bit signed integer.
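// For illustration:
//   (add 16, (MAT_PCREL_ADDR @g + 8))  -->  (MAT_PCREL_ADDR @g + 24)
// so the whole address folds into the single PC-relative materialization
// (typically a paddi), as long as the combined offset still fits in 34 bits.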
18867static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
18868 const PPCSubtarget &Subtarget) {
18869 if (!Subtarget.isUsingPCRelativeCalls())
18870 return SDValue();
18871
18872 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
18873 // If we find that node try to cast the Global Address and the Constant.
18874 SDValue LHS = N->getOperand(Num: 0);
18875 SDValue RHS = N->getOperand(Num: 1);
18876
18877 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
18878 std::swap(a&: LHS, b&: RHS);
18879
18880 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
18881 return SDValue();
18882
18883 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
18884 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(Val: LHS.getOperand(i: 0));
18885 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(Val&: RHS);
18886
18887 // Check that both casts succeeded.
18888 if (!GSDN || !ConstNode)
18889 return SDValue();
18890
18891 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
18892 SDLoc DL(GSDN);
18893
18894 // The signed int offset needs to fit in 34 bits.
18895 if (!isInt<34>(x: NewOffset))
18896 return SDValue();
18897
18898 // The new global address is a copy of the old global address except
18899 // that it has the updated Offset.
18900 SDValue GA =
18901 DAG.getTargetGlobalAddress(GV: GSDN->getGlobal(), DL, VT: GSDN->getValueType(ResNo: 0),
18902 offset: NewOffset, TargetFlags: GSDN->getTargetFlags());
18903 SDValue MatPCRel =
18904 DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: GSDN->getValueType(ResNo: 0), Operand: GA);
18905 return MatPCRel;
18906}
18907
18908SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
18909 if (auto Value = combineADDToADDZE(N, DAG&: DCI.DAG, Subtarget))
18910 return Value;
18911
18912 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DAG&: DCI.DAG, Subtarget))
18913 return Value;
18914
18915 return SDValue();
18916}
18917
// Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situation where we extract a subset
// of bits from a 128-bit float.
// This can take one of two forms:
// 1) BITCAST of f128 feeding TRUNCATE
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// This is required because we do not have a legal i128 type, and we want to
// avoid having to store the f128 and then reload part of it.
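// For illustration, on a little-endian subtarget:
//   (trunc (srl (bitcast f128 %x to i128), 64) to i64)
// becomes (extract_vector_elt (bitcast %x to v2i64), 1), avoiding the
// store/reload of the f128 value.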
18927SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
18928 DAGCombinerInfo &DCI) const {
18929 // If we are using CRBits then try that first.
18930 if (Subtarget.useCRBits()) {
18931 // Check if CRBits did anything and return that if it did.
18932 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
18933 return CRTruncValue;
18934 }
18935
18936 SDLoc dl(N);
18937 SDValue Op0 = N->getOperand(Num: 0);
18938
18939 // Looking for a truncate of i128 to i64.
18940 if (Op0.getValueType() != MVT::i128 || N->getValueType(ResNo: 0) != MVT::i64)
18941 return SDValue();
18942
18943 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
18944
18945 // SRL feeding TRUNCATE.
18946 if (Op0.getOpcode() == ISD::SRL) {
18947 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Val: Op0.getOperand(i: 1));
18948 // The right shift has to be by 64 bits.
18949 if (!ConstNode || ConstNode->getZExtValue() != 64)
18950 return SDValue();
18951
18952 // Switch the element number to extract.
18953 EltToExtract = EltToExtract ? 0 : 1;
18954 // Update Op0 past the SRL.
18955 Op0 = Op0.getOperand(i: 0);
18956 }
18957
18958 // BITCAST feeding a TRUNCATE possibly via SRL.
18959 if (Op0.getOpcode() == ISD::BITCAST &&
18960 Op0.getValueType() == MVT::i128 &&
18961 Op0.getOperand(i: 0).getValueType() == MVT::f128) {
18962 SDValue Bitcast = DCI.DAG.getBitcast(VT: MVT::v2i64, V: Op0.getOperand(i: 0));
18963 return DCI.DAG.getNode(
18964 Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i64, N1: Bitcast,
18965 N2: DCI.DAG.getTargetConstant(Val: EltToExtract, DL: dl, VT: MVT::i32));
18966 }
18967 return SDValue();
18968}
18969
18970SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
18971 SelectionDAG &DAG = DCI.DAG;
18972
18973 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N: N->getOperand(Num: 1));
18974 if (!ConstOpOrElement)
18975 return SDValue();
18976
  // An imul is usually smaller than the alternative sequence for a legal type.
18978 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
18979 isOperationLegal(Op: ISD::MUL, VT: N->getValueType(ResNo: 0)))
18980 return SDValue();
18981
18982 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
18983 switch (this->Subtarget.getCPUDirective()) {
18984 default:
18985 // TODO: enhance the condition for subtarget before pwr8
18986 return false;
18987 case PPC::DIR_PWR8:
18988 // type mul add shl
18989 // scalar 4 1 1
18990 // vector 7 2 2
18991 return true;
18992 case PPC::DIR_PWR9:
18993 case PPC::DIR_PWR10:
18994 case PPC::DIR_PWR11:
18995 case PPC::DIR_PWR_FUTURE:
18996 // type mul add shl
18997 // scalar 5 2 2
18998 // vector 7 2 2
18999
      // The cycle ratios of the relevant operations are shown in the table
      // above. mul costs 5 (scalar) / 7 (vector), while add/sub/shl all cost
      // 2 for both scalar and vector types. The two-instruction patterns
      // (add/sub + shl, total 4) are therefore always profitable, but the
      // three-instruction pattern
      // (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6 (sub + add + shl),
      // so we should only do it for vector types.
19006 return IsAddOne && IsNeg ? VT.isVector() : true;
19007 }
19008 };
19009
19010 EVT VT = N->getValueType(ResNo: 0);
19011 SDLoc DL(N);
19012
19013 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
19014 bool IsNeg = MulAmt.isNegative();
19015 APInt MulAmtAbs = MulAmt.abs();
19016
19017 if ((MulAmtAbs - 1).isPowerOf2()) {
19018 // (mul x, 2^N + 1) => (add (shl x, N), x)
19019 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
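    // For illustration: (mul x, 9) has MulAmtAbs - 1 == 8 == 2^3, so it
    // becomes (add (shl x, 3), x); (mul x, -9) additionally wraps the result
    // in (sub 0, ...).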
19020
19021 if (!IsProfitable(IsNeg, true, VT))
19022 return SDValue();
19023
19024 SDValue Op0 = N->getOperand(Num: 0);
19025 SDValue Op1 =
19026 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: 0),
19027 N2: DAG.getConstant(Val: (MulAmtAbs - 1).logBase2(), DL, VT));
19028 SDValue Res = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: Op1);
19029
19030 if (!IsNeg)
19031 return Res;
19032
19033 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Res);
19034 } else if ((MulAmtAbs + 1).isPowerOf2()) {
19035 // (mul x, 2^N - 1) => (sub (shl x, N), x)
19036 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
19037
19038 if (!IsProfitable(IsNeg, false, VT))
19039 return SDValue();
19040
19041 SDValue Op0 = N->getOperand(Num: 0);
19042 SDValue Op1 =
19043 DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: N->getOperand(Num: 0),
19044 N2: DAG.getConstant(Val: (MulAmtAbs + 1).logBase2(), DL, VT));
19045
19046 if (!IsNeg)
19047 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op1, N2: Op0);
19048 else
19049 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0, N2: Op1);
19050
19051 } else {
19052 return SDValue();
19053 }
19054}
19055
// Combine an FMA-like op (such as fnmsub) with fnegs into the appropriate op.
// Do this in the combiner since we need to check SD flags and other subtarget
// features.
19058SDValue PPCTargetLowering::combineFMALike(SDNode *N,
19059 DAGCombinerInfo &DCI) const {
19060 SDValue N0 = N->getOperand(Num: 0);
19061 SDValue N1 = N->getOperand(Num: 1);
19062 SDValue N2 = N->getOperand(Num: 2);
19063 SDNodeFlags Flags = N->getFlags();
19064 EVT VT = N->getValueType(ResNo: 0);
19065 SelectionDAG &DAG = DCI.DAG;
19066 const TargetOptions &Options = getTargetMachine().Options;
19067 unsigned Opc = N->getOpcode();
19068 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
19069 bool LegalOps = !DCI.isBeforeLegalizeOps();
19070 SDLoc Loc(N);
19071
19072 if (!isOperationLegal(Op: ISD::FMA, VT))
19073 return SDValue();
19074
19075 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
19076 // since (fnmsub a b c)=-0 while c-ab=+0.
19077 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
19078 return SDValue();
19079
19080 // (fma (fneg a) b c) => (fnmsub a b c)
19081 // (fnmsub (fneg a) b c) => (fma a b c)
19082 if (SDValue NegN0 = getCheaperNegatedExpression(Op: N0, DAG, LegalOps, OptForSize: CodeSize))
19083 return DAG.getNode(Opcode: invertFMAOpcode(Opc), DL: Loc, VT, N1: NegN0, N2: N1, N3: N2, Flags);
19084
19085 // (fma a (fneg b) c) => (fnmsub a b c)
19086 // (fnmsub a (fneg b) c) => (fma a b c)
19087 if (SDValue NegN1 = getCheaperNegatedExpression(Op: N1, DAG, LegalOps, OptForSize: CodeSize))
19088 return DAG.getNode(Opcode: invertFMAOpcode(Opc), DL: Loc, VT, N1: N0, N2: NegN1, N3: N2, Flags);
19089
19090 return SDValue();
19091}
19092
19093bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
19095 if (!Subtarget.is64BitELFABI())
19096 return false;
19097
19098 // If not a tail call then no need to proceed.
19099 if (!CI->isTailCall())
19100 return false;
19101
19102 // If sibling calls have been disabled and tail-calls aren't guaranteed
19103 // there is no reason to duplicate.
19104 auto &TM = getTargetMachine();
19105 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19106 return false;
19107
19108 // Can't tail call a function called indirectly, or if it has variadic args.
19109 const Function *Callee = CI->getCalledFunction();
19110 if (!Callee || Callee->isVarArg())
19111 return false;
19112
19113 // Make sure the callee and caller calling conventions are eligible for tco.
19114 const Function *Caller = CI->getParent()->getParent();
19115 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC: Caller->getCallingConv(),
19116 CalleeCC: CI->getCallingConv()))
19117 return false;
19118
19119 // If the function is local then we have a good chance at tail-calling it
19120 return getTargetMachine().shouldAssumeDSOLocal(GV: Callee);
19121}
19122
19123bool PPCTargetLowering::
19124isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19125 const Value *Mask = AndI.getOperand(i: 1);
19126 // If the mask is suitable for andi. or andis. we should sink the and.
19127 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Mask)) {
19128 // Can't handle constants wider than 64-bits.
19129 if (CI->getBitWidth() > 64)
19130 return false;
19131 int64_t ConstVal = CI->getZExtValue();
19132 return isUInt<16>(x: ConstVal) ||
19133 (isUInt<16>(x: ConstVal >> 16) && !(ConstVal & 0xFFFF));
19134 }
19135
19136 // For non-constant masks, we can always use the record-form and.
19137 return true;
19138}
19139
/// getAddrModeForFlags - Based on the set of address flags, select the
/// optimal instruction format to match against.
19142PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
19143 // This is not a node we should be handling here.
19144 if (Flags == PPC::MOF_None)
19145 return PPC::AM_None;
19146 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
19147 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_DForm))
19148 if ((Flags & FlagSet) == FlagSet)
19149 return PPC::AM_DForm;
19150 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_DSForm))
19151 if ((Flags & FlagSet) == FlagSet)
19152 return PPC::AM_DSForm;
19153 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_DQForm))
19154 if ((Flags & FlagSet) == FlagSet)
19155 return PPC::AM_DQForm;
19156 for (auto FlagSet : AddrModesMap.at(k: PPC::AM_PrefixDForm))
19157 if ((Flags & FlagSet) == FlagSet)
19158 return PPC::AM_PrefixDForm;
19159 // If no other forms are selected, return an X-Form as it is the most
19160 // general addressing mode.
19161 return PPC::AM_XForm;
19162}
19163
19164/// Set alignment flags based on whether or not the Frame Index is aligned.
19165/// Utilized when computing flags for address computation when selecting
19166/// load and store instructions.
19167static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
19168 SelectionDAG &DAG) {
19169 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
19170 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: IsAdd ? N.getOperand(i: 0) : N);
19171 if (!FI)
19172 return;
19173 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
19174 unsigned FrameIndexAlign = MFI.getObjectAlign(ObjectIdx: FI->getIndex()).value();
19175 // If this is (add $FI, $S16Imm), the alignment flags are already set
19176 // based on the immediate. We just need to clear the alignment flags
19177 // if the FI alignment is weaker.
19178 if ((FrameIndexAlign % 4) != 0)
19179 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
19180 if ((FrameIndexAlign % 16) != 0)
19181 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
19182 // If the address is a plain FrameIndex, set alignment flags based on
19183 // FI alignment.
19184 if (!IsAdd) {
19185 if ((FrameIndexAlign % 4) == 0)
19186 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19187 if ((FrameIndexAlign % 16) == 0)
19188 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19189 }
19190}
19191
/// Given a node, compute flags that are used for address computation when
/// selecting load and store instructions. The flags computed are stored in
/// FlagSet. This function takes into account whether the node is a constant,
/// an ADD, or an OR, and computes the address flags accordingly.
19196static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
19197 SelectionDAG &DAG) {
19198 // Set the alignment flags for the node depending on if the node is
19199 // 4-byte or 16-byte aligned.
19200 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
19201 if ((Imm & 0x3) == 0)
19202 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19203 if ((Imm & 0xf) == 0)
19204 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19205 };
19206
19207 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
19208 // All 32-bit constants can be computed as LIS + Disp.
19209 const APInt &ConstImm = CN->getAPIntValue();
19210 if (ConstImm.isSignedIntN(N: 32)) { // Flag to handle 32-bit constants.
19211 FlagSet |= PPC::MOF_AddrIsSImm32;
19212 SetAlignFlagsForImm(ConstImm.getZExtValue());
19213 setAlignFlagsForFI(N, FlagSet, DAG);
19214 }
19215 if (ConstImm.isSignedIntN(N: 34)) // Flag to handle 34-bit constants.
19216 FlagSet |= PPC::MOF_RPlusSImm34;
19217 else // Let constant materialization handle large constants.
19218 FlagSet |= PPC::MOF_NotAddNorCst;
19219 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
19220 // This address can be represented as an addition of:
19221 // - Register + Imm16 (possibly a multiple of 4/16)
19222 // - Register + Imm34
19223 // - Register + PPCISD::Lo
19224 // - Register + Register
19225 // In any case, we won't have to match this as Base + Zero.
19226 SDValue RHS = N.getOperand(i: 1);
19227 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: RHS)) {
19228 const APInt &ConstImm = CN->getAPIntValue();
19229 if (ConstImm.isSignedIntN(N: 16)) {
19230 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
19231 SetAlignFlagsForImm(ConstImm.getZExtValue());
19232 setAlignFlagsForFI(N, FlagSet, DAG);
19233 }
19234 if (ConstImm.isSignedIntN(N: 34))
19235 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
19236 else
19237 FlagSet |= PPC::MOF_RPlusR; // Register.
19238 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(i: 1))
19239 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
19240 else
19241 FlagSet |= PPC::MOF_RPlusR;
19242 } else { // The address computation is not a constant or an addition.
19243 setAlignFlagsForFI(N, FlagSet, DAG);
19244 FlagSet |= PPC::MOF_NotAddNorCst;
19245 }
19246}
19247
19248static bool isPCRelNode(SDValue N) {
19249 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
19250 isValidPCRelNode<ConstantPoolSDNode>(N) ||
19251 isValidPCRelNode<GlobalAddressSDNode>(N) ||
19252 isValidPCRelNode<JumpTableSDNode>(N) ||
19253 isValidPCRelNode<BlockAddressSDNode>(N));
19254}
19255
/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
19258unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
19259 SelectionDAG &DAG) const {
19260 unsigned FlagSet = PPC::MOF_None;
19261
19262 // Compute subtarget flags.
19263 if (!Subtarget.hasP9Vector())
19264 FlagSet |= PPC::MOF_SubtargetBeforeP9;
19265 else
19266 FlagSet |= PPC::MOF_SubtargetP9;
19267
19268 if (Subtarget.hasPrefixInstrs())
19269 FlagSet |= PPC::MOF_SubtargetP10;
19270
19271 if (Subtarget.hasSPE())
19272 FlagSet |= PPC::MOF_SubtargetSPE;
19273
19274 // Check if we have a PCRel node and return early.
19275 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
19276 return FlagSet;
19277
19278 // If the node is the paired load/store intrinsics, compute flags for
19279 // address computation and return early.
19280 unsigned ParentOp = Parent->getOpcode();
19281 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
19282 (ParentOp == ISD::INTRINSIC_VOID))) {
19283 unsigned ID = Parent->getConstantOperandVal(Num: 1);
19284 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
19285 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
19286 ? Parent->getOperand(Num: 2)
19287 : Parent->getOperand(Num: 3);
19288 computeFlagsForAddressComputation(N: IntrinOp, FlagSet, DAG);
19289 FlagSet |= PPC::MOF_Vector;
19290 return FlagSet;
19291 }
19292 }
19293
19294 // Mark this as something we don't want to handle here if it is atomic
19295 // or pre-increment instruction.
19296 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Val: Parent))
19297 if (LSB->isIndexed())
19298 return PPC::MOF_None;
19299
  // Compute in-memory type flags. This is based on whether the type is a
  // scalar integer, a float, or a vector.
19302 const MemSDNode *MN = dyn_cast<MemSDNode>(Val: Parent);
19303 assert(MN && "Parent should be a MemSDNode!");
19304 EVT MemVT = MN->getMemoryVT();
19305 unsigned Size = MemVT.getSizeInBits();
19306 if (MemVT.isScalarInteger()) {
19307 assert(Size <= 128 &&
19308 "Not expecting scalar integers larger than 16 bytes!");
19309 if (Size < 32)
19310 FlagSet |= PPC::MOF_SubWordInt;
19311 else if (Size == 32)
19312 FlagSet |= PPC::MOF_WordInt;
19313 else
19314 FlagSet |= PPC::MOF_DoubleWordInt;
19315 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
19316 if (Size == 128)
19317 FlagSet |= PPC::MOF_Vector;
19318 else if (Size == 256) {
19319 assert(Subtarget.pairedVectorMemops() &&
19320 "256-bit vectors are only available when paired vector memops is "
19321 "enabled!");
19322 FlagSet |= PPC::MOF_Vector;
19323 } else
19324 llvm_unreachable("Not expecting illegal vectors!");
19325 } else { // Floating point type: can be scalar, f128 or vector types.
19326 if (Size == 32 || Size == 64)
19327 FlagSet |= PPC::MOF_ScalarFloat;
19328 else if (MemVT == MVT::f128 || MemVT.isVector())
19329 FlagSet |= PPC::MOF_Vector;
19330 else
19331 llvm_unreachable("Not expecting illegal scalar floats!");
19332 }
19333
19334 // Compute flags for address computation.
19335 computeFlagsForAddressComputation(N, FlagSet, DAG);
19336
19337 // Compute type extension flags.
19338 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Val: Parent)) {
19339 switch (LN->getExtensionType()) {
19340 case ISD::SEXTLOAD:
19341 FlagSet |= PPC::MOF_SExt;
19342 break;
19343 case ISD::EXTLOAD:
19344 case ISD::ZEXTLOAD:
19345 FlagSet |= PPC::MOF_ZExt;
19346 break;
19347 case ISD::NON_EXTLOAD:
19348 FlagSet |= PPC::MOF_NoExt;
19349 break;
19350 }
19351 } else
19352 FlagSet |= PPC::MOF_NoExt;
19353
19354 // For integers, no extension is the same as zero extension.
19355 // We set the extension mode to zero extension so we don't have
19356 // to add separate entries in AddrModesMap for loads and stores.
19357 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
19358 FlagSet |= PPC::MOF_ZExt;
19359 FlagSet &= ~PPC::MOF_NoExt;
19360 }
19361
19362 // If we don't have prefixed instructions, 34-bit constants should be
19363 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
19364 bool IsNonP1034BitConst =
19365 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
19366 FlagSet) == PPC::MOF_RPlusSImm34;
19367 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
19368 IsNonP1034BitConst)
19369 FlagSet |= PPC::MOF_NotAddNorCst;
19370
19371 return FlagSet;
19372}
19373
19374/// SelectForceXFormMode - Given the specified address, force it to be
19375/// represented as an indexed [r+r] operation (an XForm instruction).
19376PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
19377 SDValue &Base,
19378 SelectionDAG &DAG) const {
19379
19380 PPC::AddrMode Mode = PPC::AM_XForm;
19381 int16_t ForceXFormImm = 0;
19382 if (provablyDisjointOr(DAG, N) &&
19383 !isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: ForceXFormImm)) {
19384 Disp = N.getOperand(i: 0);
19385 Base = N.getOperand(i: 1);
19386 return Mode;
19387 }
19388
19389 // If the address is the result of an add, we will utilize the fact that the
19390 // address calculation includes an implicit add. However, we can reduce
19391 // register pressure if we do not materialize a constant just for use as the
19392 // index register. We only get rid of the add if it is not an add of a
19393 // value and a 16-bit signed constant and both have a single use.
19394 if (N.getOpcode() == ISD::ADD &&
19395 (!isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: ForceXFormImm) ||
19396 !N.getOperand(i: 1).hasOneUse() || !N.getOperand(i: 0).hasOneUse())) {
19397 Disp = N.getOperand(i: 0);
19398 Base = N.getOperand(i: 1);
19399 return Mode;
19400 }
19401
19402 // Otherwise, use R0 as the base register.
19403 Disp = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19404 VT: N.getValueType());
19405 Base = N;
19406
19407 return Mode;
19408}
19409
19410bool PPCTargetLowering::splitValueIntoRegisterParts(
19411 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
19412 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
19413 EVT ValVT = Val.getValueType();
19414 // If we are splitting a scalar integer into f64 parts (i.e. so they
19415 // can be placed into VFRC registers), we need to zero extend and
19416 // bitcast the values. This will ensure the value is placed into a
19417 // VSR using direct moves or stack operations as needed.
19418 if (PartVT == MVT::f64 &&
19419 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
19420 Val = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i64, Operand: Val);
19421 Val = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MVT::f64, Operand: Val);
19422 Parts[0] = Val;
19423 return true;
19424 }
19425 return false;
19426}
19427
19428SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
19429 SelectionDAG &DAG) const {
19430 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19431 TargetLowering::CallLoweringInfo CLI(DAG);
19432 EVT RetVT = Op.getValueType();
19433 Type *RetTy = RetVT.getTypeForEVT(Context&: *DAG.getContext());
19434 SDValue Callee =
19435 DAG.getExternalSymbol(Sym: LibCallName, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
19436 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(Ty: RetTy, IsSigned: false);
19437 TargetLowering::ArgListTy Args;
19438 TargetLowering::ArgListEntry Entry;
19439 for (const SDValue &N : Op->op_values()) {
19440 EVT ArgVT = N.getValueType();
19441 Type *ArgTy = ArgVT.getTypeForEVT(Context&: *DAG.getContext());
19442 Entry.Node = N;
19443 Entry.Ty = ArgTy;
19444 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(Ty: ArgTy, IsSigned: SignExtend);
19445 Entry.IsZExt = !Entry.IsSExt;
19446 Args.push_back(x: Entry);
19447 }
19448
19449 SDValue InChain = DAG.getEntryNode();
19450 SDValue TCChain = InChain;
19451 const Function &F = DAG.getMachineFunction().getFunction();
19452 bool isTailCall =
19453 TLI.isInTailCallPosition(DAG, Node: Op.getNode(), Chain&: TCChain) &&
19454 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
19455 if (isTailCall)
19456 InChain = TCChain;
19457 CLI.setDebugLoc(SDLoc(Op))
19458 .setChain(InChain)
19459 .setLibCallee(CC: CallingConv::C, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
19460 .setTailCall(isTailCall)
19461 .setSExtResult(SignExtend)
19462 .setZExtResult(!SignExtend)
19463 .setIsPostTypeLegalization(true);
19464 return TLI.LowerCallTo(CLI).first;
19465}
19466
19467SDValue PPCTargetLowering::lowerLibCallBasedOnType(
19468 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
19469 SelectionDAG &DAG) const {
19470 if (Op.getValueType() == MVT::f32)
19471 return lowerToLibCall(LibCallName: LibCallFloatName, Op, DAG);
19472
19473 if (Op.getValueType() == MVT::f64)
19474 return lowerToLibCall(LibCallName: LibCallDoubleName, Op, DAG);
19475
19476 return SDValue();
19477}
19478
19479bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
19480 SDNodeFlags Flags = Op.getNode()->getFlags();
19481 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
19482 Flags.hasNoNaNs() && Flags.hasNoInfs();
19483}
19484
19485bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
19486 return Op.getNode()->getFlags().hasApproximateFuncs();
19487}
19488
19489bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
19490 return getTargetMachine().Options.PPCGenScalarMASSEntries;
19491}
19492
19493SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
19494 const char *LibCallFloatName,
19495 const char *LibCallDoubleNameFinite,
19496 const char *LibCallFloatNameFinite,
19497 SDValue Op,
19498 SelectionDAG &DAG) const {
19499 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
19500 return SDValue();
19501
19502 if (!isLowringToMASSFiniteSafe(Op))
19503 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
19504 DAG);
19505
19506 return lowerLibCallBasedOnType(LibCallFloatName: LibCallFloatNameFinite,
19507 LibCallDoubleName: LibCallDoubleNameFinite, Op, DAG);
19508}
19509
19510SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
19511 return lowerLibCallBase(LibCallDoubleName: "__xl_pow", LibCallFloatName: "__xl_powf", LibCallDoubleNameFinite: "__xl_pow_finite",
19512 LibCallFloatNameFinite: "__xl_powf_finite", Op, DAG);
19513}
19514
19515SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
19516 return lowerLibCallBase(LibCallDoubleName: "__xl_sin", LibCallFloatName: "__xl_sinf", LibCallDoubleNameFinite: "__xl_sin_finite",
19517 LibCallFloatNameFinite: "__xl_sinf_finite", Op, DAG);
19518}
19519
19520SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
19521 return lowerLibCallBase(LibCallDoubleName: "__xl_cos", LibCallFloatName: "__xl_cosf", LibCallDoubleNameFinite: "__xl_cos_finite",
19522 LibCallFloatNameFinite: "__xl_cosf_finite", Op, DAG);
19523}
19524
19525SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
19526 return lowerLibCallBase(LibCallDoubleName: "__xl_log", LibCallFloatName: "__xl_logf", LibCallDoubleNameFinite: "__xl_log_finite",
19527 LibCallFloatNameFinite: "__xl_logf_finite", Op, DAG);
19528}
19529
19530SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
19531 return lowerLibCallBase(LibCallDoubleName: "__xl_log10", LibCallFloatName: "__xl_log10f", LibCallDoubleNameFinite: "__xl_log10_finite",
19532 LibCallFloatNameFinite: "__xl_log10f_finite", Op, DAG);
19533}
19534
19535SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
19536 return lowerLibCallBase(LibCallDoubleName: "__xl_exp", LibCallFloatName: "__xl_expf", LibCallDoubleNameFinite: "__xl_exp_finite",
19537 LibCallFloatNameFinite: "__xl_expf_finite", Op, DAG);
19538}
19539
19540// If we happen to match to an aligned D-Form, check if the Frame Index is
19541// adequately aligned. If it is not, reset the mode to match to X-Form.
19542static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
19543 PPC::AddrMode &Mode) {
19544 if (!isa<FrameIndexSDNode>(Val: N))
19545 return;
19546 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
19547 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
19548 Mode = PPC::AM_XForm;
19549}
19550
/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
/// compute the address flags of the node, get the optimal address mode based
/// on the flags, and set the Base and Disp based on the address mode.
19554PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
19555 SDValue N, SDValue &Disp,
19556 SDValue &Base,
19557 SelectionDAG &DAG,
19558 MaybeAlign Align) const {
19559 SDLoc DL(Parent);
19560
19561 // Compute the address flags.
19562 unsigned Flags = computeMOFlags(Parent, N, DAG);
19563
19564 // Get the optimal address mode based on the Flags.
19565 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
19566
19567 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
19568 // Select an X-Form load if it is not.
19569 setXFormForUnalignedFI(N, Flags, Mode);
19570
19571 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
19572 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
19573 assert(Subtarget.isUsingPCRelativeCalls() &&
19574 "Must be using PC-Relative calls when a valid PC-Relative node is "
19575 "present!");
19576 Mode = PPC::AM_PCRel;
19577 }
19578
19579 // Set Base and Disp accordingly depending on the address mode.
19580 switch (Mode) {
19581 case PPC::AM_DForm:
19582 case PPC::AM_DSForm:
19583 case PPC::AM_DQForm: {
19584 // This is a register plus a 16-bit immediate. The base will be the
19585 // register and the displacement will be the immediate unless it
19586 // isn't sufficiently aligned.
19587 if (Flags & PPC::MOF_RPlusSImm16) {
19588 SDValue Op0 = N.getOperand(i: 0);
19589 SDValue Op1 = N.getOperand(i: 1);
19590 int16_t Imm = Op1->getAsZExtVal();
19591 if (!Align || isAligned(Lhs: *Align, SizeInBytes: Imm)) {
19592 Disp = DAG.getSignedTargetConstant(Val: Imm, DL, VT: N.getValueType());
19593 Base = Op0;
19594 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Op0)) {
19595 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
19596 fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
19597 }
19598 break;
19599 }
19600 }
19601 // This is a register plus the @lo relocation. The base is the register
19602 // and the displacement is the global address.
19603 else if (Flags & PPC::MOF_RPlusLo) {
19604 Disp = N.getOperand(i: 1).getOperand(i: 0); // The global address.
19605 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
19606 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
19607 Disp.getOpcode() == ISD::TargetConstantPool ||
19608 Disp.getOpcode() == ISD::TargetJumpTable);
19609 Base = N.getOperand(i: 0);
19610 break;
19611 }
    // This is a constant address of at most 32 bits. The base will be
    // zero or load-immediate-shifted (LIS) and the displacement will be
    // the low 16 bits of the address.
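    // For illustration: the constant address 0x12348000 splits into
    // Base = LIS 0x1235 and Disp = -0x8000; the high halfword is bumped to
    // 0x1235 to compensate for the negative sign-extended displacement.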
19615 else if (Flags & PPC::MOF_AddrIsSImm32) {
19616 auto *CN = cast<ConstantSDNode>(Val&: N);
19617 EVT CNType = CN->getValueType(ResNo: 0);
19618 uint64_t CNImm = CN->getZExtValue();
19619 // If this address fits entirely in a 16-bit sext immediate field, codegen
19620 // this as "d, 0".
19621 int16_t Imm;
19622 if (isIntS16Immediate(N: CN, Imm) && (!Align || isAligned(Lhs: *Align, SizeInBytes: Imm))) {
19623 Disp = DAG.getSignedTargetConstant(Val: Imm, DL, VT: CNType);
19624 Base = DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19625 VT: CNType);
19626 break;
19627 }
19628 // Handle 32-bit sext immediate with LIS + Addr mode.
19629 if ((CNType == MVT::i32 || isInt<32>(x: CNImm)) &&
19630 (!Align || isAligned(Lhs: *Align, SizeInBytes: CNImm))) {
19631 int32_t Addr = (int32_t)CNImm;
19632 // Otherwise, break this down into LIS + Disp.
19633 Disp = DAG.getSignedTargetConstant(Val: (int16_t)Addr, DL, VT: MVT::i32);
19634 Base = DAG.getSignedTargetConstant(Val: (Addr - (int16_t)Addr) >> 16, DL,
19635 VT: MVT::i32);
19636 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
19637 Base = SDValue(DAG.getMachineNode(Opcode: LIS, dl: DL, VT: CNType, Op1: Base), 0);
19638 break;
19639 }
19640 }
    // Otherwise, the PPC::MOF_NotAddNorCst flag is set; the load/store is
    // non-foldable.
19642 Disp = DAG.getTargetConstant(Val: 0, DL, VT: getPointerTy(DL: DAG.getDataLayout()));
19643 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N)) {
19644 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
19645 fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType());
19646 } else
19647 Base = N;
19648 break;
19649 }
19650 case PPC::AM_PrefixDForm: {
19651 int64_t Imm34 = 0;
19652 unsigned Opcode = N.getOpcode();
19653 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
19654 (isIntS34Immediate(Op: N.getOperand(i: 1), Imm&: Imm34))) {
      // N is an ADD/OR node, and its second operand is a 34-bit signed
      // immediate.
19656 Disp = DAG.getSignedTargetConstant(Val: Imm34, DL, VT: N.getValueType());
19657 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0)))
19658 Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType());
19659 else
19660 Base = N.getOperand(i: 0);
19661 } else if (isIntS34Immediate(Op: N, Imm&: Imm34)) {
19662 // The address is a 34-bit signed immediate.
19663 Disp = DAG.getSignedTargetConstant(Val: Imm34, DL, VT: N.getValueType());
19664 Base = DAG.getRegister(Reg: PPC::ZERO8, VT: N.getValueType());
19665 }
19666 break;
19667 }
19668 case PPC::AM_PCRel: {
19669 // When selecting PC-Relative instructions, "Base" is not utilized as
19670 // we select the address as [PC+imm].
19671 Disp = N;
19672 break;
19673 }
19674 case PPC::AM_None:
19675 break;
19676 default: { // By default, X-Form is always available to be selected.
19677 // When a frame index is not aligned, we also match by XForm.
19678 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N);
19679 Base = FI ? N : N.getOperand(i: 1);
19680 Disp = FI ? DAG.getRegister(Reg: Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19681 VT: N.getValueType())
19682 : N.getOperand(i: 0);
19683 break;
19684 }
19685 }
19686 return Mode;
19687}
19688
19689CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
19690 bool Return,
19691 bool IsVarArg) const {
19692 switch (CC) {
19693 case CallingConv::Cold:
19694 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
19695 default:
19696 return CC_PPC64_ELF;
19697 }
19698}
19699
19700bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
19701 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
19702}
19703
19704TargetLowering::AtomicExpansionKind
19705PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
19706 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
19707 if (shouldInlineQuadwordAtomics() && Size == 128)
19708 return AtomicExpansionKind::MaskedIntrinsic;
19709
19710 switch (AI->getOperation()) {
19711 case AtomicRMWInst::UIncWrap:
19712 case AtomicRMWInst::UDecWrap:
19713 case AtomicRMWInst::USubCond:
19714 case AtomicRMWInst::USubSat:
19715 return AtomicExpansionKind::CmpXChg;
19716 default:
19717 return TargetLowering::shouldExpandAtomicRMWInIR(RMW: AI);
19718 }
19719
19720 llvm_unreachable("unreachable atomicrmw operation");
19721}
19722
19723TargetLowering::AtomicExpansionKind
19724PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
19725 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
19726 if (shouldInlineQuadwordAtomics() && Size == 128)
19727 return AtomicExpansionKind::MaskedIntrinsic;
19728 return AtomicExpansionKind::LLSC;
19729}
19730
static Intrinsic::ID
getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    return Intrinsic::ppc_atomicrmw_xchg_i128;
  case AtomicRMWInst::Add:
    return Intrinsic::ppc_atomicrmw_add_i128;
  case AtomicRMWInst::Sub:
    return Intrinsic::ppc_atomicrmw_sub_i128;
  case AtomicRMWInst::And:
    return Intrinsic::ppc_atomicrmw_and_i128;
  case AtomicRMWInst::Or:
    return Intrinsic::ppc_atomicrmw_or_i128;
  case AtomicRMWInst::Xor:
    return Intrinsic::ppc_atomicrmw_xor_i128;
  case AtomicRMWInst::Nand:
    return Intrinsic::ppc_atomicrmw_nand_i128;
  }
}

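// Emit a 128-bit atomicrmw as a call to the matching ppc_atomicrmw_*_i128
// intrinsic: the increment is split into low/high i64 halves, the intrinsic
// returns the old value as an {i64, i64} pair, and the halves are recombined
// into a single i128 result.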
Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = Incr->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Type *Int64Ty = Type::getInt64Ty(M->getContext());
  Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
  Value *IncrHi =
      Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
  Value *LoHi = Builder.CreateIntrinsic(
      getIntrinsicForAtomicRMWBinOp128(AI->getOperation()), {},
      {AlignedAddr, IncrLo, IncrHi});
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}

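// Emit a 128-bit cmpxchg as a call to ppc_cmpxchg_i128: the compare and new
// values are split into i64 halves, the call is bracketed by the leading and
// trailing fences required for the given ordering, and the returned halves
// are recombined into the original i128 value.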
Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = CmpVal->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Function *IntCmpXchg =
      Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
  Type *Int64Ty = Type::getInt64Ty(M->getContext());
  Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
  Value *CmpHi =
      Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
  Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
  Value *NewHi =
      Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
  emitLeadingFence(Builder, CI, Ord);
  Value *LoHi =
      Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
  emitTrailingFence(Builder, CI, Ord);
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}
